From 9b2690f6390499abb891b4eeb8fa47929778935a Mon Sep 17 00:00:00 2001 From: Garret Patti <42485635+garretpatti@users.noreply.github.com> Date: Mon, 13 Apr 2026 19:40:25 -0400 Subject: [PATCH] add tesseract ocr --- package-lock.json | 117 ++++++++++++++++++++++++++++- package.json | 3 +- src/app/api/ai-settings/route.ts | 9 ++- src/app/manage/ai-tagging/page.tsx | 70 +++++++++++++++++ src/lib/ai-tagger.ts | 59 +++++++++++++-- src/lib/app-settings.ts | 19 +++++ src/lib/thumbnails.ts | 31 ++++++++ 7 files changed, 299 insertions(+), 9 deletions(-) diff --git a/package-lock.json b/package-lock.json index 192a537..9dc18cb 100644 --- a/package-lock.json +++ b/package-lock.json @@ -17,7 +17,8 @@ "node-cron": "^4.2.1", "react": "^19.2.4", "react-dom": "^19.2.4", - "sharp": "^0.34.5" + "sharp": "^0.34.5", + "tesseract.js": "^7.0.0" }, "devDependencies": { "@tailwindcss/postcss": "^4.2.2", @@ -2950,6 +2951,12 @@ "readable-stream": "^3.4.0" } }, + "node_modules/bmp-js": { + "version": "0.1.0", + "resolved": "https://registry.npmjs.org/bmp-js/-/bmp-js-0.1.0.tgz", + "integrity": "sha512-vHdS19CnY3hwiNdkaqk93DvjVLfbEcI8mys4UjuWrlX1haDmroo8o4xCzh4wD6DGV6HxRCyauwhHRqMTfERtjw==", + "license": "MIT" + }, "node_modules/brace-expansion": { "version": "1.1.12", "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.12.tgz", @@ -4803,6 +4810,12 @@ "hermes-estree": "0.25.1" } }, + "node_modules/idb-keyval": { + "version": "6.2.2", + "resolved": "https://registry.npmjs.org/idb-keyval/-/idb-keyval-6.2.2.tgz", + "integrity": "sha512-yjD9nARJ/jb1g+CvD0tlhUHOrJ9Sy0P8T9MF3YaLlHnSRpwPfpTX0XIvpmw3gAJUmEu3FiICLBDPXVwyEvrleg==", + "license": "Apache-2.0" + }, "node_modules/ieee754": { "version": "1.2.1", "resolved": "https://registry.npmjs.org/ieee754/-/ieee754-1.2.1.tgz", @@ -5288,6 +5301,12 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/is-url": { + "version": "1.2.4", + "resolved": "https://registry.npmjs.org/is-url/-/is-url-1.2.4.tgz", + "integrity": "sha512-ITvGim8FhRiYe4IQ5uHSkj7pVaPDrCTkNd3yq3cV7iZAcJdHTUMPMEHcqSOy9xZ9qFenQCvi+2wjH9a1nXqHww==", + "license": "MIT" + }, "node_modules/is-weakmap": { "version": "2.0.2", "resolved": "https://registry.npmjs.org/is-weakmap/-/is-weakmap-2.0.2.tgz", @@ -6167,6 +6186,26 @@ "semver": "bin/semver.js" } }, + "node_modules/node-fetch": { + "version": "2.7.0", + "resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.7.0.tgz", + "integrity": "sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A==", + "license": "MIT", + "dependencies": { + "whatwg-url": "^5.0.0" + }, + "engines": { + "node": "4.x || >=6.0.0" + }, + "peerDependencies": { + "encoding": "^0.1.0" + }, + "peerDependenciesMeta": { + "encoding": { + "optional": true + } + } + }, "node_modules/node-releases": { "version": "2.0.36", "resolved": "https://registry.npmjs.org/node-releases/-/node-releases-2.0.36.tgz", @@ -6315,6 +6354,15 @@ "wrappy": "1" } }, + "node_modules/opencollective-postinstall": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/opencollective-postinstall/-/opencollective-postinstall-2.0.3.tgz", + "integrity": "sha512-8AV/sCtuzUeTo8gQK5qDZzARrulB3egtLzFgteqB2tcT4Mw7B8Kt7JcDHmltjz6FOAHsvTevk70gZEbhM4ZS9Q==", + "license": "MIT", + "bin": { + "opencollective-postinstall": "index.js" + } + }, "node_modules/optionator": { "version": "0.9.4", "resolved": "https://registry.npmjs.org/optionator/-/optionator-0.9.4.tgz", @@ -6747,6 +6795,12 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/regenerator-runtime": { + "version": "0.13.11", + "resolved": "https://registry.npmjs.org/regenerator-runtime/-/regenerator-runtime-0.13.11.tgz", + "integrity": "sha512-kY1AZVr2Ra+t+piVaJ4gxaFaReZVH40AKNo7UCX6W+dEwBo/2oZJzqfuN1qLq1oL45o56cPaTXELwrTh8Fpggg==", + "license": "MIT" + }, "node_modules/regexp.prototype.flags": { "version": "1.5.4", "resolved": "https://registry.npmjs.org/regexp.prototype.flags/-/regexp.prototype.flags-1.5.4.tgz", @@ -7585,6 +7639,30 @@ "streamx": "^2.12.5" } }, + "node_modules/tesseract.js": { + "version": "7.0.0", + "resolved": "https://registry.npmjs.org/tesseract.js/-/tesseract.js-7.0.0.tgz", + "integrity": "sha512-exPBkd+z+wM1BuMkx/Bjv43OeLBxhL5kKWsz/9JY+DXcXdiBjiAch0V49QR3oAJqCaL5qURE0vx9Eo+G5YE7mA==", + "hasInstallScript": true, + "license": "Apache-2.0", + "dependencies": { + "bmp-js": "^0.1.0", + "idb-keyval": "^6.2.0", + "is-url": "^1.2.4", + "node-fetch": "^2.6.9", + "opencollective-postinstall": "^2.0.3", + "regenerator-runtime": "^0.13.3", + "tesseract.js-core": "^7.0.0", + "wasm-feature-detect": "^1.8.0", + "zlibjs": "^0.3.1" + } + }, + "node_modules/tesseract.js-core": { + "version": "7.0.0", + "resolved": "https://registry.npmjs.org/tesseract.js-core/-/tesseract.js-core-7.0.0.tgz", + "integrity": "sha512-WnNH518NzmbSq9zgTPeoF8c+xmilS8rFIl1YKbk/ptuuc7p6cLNELNuPAzcmsYw450ca6bLa8j3t0VAtq435Vw==", + "license": "Apache-2.0" + }, "node_modules/text-decoder": { "version": "1.2.7", "resolved": "https://registry.npmjs.org/text-decoder/-/text-decoder-1.2.7.tgz", @@ -7655,6 +7733,12 @@ "node": ">=8.0" } }, + "node_modules/tr46": { + "version": "0.0.3", + "resolved": "https://registry.npmjs.org/tr46/-/tr46-0.0.3.tgz", + "integrity": "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==", + "license": "MIT" + }, "node_modules/ts-api-utils": { "version": "2.5.0", "resolved": "https://registry.npmjs.org/ts-api-utils/-/ts-api-utils-2.5.0.tgz", @@ -7955,6 +8039,28 @@ "integrity": "sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw==", "license": "MIT" }, + "node_modules/wasm-feature-detect": { + "version": "1.8.0", + "resolved": "https://registry.npmjs.org/wasm-feature-detect/-/wasm-feature-detect-1.8.0.tgz", + "integrity": "sha512-zksaLKM2fVlnB5jQQDqKXXwYHLQUVH9es+5TOOHwGOVJOCeRBCiPjwSg+3tN2AdTCzjgli4jijCH290kXb/zWQ==", + "license": "Apache-2.0" + }, + "node_modules/webidl-conversions": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-3.0.1.tgz", + "integrity": "sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==", + "license": "BSD-2-Clause" + }, + "node_modules/whatwg-url": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-5.0.0.tgz", + "integrity": "sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw==", + "license": "MIT", + "dependencies": { + "tr46": "~0.0.3", + "webidl-conversions": "^3.0.0" + } + }, "node_modules/which": { "version": "2.0.2", "resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz", @@ -8237,6 +8343,15 @@ "node": "^12.22.0 || ^14.17.0 || >=16.0.0" } }, + "node_modules/zlibjs": { + "version": "0.3.1", + "resolved": "https://registry.npmjs.org/zlibjs/-/zlibjs-0.3.1.tgz", + "integrity": "sha512-+J9RrgTKOmlxFSDHo0pI1xM6BLVUv+o0ZT9ANtCxGkjIVCCUdx9alUF8Gm+dGLKbkkkidWIHFDZHDMpfITt4+w==", + "license": "MIT", + "engines": { + "node": "*" + } + }, "node_modules/zod": { "version": "4.3.6", "resolved": "https://registry.npmjs.org/zod/-/zod-4.3.6.tgz", diff --git a/package.json b/package.json index 0575972..1eca2bf 100644 --- a/package.json +++ b/package.json @@ -20,7 +20,8 @@ "node-cron": "^4.2.1", "react": "^19.2.4", "react-dom": "^19.2.4", - "sharp": "^0.34.5" + "sharp": "^0.34.5", + "tesseract.js": "^7.0.0" }, "devDependencies": { "@tailwindcss/postcss": "^4.2.2", diff --git a/src/app/api/ai-settings/route.ts b/src/app/api/ai-settings/route.ts index 1de721b..f1f8a35 100644 --- a/src/app/api/ai-settings/route.ts +++ b/src/app/api/ai-settings/route.ts @@ -1,6 +1,6 @@ import { NextRequest, NextResponse } from 'next/server' import { requireAdmin } from '@/lib/auth' -import { getAiConfig, updateAiConfig, getPreferredLanguage, setPreferredLanguage, getAiMaxRetries, setAiMaxRetries } from '@/lib/app-settings' +import { getAiConfig, updateAiConfig, getPreferredLanguage, setPreferredLanguage, getAiMaxRetries, setAiMaxRetries, type OcrMode } from '@/lib/app-settings' export async function GET(request: NextRequest) { const auth = await requireAdmin(request) @@ -34,6 +34,9 @@ export async function PUT(request: NextRequest) { maxTokensDescribe?: number maxTokensExtract?: number maxTokensTranslate?: number + ocrMode?: string + ocrLanguages?: string + ocrConfidenceThreshold?: number } try { body = await request.json() @@ -47,6 +50,7 @@ export async function PUT(request: NextRequest) { promptDescribe, promptTagger, promptExtract, promptTranslate, maxRetries, maxTokensTag, maxTokensDescribe, maxTokensExtract, maxTokensTranslate, + ocrMode, ocrLanguages, ocrConfidenceThreshold, } = body if (typeof endpoint !== 'string') { @@ -75,6 +79,9 @@ export async function PUT(request: NextRequest) { typeof maxTokensDescribe === 'number' ? maxTokensDescribe : undefined, typeof maxTokensExtract === 'number' ? maxTokensExtract : undefined, typeof maxTokensTranslate === 'number' ? maxTokensTranslate : undefined, + (ocrMode === 'hybrid' || ocrMode === 'tesseract' || ocrMode === 'llm') ? (ocrMode as OcrMode) : undefined, + typeof ocrLanguages === 'string' ? ocrLanguages : undefined, + typeof ocrConfidenceThreshold === 'number' ? ocrConfidenceThreshold : undefined, ) if (typeof preferredLanguage === 'string' && preferredLanguage.trim()) { diff --git a/src/app/manage/ai-tagging/page.tsx b/src/app/manage/ai-tagging/page.tsx index c3db484..e1a9942 100644 --- a/src/app/manage/ai-tagging/page.tsx +++ b/src/app/manage/ai-tagging/page.tsx @@ -20,6 +20,9 @@ interface AiSettings { maxTokensDescribe: number maxTokensExtract: number maxTokensTranslate: number + ocrMode: 'hybrid' | 'tesseract' | 'llm' + ocrLanguages: string + ocrConfidenceThreshold: number } interface AiJob { @@ -76,6 +79,7 @@ export default function AiTaggingPage() { promptDescribe: '', promptTagger: '', promptExtract: '', promptTranslate: '', maxRetries: 3, maxTokensTag: 8192, maxTokensDescribe: 8192, maxTokensExtract: 8192, maxTokensTranslate: 8192, + ocrMode: 'hybrid', ocrLanguages: 'eng', ocrConfidenceThreshold: 70, }) const [loading, setLoading] = useState(true) const [saving, setSaving] = useState(false) @@ -644,6 +648,72 @@ export default function AiTaggingPage() { /> + +
+ {(['hybrid', 'tesseract', 'llm'] as const).map((mode) => ( + + ))} +
+

+ Hybrid runs local OCR first and falls back to the LLM when confidence is low. Tesseract only never calls the LLM. LLM only uses the original behaviour. +

+
+ + + setSettings((s) => ({ ...s, ocrLanguages: e.target.value }))} + placeholder="eng" + className="w-full rounded-lg px-3 py-2 text-sm font-mono outline-none focus:ring-2" + style={{ + backgroundColor: 'var(--background)', + border: '1px solid var(--border)', + color: 'var(--text-primary)', + }} + onFocus={(e) => ((e.currentTarget as HTMLElement).style.borderColor = 'var(--accent)')} + onBlur={(e) => ((e.currentTarget as HTMLElement).style.borderColor = 'var(--border)')} + /> +

+ {`Tesseract language packs to use, joined with '+'. For Japanese manga use jpn+jpn_vert. Language data is downloaded automatically on first use.`} +

+
+ + + + setSettings((s) => ({ ...s, ocrConfidenceThreshold: Math.max(0, Math.min(100, parseInt(e.target.value) || 70)) })) + } + className="w-24 rounded-lg px-3 py-2 text-sm outline-none focus:ring-2" + style={{ + backgroundColor: 'var(--background)', + border: '1px solid var(--border)', + color: 'var(--text-primary)', + }} + onFocus={(e) => ((e.currentTarget as HTMLElement).style.borderColor = 'var(--accent)')} + onBlur={(e) => ((e.currentTarget as HTMLElement).style.borderColor = 'var(--border)')} + /> +

+ In hybrid mode, Tesseract results below this confidence score (0–100) fall back to the LLM. Default is 70. +

+
+ // ─── Text extraction ───────────────────────────────────────────────────────── /** - * Extract text (OCR) from an image using the vision model. + * Run Tesseract OCR on a preprocessed image file. + * Returns the extracted text and a mean confidence score (0–100). + * A confidence of 0 with empty text means no recognisable text was found. + */ +async function extractWithTesseract( + imagePath: string, + languages: string, +): Promise<{ text: string; confidence: number }> { + const { createWorker } = await import('tesseract.js') + const workerPath = path.join(process.cwd(), 'node_modules/tesseract.js/src/worker-script/node/index.js') + const worker = await createWorker(languages, 1, { workerPath }) + try { + const { data } = await worker.recognize(imagePath) + return { text: data.text.trim(), confidence: data.confidence } + } finally { + await worker.terminate() + } +} + +/** + * Extract text (OCR) from an image using the configured OCR mode: + * - hybrid: try Tesseract first; fall back to LLM if confidence is below threshold + * - tesseract: local Tesseract only, no LLM call + * - llm: LLM vision API only (original behaviour) + * * Only works for images in mixed libraries. * Translation is not performed automatically — call translateItemText() separately. * Returns { extractedText, translatedText } where translatedText is always null. @@ -517,10 +541,6 @@ export async function generateItemDescription(itemKey: string): Promise export async function extractItemText(itemKey: string): Promise<{ extractedText: string; translatedText: string | null }> { const libraryId = itemKey.split(':')[0] const config = getEffectiveAiConfig(libraryId) - const extractModel = config.modelExtract || config.model - if (!config.endpoint || !extractModel) { - throw Object.assign(new Error('AI endpoint and model are not configured'), { code: 'NOT_CONFIGURED' }) - } const db = getDb() const item = db @@ -547,12 +567,39 @@ export async function extractItemText(itemKey: string): Promise<{ extractedText: throw Object.assign(new Error('Text extraction is only available for images'), { code: 'NO_IMAGE' }) } + const { ocrMode, ocrLanguages, ocrConfidenceThreshold } = config + + // ── Tesseract path ──────────────────────────────────────────────────────── + if (ocrMode === 'tesseract' || ocrMode === 'hybrid') { + const ocrImagePath = await getOcrImagePath(resolvedMedia.path, libraryId) + const { text, confidence } = await extractWithTesseract(ocrImagePath, ocrLanguages) + + const useTesseractResult = ocrMode === 'tesseract' || confidence >= ocrConfidenceThreshold + if (useTesseractResult) { + console.log(`[ocr] tesseract used for ${itemKey} (confidence=${confidence}, mode=${ocrMode})`) + if (!text) { + db.prepare('UPDATE media_items SET extracted_text = NULL, extracted_text_translated = NULL WHERE item_key = ?').run(itemKey) + return { extractedText: '', translatedText: null } + } + db.prepare('UPDATE media_items SET extracted_text = ?, extracted_text_translated = NULL WHERE item_key = ?').run(text, itemKey) + return { extractedText: text, translatedText: null } + } + console.log(`[ocr] tesseract confidence too low (${confidence} < ${ocrConfidenceThreshold}), falling back to LLM for ${itemKey}`) + } + + // ── LLM vision path ─────────────────────────────────────────────────────── + const extractModel = config.modelExtract || config.model + if (!config.endpoint || !extractModel) { + throw Object.assign(new Error('AI endpoint and model are not configured'), { code: 'NOT_CONFIGURED' }) + } + const thumbnailPath = await getAiImagePath(resolvedMedia.path, libraryId) const base64Images = [fs.readFileSync(thumbnailPath, 'base64')] const customInstruction = config.promptExtract ? ' ' + config.promptExtract : '' const systemPrompt = `You are an OCR assistant. Extract ALL text visible in the image exactly as it appears. Preserve line breaks and formatting.${customInstruction} If there is no text in the image, respond with exactly: [NO TEXT]` + console.log(`[ocr] llm used for ${itemKey} (mode=${ocrMode})`) const extractedText = await callVisionApiText(config.endpoint, extractModel, base64Images, systemPrompt, config.maxTokensExtract) if (!extractedText || extractedText === '[NO TEXT]') { diff --git a/src/lib/app-settings.ts b/src/lib/app-settings.ts index 739338d..2be6a14 100644 --- a/src/lib/app-settings.ts +++ b/src/lib/app-settings.ts @@ -46,6 +46,8 @@ const DEFAULT_PROMPT_EXTRACT = 'Be mindful of different colors of text that may indicate different speakers or emphasis.' const DEFAULT_PROMPT_TRANSLATE = 'Return ONLY the translated text with no additional commentary.' +export type OcrMode = 'hybrid' | 'tesseract' | 'llm' + export interface AiConfig { endpoint: string model: string @@ -62,6 +64,9 @@ export interface AiConfig { maxTokensDescribe: number maxTokensExtract: number maxTokensTranslate: number + ocrMode: OcrMode + ocrLanguages: string + ocrConfidenceThreshold: number } export function getAiConfig(): AiConfig { @@ -84,10 +89,15 @@ export function getAiConfig(): AiConfig { const maxTokensDescribe = parseInt(getSetting('ai_max_tokens_describe') ?? '8192', 10) || 8192 const maxTokensExtract = parseInt(getSetting('ai_max_tokens_extract') ?? '8192', 10) || 8192 const maxTokensTranslate = parseInt(getSetting('ai_max_tokens_translate') ?? '8192', 10) || 8192 + const rawOcrMode = getSetting('ai_ocr_mode') ?? 'hybrid' + const ocrMode: OcrMode = rawOcrMode === 'tesseract' || rawOcrMode === 'llm' ? rawOcrMode : 'hybrid' + const ocrLanguages = getSetting('ai_ocr_languages') ?? 'eng' + const ocrConfidenceThreshold = parseInt(getSetting('ai_ocr_confidence_threshold') ?? '70', 10) || 70 return { endpoint, model, modelTagging, modelDescribe, modelExtract, modelTranslate, enabled, promptDescribe, promptTagger, promptExtract, promptTranslate, maxTokensTag, maxTokensDescribe, maxTokensExtract, maxTokensTranslate, + ocrMode, ocrLanguages, ocrConfidenceThreshold, } } @@ -107,6 +117,9 @@ export function updateAiConfig( maxTokensDescribe?: number, maxTokensExtract?: number, maxTokensTranslate?: number, + ocrMode?: OcrMode, + ocrLanguages?: string, + ocrConfidenceThreshold?: number, ): void { setSetting('ai_endpoint', endpoint) setSetting('ai_model', model) @@ -123,6 +136,9 @@ export function updateAiConfig( if (maxTokensDescribe !== undefined) setSetting('ai_max_tokens_describe', String(Math.max(1, Math.floor(maxTokensDescribe)))) if (maxTokensExtract !== undefined) setSetting('ai_max_tokens_extract', String(Math.max(1, Math.floor(maxTokensExtract)))) if (maxTokensTranslate !== undefined) setSetting('ai_max_tokens_translate', String(Math.max(1, Math.floor(maxTokensTranslate)))) + if (ocrMode !== undefined) setSetting('ai_ocr_mode', ocrMode) + if (ocrLanguages !== undefined) setSetting('ai_ocr_languages', ocrLanguages.trim() || 'eng') + if (ocrConfidenceThreshold !== undefined) setSetting('ai_ocr_confidence_threshold', String(Math.max(0, Math.min(100, Math.floor(ocrConfidenceThreshold))))) } export function getPreferredLanguage(): string { @@ -249,6 +265,9 @@ export function getEffectiveAiConfig(libraryId: string): AiConfig { maxTokensDescribe: overrides.maxTokensDescribe ?? global.maxTokensDescribe, maxTokensExtract: overrides.maxTokensExtract ?? global.maxTokensExtract, maxTokensTranslate: overrides.maxTokensTranslate ?? global.maxTokensTranslate, + ocrMode: global.ocrMode, + ocrLanguages: global.ocrLanguages, + ocrConfidenceThreshold: global.ocrConfidenceThreshold, } } diff --git a/src/lib/thumbnails.ts b/src/lib/thumbnails.ts index 144a446..c043121 100644 --- a/src/lib/thumbnails.ts +++ b/src/lib/thumbnails.ts @@ -60,6 +60,19 @@ async function generateAiImage(src: string, dest: string): Promise { fs.renameSync(tmp, dest) } +/** Generate a grayscale, contrast-normalised PNG for local OCR (Tesseract). + * PNG is lossless and avoids JPEG artefacts that can degrade OCR accuracy. */ +async function generateOcrImage(src: string, dest: string): Promise { + const tmp = dest + '.tmp' + await sharp(src) + .resize(AI_IMAGE_WIDTH, undefined, { withoutEnlargement: true }) + .grayscale() + .normalise() + .png() + .toFile(tmp) + fs.renameSync(tmp, dest) +} + /** Run a child process and collect stderr. Resolves on exit code 0, rejects otherwise. */ function run(bin: string, args: string[]): Promise { return new Promise((resolve, reject) => { @@ -190,6 +203,24 @@ export async function getAiImagePath( return cacheFile } +/** + * Returns the absolute path to a preprocessed PNG suitable for local OCR. + * The image is converted to grayscale and contrast-normalised for better + * Tesseract accuracy. Cached with an `_ocr` suffix. + */ +export async function getOcrImagePath( + absoluteFilePath: string, + libraryId: string +): Promise { + ensureCacheDir() + const key = cacheKey(libraryId, absoluteFilePath) + const cacheFile = path.join(CACHE_DIR, key + '_ocr.png') + const cached = getCachedPath(cacheFile, absoluteFilePath) + if (cached) return cached + await generateOcrImage(absoluteFilePath, cacheFile) + return cacheFile +} + /** * Returns the absolute path to a cached thumbnail JPEG for the given file. * Generates it on first call (or when the source has been modified).