Add Tesseract OCR

This commit is contained in:
Garret Patti
2026-04-13 19:40:25 -04:00
parent 1350a6f94b
commit 9b2690f639
7 changed files with 299 additions and 9 deletions

View File

@@ -46,6 +46,8 @@ const DEFAULT_PROMPT_EXTRACT =
'Be mindful of different colors of text that may indicate different speakers or emphasis.'
const DEFAULT_PROMPT_TRANSLATE = 'Return ONLY the translated text with no additional commentary.'
/**
 * OCR strategy selector, persisted under the `ai_ocr_mode` setting.
 *
 * `getAiConfig` accepts only `'tesseract'` and `'llm'` verbatim from storage;
 * any other stored value (including a missing one) resolves to `'hybrid'`.
 *
 * NOTE(review): the names suggest `'tesseract'` = Tesseract only, `'llm'` =
 * model only, and `'hybrid'` = a combination gated by
 * `ocrConfidenceThreshold` — confirm against the OCR pipeline, which is not
 * visible from this file.
 */
export type OcrMode =
  | 'hybrid'
  | 'tesseract'
  | 'llm'
export interface AiConfig {
endpoint: string
model: string
@@ -62,6 +64,9 @@ export interface AiConfig {
maxTokensDescribe: number
maxTokensExtract: number
maxTokensTranslate: number
ocrMode: OcrMode
ocrLanguages: string
ocrConfidenceThreshold: number
}
export function getAiConfig(): AiConfig {
@@ -84,10 +89,15 @@ export function getAiConfig(): AiConfig {
const maxTokensDescribe = parseInt(getSetting('ai_max_tokens_describe') ?? '8192', 10) || 8192
const maxTokensExtract = parseInt(getSetting('ai_max_tokens_extract') ?? '8192', 10) || 8192
const maxTokensTranslate = parseInt(getSetting('ai_max_tokens_translate') ?? '8192', 10) || 8192
const rawOcrMode = getSetting('ai_ocr_mode') ?? 'hybrid'
const ocrMode: OcrMode = rawOcrMode === 'tesseract' || rawOcrMode === 'llm' ? rawOcrMode : 'hybrid'
const ocrLanguages = getSetting('ai_ocr_languages') ?? 'eng'
// Fall back to 70 only when the stored value is missing or not a number.
// The previous `parseInt(...) || 70` also replaced a stored 0 with 70, even
// though updateAiConfig clamps the threshold to 0–100 and will persist 0.
const parsedOcrConfidenceThreshold = parseInt(getSetting('ai_ocr_confidence_threshold') ?? '70', 10)
const ocrConfidenceThreshold = Number.isNaN(parsedOcrConfidenceThreshold) ? 70 : parsedOcrConfidenceThreshold
return {
endpoint, model, modelTagging, modelDescribe, modelExtract, modelTranslate, enabled,
promptDescribe, promptTagger, promptExtract, promptTranslate,
maxTokensTag, maxTokensDescribe, maxTokensExtract, maxTokensTranslate,
ocrMode, ocrLanguages, ocrConfidenceThreshold,
}
}
@@ -107,6 +117,9 @@ export function updateAiConfig(
maxTokensDescribe?: number,
maxTokensExtract?: number,
maxTokensTranslate?: number,
ocrMode?: OcrMode,
ocrLanguages?: string,
ocrConfidenceThreshold?: number,
): void {
setSetting('ai_endpoint', endpoint)
setSetting('ai_model', model)
@@ -123,6 +136,9 @@ export function updateAiConfig(
if (maxTokensDescribe !== undefined) setSetting('ai_max_tokens_describe', String(Math.max(1, Math.floor(maxTokensDescribe))))
if (maxTokensExtract !== undefined) setSetting('ai_max_tokens_extract', String(Math.max(1, Math.floor(maxTokensExtract))))
if (maxTokensTranslate !== undefined) setSetting('ai_max_tokens_translate', String(Math.max(1, Math.floor(maxTokensTranslate))))
if (ocrMode !== undefined) setSetting('ai_ocr_mode', ocrMode)
if (ocrLanguages !== undefined) setSetting('ai_ocr_languages', ocrLanguages.trim() || 'eng')
if (ocrConfidenceThreshold !== undefined) setSetting('ai_ocr_confidence_threshold', String(Math.max(0, Math.min(100, Math.floor(ocrConfidenceThreshold)))))
}
export function getPreferredLanguage(): string {
@@ -249,6 +265,9 @@ export function getEffectiveAiConfig(libraryId: string): AiConfig {
maxTokensDescribe: overrides.maxTokensDescribe ?? global.maxTokensDescribe,
maxTokensExtract: overrides.maxTokensExtract ?? global.maxTokensExtract,
maxTokensTranslate: overrides.maxTokensTranslate ?? global.maxTokensTranslate,
ocrMode: global.ocrMode,
ocrLanguages: global.ocrLanguages,
ocrConfidenceThreshold: global.ocrConfidenceThreshold,
}
}