reduce api calls for text extraction
This commit is contained in:
@@ -554,6 +554,24 @@ export async function generateItemDescription(itemKey: string): Promise<string>
|
|||||||
* If the extracted text is not in the user's preferred language, auto-translates it.
|
* If the extracted text is not in the user's preferred language, auto-translates it.
|
||||||
* Returns { extractedText, translatedText }.
|
* Returns { extractedText, translatedText }.
|
||||||
*/
|
*/
|
||||||
|
/**
 * Parse a structured extraction response from the AI.
 *
 * The model is asked to answer with a JSON object of the form
 * `{"needsTranslation": boolean, "text": string}`, but the reply may wrap it
 * in markdown fences or add commentary around it. This function therefore
 * searches `raw` for a brace-delimited span that parses as JSON and has the
 * expected shape.
 *
 * Candidates are tried from the widest span (first `{` to last `}`) inwards,
 * so a well-formed reply is resolved on the first attempt and braces inside
 * the extracted text itself do not truncate the object.
 *
 * @param raw - The raw text returned by the model.
 * @returns The parsed `{ text, needsTranslation }` pair, or `null` if no span
 *   of `raw` is valid JSON with a string `text` and a boolean
 *   `needsTranslation`.
 */
function parseStructuredExtraction(raw: string): { text: string; needsTranslation: boolean } | null {
  // Upper bound on JSON.parse attempts so brace-heavy input cannot cause
  // quadratic work; a well-formed reply succeeds on the very first attempt.
  const maxAttempts = 64

  // Positions of every opening brace (ascending) and closing brace (descending).
  const openPositions: number[] = []
  const closePositionsDesc: number[] = []
  for (let i = 0; i < raw.length; i++) {
    if (raw.charAt(i) === '{') openPositions.push(i)
  }
  for (let i = raw.length - 1; i >= 0; i--) {
    if (raw.charAt(i) === '}') closePositionsDesc.push(i)
  }

  let attempts = 0
  for (const start of openPositions) {
    for (const end of closePositionsDesc) {
      // Closing braces are visited right-to-left; once one precedes `start`,
      // every remaining one does too.
      if (end < start) break
      if (attempts >= maxAttempts) return null
      attempts++

      let candidate: unknown
      try {
        candidate = JSON.parse(raw.slice(start, end + 1))
      } catch {
        // Not valid JSON for this span (e.g. stray braces in surrounding
        // commentary) — try the next, narrower candidate.
        continue
      }

      if (typeof candidate !== 'object' || candidate === null) continue
      // Safe after the object check above: fields are read as `unknown` and
      // validated individually before use.
      const fields = candidate as Record<string, unknown>
      const text = fields.text
      const needsTranslation = fields.needsTranslation
      if (typeof text === 'string' && typeof needsTranslation === 'boolean') {
        return { text, needsTranslation }
      }
    }
  }

  return null
}
|
||||||
|
|
||||||
export async function extractItemText(itemKey: string): Promise<{ extractedText: string; translatedText: string | null }> {
|
export async function extractItemText(itemKey: string): Promise<{ extractedText: string; translatedText: string | null }> {
|
||||||
const libraryId = itemKey.split(':')[0]
|
const libraryId = itemKey.split(':')[0]
|
||||||
const config = getEffectiveAiConfig(libraryId)
|
const config = getEffectiveAiConfig(libraryId)
|
||||||
@@ -590,9 +608,46 @@ export async function extractItemText(itemKey: string): Promise<{ extractedText:
|
|||||||
const thumbnailPath = await getAiImagePath(resolvedMedia.path, libraryId)
|
const thumbnailPath = await getAiImagePath(resolvedMedia.path, libraryId)
|
||||||
const base64Images = [fs.readFileSync(thumbnailPath, 'base64')]
|
const base64Images = [fs.readFileSync(thumbnailPath, 'base64')]
|
||||||
|
|
||||||
const systemPrompt = `You are an OCR assistant. Extract ALL text visible in the image exactly as it appears. Preserve line breaks and formatting.${config.promptExtract ? ' ' + config.promptExtract : ''} If there is no text in the image, respond with exactly: [NO TEXT]`
|
const preferredLanguage = getPreferredLanguage()
|
||||||
|
const customInstruction = config.promptExtract ? ' ' + config.promptExtract : ''
|
||||||
|
|
||||||
const extractedText = await callVisionApiText(config.endpoint, extractModel, base64Images, systemPrompt)
|
// When a preferred language is configured, ask the AI to also flag whether translation is needed.
|
||||||
|
// This avoids a separate translation API call for text already in the target language.
|
||||||
|
let systemPrompt: string
|
||||||
|
if (preferredLanguage) {
|
||||||
|
systemPrompt = `You are an OCR assistant. Extract ALL text visible in the image exactly as it appears. Preserve line breaks and formatting.${customInstruction}
|
||||||
|
|
||||||
|
Respond ONLY with a valid JSON object — no markdown, no explanation:
|
||||||
|
{"needsTranslation": boolean, "text": "extracted text"}
|
||||||
|
|
||||||
|
Rules:
|
||||||
|
- Set needsTranslation to true if the text is NOT already written in ${preferredLanguage}.
|
||||||
|
- Set needsTranslation to false if the text IS in ${preferredLanguage}, or if there is no text.
|
||||||
|
- If there is no text in the image, use exactly: {"needsTranslation": false, "text": "[NO TEXT]"}`
|
||||||
|
} else {
|
||||||
|
systemPrompt = `You are an OCR assistant. Extract ALL text visible in the image exactly as it appears. Preserve line breaks and formatting.${customInstruction} If there is no text in the image, respond with exactly: [NO TEXT]`
|
||||||
|
}
|
||||||
|
|
||||||
|
const rawResponse = await callVisionApiText(config.endpoint, extractModel, base64Images, systemPrompt)
|
||||||
|
|
||||||
|
// Parse the response — structured JSON when a preferred language is set, plain text otherwise
|
||||||
|
let extractedText: string
|
||||||
|
let needsTranslation: boolean
|
||||||
|
|
||||||
|
if (preferredLanguage) {
|
||||||
|
const parsed = parseStructuredExtraction(rawResponse)
|
||||||
|
if (parsed) {
|
||||||
|
extractedText = parsed.text
|
||||||
|
needsTranslation = parsed.needsTranslation
|
||||||
|
} else {
|
||||||
|
// Malformed JSON fallback: treat raw response as plain text and attempt translation
|
||||||
|
extractedText = rawResponse
|
||||||
|
needsTranslation = true
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
extractedText = rawResponse
|
||||||
|
needsTranslation = false
|
||||||
|
}
|
||||||
|
|
||||||
if (!extractedText || extractedText === '[NO TEXT]') {
|
if (!extractedText || extractedText === '[NO TEXT]') {
|
||||||
db.prepare('UPDATE media_items SET extracted_text = NULL, extracted_text_translated = NULL WHERE item_key = ?').run(itemKey)
|
db.prepare('UPDATE media_items SET extracted_text = NULL, extracted_text_translated = NULL WHERE item_key = ?').run(itemKey)
|
||||||
@@ -601,10 +656,9 @@ export async function extractItemText(itemKey: string): Promise<{ extractedText:
|
|||||||
|
|
||||||
db.prepare('UPDATE media_items SET extracted_text = ? WHERE item_key = ?').run(extractedText, itemKey)
|
db.prepare('UPDATE media_items SET extracted_text = ? WHERE item_key = ?').run(extractedText, itemKey)
|
||||||
|
|
||||||
// Auto-translate if preferred language is set
|
// Only translate if the extraction step determined the text is not already in the preferred language
|
||||||
const preferredLanguage = getPreferredLanguage()
|
|
||||||
let translatedText: string | null = null
|
let translatedText: string | null = null
|
||||||
if (preferredLanguage) {
|
if (preferredLanguage && needsTranslation) {
|
||||||
const translateModel = config.modelTranslate || config.model
|
const translateModel = config.modelTranslate || config.model
|
||||||
try {
|
try {
|
||||||
translatedText = await translateText(config.endpoint, translateModel, extractedText, preferredLanguage, config.promptTranslate)
|
translatedText = await translateText(config.endpoint, translateModel, extractedText, preferredLanguage, config.promptTranslate)
|
||||||
|
|||||||
Reference in New Issue
Block a user