From 1350a6f94bc0c2da1ba1f6676650271f1acd7ecc Mon Sep 17 00:00:00 2001
From: Garret Patti <42485635+garretpatti@users.noreply.github.com>
Date: Mon, 13 Apr 2026 17:45:00 -0400
Subject: [PATCH] separate text extraction and translation

---
 src/components/mixed/ImageLightbox.tsx |  2 +-
 src/components/mixed/MixedView.tsx     | 58 ++++++++++++++++++-
 src/lib/ai-tagger.ts                   | 80 ++------------------------
 3 files changed, 64 insertions(+), 76 deletions(-)
diff --git a/src/components/mixed/ImageLightbox.tsx b/src/components/mixed/ImageLightbox.tsx
index 5c60986..c9117b2 100644
--- a/src/components/mixed/ImageLightbox.tsx
+++ b/src/components/mixed/ImageLightbox.tsx
@@ -414,7 +414,7 @@ export default function ImageLightbox({ url, name, onClose, onPrev, onNext, item
                           ;(e.currentTarget as HTMLElement).style.color = 'var(--text-secondary)'
                         }}
                       >
-                        {retranslating ? '⟳ Translating…' : '🌐 Re-translate'}
+                        {retranslating ? '⟳ Translating…' : translatedText ? '🌐 Re-translate' : '🌐 Translate'}
                       </button>
                     </div>
                   </div>
diff --git a/src/components/mixed/MixedView.tsx b/src/components/mixed/MixedView.tsx
index de652f9..07e93db 100644
--- a/src/components/mixed/MixedView.tsx
+++ b/src/components/mixed/MixedView.tsx
@@ -453,6 +453,18 @@ export default function MixedView({ libraryId, initialPath }: Props) {
                       }
                     }
                   }}
+                  onTranslate={async (e) => {
+                    const itemKey = itemKeyFor(e)
+                    const res = await fetch('/api/ai-tagging/translate', {
+                      method: 'POST',
+                      headers: { 'Content-Type': 'application/json' },
+                      body: JSON.stringify({ itemKey }),
+                    })
+                    if (!res.ok) {
+                      const data = await res.json().catch(() => ({}))
+                      throw new Error((data as { error?: string }).error ?? 'Translation failed')
+                    }
+                  }}
                   onDelete={(e) => {
                     const rel = filtersActive ? e.name : (currentPath ? `${currentPath}/${e.name}` : e.name)
                     fetch(`/api/browse?libraryId=${encodeURIComponent(libraryId)}&path=${encodeURIComponent(rel)}`, { method: 'DELETE' })
@@ -582,7 +594,7 @@ export default function MixedView({ libraryId, initialPath }: Props) {
   )
 }
 
-function EntryTile({ entry, onOpen, onTag, onDelete, onRename, onAiTag, onExtractText, onDescribe }: { entry: FileEntry; onOpen: (e: FileEntry) => void; onTag: (e: FileEntry) => void; onDelete?: (e: FileEntry) => void; onRename?: (e: FileEntry, newName: string) => Promise<boolean>; onAiTag?: (e: FileEntry) => Promise<void>; onExtractText?: (e: FileEntry) => Promise<void>; onDescribe?: (e: FileEntry) => Promise<void> }) {
+function EntryTile({ entry, onOpen, onTag, onDelete, onRename, onAiTag, onExtractText, onDescribe, onTranslate }: { entry: FileEntry; onOpen: (e: FileEntry) => void; onTag: (e: FileEntry) => void; onDelete?: (e: FileEntry) => void; onRename?: (e: FileEntry, newName: string) => Promise<boolean>; onAiTag?: (e: FileEntry) => Promise<void>; onExtractText?: (e: FileEntry) => Promise<void>; onDescribe?: (e: FileEntry) => Promise<void>; onTranslate?: (e: FileEntry) => Promise<void> }) {
   type ImgState = 'loading' | 'loaded' | 'error'
   const [imgState, setImgState] = useState<ImgState>(
     entry.thumbnailUrl ? 'loading' : 'error'
@@ -601,6 +613,8 @@ function EntryTile({ entry, onOpen, onTag, onDelete, onRename, onAiTag, onExtrac
   const [textExtractError, setTextExtractError] = useState<string | null>(null)
   const [describing, setDescribing] = useState(false)
   const [describeError, setDescribeError] = useState<string | null>(null)
+  const [translating, setTranslating] = useState(false)
+  const [translateError, setTranslateError] = useState<string | null>(null)
 
   useEffect(() => {
     if (!menuOpen) return
@@ -830,6 +844,26 @@ function EntryTile({ entry, onOpen, onTag, onDelete, onRename, onAiTag, onExtrac
                   🔍 Extract Text for Folder
                 </button>
               )}
+              {onTranslate && entry.mediaType === 'image' && (
+                <button
+                  onClick={(e) => {
+                    e.stopPropagation()
+                    setMenuOpen(false)
+                    setTranslating(true)
+                    setTranslateError(null)
+                    onTranslate(entry)
+                      .catch((err) => setTranslateError(err instanceof Error ? err.message : 'Translation failed'))
+                      .finally(() => setTranslating(false))
+                  }}
+                  disabled={translating}
+                  className="flex items-center gap-2 w-full px-4 py-2 text-sm text-left transition-colors disabled:opacity-50"
+                  style={{ color: 'var(--text-primary)' }}
+                  onMouseEnter={(e) => ((e.currentTarget as HTMLElement).style.backgroundColor = 'var(--border)')}
+                  onMouseLeave={(e) => ((e.currentTarget as HTMLElement).style.backgroundColor = 'transparent')}
+                >
+                  🌐 Translate
+                </button>
+              )}
               {onRename && (
                 <button
                   onClick={(e) => {
@@ -929,6 +963,28 @@ function EntryTile({ entry, onOpen, onTag, onDelete, onRename, onAiTag, onExtrac
         </div>
       )}
 
+      {/* Translation status overlay */}
+      {(translating || translateError) && (
+        <div
+          className="absolute inset-x-0 bottom-0 z-10 px-2 py-1.5 text-xs"
+          style={{ backgroundColor: translateError ? 'rgba(127,29,29,0.9)' : 'rgba(0,0,0,0.75)' }}
+          onClick={(e) => e.stopPropagation()}
+        >
+          <span style={{ color: translateError ? '#fca5a5' : 'var(--text-secondary)' }}>
+            {translateError ?? 'Translating…'}
+          </span>
+          {translateError && (
+            <button
+              onClick={() => setTranslateError(null)}
+              className="ml-2 underline text-xs"
+              style={{ color: '#fca5a5' }}
+            >
+              dismiss
+            </button>
+          )}
+        </div>
+      )}
+
       {/* Delete confirmation overlay */}
       {confirming && (
         <div
diff --git a/src/lib/ai-tagger.ts b/src/lib/ai-tagger.ts
index 31c4470..4c84ded 100644
--- a/src/lib/ai-tagger.ts
+++ b/src/lib/ai-tagger.ts
@@ -511,27 +511,9 @@ export async function generateItemDescription(itemKey: string): Promise<string>
 /**
  * Extract text (OCR) from an image using the vision model.
  * Only works for images in mixed libraries.
- * If the extracted text is not in the user's preferred language, auto-translates it.
- * Returns { extractedText, translatedText }.
+ * Translation is not performed automatically — call translateItemText() separately.
+ * Resets any stored translation to NULL; returns { extractedText, translatedText } where translatedText is always null.
  */
-/**
- * Parse a structured extraction response from the AI.
- * Returns null if the response cannot be parsed as valid JSON with the expected shape.
- */
-function parseStructuredExtraction(raw: string): { text: string; needsTranslation: boolean } | null {
-  const jsonMatch = raw.match(/\{[\s\S]*\}/)
-  if (!jsonMatch) return null
-  try {
-    const parsed = JSON.parse(jsonMatch[0])
-    if (typeof parsed.text === 'string' && typeof parsed.needsTranslation === 'boolean') {
-      return { text: parsed.text, needsTranslation: parsed.needsTranslation }
-    }
-  } catch {
-    // fall through
-  }
-  return null
-}
-
 export async function extractItemText(itemKey: string): Promise<{ extractedText: string; translatedText: string | null }> {
   const libraryId = itemKey.split(':')[0]
   const config = getEffectiveAiConfig(libraryId)
@@ -568,69 +550,19 @@ export async function extractItemText(itemKey: string): Promise<{ extractedText:
   const thumbnailPath = await getAiImagePath(resolvedMedia.path, libraryId)
   const base64Images = [fs.readFileSync(thumbnailPath, 'base64')]
 
-  const preferredLanguage = getPreferredLanguage()
   const customInstruction = config.promptExtract ? ' ' + config.promptExtract : ''
+  const systemPrompt = `You are an OCR assistant. Extract ALL text visible in the image exactly as it appears. Preserve line breaks and formatting.${customInstruction} If there is no text in the image, respond with exactly: [NO TEXT]`
 
-  // When a preferred language is configured, ask the AI to also flag whether translation is needed.
-  // This avoids a separate translation API call for text already in the target language.
-  let systemPrompt: string
-  if (preferredLanguage) {
-    systemPrompt = `You are an OCR assistant. Extract ALL text visible in the image exactly as it appears. Preserve line breaks and formatting.${customInstruction}
-
-Respond ONLY with a valid JSON object — no markdown, no explanation:
-{"needsTranslation": boolean, "text": "extracted text"}
-
-Rules:
-- Set needsTranslation to true if the text is NOT already written in ${preferredLanguage}.
-- Set needsTranslation to false if the text IS in ${preferredLanguage}, or if there is no text.
-- If there is no text in the image, use exactly: {"needsTranslation": false, "text": "[NO TEXT]"}`
-  } else {
-    systemPrompt = `You are an OCR assistant. Extract ALL text visible in the image exactly as it appears. Preserve line breaks and formatting.${customInstruction} If there is no text in the image, respond with exactly: [NO TEXT]`
-  }
-
-  const rawResponse = await callVisionApiText(config.endpoint, extractModel, base64Images, systemPrompt, config.maxTokensExtract)
-
-  // Parse the response — structured JSON when a preferred language is set, plain text otherwise
-  let extractedText: string
-  let needsTranslation: boolean
-
-  if (preferredLanguage) {
-    const parsed = parseStructuredExtraction(rawResponse)
-    if (parsed) {
-      extractedText = parsed.text
-      needsTranslation = parsed.needsTranslation
-    } else {
-      // Malformed JSON fallback: treat raw response as plain text and attempt translation
-      extractedText = rawResponse
-      needsTranslation = true
-    }
-  } else {
-    extractedText = rawResponse
-    needsTranslation = false
-  }
+  const extractedText = await callVisionApiText(config.endpoint, extractModel, base64Images, systemPrompt, config.maxTokensExtract)
 
   if (!extractedText || extractedText === '[NO TEXT]') {
     db.prepare('UPDATE media_items SET extracted_text = NULL, extracted_text_translated = NULL WHERE item_key = ?').run(itemKey)
     return { extractedText: '', translatedText: null }
   }
 
-  db.prepare('UPDATE media_items SET extracted_text = ? WHERE item_key = ?').run(extractedText, itemKey)
+  db.prepare('UPDATE media_items SET extracted_text = ?, extracted_text_translated = NULL WHERE item_key = ?').run(extractedText, itemKey)
 
-  // Only translate if the extraction step determined the text is not already in the preferred language
-  let translatedText: string | null = null
-  if (preferredLanguage && needsTranslation) {
-    const translateModel = config.modelTranslate || config.model
-    try {
-      translatedText = await translateText(config.endpoint, translateModel, extractedText, preferredLanguage, config.promptTranslate, config.maxTokensTranslate)
-      if (translatedText) {
-        db.prepare('UPDATE media_items SET extracted_text_translated = ? WHERE item_key = ?').run(translatedText, itemKey)
-      }
-    } catch (err) {
-      console.warn(`[ai-tagger] Translation failed for "${itemKey}":`, err instanceof Error ? err.message : err)
-    }
-  }
-
-  return { extractedText, translatedText }
+  return { extractedText, translatedText: null }
 }
 
 /**