From 470f34c985788384a324954bf62a4004badaad3b Mon Sep 17 00:00:00 2001
From: Garret Patti <42485635+garretpatti@users.noreply.github.com>
Date: Sun, 12 Apr 2026 19:15:19 -0400
Subject: [PATCH] feed AI description and extracted text to tagger prompt

---
 src/lib/ai-tagger.ts | 38 +++++++++++++++++++++++++++++++++++---
 1 file changed, 35 insertions(+), 3 deletions(-)

diff --git a/src/lib/ai-tagger.ts b/src/lib/ai-tagger.ts
index f9959cb..f67ef7f 100644
--- a/src/lib/ai-tagger.ts
+++ b/src/lib/ai-tagger.ts
@@ -100,7 +100,15 @@ function resolveItemImage(libraryRoot: string, item: MediaItemRow): ResolvedMedi
  * If currentTags are provided they are included as context to help the model
  * understand the content before selecting additional tags.
  */
-function buildTagPrompt(tags: Tag[], categories: TagCategory[], currentTags?: Tag[], mediaContext: 'image' | 'video' = 'image'): string {
+interface TagPromptContext { // optional context bundle consumed by buildTagPrompt
+  currentTags?: Tag[] // tags already on the item; listed in the prompt as context when non-empty
+  mediaContext?: 'image' | 'video' // buildTagPrompt falls back to 'image' when omitted
+  aiDescription?: string | null // appended to the prompt as additional context when truthy
+  extractedText?: string | null // appended when truthy; NOTE(review): prompt wording says "from the image" even for video
+}
+
+function buildTagPrompt(tags: Tag[], categories: TagCategory[], ctx: TagPromptContext = {}): string {
+  const { currentTags, mediaContext = 'image', aiDescription, extractedText } = ctx
   const categoryMap = new Map(categories.map((c) => [c.id, c.name]))
 
   const grouped: Record<string, { id: string; name: string }[]> = {}
@@ -124,6 +132,18 @@ function buildTagPrompt(tags: Tag[], categories: TagCategory[], currentTags?: Ta
     'If no tags match, return an empty array (e.i., [])',
   ]
 
+  if (aiDescription) {
+    parts.push('')
+    parts.push(`AI-generated description of this content: ${aiDescription}`)
+    parts.push('Use this description as additional context when selecting tags.')
+  }
+
+  if (extractedText) {
+    parts.push('')
+    parts.push(`Text extracted from the image: ${extractedText}`)
+    parts.push('Use this text as additional context when selecting tags. If the text contains dialogue, it may provide important clues about the content.')
+  }
+
   if (currentTags && currentTags.length > 0) {
     const currentTagNames = currentTags.map((t) => t.name).join(', ')
     parts.push('')
@@ -257,7 +277,13 @@ export async function runAiTagging(library: Library, libraryRoot: string): Promi
       }
 
       const { tags: currentItemTags } = getResolvedTagsForItem(item.item_key)
-      const systemPrompt = buildTagPrompt(tags, categories, currentItemTags, resolvedMedia.mediaType)
+      const aiFields = getAiFields(item.item_key)
+      const systemPrompt = buildTagPrompt(tags, categories, {
+        currentTags: currentItemTags,
+        mediaContext: resolvedMedia.mediaType,
+        aiDescription: aiFields.aiDescription,
+        extractedText: aiFields.extractedTextTranslated ?? aiFields.extractedText,
+      })
 
       const suggestedIds = await callVisionApi(config.endpoint, config.model, base64Images, systemPrompt)
 
@@ -338,7 +364,13 @@ export async function tagSingleItem(itemKey: string): Promise<string[]> {
   }
 
   const { tags: currentItemTags } = getResolvedTagsForItem(itemKey)
-  const systemPromptWithContext = buildTagPrompt(tags, categories, currentItemTags, imagePath.mediaType)
+  const aiFields = getAiFields(itemKey)
+  const systemPromptWithContext = buildTagPrompt(tags, categories, {
+    currentTags: currentItemTags,
+    mediaContext: imagePath.mediaType,
+    aiDescription: aiFields.aiDescription,
+    extractedText: aiFields.extractedTextTranslated ?? aiFields.extractedText,
+  })
 
   const suggestedIds = await callVisionApi(config.endpoint, config.model, base64Images, systemPromptWithContext)
   const validIds = suggestedIds.filter((id) => validTagIds.has(id))