From 470f34c985788384a324954bf62a4004badaad3b Mon Sep 17 00:00:00 2001 From: Garret Patti <42485635+garretpatti@users.noreply.github.com> Date: Sun, 12 Apr 2026 19:15:19 -0400 Subject: [PATCH] feed extracted text to image tagger prompt --- src/lib/ai-tagger.ts | 38 +++++++++++++++++++++++++++++++++++--- 1 file changed, 35 insertions(+), 3 deletions(-) diff --git a/src/lib/ai-tagger.ts b/src/lib/ai-tagger.ts index f9959cb..f67ef7f 100644 --- a/src/lib/ai-tagger.ts +++ b/src/lib/ai-tagger.ts @@ -100,7 +100,15 @@ function resolveItemImage(libraryRoot: string, item: MediaItemRow): ResolvedMedi * If currentTags are provided they are included as context to help the model * understand the content before selecting additional tags. */ -function buildTagPrompt(tags: Tag[], categories: TagCategory[], currentTags?: Tag[], mediaContext: 'image' | 'video' = 'image'): string { +interface TagPromptContext { + currentTags?: Tag[] + mediaContext?: 'image' | 'video' + aiDescription?: string | null + extractedText?: string | null +} + +function buildTagPrompt(tags: Tag[], categories: TagCategory[], ctx: TagPromptContext = {}): string { + const { currentTags, mediaContext = 'image', aiDescription, extractedText } = ctx const categoryMap = new Map(categories.map((c) => [c.id, c.name])) const grouped: Record = {} @@ -124,6 +132,18 @@ function buildTagPrompt(tags: Tag[], categories: TagCategory[], currentTags?: Ta 'If no tags match, return an empty array (e.i., [])', ] + if (aiDescription) { + parts.push('') + parts.push(`AI-generated description of this content: ${aiDescription}`) + parts.push('Use this description as additional context when selecting tags.') + } + + if (extractedText) { + parts.push('') + parts.push(`Text extracted from the image: ${extractedText}`) + parts.push('Use this text as additional context when selecting tags. If the text contains dialogue, it may provide important clues about the content.') + } + if (currentTags && currentTags.length > 0) { const currentTagNames = currentTags.map((t) => t.name).join(', ') parts.push('') @@ -257,7 +277,13 @@ export async function runAiTagging(library: Library, libraryRoot: string): Promi } const { tags: currentItemTags } = getResolvedTagsForItem(item.item_key) - const systemPrompt = buildTagPrompt(tags, categories, currentItemTags, resolvedMedia.mediaType) + const aiFields = getAiFields(item.item_key) + const systemPrompt = buildTagPrompt(tags, categories, { + currentTags: currentItemTags, + mediaContext: resolvedMedia.mediaType, + aiDescription: aiFields.aiDescription, + extractedText: aiFields.extractedTextTranslated ?? aiFields.extractedText, + }) const suggestedIds = await callVisionApi(config.endpoint, config.model, base64Images, systemPrompt) @@ -338,7 +364,13 @@ export async function tagSingleItem(itemKey: string): Promise { } const { tags: currentItemTags } = getResolvedTagsForItem(itemKey) - const systemPromptWithContext = buildTagPrompt(tags, categories, currentItemTags, imagePath.mediaType) + const aiFields = getAiFields(itemKey) + const systemPromptWithContext = buildTagPrompt(tags, categories, { + currentTags: currentItemTags, + mediaContext: imagePath.mediaType, + aiDescription: aiFields.aiDescription, + extractedText: aiFields.extractedTextTranslated ?? aiFields.extractedText, + }) const suggestedIds = await callVisionApi(config.endpoint, config.model, base64Images, systemPromptWithContext) const validIds = suggestedIds.filter((id) => validTagIds.has(id))