ai-descriptions #21
@@ -100,7 +100,15 @@ function resolveItemImage(libraryRoot: string, item: MediaItemRow): ResolvedMedi
|
|||||||
* If currentTags are provided they are included as context to help the model
|
* If currentTags are provided they are included as context to help the model
|
||||||
* understand the content before selecting additional tags.
|
* understand the content before selecting additional tags.
|
||||||
*/
|
*/
|
||||||
function buildTagPrompt(tags: Tag[], categories: TagCategory[], currentTags?: Tag[], mediaContext: 'image' | 'video' = 'image'): string {
|
interface TagPromptContext {
|
||||||
|
currentTags?: Tag[]
|
||||||
|
mediaContext?: 'image' | 'video'
|
||||||
|
aiDescription?: string | null
|
||||||
|
extractedText?: string | null
|
||||||
|
}
|
||||||
|
|
||||||
|
function buildTagPrompt(tags: Tag[], categories: TagCategory[], ctx: TagPromptContext = {}): string {
|
||||||
|
const { currentTags, mediaContext = 'image', aiDescription, extractedText } = ctx
|
||||||
const categoryMap = new Map(categories.map((c) => [c.id, c.name]))
|
const categoryMap = new Map(categories.map((c) => [c.id, c.name]))
|
||||||
|
|
||||||
const grouped: Record<string, { id: string; name: string }[]> = {}
|
const grouped: Record<string, { id: string; name: string }[]> = {}
|
||||||
@@ -124,6 +132,18 @@ function buildTagPrompt(tags: Tag[], categories: TagCategory[], currentTags?: Ta
|
|||||||
'If no tags match, return an empty array (e.i., [])',
|
'If no tags match, return an empty array (e.i., [])',
|
||||||
]
|
]
|
||||||
|
|
||||||
|
if (aiDescription) {
|
||||||
|
parts.push('')
|
||||||
|
parts.push(`AI-generated description of this content: ${aiDescription}`)
|
||||||
|
parts.push('Use this description as additional context when selecting tags.')
|
||||||
|
}
|
||||||
|
|
||||||
|
if (extractedText) {
|
||||||
|
parts.push('')
|
||||||
|
parts.push(`Text extracted from the image: ${extractedText}`)
|
||||||
|
parts.push('Use this text as additional context when selecting tags. If the text contains dialogue, it may provide important clues about the content.')
|
||||||
|
}
|
||||||
|
|
||||||
if (currentTags && currentTags.length > 0) {
|
if (currentTags && currentTags.length > 0) {
|
||||||
const currentTagNames = currentTags.map((t) => t.name).join(', ')
|
const currentTagNames = currentTags.map((t) => t.name).join(', ')
|
||||||
parts.push('')
|
parts.push('')
|
||||||
@@ -257,7 +277,13 @@ export async function runAiTagging(library: Library, libraryRoot: string): Promi
|
|||||||
}
|
}
|
||||||
|
|
||||||
const { tags: currentItemTags } = getResolvedTagsForItem(item.item_key)
|
const { tags: currentItemTags } = getResolvedTagsForItem(item.item_key)
|
||||||
const systemPrompt = buildTagPrompt(tags, categories, currentItemTags, resolvedMedia.mediaType)
|
const aiFields = getAiFields(item.item_key)
|
||||||
|
const systemPrompt = buildTagPrompt(tags, categories, {
|
||||||
|
currentTags: currentItemTags,
|
||||||
|
mediaContext: resolvedMedia.mediaType,
|
||||||
|
aiDescription: aiFields.aiDescription,
|
||||||
|
extractedText: aiFields.extractedTextTranslated ?? aiFields.extractedText,
|
||||||
|
})
|
||||||
|
|
||||||
const suggestedIds = await callVisionApi(config.endpoint, config.model, base64Images, systemPrompt)
|
const suggestedIds = await callVisionApi(config.endpoint, config.model, base64Images, systemPrompt)
|
||||||
|
|
||||||
@@ -338,7 +364,13 @@ export async function tagSingleItem(itemKey: string): Promise<string[]> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
const { tags: currentItemTags } = getResolvedTagsForItem(itemKey)
|
const { tags: currentItemTags } = getResolvedTagsForItem(itemKey)
|
||||||
const systemPromptWithContext = buildTagPrompt(tags, categories, currentItemTags, imagePath.mediaType)
|
const aiFields = getAiFields(itemKey)
|
||||||
|
const systemPromptWithContext = buildTagPrompt(tags, categories, {
|
||||||
|
currentTags: currentItemTags,
|
||||||
|
mediaContext: imagePath.mediaType,
|
||||||
|
aiDescription: aiFields.aiDescription,
|
||||||
|
extractedText: aiFields.extractedTextTranslated ?? aiFields.extractedText,
|
||||||
|
})
|
||||||
|
|
||||||
const suggestedIds = await callVisionApi(config.endpoint, config.model, base64Images, systemPromptWithContext)
|
const suggestedIds = await callVisionApi(config.endpoint, config.model, base64Images, systemPromptWithContext)
|
||||||
const validIds = suggestedIds.filter((id) => validTagIds.has(id))
|
const validIds = suggestedIds.filter((id) => validTagIds.has(id))
|
||||||
|
|||||||
Reference in New Issue
Block a user