customize model based on step

This commit is contained in:
Garret Patti
2026-04-12 19:50:18 -04:00
parent 470f34c985
commit 5ac4b3bd8a
6 changed files with 317 additions and 30 deletions

View File

@@ -6,23 +6,23 @@ export async function GET(request: NextRequest) {
const auth = await requireAdmin(request) const auth = await requireAdmin(request)
if (auth instanceof NextResponse) return auth if (auth instanceof NextResponse) return auth
const { endpoint, model, enabled } = getAiConfig() const { endpoint, model, modelTagging, modelDescribe, modelExtract, modelTranslate, enabled } = getAiConfig()
const preferredLanguage = getPreferredLanguage() const preferredLanguage = getPreferredLanguage()
return NextResponse.json({ endpoint, model, enabled, preferredLanguage }) return NextResponse.json({ endpoint, model, modelTagging, modelDescribe, modelExtract, modelTranslate, enabled, preferredLanguage })
} }
export async function PUT(request: NextRequest) { export async function PUT(request: NextRequest) {
const auth = await requireAdmin(request) const auth = await requireAdmin(request)
if (auth instanceof NextResponse) return auth if (auth instanceof NextResponse) return auth
let body: { endpoint?: string; model?: string; enabled?: boolean; preferredLanguage?: string } let body: { endpoint?: string; model?: string; modelTagging?: string; modelDescribe?: string; modelExtract?: string; modelTranslate?: string; enabled?: boolean; preferredLanguage?: string }
try { try {
body = await request.json() body = await request.json()
} catch { } catch {
return NextResponse.json({ error: 'Invalid JSON body' }, { status: 400 }) return NextResponse.json({ error: 'Invalid JSON body' }, { status: 400 })
} }
const { endpoint, model, enabled, preferredLanguage } = body const { endpoint, model, enabled, preferredLanguage, modelTagging, modelDescribe, modelExtract, modelTranslate } = body
if (typeof endpoint !== 'string') { if (typeof endpoint !== 'string') {
return NextResponse.json({ error: 'endpoint is required' }, { status: 400 }) return NextResponse.json({ error: 'endpoint is required' }, { status: 400 })
@@ -34,11 +34,20 @@ export async function PUT(request: NextRequest) {
return NextResponse.json({ error: 'enabled must be a boolean' }, { status: 400 }) return NextResponse.json({ error: 'enabled must be a boolean' }, { status: 400 })
} }
updateAiConfig(endpoint, model, enabled) updateAiConfig(
endpoint,
model,
enabled,
typeof modelTagging === 'string' ? modelTagging : undefined,
typeof modelDescribe === 'string' ? modelDescribe : undefined,
typeof modelExtract === 'string' ? modelExtract : undefined,
typeof modelTranslate === 'string' ? modelTranslate : undefined,
)
if (typeof preferredLanguage === 'string' && preferredLanguage.trim()) { if (typeof preferredLanguage === 'string' && preferredLanguage.trim()) {
setPreferredLanguage(preferredLanguage.trim()) setPreferredLanguage(preferredLanguage.trim())
} }
return NextResponse.json({ endpoint, model, enabled, preferredLanguage: getPreferredLanguage() }) const config = getAiConfig()
return NextResponse.json({ ...config, preferredLanguage: getPreferredLanguage() })
} }

View File

@@ -0,0 +1,38 @@
import { NextRequest, NextResponse } from 'next/server'
import { requireLibraryAccess } from '@/lib/auth'
import { describeDirectoryItems } from '@/lib/ai-tagger'
/**
 * POST handler for bulk description generation.
 *
 * Expects a JSON body of the form `{ libraryId: string, path?: string }`.
 * A missing or null `path` is forwarded as the empty string.
 *
 * Responses:
 * - 200 `{ processed }` with the value returned by `describeDirectoryItems`.
 * - 400 for an unparsable body, a missing `libraryId`, a non-string `path`,
 *   or errors coded `NOT_CONFIGURED` / `INVALID_TYPE`.
 * - 404 for errors coded `NOT_FOUND`.
 * - 502 for any other failure (the error is logged).
 * - Whatever `requireLibraryAccess` returns when it yields a `NextResponse`.
 */
export async function POST(request: NextRequest) {
  let parsed: unknown
  try {
    parsed = await request.json()
  } catch {
    return NextResponse.json({ error: 'Invalid JSON body' }, { status: 400 })
  }

  // `null` and primitives are valid JSON; destructuring `null` would throw and
  // escape as an unhandled error, so treat any non-object as "no fields given".
  const fields: { libraryId?: unknown; path?: unknown } =
    typeof parsed === 'object' && parsed !== null ? parsed : {}
  const { libraryId, path: dirPath } = fields

  if (!libraryId || typeof libraryId !== 'string') {
    return NextResponse.json({ error: 'libraryId is required' }, { status: 400 })
  }
  // Reject non-string paths instead of letting them be coerced further down.
  if (dirPath != null && typeof dirPath !== 'string') {
    return NextResponse.json({ error: 'path must be a string' }, { status: 400 })
  }

  const auth = await requireLibraryAccess(request, libraryId)
  if (auth instanceof NextResponse) return auth

  try {
    const processed = await describeDirectoryItems(libraryId, dirPath ?? '')
    return NextResponse.json({ processed })
  } catch (err: unknown) {
    // Narrow before reading `code`/`message`: anything can be thrown.
    const code =
      typeof err === 'object' && err !== null && 'code' in err
        ? (err as { code?: unknown }).code
        : undefined
    const message = err instanceof Error ? err.message : String(err)

    if (code === 'NOT_CONFIGURED' || code === 'INVALID_TYPE') {
      return NextResponse.json({ error: message }, { status: 400 })
    }
    if (code === 'NOT_FOUND') {
      return NextResponse.json({ error: message }, { status: 404 })
    }
    console.error('[ai-tagging/describe-bulk] Error:', err)
    return NextResponse.json({ error: 'Failed to generate descriptions' }, { status: 502 })
  }
}

View File

@@ -5,12 +5,16 @@ import { useEffect, useState, useCallback } from 'react'
interface AiSettings { interface AiSettings {
endpoint: string endpoint: string
model: string model: string
modelTagging: string
modelDescribe: string
modelExtract: string
modelTranslate: string
enabled: boolean enabled: boolean
preferredLanguage: string preferredLanguage: string
} }
export default function AiTaggingPage() { export default function AiTaggingPage() {
const [settings, setSettings] = useState<AiSettings>({ endpoint: '', model: '', enabled: false, preferredLanguage: 'English' }) const [settings, setSettings] = useState<AiSettings>({ endpoint: '', model: '', modelTagging: '', modelDescribe: '', modelExtract: '', modelTranslate: '', enabled: false, preferredLanguage: 'English' })
const [loading, setLoading] = useState(true) const [loading, setLoading] = useState(true)
const [saving, setSaving] = useState(false) const [saving, setSaving] = useState(false)
const [saveError, setSaveError] = useState<string | null>(null) const [saveError, setSaveError] = useState<string | null>(null)
@@ -134,7 +138,7 @@ export default function AiTaggingPage() {
</p> </p>
</Field> </Field>
<Field label="Model"> <Field label="Default Model">
<input <input
type="text" type="text"
value={settings.model} value={settings.model}
@@ -150,10 +154,78 @@ export default function AiTaggingPage() {
onBlur={(e) => ((e.currentTarget as HTMLElement).style.borderColor = 'var(--border)')} onBlur={(e) => ((e.currentTarget as HTMLElement).style.borderColor = 'var(--border)')}
/> />
<p className="mt-1 text-xs" style={{ color: 'var(--text-secondary)' }}> <p className="mt-1 text-xs" style={{ color: 'var(--text-secondary)' }}>
Model name to use for vision requests. Default model used for all AI tasks unless overridden below.
</p> </p>
</Field> </Field>
<Field label="Tagging Model">
<input
type="text"
value={settings.modelTagging}
onChange={(e) => setSettings((s) => ({ ...s, modelTagging: e.target.value }))}
placeholder="Leave blank to use default"
className="w-full rounded-lg px-3 py-2 text-sm font-mono outline-none focus:ring-2"
style={{
backgroundColor: 'var(--background)',
border: '1px solid var(--border)',
color: 'var(--text-primary)',
}}
onFocus={(e) => ((e.currentTarget as HTMLElement).style.borderColor = 'var(--accent)')}
onBlur={(e) => ((e.currentTarget as HTMLElement).style.borderColor = 'var(--border)')}
/>
</Field>
<Field label="Description Model">
<input
type="text"
value={settings.modelDescribe}
onChange={(e) => setSettings((s) => ({ ...s, modelDescribe: e.target.value }))}
placeholder="Leave blank to use default"
className="w-full rounded-lg px-3 py-2 text-sm font-mono outline-none focus:ring-2"
style={{
backgroundColor: 'var(--background)',
border: '1px solid var(--border)',
color: 'var(--text-primary)',
}}
onFocus={(e) => ((e.currentTarget as HTMLElement).style.borderColor = 'var(--accent)')}
onBlur={(e) => ((e.currentTarget as HTMLElement).style.borderColor = 'var(--border)')}
/>
</Field>
<Field label="Text Extraction Model">
<input
type="text"
value={settings.modelExtract}
onChange={(e) => setSettings((s) => ({ ...s, modelExtract: e.target.value }))}
placeholder="Leave blank to use default"
className="w-full rounded-lg px-3 py-2 text-sm font-mono outline-none focus:ring-2"
style={{
backgroundColor: 'var(--background)',
border: '1px solid var(--border)',
color: 'var(--text-primary)',
}}
onFocus={(e) => ((e.currentTarget as HTMLElement).style.borderColor = 'var(--accent)')}
onBlur={(e) => ((e.currentTarget as HTMLElement).style.borderColor = 'var(--border)')}
/>
</Field>
<Field label="Translation Model">
<input
type="text"
value={settings.modelTranslate}
onChange={(e) => setSettings((s) => ({ ...s, modelTranslate: e.target.value }))}
placeholder="Leave blank to use default"
className="w-full rounded-lg px-3 py-2 text-sm font-mono outline-none focus:ring-2"
style={{
backgroundColor: 'var(--background)',
border: '1px solid var(--border)',
color: 'var(--text-primary)',
}}
onFocus={(e) => ((e.currentTarget as HTMLElement).style.borderColor = 'var(--accent)')}
onBlur={(e) => ((e.currentTarget as HTMLElement).style.borderColor = 'var(--border)')}
/>
</Field>
<Field label="Automatic Tagging"> <Field label="Automatic Tagging">
<label className="flex items-center gap-3 cursor-pointer select-none"> <label className="flex items-center gap-3 cursor-pointer select-none">
<div <div

View File

@@ -362,6 +362,31 @@ export default function MixedView({ libraryId, initialPath }: Props) {
} }
} }
}} }}
onDescribe={async (e) => {
if (e.type === 'directory') {
const dirRel = filtersActive ? e.name : (currentPath ? `${currentPath}/${e.name}` : e.name)
const res = await fetch('/api/ai-tagging/describe-bulk', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ libraryId, path: dirRel }),
})
if (!res.ok) {
const data = await res.json().catch(() => ({}))
throw new Error((data as { error?: string }).error ?? 'Description generation failed')
}
} else {
const itemKey = itemKeyFor(e)
const res = await fetch('/api/ai-tagging/describe', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ itemKey }),
})
if (!res.ok) {
const data = await res.json().catch(() => ({}))
throw new Error((data as { error?: string }).error ?? 'Description generation failed')
}
}
}}
onDelete={(e) => { onDelete={(e) => {
const rel = filtersActive ? e.name : (currentPath ? `${currentPath}/${e.name}` : e.name) const rel = filtersActive ? e.name : (currentPath ? `${currentPath}/${e.name}` : e.name)
fetch(`/api/browse?libraryId=${encodeURIComponent(libraryId)}&path=${encodeURIComponent(rel)}`, { method: 'DELETE' }) fetch(`/api/browse?libraryId=${encodeURIComponent(libraryId)}&path=${encodeURIComponent(rel)}`, { method: 'DELETE' })
@@ -491,7 +516,7 @@ export default function MixedView({ libraryId, initialPath }: Props) {
) )
} }
function EntryTile({ entry, onOpen, onTag, onDelete, onRename, onAiTag, onExtractText }: { entry: FileEntry; onOpen: (e: FileEntry) => void; onTag: (e: FileEntry) => void; onDelete?: (e: FileEntry) => void; onRename?: (e: FileEntry, newName: string) => Promise<boolean>; onAiTag?: (e: FileEntry) => Promise<void>; onExtractText?: (e: FileEntry) => Promise<void> }) { function EntryTile({ entry, onOpen, onTag, onDelete, onRename, onAiTag, onExtractText, onDescribe }: { entry: FileEntry; onOpen: (e: FileEntry) => void; onTag: (e: FileEntry) => void; onDelete?: (e: FileEntry) => void; onRename?: (e: FileEntry, newName: string) => Promise<boolean>; onAiTag?: (e: FileEntry) => Promise<void>; onExtractText?: (e: FileEntry) => Promise<void>; onDescribe?: (e: FileEntry) => Promise<void> }) {
type ImgState = 'loading' | 'loaded' | 'error' type ImgState = 'loading' | 'loaded' | 'error'
const [imgState, setImgState] = useState<ImgState>( const [imgState, setImgState] = useState<ImgState>(
entry.thumbnailUrl ? 'loading' : 'error' entry.thumbnailUrl ? 'loading' : 'error'
@@ -508,6 +533,8 @@ function EntryTile({ entry, onOpen, onTag, onDelete, onRename, onAiTag, onExtrac
const [aiTagError, setAiTagError] = useState<string | null>(null) const [aiTagError, setAiTagError] = useState<string | null>(null)
const [textExtracting, setTextExtracting] = useState(false) const [textExtracting, setTextExtracting] = useState(false)
const [textExtractError, setTextExtractError] = useState<string | null>(null) const [textExtractError, setTextExtractError] = useState<string | null>(null)
const [describing, setDescribing] = useState(false)
const [describeError, setDescribeError] = useState<string | null>(null)
useEffect(() => { useEffect(() => {
if (!menuOpen) return if (!menuOpen) return
@@ -538,7 +565,7 @@ function EntryTile({ entry, onOpen, onTag, onDelete, onRename, onAiTag, onExtrac
tabIndex={0} tabIndex={0}
onClick={() => onOpen(entry)} onClick={() => onOpen(entry)}
onKeyDown={(e) => { if (e.key === 'Enter' || e.key === ' ') { e.preventDefault(); onOpen(entry) } }} onKeyDown={(e) => { if (e.key === 'Enter' || e.key === ' ') { e.preventDefault(); onOpen(entry) } }}
className="group relative flex flex-col rounded-xl border overflow-hidden text-xs transition-all cursor-pointer" className="group relative flex flex-col rounded-xl border text-xs transition-all cursor-pointer"
style={{ borderColor: 'var(--border)', backgroundColor: 'var(--surface)', aspectRatio: '1 / 1' }} style={{ borderColor: 'var(--border)', backgroundColor: 'var(--surface)', aspectRatio: '1 / 1' }}
onMouseEnter={(e) => { onMouseEnter={(e) => {
;(e.currentTarget as HTMLElement).style.borderColor = 'var(--accent)' ;(e.currentTarget as HTMLElement).style.borderColor = 'var(--accent)'
@@ -549,6 +576,8 @@ function EntryTile({ entry, onOpen, onTag, onDelete, onRename, onAiTag, onExtrac
;(e.currentTarget as HTMLElement).style.transform = 'translateY(0)' ;(e.currentTarget as HTMLElement).style.transform = 'translateY(0)'
}} }}
> >
{/* Inner wrapper — clips visual content to rounded corners */}
<div className="absolute inset-0 overflow-hidden rounded-xl pointer-events-none">
{/* Thumbnail image — hidden until loaded */} {/* Thumbnail image — hidden until loaded */}
{entry.thumbnailUrl && ( {entry.thumbnailUrl && (
// eslint-disable-next-line @next/next/no-img-element // eslint-disable-next-line @next/next/no-img-element
@@ -606,6 +635,7 @@ function EntryTile({ entry, onOpen, onTag, onDelete, onRename, onAiTag, onExtrac
</div> </div>
)} )}
</div>
{/* Tag button — top-left, shown on hover */} {/* Tag button — top-left, shown on hover */}
<button <button
@@ -618,11 +648,11 @@ function EntryTile({ entry, onOpen, onTag, onDelete, onRename, onAiTag, onExtrac
🏷 🏷
</button> </button>
{/* Kebab menu — top-right, shown on hover */} {/* Kebab menu — bottom-right, shown on hover */}
{(onDelete || onRename || (onAiTag && entry.mediaType === 'image') || (onExtractText && entry.mediaType === 'image') || (onExtractText && entry.type === 'directory')) && ( {(onDelete || onRename || (onAiTag && entry.mediaType === 'image') || (onExtractText && entry.mediaType === 'image') || (onExtractText && entry.type === 'directory') || (onDescribe && (entry.mediaType === 'image' || entry.mediaType === 'video' || entry.type === 'directory'))) && (
<div className="absolute top-2 right-2 opacity-0 group-hover:opacity-100 transition-opacity hidden group-hover:block" ref={menuRef}> <div className="absolute bottom-2 right-2 opacity-0 group-hover:opacity-100 transition-opacity hidden group-hover:block z-10" ref={menuRef}>
<button <button
onClick={(e) => { e.stopPropagation(); setMenuOpen((o) => !o); setConfirming(false); setAiTagError(null) }} onClick={(e) => { e.stopPropagation(); setMenuOpen((o) => !o); setConfirming(false); setAiTagError(null); setDescribeError(null) }}
className="w-6 h-6 rounded-full flex items-center justify-center text-xs" className="w-6 h-6 rounded-full flex items-center justify-center text-xs"
style={{ backgroundColor: 'rgba(0,0,0,0.55)', color: '#fff' }} style={{ backgroundColor: 'rgba(0,0,0,0.55)', color: '#fff' }}
aria-label="More options" aria-label="More options"
@@ -631,7 +661,7 @@ function EntryTile({ entry, onOpen, onTag, onDelete, onRename, onAiTag, onExtrac
</button> </button>
{menuOpen && ( {menuOpen && (
<div <div
className="absolute right-0 top-full mt-1 rounded-lg shadow-lg overflow-hidden z-20 min-w-max" className="absolute right-0 bottom-full mb-1 rounded-lg shadow-lg overflow-hidden z-20 min-w-max"
style={{ backgroundColor: 'var(--surface)', border: '1px solid var(--border)' }} style={{ backgroundColor: 'var(--surface)', border: '1px solid var(--border)' }}
> >
{onAiTag && entry.mediaType === 'image' && ( {onAiTag && entry.mediaType === 'image' && (
@@ -654,6 +684,46 @@ function EntryTile({ entry, onOpen, onTag, onDelete, onRename, onAiTag, onExtrac
AI Tag AI Tag
</button> </button>
)} )}
{onDescribe && (entry.mediaType === 'image' || entry.mediaType === 'video') && (
<button
onClick={(e) => {
e.stopPropagation()
setMenuOpen(false)
setDescribing(true)
setDescribeError(null)
onDescribe(entry)
.catch((err) => setDescribeError(err instanceof Error ? err.message : 'Description generation failed'))
.finally(() => setDescribing(false))
}}
disabled={describing}
className="flex items-center gap-2 w-full px-4 py-2 text-sm text-left transition-colors disabled:opacity-50"
style={{ color: 'var(--text-primary)' }}
onMouseEnter={(e) => ((e.currentTarget as HTMLElement).style.backgroundColor = 'var(--border)')}
onMouseLeave={(e) => ((e.currentTarget as HTMLElement).style.backgroundColor = 'transparent')}
>
📝 Describe
</button>
)}
{onDescribe && entry.type === 'directory' && (
<button
onClick={(e) => {
e.stopPropagation()
setMenuOpen(false)
setDescribing(true)
setDescribeError(null)
onDescribe(entry)
.catch((err) => setDescribeError(err instanceof Error ? err.message : 'Description generation failed'))
.finally(() => setDescribing(false))
}}
disabled={describing}
className="flex items-center gap-2 w-full px-4 py-2 text-sm text-left transition-colors disabled:opacity-50"
style={{ color: 'var(--text-primary)' }}
onMouseEnter={(e) => ((e.currentTarget as HTMLElement).style.backgroundColor = 'var(--border)')}
onMouseLeave={(e) => ((e.currentTarget as HTMLElement).style.backgroundColor = 'transparent')}
>
📝 Describe Folder
</button>
)}
{onExtractText && entry.mediaType === 'image' && ( {onExtractText && entry.mediaType === 'image' && (
<button <button
onClick={(e) => { onClick={(e) => {
@@ -771,6 +841,28 @@ function EntryTile({ entry, onOpen, onTag, onDelete, onRename, onAiTag, onExtrac
</div> </div>
)} )}
{/* Description generation status overlay */}
{(describing || describeError) && (
<div
className="absolute inset-x-0 bottom-0 z-10 px-2 py-1.5 text-xs"
style={{ backgroundColor: describeError ? 'rgba(127,29,29,0.9)' : 'rgba(0,0,0,0.75)' }}
onClick={(e) => e.stopPropagation()}
>
<span style={{ color: describeError ? '#fca5a5' : 'var(--text-secondary)' }}>
{describeError ?? 'Generating description…'}
</span>
{describeError && (
<button
onClick={() => setDescribeError(null)}
className="ml-2 underline text-xs"
style={{ color: '#fca5a5' }}
>
dismiss
</button>
)}
</div>
)}
{/* Delete confirmation overlay */} {/* Delete confirmation overlay */}
{confirming && ( {confirming && (
<div <div

View File

@@ -223,7 +223,8 @@ async function callVisionApi(
*/ */
export async function runAiTagging(library: Library, libraryRoot: string): Promise<void> { export async function runAiTagging(library: Library, libraryRoot: string): Promise<void> {
const config = getAiConfig() const config = getAiConfig()
if (!config.enabled || !config.endpoint || !config.model) return const taggingModel = config.modelTagging || config.model
if (!config.enabled || !config.endpoint || !taggingModel) return
const activeCategoryIds = new Set(getActiveCategoryIdsForLibrary(library.id)) const activeCategoryIds = new Set(getActiveCategoryIdsForLibrary(library.id))
const allTags = getTags() const allTags = getTags()
@@ -285,7 +286,7 @@ export async function runAiTagging(library: Library, libraryRoot: string): Promi
extractedText: aiFields.extractedTextTranslated ?? aiFields.extractedText, extractedText: aiFields.extractedTextTranslated ?? aiFields.extractedText,
}) })
const suggestedIds = await callVisionApi(config.endpoint, config.model, base64Images, systemPrompt) const suggestedIds = await callVisionApi(config.endpoint, taggingModel, base64Images, systemPrompt)
// Filter to valid tags only // Filter to valid tags only
const validIds = suggestedIds.filter((id) => validTagIds.has(id)) const validIds = suggestedIds.filter((id) => validTagIds.has(id))
@@ -317,7 +318,8 @@ export async function runAiTagging(library: Library, libraryRoot: string): Promi
*/ */
export async function tagSingleItem(itemKey: string): Promise<string[]> { export async function tagSingleItem(itemKey: string): Promise<string[]> {
const config = getAiConfig() const config = getAiConfig()
if (!config.endpoint || !config.model) { const taggingModel = config.modelTagging || config.model
if (!config.endpoint || !taggingModel) {
throw Object.assign(new Error('AI tagging endpoint and model are not configured'), { code: 'NOT_CONFIGURED' }) throw Object.assign(new Error('AI tagging endpoint and model are not configured'), { code: 'NOT_CONFIGURED' })
} }
@@ -372,7 +374,7 @@ export async function tagSingleItem(itemKey: string): Promise<string[]> {
extractedText: aiFields.extractedTextTranslated ?? aiFields.extractedText, extractedText: aiFields.extractedTextTranslated ?? aiFields.extractedText,
}) })
const suggestedIds = await callVisionApi(config.endpoint, config.model, base64Images, systemPromptWithContext) const suggestedIds = await callVisionApi(config.endpoint, taggingModel, base64Images, systemPromptWithContext)
const validIds = suggestedIds.filter((id) => validTagIds.has(id)) const validIds = suggestedIds.filter((id) => validTagIds.has(id))
for (const tagId of validIds) { for (const tagId of validIds) {
@@ -490,7 +492,8 @@ async function callChatApiText(
*/ */
export async function generateItemDescription(itemKey: string): Promise<string> { export async function generateItemDescription(itemKey: string): Promise<string> {
const config = getAiConfig() const config = getAiConfig()
if (!config.endpoint || !config.model) { const describeModel = config.modelDescribe || config.model
if (!config.endpoint || !describeModel) {
throw Object.assign(new Error('AI endpoint and model are not configured'), { code: 'NOT_CONFIGURED' }) throw Object.assign(new Error('AI endpoint and model are not configured'), { code: 'NOT_CONFIGURED' })
} }
@@ -525,7 +528,7 @@ export async function generateItemDescription(itemKey: string): Promise<string>
const systemPrompt = 'You are a media cataloging assistant. Describe the given image briefly and objectively in 1-3 sentences. Focus on the visual content, subjects, setting, and mood. Do not speculate about context outside the image. Do not preface the description with any phrases like "This image shows" or "This image features". Return only the description text with no additional commentary.' const systemPrompt = 'You are a media cataloging assistant. Describe the given image briefly and objectively in 1-3 sentences. Focus on the visual content, subjects, setting, and mood. Do not speculate about context outside the image. Do not preface the description with any phrases like "This image shows" or "This image features". Return only the description text with no additional commentary.'
const description = await callVisionApiText(config.endpoint, config.model, base64Images, systemPrompt) const description = await callVisionApiText(config.endpoint, describeModel, base64Images, systemPrompt)
db.prepare('UPDATE media_items SET ai_description = ? WHERE item_key = ?').run(description, itemKey) db.prepare('UPDATE media_items SET ai_description = ? WHERE item_key = ?').run(description, itemKey)
@@ -542,7 +545,8 @@ export async function generateItemDescription(itemKey: string): Promise<string>
*/ */
export async function extractItemText(itemKey: string): Promise<{ extractedText: string; translatedText: string | null }> { export async function extractItemText(itemKey: string): Promise<{ extractedText: string; translatedText: string | null }> {
const config = getAiConfig() const config = getAiConfig()
if (!config.endpoint || !config.model) { const extractModel = config.modelExtract || config.model
if (!config.endpoint || !extractModel) {
throw Object.assign(new Error('AI endpoint and model are not configured'), { code: 'NOT_CONFIGURED' }) throw Object.assign(new Error('AI endpoint and model are not configured'), { code: 'NOT_CONFIGURED' })
} }
@@ -577,7 +581,7 @@ export async function extractItemText(itemKey: string): Promise<{ extractedText:
const systemPrompt = 'You are an OCR assistant. Extract ALL text visible in the image exactly as it appears. Preserve line breaks and formatting. Be mindful of different colors of text that may indicate different speakers or emphasis. If there is no text in the image, respond with exactly: [NO TEXT]' const systemPrompt = 'You are an OCR assistant. Extract ALL text visible in the image exactly as it appears. Preserve line breaks and formatting. Be mindful of different colors of text that may indicate different speakers or emphasis. If there is no text in the image, respond with exactly: [NO TEXT]'
const extractedText = await callVisionApiText(config.endpoint, config.model, base64Images, systemPrompt) const extractedText = await callVisionApiText(config.endpoint, extractModel, base64Images, systemPrompt)
if (!extractedText || extractedText === '[NO TEXT]') { if (!extractedText || extractedText === '[NO TEXT]') {
db.prepare('UPDATE media_items SET extracted_text = NULL, extracted_text_translated = NULL WHERE item_key = ?').run(itemKey) db.prepare('UPDATE media_items SET extracted_text = NULL, extracted_text_translated = NULL WHERE item_key = ?').run(itemKey)
@@ -590,8 +594,9 @@ export async function extractItemText(itemKey: string): Promise<{ extractedText:
const preferredLanguage = getPreferredLanguage() const preferredLanguage = getPreferredLanguage()
let translatedText: string | null = null let translatedText: string | null = null
if (preferredLanguage) { if (preferredLanguage) {
const translateModel = config.modelTranslate || config.model
try { try {
translatedText = await translateText(config.endpoint, config.model, extractedText, preferredLanguage) translatedText = await translateText(config.endpoint, translateModel, extractedText, preferredLanguage)
if (translatedText) { if (translatedText) {
db.prepare('UPDATE media_items SET extracted_text_translated = ? WHERE item_key = ?').run(translatedText, itemKey) db.prepare('UPDATE media_items SET extracted_text_translated = ? WHERE item_key = ?').run(translatedText, itemKey)
} }
@@ -609,7 +614,8 @@ export async function extractItemText(itemKey: string): Promise<{ extractedText:
*/ */
export async function translateItemText(itemKey: string): Promise<string | null> { export async function translateItemText(itemKey: string): Promise<string | null> {
const config = getAiConfig() const config = getAiConfig()
if (!config.endpoint || !config.model) { const translateModel = config.modelTranslate || config.model
if (!config.endpoint || !translateModel) {
throw Object.assign(new Error('AI endpoint and model are not configured'), { code: 'NOT_CONFIGURED' }) throw Object.assign(new Error('AI endpoint and model are not configured'), { code: 'NOT_CONFIGURED' })
} }
@@ -627,7 +633,7 @@ export async function translateItemText(itemKey: string): Promise<string | null>
const preferredLanguage = getPreferredLanguage() const preferredLanguage = getPreferredLanguage()
if (!preferredLanguage) return null if (!preferredLanguage) return null
const translatedText = await translateText(config.endpoint, config.model, row.extracted_text, preferredLanguage) const translatedText = await translateText(config.endpoint, translateModel, row.extracted_text, preferredLanguage)
if (translatedText) { if (translatedText) {
db.prepare('UPDATE media_items SET extracted_text_translated = ? WHERE item_key = ?').run(translatedText, itemKey) db.prepare('UPDATE media_items SET extracted_text_translated = ? WHERE item_key = ?').run(translatedText, itemKey)
} }
@@ -662,7 +668,8 @@ async function translateText(
*/ */
export async function extractDirectoryText(libraryId: string, dirPath: string): Promise<number> { export async function extractDirectoryText(libraryId: string, dirPath: string): Promise<number> {
const config = getAiConfig() const config = getAiConfig()
if (!config.endpoint || !config.model) { const extractModel = config.modelExtract || config.model
if (!config.endpoint || !extractModel) {
throw Object.assign(new Error('AI endpoint and model are not configured'), { code: 'NOT_CONFIGURED' }) throw Object.assign(new Error('AI endpoint and model are not configured'), { code: 'NOT_CONFIGURED' })
} }
@@ -706,6 +713,55 @@ export async function extractDirectoryText(libraryId: string, dirPath: string):
return processed return processed
} }
/**
 * Generate AI descriptions for the image and video items stored under a
 * directory of a mixed library.
 *
 * Items are processed one at a time through `generateItemDescription`; a
 * failure on one item is logged as a warning and does not stop the rest.
 *
 * @param libraryId - Id of the library; it must resolve to a library of type `mixed`.
 * @param dirPath - Directory path inside the library; an empty string selects every item.
 * @returns The number of items whose description was generated successfully.
 * @throws Error with `code: 'NOT_CONFIGURED'` when the endpoint or the
 *   describe/default model is empty, `code: 'NOT_FOUND'` when the library does
 *   not exist, and `code: 'INVALID_TYPE'` when the library is not `mixed`.
 */
export async function describeDirectoryItems(libraryId: string, dirPath: string): Promise<number> {
  const config = getAiConfig()
  const describeModel = config.modelDescribe || config.model
  if (!config.endpoint || !describeModel) {
    throw Object.assign(new Error('AI endpoint and model are not configured'), { code: 'NOT_CONFIGURED' })
  }

  const library = getLibrary(libraryId)
  if (!library) {
    throw Object.assign(new Error(`Library not found: ${libraryId}`), { code: 'NOT_FOUND' })
  }
  if (library.type !== 'mixed') {
    throw Object.assign(new Error('Description generation is only available for mixed libraries'), { code: 'INVALID_TYPE' })
  }

  const db = getDb()
  // NOTE(review): assumes item keys look like
  // `<libraryId>:mixed_file:<encodeURIComponent(relativePath)>` — confirm against the scanner.
  const prefix = dirPath
    ? `${libraryId}:mixed_file:${encodeURIComponent(dirPath + '/')}`
    : `${libraryId}:mixed_file:`

  // encodeURIComponent emits '%' escapes and leaves '_' untouched, and both are
  // LIKE wildcards. Unescaped, `photos%2F%` would also match `photos-old%2F...`,
  // so escape the literal prefix and append the only intended wildcard.
  const likePattern = `${prefix.replace(/[\\%_]/g, '\\$&')}%`

  const items = db
    .prepare("SELECT item_key, item_type, file_path, metadata FROM media_items WHERE item_key LIKE ? ESCAPE '\\' AND item_type = ?")
    .all(likePattern, 'mixed_file') as MediaItemRow[]

  let processed = 0
  for (const item of items) {
    if (!item.file_path) continue
    // Only image and video files are described; everything else is skipped.
    const ext = path.extname(item.file_path).toLowerCase()
    if (!IMAGE_EXTENSIONS.has(ext) && !VIDEO_EXTENSIONS.has(ext)) continue

    try {
      await generateItemDescription(item.item_key)
      processed++
    } catch (err) {
      // Best effort: log and continue with the remaining items.
      console.warn(
        `[ai-tagger] Failed to describe "${item.item_key}":`,
        err instanceof Error ? err.message : err
      )
    }
  }

  return processed
}
/** /**
* Get the AI fields (description, extracted text, translation) for a media item. * Get the AI fields (description, extracted text, translation) for a media item.
*/ */

View File

@@ -42,20 +42,40 @@ export function setScanLastRan(ts: number): void {
interface AiConfig { interface AiConfig {
endpoint: string endpoint: string
model: string model: string
modelTagging: string
modelDescribe: string
modelExtract: string
modelTranslate: string
enabled: boolean enabled: boolean
} }
export function getAiConfig(): AiConfig { export function getAiConfig(): AiConfig {
const endpoint = getSetting('ai_endpoint') ?? '' const endpoint = getSetting('ai_endpoint') ?? ''
const model = getSetting('ai_model') ?? '' const model = getSetting('ai_model') ?? ''
const modelTagging = getSetting('ai_model_tagging') ?? ''
const modelDescribe = getSetting('ai_model_describe') ?? ''
const modelExtract = getSetting('ai_model_extract') ?? ''
const modelTranslate = getSetting('ai_model_translate') ?? ''
const enabled = getSetting('ai_enabled') === 'true' const enabled = getSetting('ai_enabled') === 'true'
return { endpoint, model, enabled } return { endpoint, model, modelTagging, modelDescribe, modelExtract, modelTranslate, enabled }
} }
export function updateAiConfig(endpoint: string, model: string, enabled: boolean): void { export function updateAiConfig(
endpoint: string,
model: string,
enabled: boolean,
modelTagging?: string,
modelDescribe?: string,
modelExtract?: string,
modelTranslate?: string,
): void {
setSetting('ai_endpoint', endpoint) setSetting('ai_endpoint', endpoint)
setSetting('ai_model', model) setSetting('ai_model', model)
setSetting('ai_enabled', enabled ? 'true' : 'false') setSetting('ai_enabled', enabled ? 'true' : 'false')
if (modelTagging !== undefined) setSetting('ai_model_tagging', modelTagging)
if (modelDescribe !== undefined) setSetting('ai_model_describe', modelDescribe)
if (modelExtract !== undefined) setSetting('ai_model_extract', modelExtract)
if (modelTranslate !== undefined) setSetting('ai_model_translate', modelTranslate)
} }
export function getPreferredLanguage(): string { export function getPreferredLanguage(): string {