diff --git a/src/app/api/ai-settings/route.ts b/src/app/api/ai-settings/route.ts index 593f37a..09e9435 100644 --- a/src/app/api/ai-settings/route.ts +++ b/src/app/api/ai-settings/route.ts @@ -1,27 +1,28 @@ import { NextRequest, NextResponse } from 'next/server' import { requireAdmin } from '@/lib/auth' -import { getAiConfig, updateAiConfig } from '@/lib/app-settings' +import { getAiConfig, updateAiConfig, getPreferredLanguage, setPreferredLanguage } from '@/lib/app-settings' export async function GET(request: NextRequest) { const auth = await requireAdmin(request) if (auth instanceof NextResponse) return auth const { endpoint, model, enabled } = getAiConfig() - return NextResponse.json({ endpoint, model, enabled }) + const preferredLanguage = getPreferredLanguage() + return NextResponse.json({ endpoint, model, enabled, preferredLanguage }) } export async function PUT(request: NextRequest) { const auth = await requireAdmin(request) if (auth instanceof NextResponse) return auth - let body: { endpoint?: string; model?: string; enabled?: boolean } + let body: { endpoint?: string; model?: string; enabled?: boolean; preferredLanguage?: string } try { body = await request.json() } catch { return NextResponse.json({ error: 'Invalid JSON body' }, { status: 400 }) } - const { endpoint, model, enabled } = body + const { endpoint, model, enabled, preferredLanguage } = body if (typeof endpoint !== 'string') { return NextResponse.json({ error: 'endpoint is required' }, { status: 400 }) @@ -34,5 +35,10 @@ export async function PUT(request: NextRequest) { } updateAiConfig(endpoint, model, enabled) - return NextResponse.json({ endpoint, model, enabled }) + + if (typeof preferredLanguage === 'string' && preferredLanguage.trim()) { + setPreferredLanguage(preferredLanguage.trim()) + } + + return NextResponse.json({ endpoint, model, enabled, preferredLanguage: getPreferredLanguage() }) } diff --git a/src/app/api/ai-tagging/describe/route.ts b/src/app/api/ai-tagging/describe/route.ts 
new file mode 100644 index 0000000..5121c65 --- /dev/null +++ b/src/app/api/ai-tagging/describe/route.ts @@ -0,0 +1,39 @@ +import { NextRequest, NextResponse } from 'next/server' +import { requireLibraryAccess } from '@/lib/auth' +import { generateItemDescription } from '@/lib/ai-tagger' + +export async function POST(request: NextRequest) { + let body: { itemKey?: string } + try { + body = await request.json() + } catch { + return NextResponse.json({ error: 'Invalid JSON body' }, { status: 400 }) + } + + const { itemKey } = body + if (!itemKey || typeof itemKey !== 'string') { + return NextResponse.json({ error: 'itemKey is required' }, { status: 400 }) + } + + const libraryId = itemKey.split(':')[0] + const auth = await requireLibraryAccess(request, libraryId) + if (auth instanceof NextResponse) return auth + + try { + const description = await generateItemDescription(itemKey) + return NextResponse.json({ description }) + } catch (err) { + const error = err as Error & { code?: string } + if (error.code === 'NOT_CONFIGURED') { + return NextResponse.json({ error: error.message }, { status: 400 }) + } + if (error.code === 'NOT_FOUND') { + return NextResponse.json({ error: error.message }, { status: 404 }) + } + if (error.code === 'NO_IMAGE') { + return NextResponse.json({ error: error.message }, { status: 404 }) + } + console.error('[ai-tagging/describe] Error:', error) + return NextResponse.json({ error: 'Failed to generate description' }, { status: 502 }) + } +} diff --git a/src/app/api/ai-tagging/extract-text-bulk/route.ts b/src/app/api/ai-tagging/extract-text-bulk/route.ts new file mode 100644 index 0000000..196ca19 --- /dev/null +++ b/src/app/api/ai-tagging/extract-text-bulk/route.ts @@ -0,0 +1,38 @@ +import { NextRequest, NextResponse } from 'next/server' +import { requireLibraryAccess } from '@/lib/auth' +import { extractDirectoryText } from '@/lib/ai-tagger' + +export async function POST(request: NextRequest) { + let body: { libraryId?: string; path?: string 
} + try { + body = await request.json() + } catch { + return NextResponse.json({ error: 'Invalid JSON body' }, { status: 400 }) + } + + const { libraryId, path: dirPath } = body + if (!libraryId || typeof libraryId !== 'string') { + return NextResponse.json({ error: 'libraryId is required' }, { status: 400 }) + } + + const auth = await requireLibraryAccess(request, libraryId) + if (auth instanceof NextResponse) return auth + + try { + const processed = await extractDirectoryText(libraryId, dirPath ?? '') + return NextResponse.json({ processed }) + } catch (err) { + const error = err as Error & { code?: string } + if (error.code === 'NOT_CONFIGURED') { + return NextResponse.json({ error: error.message }, { status: 400 }) + } + if (error.code === 'NOT_FOUND') { + return NextResponse.json({ error: error.message }, { status: 404 }) + } + if (error.code === 'INVALID_TYPE') { + return NextResponse.json({ error: error.message }, { status: 400 }) + } + console.error('[ai-tagging/extract-text-bulk] Error:', error) + return NextResponse.json({ error: 'Failed to extract text' }, { status: 502 }) + } +} diff --git a/src/app/api/ai-tagging/extract-text/route.ts b/src/app/api/ai-tagging/extract-text/route.ts new file mode 100644 index 0000000..58de630 --- /dev/null +++ b/src/app/api/ai-tagging/extract-text/route.ts @@ -0,0 +1,39 @@ +import { NextRequest, NextResponse } from 'next/server' +import { requireLibraryAccess } from '@/lib/auth' +import { extractItemText } from '@/lib/ai-tagger' + +export async function POST(request: NextRequest) { + let body: { itemKey?: string } + try { + body = await request.json() + } catch { + return NextResponse.json({ error: 'Invalid JSON body' }, { status: 400 }) + } + + const { itemKey } = body + if (!itemKey || typeof itemKey !== 'string') { + return NextResponse.json({ error: 'itemKey is required' }, { status: 400 }) + } + + const libraryId = itemKey.split(':')[0] + const auth = await requireLibraryAccess(request, libraryId) + if (auth 
instanceof NextResponse) return auth + + try { + const result = await extractItemText(itemKey) + return NextResponse.json(result) + } catch (err) { + const error = err as Error & { code?: string } + if (error.code === 'NOT_CONFIGURED') { + return NextResponse.json({ error: error.message }, { status: 400 }) + } + if (error.code === 'NOT_FOUND') { + return NextResponse.json({ error: error.message }, { status: 404 }) + } + if (error.code === 'NO_IMAGE' || error.code === 'INVALID_TYPE') { + return NextResponse.json({ error: error.message }, { status: 400 }) + } + console.error('[ai-tagging/extract-text] Error:', error) + return NextResponse.json({ error: 'Failed to extract text' }, { status: 502 }) + } +} diff --git a/src/app/api/ai-tagging/fields/route.ts b/src/app/api/ai-tagging/fields/route.ts new file mode 100644 index 0000000..ee647aa --- /dev/null +++ b/src/app/api/ai-tagging/fields/route.ts @@ -0,0 +1,19 @@ +import { NextRequest, NextResponse } from 'next/server' +import { requireLibraryAccess } from '@/lib/auth' +import { getAiFields } from '@/lib/ai-tagger' + +export async function GET(request: NextRequest) { + const { searchParams } = request.nextUrl + const itemKey = searchParams.get('itemKey') + + if (!itemKey) { + return NextResponse.json({ error: 'Missing itemKey' }, { status: 400 }) + } + + const libraryId = itemKey.split(':')[0] + const auth = await requireLibraryAccess(request, libraryId) + if (auth instanceof NextResponse) return auth + + const fields = getAiFields(itemKey) + return NextResponse.json(fields) +} diff --git a/src/app/api/ai-tagging/translate/route.ts b/src/app/api/ai-tagging/translate/route.ts new file mode 100644 index 0000000..740d9fb --- /dev/null +++ b/src/app/api/ai-tagging/translate/route.ts @@ -0,0 +1,36 @@ +import { NextRequest, NextResponse } from 'next/server' +import { requireLibraryAccess } from '@/lib/auth' +import { translateItemText } from '@/lib/ai-tagger' + +export async function POST(request: NextRequest) { + let body: 
{ itemKey?: string } + try { + body = await request.json() + } catch { + return NextResponse.json({ error: 'Invalid JSON body' }, { status: 400 }) + } + + const { itemKey } = body + if (!itemKey || typeof itemKey !== 'string') { + return NextResponse.json({ error: 'itemKey is required' }, { status: 400 }) + } + + const libraryId = itemKey.split(':')[0] + const auth = await requireLibraryAccess(request, libraryId) + if (auth instanceof NextResponse) return auth + + try { + const translatedText = await translateItemText(itemKey) + return NextResponse.json({ translatedText }) + } catch (err) { + const error = err as Error & { code?: string } + if (error.code === 'NOT_CONFIGURED') { + return NextResponse.json({ error: error.message }, { status: 400 }) + } + if (error.code === 'NOT_FOUND') { + return NextResponse.json({ error: error.message }, { status: 404 }) + } + console.error('[ai-tagging/translate] Error:', error) + return NextResponse.json({ error: 'Failed to translate text' }, { status: 502 }) + } +} diff --git a/src/app/manage/ai-tagging/page.tsx b/src/app/manage/ai-tagging/page.tsx index b8bf1b0..4cba8a4 100644 --- a/src/app/manage/ai-tagging/page.tsx +++ b/src/app/manage/ai-tagging/page.tsx @@ -6,10 +6,11 @@ interface AiSettings { endpoint: string model: string enabled: boolean + preferredLanguage: string } export default function AiTaggingPage() { - const [settings, setSettings] = useState({ endpoint: '', model: '', enabled: false }) + const [settings, setSettings] = useState({ endpoint: '', model: '', enabled: false, preferredLanguage: 'English' }) const [loading, setLoading] = useState(true) const [saving, setSaving] = useState(false) const [saveError, setSaveError] = useState(null) @@ -178,6 +179,26 @@ export default function AiTaggingPage() {

+ + setSettings((s) => ({ ...s, preferredLanguage: e.target.value }))} + placeholder="English" + className="w-full rounded-lg px-3 py-2 text-sm outline-none focus:ring-2" + style={{ + backgroundColor: 'var(--background)', + border: '1px solid var(--border)', + color: 'var(--text-primary)', + }} + onFocus={(e) => ((e.currentTarget as HTMLElement).style.borderColor = 'var(--accent)')} + onBlur={(e) => ((e.currentTarget as HTMLElement).style.borderColor = 'var(--border)')} + /> +

+ Language used for translating extracted text. Text not in this language will be automatically translated. +

+
+ {saveError && (

(null) const [tagRefreshKey, setTagRefreshKey] = useState(0) + // Text extraction state + const [extractedText, setExtractedText] = useState(null) + const [translatedText, setTranslatedText] = useState(null) + const [extracting, setExtracting] = useState(false) + const [extractError, setExtractError] = useState(null) + const [retranslating, setRetranslating] = useState(false) + + // Determine if this is an image file (for text extraction controls) + const isImage = /\.(jpe?g|png|gif|webp|bmp|tiff?)$/i.test(name) + + // Fetch existing AI fields on mount / item change + useEffect(() => { + if (!itemKey) return + fetch(`/api/ai-tagging/fields?itemKey=${encodeURIComponent(itemKey)}`) + .then((r) => r.json()) + .then((data: { extractedText: string | null; extractedTextTranslated: string | null }) => { + setExtractedText(data.extractedText) + setTranslatedText(data.extractedTextTranslated) + }) + .catch(() => {}) + }, [itemKey]) + useEffect(() => { const handleKey = (e: KeyboardEvent) => { if (e.key === 'Escape') onClose() @@ -168,6 +190,128 @@ export default function ImageLightbox({ url, name, onClose, onPrev, onNext, item Tags

+ + {/* Text extraction section — only for images */} + {isImage && ( +
+

+ Text Extraction +

+ + + + {extractError && ( +

{extractError}

+ )} + + {extractedText && ( +
+
+

+ Extracted Text +

+
+                        {extractedText}
+                      
+
+ + {translatedText && ( +
+

+ Translation +

+
+                          {translatedText}
+                        
+
+ )} + + +
+ )} +
+ )} ) : ( diff --git a/src/components/mixed/MixedView.tsx b/src/components/mixed/MixedView.tsx index 42bc4db..c52a1ba 100644 --- a/src/components/mixed/MixedView.tsx +++ b/src/components/mixed/MixedView.tsx @@ -335,6 +335,33 @@ export default function MixedView({ libraryId, initialPath }: Props) { fetchAssignments() setFilterRefreshKey((k) => k + 1) }} + onExtractText={async (e) => { + if (e.type === 'directory') { + // Bulk extract for directory + const dirRel = filtersActive ? e.name : (currentPath ? `${currentPath}/${e.name}` : e.name) + const res = await fetch('/api/ai-tagging/extract-text-bulk', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ libraryId, path: dirRel }), + }) + if (!res.ok) { + const data = await res.json().catch(() => ({})) + throw new Error((data as { error?: string }).error ?? 'Text extraction failed') + } + } else { + // Single image extract + const itemKey = itemKeyFor(e) + const res = await fetch('/api/ai-tagging/extract-text', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ itemKey }), + }) + if (!res.ok) { + const data = await res.json().catch(() => ({})) + throw new Error((data as { error?: string }).error ?? 'Text extraction failed') + } + } + }} onDelete={(e) => { const rel = filtersActive ? e.name : (currentPath ? 
`${currentPath}/${e.name}` : e.name) fetch(`/api/browse?libraryId=${encodeURIComponent(libraryId)}&path=${encodeURIComponent(rel)}`, { method: 'DELETE' }) @@ -464,7 +491,7 @@ export default function MixedView({ libraryId, initialPath }: Props) { ) } -function EntryTile({ entry, onOpen, onTag, onDelete, onRename, onAiTag }: { entry: FileEntry; onOpen: (e: FileEntry) => void; onTag: (e: FileEntry) => void; onDelete?: (e: FileEntry) => void; onRename?: (e: FileEntry, newName: string) => Promise; onAiTag?: (e: FileEntry) => Promise }) { +function EntryTile({ entry, onOpen, onTag, onDelete, onRename, onAiTag, onExtractText }: { entry: FileEntry; onOpen: (e: FileEntry) => void; onTag: (e: FileEntry) => void; onDelete?: (e: FileEntry) => void; onRename?: (e: FileEntry, newName: string) => Promise; onAiTag?: (e: FileEntry) => Promise; onExtractText?: (e: FileEntry) => Promise }) { type ImgState = 'loading' | 'loaded' | 'error' const [imgState, setImgState] = useState( entry.thumbnailUrl ? 'loading' : 'error' @@ -479,6 +506,8 @@ function EntryTile({ entry, onOpen, onTag, onDelete, onRename, onAiTag }: { entr const [entryRenameSaving, setEntryRenameSaving] = useState(false) const [aiTagging, setAiTagging] = useState(false) const [aiTagError, setAiTagError] = useState(null) + const [textExtracting, setTextExtracting] = useState(false) + const [textExtractError, setTextExtractError] = useState(null) useEffect(() => { if (!menuOpen) return @@ -590,7 +619,7 @@ function EntryTile({ entry, onOpen, onTag, onDelete, onRename, onAiTag }: { entr {/* Kebab menu β€” top-right, shown on hover */} - {(onDelete || onRename || (onAiTag && entry.mediaType === 'image')) && ( + {(onDelete || onRename || (onAiTag && entry.mediaType === 'image') || (onExtractText && entry.mediaType === 'image') || (onExtractText && entry.type === 'directory')) && (
)} + {onExtractText && entry.mediaType === 'image' && ( + + )} + {onExtractText && entry.type === 'directory' && ( + + )} {onRename && (
)} + {/* Text extraction status overlay */} + {(textExtracting || textExtractError) && ( +
e.stopPropagation()} + > + + {textExtractError ?? 'Extracting text…'} + + {textExtractError && ( + + )} +
+ )} + {/* Delete confirmation overlay */} {confirming && (
(null) + // AI description state + const [aiDescription, setAiDescription] = useState(null) + const [generatingDesc, setGeneratingDesc] = useState(false) + const [descError, setDescError] = useState(null) + // Per-category search text const [categorySearches, setCategorySearches] = useState>({}) @@ -54,10 +59,19 @@ export default function TagSelector({ itemKey, onTagsChanged, refreshKey }: Prop }) }, []) + const fetchAiFields = useCallback(() => { + return fetch(`/api/ai-tagging/fields?itemKey=${encodeURIComponent(itemKey)}`) + .then((r) => r.json()) + .then((data: { aiDescription: string | null }) => { + setAiDescription(data.aiDescription) + }) + .catch(() => {}) + }, [itemKey]) + useEffect(() => { setLoading(true) - Promise.all([fetchAssigned(), fetchAll()]).finally(() => setLoading(false)) - }, [fetchAssigned, fetchAll]) + Promise.all([fetchAssigned(), fetchAll(), fetchAiFields()]).finally(() => setLoading(false)) + }, [fetchAssigned, fetchAll, fetchAiFields]) useEffect(() => { if (refreshKey !== undefined && refreshKey > 0) { @@ -165,8 +179,63 @@ export default function TagSelector({ itemKey, onTagsChanged, refreshKey }: Prop const assignedCategoryMap = Object.fromEntries(assigned.categories.map((c) => [c.id, c])) + const handleGenerateDescription = async () => { + setGeneratingDesc(true) + setDescError(null) + try { + const res = await fetch('/api/ai-tagging/describe', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ itemKey }), + }) + if (!res.ok) { + const data = await res.json().catch(() => ({})) + throw new Error((data as { error?: string }).error ?? 'Failed to generate description') + } + const { description } = await res.json() + setAiDescription(description) + } catch (err) { + setDescError(err instanceof Error ? err.message : 'Failed to generate description') + setTimeout(() => setDescError(null), 4000) + } finally { + setGeneratingDesc(false) + } + } + return (
+ {/* AI description */} +
+ {aiDescription && ( +

+ {aiDescription} +

+ )} +
+ + {descError && ( + {descError} + )} +
+
{/* Assigned tags grouped by category */} {assigned.tags.length > 0 && (
diff --git a/src/lib/ai-tagger.ts b/src/lib/ai-tagger.ts index cc704bb..f9959cb 100644 --- a/src/lib/ai-tagger.ts +++ b/src/lib/ai-tagger.ts @@ -2,7 +2,7 @@ import fs from 'fs' import path from 'path' import type { Library, Tag, TagCategory } from '@/types' import { getDb } from './db' -import { getAiConfig } from './app-settings' +import { getAiConfig, getPreferredLanguage } from './app-settings' import { getTags, getCategories, addTagToItem, getActiveCategoryIdsForLibrary, getResolvedTagsForItem } from './tags' import { getThumbnailPath, getVideoFramePaths } from './thumbnails' import { findFile } from './media-utils' @@ -351,3 +351,343 @@ export async function tagSingleItem(itemKey: string): Promise { return validIds } + +// ─── Vision / Chat text helpers ────────────────────────────────────────────── + +/** + * Call the vision API and return raw text content (no JSON parsing). + */ +async function callVisionApiText( + endpoint: string, + model: string, + base64Images: string[], + systemPrompt: string +): Promise { + const url = endpoint.replace(/\/+$/, '') + '/chat/completions' + + const controller = new AbortController() + const timeout = setTimeout(() => controller.abort(), REQUEST_TIMEOUT_MS) + + try { + const res = await fetch(url, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + signal: controller.signal, + body: JSON.stringify({ + model, + messages: [ + { role: 'system', content: systemPrompt }, + { + role: 'user', + content: base64Images.map((b64) => ({ + type: 'image_url', + image_url: { url: `data:image/jpeg;base64,${b64}` }, + })), + }, + ], + max_tokens: 8192, + temperature: 0.1, + }), + }) + + if (!res.ok) { + const text = await res.text().catch(() => '') + throw new Error(`LLM API returned ${res.status}: ${text.slice(0, 200)}`) + } + + const data = await res.json() as { + choices?: Array<{ message?: { content?: string } }> + } + + return data.choices?.[0]?.message?.content?.trim() ?? 
'' + } finally { + clearTimeout(timeout) + } +} + +/** + * Call the chat completions API with text-only input (no images). + */ +async function callChatApiText( + endpoint: string, + model: string, + systemPrompt: string, + userMessage: string +): Promise { + const url = endpoint.replace(/\/+$/, '') + '/chat/completions' + + const controller = new AbortController() + const timeout = setTimeout(() => controller.abort(), REQUEST_TIMEOUT_MS) + + try { + const res = await fetch(url, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + signal: controller.signal, + body: JSON.stringify({ + model, + messages: [ + { role: 'system', content: systemPrompt }, + { role: 'user', content: userMessage }, + ], + max_tokens: 8192, + temperature: 0.1, + }), + }) + + if (!res.ok) { + const text = await res.text().catch(() => '') + throw new Error(`LLM API returned ${res.status}: ${text.slice(0, 200)}`) + } + + const data = await res.json() as { + choices?: Array<{ message?: { content?: string } }> + } + + return data.choices?.[0]?.message?.content?.trim() ?? '' + } finally { + clearTimeout(timeout) + } +} + +// ─── AI description ────────────────────────────────────────────────────────── + +/** + * Generate an AI description for a media item using a vision model. + * Stores the result in the ai_description column and returns it. 
+ */ +export async function generateItemDescription(itemKey: string): Promise { + const config = getAiConfig() + if (!config.endpoint || !config.model) { + throw Object.assign(new Error('AI endpoint and model are not configured'), { code: 'NOT_CONFIGURED' }) + } + + const libraryId = itemKey.split(':')[0] + const db = getDb() + const item = db + .prepare('SELECT item_key, item_type, file_path, metadata FROM media_items WHERE item_key = ?') + .get(itemKey) as MediaItemRow | undefined + if (!item) { + throw Object.assign(new Error(`Item not found: ${itemKey}`), { code: 'NOT_FOUND' }) + } + + const library = getLibrary(libraryId) + if (!library) { + throw Object.assign(new Error(`Library not found: ${libraryId}`), { code: 'NOT_FOUND' }) + } + const libraryRoot = resolveLibraryRoot(library) + + const resolvedMedia = resolveItemImage(libraryRoot, item) + if (!resolvedMedia) { + throw Object.assign(new Error('No image available for this item'), { code: 'NO_IMAGE' }) + } + + let base64Images: string[] + if (resolvedMedia.mediaType === 'video') { + const framePaths = await getVideoFramePaths(resolvedMedia.path, libraryId, VIDEO_FRAME_PERCENTAGES) + base64Images = framePaths.map((p) => fs.readFileSync(p, 'base64')) + } else { + const thumbnailPath = await getThumbnailPath(resolvedMedia.path, libraryId, 'image') + base64Images = [fs.readFileSync(thumbnailPath, 'base64')] + } + + const systemPrompt = 'You are a media cataloging assistant. Describe the given image briefly and objectively in 1-3 sentences. Focus on the visual content, subjects, setting, and mood. Do not speculate about context outside the image. Do not preface the description with any phrases like "This image shows" or "This image features". Return only the description text with no additional commentary.' + + const description = await callVisionApiText(config.endpoint, config.model, base64Images, systemPrompt) + + db.prepare('UPDATE media_items SET ai_description = ? 
WHERE item_key = ?').run(description, itemKey) + + return description +} + +// ─── Text extraction ───────────────────────────────────────────────────────── + +/** + * Extract text (OCR) from an image using the vision model. + * Only works for images in mixed libraries. + * If the extracted text is not in the user's preferred language, auto-translates it. + * Returns { extractedText, translatedText }. + */ +export async function extractItemText(itemKey: string): Promise<{ extractedText: string; translatedText: string | null }> { + const config = getAiConfig() + if (!config.endpoint || !config.model) { + throw Object.assign(new Error('AI endpoint and model are not configured'), { code: 'NOT_CONFIGURED' }) + } + + const libraryId = itemKey.split(':')[0] + const db = getDb() + const item = db + .prepare('SELECT item_key, item_type, file_path, metadata FROM media_items WHERE item_key = ?') + .get(itemKey) as MediaItemRow | undefined + if (!item) { + throw Object.assign(new Error(`Item not found: ${itemKey}`), { code: 'NOT_FOUND' }) + } + if (item.item_type !== 'mixed_file') { + throw Object.assign(new Error('Text extraction is only available for mixed library items'), { code: 'INVALID_TYPE' }) + } + + const library = getLibrary(libraryId) + if (!library) { + throw Object.assign(new Error(`Library not found: ${libraryId}`), { code: 'NOT_FOUND' }) + } + if (library.type !== 'mixed') { + throw Object.assign(new Error('Text extraction is only available for mixed libraries'), { code: 'INVALID_TYPE' }) + } + const libraryRoot = resolveLibraryRoot(library) + + const resolvedMedia = resolveItemImage(libraryRoot, item) + if (!resolvedMedia || resolvedMedia.mediaType !== 'image') { + throw Object.assign(new Error('Text extraction is only available for images'), { code: 'NO_IMAGE' }) + } + + const thumbnailPath = await getThumbnailPath(resolvedMedia.path, libraryId, 'image') + const base64Images = [fs.readFileSync(thumbnailPath, 'base64')] + + const systemPrompt = 'You are an OCR 
assistant. Extract ALL text visible in the image exactly as it appears. Preserve line breaks and formatting. Be mindful of different colors of text that may indicate different speakers or emphasis. If there is no text in the image, respond with exactly: [NO TEXT]' + + const extractedText = await callVisionApiText(config.endpoint, config.model, base64Images, systemPrompt) + + if (!extractedText || extractedText === '[NO TEXT]') { + db.prepare('UPDATE media_items SET extracted_text = NULL, extracted_text_translated = NULL WHERE item_key = ?').run(itemKey) + return { extractedText: '', translatedText: null } + } + + db.prepare('UPDATE media_items SET extracted_text = ? WHERE item_key = ?').run(extractedText, itemKey) + + // Auto-translate if preferred language is set + const preferredLanguage = getPreferredLanguage() + let translatedText: string | null = null + if (preferredLanguage) { + try { + translatedText = await translateText(config.endpoint, config.model, extractedText, preferredLanguage) + if (translatedText) { + db.prepare('UPDATE media_items SET extracted_text_translated = ? WHERE item_key = ?').run(translatedText, itemKey) + } + } catch (err) { + console.warn(`[ai-tagger] Translation failed for "${itemKey}":`, err instanceof Error ? err.message : err) + } + } + + return { extractedText, translatedText } +} + +/** + * Translate the extracted_text of an item into the preferred language. + * Returns the translated text or null if no text to translate. 
+ */ +export async function translateItemText(itemKey: string): Promise { + const config = getAiConfig() + if (!config.endpoint || !config.model) { + throw Object.assign(new Error('AI endpoint and model are not configured'), { code: 'NOT_CONFIGURED' }) + } + + const db = getDb() + const row = db + .prepare('SELECT extracted_text FROM media_items WHERE item_key = ?') + .get(itemKey) as { extracted_text: string | null } | undefined + if (!row) { + throw Object.assign(new Error(`Item not found: ${itemKey}`), { code: 'NOT_FOUND' }) + } + if (!row.extracted_text) { + return null + } + + const preferredLanguage = getPreferredLanguage() + if (!preferredLanguage) return null + + const translatedText = await translateText(config.endpoint, config.model, row.extracted_text, preferredLanguage) + if (translatedText) { + db.prepare('UPDATE media_items SET extracted_text_translated = ? WHERE item_key = ?').run(translatedText, itemKey) + } + + return translatedText +} + +/** + * Translate text to a target language using the chat API. + * Returns null if the text is already in the target language. + */ +async function translateText( + endpoint: string, + model: string, + text: string, + targetLanguage: string +): Promise { + const systemPrompt = `You are a translator. Determine if the following text is already in ${targetLanguage}. If it is, respond with exactly: [ALREADY_TARGET_LANGUAGE]. If it is not, translate it to ${targetLanguage}. Return ONLY the translated text with no additional commentary.` + + const result = await callChatApiText(endpoint, model, systemPrompt, text) + + if (result === '[ALREADY_TARGET_LANGUAGE]' || !result) { + return null + } + + return result +} + +/** + * Extract text from all images in a directory within a mixed library. + * Returns the number of items processed. 
+ */ +export async function extractDirectoryText(libraryId: string, dirPath: string): Promise { + const config = getAiConfig() + if (!config.endpoint || !config.model) { + throw Object.assign(new Error('AI endpoint and model are not configured'), { code: 'NOT_CONFIGURED' }) + } + + const library = getLibrary(libraryId) + if (!library) { + throw Object.assign(new Error(`Library not found: ${libraryId}`), { code: 'NOT_FOUND' }) + } + if (library.type !== 'mixed') { + throw Object.assign(new Error('Text extraction is only available for mixed libraries'), { code: 'INVALID_TYPE' }) + } + + const db = getDb() + const prefix = dirPath + ? `${libraryId}:mixed_file:${encodeURIComponent(dirPath + '/')}` + : `${libraryId}:mixed_file:` + + const items = db + .prepare('SELECT item_key, item_type, file_path, metadata FROM media_items WHERE item_key LIKE ? AND item_type = ?') + .all(`${prefix}%`, 'mixed_file') as MediaItemRow[] + + const libraryRoot = resolveLibraryRoot(library) + let processed = 0 + + for (const item of items) { + // Only process images + if (!item.file_path) continue + const ext = path.extname(item.file_path).toLowerCase() + if (!IMAGE_EXTENSIONS.has(ext)) continue + + try { + await extractItemText(item.item_key) + processed++ + } catch (err) { + console.warn( + `[ai-tagger] Failed to extract text from "${item.item_key}":`, + err instanceof Error ? err.message : err + ) + } + } + + return processed +} + +/** + * Get the AI fields (description, extracted text, translation) for a media item. 
+ */ +export function getAiFields(itemKey: string): { aiDescription: string | null; extractedText: string | null; extractedTextTranslated: string | null } { + const db = getDb() + const row = db + .prepare('SELECT ai_description, extracted_text, extracted_text_translated FROM media_items WHERE item_key = ?') + .get(itemKey) as { ai_description: string | null; extracted_text: string | null; extracted_text_translated: string | null } | undefined + if (!row) { + return { aiDescription: null, extractedText: null, extractedTextTranslated: null } + } + return { + aiDescription: row.ai_description, + extractedText: row.extracted_text, + extractedTextTranslated: row.extracted_text_translated, + } +} diff --git a/src/lib/app-settings.ts b/src/lib/app-settings.ts index f45dbf1..59d982e 100644 --- a/src/lib/app-settings.ts +++ b/src/lib/app-settings.ts @@ -57,3 +57,11 @@ export function updateAiConfig(endpoint: string, model: string, enabled: boolean setSetting('ai_model', model) setSetting('ai_enabled', enabled ? 'true' : 'false') } + +export function getPreferredLanguage(): string { + return getSetting('preferred_language') ?? 
'English' +} + +export function setPreferredLanguage(language: string): void { + setSetting('preferred_language', language) +} diff --git a/src/lib/db.ts b/src/lib/db.ts index 11e4905..edc5383 100644 --- a/src/lib/db.ts +++ b/src/lib/db.ts @@ -103,6 +103,7 @@ function initDb(db: Database.Database): void { migrateMediaItemsFingerprint(db) migrateMediaTagsToItemKey(db) migrateMediaItemsAiTagged(db) + migrateMediaItemsAiFields(db) seedAppSettings(db) } @@ -114,6 +115,7 @@ function seedAppSettings(db: Database.Database): void { ai_enabled: 'false', ai_endpoint: '', ai_model: '', + preferred_language: 'English', } const insert = db.prepare( 'INSERT OR IGNORE INTO app_settings (key, value) VALUES (?, ?)' @@ -241,6 +243,22 @@ function migrateMediaItemsAiTagged(db: Database.Database): void { } } +function migrateMediaItemsAiFields(db: Database.Database): void { + const row = db + .prepare("SELECT sql FROM sqlite_master WHERE type='table' AND name='media_items'") + .get() as { sql: string } | undefined + if (!row) return + if (!row.sql.includes('ai_description')) { + db.exec('ALTER TABLE media_items ADD COLUMN ai_description TEXT') + } + if (!row.sql.includes('extracted_text')) { + db.exec('ALTER TABLE media_items ADD COLUMN extracted_text TEXT') + } + if (!row.sql.includes('extracted_text_translated')) { + db.exec('ALTER TABLE media_items ADD COLUMN extracted_text_translated TEXT') + } +} + function migrateLibrariesType(db: Database.Database): void { const row = db .prepare("SELECT sql FROM sqlite_master WHERE type='table' AND name='libraries'")