ai-descriptions #21

Merged
gpatti merged 3 commits from ai-descriptions into main 2026-04-12 23:55:05 +00:00
13 changed files with 879 additions and 11 deletions
Showing only changes of commit 7e284383b4 - Show all commits

View File

@@ -1,27 +1,28 @@
import { NextRequest, NextResponse } from 'next/server'
import { requireAdmin } from '@/lib/auth'
import { getAiConfig, updateAiConfig } from '@/lib/app-settings'
import { getAiConfig, updateAiConfig, getPreferredLanguage, setPreferredLanguage } from '@/lib/app-settings'
/**
 * GET handler for the AI settings route.
 *
 * Responds with the current AI configuration (`endpoint`, `model`, `enabled`)
 * together with the preferred language. If `requireAdmin` hands back a
 * NextResponse, that response is returned unchanged instead.
 */
export async function GET(request: NextRequest) {
  const auth = await requireAdmin(request)
  if (auth instanceof NextResponse) return auth

  const { endpoint, model, enabled } = getAiConfig()
  // A single return: an earlier return that omitted preferredLanguage made
  // the lines below it unreachable.
  const preferredLanguage = getPreferredLanguage()
  return NextResponse.json({ endpoint, model, enabled, preferredLanguage })
}
export async function PUT(request: NextRequest) {
const auth = await requireAdmin(request)
if (auth instanceof NextResponse) return auth
let body: { endpoint?: string; model?: string; enabled?: boolean }
let body: { endpoint?: string; model?: string; enabled?: boolean; preferredLanguage?: string }
try {
body = await request.json()
} catch {
return NextResponse.json({ error: 'Invalid JSON body' }, { status: 400 })
}
const { endpoint, model, enabled } = body
const { endpoint, model, enabled, preferredLanguage } = body
if (typeof endpoint !== 'string') {
return NextResponse.json({ error: 'endpoint is required' }, { status: 400 })
@@ -34,5 +35,10 @@ export async function PUT(request: NextRequest) {
}
updateAiConfig(endpoint, model, enabled)
return NextResponse.json({ endpoint, model, enabled })
if (typeof preferredLanguage === 'string' && preferredLanguage.trim()) {
setPreferredLanguage(preferredLanguage.trim())
}
return NextResponse.json({ endpoint, model, enabled, preferredLanguage: getPreferredLanguage() })
}

View File

@@ -0,0 +1,39 @@
import { NextRequest, NextResponse } from 'next/server'
import { requireLibraryAccess } from '@/lib/auth'
import { generateItemDescription } from '@/lib/ai-tagger'
/**
 * POST handler that generates an AI description for a single item.
 *
 * Expects a JSON body of the form `{ itemKey: string }`. The text before the
 * first ':' in `itemKey` is used as the library id for the access check.
 *
 * Responses:
 * - 200 `{ description }` on success
 * - 400 for an unparsable body, a missing/non-string `itemKey`, or a
 *   `NOT_CONFIGURED` error
 * - 404 for `NOT_FOUND` and `NO_IMAGE` errors
 * - 502 for any other failure (the error is logged)
 * - any NextResponse returned by `requireLibraryAccess`, passed through as-is
 */
export async function POST(request: NextRequest) {
  let body: { itemKey?: unknown } | null
  try {
    body = await request.json()
  } catch {
    return NextResponse.json({ error: 'Invalid JSON body' }, { status: 400 })
  }

  // `null` is valid JSON, so read the field with optional chaining rather than
  // destructuring, which would throw on a null body.
  const itemKey = body?.itemKey
  if (!itemKey || typeof itemKey !== 'string') {
    return NextResponse.json({ error: 'itemKey is required' }, { status: 400 })
  }

  const libraryId = itemKey.split(':')[0]
  const auth = await requireLibraryAccess(request, libraryId)
  if (auth instanceof NextResponse) return auth

  try {
    const description = await generateItemDescription(itemKey)
    return NextResponse.json({ description })
  } catch (err) {
    // Any value can be thrown (including null/undefined), so read it defensively.
    const error = err as (Error & { code?: string }) | null | undefined
    const code = error?.code
    if (code === 'NOT_CONFIGURED') {
      return NextResponse.json({ error: error?.message }, { status: 400 })
    }
    if (code === 'NOT_FOUND' || code === 'NO_IMAGE') {
      return NextResponse.json({ error: error?.message }, { status: 404 })
    }
    console.error('[ai-tagging/describe] Error:', err)
    return NextResponse.json({ error: 'Failed to generate description' }, { status: 502 })
  }
}

View File

@@ -0,0 +1,38 @@
import { NextRequest, NextResponse } from 'next/server'
import { requireLibraryAccess } from '@/lib/auth'
import { extractDirectoryText } from '@/lib/ai-tagger'
/**
 * POST handler that runs text extraction over a directory of a library.
 *
 * Expects a JSON body of the form `{ libraryId: string, path?: string }`.
 * A missing or null `path` is treated as the empty string.
 *
 * Responses:
 * - 200 `{ processed }` with the value returned by `extractDirectoryText`
 * - 400 for an unparsable body, a missing/non-string `libraryId`, a non-string
 *   `path`, or a `NOT_CONFIGURED` / `INVALID_TYPE` error
 * - 404 for a `NOT_FOUND` error
 * - 502 for any other failure (the error is logged)
 * - any NextResponse returned by `requireLibraryAccess`, passed through as-is
 */
export async function POST(request: NextRequest) {
  let body: { libraryId?: unknown; path?: unknown } | null
  try {
    body = await request.json()
  } catch {
    return NextResponse.json({ error: 'Invalid JSON body' }, { status: 400 })
  }

  // `null` is valid JSON, so read fields with optional chaining rather than
  // destructuring, which would throw on a null body.
  const libraryId = body?.libraryId
  const rawPath = body?.path

  if (!libraryId || typeof libraryId !== 'string') {
    return NextResponse.json({ error: 'libraryId is required' }, { status: 400 })
  }
  // The path comes from the client; reject anything that is not a string
  // before it reaches the extraction code.
  if (rawPath != null && typeof rawPath !== 'string') {
    return NextResponse.json({ error: 'path must be a string' }, { status: 400 })
  }
  const dirPath = typeof rawPath === 'string' ? rawPath : ''

  const auth = await requireLibraryAccess(request, libraryId)
  if (auth instanceof NextResponse) return auth

  try {
    const processed = await extractDirectoryText(libraryId, dirPath)
    return NextResponse.json({ processed })
  } catch (err) {
    // Any value can be thrown (including null/undefined), so read it defensively.
    const error = err as (Error & { code?: string }) | null | undefined
    const code = error?.code
    if (code === 'NOT_CONFIGURED' || code === 'INVALID_TYPE') {
      return NextResponse.json({ error: error?.message }, { status: 400 })
    }
    if (code === 'NOT_FOUND') {
      return NextResponse.json({ error: error?.message }, { status: 404 })
    }
    console.error('[ai-tagging/extract-text-bulk] Error:', err)
    return NextResponse.json({ error: 'Failed to extract text' }, { status: 502 })
  }
}

View File

@@ -0,0 +1,39 @@
import { NextRequest, NextResponse } from 'next/server'
import { requireLibraryAccess } from '@/lib/auth'
import { extractItemText } from '@/lib/ai-tagger'
/**
 * POST handler that extracts text from a single item.
 *
 * Expects a JSON body of the form `{ itemKey: string }`. The text before the
 * first ':' in `itemKey` is used as the library id for the access check.
 *
 * Responses:
 * - 200 with the object returned by `extractItemText`
 * - 400 for an unparsable body, a missing/non-string `itemKey`, or a
 *   `NOT_CONFIGURED` / `NO_IMAGE` / `INVALID_TYPE` error
 * - 404 for a `NOT_FOUND` error
 * - 502 for any other failure (the error is logged)
 * - any NextResponse returned by `requireLibraryAccess`, passed through as-is
 */
export async function POST(request: NextRequest) {
  let body: { itemKey?: unknown } | null
  try {
    body = await request.json()
  } catch {
    return NextResponse.json({ error: 'Invalid JSON body' }, { status: 400 })
  }

  // `null` is valid JSON, so read the field with optional chaining rather than
  // destructuring, which would throw on a null body.
  const itemKey = body?.itemKey
  if (!itemKey || typeof itemKey !== 'string') {
    return NextResponse.json({ error: 'itemKey is required' }, { status: 400 })
  }

  const libraryId = itemKey.split(':')[0]
  const auth = await requireLibraryAccess(request, libraryId)
  if (auth instanceof NextResponse) return auth

  try {
    const result = await extractItemText(itemKey)
    return NextResponse.json(result)
  } catch (err) {
    // Any value can be thrown (including null/undefined), so read it defensively.
    const error = err as (Error & { code?: string }) | null | undefined
    const code = error?.code
    if (code === 'NOT_CONFIGURED' || code === 'NO_IMAGE' || code === 'INVALID_TYPE') {
      return NextResponse.json({ error: error?.message }, { status: 400 })
    }
    if (code === 'NOT_FOUND') {
      return NextResponse.json({ error: error?.message }, { status: 404 })
    }
    console.error('[ai-tagging/extract-text] Error:', err)
    return NextResponse.json({ error: 'Failed to extract text' }, { status: 502 })
  }
}

View File

@@ -0,0 +1,19 @@
import { NextRequest, NextResponse } from 'next/server'
import { requireLibraryAccess } from '@/lib/auth'
import { getAiFields } from '@/lib/ai-tagger'
/**
 * GET handler that returns the AI fields for one item.
 *
 * Reads `itemKey` from the query string (400 when absent), runs the library
 * access check using the text before the first ':' as the library id, and
 * responds with whatever `getAiFields` returns for that key.
 */
export async function GET(request: NextRequest) {
  const itemKey = request.nextUrl.searchParams.get('itemKey')
  if (!itemKey) {
    return NextResponse.json({ error: 'Missing itemKey' }, { status: 400 })
  }

  const [libraryId] = itemKey.split(':')
  const auth = await requireLibraryAccess(request, libraryId)
  if (auth instanceof NextResponse) return auth

  return NextResponse.json(getAiFields(itemKey))
}

View File

@@ -0,0 +1,36 @@
import { NextRequest, NextResponse } from 'next/server'
import { requireLibraryAccess } from '@/lib/auth'
import { translateItemText } from '@/lib/ai-tagger'
/**
 * POST handler that (re-)translates the extracted text of a single item.
 *
 * Expects a JSON body of the form `{ itemKey: string }`. The text before the
 * first ':' in `itemKey` is used as the library id for the access check.
 *
 * Responses:
 * - 200 `{ translatedText }` with the value returned by `translateItemText`
 * - 400 for an unparsable body, a missing/non-string `itemKey`, or a
 *   `NOT_CONFIGURED` error
 * - 404 for a `NOT_FOUND` error
 * - 502 for any other failure (the error is logged)
 * - any NextResponse returned by `requireLibraryAccess`, passed through as-is
 */
export async function POST(request: NextRequest) {
  let body: { itemKey?: unknown } | null
  try {
    body = await request.json()
  } catch {
    return NextResponse.json({ error: 'Invalid JSON body' }, { status: 400 })
  }

  // `null` is valid JSON, so read the field with optional chaining rather than
  // destructuring, which would throw on a null body.
  const itemKey = body?.itemKey
  if (!itemKey || typeof itemKey !== 'string') {
    return NextResponse.json({ error: 'itemKey is required' }, { status: 400 })
  }

  const libraryId = itemKey.split(':')[0]
  const auth = await requireLibraryAccess(request, libraryId)
  if (auth instanceof NextResponse) return auth

  try {
    const translatedText = await translateItemText(itemKey)
    return NextResponse.json({ translatedText })
  } catch (err) {
    // Any value can be thrown (including null/undefined), so read it defensively.
    const error = err as (Error & { code?: string }) | null | undefined
    const code = error?.code
    if (code === 'NOT_CONFIGURED') {
      return NextResponse.json({ error: error?.message }, { status: 400 })
    }
    if (code === 'NOT_FOUND') {
      return NextResponse.json({ error: error?.message }, { status: 404 })
    }
    console.error('[ai-tagging/translate] Error:', err)
    return NextResponse.json({ error: 'Failed to translate text' }, { status: 502 })
  }
}

View File

@@ -6,10 +6,11 @@ interface AiSettings {
endpoint: string
model: string
enabled: boolean
preferredLanguage: string
}
export default function AiTaggingPage() {
const [settings, setSettings] = useState<AiSettings>({ endpoint: '', model: '', enabled: false })
const [settings, setSettings] = useState<AiSettings>({ endpoint: '', model: '', enabled: false, preferredLanguage: 'English' })
const [loading, setLoading] = useState(true)
const [saving, setSaving] = useState(false)
const [saveError, setSaveError] = useState<string | null>(null)
@@ -178,6 +179,26 @@ export default function AiTaggingPage() {
</p>
</Field>
<Field label="Preferred Language">
<input
type="text"
value={settings.preferredLanguage}
onChange={(e) => setSettings((s) => ({ ...s, preferredLanguage: e.target.value }))}
placeholder="English"
className="w-full rounded-lg px-3 py-2 text-sm outline-none focus:ring-2"
style={{
backgroundColor: 'var(--background)',
border: '1px solid var(--border)',
color: 'var(--text-primary)',
}}
onFocus={(e) => ((e.currentTarget as HTMLElement).style.borderColor = 'var(--accent)')}
onBlur={(e) => ((e.currentTarget as HTMLElement).style.borderColor = 'var(--border)')}
/>
<p className="mt-1 text-xs" style={{ color: 'var(--text-secondary)' }}>
Language used for translating extracted text. Text not in this language will be automatically translated.
</p>
</Field>
{saveError && (
<p
className="text-sm rounded-lg px-3 py-2"

View File

@@ -23,6 +23,28 @@ export default function ImageLightbox({ url, name, onClose, onPrev, onNext, item
const [aiTagError, setAiTagError] = useState<string | null>(null)
const [tagRefreshKey, setTagRefreshKey] = useState(0)
// Text extraction state
const [extractedText, setExtractedText] = useState<string | null>(null)
const [translatedText, setTranslatedText] = useState<string | null>(null)
const [extracting, setExtracting] = useState(false)
const [extractError, setExtractError] = useState<string | null>(null)
const [retranslating, setRetranslating] = useState(false)
// Determine if this is an image file (for text extraction controls)
const isImage = /\.(jpe?g|png|gif|webp|bmp|tiff?)$/i.test(name)
// Fetch existing AI fields on mount / item change
useEffect(() => {
if (!itemKey) return
fetch(`/api/ai-tagging/fields?itemKey=${encodeURIComponent(itemKey)}`)
.then((r) => r.json())
.then((data: { extractedText: string | null; extractedTextTranslated: string | null }) => {
setExtractedText(data.extractedText)
setTranslatedText(data.extractedTextTranslated)
})
.catch(() => {})
}, [itemKey])
useEffect(() => {
const handleKey = (e: KeyboardEvent) => {
if (e.key === 'Escape') onClose()
@@ -168,6 +190,128 @@ export default function ImageLightbox({ url, name, onClose, onPrev, onNext, item
Tags
</p>
<TagSelector itemKey={itemKey!} onTagsChanged={onTagsChanged} refreshKey={tagRefreshKey} />
{/* Text extraction section — only for images */}
{isImage && (
<div className="mt-4 pt-4" style={{ borderTop: '1px solid var(--border)' }}>
<p className="text-xs font-semibold uppercase tracking-wider mb-3" style={{ color: 'var(--text-secondary)' }}>
Text Extraction
</p>
<button
onClick={async () => {
setExtracting(true)
setExtractError(null)
try {
const res = await fetch('/api/ai-tagging/extract-text', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ itemKey }),
})
if (!res.ok) {
const data = await res.json().catch(() => ({}))
throw new Error((data as { error?: string }).error ?? 'Failed to extract text')
}
const result = await res.json()
setExtractedText(result.extractedText || null)
setTranslatedText(result.translatedText || null)
} catch (err) {
setExtractError(err instanceof Error ? err.message : 'Failed to extract text')
setTimeout(() => setExtractError(null), 4000)
} finally {
setExtracting(false)
}
}}
disabled={extracting}
className="text-xs px-2 py-1 rounded-lg transition-colors disabled:opacity-50 mb-2"
style={{ backgroundColor: 'var(--border)', color: 'var(--text-secondary)' }}
onMouseEnter={(e) => {
if (!extracting) {
;(e.currentTarget as HTMLElement).style.backgroundColor = 'var(--text-secondary)'
;(e.currentTarget as HTMLElement).style.color = 'var(--background)'
}
}}
onMouseLeave={(e) => {
;(e.currentTarget as HTMLElement).style.backgroundColor = 'var(--border)'
;(e.currentTarget as HTMLElement).style.color = 'var(--text-secondary)'
}}
>
{extracting ? '⟳ Extracting…' : extractedText ? '🔍 Re-extract Text' : '🔍 Extract Text'}
</button>
{extractError && (
<p className="text-xs mb-2" style={{ color: '#f87171' }}>{extractError}</p>
)}
{extractedText && (
<div className="flex flex-col gap-2">
<div>
<p className="text-xs font-medium mb-1" style={{ color: 'var(--text-secondary)' }}>
Extracted Text
</p>
<pre
className="text-xs whitespace-pre-wrap rounded-lg p-2 max-h-40 overflow-y-auto"
style={{ backgroundColor: 'var(--background)', color: 'var(--text-primary)', border: '1px solid var(--border)' }}
>
{extractedText}
</pre>
</div>
{translatedText && (
<div>
<p className="text-xs font-medium mb-1" style={{ color: 'var(--text-secondary)' }}>
Translation
</p>
<pre
className="text-xs whitespace-pre-wrap rounded-lg p-2 max-h-40 overflow-y-auto"
style={{ backgroundColor: 'var(--background)', color: 'var(--text-primary)', border: '1px solid var(--border)' }}
>
{translatedText}
</pre>
</div>
)}
<button
onClick={async () => {
setRetranslating(true)
try {
const res = await fetch('/api/ai-tagging/translate', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ itemKey }),
})
if (!res.ok) {
const data = await res.json().catch(() => ({}))
throw new Error((data as { error?: string }).error ?? 'Failed to translate')
}
const result = await res.json()
setTranslatedText(result.translatedText || null)
} catch {
// ignore
} finally {
setRetranslating(false)
}
}}
disabled={retranslating}
className="self-start text-xs px-2 py-0.5 rounded-full transition-colors disabled:opacity-50"
style={{ backgroundColor: 'var(--border)', color: 'var(--text-secondary)' }}
onMouseEnter={(e) => {
if (!retranslating) {
;(e.currentTarget as HTMLElement).style.backgroundColor = 'var(--text-secondary)'
;(e.currentTarget as HTMLElement).style.color = 'var(--background)'
}
}}
onMouseLeave={(e) => {
;(e.currentTarget as HTMLElement).style.backgroundColor = 'var(--border)'
;(e.currentTarget as HTMLElement).style.color = 'var(--text-secondary)'
}}
>
{retranslating ? '⟳ Translating…' : '🌐 Re-translate'}
</button>
</div>
)}
</div>
)}
</div>
</div>
) : (

View File

@@ -335,6 +335,33 @@ export default function MixedView({ libraryId, initialPath }: Props) {
fetchAssignments()
setFilterRefreshKey((k) => k + 1)
}}
onExtractText={async (e) => {
if (e.type === 'directory') {
// Bulk extract for directory
const dirRel = filtersActive ? e.name : (currentPath ? `${currentPath}/${e.name}` : e.name)
const res = await fetch('/api/ai-tagging/extract-text-bulk', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ libraryId, path: dirRel }),
})
if (!res.ok) {
const data = await res.json().catch(() => ({}))
throw new Error((data as { error?: string }).error ?? 'Text extraction failed')
}
} else {
// Single image extract
const itemKey = itemKeyFor(e)
const res = await fetch('/api/ai-tagging/extract-text', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ itemKey }),
})
if (!res.ok) {
const data = await res.json().catch(() => ({}))
throw new Error((data as { error?: string }).error ?? 'Text extraction failed')
}
}
}}
onDelete={(e) => {
const rel = filtersActive ? e.name : (currentPath ? `${currentPath}/${e.name}` : e.name)
fetch(`/api/browse?libraryId=${encodeURIComponent(libraryId)}&path=${encodeURIComponent(rel)}`, { method: 'DELETE' })
@@ -464,7 +491,7 @@ export default function MixedView({ libraryId, initialPath }: Props) {
)
}
function EntryTile({ entry, onOpen, onTag, onDelete, onRename, onAiTag }: { entry: FileEntry; onOpen: (e: FileEntry) => void; onTag: (e: FileEntry) => void; onDelete?: (e: FileEntry) => void; onRename?: (e: FileEntry, newName: string) => Promise<boolean>; onAiTag?: (e: FileEntry) => Promise<void> }) {
function EntryTile({ entry, onOpen, onTag, onDelete, onRename, onAiTag, onExtractText }: { entry: FileEntry; onOpen: (e: FileEntry) => void; onTag: (e: FileEntry) => void; onDelete?: (e: FileEntry) => void; onRename?: (e: FileEntry, newName: string) => Promise<boolean>; onAiTag?: (e: FileEntry) => Promise<void>; onExtractText?: (e: FileEntry) => Promise<void> }) {
type ImgState = 'loading' | 'loaded' | 'error'
const [imgState, setImgState] = useState<ImgState>(
entry.thumbnailUrl ? 'loading' : 'error'
@@ -479,6 +506,8 @@ function EntryTile({ entry, onOpen, onTag, onDelete, onRename, onAiTag }: { entr
const [entryRenameSaving, setEntryRenameSaving] = useState(false)
const [aiTagging, setAiTagging] = useState(false)
const [aiTagError, setAiTagError] = useState<string | null>(null)
const [textExtracting, setTextExtracting] = useState(false)
const [textExtractError, setTextExtractError] = useState<string | null>(null)
useEffect(() => {
if (!menuOpen) return
@@ -590,7 +619,7 @@ function EntryTile({ entry, onOpen, onTag, onDelete, onRename, onAiTag }: { entr
</button>
{/* Kebab menu — top-right, shown on hover */}
{(onDelete || onRename || (onAiTag && entry.mediaType === 'image')) && (
{(onDelete || onRename || (onAiTag && entry.mediaType === 'image') || (onExtractText && entry.mediaType === 'image') || (onExtractText && entry.type === 'directory')) && (
<div className="absolute top-2 right-2 opacity-0 group-hover:opacity-100 transition-opacity hidden group-hover:block" ref={menuRef}>
<button
onClick={(e) => { e.stopPropagation(); setMenuOpen((o) => !o); setConfirming(false); setAiTagError(null) }}
@@ -625,6 +654,46 @@ function EntryTile({ entry, onOpen, onTag, onDelete, onRename, onAiTag }: { entr
AI Tag
</button>
)}
{onExtractText && entry.mediaType === 'image' && (
<button
onClick={(e) => {
e.stopPropagation()
setMenuOpen(false)
setTextExtracting(true)
setTextExtractError(null)
onExtractText(entry)
.catch((err) => setTextExtractError(err instanceof Error ? err.message : 'Text extraction failed'))
.finally(() => setTextExtracting(false))
}}
disabled={textExtracting}
className="flex items-center gap-2 w-full px-4 py-2 text-sm text-left transition-colors disabled:opacity-50"
style={{ color: 'var(--text-primary)' }}
onMouseEnter={(e) => ((e.currentTarget as HTMLElement).style.backgroundColor = 'var(--border)')}
onMouseLeave={(e) => ((e.currentTarget as HTMLElement).style.backgroundColor = 'transparent')}
>
🔍 Extract Text
</button>
)}
{onExtractText && entry.type === 'directory' && (
<button
onClick={(e) => {
e.stopPropagation()
setMenuOpen(false)
setTextExtracting(true)
setTextExtractError(null)
onExtractText(entry)
.catch((err) => setTextExtractError(err instanceof Error ? err.message : 'Text extraction failed'))
.finally(() => setTextExtracting(false))
}}
disabled={textExtracting}
className="flex items-center gap-2 w-full px-4 py-2 text-sm text-left transition-colors disabled:opacity-50"
style={{ color: 'var(--text-primary)' }}
onMouseEnter={(e) => ((e.currentTarget as HTMLElement).style.backgroundColor = 'var(--border)')}
onMouseLeave={(e) => ((e.currentTarget as HTMLElement).style.backgroundColor = 'transparent')}
>
🔍 Extract Text for Folder
</button>
)}
{onRename && (
<button
onClick={(e) => {
@@ -680,6 +749,28 @@ function EntryTile({ entry, onOpen, onTag, onDelete, onRename, onAiTag }: { entr
</div>
)}
{/* Text extraction status overlay */}
{(textExtracting || textExtractError) && (
<div
className="absolute inset-x-0 bottom-0 z-10 px-2 py-1.5 text-xs"
style={{ backgroundColor: textExtractError ? 'rgba(127,29,29,0.9)' : 'rgba(0,0,0,0.75)' }}
onClick={(e) => e.stopPropagation()}
>
<span style={{ color: textExtractError ? '#fca5a5' : 'var(--text-secondary)' }}>
{textExtractError ?? 'Extracting text…'}
</span>
{textExtractError && (
<button
onClick={() => setTextExtractError(null)}
className="ml-2 underline text-xs"
style={{ color: '#fca5a5' }}
>
dismiss
</button>
)}
</div>
)}
{/* Delete confirmation overlay */}
{confirming && (
<div

View File

@@ -24,6 +24,11 @@ export default function TagSelector({ itemKey, onTagsChanged, refreshKey }: Prop
const [loading, setLoading] = useState(true)
const [busy, setBusy] = useState<string | null>(null)
// AI description state
const [aiDescription, setAiDescription] = useState<string | null>(null)
const [generatingDesc, setGeneratingDesc] = useState(false)
const [descError, setDescError] = useState<string | null>(null)
// Per-category search text
const [categorySearches, setCategorySearches] = useState<Record<string, string>>({})
@@ -54,10 +59,19 @@ export default function TagSelector({ itemKey, onTagsChanged, refreshKey }: Prop
})
}, [])
// Load the stored AI description for the current item. Best-effort: any
// network or parse failure is swallowed and the current state is left alone.
const fetchAiFields = useCallback(async () => {
  try {
    const response = await fetch(`/api/ai-tagging/fields?itemKey=${encodeURIComponent(itemKey)}`)
    const fields: { aiDescription: string | null } = await response.json()
    setAiDescription(fields.aiDescription)
  } catch {
    // deliberately ignored
  }
}, [itemKey])
useEffect(() => {
setLoading(true)
Promise.all([fetchAssigned(), fetchAll()]).finally(() => setLoading(false))
}, [fetchAssigned, fetchAll])
Promise.all([fetchAssigned(), fetchAll(), fetchAiFields()]).finally(() => setLoading(false))
}, [fetchAssigned, fetchAll, fetchAiFields])
useEffect(() => {
if (refreshKey !== undefined && refreshKey > 0) {
@@ -165,8 +179,63 @@ export default function TagSelector({ itemKey, onTagsChanged, refreshKey }: Prop
const assignedCategoryMap = Object.fromEntries(assigned.categories.map((c) => [c.id, c]))
// Ask the server to generate a description for the current item and show it.
// On failure the message is displayed and cleared again after four seconds.
const handleGenerateDescription = async () => {
  const fallbackMessage = 'Failed to generate description'
  setGeneratingDesc(true)
  setDescError(null)
  try {
    const response = await fetch('/api/ai-tagging/describe', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ itemKey }),
    })
    if (!response.ok) {
      // The error body may not be JSON; fall back to an empty object.
      const failure = (await response.json().catch(() => ({}))) as { error?: string }
      throw new Error(failure.error ?? fallbackMessage)
    }
    const payload = await response.json()
    setAiDescription(payload.description)
  } catch (caught) {
    const message = caught instanceof Error ? caught.message : fallbackMessage
    setDescError(message)
    setTimeout(() => setDescError(null), 4000)
  } finally {
    setGeneratingDesc(false)
  }
}
return (
<div className="flex flex-col gap-3">
{/* AI description */}
<div className="flex flex-col gap-1">
{aiDescription && (
<p className="text-xs italic" style={{ color: 'var(--text-secondary)' }}>
{aiDescription}
</p>
)}
<div className="flex items-center gap-1.5">
<button
onClick={handleGenerateDescription}
disabled={generatingDesc}
className="text-xs px-2 py-0.5 rounded-full transition-colors disabled:opacity-50"
style={{ backgroundColor: 'var(--border)', color: 'var(--text-secondary)' }}
onMouseEnter={(e) => {
if (!generatingDesc) {
;(e.currentTarget as HTMLElement).style.backgroundColor = 'var(--text-secondary)'
;(e.currentTarget as HTMLElement).style.color = 'var(--background)'
}
}}
onMouseLeave={(e) => {
;(e.currentTarget as HTMLElement).style.backgroundColor = 'var(--border)'
;(e.currentTarget as HTMLElement).style.color = 'var(--text-secondary)'
}}
title={aiDescription ? 'Regenerate AI description' : 'Generate AI description'}
>
{generatingDesc ? '⟳ Generating…' : aiDescription ? '✦ Regenerate Description' : '✦ Generate Description'}
</button>
{descError && (
<span className="text-xs" style={{ color: '#f87171' }}>{descError}</span>
)}
</div>
</div>
{/* Assigned tags grouped by category */}
{assigned.tags.length > 0 && (
<div className="flex flex-wrap gap-1.5">

View File

@@ -2,7 +2,7 @@ import fs from 'fs'
import path from 'path'
import type { Library, Tag, TagCategory } from '@/types'
import { getDb } from './db'
import { getAiConfig } from './app-settings'
import { getAiConfig, getPreferredLanguage } from './app-settings'
import { getTags, getCategories, addTagToItem, getActiveCategoryIdsForLibrary, getResolvedTagsForItem } from './tags'
import { getThumbnailPath, getVideoFramePaths } from './thumbnails'
import { findFile } from './media-utils'
@@ -351,3 +351,343 @@ export async function tagSingleItem(itemKey: string): Promise<string[]> {
return validIds
}
// ─── Vision / Chat text helpers ──────────────────────────────────────────────
/**
 * POST the given images to `<endpoint>/chat/completions` and return the first
 * choice's message content as trimmed plain text (the content itself is not
 * parsed as JSON). Resolves to '' when the response carries no content.
 *
 * Each image is sent as a `data:image/jpeg;base64,…` URL in the user message.
 * The request is aborted after REQUEST_TIMEOUT_MS.
 *
 * @throws Error when the API answers with a non-2xx status.
 */
async function callVisionApiText(
  endpoint: string,
  model: string,
  base64Images: string[],
  systemPrompt: string
): Promise<string> {
  const requestUrl = `${endpoint.replace(/\/+$/, '')}/chat/completions`

  const imageParts = base64Images.map((encoded) => ({
    type: 'image_url',
    image_url: { url: `data:image/jpeg;base64,${encoded}` },
  }))
  const payload = {
    model,
    messages: [
      { role: 'system', content: systemPrompt },
      { role: 'user', content: imageParts },
    ],
    max_tokens: 8192,
    temperature: 0.1,
  }

  const aborter = new AbortController()
  const timer = setTimeout(() => aborter.abort(), REQUEST_TIMEOUT_MS)
  try {
    const response = await fetch(requestUrl, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      signal: aborter.signal,
      body: JSON.stringify(payload),
    })

    if (!response.ok) {
      const detail = await response.text().catch(() => '')
      throw new Error(`LLM API returned ${response.status}: ${detail.slice(0, 200)}`)
    }

    const parsed = (await response.json()) as {
      choices?: Array<{ message?: { content?: string } }>
    }
    const content = parsed.choices?.[0]?.message?.content
    return content?.trim() ?? ''
  } finally {
    clearTimeout(timer)
  }
}
/**
 * POST a text-only conversation (system prompt + one user message, no images)
 * to `<endpoint>/chat/completions` and return the first choice's message
 * content, trimmed. Resolves to '' when the response carries no content.
 *
 * The request is aborted after REQUEST_TIMEOUT_MS.
 *
 * @throws Error when the API answers with a non-2xx status.
 */
async function callChatApiText(
  endpoint: string,
  model: string,
  systemPrompt: string,
  userMessage: string
): Promise<string> {
  const requestUrl = `${endpoint.replace(/\/+$/, '')}/chat/completions`

  const payload = {
    model,
    messages: [
      { role: 'system', content: systemPrompt },
      { role: 'user', content: userMessage },
    ],
    max_tokens: 8192,
    temperature: 0.1,
  }

  const aborter = new AbortController()
  const timer = setTimeout(() => aborter.abort(), REQUEST_TIMEOUT_MS)
  try {
    const response = await fetch(requestUrl, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      signal: aborter.signal,
      body: JSON.stringify(payload),
    })

    if (!response.ok) {
      const detail = await response.text().catch(() => '')
      throw new Error(`LLM API returned ${response.status}: ${detail.slice(0, 200)}`)
    }

    const parsed = (await response.json()) as {
      choices?: Array<{ message?: { content?: string } }>
    }
    const content = parsed.choices?.[0]?.message?.content
    return content?.trim() ?? ''
  } finally {
    clearTimeout(timer)
  }
}
// ─── AI description ──────────────────────────────────────────────────────────
/**
 * Produce a short AI-written description of a media item via the vision model,
 * save it to the item's `ai_description` column, and return it.
 *
 * Video items are described from several extracted frames; everything else
 * from the item's thumbnail.
 *
 * @throws Error with `code` set to:
 *   - 'NOT_CONFIGURED' when the AI endpoint or model is missing
 *   - 'NOT_FOUND' when the item or its library does not exist
 *   - 'NO_IMAGE' when no image can be resolved for the item
 */
export async function generateItemDescription(itemKey: string): Promise<string> {
  // Build an Error tagged with a machine-readable code.
  const fail = (message: string, code: string) => Object.assign(new Error(message), { code })

  const { endpoint, model } = getAiConfig()
  if (!endpoint || !model) {
    throw fail('AI endpoint and model are not configured', 'NOT_CONFIGURED')
  }

  const [libraryId] = itemKey.split(':')
  const db = getDb()

  const item = db
    .prepare('SELECT item_key, item_type, file_path, metadata FROM media_items WHERE item_key = ?')
    .get(itemKey) as MediaItemRow | undefined
  if (!item) {
    throw fail(`Item not found: ${itemKey}`, 'NOT_FOUND')
  }

  const library = getLibrary(libraryId)
  if (!library) {
    throw fail(`Library not found: ${libraryId}`, 'NOT_FOUND')
  }

  const resolvedMedia = resolveItemImage(resolveLibraryRoot(library), item)
  if (!resolvedMedia) {
    throw fail('No image available for this item', 'NO_IMAGE')
  }

  // Videos contribute several frames, anything else a single thumbnail.
  const imagePaths =
    resolvedMedia.mediaType === 'video'
      ? await getVideoFramePaths(resolvedMedia.path, libraryId, VIDEO_FRAME_PERCENTAGES)
      : [await getThumbnailPath(resolvedMedia.path, libraryId, 'image')]
  const base64Images = imagePaths.map((imagePath) => fs.readFileSync(imagePath, 'base64'))

  const systemPrompt = 'You are a media cataloging assistant. Describe the given image briefly and objectively in 1-3 sentences. Focus on the visual content, subjects, setting, and mood. Do not speculate about context outside the image. Do not preface the description with any phrases like "This image shows" or "This image features". Return only the description text with no additional commentary.'
  const description = await callVisionApiText(endpoint, model, base64Images, systemPrompt)

  db.prepare('UPDATE media_items SET ai_description = ? WHERE item_key = ?').run(description, itemKey)
  return description
}
// ─── Text extraction ─────────────────────────────────────────────────────────
/**
 * Extract text (OCR) from an image using the vision model.
 * Only works for `mixed_file` image items in libraries of type `mixed`.
 *
 * The extracted text is stored in `extracted_text`. Any previously stored
 * translation is cleared at the same time, so a re-extraction never leaves a
 * translation that belongs to older text. If a preferred language is set, the
 * new text is then translated; a translation failure is logged and does not
 * fail the extraction.
 *
 * @returns `{ extractedText, translatedText }`. `extractedText` is '' and
 *   `translatedText` is null when the model finds no text; `translatedText`
 *   is also null when no translation was produced.
 * @throws Error with `code` set to:
 *   - 'NOT_CONFIGURED' when the AI endpoint or model is missing
 *   - 'NOT_FOUND' when the item or its library does not exist
 *   - 'INVALID_TYPE' when the item or library is not of the mixed kind
 *   - 'NO_IMAGE' when the item does not resolve to an image
 */
export async function extractItemText(itemKey: string): Promise<{ extractedText: string; translatedText: string | null }> {
  const config = getAiConfig()
  if (!config.endpoint || !config.model) {
    throw Object.assign(new Error('AI endpoint and model are not configured'), { code: 'NOT_CONFIGURED' })
  }

  const libraryId = itemKey.split(':')[0]
  const db = getDb()

  const item = db
    .prepare('SELECT item_key, item_type, file_path, metadata FROM media_items WHERE item_key = ?')
    .get(itemKey) as MediaItemRow | undefined
  if (!item) {
    throw Object.assign(new Error(`Item not found: ${itemKey}`), { code: 'NOT_FOUND' })
  }
  if (item.item_type !== 'mixed_file') {
    throw Object.assign(new Error('Text extraction is only available for mixed library items'), { code: 'INVALID_TYPE' })
  }

  const library = getLibrary(libraryId)
  if (!library) {
    throw Object.assign(new Error(`Library not found: ${libraryId}`), { code: 'NOT_FOUND' })
  }
  if (library.type !== 'mixed') {
    throw Object.assign(new Error('Text extraction is only available for mixed libraries'), { code: 'INVALID_TYPE' })
  }

  const libraryRoot = resolveLibraryRoot(library)
  const resolvedMedia = resolveItemImage(libraryRoot, item)
  if (!resolvedMedia || resolvedMedia.mediaType !== 'image') {
    throw Object.assign(new Error('Text extraction is only available for images'), { code: 'NO_IMAGE' })
  }

  const thumbnailPath = await getThumbnailPath(resolvedMedia.path, libraryId, 'image')
  const base64Images = [fs.readFileSync(thumbnailPath, 'base64')]

  const systemPrompt = 'You are an OCR assistant. Extract ALL text visible in the image exactly as it appears. Preserve line breaks and formatting. Be mindful of different colors of text that may indicate different speakers or emphasis. If there is no text in the image, respond with exactly: [NO TEXT]'
  const extractedText = await callVisionApiText(config.endpoint, config.model, base64Images, systemPrompt)

  if (!extractedText || extractedText === '[NO TEXT]') {
    db.prepare('UPDATE media_items SET extracted_text = NULL, extracted_text_translated = NULL WHERE item_key = ?').run(itemKey)
    return { extractedText: '', translatedText: null }
  }

  // Save the new text and drop any translation left over from a previous
  // extraction. Without this, a skipped or failed translation below would
  // leave a stale translation next to the fresh text.
  db.prepare('UPDATE media_items SET extracted_text = ?, extracted_text_translated = NULL WHERE item_key = ?').run(extractedText, itemKey)

  // Auto-translate if preferred language is set
  const preferredLanguage = getPreferredLanguage()
  let translatedText: string | null = null
  if (preferredLanguage) {
    try {
      translatedText = await translateText(config.endpoint, config.model, extractedText, preferredLanguage)
      if (translatedText) {
        db.prepare('UPDATE media_items SET extracted_text_translated = ? WHERE item_key = ?').run(translatedText, itemKey)
      }
    } catch (err) {
      // Translation is best-effort: keep the extraction result and only warn.
      console.warn(`[ai-tagger] Translation failed for "${itemKey}":`, err instanceof Error ? err.message : err)
    }
  }

  return { extractedText, translatedText }
}
/**
 * Translate the stored `extracted_text` of an item into the preferred language
 * and persist the outcome in `extracted_text_translated`.
 *
 * The outcome is written even when it is null (the translator reports the text
 * is already in the target language), so the stored value always matches what
 * this function returns and an older translation cannot linger.
 *
 * @returns the translated text, or null when the item has no extracted text,
 *   no preferred language is set, or no translation was produced.
 * @throws Error with `code` 'NOT_CONFIGURED' when the AI endpoint or model is
 *   missing, or 'NOT_FOUND' when the item does not exist.
 */
export async function translateItemText(itemKey: string): Promise<string | null> {
  const config = getAiConfig()
  if (!config.endpoint || !config.model) {
    throw Object.assign(new Error('AI endpoint and model are not configured'), { code: 'NOT_CONFIGURED' })
  }

  const db = getDb()
  const row = db
    .prepare('SELECT extracted_text FROM media_items WHERE item_key = ?')
    .get(itemKey) as { extracted_text: string | null } | undefined
  if (!row) {
    throw Object.assign(new Error(`Item not found: ${itemKey}`), { code: 'NOT_FOUND' })
  }
  if (!row.extracted_text) {
    return null
  }

  const preferredLanguage = getPreferredLanguage()
  if (!preferredLanguage) return null

  const translatedText = await translateText(config.endpoint, config.model, row.extracted_text, preferredLanguage)

  // Always persist, binding null as SQL NULL, so a "no translation needed"
  // result clears whatever translation was stored before.
  db.prepare('UPDATE media_items SET extracted_text_translated = ? WHERE item_key = ?').run(translatedText, itemKey)

  return translatedText
}
/**
 * Translate text to a target language using the chat API.
 *
 * The model is instructed to answer with the sentinel
 * `[ALREADY_TARGET_LANGUAGE]` when no translation is needed.
 *
 * @param endpoint - Chat API endpoint passed through to `callChatApiText`.
 * @param model - Model identifier passed through to `callChatApiText`.
 * @param text - Source text, sent as the message to translate.
 * @param targetLanguage - Language name interpolated into the system prompt.
 * @returns The translated text with surrounding whitespace removed, or `null`
 *   when the reply is empty, whitespace-only, or the sentinel.
 */
async function translateText(
  endpoint: string,
  model: string,
  text: string,
  targetLanguage: string
): Promise<string | null> {
  const systemPrompt = `You are a translator. Determine if the following text is already in ${targetLanguage}. If it is, respond with exactly: [ALREADY_TARGET_LANGUAGE]. If it is not, translate it to ${targetLanguage}. Return ONLY the translated text with no additional commentary.`
  const result = await callChatApiText(endpoint, model, systemPrompt, text)

  // Model replies often carry a trailing newline or stray spaces; compare the
  // trimmed reply so the sentinel is never stored as if it were a translation.
  const normalized = (result ?? '').trim()
  if (!normalized || normalized === '[ALREADY_TARGET_LANGUAGE]') {
    return null
  }
  return normalized
}
/**
 * Extract text from all images in a directory within a mixed library.
 *
 * Candidate items are selected by `item_key` prefix and handled one at a time.
 * A failure on a single item is logged and does not abort the run.
 *
 * @param libraryId - Id of the library to scan; it must be of type `mixed`.
 * @param dirPath - Directory whose items are selected; an empty string selects
 *   every `mixed_file` item of the library.
 * @returns The number of image items whose extraction completed without throwing.
 * @throws Error with `code: 'NOT_CONFIGURED'` when the AI endpoint or model is unset.
 * @throws Error with `code: 'NOT_FOUND'` when the library does not exist.
 * @throws Error with `code: 'INVALID_TYPE'` when the library is not a mixed library.
 */
export async function extractDirectoryText(libraryId: string, dirPath: string): Promise<number> {
  const config = getAiConfig()
  if (!config.endpoint || !config.model) {
    throw Object.assign(new Error('AI endpoint and model are not configured'), { code: 'NOT_CONFIGURED' })
  }

  const library = getLibrary(libraryId)
  if (!library) {
    throw Object.assign(new Error(`Library not found: ${libraryId}`), { code: 'NOT_FOUND' })
  }
  if (library.type !== 'mixed') {
    throw Object.assign(new Error('Text extraction is only available for mixed libraries'), { code: 'INVALID_TYPE' })
  }

  const db = getDb()
  const prefix = dirPath
    ? `${libraryId}:mixed_file:${encodeURIComponent(dirPath + '/')}`
    : `${libraryId}:mixed_file:`

  // The prefix is matched literally: percent-encoding introduces '%', and ids
  // or paths may contain '_'. Both are LIKE wildcards, so left unescaped the
  // query would also select items from sibling directories (prefix `a%2F`
  // matches keys under `ab/`).
  // NOTE(review): SQLite LIKE is case-insensitive for ASCII by default, so
  // directories differing only in letter case are still matched together.
  const likePattern = `${prefix.replace(/[\\%_]/g, '\\$&')}%`
  const items = db
    .prepare(
      "SELECT item_key, item_type, file_path, metadata FROM media_items WHERE item_key LIKE ? ESCAPE '\\' AND item_type = ?"
    )
    .all(likePattern, 'mixed_file') as MediaItemRow[]

  // NOTE(review): the resolved root is not used below. The call is kept in
  // case resolveLibraryRoot validates the library and throws; confirm and
  // remove if it has no such effect.
  resolveLibraryRoot(library)

  let processed = 0
  for (const item of items) {
    // Only process images
    if (!item.file_path) continue
    const ext = path.extname(item.file_path).toLowerCase()
    if (!IMAGE_EXTENSIONS.has(ext)) continue

    try {
      await extractItemText(item.item_key)
      processed++
    } catch (err) {
      console.warn(
        `[ai-tagger] Failed to extract text from "${item.item_key}":`,
        err instanceof Error ? err.message : err
      )
    }
  }
  return processed
}
/**
 * Look up the AI-generated fields stored for a media item.
 *
 * @param itemKey - `item_key` of the media item to read.
 * @returns The item's AI description, extracted text and translated text;
 *   every field is `null` when no row matches `itemKey`.
 */
export function getAiFields(itemKey: string): { aiDescription: string | null; extractedText: string | null; extractedTextTranslated: string | null } {
  type AiFieldsRow = {
    ai_description: string | null
    extracted_text: string | null
    extracted_text_translated: string | null
  }

  const statement = getDb().prepare(
    'SELECT ai_description, extracted_text, extracted_text_translated FROM media_items WHERE item_key = ?'
  )
  const record = statement.get(itemKey) as AiFieldsRow | undefined

  // A missing row yields the same shape with every field null.
  return {
    aiDescription: record ? record.ai_description : null,
    extractedText: record ? record.extracted_text : null,
    extractedTextTranslated: record ? record.extracted_text_translated : null,
  }
}

View File

@@ -57,3 +57,11 @@ export function updateAiConfig(endpoint: string, model: string, enabled: boolean
setSetting('ai_model', model)
setSetting('ai_enabled', enabled ? 'true' : 'false')
}
/**
 * Read the `preferred_language` setting.
 *
 * @returns The stored value, or `'English'` when the setting is absent.
 */
export function getPreferredLanguage(): string {
  const stored = getSetting('preferred_language')
  return stored ?? 'English'
}
/**
 * Store the `preferred_language` setting.
 *
 * @param language - Value to store; it is written as given.
 */
export function setPreferredLanguage(language: string): void {
  const settingKey = 'preferred_language'
  setSetting(settingKey, language)
}

View File

@@ -103,6 +103,7 @@ function initDb(db: Database.Database): void {
migrateMediaItemsFingerprint(db)
migrateMediaTagsToItemKey(db)
migrateMediaItemsAiTagged(db)
migrateMediaItemsAiFields(db)
seedAppSettings(db)
}
@@ -114,6 +115,7 @@ function seedAppSettings(db: Database.Database): void {
ai_enabled: 'false',
ai_endpoint: '',
ai_model: '',
preferred_language: 'English',
}
const insert = db.prepare(
'INSERT OR IGNORE INTO app_settings (key, value) VALUES (?, ?)'
@@ -241,6 +243,22 @@ function migrateMediaItemsAiTagged(db: Database.Database): void {
}
}
/**
 * Add the AI text columns (`ai_description`, `extracted_text`,
 * `extracted_text_translated`) to `media_items` when they are missing.
 *
 * Does nothing when the `media_items` table does not exist.
 *
 * @param db - Database handle used to read the table definition and run the
 *   `ALTER TABLE` statements.
 */
function migrateMediaItemsAiFields(db: Database.Database): void {
  const row = db
    .prepare("SELECT sql FROM sqlite_master WHERE type='table' AND name='media_items'")
    .get() as { sql: string } | undefined
  if (!row) return

  const tableSql = row.sql

  // Match whole identifiers only: a plain substring test would report
  // `extracted_text` as present whenever `extracted_text_translated` exists,
  // and the shorter column would then never be created.
  const hasColumn = (column: string): boolean => new RegExp(`\\b${column}\\b`).test(tableSql)

  for (const column of ['ai_description', 'extracted_text', 'extracted_text_translated']) {
    if (!hasColumn(column)) {
      db.exec(`ALTER TABLE media_items ADD COLUMN ${column} TEXT`)
    }
  }
}
function migrateLibrariesType(db: Database.Database): void {
const row = db
.prepare("SELECT sql FROM sqlite_master WHERE type='table' AND name='libraries'")