text-extraction-improvements #24

Merged
gpatti merged 4 commits from text-extraction-improvements into main 2026-04-13 16:29:26 +00:00
4 changed files with 138 additions and 51 deletions
Showing only changes of commit e31a9667ef - Show all commits

View File

@@ -1,6 +1,6 @@
import { NextRequest, NextResponse } from 'next/server' import { NextRequest, NextResponse } from 'next/server'
import { requireLibraryAccess } from '@/lib/auth' import { requireLibraryAccess } from '@/lib/auth'
import { getAiFields } from '@/lib/ai-tagger' import { getAiFields, updateExtractedText } from '@/lib/ai-tagger'
export async function GET(request: NextRequest) { export async function GET(request: NextRequest) {
const { searchParams } = request.nextUrl const { searchParams } = request.nextUrl
@@ -17,3 +17,27 @@ export async function GET(request: NextRequest) {
const fields = getAiFields(itemKey) const fields = getAiFields(itemKey)
return NextResponse.json(fields) return NextResponse.json(fields)
} }
/**
 * PATCH handler: replace the stored extracted text of one item.
 *
 * Expects a JSON object body with string fields `itemKey` and `extractedText`
 * (an empty `extractedText` is accepted). The library id is taken from the part
 * of `itemKey` before the first ':' and checked with `requireLibraryAccess`
 * before anything is written.
 *
 * Responses:
 * - 400 `{ error }` when the body is not valid JSON or a field is missing or has the wrong type
 * - whatever `requireLibraryAccess` returns when it yields a `NextResponse`
 * - `{ ok: true }` once `updateExtractedText` has been called
 */
export async function PATCH(request: NextRequest) {
  let payload: unknown
  try {
    payload = await request.json()
  } catch {
    return NextResponse.json({ error: 'Invalid JSON body' }, { status: 400 })
  }

  // `null` is valid JSON, so parsing succeeds, but destructuring it would throw and
  // surface as a 500. Treat every non-object payload as an object with no fields so
  // it falls through to the ordinary 400 validation below.
  type PatchFields = { itemKey?: unknown; extractedText?: unknown }
  const fields: PatchFields =
    typeof payload === 'object' && payload !== null ? (payload as PatchFields) : {}
  const { itemKey, extractedText } = fields

  if (typeof itemKey !== 'string' || !itemKey) {
    return NextResponse.json({ error: 'itemKey is required' }, { status: 400 })
  }
  // An empty string is a legitimate value here (clearing the text), so only the type is checked.
  if (typeof extractedText !== 'string') {
    return NextResponse.json({ error: 'extractedText is required' }, { status: 400 })
  }

  const libraryId = itemKey.split(':')[0]
  const auth = await requireLibraryAccess(request, libraryId)
  if (auth instanceof NextResponse) return auth

  updateExtractedText(itemKey, extractedText)
  return NextResponse.json({ ok: true })
}

View File

@@ -3,14 +3,14 @@ import { requireLibraryAccess } from '@/lib/auth'
import { translateItemText } from '@/lib/ai-tagger' import { translateItemText } from '@/lib/ai-tagger'
export async function POST(request: NextRequest) { export async function POST(request: NextRequest) {
let body: { itemKey?: string } let body: { itemKey?: string; sourceLanguage?: string }
try { try {
body = await request.json() body = await request.json()
} catch { } catch {
return NextResponse.json({ error: 'Invalid JSON body' }, { status: 400 }) return NextResponse.json({ error: 'Invalid JSON body' }, { status: 400 })
} }
const { itemKey } = body const { itemKey, sourceLanguage } = body
if (!itemKey || typeof itemKey !== 'string') { if (!itemKey || typeof itemKey !== 'string') {
return NextResponse.json({ error: 'itemKey is required' }, { status: 400 }) return NextResponse.json({ error: 'itemKey is required' }, { status: 400 })
} }
@@ -20,7 +20,7 @@ export async function POST(request: NextRequest) {
if (auth instanceof NextResponse) return auth if (auth instanceof NextResponse) return auth
try { try {
const translatedText = await translateItemText(itemKey) const translatedText = await translateItemText(itemKey, sourceLanguage || undefined)
return NextResponse.json({ translatedText }) return NextResponse.json({ translatedText })
} catch (err) { } catch (err) {
const error = err as Error & { code?: string } const error = err as Error & { code?: string }

View File

@@ -27,6 +27,9 @@ export default function ImageLightbox({ url, name, onClose, onPrev, onNext, item
const [extracting, setExtracting] = useState(false) const [extracting, setExtracting] = useState(false)
const [extractError, setExtractError] = useState<string | null>(null) const [extractError, setExtractError] = useState<string | null>(null)
const [retranslating, setRetranslating] = useState(false) const [retranslating, setRetranslating] = useState(false)
const [editedExtractedText, setEditedExtractedText] = useState<string>('')
const [savingText, setSavingText] = useState(false)
const [sourceLanguage, setSourceLanguage] = useState('')
// Text overlay state // Text overlay state
const [showTextOverlay, setShowTextOverlay] = useState(false) const [showTextOverlay, setShowTextOverlay] = useState(false)
@@ -45,6 +48,7 @@ export default function ImageLightbox({ url, name, onClose, onPrev, onNext, item
.then((r) => r.json()) .then((r) => r.json())
.then((data: { extractedText: string | null; extractedTextTranslated: string | null }) => { .then((data: { extractedText: string | null; extractedTextTranslated: string | null }) => {
setExtractedText(data.extractedText) setExtractedText(data.extractedText)
setEditedExtractedText(data.extractedText ?? '')
setTranslatedText(data.extractedTextTranslated) setTranslatedText(data.extractedTextTranslated)
}) })
.catch(() => {}) .catch(() => {})
@@ -267,6 +271,7 @@ export default function ImageLightbox({ url, name, onClose, onPrev, onNext, item
} }
const result = await res.json() const result = await res.json()
setExtractedText(result.extractedText || null) setExtractedText(result.extractedText || null)
setEditedExtractedText(result.extractedText || '')
setTranslatedText(result.translatedText || null) setTranslatedText(result.translatedText || null)
} catch (err) { } catch (err) {
setExtractError(err instanceof Error ? err.message : 'Failed to extract text') setExtractError(err instanceof Error ? err.message : 'Failed to extract text')
@@ -302,12 +307,41 @@ export default function ImageLightbox({ url, name, onClose, onPrev, onNext, item
<p className="text-xs font-medium mb-1" style={{ color: 'var(--text-secondary)' }}> <p className="text-xs font-medium mb-1" style={{ color: 'var(--text-secondary)' }}>
Extracted Text Extracted Text
</p> </p>
<pre <textarea
className="text-xs whitespace-pre-wrap rounded-lg p-2 max-h-40 overflow-y-auto" value={editedExtractedText}
style={{ backgroundColor: 'var(--background)', color: 'var(--text-primary)', border: '1px solid var(--border)' }} onChange={(e) => setEditedExtractedText(e.target.value)}
className="text-xs whitespace-pre-wrap rounded-lg p-2 w-full resize-y outline-none"
style={{
backgroundColor: 'var(--background)',
color: 'var(--text-primary)',
border: '1px solid var(--border)',
minHeight: '4rem',
maxHeight: '10rem',
fontFamily: 'inherit',
}}
/>
{editedExtractedText !== extractedText && (
<button
onClick={async () => {
setSavingText(true)
try {
await fetch('/api/ai-tagging/fields', {
method: 'PATCH',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ itemKey, extractedText: editedExtractedText }),
})
setExtractedText(editedExtractedText)
} finally {
setSavingText(false)
}
}}
disabled={savingText}
className="mt-1 text-xs px-2 py-0.5 rounded-full transition-colors disabled:opacity-50"
style={{ backgroundColor: 'var(--accent)', color: '#fff' }}
> >
{extractedText} {savingText ? '⟳ Saving…' : 'Save'}
</pre> </button>
)}
</div> </div>
{translatedText && ( {translatedText && (
@@ -324,6 +358,20 @@ export default function ImageLightbox({ url, name, onClose, onPrev, onNext, item
</div> </div>
)} )}
<div className="flex items-center gap-1.5 flex-wrap">
<input
type="text"
value={sourceLanguage}
onChange={(e) => setSourceLanguage(e.target.value)}
placeholder="Source lang…"
className="text-xs px-2 py-0.5 rounded-full outline-none"
style={{
backgroundColor: 'var(--background)',
border: '1px solid var(--border)',
color: 'var(--text-primary)',
width: 100,
}}
/>
<button <button
onClick={async () => { onClick={async () => {
setRetranslating(true) setRetranslating(true)
@@ -331,7 +379,7 @@ export default function ImageLightbox({ url, name, onClose, onPrev, onNext, item
const res = await fetch('/api/ai-tagging/translate', { const res = await fetch('/api/ai-tagging/translate', {
method: 'POST', method: 'POST',
headers: { 'Content-Type': 'application/json' }, headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ itemKey }), body: JSON.stringify({ itemKey, ...(sourceLanguage.trim() && { sourceLanguage: sourceLanguage.trim() }) }),
}) })
if (!res.ok) { if (!res.ok) {
const data = await res.json().catch(() => ({})) const data = await res.json().catch(() => ({}))
@@ -346,7 +394,7 @@ export default function ImageLightbox({ url, name, onClose, onPrev, onNext, item
} }
}} }}
disabled={retranslating} disabled={retranslating}
className="self-start text-xs px-2 py-0.5 rounded-full transition-colors disabled:opacity-50" className="text-xs px-2 py-0.5 rounded-full transition-colors disabled:opacity-50"
style={{ backgroundColor: 'var(--border)', color: 'var(--text-secondary)' }} style={{ backgroundColor: 'var(--border)', color: 'var(--text-secondary)' }}
onMouseEnter={(e) => { onMouseEnter={(e) => {
if (!retranslating) { if (!retranslating) {
@@ -362,6 +410,7 @@ export default function ImageLightbox({ url, name, onClose, onPrev, onNext, item
{retranslating ? '⟳ Translating…' : '🌐 Re-translate'} {retranslating ? '⟳ Translating…' : '🌐 Re-translate'}
</button> </button>
</div> </div>
</div>
)} )}
</div> </div>
)} )}

View File

@@ -623,7 +623,7 @@ export async function extractItemText(itemKey: string): Promise<{ extractedText:
* Translate the extracted_text of an item into the preferred language. * Translate the extracted_text of an item into the preferred language.
* Returns the translated text or null if no text to translate. * Returns the translated text or null if no text to translate.
*/ */
export async function translateItemText(itemKey: string): Promise<string | null> { export async function translateItemText(itemKey: string, sourceLanguage?: string): Promise<string | null> {
const libraryId = itemKey.split(':')[0] const libraryId = itemKey.split(':')[0]
const config = getEffectiveAiConfig(libraryId) const config = getEffectiveAiConfig(libraryId)
const translateModel = config.modelTranslate || config.model const translateModel = config.modelTranslate || config.model
@@ -645,7 +645,7 @@ export async function translateItemText(itemKey: string): Promise<string | null>
const preferredLanguage = getPreferredLanguage() const preferredLanguage = getPreferredLanguage()
if (!preferredLanguage) return null if (!preferredLanguage) return null
const translatedText = await translateText(config.endpoint, translateModel, row.extracted_text, preferredLanguage, config.promptTranslate) const translatedText = await translateText(config.endpoint, translateModel, row.extracted_text, preferredLanguage, config.promptTranslate, sourceLanguage)
if (translatedText) { if (translatedText) {
db.prepare('UPDATE media_items SET extracted_text_translated = ? WHERE item_key = ?').run(translatedText, itemKey) db.prepare('UPDATE media_items SET extracted_text_translated = ? WHERE item_key = ?').run(translatedText, itemKey)
} }
@@ -653,6 +653,14 @@ export async function translateItemText(itemKey: string): Promise<string | null>
return translatedText return translatedText
} }
/**
 * Overwrite the stored `extracted_text` of a single media item.
 *
 * Runs one UPDATE against `media_items`, matched on `item_key`. The result of
 * the statement is not inspected.
 *
 * @param itemKey - Value matched against the `item_key` column.
 * @param text - Replacement text written to `extracted_text`.
 */
export function updateExtractedText(itemKey: string, text: string): void {
  const updateStatement = getDb().prepare('UPDATE media_items SET extracted_text = ? WHERE item_key = ?')
  updateStatement.run(text, itemKey)
}
/** /**
* Translate text to a target language using the chat API. * Translate text to a target language using the chat API.
* Returns null if the text is already in the target language. * Returns null if the text is already in the target language.
@@ -663,16 +671,22 @@ async function translateText(
text: string, text: string,
targetLanguage: string, targetLanguage: string,
customInstruction = '', customInstruction = '',
sourceLanguage?: string,
): Promise<string | null> { ): Promise<string | null> {
const systemPrompt = `You are a translator. Determine if the following text is already in ${targetLanguage}. If it is, respond with exactly: [ALREADY_TARGET_LANGUAGE]. If it is not, translate it to ${targetLanguage}.${customInstruction ? ' ' + customInstruction : ''}` let systemPrompt: string
if (sourceLanguage) {
systemPrompt = `You are a translator. Translate the following text from ${sourceLanguage} to ${targetLanguage}.${customInstruction ? ' ' + customInstruction : ''}`
} else {
systemPrompt = `You are a translator. Determine if the following text is already in ${targetLanguage}. If it is, respond with exactly: [ALREADY_TARGET_LANGUAGE]. If it is not, translate it to ${targetLanguage}.${customInstruction ? ' ' + customInstruction : ''}`
}
const result = await callChatApiText(endpoint, model, systemPrompt, text) const result = await callChatApiText(endpoint, model, systemPrompt, text)
if (result === '[ALREADY_TARGET_LANGUAGE]' || !result) { if (!sourceLanguage && (result === '[ALREADY_TARGET_LANGUAGE]' || !result)) {
return null return null
} }
return result return result || null
} }
/** /**