text-extraction-improvements #24

Merged
gpatti merged 4 commits from text-extraction-improvements into main 2026-04-13 16:29:26 +00:00
4 changed files with 138 additions and 51 deletions
Showing only changes of commit e31a9667ef - Show all commits

View File

@@ -1,6 +1,6 @@
import { NextRequest, NextResponse } from 'next/server'
import { requireLibraryAccess } from '@/lib/auth'
import { getAiFields } from '@/lib/ai-tagger'
import { getAiFields, updateExtractedText } from '@/lib/ai-tagger'
export async function GET(request: NextRequest) {
const { searchParams } = request.nextUrl
@@ -17,3 +17,27 @@ export async function GET(request: NextRequest) {
const fields = getAiFields(itemKey)
return NextResponse.json(fields)
}
/**
 * PATCH handler: overwrite the stored extracted text of a single item.
 *
 * Expects a JSON body of the form `{ itemKey: string, extractedText: string }`.
 * Responds with 400 when the body is not parseable JSON, when `itemKey` is
 * missing, empty or not a string, or when `extractedText` is not a string
 * (an empty string is accepted). The library id is taken as the part of
 * `itemKey` before the first ':' and is passed to `requireLibraryAccess`;
 * if that returns a response, it is sent back unchanged. Otherwise the text
 * is persisted and `{ ok: true }` is returned.
 */
export async function PATCH(request: NextRequest) {
  let parsed: unknown
  try {
    parsed = await request.json()
  } catch {
    return NextResponse.json({ error: 'Invalid JSON body' }, { status: 400 })
  }

  // request.json() may resolve to any JSON value. A literal `null` body would
  // make the destructuring below throw, so fall back to an empty object and
  // let the field validation answer with a 400 instead of an unhandled 500.
  const { itemKey, extractedText } = (parsed ?? {}) as { itemKey?: unknown; extractedText?: unknown }

  if (!itemKey || typeof itemKey !== 'string') {
    return NextResponse.json({ error: 'itemKey is required' }, { status: 400 })
  }
  if (typeof extractedText !== 'string') {
    return NextResponse.json({ error: 'extractedText is required' }, { status: 400 })
  }

  // Item keys carry the owning library id before the first ':'.
  const libraryId = itemKey.split(':')[0]
  const auth = await requireLibraryAccess(request, libraryId)
  if (auth instanceof NextResponse) return auth

  updateExtractedText(itemKey, extractedText)
  return NextResponse.json({ ok: true })
}

View File

@@ -3,14 +3,14 @@ import { requireLibraryAccess } from '@/lib/auth'
import { translateItemText } from '@/lib/ai-tagger'
export async function POST(request: NextRequest) {
let body: { itemKey?: string }
let body: { itemKey?: string; sourceLanguage?: string }
try {
body = await request.json()
} catch {
return NextResponse.json({ error: 'Invalid JSON body' }, { status: 400 })
}
const { itemKey } = body
const { itemKey, sourceLanguage } = body
if (!itemKey || typeof itemKey !== 'string') {
return NextResponse.json({ error: 'itemKey is required' }, { status: 400 })
}
@@ -20,7 +20,7 @@ export async function POST(request: NextRequest) {
if (auth instanceof NextResponse) return auth
try {
const translatedText = await translateItemText(itemKey)
const translatedText = await translateItemText(itemKey, sourceLanguage || undefined)
return NextResponse.json({ translatedText })
} catch (err) {
const error = err as Error & { code?: string }

View File

@@ -27,6 +27,9 @@ export default function ImageLightbox({ url, name, onClose, onPrev, onNext, item
const [extracting, setExtracting] = useState(false)
const [extractError, setExtractError] = useState<string | null>(null)
const [retranslating, setRetranslating] = useState(false)
const [editedExtractedText, setEditedExtractedText] = useState<string>('')
const [savingText, setSavingText] = useState(false)
const [sourceLanguage, setSourceLanguage] = useState('')
// Text overlay state
const [showTextOverlay, setShowTextOverlay] = useState(false)
@@ -45,6 +48,7 @@ export default function ImageLightbox({ url, name, onClose, onPrev, onNext, item
.then((r) => r.json())
.then((data: { extractedText: string | null; extractedTextTranslated: string | null }) => {
setExtractedText(data.extractedText)
setEditedExtractedText(data.extractedText ?? '')
setTranslatedText(data.extractedTextTranslated)
})
.catch(() => {})
@@ -267,6 +271,7 @@ export default function ImageLightbox({ url, name, onClose, onPrev, onNext, item
}
const result = await res.json()
setExtractedText(result.extractedText || null)
setEditedExtractedText(result.extractedText || '')
setTranslatedText(result.translatedText || null)
} catch (err) {
setExtractError(err instanceof Error ? err.message : 'Failed to extract text')
@@ -302,12 +307,41 @@ export default function ImageLightbox({ url, name, onClose, onPrev, onNext, item
<p className="text-xs font-medium mb-1" style={{ color: 'var(--text-secondary)' }}>
Extracted Text
</p>
<pre
className="text-xs whitespace-pre-wrap rounded-lg p-2 max-h-40 overflow-y-auto"
style={{ backgroundColor: 'var(--background)', color: 'var(--text-primary)', border: '1px solid var(--border)' }}
<textarea
value={editedExtractedText}
onChange={(e) => setEditedExtractedText(e.target.value)}
className="text-xs whitespace-pre-wrap rounded-lg p-2 w-full resize-y outline-none"
style={{
backgroundColor: 'var(--background)',
color: 'var(--text-primary)',
border: '1px solid var(--border)',
minHeight: '4rem',
maxHeight: '10rem',
fontFamily: 'inherit',
}}
/>
{editedExtractedText !== extractedText && (
<button
onClick={async () => {
setSavingText(true)
try {
await fetch('/api/ai-tagging/fields', {
method: 'PATCH',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ itemKey, extractedText: editedExtractedText }),
})
setExtractedText(editedExtractedText)
} finally {
setSavingText(false)
}
}}
disabled={savingText}
className="mt-1 text-xs px-2 py-0.5 rounded-full transition-colors disabled:opacity-50"
style={{ backgroundColor: 'var(--accent)', color: '#fff' }}
>
{extractedText}
</pre>
{savingText ? '⟳ Saving…' : 'Save'}
</button>
)}
</div>
{translatedText && (
@@ -324,6 +358,20 @@ export default function ImageLightbox({ url, name, onClose, onPrev, onNext, item
</div>
)}
<div className="flex items-center gap-1.5 flex-wrap">
<input
type="text"
value={sourceLanguage}
onChange={(e) => setSourceLanguage(e.target.value)}
placeholder="Source lang…"
className="text-xs px-2 py-0.5 rounded-full outline-none"
style={{
backgroundColor: 'var(--background)',
border: '1px solid var(--border)',
color: 'var(--text-primary)',
width: 100,
}}
/>
<button
onClick={async () => {
setRetranslating(true)
@@ -331,7 +379,7 @@ export default function ImageLightbox({ url, name, onClose, onPrev, onNext, item
const res = await fetch('/api/ai-tagging/translate', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ itemKey }),
body: JSON.stringify({ itemKey, ...(sourceLanguage.trim() && { sourceLanguage: sourceLanguage.trim() }) }),
})
if (!res.ok) {
const data = await res.json().catch(() => ({}))
@@ -346,7 +394,7 @@ export default function ImageLightbox({ url, name, onClose, onPrev, onNext, item
}
}}
disabled={retranslating}
className="self-start text-xs px-2 py-0.5 rounded-full transition-colors disabled:opacity-50"
className="text-xs px-2 py-0.5 rounded-full transition-colors disabled:opacity-50"
style={{ backgroundColor: 'var(--border)', color: 'var(--text-secondary)' }}
onMouseEnter={(e) => {
if (!retranslating) {
@@ -362,6 +410,7 @@ export default function ImageLightbox({ url, name, onClose, onPrev, onNext, item
{retranslating ? '⟳ Translating…' : '🌐 Re-translate'}
</button>
</div>
</div>
)}
</div>
)}

View File

@@ -623,7 +623,7 @@ export async function extractItemText(itemKey: string): Promise<{ extractedText:
* Translate the extracted_text of an item into the preferred language.
* Returns the translated text, or null if there is no text to translate, no preferred language is configured, or the translation yields no result.
*/
export async function translateItemText(itemKey: string): Promise<string | null> {
export async function translateItemText(itemKey: string, sourceLanguage?: string): Promise<string | null> {
const libraryId = itemKey.split(':')[0]
const config = getEffectiveAiConfig(libraryId)
const translateModel = config.modelTranslate || config.model
@@ -645,7 +645,7 @@ export async function translateItemText(itemKey: string): Promise<string | null>
const preferredLanguage = getPreferredLanguage()
if (!preferredLanguage) return null
const translatedText = await translateText(config.endpoint, translateModel, row.extracted_text, preferredLanguage, config.promptTranslate)
const translatedText = await translateText(config.endpoint, translateModel, row.extracted_text, preferredLanguage, config.promptTranslate, sourceLanguage)
if (translatedText) {
db.prepare('UPDATE media_items SET extracted_text_translated = ? WHERE item_key = ?').run(translatedText, itemKey)
}
@@ -653,6 +653,14 @@ export async function translateItemText(itemKey: string): Promise<string | null>
return translatedText
}
/**
 * Overwrite the stored `extracted_text` of the media item identified by `itemKey`.
 * Issues a single parameterized UPDATE against `media_items`; nothing is returned.
 */
export function updateExtractedText(itemKey: string, text: string): void {
  const statement = getDb().prepare('UPDATE media_items SET extracted_text = ? WHERE item_key = ?')
  statement.run(text, itemKey)
}
/**
* Translate text to a target language using the chat API.
* Returns null if the model returns an empty response or, when no source language is given, reports that the text is already in the target language.
@@ -663,16 +671,22 @@ async function translateText(
text: string,
targetLanguage: string,
customInstruction = '',
sourceLanguage?: string,
): Promise<string | null> {
const systemPrompt = `You are a translator. Determine if the following text is already in ${targetLanguage}. If it is, respond with exactly: [ALREADY_TARGET_LANGUAGE]. If it is not, translate it to ${targetLanguage}.${customInstruction ? ' ' + customInstruction : ''}`
let systemPrompt: string
if (sourceLanguage) {
systemPrompt = `You are a translator. Translate the following text from ${sourceLanguage} to ${targetLanguage}.${customInstruction ? ' ' + customInstruction : ''}`
} else {
systemPrompt = `You are a translator. Determine if the following text is already in ${targetLanguage}. If it is, respond with exactly: [ALREADY_TARGET_LANGUAGE]. If it is not, translate it to ${targetLanguage}.${customInstruction ? ' ' + customInstruction : ''}`
}
const result = await callChatApiText(endpoint, model, systemPrompt, text)
if (result === '[ALREADY_TARGET_LANGUAGE]' || !result) {
if (!sourceLanguage && (result === '[ALREADY_TARGET_LANGUAGE]' || !result)) {
return null
}
return result
return result || null
}
/**