diff --git a/src/app/api/ai-settings/ocr/route.ts b/src/app/api/ai-settings/ocr/route.ts
new file mode 100644
index 0000000..2ccf3ce
--- /dev/null
+++ b/src/app/api/ai-settings/ocr/route.ts
@@ -0,0 +1,11 @@
+import { NextRequest, NextResponse } from 'next/server'
+import { requireAuth } from '@/lib/auth'
+import { getAiConfig } from '@/lib/app-settings'
+
+/** Responds with the OCR mode and OCR languages from getAiConfig() once requireAuth lets the request through. */
+export async function GET(request: NextRequest) {
+ const authResult = await requireAuth(request)
+ if (authResult instanceof NextResponse) return authResult // response produced by requireAuth, passed through as-is
+ const aiConfig = getAiConfig()
+ return NextResponse.json({ ocrMode: aiConfig.ocrMode, ocrLanguages: aiConfig.ocrLanguages })
+}
diff --git a/src/app/api/ai-tagging/extract-text/route.ts b/src/app/api/ai-tagging/extract-text/route.ts
index 5b6ad22..b213555 100644
--- a/src/app/api/ai-tagging/extract-text/route.ts
+++ b/src/app/api/ai-tagging/extract-text/route.ts
@@ -3,14 +3,14 @@ import { requireLibraryAccess } from '@/lib/auth'
import { enqueueJob } from '@/lib/ai-jobs'
export async function POST(request: NextRequest) {
- let body: { itemKey?: string }
+ let body: { itemKey?: string; ocrLanguages?: string }
try {
body = await request.json()
} catch {
return NextResponse.json({ error: 'Invalid JSON body' }, { status: 400 })
}
- const { itemKey } = body
+ const { itemKey, ocrLanguages } = body
if (!itemKey || typeof itemKey !== 'string') {
return NextResponse.json({ error: 'itemKey is required' }, { status: 400 })
}
@@ -19,6 +19,12 @@ export async function POST(request: NextRequest) {
const auth = await requireLibraryAccess(request, libraryId)
if (auth instanceof NextResponse) return auth
- const jobId = enqueueJob(itemKey, 'extract', libraryId)
+ // ocrLanguages is caller-supplied JSON, so its declared type is not guaranteed at runtime.
+ // Validate it before handing it to enqueueJob: it must be a string made of language codes
+ // joined by '+' (e.g. "jpn+jpn_vert"), optionally with one sub-directory segment each.
+ // Dots, leading dashes, whitespace and shell metacharacters are rejected.
+ const rawOcrLanguages: unknown = ocrLanguages
+ if (rawOcrLanguages != null && typeof rawOcrLanguages !== 'string') {
+   return NextResponse.json({ error: 'ocrLanguages must be a string' }, { status: 400 })
+ }
+ // Blank or missing input means "no override", matching the previous falsy check.
+ const ocrOverride = typeof rawOcrLanguages === 'string' ? rawOcrLanguages.trim() : ''
+ const ocrOverrideIsValid =
+   ocrOverride.length <= 200 &&
+   ocrOverride
+     .split('+')
+     .every((code) => /^[A-Za-z0-9][A-Za-z0-9_-]*(?:\/[A-Za-z0-9][A-Za-z0-9_-]*)?$/.test(code))
+ if (ocrOverride && !ocrOverrideIsValid) {
+   return NextResponse.json(
+     { error: 'ocrLanguages must be language codes joined by "+" (e.g. "jpn+jpn_vert")' },
+     { status: 400 },
+   )
+ }
+ const jobId = enqueueJob(
+   itemKey,
+   'extract',
+   libraryId,
+   undefined,
+   ocrOverride ? { ocrLanguages: ocrOverride } : undefined,
+ )
return NextResponse.json({ jobId }, { status: 202 })
}
diff --git a/src/components/DoomScrollView.tsx b/src/components/DoomScrollView.tsx
index daa5f96..d750160 100644
--- a/src/components/DoomScrollView.tsx
+++ b/src/components/DoomScrollView.tsx
@@ -336,7 +336,7 @@ export default function DoomScrollView({ items, videoContext = 'mixed', onClose,
{/* Text overlay */}
{showTextOverlay && displayText && (
e.stopPropagation()}
>
diff --git a/src/components/mixed/ImageLightbox.tsx b/src/components/mixed/ImageLightbox.tsx
index baa3742..d059616 100644
--- a/src/components/mixed/ImageLightbox.tsx
+++ b/src/components/mixed/ImageLightbox.tsx
@@ -39,6 +39,11 @@ export default function ImageLightbox({ url, name, onClose, onPrev, onNext, item
const [descPending, setDescPending] = useState(false)
const [descError, setDescError] = useState
(null)
+ // OCR settings
+ const [ocrMode, setOcrMode] = useState(null)
+ const [defaultOcrLanguages, setDefaultOcrLanguages] = useState('eng')
+ const [ocrLanguageInput, setOcrLanguageInput] = useState('')
+
// Text overlay state
const [showTextOverlay, setShowTextOverlay] = useState(false)
const [showOriginal, setShowOriginal] = useState(false)
@@ -68,6 +73,13 @@ export default function ImageLightbox({ url, name, onClose, onPrev, onNext, item
useEffect(() => {
fetchAiFields()
+ // Best-effort load of the OCR settings; on any failure the initial state values are kept.
+ fetch('/api/ai-settings/ocr')
+ .then((r) => (r.ok ? r.json() : Promise.reject(new Error(`OCR settings request failed (${r.status})`))))
+ .then((d: { ocrMode?: unknown; ocrLanguages?: unknown }) => {
+   // Accept only well-formed fields so an error body can never overwrite the defaults.
+   if (typeof d.ocrMode === 'string') setOcrMode(d.ocrMode)
+   if (typeof d.ocrLanguages === 'string' && d.ocrLanguages.trim()) setDefaultOcrLanguages(d.ocrLanguages)
+ })
+ .catch(() => {}) // deliberately ignored: the prompt falls back to the initial state
return () => {
if (pollRef.current) clearInterval(pollRef.current)
}
@@ -439,58 +451,79 @@ export default function ImageLightbox({ url, name, onClose, onPrev, onNext, item
Text Extraction
-
)}
- {onExtractText && entry.mediaType === 'image' && (
+ {onExtractText && entry.mediaType === 'image' && !showOcrPrompt && (
{
e.stopPropagation()
- setMenuOpen(false)
- setTextExtracting(true)
- setTextExtractError(null)
- onExtractText(entry)
- .catch((err) => setTextExtractError(err instanceof Error ? err.message : 'Text extraction failed'))
- .finally(() => setTextExtracting(false))
+ if (ocrMode && ocrMode !== 'llm') {
+ setOcrLanguageInput('')
+ setShowOcrPrompt(true)
+ } else {
+ setMenuOpen(false)
+ setTextExtracting(true)
+ setTextExtractError(null)
+ onExtractText(entry)
+ .catch((err) => setTextExtractError(err instanceof Error ? err.message : 'Text extraction failed'))
+ .finally(() => setTextExtracting(false))
+ }
}}
disabled={textExtracting}
className="flex items-center gap-2 w-full px-4 py-2 text-sm text-left transition-colors disabled:opacity-50"
@@ -824,6 +846,57 @@ function EntryTile({ entry, onOpen, onTag, onDelete, onRename, onAiTag, onExtrac
π Extract Text
)}
+ {onExtractText && entry.mediaType === 'image' && showOcrPrompt && (
+ e.stopPropagation()}>
+
OCR language
+
setOcrLanguageInput(e.target.value)}
+ onKeyDown={(e) => {
+ if (e.key === 'Escape') { setShowOcrPrompt(false) }
+ if (e.key === 'Enter') {
+ setShowOcrPrompt(false)
+ setMenuOpen(false)
+ setTextExtracting(true)
+ setTextExtractError(null)
+ onExtractText(entry, ocrLanguageInput.trim() || undefined)
+ .catch((err) => setTextExtractError(err instanceof Error ? err.message : 'Text extraction failed'))
+ .finally(() => setTextExtracting(false))
+ }
+ }}
+ placeholder={defaultOcrLanguages ?? 'eng'}
+ className="text-xs px-2 py-1 rounded-lg outline-none w-full"
+ style={{ backgroundColor: 'var(--background)', border: '1px solid var(--border)', color: 'var(--text-primary)' }}
+ title="Tesseract language(s) for this extraction (e.g. jpn+jpn_vert). Leave blank to use the configured default."
+ />
+
+ {
+ setShowOcrPrompt(false)
+ setMenuOpen(false)
+ setTextExtracting(true)
+ setTextExtractError(null)
+ onExtractText(entry, ocrLanguageInput.trim() || undefined)
+ .catch((err) => setTextExtractError(err instanceof Error ? err.message : 'Text extraction failed'))
+ .finally(() => setTextExtracting(false))
+ }}
+ className="text-xs px-2 py-1 rounded-lg"
+ style={{ backgroundColor: 'var(--accent)', color: '#fff' }}
+ >
+ Extract
+
+ setShowOcrPrompt(false)}
+ className="text-xs px-2 py-1"
+ style={{ color: 'var(--text-secondary)' }}
+ >
+ Cancel
+
+
+
+ )}
{onExtractText && entry.type === 'directory' && (
{
diff --git a/src/lib/ai-jobs.ts b/src/lib/ai-jobs.ts
index 70ca66e..6e4b8c6 100644
--- a/src/lib/ai-jobs.ts
+++ b/src/lib/ai-jobs.ts
@@ -34,6 +34,7 @@ interface AiJobRow {
started_at: number | null
completed_at: number | null
item_title: string | null
+ payload: string | null
}
function rowToJob(row: AiJobRow): AiJob {
@@ -75,6 +76,7 @@ export function enqueueJob(
jobType: AiJobType,
libraryId: string,
sourceLanguage?: string,
+ payload?: Record,
): string {
const db = getDb()
@@ -96,9 +98,9 @@ export function enqueueJob(
const metadata = jobType === 'translate' && sourceLanguage ? sourceLanguage : null
db.prepare(
- `INSERT INTO ai_jobs (id, item_key, library_id, job_type, status, error, attempt, max_retries, created_at, item_title)
- VALUES (?, ?, ?, ?, 'queued', ?, 0, ?, ?, ?)`
- ).run(id, itemKey, libraryId, jobType, metadata, maxRetries, Date.now(), title)
+ `INSERT INTO ai_jobs (id, item_key, library_id, job_type, status, error, attempt, max_retries, created_at, item_title, payload)
+ VALUES (?, ?, ?, ?, 'queued', ?, 0, ?, ?, ?, ?)`
+ ).run(id, itemKey, libraryId, jobType, metadata, maxRetries, Date.now(), title, payload ? JSON.stringify(payload) : null)
// Wake the processor
wakeProcessor()
@@ -251,6 +253,8 @@ async function processNextJob(): Promise {
// Extract sourceLanguage for translate jobs (stored in error field at enqueue)
const sourceLanguage = row.job_type === 'translate' ? row.error : null
+ // Parse job payload (carries per-call overrides, e.g. ocrLanguages for extract jobs)
+ const jobPayload = row.payload ? (JSON.parse(row.payload) as Record) : null
db.prepare(
"UPDATE ai_jobs SET status = 'running', started_at = ?, error = NULL WHERE id = ?"
@@ -265,7 +269,7 @@ async function processNextJob(): Promise {
await generateItemDescription(row.item_key)
break
case 'extract':
- await extractItemText(row.item_key)
+ await extractItemText(row.item_key, jobPayload?.ocrLanguages)
break
case 'translate':
await translateItemText(row.item_key, sourceLanguage || undefined)
diff --git a/src/lib/ai-tagger.ts b/src/lib/ai-tagger.ts
index a5b0acf..09a6510 100644
--- a/src/lib/ai-tagger.ts
+++ b/src/lib/ai-tagger.ts
@@ -538,7 +538,7 @@ async function extractWithTesseract(
* Translation is not performed automatically — call translateItemText() separately.
* Returns { extractedText, translatedText } where translatedText is always null.
*/
-export async function extractItemText(itemKey: string): Promise<{ extractedText: string; translatedText: string | null }> {
+export async function extractItemText(itemKey: string, ocrLanguagesOverride?: string): Promise<{ extractedText: string; translatedText: string | null }> {
const libraryId = itemKey.split(':')[0]
const config = getEffectiveAiConfig(libraryId)
@@ -567,7 +567,8 @@ export async function extractItemText(itemKey: string): Promise<{ extractedText:
throw Object.assign(new Error('Text extraction is only available for images'), { code: 'NO_IMAGE' })
}
- const { ocrMode, ocrLanguages, ocrConfidenceThreshold } = config
+ const { ocrMode, ocrLanguages: configOcrLanguages, ocrConfidenceThreshold } = config
+ const ocrLanguages = ocrLanguagesOverride?.trim() || configOcrLanguages
// ── Tesseract path ────────────────────────────────────────────────────────
if (ocrMode === 'tesseract' || ocrMode === 'hybrid') {
diff --git a/src/lib/db.ts b/src/lib/db.ts
index 9525cc7..bb6d2a4 100644
--- a/src/lib/db.ts
+++ b/src/lib/db.ts
@@ -338,4 +338,12 @@ function migrateAiJobs(db: Database.Database): void {
CREATE INDEX IF NOT EXISTS ai_jobs_status ON ai_jobs(status);
CREATE INDEX IF NOT EXISTS ai_jobs_created_at ON ai_jobs(created_at);
`)
+
+ // Add the payload column to an ai_jobs table that does not have it yet. The real column
+ // list is inspected instead of substring-matching the stored CREATE TABLE text, which would
+ // skip the migration whenever any other identifier merely contains "payload".
+ const aiJobsColumns = db.pragma('table_info(ai_jobs)') as Array<{ name: string }>
+ if (aiJobsColumns.length > 0 && !aiJobsColumns.some((col) => col.name.toLowerCase() === 'payload')) {
+ db.exec('ALTER TABLE ai_jobs ADD COLUMN payload TEXT')
+ }
}