feat: per-extraction OCR language override
Allow users to specify a Tesseract language string (e.g. jpn+jpn_vert)
on a per-extraction basis, overriding the global OCR language setting.
- Add payload column to ai_jobs table (migration) to carry per-call data
- Thread ocrLanguages payload through enqueueJob → processNextJob → extractItemText
- New GET /api/ai-settings/ocr endpoint (requireAuth) returns { ocrMode, ocrLanguages }
- ImageLightbox fetches OCR settings and shows a language input next to the
Extract Text button when the OCR mode is hybrid or tesseract (hidden for llm-only)
- MixedView fetches OCR settings and passes them down to EntryTile; the kebab-menu
Extract Text action on images shows an inline language prompt before dispatching the job
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -83,6 +83,9 @@ export default function MixedView({ libraryId, initialPath }: Props) {
|
||||
setDoomScrollLoading(false)
|
||||
}, [currentPath])
|
||||
|
||||
const [ocrMode, setOcrMode] = useState<string | null>(null)
|
||||
const [defaultOcrLanguages, setDefaultOcrLanguages] = useState('eng')
|
||||
|
||||
const fetchAssignments = useCallback(() => {
|
||||
fetch(`/api/tags/library-assignments?libraryId=${encodeURIComponent(libraryId)}`)
|
||||
.then((r) => r.json())
|
||||
@@ -92,6 +95,16 @@ export default function MixedView({ libraryId, initialPath }: Props) {
|
||||
|
||||
useEffect(() => { fetchAssignments() }, [fetchAssignments])
|
||||
|
||||
useEffect(() => {
|
||||
fetch('/api/ai-settings/ocr')
|
||||
.then((r) => r.json())
|
||||
.then((d: { ocrMode: string; ocrLanguages: string }) => {
|
||||
setOcrMode(d.ocrMode)
|
||||
setDefaultOcrLanguages(d.ocrLanguages)
|
||||
})
|
||||
.catch(() => {})
|
||||
}, [])
|
||||
|
||||
const filtersActive = search !== '' || selectedTagIds.size > 0
|
||||
|
||||
const fetchRecursive = useCallback(() => {
|
||||
@@ -387,6 +400,8 @@ export default function MixedView({ libraryId, initialPath }: Props) {
|
||||
entry={entry}
|
||||
onOpen={handleEntry}
|
||||
onTag={handleTagEntry}
|
||||
ocrMode={ocrMode}
|
||||
defaultOcrLanguages={defaultOcrLanguages}
|
||||
onAiTag={async (e) => {
|
||||
const itemKey = itemKeyFor(e)
|
||||
const res = await fetch('/api/ai-tagging', {
|
||||
@@ -401,7 +416,7 @@ export default function MixedView({ libraryId, initialPath }: Props) {
|
||||
fetchAssignments()
|
||||
setFilterRefreshKey((k) => k + 1)
|
||||
}}
|
||||
onExtractText={async (e) => {
|
||||
onExtractText={async (e, ocrLanguages) => {
|
||||
if (e.type === 'directory') {
|
||||
// Bulk extract for directory
|
||||
const dirRel = filtersActive ? e.name : (currentPath ? `${currentPath}/${e.name}` : e.name)
|
||||
@@ -420,7 +435,7 @@ export default function MixedView({ libraryId, initialPath }: Props) {
|
||||
const res = await fetch('/api/ai-tagging/extract-text', {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({ itemKey }),
|
||||
body: JSON.stringify({ itemKey, ...(ocrLanguages && { ocrLanguages }) }),
|
||||
})
|
||||
if (!res.ok) {
|
||||
const data = await res.json().catch(() => ({}))
|
||||
@@ -594,7 +609,7 @@ export default function MixedView({ libraryId, initialPath }: Props) {
|
||||
)
|
||||
}
|
||||
|
||||
function EntryTile({ entry, onOpen, onTag, onDelete, onRename, onAiTag, onExtractText, onDescribe, onTranslate }: { entry: FileEntry; onOpen: (e: FileEntry) => void; onTag: (e: FileEntry) => void; onDelete?: (e: FileEntry) => void; onRename?: (e: FileEntry, newName: string) => Promise<boolean>; onAiTag?: (e: FileEntry) => Promise<void>; onExtractText?: (e: FileEntry) => Promise<void>; onDescribe?: (e: FileEntry) => Promise<void>; onTranslate?: (e: FileEntry) => Promise<void> }) {
|
||||
function EntryTile({ entry, onOpen, onTag, onDelete, onRename, onAiTag, onExtractText, onDescribe, onTranslate, ocrMode, defaultOcrLanguages }: { entry: FileEntry; onOpen: (e: FileEntry) => void; onTag: (e: FileEntry) => void; onDelete?: (e: FileEntry) => void; onRename?: (e: FileEntry, newName: string) => Promise<boolean>; onAiTag?: (e: FileEntry) => Promise<void>; onExtractText?: (e: FileEntry, ocrLanguages?: string) => Promise<void>; onDescribe?: (e: FileEntry) => Promise<void>; onTranslate?: (e: FileEntry) => Promise<void>; ocrMode?: string | null; defaultOcrLanguages?: string }) {
|
||||
type ImgState = 'loading' | 'loaded' | 'error'
|
||||
const [imgState, setImgState] = useState<ImgState>(
|
||||
entry.thumbnailUrl ? 'loading' : 'error'
|
||||
@@ -615,6 +630,8 @@ function EntryTile({ entry, onOpen, onTag, onDelete, onRename, onAiTag, onExtrac
|
||||
const [describeError, setDescribeError] = useState<string | null>(null)
|
||||
const [translating, setTranslating] = useState(false)
|
||||
const [translateError, setTranslateError] = useState<string | null>(null)
|
||||
const [showOcrPrompt, setShowOcrPrompt] = useState(false)
|
||||
const [ocrLanguageInput, setOcrLanguageInput] = useState('')
|
||||
|
||||
useEffect(() => {
|
||||
if (!menuOpen) return
|
||||
@@ -804,16 +821,21 @@ function EntryTile({ entry, onOpen, onTag, onDelete, onRename, onAiTag, onExtrac
|
||||
📝 Describe Folder
|
||||
</button>
|
||||
)}
|
||||
{onExtractText && entry.mediaType === 'image' && (
|
||||
{onExtractText && entry.mediaType === 'image' && !showOcrPrompt && (
|
||||
<button
|
||||
onClick={(e) => {
|
||||
e.stopPropagation()
|
||||
setMenuOpen(false)
|
||||
setTextExtracting(true)
|
||||
setTextExtractError(null)
|
||||
onExtractText(entry)
|
||||
.catch((err) => setTextExtractError(err instanceof Error ? err.message : 'Text extraction failed'))
|
||||
.finally(() => setTextExtracting(false))
|
||||
if (ocrMode && ocrMode !== 'llm') {
|
||||
setOcrLanguageInput('')
|
||||
setShowOcrPrompt(true)
|
||||
} else {
|
||||
setMenuOpen(false)
|
||||
setTextExtracting(true)
|
||||
setTextExtractError(null)
|
||||
onExtractText(entry)
|
||||
.catch((err) => setTextExtractError(err instanceof Error ? err.message : 'Text extraction failed'))
|
||||
.finally(() => setTextExtracting(false))
|
||||
}
|
||||
}}
|
||||
disabled={textExtracting}
|
||||
className="flex items-center gap-2 w-full px-4 py-2 text-sm text-left transition-colors disabled:opacity-50"
|
||||
@@ -824,6 +846,57 @@ function EntryTile({ entry, onOpen, onTag, onDelete, onRename, onAiTag, onExtrac
|
||||
🔍 Extract Text
|
||||
</button>
|
||||
)}
|
||||
{onExtractText && entry.mediaType === 'image' && showOcrPrompt && (
|
||||
<div className="px-4 py-2 flex flex-col gap-2" onClick={(e) => e.stopPropagation()}>
|
||||
<p className="text-xs" style={{ color: 'var(--text-secondary)' }}>OCR language</p>
|
||||
<input
|
||||
autoFocus
|
||||
type="text"
|
||||
value={ocrLanguageInput}
|
||||
onChange={(e) => setOcrLanguageInput(e.target.value)}
|
||||
onKeyDown={(e) => {
|
||||
if (e.key === 'Escape') { setShowOcrPrompt(false) }
|
||||
if (e.key === 'Enter') {
|
||||
setShowOcrPrompt(false)
|
||||
setMenuOpen(false)
|
||||
setTextExtracting(true)
|
||||
setTextExtractError(null)
|
||||
onExtractText(entry, ocrLanguageInput.trim() || undefined)
|
||||
.catch((err) => setTextExtractError(err instanceof Error ? err.message : 'Text extraction failed'))
|
||||
.finally(() => setTextExtracting(false))
|
||||
}
|
||||
}}
|
||||
placeholder={defaultOcrLanguages ?? 'eng'}
|
||||
className="text-xs px-2 py-1 rounded-lg outline-none w-full"
|
||||
style={{ backgroundColor: 'var(--background)', border: '1px solid var(--border)', color: 'var(--text-primary)' }}
|
||||
title="Tesseract language(s) for this extraction (e.g. jpn+jpn_vert). Leave blank to use the configured default."
|
||||
/>
|
||||
<div className="flex gap-2">
|
||||
<button
|
||||
onClick={() => {
|
||||
setShowOcrPrompt(false)
|
||||
setMenuOpen(false)
|
||||
setTextExtracting(true)
|
||||
setTextExtractError(null)
|
||||
onExtractText(entry, ocrLanguageInput.trim() || undefined)
|
||||
.catch((err) => setTextExtractError(err instanceof Error ? err.message : 'Text extraction failed'))
|
||||
.finally(() => setTextExtracting(false))
|
||||
}}
|
||||
className="text-xs px-2 py-1 rounded-lg"
|
||||
style={{ backgroundColor: 'var(--accent)', color: '#fff' }}
|
||||
>
|
||||
Extract
|
||||
</button>
|
||||
<button
|
||||
onClick={() => setShowOcrPrompt(false)}
|
||||
className="text-xs px-2 py-1"
|
||||
style={{ color: 'var(--text-secondary)' }}
|
||||
>
|
||||
Cancel
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
{onExtractText && entry.type === 'directory' && (
|
||||
<button
|
||||
onClick={(e) => {
|
||||
|
||||
Reference in New Issue
Block a user