add tesseract ocr

This commit is contained in:
Garret Patti
2026-04-13 19:40:25 -04:00
parent 1350a6f94b
commit 9b2690f639
7 changed files with 299 additions and 9 deletions

View File

@@ -1,6 +1,6 @@
import { NextRequest, NextResponse } from 'next/server'
import { requireAdmin } from '@/lib/auth'
import { getAiConfig, updateAiConfig, getPreferredLanguage, setPreferredLanguage, getAiMaxRetries, setAiMaxRetries } from '@/lib/app-settings'
import { getAiConfig, updateAiConfig, getPreferredLanguage, setPreferredLanguage, getAiMaxRetries, setAiMaxRetries, type OcrMode } from '@/lib/app-settings'
export async function GET(request: NextRequest) {
const auth = await requireAdmin(request)
@@ -34,6 +34,9 @@ export async function PUT(request: NextRequest) {
maxTokensDescribe?: number
maxTokensExtract?: number
maxTokensTranslate?: number
ocrMode?: string
ocrLanguages?: string
ocrConfidenceThreshold?: number
}
try {
body = await request.json()
@@ -47,6 +50,7 @@ export async function PUT(request: NextRequest) {
promptDescribe, promptTagger, promptExtract, promptTranslate,
maxRetries,
maxTokensTag, maxTokensDescribe, maxTokensExtract, maxTokensTranslate,
ocrMode, ocrLanguages, ocrConfidenceThreshold,
} = body
if (typeof endpoint !== 'string') {
@@ -75,6 +79,9 @@ export async function PUT(request: NextRequest) {
typeof maxTokensDescribe === 'number' ? maxTokensDescribe : undefined,
typeof maxTokensExtract === 'number' ? maxTokensExtract : undefined,
typeof maxTokensTranslate === 'number' ? maxTokensTranslate : undefined,
(ocrMode === 'hybrid' || ocrMode === 'tesseract' || ocrMode === 'llm') ? (ocrMode as OcrMode) : undefined,
typeof ocrLanguages === 'string' ? ocrLanguages : undefined,
typeof ocrConfidenceThreshold === 'number' ? ocrConfidenceThreshold : undefined,
)
if (typeof preferredLanguage === 'string' && preferredLanguage.trim()) {

View File

@@ -20,6 +20,9 @@ interface AiSettings {
maxTokensDescribe: number
maxTokensExtract: number
maxTokensTranslate: number
ocrMode: 'hybrid' | 'tesseract' | 'llm'
ocrLanguages: string
ocrConfidenceThreshold: number
}
interface AiJob {
@@ -76,6 +79,7 @@ export default function AiTaggingPage() {
promptDescribe: '', promptTagger: '', promptExtract: '', promptTranslate: '',
maxRetries: 3,
maxTokensTag: 8192, maxTokensDescribe: 8192, maxTokensExtract: 8192, maxTokensTranslate: 8192,
ocrMode: 'hybrid', ocrLanguages: 'eng', ocrConfidenceThreshold: 70,
})
const [loading, setLoading] = useState(true)
const [saving, setSaving] = useState(false)
@@ -644,6 +648,72 @@ export default function AiTaggingPage() {
/>
</Field>
<Field label="OCR Mode">
<div className="flex gap-2">
{(['hybrid', 'tesseract', 'llm'] as const).map((mode) => (
<button
key={mode}
type="button"
onClick={() => setSettings((s) => ({ ...s, ocrMode: mode }))}
className="px-3 py-1.5 rounded-lg text-sm transition-colors"
style={{
backgroundColor: settings.ocrMode === mode ? 'var(--accent)' : 'var(--surface)',
color: settings.ocrMode === mode ? '#fff' : 'var(--text-secondary)',
border: '1px solid var(--border)',
}}
>
{mode === 'hybrid' ? 'Hybrid' : mode === 'tesseract' ? 'Tesseract only' : 'LLM only'}
</button>
))}
</div>
<p className="mt-1 text-xs" style={{ color: 'var(--text-secondary)' }}>
Hybrid runs local OCR first and falls back to the LLM when confidence is low. Tesseract only never calls the LLM. LLM only uses the original behaviour.
</p>
</Field>
<Field label="OCR Languages">
<input
type="text"
value={settings.ocrLanguages}
onChange={(e) => setSettings((s) => ({ ...s, ocrLanguages: e.target.value }))}
placeholder="eng"
className="w-full rounded-lg px-3 py-2 text-sm font-mono outline-none focus:ring-2"
style={{
backgroundColor: 'var(--background)',
border: '1px solid var(--border)',
color: 'var(--text-primary)',
}}
onFocus={(e) => ((e.currentTarget as HTMLElement).style.borderColor = 'var(--accent)')}
onBlur={(e) => ((e.currentTarget as HTMLElement).style.borderColor = 'var(--border)')}
/>
<p className="mt-1 text-xs" style={{ color: 'var(--text-secondary)' }}>
{`Tesseract language packs to use, joined with '+'. For Japanese manga use jpn+jpn_vert. Language data is downloaded automatically on first use.`}
</p>
</Field>
{/* Numeric cut-off for the OCR confidence setting; the stored value is always clamped to 0–100. */}
<Field label="OCR Confidence Threshold">
  <input
    type="number"
    min={0}
    max={100}
    value={settings.ocrConfidenceThreshold}
    onChange={(e) => {
      // Fall back to the default of 70 only when the field is empty or not a number.
      // A plain `|| 70` would also overwrite a legitimate 0, which the input allows (min={0}).
      const parsed = Number.parseInt(e.target.value, 10)
      const next = Number.isNaN(parsed) ? 70 : Math.max(0, Math.min(100, parsed))
      setSettings((s) => ({ ...s, ocrConfidenceThreshold: next }))
    }}
    className="w-24 rounded-lg px-3 py-2 text-sm outline-none focus:ring-2"
    style={{
      backgroundColor: 'var(--background)',
      border: '1px solid var(--border)',
      color: 'var(--text-primary)',
    }}
    onFocus={(e) => ((e.currentTarget as HTMLElement).style.borderColor = 'var(--accent)')}
    onBlur={(e) => ((e.currentTarget as HTMLElement).style.borderColor = 'var(--border)')}
  />
  <p className="mt-1 text-xs" style={{ color: 'var(--text-secondary)' }}>
    In hybrid mode, Tesseract results below this confidence score (0–100) fall back to the LLM. Default is 70.
  </p>
</Field>
<Field label="Translation Model">
<input
type="text"