add tesseract ocr
This commit is contained in:
@@ -1,6 +1,6 @@
|
||||
import { NextRequest, NextResponse } from 'next/server'
|
||||
import { requireAdmin } from '@/lib/auth'
|
||||
import { getAiConfig, updateAiConfig, getPreferredLanguage, setPreferredLanguage, getAiMaxRetries, setAiMaxRetries } from '@/lib/app-settings'
|
||||
import { getAiConfig, updateAiConfig, getPreferredLanguage, setPreferredLanguage, getAiMaxRetries, setAiMaxRetries, type OcrMode } from '@/lib/app-settings'
|
||||
|
||||
export async function GET(request: NextRequest) {
|
||||
const auth = await requireAdmin(request)
|
||||
@@ -34,6 +34,9 @@ export async function PUT(request: NextRequest) {
|
||||
maxTokensDescribe?: number
|
||||
maxTokensExtract?: number
|
||||
maxTokensTranslate?: number
|
||||
ocrMode?: string
|
||||
ocrLanguages?: string
|
||||
ocrConfidenceThreshold?: number
|
||||
}
|
||||
try {
|
||||
body = await request.json()
|
||||
@@ -47,6 +50,7 @@ export async function PUT(request: NextRequest) {
|
||||
promptDescribe, promptTagger, promptExtract, promptTranslate,
|
||||
maxRetries,
|
||||
maxTokensTag, maxTokensDescribe, maxTokensExtract, maxTokensTranslate,
|
||||
ocrMode, ocrLanguages, ocrConfidenceThreshold,
|
||||
} = body
|
||||
|
||||
if (typeof endpoint !== 'string') {
|
||||
@@ -75,6 +79,9 @@ export async function PUT(request: NextRequest) {
|
||||
typeof maxTokensDescribe === 'number' ? maxTokensDescribe : undefined,
|
||||
typeof maxTokensExtract === 'number' ? maxTokensExtract : undefined,
|
||||
typeof maxTokensTranslate === 'number' ? maxTokensTranslate : undefined,
|
||||
(ocrMode === 'hybrid' || ocrMode === 'tesseract' || ocrMode === 'llm') ? (ocrMode as OcrMode) : undefined,
|
||||
typeof ocrLanguages === 'string' ? ocrLanguages : undefined,
|
||||
typeof ocrConfidenceThreshold === 'number' ? ocrConfidenceThreshold : undefined,
|
||||
)
|
||||
|
||||
if (typeof preferredLanguage === 'string' && preferredLanguage.trim()) {
|
||||
|
||||
@@ -20,6 +20,9 @@ interface AiSettings {
|
||||
maxTokensDescribe: number
|
||||
maxTokensExtract: number
|
||||
maxTokensTranslate: number
|
||||
ocrMode: 'hybrid' | 'tesseract' | 'llm'
|
||||
ocrLanguages: string
|
||||
ocrConfidenceThreshold: number
|
||||
}
|
||||
|
||||
interface AiJob {
|
||||
@@ -76,6 +79,7 @@ export default function AiTaggingPage() {
|
||||
promptDescribe: '', promptTagger: '', promptExtract: '', promptTranslate: '',
|
||||
maxRetries: 3,
|
||||
maxTokensTag: 8192, maxTokensDescribe: 8192, maxTokensExtract: 8192, maxTokensTranslate: 8192,
|
||||
ocrMode: 'hybrid', ocrLanguages: 'eng', ocrConfidenceThreshold: 70,
|
||||
})
|
||||
const [loading, setLoading] = useState(true)
|
||||
const [saving, setSaving] = useState(false)
|
||||
@@ -644,6 +648,72 @@ export default function AiTaggingPage() {
|
||||
/>
|
||||
</Field>
|
||||
|
||||
<Field label="OCR Mode">
|
||||
<div className="flex gap-2">
|
||||
{(['hybrid', 'tesseract', 'llm'] as const).map((mode) => (
|
||||
<button
|
||||
key={mode}
|
||||
type="button"
|
||||
onClick={() => setSettings((s) => ({ ...s, ocrMode: mode }))}
|
||||
className="px-3 py-1.5 rounded-lg text-sm transition-colors"
|
||||
style={{
|
||||
backgroundColor: settings.ocrMode === mode ? 'var(--accent)' : 'var(--surface)',
|
||||
color: settings.ocrMode === mode ? '#fff' : 'var(--text-secondary)',
|
||||
border: '1px solid var(--border)',
|
||||
}}
|
||||
>
|
||||
{mode === 'hybrid' ? 'Hybrid' : mode === 'tesseract' ? 'Tesseract only' : 'LLM only'}
|
||||
</button>
|
||||
))}
|
||||
</div>
|
||||
<p className="mt-1 text-xs" style={{ color: 'var(--text-secondary)' }}>
|
||||
Hybrid runs local OCR first and falls back to the LLM when confidence is low. Tesseract only never calls the LLM. LLM only uses the original behaviour.
|
||||
</p>
|
||||
</Field>
|
||||
|
||||
<Field label="OCR Languages">
|
||||
<input
|
||||
type="text"
|
||||
value={settings.ocrLanguages}
|
||||
onChange={(e) => setSettings((s) => ({ ...s, ocrLanguages: e.target.value }))}
|
||||
placeholder="eng"
|
||||
className="w-full rounded-lg px-3 py-2 text-sm font-mono outline-none focus:ring-2"
|
||||
style={{
|
||||
backgroundColor: 'var(--background)',
|
||||
border: '1px solid var(--border)',
|
||||
color: 'var(--text-primary)',
|
||||
}}
|
||||
onFocus={(e) => ((e.currentTarget as HTMLElement).style.borderColor = 'var(--accent)')}
|
||||
onBlur={(e) => ((e.currentTarget as HTMLElement).style.borderColor = 'var(--border)')}
|
||||
/>
|
||||
<p className="mt-1 text-xs" style={{ color: 'var(--text-secondary)' }}>
|
||||
{`Tesseract language packs to use, joined with '+'. For Japanese manga use jpn+jpn_vert. Language data is downloaded automatically on first use.`}
|
||||
</p>
|
||||
</Field>
|
||||
|
||||
<Field label="OCR Confidence Threshold">
|
||||
<input
|
||||
type="number"
|
||||
min={0}
|
||||
max={100}
|
||||
value={settings.ocrConfidenceThreshold}
|
||||
onChange={(e) => {
  // Fall back to the default (70) only when the field does not parse to a
  // number. The previous `parseInt(...) || 70` also replaced a typed 0 with
  // 70, because 0 is falsy, even though 0 is inside the input's min/max range.
  const parsed = Number.parseInt(e.target.value, 10)
  // Clamp to the same 0–100 bounds the input declares via min/max.
  const threshold = Number.isNaN(parsed) ? 70 : Math.max(0, Math.min(100, parsed))
  setSettings((s) => ({ ...s, ocrConfidenceThreshold: threshold }))
}}
|
||||
className="w-24 rounded-lg px-3 py-2 text-sm outline-none focus:ring-2"
|
||||
style={{
|
||||
backgroundColor: 'var(--background)',
|
||||
border: '1px solid var(--border)',
|
||||
color: 'var(--text-primary)',
|
||||
}}
|
||||
onFocus={(e) => ((e.currentTarget as HTMLElement).style.borderColor = 'var(--accent)')}
|
||||
onBlur={(e) => ((e.currentTarget as HTMLElement).style.borderColor = 'var(--border)')}
|
||||
/>
|
||||
<p className="mt-1 text-xs" style={{ color: 'var(--text-secondary)' }}>
|
||||
In hybrid mode, Tesseract results below this confidence score (0–100) fall back to the LLM. Default is 70.
|
||||
</p>
|
||||
</Field>
|
||||
|
||||
<Field label="Translation Model">
|
||||
<input
|
||||
type="text"
|
||||
|
||||
Reference in New Issue
Block a user