diff --git a/package-lock.json b/package-lock.json index 192a537..9dc18cb 100644 --- a/package-lock.json +++ b/package-lock.json @@ -17,7 +17,8 @@ "node-cron": "^4.2.1", "react": "^19.2.4", "react-dom": "^19.2.4", - "sharp": "^0.34.5" + "sharp": "^0.34.5", + "tesseract.js": "^7.0.0" }, "devDependencies": { "@tailwindcss/postcss": "^4.2.2", @@ -2950,6 +2951,12 @@ "readable-stream": "^3.4.0" } }, + "node_modules/bmp-js": { + "version": "0.1.0", + "resolved": "https://registry.npmjs.org/bmp-js/-/bmp-js-0.1.0.tgz", + "integrity": "sha512-vHdS19CnY3hwiNdkaqk93DvjVLfbEcI8mys4UjuWrlX1haDmroo8o4xCzh4wD6DGV6HxRCyauwhHRqMTfERtjw==", + "license": "MIT" + }, "node_modules/brace-expansion": { "version": "1.1.12", "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.12.tgz", @@ -4803,6 +4810,12 @@ "hermes-estree": "0.25.1" } }, + "node_modules/idb-keyval": { + "version": "6.2.2", + "resolved": "https://registry.npmjs.org/idb-keyval/-/idb-keyval-6.2.2.tgz", + "integrity": "sha512-yjD9nARJ/jb1g+CvD0tlhUHOrJ9Sy0P8T9MF3YaLlHnSRpwPfpTX0XIvpmw3gAJUmEu3FiICLBDPXVwyEvrleg==", + "license": "Apache-2.0" + }, "node_modules/ieee754": { "version": "1.2.1", "resolved": "https://registry.npmjs.org/ieee754/-/ieee754-1.2.1.tgz", @@ -5288,6 +5301,12 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/is-url": { + "version": "1.2.4", + "resolved": "https://registry.npmjs.org/is-url/-/is-url-1.2.4.tgz", + "integrity": "sha512-ITvGim8FhRiYe4IQ5uHSkj7pVaPDrCTkNd3yq3cV7iZAcJdHTUMPMEHcqSOy9xZ9qFenQCvi+2wjH9a1nXqHww==", + "license": "MIT" + }, "node_modules/is-weakmap": { "version": "2.0.2", "resolved": "https://registry.npmjs.org/is-weakmap/-/is-weakmap-2.0.2.tgz", @@ -6167,6 +6186,26 @@ "semver": "bin/semver.js" } }, + "node_modules/node-fetch": { + "version": "2.7.0", + "resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.7.0.tgz", + "integrity": "sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A==", + "license": "MIT", + "dependencies": { + "whatwg-url": "^5.0.0" + }, + "engines": { + "node": "4.x || >=6.0.0" + }, + "peerDependencies": { + "encoding": "^0.1.0" + }, + "peerDependenciesMeta": { + "encoding": { + "optional": true + } + } + }, "node_modules/node-releases": { "version": "2.0.36", "resolved": "https://registry.npmjs.org/node-releases/-/node-releases-2.0.36.tgz", @@ -6315,6 +6354,15 @@ "wrappy": "1" } }, + "node_modules/opencollective-postinstall": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/opencollective-postinstall/-/opencollective-postinstall-2.0.3.tgz", + "integrity": "sha512-8AV/sCtuzUeTo8gQK5qDZzARrulB3egtLzFgteqB2tcT4Mw7B8Kt7JcDHmltjz6FOAHsvTevk70gZEbhM4ZS9Q==", + "license": "MIT", + "bin": { + "opencollective-postinstall": "index.js" + } + }, "node_modules/optionator": { "version": "0.9.4", "resolved": "https://registry.npmjs.org/optionator/-/optionator-0.9.4.tgz", @@ -6747,6 +6795,12 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/regenerator-runtime": { + "version": "0.13.11", + "resolved": "https://registry.npmjs.org/regenerator-runtime/-/regenerator-runtime-0.13.11.tgz", + "integrity": "sha512-kY1AZVr2Ra+t+piVaJ4gxaFaReZVH40AKNo7UCX6W+dEwBo/2oZJzqfuN1qLq1oL45o56cPaTXELwrTh8Fpggg==", + "license": "MIT" + }, "node_modules/regexp.prototype.flags": { "version": "1.5.4", "resolved": "https://registry.npmjs.org/regexp.prototype.flags/-/regexp.prototype.flags-1.5.4.tgz", @@ -7585,6 +7639,30 @@ "streamx": "^2.12.5" } }, + "node_modules/tesseract.js": { + "version": "7.0.0", + "resolved": "https://registry.npmjs.org/tesseract.js/-/tesseract.js-7.0.0.tgz", + "integrity": "sha512-exPBkd+z+wM1BuMkx/Bjv43OeLBxhL5kKWsz/9JY+DXcXdiBjiAch0V49QR3oAJqCaL5qURE0vx9Eo+G5YE7mA==", + "hasInstallScript": true, + "license": "Apache-2.0", + "dependencies": { + "bmp-js": "^0.1.0", + "idb-keyval": "^6.2.0", + "is-url": "^1.2.4", + "node-fetch": "^2.6.9", + "opencollective-postinstall": "^2.0.3", + "regenerator-runtime": "^0.13.3", + "tesseract.js-core": "^7.0.0", + "wasm-feature-detect": "^1.8.0", + "zlibjs": "^0.3.1" + } + }, + "node_modules/tesseract.js-core": { + "version": "7.0.0", + "resolved": "https://registry.npmjs.org/tesseract.js-core/-/tesseract.js-core-7.0.0.tgz", + "integrity": "sha512-WnNH518NzmbSq9zgTPeoF8c+xmilS8rFIl1YKbk/ptuuc7p6cLNELNuPAzcmsYw450ca6bLa8j3t0VAtq435Vw==", + "license": "Apache-2.0" + }, "node_modules/text-decoder": { "version": "1.2.7", "resolved": "https://registry.npmjs.org/text-decoder/-/text-decoder-1.2.7.tgz", @@ -7655,6 +7733,12 @@ "node": ">=8.0" } }, + "node_modules/tr46": { + "version": "0.0.3", + "resolved": "https://registry.npmjs.org/tr46/-/tr46-0.0.3.tgz", + "integrity": "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==", + "license": "MIT" + }, "node_modules/ts-api-utils": { "version": "2.5.0", "resolved": "https://registry.npmjs.org/ts-api-utils/-/ts-api-utils-2.5.0.tgz", @@ -7955,6 +8039,28 @@ "integrity": "sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw==", "license": "MIT" }, + "node_modules/wasm-feature-detect": { + "version": "1.8.0", + "resolved": "https://registry.npmjs.org/wasm-feature-detect/-/wasm-feature-detect-1.8.0.tgz", + "integrity": "sha512-zksaLKM2fVlnB5jQQDqKXXwYHLQUVH9es+5TOOHwGOVJOCeRBCiPjwSg+3tN2AdTCzjgli4jijCH290kXb/zWQ==", + "license": "Apache-2.0" + }, + "node_modules/webidl-conversions": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-3.0.1.tgz", + "integrity": "sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==", + "license": "BSD-2-Clause" + }, + "node_modules/whatwg-url": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-5.0.0.tgz", + "integrity": "sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw==", + "license": "MIT", + "dependencies": { + "tr46": "~0.0.3", + "webidl-conversions": "^3.0.0" + } + }, "node_modules/which": { "version": "2.0.2", "resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz", @@ -8237,6 +8343,15 @@ "node": "^12.22.0 || ^14.17.0 || >=16.0.0" } }, + "node_modules/zlibjs": { + "version": "0.3.1", + "resolved": "https://registry.npmjs.org/zlibjs/-/zlibjs-0.3.1.tgz", + "integrity": "sha512-+J9RrgTKOmlxFSDHo0pI1xM6BLVUv+o0ZT9ANtCxGkjIVCCUdx9alUF8Gm+dGLKbkkkidWIHFDZHDMpfITt4+w==", + "license": "MIT", + "engines": { + "node": "*" + } + }, "node_modules/zod": { "version": "4.3.6", "resolved": "https://registry.npmjs.org/zod/-/zod-4.3.6.tgz", diff --git a/package.json b/package.json index 0575972..1eca2bf 100644 --- a/package.json +++ b/package.json @@ -20,7 +20,8 @@ "node-cron": "^4.2.1", "react": "^19.2.4", "react-dom": "^19.2.4", - "sharp": "^0.34.5" + "sharp": "^0.34.5", + "tesseract.js": "^7.0.0" }, "devDependencies": { "@tailwindcss/postcss": "^4.2.2", diff --git a/src/app/api/ai-settings/library/[id]/route.ts b/src/app/api/ai-settings/library/[id]/route.ts index b6afdc4..a274842 100644 --- a/src/app/api/ai-settings/library/[id]/route.ts +++ b/src/app/api/ai-settings/library/[id]/route.ts @@ -38,6 +38,10 @@ export async function PUT( promptTagger: typeof body.promptTagger === 'string' ? body.promptTagger : undefined, promptExtract: typeof body.promptExtract === 'string' ? body.promptExtract : undefined, promptTranslate: typeof body.promptTranslate === 'string' ? body.promptTranslate : undefined, + maxTokensTag: typeof body.maxTokensTag === 'number' ? body.maxTokensTag : (body.maxTokensTag === null ? null : undefined), + maxTokensDescribe: typeof body.maxTokensDescribe === 'number' ? body.maxTokensDescribe : (body.maxTokensDescribe === null ? null : undefined), + maxTokensExtract: typeof body.maxTokensExtract === 'number' ? body.maxTokensExtract : (body.maxTokensExtract === null ? null : undefined), + maxTokensTranslate: typeof body.maxTokensTranslate === 'number' ? body.maxTokensTranslate : (body.maxTokensTranslate === null ? null : undefined), }) return NextResponse.json(getLibraryAiOverrides(id)) diff --git a/src/app/api/ai-settings/route.ts b/src/app/api/ai-settings/route.ts index 219e559..f1f8a35 100644 --- a/src/app/api/ai-settings/route.ts +++ b/src/app/api/ai-settings/route.ts @@ -1,6 +1,6 @@ import { NextRequest, NextResponse } from 'next/server' import { requireAdmin } from '@/lib/auth' -import { getAiConfig, updateAiConfig, getPreferredLanguage, setPreferredLanguage, getAiMaxRetries, setAiMaxRetries } from '@/lib/app-settings' +import { getAiConfig, updateAiConfig, getPreferredLanguage, setPreferredLanguage, getAiMaxRetries, setAiMaxRetries, type OcrMode } from '@/lib/app-settings' export async function GET(request: NextRequest) { const auth = await requireAdmin(request) @@ -30,6 +30,13 @@ export async function PUT(request: NextRequest) { promptExtract?: string promptTranslate?: string maxRetries?: number + maxTokensTag?: number + maxTokensDescribe?: number + maxTokensExtract?: number + maxTokensTranslate?: number + ocrMode?: string + ocrLanguages?: string + ocrConfidenceThreshold?: number } try { body = await request.json() @@ -42,6 +49,8 @@ export async function PUT(request: NextRequest) { modelTagging, modelDescribe, modelExtract, modelTranslate, promptDescribe, promptTagger, promptExtract, promptTranslate, maxRetries, + maxTokensTag, maxTokensDescribe, maxTokensExtract, maxTokensTranslate, + ocrMode, ocrLanguages, ocrConfidenceThreshold, } = body if (typeof endpoint !== 'string') { @@ -66,6 +75,13 @@ export async function PUT(request: NextRequest) { typeof promptTagger === 'string' ? promptTagger : undefined, typeof promptExtract === 'string' ? promptExtract : undefined, typeof promptTranslate === 'string' ? promptTranslate : undefined, + typeof maxTokensTag === 'number' ? maxTokensTag : undefined, + typeof maxTokensDescribe === 'number' ? maxTokensDescribe : undefined, + typeof maxTokensExtract === 'number' ? maxTokensExtract : undefined, + typeof maxTokensTranslate === 'number' ? maxTokensTranslate : undefined, + (ocrMode === 'hybrid' || ocrMode === 'tesseract' || ocrMode === 'llm') ? (ocrMode as OcrMode) : undefined, + typeof ocrLanguages === 'string' ? ocrLanguages : undefined, + typeof ocrConfidenceThreshold === 'number' ? ocrConfidenceThreshold : undefined, ) if (typeof preferredLanguage === 'string' && preferredLanguage.trim()) { diff --git a/src/app/manage/ai-tagging/page.tsx b/src/app/manage/ai-tagging/page.tsx index e45b65c..e1a9942 100644 --- a/src/app/manage/ai-tagging/page.tsx +++ b/src/app/manage/ai-tagging/page.tsx @@ -16,6 +16,13 @@ interface AiSettings { promptExtract: string promptTranslate: string maxRetries: number + maxTokensTag: number + maxTokensDescribe: number + maxTokensExtract: number + maxTokensTranslate: number + ocrMode: 'hybrid' | 'tesseract' | 'llm' + ocrLanguages: string + ocrConfidenceThreshold: number } interface AiJob { @@ -47,6 +54,10 @@ interface LibraryOverride { promptTagger: string promptExtract: string promptTranslate: string + maxTokensTag: number | null + maxTokensDescribe: number | null + maxTokensExtract: number | null + maxTokensTranslate: number | null } function formatElapsed(startedAt: number): string { @@ -67,6 +78,8 @@ export default function AiTaggingPage() { enabled: false, preferredLanguage: 'English', promptDescribe: '', promptTagger: '', promptExtract: '', promptTranslate: '', maxRetries: 3, + maxTokensTag: 8192, maxTokensDescribe: 8192, maxTokensExtract: 8192, maxTokensTranslate: 8192, + ocrMode: 'hybrid', ocrLanguages: 'eng', ocrConfidenceThreshold: 70, }) const [loading, setLoading] = useState(true) const [saving, setSaving] = useState(false) @@ -296,7 +309,7 @@ export default function AiTaggingPage() { } } - const updateLibraryOverride = (libraryId: string, field: keyof LibraryOverride, value: string) => { + const updateLibraryOverride = (libraryId: string, field: keyof LibraryOverride, value: string | number | null) => { setLibraryOverrides((prev) => ({ ...prev, [libraryId]: { ...(prev[libraryId] ?? emptyOverride()), [field]: value }, @@ -544,6 +557,25 @@ export default function AiTaggingPage() { /> + + + setSettings((s) => ({ ...s, maxTokensTag: Math.max(1, parseInt(e.target.value) || 8192) })) + } + className="w-32 rounded-lg px-3 py-2 text-sm outline-none focus:ring-2" + style={{ + backgroundColor: 'var(--background)', + border: '1px solid var(--border)', + color: 'var(--text-primary)', + }} + onFocus={(e) => ((e.currentTarget as HTMLElement).style.borderColor = 'var(--accent)')} + onBlur={(e) => ((e.currentTarget as HTMLElement).style.borderColor = 'var(--border)')} + /> + + + + + setSettings((s) => ({ ...s, maxTokensDescribe: Math.max(1, parseInt(e.target.value) || 8192) })) + } + className="w-32 rounded-lg px-3 py-2 text-sm outline-none focus:ring-2" + style={{ + backgroundColor: 'var(--background)', + border: '1px solid var(--border)', + color: 'var(--text-primary)', + }} + onFocus={(e) => ((e.currentTarget as HTMLElement).style.borderColor = 'var(--accent)')} + onBlur={(e) => ((e.currentTarget as HTMLElement).style.borderColor = 'var(--border)')} + /> + + + + + setSettings((s) => ({ ...s, maxTokensExtract: Math.max(1, parseInt(e.target.value) || 8192) })) + } + className="w-32 rounded-lg px-3 py-2 text-sm outline-none focus:ring-2" + style={{ + backgroundColor: 'var(--background)', + border: '1px solid var(--border)', + color: 'var(--text-primary)', + }} + onFocus={(e) => ((e.currentTarget as HTMLElement).style.borderColor = 'var(--accent)')} + onBlur={(e) => ((e.currentTarget as HTMLElement).style.borderColor = 'var(--border)')} + /> + + + +
+ {(['hybrid', 'tesseract', 'llm'] as const).map((mode) => ( + + ))} +
+

+ Hybrid runs local OCR first and falls back to the LLM when confidence is low. Tesseract only never calls the LLM. LLM only uses the original behaviour. +

+
+ + + setSettings((s) => ({ ...s, ocrLanguages: e.target.value }))} + placeholder="eng" + className="w-full rounded-lg px-3 py-2 text-sm font-mono outline-none focus:ring-2" + style={{ + backgroundColor: 'var(--background)', + border: '1px solid var(--border)', + color: 'var(--text-primary)', + }} + onFocus={(e) => ((e.currentTarget as HTMLElement).style.borderColor = 'var(--accent)')} + onBlur={(e) => ((e.currentTarget as HTMLElement).style.borderColor = 'var(--border)')} + /> +

+ {`Tesseract language packs to use, joined with '+'. For Japanese manga use jpn+jpn_vert. Language data is downloaded automatically on first use.`} +

+
+ + + + setSettings((s) => ({ ...s, ocrConfidenceThreshold: Math.max(0, Math.min(100, parseInt(e.target.value) || 70)) })) + } + className="w-24 rounded-lg px-3 py-2 text-sm outline-none focus:ring-2" + style={{ + backgroundColor: 'var(--background)', + border: '1px solid var(--border)', + color: 'var(--text-primary)', + }} + onFocus={(e) => ((e.currentTarget as HTMLElement).style.borderColor = 'var(--accent)')} + onBlur={(e) => ((e.currentTarget as HTMLElement).style.borderColor = 'var(--border)')} + /> +

+ In hybrid mode, Tesseract results below this confidence score (0–100) fall back to the LLM. Default is 70. +

+
+ + + + setSettings((s) => ({ ...s, maxTokensTranslate: Math.max(1, parseInt(e.target.value) || 8192) })) + } + className="w-32 rounded-lg px-3 py-2 text-sm outline-none focus:ring-2" + style={{ + backgroundColor: 'var(--background)', + border: '1px solid var(--border)', + color: 'var(--text-primary)', + }} + onFocus={(e) => ((e.currentTarget as HTMLElement).style.borderColor = 'var(--accent)')} + onBlur={(e) => ((e.currentTarget as HTMLElement).style.borderColor = 'var(--border)')} + /> + +