customize-context-length #25
117
package-lock.json
generated
117
package-lock.json
generated
@@ -17,7 +17,8 @@
|
|||||||
"node-cron": "^4.2.1",
|
"node-cron": "^4.2.1",
|
||||||
"react": "^19.2.4",
|
"react": "^19.2.4",
|
||||||
"react-dom": "^19.2.4",
|
"react-dom": "^19.2.4",
|
||||||
"sharp": "^0.34.5"
|
"sharp": "^0.34.5",
|
||||||
|
"tesseract.js": "^7.0.0"
|
||||||
},
|
},
|
||||||
"devDependencies": {
|
"devDependencies": {
|
||||||
"@tailwindcss/postcss": "^4.2.2",
|
"@tailwindcss/postcss": "^4.2.2",
|
||||||
@@ -2950,6 +2951,12 @@
|
|||||||
"readable-stream": "^3.4.0"
|
"readable-stream": "^3.4.0"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/bmp-js": {
|
||||||
|
"version": "0.1.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/bmp-js/-/bmp-js-0.1.0.tgz",
|
||||||
|
"integrity": "sha512-vHdS19CnY3hwiNdkaqk93DvjVLfbEcI8mys4UjuWrlX1haDmroo8o4xCzh4wD6DGV6HxRCyauwhHRqMTfERtjw==",
|
||||||
|
"license": "MIT"
|
||||||
|
},
|
||||||
"node_modules/brace-expansion": {
|
"node_modules/brace-expansion": {
|
||||||
"version": "1.1.12",
|
"version": "1.1.12",
|
||||||
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.12.tgz",
|
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.12.tgz",
|
||||||
@@ -4803,6 +4810,12 @@
|
|||||||
"hermes-estree": "0.25.1"
|
"hermes-estree": "0.25.1"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/idb-keyval": {
|
||||||
|
"version": "6.2.2",
|
||||||
|
"resolved": "https://registry.npmjs.org/idb-keyval/-/idb-keyval-6.2.2.tgz",
|
||||||
|
"integrity": "sha512-yjD9nARJ/jb1g+CvD0tlhUHOrJ9Sy0P8T9MF3YaLlHnSRpwPfpTX0XIvpmw3gAJUmEu3FiICLBDPXVwyEvrleg==",
|
||||||
|
"license": "Apache-2.0"
|
||||||
|
},
|
||||||
"node_modules/ieee754": {
|
"node_modules/ieee754": {
|
||||||
"version": "1.2.1",
|
"version": "1.2.1",
|
||||||
"resolved": "https://registry.npmjs.org/ieee754/-/ieee754-1.2.1.tgz",
|
"resolved": "https://registry.npmjs.org/ieee754/-/ieee754-1.2.1.tgz",
|
||||||
@@ -5288,6 +5301,12 @@
|
|||||||
"url": "https://github.com/sponsors/ljharb"
|
"url": "https://github.com/sponsors/ljharb"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/is-url": {
|
||||||
|
"version": "1.2.4",
|
||||||
|
"resolved": "https://registry.npmjs.org/is-url/-/is-url-1.2.4.tgz",
|
||||||
|
"integrity": "sha512-ITvGim8FhRiYe4IQ5uHSkj7pVaPDrCTkNd3yq3cV7iZAcJdHTUMPMEHcqSOy9xZ9qFenQCvi+2wjH9a1nXqHww==",
|
||||||
|
"license": "MIT"
|
||||||
|
},
|
||||||
"node_modules/is-weakmap": {
|
"node_modules/is-weakmap": {
|
||||||
"version": "2.0.2",
|
"version": "2.0.2",
|
||||||
"resolved": "https://registry.npmjs.org/is-weakmap/-/is-weakmap-2.0.2.tgz",
|
"resolved": "https://registry.npmjs.org/is-weakmap/-/is-weakmap-2.0.2.tgz",
|
||||||
@@ -6167,6 +6186,26 @@
|
|||||||
"semver": "bin/semver.js"
|
"semver": "bin/semver.js"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/node-fetch": {
|
||||||
|
"version": "2.7.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.7.0.tgz",
|
||||||
|
"integrity": "sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A==",
|
||||||
|
"license": "MIT",
|
||||||
|
"dependencies": {
|
||||||
|
"whatwg-url": "^5.0.0"
|
||||||
|
},
|
||||||
|
"engines": {
|
||||||
|
"node": "4.x || >=6.0.0"
|
||||||
|
},
|
||||||
|
"peerDependencies": {
|
||||||
|
"encoding": "^0.1.0"
|
||||||
|
},
|
||||||
|
"peerDependenciesMeta": {
|
||||||
|
"encoding": {
|
||||||
|
"optional": true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/node-releases": {
|
"node_modules/node-releases": {
|
||||||
"version": "2.0.36",
|
"version": "2.0.36",
|
||||||
"resolved": "https://registry.npmjs.org/node-releases/-/node-releases-2.0.36.tgz",
|
"resolved": "https://registry.npmjs.org/node-releases/-/node-releases-2.0.36.tgz",
|
||||||
@@ -6315,6 +6354,15 @@
|
|||||||
"wrappy": "1"
|
"wrappy": "1"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/opencollective-postinstall": {
|
||||||
|
"version": "2.0.3",
|
||||||
|
"resolved": "https://registry.npmjs.org/opencollective-postinstall/-/opencollective-postinstall-2.0.3.tgz",
|
||||||
|
"integrity": "sha512-8AV/sCtuzUeTo8gQK5qDZzARrulB3egtLzFgteqB2tcT4Mw7B8Kt7JcDHmltjz6FOAHsvTevk70gZEbhM4ZS9Q==",
|
||||||
|
"license": "MIT",
|
||||||
|
"bin": {
|
||||||
|
"opencollective-postinstall": "index.js"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/optionator": {
|
"node_modules/optionator": {
|
||||||
"version": "0.9.4",
|
"version": "0.9.4",
|
||||||
"resolved": "https://registry.npmjs.org/optionator/-/optionator-0.9.4.tgz",
|
"resolved": "https://registry.npmjs.org/optionator/-/optionator-0.9.4.tgz",
|
||||||
@@ -6747,6 +6795,12 @@
|
|||||||
"url": "https://github.com/sponsors/ljharb"
|
"url": "https://github.com/sponsors/ljharb"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/regenerator-runtime": {
|
||||||
|
"version": "0.13.11",
|
||||||
|
"resolved": "https://registry.npmjs.org/regenerator-runtime/-/regenerator-runtime-0.13.11.tgz",
|
||||||
|
"integrity": "sha512-kY1AZVr2Ra+t+piVaJ4gxaFaReZVH40AKNo7UCX6W+dEwBo/2oZJzqfuN1qLq1oL45o56cPaTXELwrTh8Fpggg==",
|
||||||
|
"license": "MIT"
|
||||||
|
},
|
||||||
"node_modules/regexp.prototype.flags": {
|
"node_modules/regexp.prototype.flags": {
|
||||||
"version": "1.5.4",
|
"version": "1.5.4",
|
||||||
"resolved": "https://registry.npmjs.org/regexp.prototype.flags/-/regexp.prototype.flags-1.5.4.tgz",
|
"resolved": "https://registry.npmjs.org/regexp.prototype.flags/-/regexp.prototype.flags-1.5.4.tgz",
|
||||||
@@ -7585,6 +7639,30 @@
|
|||||||
"streamx": "^2.12.5"
|
"streamx": "^2.12.5"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/tesseract.js": {
|
||||||
|
"version": "7.0.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/tesseract.js/-/tesseract.js-7.0.0.tgz",
|
||||||
|
"integrity": "sha512-exPBkd+z+wM1BuMkx/Bjv43OeLBxhL5kKWsz/9JY+DXcXdiBjiAch0V49QR3oAJqCaL5qURE0vx9Eo+G5YE7mA==",
|
||||||
|
"hasInstallScript": true,
|
||||||
|
"license": "Apache-2.0",
|
||||||
|
"dependencies": {
|
||||||
|
"bmp-js": "^0.1.0",
|
||||||
|
"idb-keyval": "^6.2.0",
|
||||||
|
"is-url": "^1.2.4",
|
||||||
|
"node-fetch": "^2.6.9",
|
||||||
|
"opencollective-postinstall": "^2.0.3",
|
||||||
|
"regenerator-runtime": "^0.13.3",
|
||||||
|
"tesseract.js-core": "^7.0.0",
|
||||||
|
"wasm-feature-detect": "^1.8.0",
|
||||||
|
"zlibjs": "^0.3.1"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/tesseract.js-core": {
|
||||||
|
"version": "7.0.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/tesseract.js-core/-/tesseract.js-core-7.0.0.tgz",
|
||||||
|
"integrity": "sha512-WnNH518NzmbSq9zgTPeoF8c+xmilS8rFIl1YKbk/ptuuc7p6cLNELNuPAzcmsYw450ca6bLa8j3t0VAtq435Vw==",
|
||||||
|
"license": "Apache-2.0"
|
||||||
|
},
|
||||||
"node_modules/text-decoder": {
|
"node_modules/text-decoder": {
|
||||||
"version": "1.2.7",
|
"version": "1.2.7",
|
||||||
"resolved": "https://registry.npmjs.org/text-decoder/-/text-decoder-1.2.7.tgz",
|
"resolved": "https://registry.npmjs.org/text-decoder/-/text-decoder-1.2.7.tgz",
|
||||||
@@ -7655,6 +7733,12 @@
|
|||||||
"node": ">=8.0"
|
"node": ">=8.0"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/tr46": {
|
||||||
|
"version": "0.0.3",
|
||||||
|
"resolved": "https://registry.npmjs.org/tr46/-/tr46-0.0.3.tgz",
|
||||||
|
"integrity": "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==",
|
||||||
|
"license": "MIT"
|
||||||
|
},
|
||||||
"node_modules/ts-api-utils": {
|
"node_modules/ts-api-utils": {
|
||||||
"version": "2.5.0",
|
"version": "2.5.0",
|
||||||
"resolved": "https://registry.npmjs.org/ts-api-utils/-/ts-api-utils-2.5.0.tgz",
|
"resolved": "https://registry.npmjs.org/ts-api-utils/-/ts-api-utils-2.5.0.tgz",
|
||||||
@@ -7955,6 +8039,28 @@
|
|||||||
"integrity": "sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw==",
|
"integrity": "sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw==",
|
||||||
"license": "MIT"
|
"license": "MIT"
|
||||||
},
|
},
|
||||||
|
"node_modules/wasm-feature-detect": {
|
||||||
|
"version": "1.8.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/wasm-feature-detect/-/wasm-feature-detect-1.8.0.tgz",
|
||||||
|
"integrity": "sha512-zksaLKM2fVlnB5jQQDqKXXwYHLQUVH9es+5TOOHwGOVJOCeRBCiPjwSg+3tN2AdTCzjgli4jijCH290kXb/zWQ==",
|
||||||
|
"license": "Apache-2.0"
|
||||||
|
},
|
||||||
|
"node_modules/webidl-conversions": {
|
||||||
|
"version": "3.0.1",
|
||||||
|
"resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-3.0.1.tgz",
|
||||||
|
"integrity": "sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==",
|
||||||
|
"license": "BSD-2-Clause"
|
||||||
|
},
|
||||||
|
"node_modules/whatwg-url": {
|
||||||
|
"version": "5.0.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-5.0.0.tgz",
|
||||||
|
"integrity": "sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw==",
|
||||||
|
"license": "MIT",
|
||||||
|
"dependencies": {
|
||||||
|
"tr46": "~0.0.3",
|
||||||
|
"webidl-conversions": "^3.0.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/which": {
|
"node_modules/which": {
|
||||||
"version": "2.0.2",
|
"version": "2.0.2",
|
||||||
"resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz",
|
"resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz",
|
||||||
@@ -8237,6 +8343,15 @@
|
|||||||
"node": "^12.22.0 || ^14.17.0 || >=16.0.0"
|
"node": "^12.22.0 || ^14.17.0 || >=16.0.0"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/zlibjs": {
|
||||||
|
"version": "0.3.1",
|
||||||
|
"resolved": "https://registry.npmjs.org/zlibjs/-/zlibjs-0.3.1.tgz",
|
||||||
|
"integrity": "sha512-+J9RrgTKOmlxFSDHo0pI1xM6BLVUv+o0ZT9ANtCxGkjIVCCUdx9alUF8Gm+dGLKbkkkidWIHFDZHDMpfITt4+w==",
|
||||||
|
"license": "MIT",
|
||||||
|
"engines": {
|
||||||
|
"node": "*"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/zod": {
|
"node_modules/zod": {
|
||||||
"version": "4.3.6",
|
"version": "4.3.6",
|
||||||
"resolved": "https://registry.npmjs.org/zod/-/zod-4.3.6.tgz",
|
"resolved": "https://registry.npmjs.org/zod/-/zod-4.3.6.tgz",
|
||||||
|
|||||||
@@ -20,7 +20,8 @@
|
|||||||
"node-cron": "^4.2.1",
|
"node-cron": "^4.2.1",
|
||||||
"react": "^19.2.4",
|
"react": "^19.2.4",
|
||||||
"react-dom": "^19.2.4",
|
"react-dom": "^19.2.4",
|
||||||
"sharp": "^0.34.5"
|
"sharp": "^0.34.5",
|
||||||
|
"tesseract.js": "^7.0.0"
|
||||||
},
|
},
|
||||||
"devDependencies": {
|
"devDependencies": {
|
||||||
"@tailwindcss/postcss": "^4.2.2",
|
"@tailwindcss/postcss": "^4.2.2",
|
||||||
|
|||||||
@@ -38,6 +38,10 @@ export async function PUT(
|
|||||||
promptTagger: typeof body.promptTagger === 'string' ? body.promptTagger : undefined,
|
promptTagger: typeof body.promptTagger === 'string' ? body.promptTagger : undefined,
|
||||||
promptExtract: typeof body.promptExtract === 'string' ? body.promptExtract : undefined,
|
promptExtract: typeof body.promptExtract === 'string' ? body.promptExtract : undefined,
|
||||||
promptTranslate: typeof body.promptTranslate === 'string' ? body.promptTranslate : undefined,
|
promptTranslate: typeof body.promptTranslate === 'string' ? body.promptTranslate : undefined,
|
||||||
|
maxTokensTag: typeof body.maxTokensTag === 'number' ? body.maxTokensTag : (body.maxTokensTag === null ? null : undefined),
|
||||||
|
maxTokensDescribe: typeof body.maxTokensDescribe === 'number' ? body.maxTokensDescribe : (body.maxTokensDescribe === null ? null : undefined),
|
||||||
|
maxTokensExtract: typeof body.maxTokensExtract === 'number' ? body.maxTokensExtract : (body.maxTokensExtract === null ? null : undefined),
|
||||||
|
maxTokensTranslate: typeof body.maxTokensTranslate === 'number' ? body.maxTokensTranslate : (body.maxTokensTranslate === null ? null : undefined),
|
||||||
})
|
})
|
||||||
|
|
||||||
return NextResponse.json(getLibraryAiOverrides(id))
|
return NextResponse.json(getLibraryAiOverrides(id))
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
import { NextRequest, NextResponse } from 'next/server'
|
import { NextRequest, NextResponse } from 'next/server'
|
||||||
import { requireAdmin } from '@/lib/auth'
|
import { requireAdmin } from '@/lib/auth'
|
||||||
import { getAiConfig, updateAiConfig, getPreferredLanguage, setPreferredLanguage, getAiMaxRetries, setAiMaxRetries } from '@/lib/app-settings'
|
import { getAiConfig, updateAiConfig, getPreferredLanguage, setPreferredLanguage, getAiMaxRetries, setAiMaxRetries, type OcrMode } from '@/lib/app-settings'
|
||||||
|
|
||||||
export async function GET(request: NextRequest) {
|
export async function GET(request: NextRequest) {
|
||||||
const auth = await requireAdmin(request)
|
const auth = await requireAdmin(request)
|
||||||
@@ -30,6 +30,13 @@ export async function PUT(request: NextRequest) {
|
|||||||
promptExtract?: string
|
promptExtract?: string
|
||||||
promptTranslate?: string
|
promptTranslate?: string
|
||||||
maxRetries?: number
|
maxRetries?: number
|
||||||
|
maxTokensTag?: number
|
||||||
|
maxTokensDescribe?: number
|
||||||
|
maxTokensExtract?: number
|
||||||
|
maxTokensTranslate?: number
|
||||||
|
ocrMode?: string
|
||||||
|
ocrLanguages?: string
|
||||||
|
ocrConfidenceThreshold?: number
|
||||||
}
|
}
|
||||||
try {
|
try {
|
||||||
body = await request.json()
|
body = await request.json()
|
||||||
@@ -42,6 +49,8 @@ export async function PUT(request: NextRequest) {
|
|||||||
modelTagging, modelDescribe, modelExtract, modelTranslate,
|
modelTagging, modelDescribe, modelExtract, modelTranslate,
|
||||||
promptDescribe, promptTagger, promptExtract, promptTranslate,
|
promptDescribe, promptTagger, promptExtract, promptTranslate,
|
||||||
maxRetries,
|
maxRetries,
|
||||||
|
maxTokensTag, maxTokensDescribe, maxTokensExtract, maxTokensTranslate,
|
||||||
|
ocrMode, ocrLanguages, ocrConfidenceThreshold,
|
||||||
} = body
|
} = body
|
||||||
|
|
||||||
if (typeof endpoint !== 'string') {
|
if (typeof endpoint !== 'string') {
|
||||||
@@ -66,6 +75,13 @@ export async function PUT(request: NextRequest) {
|
|||||||
typeof promptTagger === 'string' ? promptTagger : undefined,
|
typeof promptTagger === 'string' ? promptTagger : undefined,
|
||||||
typeof promptExtract === 'string' ? promptExtract : undefined,
|
typeof promptExtract === 'string' ? promptExtract : undefined,
|
||||||
typeof promptTranslate === 'string' ? promptTranslate : undefined,
|
typeof promptTranslate === 'string' ? promptTranslate : undefined,
|
||||||
|
typeof maxTokensTag === 'number' ? maxTokensTag : undefined,
|
||||||
|
typeof maxTokensDescribe === 'number' ? maxTokensDescribe : undefined,
|
||||||
|
typeof maxTokensExtract === 'number' ? maxTokensExtract : undefined,
|
||||||
|
typeof maxTokensTranslate === 'number' ? maxTokensTranslate : undefined,
|
||||||
|
(ocrMode === 'hybrid' || ocrMode === 'tesseract' || ocrMode === 'llm') ? (ocrMode as OcrMode) : undefined,
|
||||||
|
typeof ocrLanguages === 'string' ? ocrLanguages : undefined,
|
||||||
|
typeof ocrConfidenceThreshold === 'number' ? ocrConfidenceThreshold : undefined,
|
||||||
)
|
)
|
||||||
|
|
||||||
if (typeof preferredLanguage === 'string' && preferredLanguage.trim()) {
|
if (typeof preferredLanguage === 'string' && preferredLanguage.trim()) {
|
||||||
|
|||||||
@@ -16,6 +16,13 @@ interface AiSettings {
|
|||||||
promptExtract: string
|
promptExtract: string
|
||||||
promptTranslate: string
|
promptTranslate: string
|
||||||
maxRetries: number
|
maxRetries: number
|
||||||
|
maxTokensTag: number
|
||||||
|
maxTokensDescribe: number
|
||||||
|
maxTokensExtract: number
|
||||||
|
maxTokensTranslate: number
|
||||||
|
ocrMode: 'hybrid' | 'tesseract' | 'llm'
|
||||||
|
ocrLanguages: string
|
||||||
|
ocrConfidenceThreshold: number
|
||||||
}
|
}
|
||||||
|
|
||||||
interface AiJob {
|
interface AiJob {
|
||||||
@@ -47,6 +54,10 @@ interface LibraryOverride {
|
|||||||
promptTagger: string
|
promptTagger: string
|
||||||
promptExtract: string
|
promptExtract: string
|
||||||
promptTranslate: string
|
promptTranslate: string
|
||||||
|
maxTokensTag: number | null
|
||||||
|
maxTokensDescribe: number | null
|
||||||
|
maxTokensExtract: number | null
|
||||||
|
maxTokensTranslate: number | null
|
||||||
}
|
}
|
||||||
|
|
||||||
function formatElapsed(startedAt: number): string {
|
function formatElapsed(startedAt: number): string {
|
||||||
@@ -67,6 +78,8 @@ export default function AiTaggingPage() {
|
|||||||
enabled: false, preferredLanguage: 'English',
|
enabled: false, preferredLanguage: 'English',
|
||||||
promptDescribe: '', promptTagger: '', promptExtract: '', promptTranslate: '',
|
promptDescribe: '', promptTagger: '', promptExtract: '', promptTranslate: '',
|
||||||
maxRetries: 3,
|
maxRetries: 3,
|
||||||
|
maxTokensTag: 8192, maxTokensDescribe: 8192, maxTokensExtract: 8192, maxTokensTranslate: 8192,
|
||||||
|
ocrMode: 'hybrid', ocrLanguages: 'eng', ocrConfidenceThreshold: 70,
|
||||||
})
|
})
|
||||||
const [loading, setLoading] = useState(true)
|
const [loading, setLoading] = useState(true)
|
||||||
const [saving, setSaving] = useState(false)
|
const [saving, setSaving] = useState(false)
|
||||||
@@ -296,7 +309,7 @@ export default function AiTaggingPage() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const updateLibraryOverride = (libraryId: string, field: keyof LibraryOverride, value: string) => {
|
const updateLibraryOverride = (libraryId: string, field: keyof LibraryOverride, value: string | number | null) => {
|
||||||
setLibraryOverrides((prev) => ({
|
setLibraryOverrides((prev) => ({
|
||||||
...prev,
|
...prev,
|
||||||
[libraryId]: { ...(prev[libraryId] ?? emptyOverride()), [field]: value },
|
[libraryId]: { ...(prev[libraryId] ?? emptyOverride()), [field]: value },
|
||||||
@@ -544,6 +557,25 @@ export default function AiTaggingPage() {
|
|||||||
/>
|
/>
|
||||||
</Field>
|
</Field>
|
||||||
|
|
||||||
|
<Field label="Tagging Max Tokens">
|
||||||
|
<input
|
||||||
|
type="number"
|
||||||
|
min={1}
|
||||||
|
value={settings.maxTokensTag}
|
||||||
|
onChange={(e) =>
|
||||||
|
setSettings((s) => ({ ...s, maxTokensTag: Math.max(1, parseInt(e.target.value) || 8192) }))
|
||||||
|
}
|
||||||
|
className="w-32 rounded-lg px-3 py-2 text-sm outline-none focus:ring-2"
|
||||||
|
style={{
|
||||||
|
backgroundColor: 'var(--background)',
|
||||||
|
border: '1px solid var(--border)',
|
||||||
|
color: 'var(--text-primary)',
|
||||||
|
}}
|
||||||
|
onFocus={(e) => ((e.currentTarget as HTMLElement).style.borderColor = 'var(--accent)')}
|
||||||
|
onBlur={(e) => ((e.currentTarget as HTMLElement).style.borderColor = 'var(--border)')}
|
||||||
|
/>
|
||||||
|
</Field>
|
||||||
|
|
||||||
<Field label="Description Model">
|
<Field label="Description Model">
|
||||||
<input
|
<input
|
||||||
type="text"
|
type="text"
|
||||||
@@ -561,6 +593,25 @@ export default function AiTaggingPage() {
|
|||||||
/>
|
/>
|
||||||
</Field>
|
</Field>
|
||||||
|
|
||||||
|
<Field label="Description Max Tokens">
|
||||||
|
<input
|
||||||
|
type="number"
|
||||||
|
min={1}
|
||||||
|
value={settings.maxTokensDescribe}
|
||||||
|
onChange={(e) =>
|
||||||
|
setSettings((s) => ({ ...s, maxTokensDescribe: Math.max(1, parseInt(e.target.value) || 8192) }))
|
||||||
|
}
|
||||||
|
className="w-32 rounded-lg px-3 py-2 text-sm outline-none focus:ring-2"
|
||||||
|
style={{
|
||||||
|
backgroundColor: 'var(--background)',
|
||||||
|
border: '1px solid var(--border)',
|
||||||
|
color: 'var(--text-primary)',
|
||||||
|
}}
|
||||||
|
onFocus={(e) => ((e.currentTarget as HTMLElement).style.borderColor = 'var(--accent)')}
|
||||||
|
onBlur={(e) => ((e.currentTarget as HTMLElement).style.borderColor = 'var(--border)')}
|
||||||
|
/>
|
||||||
|
</Field>
|
||||||
|
|
||||||
<Field label="Text Extraction Model">
|
<Field label="Text Extraction Model">
|
||||||
<input
|
<input
|
||||||
type="text"
|
type="text"
|
||||||
@@ -578,6 +629,91 @@ export default function AiTaggingPage() {
|
|||||||
/>
|
/>
|
||||||
</Field>
|
</Field>
|
||||||
|
|
||||||
|
<Field label="Text Extraction Max Tokens">
|
||||||
|
<input
|
||||||
|
type="number"
|
||||||
|
min={1}
|
||||||
|
value={settings.maxTokensExtract}
|
||||||
|
onChange={(e) =>
|
||||||
|
setSettings((s) => ({ ...s, maxTokensExtract: Math.max(1, parseInt(e.target.value) || 8192) }))
|
||||||
|
}
|
||||||
|
className="w-32 rounded-lg px-3 py-2 text-sm outline-none focus:ring-2"
|
||||||
|
style={{
|
||||||
|
backgroundColor: 'var(--background)',
|
||||||
|
border: '1px solid var(--border)',
|
||||||
|
color: 'var(--text-primary)',
|
||||||
|
}}
|
||||||
|
onFocus={(e) => ((e.currentTarget as HTMLElement).style.borderColor = 'var(--accent)')}
|
||||||
|
onBlur={(e) => ((e.currentTarget as HTMLElement).style.borderColor = 'var(--border)')}
|
||||||
|
/>
|
||||||
|
</Field>
|
||||||
|
|
||||||
|
<Field label="OCR Mode">
|
||||||
|
<div className="flex gap-2">
|
||||||
|
{(['hybrid', 'tesseract', 'llm'] as const).map((mode) => (
|
||||||
|
<button
|
||||||
|
key={mode}
|
||||||
|
type="button"
|
||||||
|
onClick={() => setSettings((s) => ({ ...s, ocrMode: mode }))}
|
||||||
|
className="px-3 py-1.5 rounded-lg text-sm transition-colors"
|
||||||
|
style={{
|
||||||
|
backgroundColor: settings.ocrMode === mode ? 'var(--accent)' : 'var(--surface)',
|
||||||
|
color: settings.ocrMode === mode ? '#fff' : 'var(--text-secondary)',
|
||||||
|
border: '1px solid var(--border)',
|
||||||
|
}}
|
||||||
|
>
|
||||||
|
{mode === 'hybrid' ? 'Hybrid' : mode === 'tesseract' ? 'Tesseract only' : 'LLM only'}
|
||||||
|
</button>
|
||||||
|
))}
|
||||||
|
</div>
|
||||||
|
<p className="mt-1 text-xs" style={{ color: 'var(--text-secondary)' }}>
|
||||||
|
Hybrid runs local OCR first and falls back to the LLM when confidence is low. Tesseract only never calls the LLM. LLM only uses the original behaviour.
|
||||||
|
</p>
|
||||||
|
</Field>
|
||||||
|
|
||||||
|
<Field label="OCR Languages">
|
||||||
|
<input
|
||||||
|
type="text"
|
||||||
|
value={settings.ocrLanguages}
|
||||||
|
onChange={(e) => setSettings((s) => ({ ...s, ocrLanguages: e.target.value }))}
|
||||||
|
placeholder="eng"
|
||||||
|
className="w-full rounded-lg px-3 py-2 text-sm font-mono outline-none focus:ring-2"
|
||||||
|
style={{
|
||||||
|
backgroundColor: 'var(--background)',
|
||||||
|
border: '1px solid var(--border)',
|
||||||
|
color: 'var(--text-primary)',
|
||||||
|
}}
|
||||||
|
onFocus={(e) => ((e.currentTarget as HTMLElement).style.borderColor = 'var(--accent)')}
|
||||||
|
onBlur={(e) => ((e.currentTarget as HTMLElement).style.borderColor = 'var(--border)')}
|
||||||
|
/>
|
||||||
|
<p className="mt-1 text-xs" style={{ color: 'var(--text-secondary)' }}>
|
||||||
|
{`Tesseract language packs to use, joined with '+'. For Japanese manga use jpn+jpn_vert. Language data is downloaded automatically on first use.`}
|
||||||
|
</p>
|
||||||
|
</Field>
|
||||||
|
|
||||||
|
<Field label="OCR Confidence Threshold">
|
||||||
|
<input
|
||||||
|
type="number"
|
||||||
|
min={0}
|
||||||
|
max={100}
|
||||||
|
value={settings.ocrConfidenceThreshold}
|
||||||
|
onChange={(e) =>
|
||||||
|
setSettings((s) => ({ ...s, ocrConfidenceThreshold: Math.max(0, Math.min(100, parseInt(e.target.value) || 70)) }))
|
||||||
|
}
|
||||||
|
className="w-24 rounded-lg px-3 py-2 text-sm outline-none focus:ring-2"
|
||||||
|
style={{
|
||||||
|
backgroundColor: 'var(--background)',
|
||||||
|
border: '1px solid var(--border)',
|
||||||
|
color: 'var(--text-primary)',
|
||||||
|
}}
|
||||||
|
onFocus={(e) => ((e.currentTarget as HTMLElement).style.borderColor = 'var(--accent)')}
|
||||||
|
onBlur={(e) => ((e.currentTarget as HTMLElement).style.borderColor = 'var(--border)')}
|
||||||
|
/>
|
||||||
|
<p className="mt-1 text-xs" style={{ color: 'var(--text-secondary)' }}>
|
||||||
|
In hybrid mode, Tesseract results below this confidence score (0–100) fall back to the LLM. Default is 70.
|
||||||
|
</p>
|
||||||
|
</Field>
|
||||||
|
|
||||||
<Field label="Translation Model">
|
<Field label="Translation Model">
|
||||||
<input
|
<input
|
||||||
type="text"
|
type="text"
|
||||||
@@ -595,6 +731,25 @@ export default function AiTaggingPage() {
|
|||||||
/>
|
/>
|
||||||
</Field>
|
</Field>
|
||||||
|
|
||||||
|
<Field label="Translation Max Tokens">
|
||||||
|
<input
|
||||||
|
type="number"
|
||||||
|
min={1}
|
||||||
|
value={settings.maxTokensTranslate}
|
||||||
|
onChange={(e) =>
|
||||||
|
setSettings((s) => ({ ...s, maxTokensTranslate: Math.max(1, parseInt(e.target.value) || 8192) }))
|
||||||
|
}
|
||||||
|
className="w-32 rounded-lg px-3 py-2 text-sm outline-none focus:ring-2"
|
||||||
|
style={{
|
||||||
|
backgroundColor: 'var(--background)',
|
||||||
|
border: '1px solid var(--border)',
|
||||||
|
color: 'var(--text-primary)',
|
||||||
|
}}
|
||||||
|
onFocus={(e) => ((e.currentTarget as HTMLElement).style.borderColor = 'var(--accent)')}
|
||||||
|
onBlur={(e) => ((e.currentTarget as HTMLElement).style.borderColor = 'var(--border)')}
|
||||||
|
/>
|
||||||
|
</Field>
|
||||||
|
|
||||||
<Field label="Automatic Tagging">
|
<Field label="Automatic Tagging">
|
||||||
<label className="flex items-center gap-3 cursor-pointer select-none">
|
<label className="flex items-center gap-3 cursor-pointer select-none">
|
||||||
<div
|
<div
|
||||||
@@ -890,7 +1045,7 @@ export default function AiTaggingPage() {
|
|||||||
<Field key={field} label={label}>
|
<Field key={field} label={label}>
|
||||||
<input
|
<input
|
||||||
type="text"
|
type="text"
|
||||||
value={overrides[field]}
|
value={overrides[field] as string}
|
||||||
onChange={(e) => updateLibraryOverride(lib.id, field, e.target.value)}
|
onChange={(e) => updateLibraryOverride(lib.id, field, e.target.value)}
|
||||||
placeholder={`Leave blank to use global default${settings[field as keyof AiSettings] ? ` (${settings[field as keyof AiSettings]})` : ''}`}
|
placeholder={`Leave blank to use global default${settings[field as keyof AiSettings] ? ` (${settings[field as keyof AiSettings]})` : ''}`}
|
||||||
className="w-full rounded-lg px-3 py-2 text-sm font-mono outline-none focus:ring-2"
|
className="w-full rounded-lg px-3 py-2 text-sm font-mono outline-none focus:ring-2"
|
||||||
@@ -906,6 +1061,39 @@ export default function AiTaggingPage() {
|
|||||||
))}
|
))}
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
<div className="flex flex-col gap-3">
|
||||||
|
<p className="text-xs font-medium uppercase tracking-wide" style={{ color: 'var(--text-secondary)' }}>Max Tokens</p>
|
||||||
|
{(
|
||||||
|
[
|
||||||
|
['maxTokensTag', 'Tagging', 'maxTokensTag'] as const,
|
||||||
|
['maxTokensDescribe', 'Description', 'maxTokensDescribe'] as const,
|
||||||
|
['maxTokensExtract', 'Text Extraction', 'maxTokensExtract'] as const,
|
||||||
|
['maxTokensTranslate', 'Translation', 'maxTokensTranslate'] as const,
|
||||||
|
]
|
||||||
|
).map(([field, label, globalField]) => (
|
||||||
|
<Field key={field} label={label}>
|
||||||
|
<input
|
||||||
|
type="number"
|
||||||
|
min={1}
|
||||||
|
value={overrides[field] ?? ''}
|
||||||
|
placeholder={`Leave blank to use global default (${settings[globalField]})`}
|
||||||
|
onChange={(e) => {
|
||||||
|
const raw = e.target.value
|
||||||
|
updateLibraryOverride(lib.id, field, raw === '' ? null : Math.max(1, parseInt(raw) || 1))
|
||||||
|
}}
|
||||||
|
className="w-40 rounded-lg px-3 py-2 text-sm outline-none focus:ring-2"
|
||||||
|
style={{
|
||||||
|
backgroundColor: 'var(--background)',
|
||||||
|
border: '1px solid var(--border)',
|
||||||
|
color: 'var(--text-primary)',
|
||||||
|
}}
|
||||||
|
onFocus={(e) => ((e.currentTarget as HTMLElement).style.borderColor = 'var(--accent)')}
|
||||||
|
onBlur={(e) => ((e.currentTarget as HTMLElement).style.borderColor = 'var(--border)')}
|
||||||
|
/>
|
||||||
|
</Field>
|
||||||
|
))}
|
||||||
|
</div>
|
||||||
|
|
||||||
<div className="flex flex-col gap-3">
|
<div className="flex flex-col gap-3">
|
||||||
<p className="text-xs font-medium uppercase tracking-wide" style={{ color: 'var(--text-secondary)' }}>Prompts</p>
|
<p className="text-xs font-medium uppercase tracking-wide" style={{ color: 'var(--text-secondary)' }}>Prompts</p>
|
||||||
{(
|
{(
|
||||||
@@ -919,7 +1107,7 @@ export default function AiTaggingPage() {
|
|||||||
<Field key={field} label={label}>
|
<Field key={field} label={label}>
|
||||||
<textarea
|
<textarea
|
||||||
rows={3}
|
rows={3}
|
||||||
value={overrides[field]}
|
value={overrides[field] as string}
|
||||||
onChange={(e) => updateLibraryOverride(lib.id, field, e.target.value)}
|
onChange={(e) => updateLibraryOverride(lib.id, field, e.target.value)}
|
||||||
placeholder={globalValue ? `Leave blank to use global default:\n${globalValue}` : 'Leave blank to use global default'}
|
placeholder={globalValue ? `Leave blank to use global default:\n${globalValue}` : 'Leave blank to use global default'}
|
||||||
className="w-full rounded-lg px-3 py-2 text-sm outline-none focus:ring-2 resize-y"
|
className="w-full rounded-lg px-3 py-2 text-sm outline-none focus:ring-2 resize-y"
|
||||||
@@ -1010,6 +1198,7 @@ function emptyOverride(): LibraryOverride {
|
|||||||
return {
|
return {
|
||||||
modelTagging: '', modelDescribe: '', modelExtract: '', modelTranslate: '',
|
modelTagging: '', modelDescribe: '', modelExtract: '', modelTranslate: '',
|
||||||
promptDescribe: '', promptTagger: '', promptExtract: '', promptTranslate: '',
|
promptDescribe: '', promptTagger: '', promptExtract: '', promptTranslate: '',
|
||||||
|
maxTokensTag: null, maxTokensDescribe: null, maxTokensExtract: null, maxTokensTranslate: null,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -414,7 +414,7 @@ export default function ImageLightbox({ url, name, onClose, onPrev, onNext, item
|
|||||||
;(e.currentTarget as HTMLElement).style.color = 'var(--text-secondary)'
|
;(e.currentTarget as HTMLElement).style.color = 'var(--text-secondary)'
|
||||||
}}
|
}}
|
||||||
>
|
>
|
||||||
{retranslating ? '⟳ Translating…' : '🌐 Re-translate'}
|
{retranslating ? '⟳ Translating…' : translatedText ? '🌐 Re-translate' : '🌐 Translate'}
|
||||||
</button>
|
</button>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|||||||
@@ -453,6 +453,18 @@ export default function MixedView({ libraryId, initialPath }: Props) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}}
|
}}
|
||||||
|
onTranslate={async (e) => {
|
||||||
|
const itemKey = itemKeyFor(e)
|
||||||
|
const res = await fetch('/api/ai-tagging/translate', {
|
||||||
|
method: 'POST',
|
||||||
|
headers: { 'Content-Type': 'application/json' },
|
||||||
|
body: JSON.stringify({ itemKey }),
|
||||||
|
})
|
||||||
|
if (!res.ok) {
|
||||||
|
const data = await res.json().catch(() => ({}))
|
||||||
|
throw new Error((data as { error?: string }).error ?? 'Translation failed')
|
||||||
|
}
|
||||||
|
}}
|
||||||
onDelete={(e) => {
|
onDelete={(e) => {
|
||||||
const rel = filtersActive ? e.name : (currentPath ? `${currentPath}/${e.name}` : e.name)
|
const rel = filtersActive ? e.name : (currentPath ? `${currentPath}/${e.name}` : e.name)
|
||||||
fetch(`/api/browse?libraryId=${encodeURIComponent(libraryId)}&path=${encodeURIComponent(rel)}`, { method: 'DELETE' })
|
fetch(`/api/browse?libraryId=${encodeURIComponent(libraryId)}&path=${encodeURIComponent(rel)}`, { method: 'DELETE' })
|
||||||
@@ -582,7 +594,7 @@ export default function MixedView({ libraryId, initialPath }: Props) {
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
function EntryTile({ entry, onOpen, onTag, onDelete, onRename, onAiTag, onExtractText, onDescribe }: { entry: FileEntry; onOpen: (e: FileEntry) => void; onTag: (e: FileEntry) => void; onDelete?: (e: FileEntry) => void; onRename?: (e: FileEntry, newName: string) => Promise<boolean>; onAiTag?: (e: FileEntry) => Promise<void>; onExtractText?: (e: FileEntry) => Promise<void>; onDescribe?: (e: FileEntry) => Promise<void> }) {
|
function EntryTile({ entry, onOpen, onTag, onDelete, onRename, onAiTag, onExtractText, onDescribe, onTranslate }: { entry: FileEntry; onOpen: (e: FileEntry) => void; onTag: (e: FileEntry) => void; onDelete?: (e: FileEntry) => void; onRename?: (e: FileEntry, newName: string) => Promise<boolean>; onAiTag?: (e: FileEntry) => Promise<void>; onExtractText?: (e: FileEntry) => Promise<void>; onDescribe?: (e: FileEntry) => Promise<void>; onTranslate?: (e: FileEntry) => Promise<void> }) {
|
||||||
type ImgState = 'loading' | 'loaded' | 'error'
|
type ImgState = 'loading' | 'loaded' | 'error'
|
||||||
const [imgState, setImgState] = useState<ImgState>(
|
const [imgState, setImgState] = useState<ImgState>(
|
||||||
entry.thumbnailUrl ? 'loading' : 'error'
|
entry.thumbnailUrl ? 'loading' : 'error'
|
||||||
@@ -601,6 +613,8 @@ function EntryTile({ entry, onOpen, onTag, onDelete, onRename, onAiTag, onExtrac
|
|||||||
const [textExtractError, setTextExtractError] = useState<string | null>(null)
|
const [textExtractError, setTextExtractError] = useState<string | null>(null)
|
||||||
const [describing, setDescribing] = useState(false)
|
const [describing, setDescribing] = useState(false)
|
||||||
const [describeError, setDescribeError] = useState<string | null>(null)
|
const [describeError, setDescribeError] = useState<string | null>(null)
|
||||||
|
const [translating, setTranslating] = useState(false)
|
||||||
|
const [translateError, setTranslateError] = useState<string | null>(null)
|
||||||
|
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
if (!menuOpen) return
|
if (!menuOpen) return
|
||||||
@@ -830,6 +844,26 @@ function EntryTile({ entry, onOpen, onTag, onDelete, onRename, onAiTag, onExtrac
|
|||||||
🔍 Extract Text for Folder
|
🔍 Extract Text for Folder
|
||||||
</button>
|
</button>
|
||||||
)}
|
)}
|
||||||
|
{onTranslate && entry.mediaType === 'image' && (
|
||||||
|
<button
|
||||||
|
onClick={(e) => {
|
||||||
|
e.stopPropagation()
|
||||||
|
setMenuOpen(false)
|
||||||
|
setTranslating(true)
|
||||||
|
setTranslateError(null)
|
||||||
|
onTranslate(entry)
|
||||||
|
.catch((err) => setTranslateError(err instanceof Error ? err.message : 'Translation failed'))
|
||||||
|
.finally(() => setTranslating(false))
|
||||||
|
}}
|
||||||
|
disabled={translating}
|
||||||
|
className="flex items-center gap-2 w-full px-4 py-2 text-sm text-left transition-colors disabled:opacity-50"
|
||||||
|
style={{ color: 'var(--text-primary)' }}
|
||||||
|
onMouseEnter={(e) => ((e.currentTarget as HTMLElement).style.backgroundColor = 'var(--border)')}
|
||||||
|
onMouseLeave={(e) => ((e.currentTarget as HTMLElement).style.backgroundColor = 'transparent')}
|
||||||
|
>
|
||||||
|
🌐 Translate
|
||||||
|
</button>
|
||||||
|
)}
|
||||||
{onRename && (
|
{onRename && (
|
||||||
<button
|
<button
|
||||||
onClick={(e) => {
|
onClick={(e) => {
|
||||||
@@ -929,6 +963,28 @@ function EntryTile({ entry, onOpen, onTag, onDelete, onRename, onAiTag, onExtrac
|
|||||||
</div>
|
</div>
|
||||||
)}
|
)}
|
||||||
|
|
||||||
|
{/* Translation status overlay */}
|
||||||
|
{(translating || translateError) && (
|
||||||
|
<div
|
||||||
|
className="absolute inset-x-0 bottom-0 z-10 px-2 py-1.5 text-xs"
|
||||||
|
style={{ backgroundColor: translateError ? 'rgba(127,29,29,0.9)' : 'rgba(0,0,0,0.75)' }}
|
||||||
|
onClick={(e) => e.stopPropagation()}
|
||||||
|
>
|
||||||
|
<span style={{ color: translateError ? '#fca5a5' : 'var(--text-secondary)' }}>
|
||||||
|
{translateError ?? 'Translating…'}
|
||||||
|
</span>
|
||||||
|
{translateError && (
|
||||||
|
<button
|
||||||
|
onClick={() => setTranslateError(null)}
|
||||||
|
className="ml-2 underline text-xs"
|
||||||
|
style={{ color: '#fca5a5' }}
|
||||||
|
>
|
||||||
|
dismiss
|
||||||
|
</button>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
|
||||||
{/* Delete confirmation overlay */}
|
{/* Delete confirmation overlay */}
|
||||||
{confirming && (
|
{confirming && (
|
||||||
<div
|
<div
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ import type { Library, Tag, TagCategory } from '@/types'
|
|||||||
import { getDb } from './db'
|
import { getDb } from './db'
|
||||||
import { getAiConfig, getEffectiveAiConfig, getPreferredLanguage } from './app-settings'
|
import { getAiConfig, getEffectiveAiConfig, getPreferredLanguage } from './app-settings'
|
||||||
import { getTags, getCategories, addTagToItem, getActiveCategoryIdsForLibrary, getResolvedTagsForItem } from './tags'
|
import { getTags, getCategories, addTagToItem, getActiveCategoryIdsForLibrary, getResolvedTagsForItem } from './tags'
|
||||||
import { getAiImagePath, getVideoFramePaths } from './thumbnails'
|
import { getAiImagePath, getOcrImagePath, getVideoFramePaths } from './thumbnails'
|
||||||
import { findFile } from './media-utils'
|
import { findFile } from './media-utils'
|
||||||
import { getLibrary, resolveLibraryRoot } from './libraries'
|
import { getLibrary, resolveLibraryRoot } from './libraries'
|
||||||
|
|
||||||
@@ -171,7 +171,8 @@ async function callVisionApi(
|
|||||||
endpoint: string,
|
endpoint: string,
|
||||||
model: string,
|
model: string,
|
||||||
base64Images: string[],
|
base64Images: string[],
|
||||||
systemPrompt: string
|
systemPrompt: string,
|
||||||
|
maxTokens: number,
|
||||||
): Promise<string[]> {
|
): Promise<string[]> {
|
||||||
const url = endpoint.replace(/\/+$/, '') + '/chat/completions'
|
const url = endpoint.replace(/\/+$/, '') + '/chat/completions'
|
||||||
|
|
||||||
@@ -195,7 +196,7 @@ async function callVisionApi(
|
|||||||
})),
|
})),
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
max_tokens: 8192,
|
max_tokens: maxTokens,
|
||||||
temperature: 0.1,
|
temperature: 0.1,
|
||||||
}),
|
}),
|
||||||
})
|
})
|
||||||
@@ -338,7 +339,7 @@ export async function tagSingleItem(itemKey: string): Promise<string[]> {
|
|||||||
customInstruction: config.promptTagger || undefined,
|
customInstruction: config.promptTagger || undefined,
|
||||||
})
|
})
|
||||||
|
|
||||||
const suggestedIds = await callVisionApi(config.endpoint, taggingModel, base64Images, systemPromptWithContext)
|
const suggestedIds = await callVisionApi(config.endpoint, taggingModel, base64Images, systemPromptWithContext, config.maxTokensTag)
|
||||||
const validIds = suggestedIds.filter((id) => validTagIds.has(id))
|
const validIds = suggestedIds.filter((id) => validTagIds.has(id))
|
||||||
|
|
||||||
for (const tagId of validIds) {
|
for (const tagId of validIds) {
|
||||||
@@ -359,7 +360,8 @@ async function callVisionApiText(
|
|||||||
endpoint: string,
|
endpoint: string,
|
||||||
model: string,
|
model: string,
|
||||||
base64Images: string[],
|
base64Images: string[],
|
||||||
systemPrompt: string
|
systemPrompt: string,
|
||||||
|
maxTokens: number,
|
||||||
): Promise<string> {
|
): Promise<string> {
|
||||||
const url = endpoint.replace(/\/+$/, '') + '/chat/completions'
|
const url = endpoint.replace(/\/+$/, '') + '/chat/completions'
|
||||||
|
|
||||||
@@ -383,7 +385,7 @@ async function callVisionApiText(
|
|||||||
})),
|
})),
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
max_tokens: 8192,
|
max_tokens: maxTokens,
|
||||||
temperature: 0.1,
|
temperature: 0.1,
|
||||||
}),
|
}),
|
||||||
})
|
})
|
||||||
@@ -410,7 +412,8 @@ async function callChatApiText(
|
|||||||
endpoint: string,
|
endpoint: string,
|
||||||
model: string,
|
model: string,
|
||||||
systemPrompt: string,
|
systemPrompt: string,
|
||||||
userMessage: string
|
userMessage: string,
|
||||||
|
maxTokens: number,
|
||||||
): Promise<string> {
|
): Promise<string> {
|
||||||
const url = endpoint.replace(/\/+$/, '') + '/chat/completions'
|
const url = endpoint.replace(/\/+$/, '') + '/chat/completions'
|
||||||
|
|
||||||
@@ -428,7 +431,7 @@ async function callChatApiText(
|
|||||||
{ role: 'system', content: systemPrompt },
|
{ role: 'system', content: systemPrompt },
|
||||||
{ role: 'user', content: userMessage },
|
{ role: 'user', content: userMessage },
|
||||||
],
|
],
|
||||||
max_tokens: 8192,
|
max_tokens: maxTokens,
|
||||||
temperature: 0.1,
|
temperature: 0.1,
|
||||||
}),
|
}),
|
||||||
})
|
})
|
||||||
@@ -496,7 +499,7 @@ export async function generateItemDescription(itemKey: string): Promise<string>
|
|||||||
: ''
|
: ''
|
||||||
const systemPrompt = `You are a media cataloging assistant. Describe the given image briefly and objectively in 1-3 sentences.${config.promptDescribe ? ' ' + config.promptDescribe : ''}${tagContext}`
|
const systemPrompt = `You are a media cataloging assistant. Describe the given image briefly and objectively in 1-3 sentences.${config.promptDescribe ? ' ' + config.promptDescribe : ''}${tagContext}`
|
||||||
|
|
||||||
const description = await callVisionApiText(config.endpoint, describeModel, base64Images, systemPrompt)
|
const description = await callVisionApiText(config.endpoint, describeModel, base64Images, systemPrompt, config.maxTokensDescribe)
|
||||||
|
|
||||||
db.prepare('UPDATE media_items SET ai_description = ? WHERE item_key = ?').run(description, itemKey)
|
db.prepare('UPDATE media_items SET ai_description = ? WHERE item_key = ?').run(description, itemKey)
|
||||||
|
|
||||||
@@ -506,36 +509,38 @@ export async function generateItemDescription(itemKey: string): Promise<string>
|
|||||||
// ─── Text extraction ─────────────────────────────────────────────────────────
|
// ─── Text extraction ─────────────────────────────────────────────────────────
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Extract text (OCR) from an image using the vision model.
|
* Run Tesseract OCR on a preprocessed image file.
|
||||||
* Only works for images in mixed libraries.
|
* Returns the extracted text and a mean confidence score (0–100).
|
||||||
* If the extracted text is not in the user's preferred language, auto-translates it.
|
* A confidence of 0 with empty text means no recognisable text was found.
|
||||||
* Returns { extractedText, translatedText }.
|
|
||||||
*/
|
*/
|
||||||
/**
|
async function extractWithTesseract(
|
||||||
* Parse a structured extraction response from the AI.
|
imagePath: string,
|
||||||
* Returns null if the response cannot be parsed as valid JSON with the expected shape.
|
languages: string,
|
||||||
*/
|
): Promise<{ text: string; confidence: number }> {
|
||||||
function parseStructuredExtraction(raw: string): { text: string; needsTranslation: boolean } | null {
|
const { createWorker } = await import('tesseract.js')
|
||||||
const jsonMatch = raw.match(/\{[\s\S]*\}/)
|
const workerPath = path.join(process.cwd(), 'node_modules/tesseract.js/src/worker-script/node/index.js')
|
||||||
if (!jsonMatch) return null
|
const worker = await createWorker(languages, 1, { workerPath })
|
||||||
try {
|
try {
|
||||||
const parsed = JSON.parse(jsonMatch[0])
|
const { data } = await worker.recognize(imagePath)
|
||||||
if (typeof parsed.text === 'string' && typeof parsed.needsTranslation === 'boolean') {
|
return { text: data.text.trim(), confidence: data.confidence }
|
||||||
return { text: parsed.text, needsTranslation: parsed.needsTranslation }
|
} finally {
|
||||||
}
|
await worker.terminate()
|
||||||
} catch {
|
|
||||||
// fall through
|
|
||||||
}
|
}
|
||||||
return null
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Extract text (OCR) from an image using the configured OCR mode:
|
||||||
|
* - hybrid: try Tesseract first; fall back to LLM if confidence is below threshold
|
||||||
|
* - tesseract: local Tesseract only, no LLM call
|
||||||
|
* - llm: LLM vision API only (original behaviour)
|
||||||
|
*
|
||||||
|
* Only works for images in mixed libraries.
|
||||||
|
* Translation is not performed automatically — call translateItemText() separately.
|
||||||
|
* Returns { extractedText, translatedText } where translatedText is always null.
|
||||||
|
*/
|
||||||
export async function extractItemText(itemKey: string): Promise<{ extractedText: string; translatedText: string | null }> {
|
export async function extractItemText(itemKey: string): Promise<{ extractedText: string; translatedText: string | null }> {
|
||||||
const libraryId = itemKey.split(':')[0]
|
const libraryId = itemKey.split(':')[0]
|
||||||
const config = getEffectiveAiConfig(libraryId)
|
const config = getEffectiveAiConfig(libraryId)
|
||||||
const extractModel = config.modelExtract || config.model
|
|
||||||
if (!config.endpoint || !extractModel) {
|
|
||||||
throw Object.assign(new Error('AI endpoint and model are not configured'), { code: 'NOT_CONFIGURED' })
|
|
||||||
}
|
|
||||||
|
|
||||||
const db = getDb()
|
const db = getDb()
|
||||||
const item = db
|
const item = db
|
||||||
@@ -562,72 +567,49 @@ export async function extractItemText(itemKey: string): Promise<{ extractedText:
|
|||||||
throw Object.assign(new Error('Text extraction is only available for images'), { code: 'NO_IMAGE' })
|
throw Object.assign(new Error('Text extraction is only available for images'), { code: 'NO_IMAGE' })
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const { ocrMode, ocrLanguages, ocrConfidenceThreshold } = config
|
||||||
|
|
||||||
|
// ── Tesseract path ────────────────────────────────────────────────────────
|
||||||
|
if (ocrMode === 'tesseract' || ocrMode === 'hybrid') {
|
||||||
|
const ocrImagePath = await getOcrImagePath(resolvedMedia.path, libraryId)
|
||||||
|
const { text, confidence } = await extractWithTesseract(ocrImagePath, ocrLanguages)
|
||||||
|
|
||||||
|
const useTesseractResult = ocrMode === 'tesseract' || confidence >= ocrConfidenceThreshold
|
||||||
|
if (useTesseractResult) {
|
||||||
|
console.log(`[ocr] tesseract used for ${itemKey} (confidence=${confidence}, mode=${ocrMode})`)
|
||||||
|
if (!text) {
|
||||||
|
db.prepare('UPDATE media_items SET extracted_text = NULL, extracted_text_translated = NULL WHERE item_key = ?').run(itemKey)
|
||||||
|
return { extractedText: '', translatedText: null }
|
||||||
|
}
|
||||||
|
db.prepare('UPDATE media_items SET extracted_text = ?, extracted_text_translated = NULL WHERE item_key = ?').run(text, itemKey)
|
||||||
|
return { extractedText: text, translatedText: null }
|
||||||
|
}
|
||||||
|
console.log(`[ocr] tesseract confidence too low (${confidence} < ${ocrConfidenceThreshold}), falling back to LLM for ${itemKey}`)
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── LLM vision path ───────────────────────────────────────────────────────
|
||||||
|
const extractModel = config.modelExtract || config.model
|
||||||
|
if (!config.endpoint || !extractModel) {
|
||||||
|
throw Object.assign(new Error('AI endpoint and model are not configured'), { code: 'NOT_CONFIGURED' })
|
||||||
|
}
|
||||||
|
|
||||||
const thumbnailPath = await getAiImagePath(resolvedMedia.path, libraryId)
|
const thumbnailPath = await getAiImagePath(resolvedMedia.path, libraryId)
|
||||||
const base64Images = [fs.readFileSync(thumbnailPath, 'base64')]
|
const base64Images = [fs.readFileSync(thumbnailPath, 'base64')]
|
||||||
|
|
||||||
const preferredLanguage = getPreferredLanguage()
|
|
||||||
const customInstruction = config.promptExtract ? ' ' + config.promptExtract : ''
|
const customInstruction = config.promptExtract ? ' ' + config.promptExtract : ''
|
||||||
|
const systemPrompt = `You are an OCR assistant. Extract ALL text visible in the image exactly as it appears. Preserve line breaks and formatting.${customInstruction} If there is no text in the image, respond with exactly: [NO TEXT]`
|
||||||
|
|
||||||
// When a preferred language is configured, ask the AI to also flag whether translation is needed.
|
console.log(`[ocr] llm used for ${itemKey} (mode=${ocrMode})`)
|
||||||
// This avoids a separate translation API call for text already in the target language.
|
const extractedText = await callVisionApiText(config.endpoint, extractModel, base64Images, systemPrompt, config.maxTokensExtract)
|
||||||
let systemPrompt: string
|
|
||||||
if (preferredLanguage) {
|
|
||||||
systemPrompt = `You are an OCR assistant. Extract ALL text visible in the image exactly as it appears. Preserve line breaks and formatting.${customInstruction}
|
|
||||||
|
|
||||||
Respond ONLY with a valid JSON object — no markdown, no explanation:
|
|
||||||
{"needsTranslation": boolean, "text": "extracted text"}
|
|
||||||
|
|
||||||
Rules:
|
|
||||||
- Set needsTranslation to true if the text is NOT already written in ${preferredLanguage}.
|
|
||||||
- Set needsTranslation to false if the text IS in ${preferredLanguage}, or if there is no text.
|
|
||||||
- If there is no text in the image, use exactly: {"needsTranslation": false, "text": "[NO TEXT]"}`
|
|
||||||
} else {
|
|
||||||
systemPrompt = `You are an OCR assistant. Extract ALL text visible in the image exactly as it appears. Preserve line breaks and formatting.${customInstruction} If there is no text in the image, respond with exactly: [NO TEXT]`
|
|
||||||
}
|
|
||||||
|
|
||||||
const rawResponse = await callVisionApiText(config.endpoint, extractModel, base64Images, systemPrompt)
|
|
||||||
|
|
||||||
// Parse the response — structured JSON when a preferred language is set, plain text otherwise
|
|
||||||
let extractedText: string
|
|
||||||
let needsTranslation: boolean
|
|
||||||
|
|
||||||
if (preferredLanguage) {
|
|
||||||
const parsed = parseStructuredExtraction(rawResponse)
|
|
||||||
if (parsed) {
|
|
||||||
extractedText = parsed.text
|
|
||||||
needsTranslation = parsed.needsTranslation
|
|
||||||
} else {
|
|
||||||
// Malformed JSON fallback: treat raw response as plain text and attempt translation
|
|
||||||
extractedText = rawResponse
|
|
||||||
needsTranslation = true
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
extractedText = rawResponse
|
|
||||||
needsTranslation = false
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!extractedText || extractedText === '[NO TEXT]') {
|
if (!extractedText || extractedText === '[NO TEXT]') {
|
||||||
db.prepare('UPDATE media_items SET extracted_text = NULL, extracted_text_translated = NULL WHERE item_key = ?').run(itemKey)
|
db.prepare('UPDATE media_items SET extracted_text = NULL, extracted_text_translated = NULL WHERE item_key = ?').run(itemKey)
|
||||||
return { extractedText: '', translatedText: null }
|
return { extractedText: '', translatedText: null }
|
||||||
}
|
}
|
||||||
|
|
||||||
db.prepare('UPDATE media_items SET extracted_text = ? WHERE item_key = ?').run(extractedText, itemKey)
|
db.prepare('UPDATE media_items SET extracted_text = ?, extracted_text_translated = NULL WHERE item_key = ?').run(extractedText, itemKey)
|
||||||
|
|
||||||
// Only translate if the extraction step determined the text is not already in the preferred language
|
return { extractedText, translatedText: null }
|
||||||
let translatedText: string | null = null
|
|
||||||
if (preferredLanguage && needsTranslation) {
|
|
||||||
const translateModel = config.modelTranslate || config.model
|
|
||||||
try {
|
|
||||||
translatedText = await translateText(config.endpoint, translateModel, extractedText, preferredLanguage, config.promptTranslate)
|
|
||||||
if (translatedText) {
|
|
||||||
db.prepare('UPDATE media_items SET extracted_text_translated = ? WHERE item_key = ?').run(translatedText, itemKey)
|
|
||||||
}
|
|
||||||
} catch (err) {
|
|
||||||
console.warn(`[ai-tagger] Translation failed for "${itemKey}":`, err instanceof Error ? err.message : err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return { extractedText, translatedText }
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -656,7 +638,7 @@ export async function translateItemText(itemKey: string, sourceLanguage?: string
|
|||||||
const preferredLanguage = getPreferredLanguage()
|
const preferredLanguage = getPreferredLanguage()
|
||||||
if (!preferredLanguage) return null
|
if (!preferredLanguage) return null
|
||||||
|
|
||||||
const translatedText = await translateText(config.endpoint, translateModel, row.extracted_text, preferredLanguage, config.promptTranslate, sourceLanguage)
|
const translatedText = await translateText(config.endpoint, translateModel, row.extracted_text, preferredLanguage, config.promptTranslate, config.maxTokensTranslate, sourceLanguage)
|
||||||
if (translatedText) {
|
if (translatedText) {
|
||||||
db.prepare('UPDATE media_items SET extracted_text_translated = ? WHERE item_key = ?').run(translatedText, itemKey)
|
db.prepare('UPDATE media_items SET extracted_text_translated = ? WHERE item_key = ?').run(translatedText, itemKey)
|
||||||
}
|
}
|
||||||
@@ -682,6 +664,7 @@ async function translateText(
|
|||||||
text: string,
|
text: string,
|
||||||
targetLanguage: string,
|
targetLanguage: string,
|
||||||
customInstruction = '',
|
customInstruction = '',
|
||||||
|
maxTokens = 8192,
|
||||||
sourceLanguage?: string,
|
sourceLanguage?: string,
|
||||||
): Promise<string | null> {
|
): Promise<string | null> {
|
||||||
let systemPrompt: string
|
let systemPrompt: string
|
||||||
@@ -691,7 +674,7 @@ async function translateText(
|
|||||||
systemPrompt = `You are a translator. Determine if the following text is already in ${targetLanguage}. If it is, respond with exactly: [ALREADY_TARGET_LANGUAGE]. If it is not, translate it to ${targetLanguage}.${customInstruction ? ' ' + customInstruction : ''}`
|
systemPrompt = `You are a translator. Determine if the following text is already in ${targetLanguage}. If it is, respond with exactly: [ALREADY_TARGET_LANGUAGE]. If it is not, translate it to ${targetLanguage}.${customInstruction ? ' ' + customInstruction : ''}`
|
||||||
}
|
}
|
||||||
|
|
||||||
const result = await callChatApiText(endpoint, model, systemPrompt, text)
|
const result = await callChatApiText(endpoint, model, systemPrompt, text, maxTokens)
|
||||||
|
|
||||||
if (!sourceLanguage && (result === '[ALREADY_TARGET_LANGUAGE]' || !result)) {
|
if (!sourceLanguage && (result === '[ALREADY_TARGET_LANGUAGE]' || !result)) {
|
||||||
return null
|
return null
|
||||||
|
|||||||
@@ -46,6 +46,8 @@ const DEFAULT_PROMPT_EXTRACT =
|
|||||||
'Be mindful of different colors of text that may indicate different speakers or emphasis.'
|
'Be mindful of different colors of text that may indicate different speakers or emphasis.'
|
||||||
const DEFAULT_PROMPT_TRANSLATE = 'Return ONLY the translated text with no additional commentary.'
|
const DEFAULT_PROMPT_TRANSLATE = 'Return ONLY the translated text with no additional commentary.'
|
||||||
|
|
||||||
|
export type OcrMode = 'hybrid' | 'tesseract' | 'llm'
|
||||||
|
|
||||||
export interface AiConfig {
|
export interface AiConfig {
|
||||||
endpoint: string
|
endpoint: string
|
||||||
model: string
|
model: string
|
||||||
@@ -58,6 +60,13 @@ export interface AiConfig {
|
|||||||
promptTagger: string
|
promptTagger: string
|
||||||
promptExtract: string
|
promptExtract: string
|
||||||
promptTranslate: string
|
promptTranslate: string
|
||||||
|
maxTokensTag: number
|
||||||
|
maxTokensDescribe: number
|
||||||
|
maxTokensExtract: number
|
||||||
|
maxTokensTranslate: number
|
||||||
|
ocrMode: OcrMode
|
||||||
|
ocrLanguages: string
|
||||||
|
ocrConfidenceThreshold: number
|
||||||
}
|
}
|
||||||
|
|
||||||
export function getAiConfig(): AiConfig {
|
export function getAiConfig(): AiConfig {
|
||||||
@@ -76,9 +85,19 @@ export function getAiConfig(): AiConfig {
|
|||||||
const promptExtract = promptExtractRaw !== null ? promptExtractRaw : DEFAULT_PROMPT_EXTRACT
|
const promptExtract = promptExtractRaw !== null ? promptExtractRaw : DEFAULT_PROMPT_EXTRACT
|
||||||
const promptTranslateRaw = getSetting('ai_prompt_translate')
|
const promptTranslateRaw = getSetting('ai_prompt_translate')
|
||||||
const promptTranslate = promptTranslateRaw !== null ? promptTranslateRaw : DEFAULT_PROMPT_TRANSLATE
|
const promptTranslate = promptTranslateRaw !== null ? promptTranslateRaw : DEFAULT_PROMPT_TRANSLATE
|
||||||
|
const maxTokensTag = parseInt(getSetting('ai_max_tokens_tag') ?? '8192', 10) || 8192
|
||||||
|
const maxTokensDescribe = parseInt(getSetting('ai_max_tokens_describe') ?? '8192', 10) || 8192
|
||||||
|
const maxTokensExtract = parseInt(getSetting('ai_max_tokens_extract') ?? '8192', 10) || 8192
|
||||||
|
const maxTokensTranslate = parseInt(getSetting('ai_max_tokens_translate') ?? '8192', 10) || 8192
|
||||||
|
const rawOcrMode = getSetting('ai_ocr_mode') ?? 'hybrid'
|
||||||
|
const ocrMode: OcrMode = rawOcrMode === 'tesseract' || rawOcrMode === 'llm' ? rawOcrMode : 'hybrid'
|
||||||
|
const ocrLanguages = getSetting('ai_ocr_languages') ?? 'eng'
|
||||||
|
const ocrConfidenceThreshold = parseInt(getSetting('ai_ocr_confidence_threshold') ?? '70', 10) || 70
|
||||||
return {
|
return {
|
||||||
endpoint, model, modelTagging, modelDescribe, modelExtract, modelTranslate, enabled,
|
endpoint, model, modelTagging, modelDescribe, modelExtract, modelTranslate, enabled,
|
||||||
promptDescribe, promptTagger, promptExtract, promptTranslate,
|
promptDescribe, promptTagger, promptExtract, promptTranslate,
|
||||||
|
maxTokensTag, maxTokensDescribe, maxTokensExtract, maxTokensTranslate,
|
||||||
|
ocrMode, ocrLanguages, ocrConfidenceThreshold,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -94,6 +113,13 @@ export function updateAiConfig(
|
|||||||
promptTagger?: string,
|
promptTagger?: string,
|
||||||
promptExtract?: string,
|
promptExtract?: string,
|
||||||
promptTranslate?: string,
|
promptTranslate?: string,
|
||||||
|
maxTokensTag?: number,
|
||||||
|
maxTokensDescribe?: number,
|
||||||
|
maxTokensExtract?: number,
|
||||||
|
maxTokensTranslate?: number,
|
||||||
|
ocrMode?: OcrMode,
|
||||||
|
ocrLanguages?: string,
|
||||||
|
ocrConfidenceThreshold?: number,
|
||||||
): void {
|
): void {
|
||||||
setSetting('ai_endpoint', endpoint)
|
setSetting('ai_endpoint', endpoint)
|
||||||
setSetting('ai_model', model)
|
setSetting('ai_model', model)
|
||||||
@@ -106,6 +132,13 @@ export function updateAiConfig(
|
|||||||
if (promptTagger !== undefined) setSetting('ai_prompt_tagger', promptTagger)
|
if (promptTagger !== undefined) setSetting('ai_prompt_tagger', promptTagger)
|
||||||
if (promptExtract !== undefined) setSetting('ai_prompt_extract', promptExtract)
|
if (promptExtract !== undefined) setSetting('ai_prompt_extract', promptExtract)
|
||||||
if (promptTranslate !== undefined) setSetting('ai_prompt_translate', promptTranslate)
|
if (promptTranslate !== undefined) setSetting('ai_prompt_translate', promptTranslate)
|
||||||
|
if (maxTokensTag !== undefined) setSetting('ai_max_tokens_tag', String(Math.max(1, Math.floor(maxTokensTag))))
|
||||||
|
if (maxTokensDescribe !== undefined) setSetting('ai_max_tokens_describe', String(Math.max(1, Math.floor(maxTokensDescribe))))
|
||||||
|
if (maxTokensExtract !== undefined) setSetting('ai_max_tokens_extract', String(Math.max(1, Math.floor(maxTokensExtract))))
|
||||||
|
if (maxTokensTranslate !== undefined) setSetting('ai_max_tokens_translate', String(Math.max(1, Math.floor(maxTokensTranslate))))
|
||||||
|
if (ocrMode !== undefined) setSetting('ai_ocr_mode', ocrMode)
|
||||||
|
if (ocrLanguages !== undefined) setSetting('ai_ocr_languages', ocrLanguages.trim() || 'eng')
|
||||||
|
if (ocrConfidenceThreshold !== undefined) setSetting('ai_ocr_confidence_threshold', String(Math.max(0, Math.min(100, Math.floor(ocrConfidenceThreshold)))))
|
||||||
}
|
}
|
||||||
|
|
||||||
export function getPreferredLanguage(): string {
|
export function getPreferredLanguage(): string {
|
||||||
@@ -127,6 +160,10 @@ export interface LibraryAiOverrides {
|
|||||||
promptTagger: string
|
promptTagger: string
|
||||||
promptExtract: string
|
promptExtract: string
|
||||||
promptTranslate: string
|
promptTranslate: string
|
||||||
|
maxTokensTag: number | null
|
||||||
|
maxTokensDescribe: number | null
|
||||||
|
maxTokensExtract: number | null
|
||||||
|
maxTokensTranslate: number | null
|
||||||
}
|
}
|
||||||
|
|
||||||
interface LibraryAiSettingsRow {
|
interface LibraryAiSettingsRow {
|
||||||
@@ -138,6 +175,10 @@ interface LibraryAiSettingsRow {
|
|||||||
prompt_tagger: string | null
|
prompt_tagger: string | null
|
||||||
prompt_extract: string | null
|
prompt_extract: string | null
|
||||||
prompt_translate: string | null
|
prompt_translate: string | null
|
||||||
|
max_tokens_tag: number | null
|
||||||
|
max_tokens_describe: number | null
|
||||||
|
max_tokens_extract: number | null
|
||||||
|
max_tokens_translate: number | null
|
||||||
}
|
}
|
||||||
|
|
||||||
export function getLibraryAiOverrides(libraryId: string): LibraryAiOverrides {
|
export function getLibraryAiOverrides(libraryId: string): LibraryAiOverrides {
|
||||||
@@ -154,6 +195,10 @@ export function getLibraryAiOverrides(libraryId: string): LibraryAiOverrides {
|
|||||||
promptTagger: row?.prompt_tagger ?? '',
|
promptTagger: row?.prompt_tagger ?? '',
|
||||||
promptExtract: row?.prompt_extract ?? '',
|
promptExtract: row?.prompt_extract ?? '',
|
||||||
promptTranslate: row?.prompt_translate ?? '',
|
promptTranslate: row?.prompt_translate ?? '',
|
||||||
|
maxTokensTag: row?.max_tokens_tag ?? null,
|
||||||
|
maxTokensDescribe: row?.max_tokens_describe ?? null,
|
||||||
|
maxTokensExtract: row?.max_tokens_extract ?? null,
|
||||||
|
maxTokensTranslate: row?.max_tokens_translate ?? null,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -164,7 +209,7 @@ export function setLibraryAiOverrides(libraryId: string, overrides: Partial<Libr
|
|||||||
'INSERT OR IGNORE INTO library_ai_settings (library_id) VALUES (?)'
|
'INSERT OR IGNORE INTO library_ai_settings (library_id) VALUES (?)'
|
||||||
).run(libraryId)
|
).run(libraryId)
|
||||||
|
|
||||||
const fields: Record<string, string | undefined> = {
|
const stringFields: Record<string, string | undefined> = {
|
||||||
model_tagging: overrides.modelTagging,
|
model_tagging: overrides.modelTagging,
|
||||||
model_describe: overrides.modelDescribe,
|
model_describe: overrides.modelDescribe,
|
||||||
model_extract: overrides.modelExtract,
|
model_extract: overrides.modelExtract,
|
||||||
@@ -175,7 +220,7 @@ export function setLibraryAiOverrides(libraryId: string, overrides: Partial<Libr
|
|||||||
prompt_translate: overrides.promptTranslate,
|
prompt_translate: overrides.promptTranslate,
|
||||||
}
|
}
|
||||||
|
|
||||||
for (const [col, val] of Object.entries(fields)) {
|
for (const [col, val] of Object.entries(stringFields)) {
|
||||||
if (val !== undefined) {
|
if (val !== undefined) {
|
||||||
db.prepare(`UPDATE library_ai_settings SET ${col} = ? WHERE library_id = ?`).run(
|
db.prepare(`UPDATE library_ai_settings SET ${col} = ? WHERE library_id = ?`).run(
|
||||||
val === '' ? null : val,
|
val === '' ? null : val,
|
||||||
@@ -183,6 +228,22 @@ export function setLibraryAiOverrides(libraryId: string, overrides: Partial<Libr
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const numberFields: Record<string, number | null | undefined> = {
|
||||||
|
max_tokens_tag: overrides.maxTokensTag,
|
||||||
|
max_tokens_describe: overrides.maxTokensDescribe,
|
||||||
|
max_tokens_extract: overrides.maxTokensExtract,
|
||||||
|
max_tokens_translate: overrides.maxTokensTranslate,
|
||||||
|
}
|
||||||
|
|
||||||
|
for (const [col, val] of Object.entries(numberFields)) {
|
||||||
|
if (val !== undefined) {
|
||||||
|
db.prepare(`UPDATE library_ai_settings SET ${col} = ? WHERE library_id = ?`).run(
|
||||||
|
val === null ? null : Math.max(1, Math.floor(val)),
|
||||||
|
libraryId,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
export function getEffectiveAiConfig(libraryId: string): AiConfig {
|
export function getEffectiveAiConfig(libraryId: string): AiConfig {
|
||||||
@@ -200,6 +261,13 @@ export function getEffectiveAiConfig(libraryId: string): AiConfig {
|
|||||||
promptTagger: overrides.promptTagger || global.promptTagger,
|
promptTagger: overrides.promptTagger || global.promptTagger,
|
||||||
promptExtract: overrides.promptExtract || global.promptExtract,
|
promptExtract: overrides.promptExtract || global.promptExtract,
|
||||||
promptTranslate: overrides.promptTranslate || global.promptTranslate,
|
promptTranslate: overrides.promptTranslate || global.promptTranslate,
|
||||||
|
maxTokensTag: overrides.maxTokensTag ?? global.maxTokensTag,
|
||||||
|
maxTokensDescribe: overrides.maxTokensDescribe ?? global.maxTokensDescribe,
|
||||||
|
maxTokensExtract: overrides.maxTokensExtract ?? global.maxTokensExtract,
|
||||||
|
maxTokensTranslate: overrides.maxTokensTranslate ?? global.maxTokensTranslate,
|
||||||
|
ocrMode: global.ocrMode,
|
||||||
|
ocrLanguages: global.ocrLanguages,
|
||||||
|
ocrConfidenceThreshold: global.ocrConfidenceThreshold,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -119,6 +119,10 @@ function seedAppSettings(db: Database.Database): void {
|
|||||||
ai_model: '',
|
ai_model: '',
|
||||||
preferred_language: 'English',
|
preferred_language: 'English',
|
||||||
ai_max_retries: '3',
|
ai_max_retries: '3',
|
||||||
|
ai_max_tokens_tag: '8192',
|
||||||
|
ai_max_tokens_describe: '8192',
|
||||||
|
ai_max_tokens_extract: '8192',
|
||||||
|
ai_max_tokens_translate: '8192',
|
||||||
}
|
}
|
||||||
const insert = db.prepare(
|
const insert = db.prepare(
|
||||||
'INSERT OR IGNORE INTO app_settings (key, value) VALUES (?, ?)'
|
'INSERT OR IGNORE INTO app_settings (key, value) VALUES (?, ?)'
|
||||||
@@ -276,6 +280,19 @@ function migrateLibraryAiSettings(db: Database.Database): void {
|
|||||||
prompt_translate TEXT
|
prompt_translate TEXT
|
||||||
);
|
);
|
||||||
`)
|
`)
|
||||||
|
|
||||||
|
// Add max_tokens columns if they don't exist yet
|
||||||
|
const row = db
|
||||||
|
.prepare("SELECT sql FROM sqlite_master WHERE type='table' AND name='library_ai_settings'")
|
||||||
|
.get() as { sql: string } | undefined
|
||||||
|
if (row && !row.sql.includes('max_tokens_tag')) {
|
||||||
|
db.exec(`
|
||||||
|
ALTER TABLE library_ai_settings ADD COLUMN max_tokens_tag INTEGER;
|
||||||
|
ALTER TABLE library_ai_settings ADD COLUMN max_tokens_describe INTEGER;
|
||||||
|
ALTER TABLE library_ai_settings ADD COLUMN max_tokens_extract INTEGER;
|
||||||
|
ALTER TABLE library_ai_settings ADD COLUMN max_tokens_translate INTEGER;
|
||||||
|
`)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
function migrateLibrariesType(db: Database.Database): void {
|
function migrateLibrariesType(db: Database.Database): void {
|
||||||
|
|||||||
@@ -60,6 +60,19 @@ async function generateAiImage(src: string, dest: string): Promise<void> {
|
|||||||
fs.renameSync(tmp, dest)
|
fs.renameSync(tmp, dest)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Generate a grayscale, contrast-normalised PNG for local OCR (Tesseract).
|
||||||
|
* PNG is lossless and avoids JPEG artefacts that can degrade OCR accuracy. */
|
||||||
|
async function generateOcrImage(src: string, dest: string): Promise<void> {
|
||||||
|
const tmp = dest + '.tmp'
|
||||||
|
await sharp(src)
|
||||||
|
.resize(AI_IMAGE_WIDTH, undefined, { withoutEnlargement: true })
|
||||||
|
.grayscale()
|
||||||
|
.normalise()
|
||||||
|
.png()
|
||||||
|
.toFile(tmp)
|
||||||
|
fs.renameSync(tmp, dest)
|
||||||
|
}
|
||||||
|
|
||||||
/** Run a child process and collect stderr. Resolves on exit code 0, rejects otherwise. */
|
/** Run a child process and collect stderr. Resolves on exit code 0, rejects otherwise. */
|
||||||
function run(bin: string, args: string[]): Promise<void> {
|
function run(bin: string, args: string[]): Promise<void> {
|
||||||
return new Promise((resolve, reject) => {
|
return new Promise((resolve, reject) => {
|
||||||
@@ -190,6 +203,24 @@ export async function getAiImagePath(
|
|||||||
return cacheFile
|
return cacheFile
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the absolute path to a preprocessed PNG suitable for local OCR.
|
||||||
|
* The image is converted to grayscale and contrast-normalised for better
|
||||||
|
* Tesseract accuracy. Cached with an `_ocr` suffix.
|
||||||
|
*/
|
||||||
|
export async function getOcrImagePath(
|
||||||
|
absoluteFilePath: string,
|
||||||
|
libraryId: string
|
||||||
|
): Promise<string> {
|
||||||
|
ensureCacheDir()
|
||||||
|
const key = cacheKey(libraryId, absoluteFilePath)
|
||||||
|
const cacheFile = path.join(CACHE_DIR, key + '_ocr.png')
|
||||||
|
const cached = getCachedPath(cacheFile, absoluteFilePath)
|
||||||
|
if (cached) return cached
|
||||||
|
await generateOcrImage(absoluteFilePath, cacheFile)
|
||||||
|
return cacheFile
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the absolute path to a cached thumbnail JPEG for the given file.
|
* Returns the absolute path to a cached thumbnail JPEG for the given file.
|
||||||
* Generates it on first call (or when the source has been modified).
|
* Generates it on first call (or when the source has been modified).
|
||||||
|
|||||||
Reference in New Issue
Block a user