send higher resolution images to AI vision endpoints
Add getAiImagePath() to thumbnails.ts (at most 1920px wide, JPEG quality 90, no upscaling); its output is cached separately from display thumbnails via an _ai suffix. Switch all four image-to-AI code paths in ai-tagger.ts (text extraction, description, library batch tagging, and single-item tagging) to use the new high-res image instead of the 400px display thumbnail, improving OCR accuracy on dense text. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -4,7 +4,7 @@ import type { Library, Tag, TagCategory } from '@/types'
|
||||
import { getDb } from './db'
|
||||
import { getAiConfig, getEffectiveAiConfig, getPreferredLanguage } from './app-settings'
|
||||
import { getTags, getCategories, addTagToItem, getActiveCategoryIdsForLibrary, getResolvedTagsForItem } from './tags'
|
||||
import { getThumbnailPath, getVideoFramePaths } from './thumbnails'
|
||||
import { getAiImagePath, getVideoFramePaths } from './thumbnails'
|
||||
import { findFile } from './media-utils'
|
||||
import { getLibrary, resolveLibraryRoot } from './libraries'
|
||||
|
||||
@@ -279,7 +279,7 @@ export async function runAiTagging(library: Library, libraryRoot: string): Promi
|
||||
const framePaths = await getVideoFramePaths(resolvedMedia.path, library.id, VIDEO_FRAME_PERCENTAGES)
|
||||
base64Images = framePaths.map((p) => fs.readFileSync(p, 'base64'))
|
||||
} else {
|
||||
const thumbnailPath = await getThumbnailPath(resolvedMedia.path, library.id, 'image')
|
||||
const thumbnailPath = await getAiImagePath(resolvedMedia.path, library.id)
|
||||
base64Images = [fs.readFileSync(thumbnailPath, 'base64')]
|
||||
}
|
||||
|
||||
@@ -367,7 +367,7 @@ export async function tagSingleItem(itemKey: string): Promise<string[]> {
|
||||
const framePaths = await getVideoFramePaths(imagePath.path, libraryId, VIDEO_FRAME_PERCENTAGES)
|
||||
base64Images = framePaths.map((p) => fs.readFileSync(p, 'base64'))
|
||||
} else {
|
||||
const thumbnailPath = await getThumbnailPath(imagePath.path, libraryId, 'image')
|
||||
const thumbnailPath = await getAiImagePath(imagePath.path, libraryId)
|
||||
base64Images = [fs.readFileSync(thumbnailPath, 'base64')]
|
||||
}
|
||||
|
||||
@@ -529,7 +529,7 @@ export async function generateItemDescription(itemKey: string): Promise<string>
|
||||
const framePaths = await getVideoFramePaths(resolvedMedia.path, libraryId, VIDEO_FRAME_PERCENTAGES)
|
||||
base64Images = framePaths.map((p) => fs.readFileSync(p, 'base64'))
|
||||
} else {
|
||||
const thumbnailPath = await getThumbnailPath(resolvedMedia.path, libraryId, 'image')
|
||||
const thumbnailPath = await getAiImagePath(resolvedMedia.path, libraryId)
|
||||
base64Images = [fs.readFileSync(thumbnailPath, 'base64')]
|
||||
}
|
||||
|
||||
@@ -587,7 +587,7 @@ export async function extractItemText(itemKey: string): Promise<{ extractedText:
|
||||
throw Object.assign(new Error('Text extraction is only available for images'), { code: 'NO_IMAGE' })
|
||||
}
|
||||
|
||||
const thumbnailPath = await getThumbnailPath(resolvedMedia.path, libraryId, 'image')
|
||||
const thumbnailPath = await getAiImagePath(resolvedMedia.path, libraryId)
|
||||
const base64Images = [fs.readFileSync(thumbnailPath, 'base64')]
|
||||
|
||||
const systemPrompt = `You are an OCR assistant. Extract ALL text visible in the image exactly as it appears. Preserve line breaks and formatting.${config.promptExtract ? ' ' + config.promptExtract : ''} If there is no text in the image, respond with exactly: [NO TEXT]`
|
||||
|
||||
@@ -7,6 +7,8 @@ import sharp from 'sharp'
|
||||
const CACHE_DIR = path.resolve(process.cwd(), '.thumbnails')
|
||||
const THUMBNAIL_WIDTH = 400
|
||||
const JPEG_QUALITY = 75
|
||||
const AI_IMAGE_WIDTH = 1920
|
||||
const AI_JPEG_QUALITY = 90
|
||||
|
||||
/** Ensure the cache directory exists. */
|
||||
function ensureCacheDir(): void {
|
||||
@@ -47,6 +49,17 @@ async function generateImageThumbnail(src: string, dest: string): Promise<void>
|
||||
fs.renameSync(tmp, dest)
|
||||
}
|
||||
|
||||
/**
 * Generate a high-resolution JPEG for AI vision use.
 *
 * The image is auto-oriented from its EXIF Orientation tag before resizing:
 * sharp does not carry metadata into the output by default, so without this
 * step a photo stored rotated would be sent to the model sideways.
 * Images narrower than AI_IMAGE_WIDTH are not upscaled — they are converted
 * at their native size.
 *
 * The output is first written to `dest + '.tmp'` and then renamed onto `dest`.
 * If any step fails, the temporary file is removed (best effort) and the
 * original error is rethrown.
 *
 * @param src - Path of the source image.
 * @param dest - Path the generated JPEG is written to.
 * @throws Any error raised by sharp or by the final rename.
 */
async function generateAiImage(src: string, dest: string): Promise<void> {
  const tmp = dest + '.tmp'
  try {
    await sharp(src)
      .rotate() // no angle: auto-orient using the EXIF Orientation tag
      .resize(AI_IMAGE_WIDTH, undefined, { withoutEnlargement: true })
      .jpeg({ quality: AI_JPEG_QUALITY })
      .toFile(tmp)
    fs.renameSync(tmp, dest)
  } catch (err) {
    // Do not leave a partial or orphaned temp file in the cache directory.
    try {
      fs.rmSync(tmp, { force: true })
    } catch {
      // Cleanup is best effort; the original failure is the one worth reporting.
    }
    throw err
  }
}
|
||||
|
||||
/** Run a child process and collect stderr. Resolves on exit code 0, rejects otherwise. */
|
||||
function run(bin: string, args: string[]): Promise<void> {
|
||||
return new Promise((resolve, reject) => {
|
||||
@@ -158,6 +171,25 @@ export async function getVideoFramePaths(
|
||||
return framePaths
|
||||
}
|
||||
|
||||
/**
 * Resolve the path of the AI-vision rendition of an image, creating it when
 * no usable cached copy exists.
 *
 * The rendition is a JPEG at most AI_IMAGE_WIDTH pixels wide, encoded at
 * AI_JPEG_QUALITY. It lives in the thumbnail cache directory under the same
 * cache key as the display thumbnail, distinguished by an `_ai` suffix, so
 * display thumbnails are left untouched.
 *
 * @param absoluteFilePath - Absolute path of the source image.
 * @param libraryId - Id of the library the file belongs to (part of the cache key).
 * @returns Absolute path of the cached high-resolution JPEG.
 */
export async function getAiImagePath(
  absoluteFilePath: string,
  libraryId: string
): Promise<string> {
  ensureCacheDir()

  const aiCacheFile = path.join(CACHE_DIR, cacheKey(libraryId, absoluteFilePath) + '_ai.jpg')
  const existing = getCachedPath(aiCacheFile, absoluteFilePath)

  if (!existing) {
    // No usable cache entry was reported: (re)generate the rendition.
    await generateAiImage(absoluteFilePath, aiCacheFile)
    return aiCacheFile
  }

  return existing
}
|
||||
|
||||
/**
|
||||
* Returns the absolute path to a cached thumbnail JPEG for the given file.
|
||||
* Generates it on first call (or when the source has been modified).
|
||||
|
||||
Reference in New Issue
Block a user