From cd9a83ea90f18bdd2de1f3b3a7e7ec8b0da89b43 Mon Sep 17 00:00:00 2001 From: Garret Patti <42485635+garretpatti@users.noreply.github.com> Date: Mon, 13 Apr 2026 09:08:43 -0400 Subject: [PATCH] send higher resolution images to AI vision endpoints Add getAiImagePath() to thumbnails.ts (1920px wide, quality 90, no upscaling) cached separately from display thumbnails via an _ai suffix. Swap all four image-to-AI code paths in ai-tagger.ts (extract text, describe, batch tagging x2) to use the new high-res image instead of the 400px display thumbnail, improving OCR accuracy on dense text. Co-Authored-By: Claude Sonnet 4.6 --- src/lib/ai-tagger.ts | 10 +++++----- src/lib/thumbnails.ts | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 5 deletions(-) diff --git a/src/lib/ai-tagger.ts b/src/lib/ai-tagger.ts index fc04423..da2b951 100644 --- a/src/lib/ai-tagger.ts +++ b/src/lib/ai-tagger.ts @@ -4,7 +4,7 @@ import type { Library, Tag, TagCategory } from '@/types' import { getDb } from './db' import { getAiConfig, getEffectiveAiConfig, getPreferredLanguage } from './app-settings' import { getTags, getCategories, addTagToItem, getActiveCategoryIdsForLibrary, getResolvedTagsForItem } from './tags' -import { getThumbnailPath, getVideoFramePaths } from './thumbnails' +import { getAiImagePath, getVideoFramePaths } from './thumbnails' import { findFile } from './media-utils' import { getLibrary, resolveLibraryRoot } from './libraries' @@ -279,7 +279,7 @@ export async function runAiTagging(library: Library, libraryRoot: string): Promi const framePaths = await getVideoFramePaths(resolvedMedia.path, library.id, VIDEO_FRAME_PERCENTAGES) base64Images = framePaths.map((p) => fs.readFileSync(p, 'base64')) } else { - const thumbnailPath = await getThumbnailPath(resolvedMedia.path, library.id, 'image') + const thumbnailPath = await getAiImagePath(resolvedMedia.path, library.id) base64Images = [fs.readFileSync(thumbnailPath, 'base64')] } @@ -367,7 +367,7 @@ export async function tagSingleItem(itemKey: string): Promise { const framePaths = await getVideoFramePaths(imagePath.path, libraryId, VIDEO_FRAME_PERCENTAGES) base64Images = framePaths.map((p) => fs.readFileSync(p, 'base64')) } else { - const thumbnailPath = await getThumbnailPath(imagePath.path, libraryId, 'image') + const thumbnailPath = await getAiImagePath(imagePath.path, libraryId) base64Images = [fs.readFileSync(thumbnailPath, 'base64')] } @@ -529,7 +529,7 @@ export async function generateItemDescription(itemKey: string): Promise const framePaths = await getVideoFramePaths(resolvedMedia.path, libraryId, VIDEO_FRAME_PERCENTAGES) base64Images = framePaths.map((p) => fs.readFileSync(p, 'base64')) } else { - const thumbnailPath = await getThumbnailPath(resolvedMedia.path, libraryId, 'image') + const thumbnailPath = await getAiImagePath(resolvedMedia.path, libraryId) base64Images = [fs.readFileSync(thumbnailPath, 'base64')] } @@ -587,7 +587,7 @@ export async function extractItemText(itemKey: string): Promise<{ extractedText: throw Object.assign(new Error('Text extraction is only available for images'), { code: 'NO_IMAGE' }) } - const thumbnailPath = await getThumbnailPath(resolvedMedia.path, libraryId, 'image') + const thumbnailPath = await getAiImagePath(resolvedMedia.path, libraryId) const base64Images = [fs.readFileSync(thumbnailPath, 'base64')] const systemPrompt = `You are an OCR assistant. Extract ALL text visible in the image exactly as it appears. Preserve line breaks and formatting.${config.promptExtract ? ' ' + config.promptExtract : ''} If there is no text in the image, respond with exactly: [NO TEXT]` diff --git a/src/lib/thumbnails.ts b/src/lib/thumbnails.ts index 04f80e1..144a446 100644 --- a/src/lib/thumbnails.ts +++ b/src/lib/thumbnails.ts @@ -7,6 +7,8 @@ import sharp from 'sharp' const CACHE_DIR = path.resolve(process.cwd(), '.thumbnails') const THUMBNAIL_WIDTH = 400 const JPEG_QUALITY = 75 +const AI_IMAGE_WIDTH = 1920 +const AI_JPEG_QUALITY = 90 /** Ensure the cache directory exists. */ function ensureCacheDir(): void { @@ -47,6 +49,17 @@ async function generateImageThumbnail(src: string, dest: string): Promise fs.renameSync(tmp, dest) } +/** Generate a high-resolution JPEG for AI vision use. Images smaller than + * AI_IMAGE_WIDTH are not upscaled — they are converted at their native size. */ +async function generateAiImage(src: string, dest: string): Promise { + const tmp = dest + '.tmp' + await sharp(src) + .resize(AI_IMAGE_WIDTH, undefined, { withoutEnlargement: true }) + .jpeg({ quality: AI_JPEG_QUALITY }) + .toFile(tmp) + fs.renameSync(tmp, dest) +} + /** Run a child process and collect stderr. Resolves on exit code 0, rejects otherwise. */ function run(bin: string, args: string[]): Promise { return new Promise((resolve, reject) => { @@ -158,6 +171,25 @@ export async function getVideoFramePaths( return framePaths } +/** + * Returns the absolute path to a high-resolution JPEG suitable for AI vision + * APIs (1920px wide max, quality 90). Cached alongside display thumbnails with + * an `_ai` suffix so display performance is unaffected. + * Generates on first call or when the source file has been modified. + */ +export async function getAiImagePath( + absoluteFilePath: string, + libraryId: string +): Promise { + ensureCacheDir() + const key = cacheKey(libraryId, absoluteFilePath) + const cacheFile = path.join(CACHE_DIR, key + '_ai.jpg') + const cached = getCachedPath(cacheFile, absoluteFilePath) + if (cached) return cached + await generateAiImage(absoluteFilePath, cacheFile) + return cacheFile +} + /** * Returns the absolute path to a cached thumbnail JPEG for the given file. * Generates it on first call (or when the source has been modified).