send higher resolution images to AI vision endpoints

Add getAiImagePath() to thumbnails.ts (up to 1920px wide, quality 90, no upscaling), cached separately from display thumbnails via an _ai suffix. Swap all four image-to-AI code paths in ai-tagger.ts (batch tagging, single-item tagging, describe, extract text) to use the new high-res image instead of the 400px display thumbnail, improving OCR accuracy on dense text.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
doom scroll and viewer improvements
2026-04-13 09:08:43 -04:00 · 2026-04-13 08:16:34 -04:00
6 changed files with 249 additions and 25 deletions
--- a/src/components/DoomScrollView.tsx
+++ b/src/components/DoomScrollView.tsx
@@ -14,6 +14,7 @@ interface Props {
  items: DoomScrollItem[]
  videoContext?: 'mixed' | 'movies' | 'tv'
  onClose: () => void
+  onViewInLibrary?: (item: DoomScrollItem) => void
 }

 const HISTORY_CAP = 100
@@ -26,7 +27,7 @@ function pickRandom(items: DoomScrollItem[], excludeRecent: DoomScrollItem[]): D
  return pool[Math.floor(Math.random() * pool.length)]
 }

-export default function DoomScrollView({ items, videoContext = 'mixed', onClose }: Props) {
+export default function DoomScrollView({ items, videoContext = 'mixed', onClose, onViewInLibrary }: Props) {
  const settings = useUserSettings()
  const settingsMuted = videoContext === 'mixed' ? settings.mixedMuted : videoContext === 'movies' ? settings.moviesMuted : settings.tvMuted

@@ -40,6 +41,12 @@ export default function DoomScrollView({ items, videoContext = 'mixed', onClose
  const [autoPlayEnabled, setAutoPlayEnabled] = useState(false)
  const [autoPlaySeconds, setAutoPlaySeconds] = useState(5)

+  // Text overlay state
+  const [extractedText, setExtractedText] = useState<string | null>(null)
+  const [translatedText, setTranslatedText] = useState<string | null>(null)
+  const [showTextOverlay, setShowTextOverlay] = useState(false)
+  const [showOriginal, setShowOriginal] = useState(false)
+
  const videoRef = useRef<HTMLVideoElement>(null)
  const cooldownRef = useRef(false)
  const touchStartY = useRef<number | null>(null)
@@ -48,6 +55,9 @@ export default function DoomScrollView({ items, videoContext = 'mixed', onClose
  const isVideo = current?.mediaType === 'video'
  const backCount = history.length - 1 - historyIndex

+  // Derived: what text to display in the overlay
+  const displayText = (translatedText && !showOriginal) ? translatedText : extractedText
+
  const goNext = useCallback(() => {
    if (items.length === 0) return
    setHistoryIndex((idx) => {
@@ -114,11 +124,30 @@ export default function DoomScrollView({ items, videoContext = 'mixed', onClose
    return () => clearTimeout(id)
  }, [autoPlayEnabled, isPaused, autoPlaySeconds, current?.url, goNext])

+  // Fetch extracted text for the current item. Overlay state is reset first so the
+  // previous item's text is never shown against the new item.
+  useEffect(() => {
+    setExtractedText(null)
+    setTranslatedText(null)
+    setShowTextOverlay(false)
+    setShowOriginal(false)
+    if (!current?.itemKey) return
+    // Abort the in-flight request when the item changes (or on unmount); otherwise a
+    // slow response for an earlier item could overwrite the text of the current one.
+    const controller = new AbortController()
+    fetch(`/api/ai-tagging/fields?itemKey=${encodeURIComponent(current.itemKey)}`, { signal: controller.signal })
+      .then((r) => {
+        // Do not treat an error payload as field data.
+        if (!r.ok) throw new Error(`HTTP ${r.status}`)
+        return r.json()
+      })
+      .then((data: { extractedText?: string | null; extractedTextTranslated?: string | null }) => {
+        if (controller.signal.aborted) return
+        setExtractedText(data.extractedText ?? null)
+        setTranslatedText(data.extractedTextTranslated ?? null)
+      })
+      // Best-effort: on failure or abort the text overlay simply stays unavailable.
+      .catch(() => {})
+    return () => controller.abort()
+  }, [current?.itemKey])
+
  useEffect(() => {
    const handleKey = (e: KeyboardEvent) => {
      if (e.key === 'Escape') { onClose(); return }
      if (e.key === 'ArrowDown' || e.key === ' ' || e.key === 'PageDown') { e.preventDefault(); navigate('next') }
      if (e.key === 'ArrowUp' || e.key === 'PageUp') { e.preventDefault(); navigate('prev') }
+      if (e.key === 't' || e.key === 'T') {
+        if (extractedText) setShowTextOverlay((v) => !v)
+      }
    }
    const handleWheel = (e: WheelEvent) => {
      e.preventDefault()
@@ -147,7 +176,7 @@ export default function DoomScrollView({ items, videoContext = 'mixed', onClose
      document.removeEventListener('touchend', handleTouchEnd)
      document.body.style.overflow = ''
    }
-  }, [navigate, onClose])
+  }, [navigate, onClose, extractedText])

  return (
    <div className="fixed inset-0 z-50 flex flex-col" style={{ backgroundColor: '#000' }}>
@@ -219,8 +248,9 @@ export default function DoomScrollView({ items, videoContext = 'mixed', onClose
            loop={!autoPlayEnabled}
            muted={localMuted}
            playsInline
-            className="max-w-full max-h-full object-contain"
+            className="max-w-full max-h-full object-contain cursor-pointer"
            style={{ backgroundColor: '#000' }}
+            onClick={() => setIsPaused((v) => !v)}
          />
        ) : current?.mediaType === 'image' ? (
          // eslint-disable-next-line @next/next/no-img-element
@@ -233,32 +263,88 @@ export default function DoomScrollView({ items, videoContext = 'mixed', onClose
        ) : null}
      </div>

-      {/* Bottom bar: mute | filename | play-pause */}
+      {/* Text overlay */}
+      {showTextOverlay && displayText && (
+        <div
+          className="absolute bottom-16 left-4 right-4 z-20 rounded-xl p-4"
+          style={{ backgroundColor: 'rgba(0,0,0,0.75)' }}
+          onClick={(e) => e.stopPropagation()}
+        >
+          {extractedText && translatedText && (
+            <div className="flex justify-end mb-2">
+              <button
+                onClick={() => setShowOriginal((v) => !v)}
+                className="text-xs px-2 py-0.5 rounded-full"
+                style={{ backgroundColor: 'rgba(255,255,255,0.15)', color: 'rgba(255,255,255,0.7)' }}
+              >
+                {showOriginal ? 'Show Translation' : 'Show Original'}
+              </button>
+            </div>
+          )}
+          <p className="text-sm whitespace-pre-wrap" style={{ color: 'rgba(255,255,255,0.9)' }}>
+            {displayText}
+          </p>
+        </div>
+      )}
+
+      {/* Bottom bar: mute | filename | action buttons */}
      <div className="absolute bottom-0 left-0 right-0 flex items-center gap-3 px-4 pb-3 pt-2 z-10">
        <div className="w-9 flex-shrink-0">
          {isVideo && (
            <button
              onClick={() => setLocalMuted((v) => !v)}
-              className="w-9 h-9 rounded-full flex items-center justify-center text-base transition-opacity hover:opacity-100 opacity-70"
+              className="w-9 h-9 rounded-full flex items-center justify-center transition-opacity hover:opacity-100 opacity-70"
              style={{ backgroundColor: 'rgba(0,0,0,0.5)', color: '#fff' }}
              aria-label={localMuted ? 'Unmute' : 'Mute'}
            >
-              {localMuted ? '🔇' : '🔊'}
+              {localMuted ? (
+                <svg width="18" height="18" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2" strokeLinecap="round" strokeLinejoin="round">
+                  <polygon points="11 5 6 9 2 9 2 15 6 15 11 19 11 5"/>
+                  <line x1="23" y1="9" x2="17" y2="15"/>
+                  <line x1="17" y1="9" x2="23" y2="15"/>
+                </svg>
+              ) : (
+                <svg width="18" height="18" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2" strokeLinecap="round" strokeLinejoin="round">
+                  <polygon points="11 5 6 9 2 9 2 15 6 15 11 19 11 5"/>
+                  <path d="M19.07 4.93a10 10 0 0 1 0 14.14"/>
+                  <path d="M15.54 8.46a5 5 0 0 1 0 7.07"/>
+                </svg>
+              )}
            </button>
          )}
        </div>
        <span className="flex-1 text-xs truncate text-center" style={{ color: 'rgba(255,255,255,0.4)' }}>
          {current?.name}
        </span>
-        <div className="w-9 flex-shrink-0 flex justify-end">
-          {isVideo && (
+        <div className="flex-shrink-0 flex items-center gap-1">
+          {extractedText && (
            <button
-              onClick={() => setIsPaused((v) => !v)}
-              className="w-9 h-9 rounded-full flex items-center justify-center text-sm transition-opacity hover:opacity-100 opacity-70"
-              style={{ backgroundColor: 'rgba(0,0,0,0.5)', color: '#fff' }}
-              aria-label={isPaused ? 'Play' : 'Pause'}
+              onClick={() => setShowTextOverlay((v) => !v)}
+              className="w-9 h-9 rounded-full flex items-center justify-center transition-opacity hover:opacity-100 opacity-70"
+              style={{
+                backgroundColor: showTextOverlay ? 'rgba(255,255,255,0.2)' : 'rgba(0,0,0,0.5)',
+                color: '#fff',
+              }}
+              aria-label={showTextOverlay ? 'Hide text' : 'Show text'}
            >
-              {isPaused ? '▶' : '⏸'}
+              <svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2" strokeLinecap="round" strokeLinejoin="round">
+                <line x1="3" y1="6" x2="21" y2="6"/>
+                <line x1="3" y1="12" x2="15" y2="12"/>
+                <line x1="3" y1="18" x2="18" y2="18"/>
+              </svg>
+            </button>
+          )}
+          {onViewInLibrary && current?.itemKey && (
+            <button
+              onClick={(e) => { e.stopPropagation(); onViewInLibrary(current) }}
+              className="w-9 h-9 rounded-full flex items-center justify-center transition-opacity hover:opacity-100 opacity-70"
+              style={{ backgroundColor: 'rgba(0,0,0,0.5)', color: '#fff' }}
+              aria-label="View in library"
+            >
+              <svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2" strokeLinecap="round" strokeLinejoin="round">
+                <path d="M3 9l9-7 9 7v11a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2z"/>
+                <polyline points="9 22 9 12 15 12 15 22"/>
+              </svg>
            </button>
          )}
        </div>
--- a/src/components/mixed/ImageLightbox.tsx
+++ b/src/components/mixed/ImageLightbox.tsx
@@ -16,9 +16,7 @@ interface Props {

 export default function ImageLightbox({ url, name, onClose, onPrev, onNext, itemKey, onTagsChanged, onAiTag }: Props) {
  const overlayRef = useRef<HTMLDivElement>(null)
-  const [showTags, setShowTags] = useState(
-    () => !!itemKey && typeof window !== 'undefined' && window.innerWidth >= 1280
-  )
+  const [showTags, setShowTags] = useState(false)
  const [aiTagging, setAiTagging] = useState(false)
  const [aiTagError, setAiTagError] = useState<string | null>(null)
  const [tagRefreshKey, setTagRefreshKey] = useState(0)
@@ -30,9 +28,16 @@ export default function ImageLightbox({ url, name, onClose, onPrev, onNext, item
  const [extractError, setExtractError] = useState<string | null>(null)
  const [retranslating, setRetranslating] = useState(false)

+  // Text overlay state
+  const [showTextOverlay, setShowTextOverlay] = useState(false)
+  const [showOriginal, setShowOriginal] = useState(false)
+
  // Determine if this is an image file (for text extraction controls)
  const isImage = /\.(jpe?g|png|gif|webp|bmp|tiff?)$/i.test(name)

+  // Derived: what text to display in the overlay
+  const displayText = (translatedText && !showOriginal) ? translatedText : extractedText
+
  // Fetch existing AI fields on mount / item change
  useEffect(() => {
    if (!itemKey) return
@@ -76,6 +81,31 @@ export default function ImageLightbox({ url, name, onClose, onPrev, onNext, item
          {name}
        </span>
        <div className="flex items-center gap-2 flex-shrink-0">
+          {/* Text overlay button — only shown when extracted text exists */}
+          {extractedText && (
+            <button
+              onClick={(e) => { e.stopPropagation(); setShowTextOverlay((v) => !v) }}
+              className="w-12 h-12 rounded-full flex items-center justify-center transition-colors"
+              style={{
+                backgroundColor: showTextOverlay ? 'var(--accent)' : 'var(--surface)',
+                color: showTextOverlay ? '#fff' : 'var(--text-primary)',
+              }}
+              onMouseEnter={(e) => {
+                if (!showTextOverlay) (e.currentTarget as HTMLElement).style.backgroundColor = 'var(--surface-hover)'
+              }}
+              onMouseLeave={(e) => {
+                if (!showTextOverlay) (e.currentTarget as HTMLElement).style.backgroundColor = 'var(--surface)'
+              }}
+              aria-label={showTextOverlay ? 'Hide text' : 'Show text'}
+              title="Display text"
+            >
+              <svg width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2" strokeLinecap="round" strokeLinejoin="round">
+                <line x1="3" y1="6" x2="21" y2="6"/>
+                <line x1="3" y1="12" x2="15" y2="12"/>
+                <line x1="3" y1="18" x2="18" y2="18"/>
+              </svg>
+            </button>
+          )}
          {itemKey && (
            <button
              onClick={(e) => { e.stopPropagation(); setShowTags((v) => !v) }}
@@ -179,6 +209,29 @@ export default function ImageLightbox({ url, name, onClose, onPrev, onNext, item
                ›
              </button>
            )}
+            {/* Text overlay */}
+            {showTextOverlay && displayText && (
+              <div
+                className="absolute bottom-4 left-4 right-4 z-10 rounded-xl p-4"
+                style={{ backgroundColor: 'rgba(0,0,0,0.75)' }}
+                onClick={(e) => e.stopPropagation()}
+              >
+                {extractedText && translatedText && (
+                  <div className="flex justify-end mb-2">
+                    <button
+                      onClick={() => setShowOriginal((v) => !v)}
+                      className="text-xs px-2 py-0.5 rounded-full"
+                      style={{ backgroundColor: 'rgba(255,255,255,0.15)', color: 'rgba(255,255,255,0.7)' }}
+                    >
+                      {showOriginal ? 'Show Translation' : 'Show Original'}
+                    </button>
+                  </div>
+                )}
+                <p className="text-sm whitespace-pre-wrap" style={{ color: 'rgba(255,255,255,0.9)' }}>
+                  {displayText}
+                </p>
+              </div>
+            )}
          </div>
          {/* Tag panel */}
          <div
@@ -343,6 +396,29 @@ export default function ImageLightbox({ url, name, onClose, onPrev, onNext, item
              ›
            </button>
          )}
+          {/* Text overlay */}
+          {showTextOverlay && displayText && (
+            <div
+              className="absolute bottom-4 left-4 right-4 z-10 rounded-xl p-4"
+              style={{ backgroundColor: 'rgba(0,0,0,0.75)' }}
+              onClick={(e) => e.stopPropagation()}
+            >
+              {extractedText && translatedText && (
+                <div className="flex justify-end mb-2">
+                  <button
+                    onClick={() => setShowOriginal((v) => !v)}
+                    className="text-xs px-2 py-0.5 rounded-full"
+                    style={{ backgroundColor: 'rgba(255,255,255,0.15)', color: 'rgba(255,255,255,0.7)' }}
+                  >
+                    {showOriginal ? 'Show Translation' : 'Show Original'}
+                  </button>
+                </div>
+              )}
+              <p className="text-sm whitespace-pre-wrap" style={{ color: 'rgba(255,255,255,0.9)' }}>
+                {displayText}
+              </p>
+            </div>
+          )}
        </div>
      )}
    </div>
--- a/src/components/mixed/MixedView.tsx
+++ b/src/components/mixed/MixedView.tsx
@@ -41,6 +41,7 @@ export default function MixedView({ libraryId, initialPath }: Props) {
  const [doomScrollEntries, setDoomScrollEntries] = useState<FileEntry[]>([])
  const [doomScrollEntriesLoading, setDoomScrollEntriesLoading] = useState(false)
  const [doomScrollEntriesLoaded, setDoomScrollEntriesLoaded] = useState(false)
+  const [pendingOpen, setPendingOpen] = useState<string | null>(null)

  const toggleTag = (tagId: string) =>
    setSelectedTagIds((prev) => {
@@ -234,9 +235,39 @@ export default function MixedView({ libraryId, initialPath }: Props) {

  // When filters are active, doom scroll uses filteredEntries (already filtered by search/tags).
  // When no filters, doom scroll uses files recursively under the current directory.
+  // In both cases entries come from recursive API calls so entry.name is the full relative path.
  const doomScrollItems: DoomScrollItem[] = (filtersActive ? filteredEntries : doomScrollEntries)
    .filter((e) => e.type === 'file' && (e.mediaType === 'video' || e.mediaType === 'image') && e.url && isBrowserPlayable(e.name))
-    .map((e) => ({ url: e.url!, name: e.name, mediaType: e.mediaType as 'video' | 'image' }))
+    // itemKey format: `<libraryId>:mixed_file:<URI-encoded entry name>`. handleViewInLibrary
+    // splits on ':mixed_file:' and decodes the remainder to recover the path.
+    .map((e) => ({
+      url: e.url!,
+      name: e.name,
+      mediaType: e.mediaType as 'video' | 'image',
+      itemKey: `${libraryId}:mixed_file:${encodeURIComponent(e.name)}`,
+    }))
+
+  // "View in library": leave doom scroll, navigate to the directory that contains the
+  // item, and record the item's relative path in pendingOpen so it can be opened later.
+  // itemKey is expected in the form `<libraryId>:mixed_file:<URI-encoded relative path>`.
+  const handleViewInLibrary = useCallback((item: DoomScrollItem) => {
+    // Keys without the marker (or with nothing after it) cannot be mapped to a path;
+    // without this guard decodeURIComponent(undefined) would yield the string "undefined".
+    const encodedRel = item.itemKey?.split(':mixed_file:')[1]
+    if (!encodedRel) return
+    let rel: string
+    try {
+      rel = decodeURIComponent(encodedRel)
+    } catch {
+      return // malformed percent-encoding (URIError): nothing sensible to open
+    }
+    // Everything before the last '/' is the containing directory ('' for a root-level file).
+    const dir = rel.split('/').slice(0, -1).join('/')
+    setDoomScrollActive(false)
+    setPendingOpen(rel)
+    loadPath(dir)
+  }, [loadPath])
+
+  // Auto-open a file after navigating to its directory (from "view in library").
+  // pendingOpen holds the file's relative path (set by handleViewInLibrary); it is
+  // cleared as soon as a matching entry is found so the file is opened only once.
+  // NOTE(review): the match is by bare filename against whatever `listing` is current.
+  // If this runs before the target directory's listing has replaced the previous one,
+  // a same-named file in the previous directory would be opened instead, and if no
+  // match is ever found pendingOpen stays set — confirm against loadPath's behavior.
+  useEffect(() => {
+    if (!pendingOpen || !listing) return
+    // Listing entries are compared by filename only, so drop the directory part.
+    const filename = pendingOpen.split('/').pop()!
+    const entry = listing.entries.find((e) => e.name === filename && e.type === 'file')
+    if (!entry) return
+    setPendingOpen(null)
+    // Fall back to index 0 when the entry is not among mediaEntries.
+    const idx = mediaEntries.indexOf(entry)
+    openMediaEntry(entry, idx >= 0 ? idx : 0)
+  // openMediaEntry is defined inline and depends on stable state; listing/pendingOpen are the real triggers
+  // eslint-disable-next-line react-hooks/exhaustive-deps
+  }, [listing, pendingOpen])

  return (
    <>
@@ -245,6 +276,7 @@ export default function MixedView({ libraryId, initialPath }: Props) {
          items={doomScrollItems}
          videoContext="mixed"
          onClose={() => setDoomScrollActive(false)}
+          onViewInLibrary={handleViewInLibrary}
        />
      )}

--- a/src/components/mixed/VideoPlayerModal.tsx
+++ b/src/components/mixed/VideoPlayerModal.tsx
@@ -22,9 +22,7 @@ export default function VideoPlayerModal({ url, name, onClose, onPrev, onNext, i
  const loop     = context === 'mixed' ? settings.mixedLoop     : context === 'movies' ? settings.moviesLoop     : settings.tvLoop
  const muted    = context === 'mixed' ? settings.mixedMuted    : context === 'movies' ? settings.moviesMuted    : settings.tvMuted
  const overlayRef = useRef<HTMLDivElement>(null)
-  const [showTags, setShowTags] = useState(
-    () => !!itemKey && typeof window !== 'undefined' && window.innerWidth >= 1280
-  )
+  const [showTags, setShowTags] = useState(false)
  const [aiTagging, setAiTagging] = useState(false)
  const [aiTagError, setAiTagError] = useState<string | null>(null)
  const [tagRefreshKey, setTagRefreshKey] = useState(0)
--- a/src/lib/ai-tagger.ts
+++ b/src/lib/ai-tagger.ts
@@ -4,7 +4,7 @@ import type { Library, Tag, TagCategory } from '@/types'
 import { getDb } from './db'
 import { getAiConfig, getEffectiveAiConfig, getPreferredLanguage } from './app-settings'
 import { getTags, getCategories, addTagToItem, getActiveCategoryIdsForLibrary, getResolvedTagsForItem } from './tags'
-import { getThumbnailPath, getVideoFramePaths } from './thumbnails'
+import { getAiImagePath, getVideoFramePaths } from './thumbnails'
 import { findFile } from './media-utils'
 import { getLibrary, resolveLibraryRoot } from './libraries'

@@ -279,7 +279,7 @@ export async function runAiTagging(library: Library, libraryRoot: string): Promi
        const framePaths = await getVideoFramePaths(resolvedMedia.path, library.id, VIDEO_FRAME_PERCENTAGES)
        base64Images = framePaths.map((p) => fs.readFileSync(p, 'base64'))
      } else {
-        const thumbnailPath = await getThumbnailPath(resolvedMedia.path, library.id, 'image')
+        const thumbnailPath = await getAiImagePath(resolvedMedia.path, library.id)
        base64Images = [fs.readFileSync(thumbnailPath, 'base64')]
      }

@@ -367,7 +367,7 @@ export async function tagSingleItem(itemKey: string): Promise<string[]> {
    const framePaths = await getVideoFramePaths(imagePath.path, libraryId, VIDEO_FRAME_PERCENTAGES)
    base64Images = framePaths.map((p) => fs.readFileSync(p, 'base64'))
  } else {
-    const thumbnailPath = await getThumbnailPath(imagePath.path, libraryId, 'image')
+    const thumbnailPath = await getAiImagePath(imagePath.path, libraryId)
    base64Images = [fs.readFileSync(thumbnailPath, 'base64')]
  }

@@ -529,7 +529,7 @@ export async function generateItemDescription(itemKey: string): Promise<string>
    const framePaths = await getVideoFramePaths(resolvedMedia.path, libraryId, VIDEO_FRAME_PERCENTAGES)
    base64Images = framePaths.map((p) => fs.readFileSync(p, 'base64'))
  } else {
-    const thumbnailPath = await getThumbnailPath(resolvedMedia.path, libraryId, 'image')
+    const thumbnailPath = await getAiImagePath(resolvedMedia.path, libraryId)
    base64Images = [fs.readFileSync(thumbnailPath, 'base64')]
  }

@@ -587,7 +587,7 @@ export async function extractItemText(itemKey: string): Promise<{ extractedText:
    throw Object.assign(new Error('Text extraction is only available for images'), { code: 'NO_IMAGE' })
  }

-  const thumbnailPath = await getThumbnailPath(resolvedMedia.path, libraryId, 'image')
+  const thumbnailPath = await getAiImagePath(resolvedMedia.path, libraryId)
  const base64Images = [fs.readFileSync(thumbnailPath, 'base64')]

  const systemPrompt = `You are an OCR assistant. Extract ALL text visible in the image exactly as it appears. Preserve line breaks and formatting.${config.promptExtract ? ' ' + config.promptExtract : ''} If there is no text in the image, respond with exactly: [NO TEXT]`
--- a/src/lib/thumbnails.ts
+++ b/src/lib/thumbnails.ts
@@ -7,6 +7,8 @@ import sharp from 'sharp'
 const CACHE_DIR = path.resolve(process.cwd(), '.thumbnails')
 const THUMBNAIL_WIDTH = 400
 const JPEG_QUALITY = 75
+const AI_IMAGE_WIDTH = 1920
+const AI_JPEG_QUALITY = 90

 /** Ensure the cache directory exists. */
 function ensureCacheDir(): void {
@@ -47,6 +49,17 @@ async function generateImageThumbnail(src: string, dest: string): Promise<void>
  fs.renameSync(tmp, dest)
 }

+/** Generate a high-resolution JPEG for AI vision use. Images narrower than
+ *  AI_IMAGE_WIDTH are not upscaled — they are converted at their native size.
+ *  The output is written to `dest + '.tmp'` and renamed onto `dest` afterwards,
+ *  so `dest` is only replaced once sharp has finished writing. */
+async function generateAiImage(src: string, dest: string): Promise<void> {
+  const tmp = dest + '.tmp'
+  await sharp(src)
+    // Apply the EXIF orientation to the pixels: the JPEG output does not keep
+    // the source metadata by default, so without this a camera photo stored
+    // rotated would be sent sideways and its text would be harder to read.
+    .rotate()
+    .resize(AI_IMAGE_WIDTH, undefined, { withoutEnlargement: true })
+    .jpeg({ quality: AI_JPEG_QUALITY })
+    .toFile(tmp)
+  fs.renameSync(tmp, dest)
+}
+
 /** Run a child process and collect stderr. Resolves on exit code 0, rejects otherwise. */
 function run(bin: string, args: string[]): Promise<void> {
  return new Promise((resolve, reject) => {
@@ -158,6 +171,25 @@ export async function getVideoFramePaths(
  return framePaths
 }

+/**
+ * Resolve the cached high-resolution JPEG used as input for AI vision calls
+ * (at most AI_IMAGE_WIDTH = 1920px wide, JPEG quality AI_JPEG_QUALITY = 90).
+ * The file lives in the same cache directory as the display thumbnails but
+ * carries an `_ai` suffix, so it is stored separately from them.
+ * When getCachedPath reports no usable cached copy for the source file, the
+ * image is generated first; either way the path to the JPEG is returned.
+ */
+export async function getAiImagePath(
+  absoluteFilePath: string,
+  libraryId: string
+): Promise<string> {
+  ensureCacheDir()
+  const aiCacheFile = path.join(CACHE_DIR, `${cacheKey(libraryId, absoluteFilePath)}_ai.jpg`)
+  const existing = getCachedPath(aiCacheFile, absoluteFilePath)
+  if (!existing) {
+    await generateAiImage(absoluteFilePath, aiCacheFile)
+    return aiCacheFile
+  }
+  return existing
+}
+
 /**
 * Returns the absolute path to a cached thumbnail JPEG for the given file.
 * Generates it on first call (or when the source has been modified).