From 5ba73b2e560ebcd9605ebf2ba4e466d9d27e4300 Mon Sep 17 00:00:00 2001
From: Garret Patti <42485635+garretpatti@users.noreply.github.com>
Date: Mon, 13 Apr 2026 08:16:34 -0400
Subject: [PATCH 1/3] doom scroll and viewer improvements

- move play/pause to clicking the video directly; remove dedicated button
- replace emoji mute icons with flat minimal SVGs
- add view-in-library button in doom scroll that navigates to the file's
  directory and opens it in the regular viewer
- add display text overlay button in doom scroll and image lightbox;
  shows extracted text (translated by default when available) in a
  semi-transparent box at the bottom; toggle between translated/original;
  in doom scroll the T key also toggles the overlay
- hide tag panel by default in image lightbox and video player modal

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 src/components/DoomScrollView.tsx         | 112 +++++++++++++++++++---
 src/components/mixed/ImageLightbox.tsx    |  82 +++++++++++++++-
 src/components/mixed/MixedView.tsx        |  34 ++++++-
 src/components/mixed/VideoPlayerModal.tsx |   4 +-
 4 files changed, 212 insertions(+), 20 deletions(-)
diff --git a/src/components/DoomScrollView.tsx b/src/components/DoomScrollView.tsx
index 4de737d..f6d6734 100644
--- a/src/components/DoomScrollView.tsx
+++ b/src/components/DoomScrollView.tsx
@@ -14,6 +14,7 @@ interface Props {
   items: DoomScrollItem[]
   videoContext?: 'mixed' | 'movies' | 'tv'
   onClose: () => void
+  onViewInLibrary?: (item: DoomScrollItem) => void
 }
 
 const HISTORY_CAP = 100
@@ -26,7 +27,7 @@ function pickRandom(items: DoomScrollItem[], excludeRecent: DoomScrollItem[]): D
   return pool[Math.floor(Math.random() * pool.length)]
 }
 
-export default function DoomScrollView({ items, videoContext = 'mixed', onClose }: Props) {
+export default function DoomScrollView({ items, videoContext = 'mixed', onClose, onViewInLibrary }: Props) {
   const settings = useUserSettings()
   const settingsMuted = videoContext === 'mixed' ? settings.mixedMuted : videoContext === 'movies' ? settings.moviesMuted : settings.tvMuted
 
@@ -40,6 +41,12 @@ export default function DoomScrollView({ items, videoContext = 'mixed', onClose
   const [autoPlayEnabled, setAutoPlayEnabled] = useState(false)
   const [autoPlaySeconds, setAutoPlaySeconds] = useState(5)
 
+  // Text overlay state
+  const [extractedText, setExtractedText] = useState<string | null>(null)
+  const [translatedText, setTranslatedText] = useState<string | null>(null)
+  const [showTextOverlay, setShowTextOverlay] = useState(false)
+  const [showOriginal, setShowOriginal] = useState(false)
+
   const videoRef = useRef<HTMLVideoElement>(null)
   const cooldownRef = useRef(false)
   const touchStartY = useRef<number | null>(null)
@@ -48,6 +55,9 @@ export default function DoomScrollView({ items, videoContext = 'mixed', onClose
   const isVideo = current?.mediaType === 'video'
   const backCount = history.length - 1 - historyIndex
 
+  // Derived: what text to display in the overlay
+  const displayText = (translatedText && !showOriginal) ? translatedText : extractedText
+
   const goNext = useCallback(() => {
     if (items.length === 0) return
     setHistoryIndex((idx) => {
@@ -114,11 +124,30 @@ export default function DoomScrollView({ items, videoContext = 'mixed', onClose
     return () => clearTimeout(id)
   }, [autoPlayEnabled, isPaused, autoPlaySeconds, current?.url, goNext])
 
+  // On item change: reset the text-overlay state, then load any stored extracted/translated text for the item.
+  useEffect(() => {
+    setExtractedText(null) // clear the previous item's text before the request is issued
+    setTranslatedText(null)
+    setShowTextOverlay(false)
+    setShowOriginal(false)
+    if (!current?.itemKey) return
+    fetch(`/api/ai-tagging/fields?itemKey=${encodeURIComponent(current.itemKey)}`)
+      .then((r) => r.json())
+      .then((data: { extractedText: string | null; extractedTextTranslated: string | null }) => {
+        setExtractedText(data.extractedText)
+        setTranslatedText(data.extractedTextTranslated)
+      })
+      .catch(() => {}) // best-effort: failures are swallowed. NOTE(review): r.ok is never checked, so a JSON error body still reaches the setters above
+  }, [current?.itemKey]) // NOTE(review): no cleanup/abort is returned — a late response for a previous item can still set state after navigation
+
   useEffect(() => {
     const handleKey = (e: KeyboardEvent) => {
       if (e.key === 'Escape') { onClose(); return }
       if (e.key === 'ArrowDown' || e.key === ' ' || e.key === 'PageDown') { e.preventDefault(); navigate('next') }
       if (e.key === 'ArrowUp' || e.key === 'PageUp') { e.preventDefault(); navigate('prev') }
+      if (e.key === 't' || e.key === 'T') {
+        if (extractedText) setShowTextOverlay((v) => !v)
+      }
     }
     const handleWheel = (e: WheelEvent) => {
       e.preventDefault()
@@ -147,7 +176,7 @@ export default function DoomScrollView({ items, videoContext = 'mixed', onClose
       document.removeEventListener('touchend', handleTouchEnd)
       document.body.style.overflow = ''
     }
-  }, [navigate, onClose])
+  }, [navigate, onClose, extractedText])
 
   return (
     <div className="fixed inset-0 z-50 flex flex-col" style={{ backgroundColor: '#000' }}>
@@ -219,8 +248,9 @@ export default function DoomScrollView({ items, videoContext = 'mixed', onClose
             loop={!autoPlayEnabled}
             muted={localMuted}
             playsInline
-            className="max-w-full max-h-full object-contain"
+            className="max-w-full max-h-full object-contain cursor-pointer"
             style={{ backgroundColor: '#000' }}
+            onClick={() => setIsPaused((v) => !v)}
           />
         ) : current?.mediaType === 'image' ? (
           // eslint-disable-next-line @next/next/no-img-element
@@ -233,32 +263,88 @@ export default function DoomScrollView({ items, videoContext = 'mixed', onClose
         ) : null}
       </div>
 
-      {/* Bottom bar: mute | filename | play-pause */}
+      {/* Text overlay */}
+      {showTextOverlay && displayText && (
+        <div
+          className="absolute bottom-16 left-4 right-4 z-20 rounded-xl p-4"
+          style={{ backgroundColor: 'rgba(0,0,0,0.75)' }}
+          onClick={(e) => e.stopPropagation()}
+        >
+          {extractedText && translatedText && (
+            <div className="flex justify-end mb-2">
+              <button
+                onClick={() => setShowOriginal((v) => !v)}
+                className="text-xs px-2 py-0.5 rounded-full"
+                style={{ backgroundColor: 'rgba(255,255,255,0.15)', color: 'rgba(255,255,255,0.7)' }}
+              >
+                {showOriginal ? 'Show Translation' : 'Show Original'}
+              </button>
+            </div>
+          )}
+          <p className="text-sm whitespace-pre-wrap" style={{ color: 'rgba(255,255,255,0.9)' }}>
+            {displayText}
+          </p>
+        </div>
+      )}
+
+      {/* Bottom bar: mute | filename | action buttons */}
       <div className="absolute bottom-0 left-0 right-0 flex items-center gap-3 px-4 pb-3 pt-2 z-10">
         <div className="w-9 flex-shrink-0">
           {isVideo && (
             <button
               onClick={() => setLocalMuted((v) => !v)}
-              className="w-9 h-9 rounded-full flex items-center justify-center text-base transition-opacity hover:opacity-100 opacity-70"
+              className="w-9 h-9 rounded-full flex items-center justify-center transition-opacity hover:opacity-100 opacity-70"
               style={{ backgroundColor: 'rgba(0,0,0,0.5)', color: '#fff' }}
               aria-label={localMuted ? 'Unmute' : 'Mute'}
             >
-              {localMuted ? '🔇' : '🔊'}
+              {localMuted ? (
+                <svg width="18" height="18" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2" strokeLinecap="round" strokeLinejoin="round">
+                  <polygon points="11 5 6 9 2 9 2 15 6 15 11 19 11 5"/>
+                  <line x1="23" y1="9" x2="17" y2="15"/>
+                  <line x1="17" y1="9" x2="23" y2="15"/>
+                </svg>
+              ) : (
+                <svg width="18" height="18" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2" strokeLinecap="round" strokeLinejoin="round">
+                  <polygon points="11 5 6 9 2 9 2 15 6 15 11 19 11 5"/>
+                  <path d="M19.07 4.93a10 10 0 0 1 0 14.14"/>
+                  <path d="M15.54 8.46a5 5 0 0 1 0 7.07"/>
+                </svg>
+              )}
             </button>
           )}
         </div>
         <span className="flex-1 text-xs truncate text-center" style={{ color: 'rgba(255,255,255,0.4)' }}>
           {current?.name}
         </span>
-        <div className="w-9 flex-shrink-0 flex justify-end">
-          {isVideo && (
+        <div className="flex-shrink-0 flex items-center gap-1">
+          {extractedText && (
             <button
-              onClick={() => setIsPaused((v) => !v)}
-              className="w-9 h-9 rounded-full flex items-center justify-center text-sm transition-opacity hover:opacity-100 opacity-70"
-              style={{ backgroundColor: 'rgba(0,0,0,0.5)', color: '#fff' }}
-              aria-label={isPaused ? 'Play' : 'Pause'}
+              onClick={() => setShowTextOverlay((v) => !v)}
+              className="w-9 h-9 rounded-full flex items-center justify-center transition-opacity hover:opacity-100 opacity-70"
+              style={{
+                backgroundColor: showTextOverlay ? 'rgba(255,255,255,0.2)' : 'rgba(0,0,0,0.5)',
+                color: '#fff',
+              }}
+              aria-label={showTextOverlay ? 'Hide text' : 'Show text'}
             >
-              {isPaused ? '▶' : '⏸'}
+              <svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2" strokeLinecap="round" strokeLinejoin="round">
+                <line x1="3" y1="6" x2="21" y2="6"/>
+                <line x1="3" y1="12" x2="15" y2="12"/>
+                <line x1="3" y1="18" x2="18" y2="18"/>
+              </svg>
+            </button>
+          )}
+          {onViewInLibrary && current?.itemKey && (
+            <button
+              onClick={(e) => { e.stopPropagation(); onViewInLibrary(current) }}
+              className="w-9 h-9 rounded-full flex items-center justify-center transition-opacity hover:opacity-100 opacity-70"
+              style={{ backgroundColor: 'rgba(0,0,0,0.5)', color: '#fff' }}
+              aria-label="View in library"
+            >
+              <svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2" strokeLinecap="round" strokeLinejoin="round">
+                <path d="M3 9l9-7 9 7v11a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2z"/>
+                <polyline points="9 22 9 12 15 12 15 22"/>
+              </svg>
             </button>
           )}
         </div>
diff --git a/src/components/mixed/ImageLightbox.tsx b/src/components/mixed/ImageLightbox.tsx
index f3c413e..c49c52e 100644
--- a/src/components/mixed/ImageLightbox.tsx
+++ b/src/components/mixed/ImageLightbox.tsx
@@ -16,9 +16,7 @@ interface Props {
 
 export default function ImageLightbox({ url, name, onClose, onPrev, onNext, itemKey, onTagsChanged, onAiTag }: Props) {
   const overlayRef = useRef<HTMLDivElement>(null)
-  const [showTags, setShowTags] = useState(
-    () => !!itemKey && typeof window !== 'undefined' && window.innerWidth >= 1280
-  )
+  const [showTags, setShowTags] = useState(false)
   const [aiTagging, setAiTagging] = useState(false)
   const [aiTagError, setAiTagError] = useState<string | null>(null)
   const [tagRefreshKey, setTagRefreshKey] = useState(0)
@@ -30,9 +28,16 @@ export default function ImageLightbox({ url, name, onClose, onPrev, onNext, item
   const [extractError, setExtractError] = useState<string | null>(null)
   const [retranslating, setRetranslating] = useState(false)
 
+  // Text overlay state
+  const [showTextOverlay, setShowTextOverlay] = useState(false)
+  const [showOriginal, setShowOriginal] = useState(false)
+
   // Determine if this is an image file (for text extraction controls)
   const isImage = /\.(jpe?g|png|gif|webp|bmp|tiff?)$/i.test(name)
 
+  // Derived: what text to display in the overlay
+  const displayText = (translatedText && !showOriginal) ? translatedText : extractedText
+
   // Fetch existing AI fields on mount / item change
   useEffect(() => {
     if (!itemKey) return
@@ -76,6 +81,31 @@ export default function ImageLightbox({ url, name, onClose, onPrev, onNext, item
           {name}
         </span>
         <div className="flex items-center gap-2 flex-shrink-0">
+          {/* Text overlay button — only shown when extracted text exists */}
+          {extractedText && (
+            <button
+              onClick={(e) => { e.stopPropagation(); setShowTextOverlay((v) => !v) }}
+              className="w-12 h-12 rounded-full flex items-center justify-center transition-colors"
+              style={{
+                backgroundColor: showTextOverlay ? 'var(--accent)' : 'var(--surface)',
+                color: showTextOverlay ? '#fff' : 'var(--text-primary)',
+              }}
+              onMouseEnter={(e) => {
+                if (!showTextOverlay) (e.currentTarget as HTMLElement).style.backgroundColor = 'var(--surface-hover)'
+              }}
+              onMouseLeave={(e) => {
+                if (!showTextOverlay) (e.currentTarget as HTMLElement).style.backgroundColor = 'var(--surface)'
+              }}
+              aria-label={showTextOverlay ? 'Hide text' : 'Show text'}
+              title="Display text"
+            >
+              <svg width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2" strokeLinecap="round" strokeLinejoin="round">
+                <line x1="3" y1="6" x2="21" y2="6"/>
+                <line x1="3" y1="12" x2="15" y2="12"/>
+                <line x1="3" y1="18" x2="18" y2="18"/>
+              </svg>
+            </button>
+          )}
           {itemKey && (
             <button
               onClick={(e) => { e.stopPropagation(); setShowTags((v) => !v) }}
@@ -179,6 +209,29 @@ export default function ImageLightbox({ url, name, onClose, onPrev, onNext, item
                 ›
               </button>
             )}
+            {/* Text overlay */}
+            {showTextOverlay && displayText && (
+              <div
+                className="absolute bottom-4 left-4 right-4 z-10 rounded-xl p-4"
+                style={{ backgroundColor: 'rgba(0,0,0,0.75)' }}
+                onClick={(e) => e.stopPropagation()}
+              >
+                {extractedText && translatedText && (
+                  <div className="flex justify-end mb-2">
+                    <button
+                      onClick={() => setShowOriginal((v) => !v)}
+                      className="text-xs px-2 py-0.5 rounded-full"
+                      style={{ backgroundColor: 'rgba(255,255,255,0.15)', color: 'rgba(255,255,255,0.7)' }}
+                    >
+                      {showOriginal ? 'Show Translation' : 'Show Original'}
+                    </button>
+                  </div>
+                )}
+                <p className="text-sm whitespace-pre-wrap" style={{ color: 'rgba(255,255,255,0.9)' }}>
+                  {displayText}
+                </p>
+              </div>
+            )}
           </div>
           {/* Tag panel */}
           <div
@@ -343,6 +396,29 @@ export default function ImageLightbox({ url, name, onClose, onPrev, onNext, item
               ›
             </button>
           )}
+          {/* Text overlay */}
+          {showTextOverlay && displayText && (
+            <div
+              className="absolute bottom-4 left-4 right-4 z-10 rounded-xl p-4"
+              style={{ backgroundColor: 'rgba(0,0,0,0.75)' }}
+              onClick={(e) => e.stopPropagation()}
+            >
+              {extractedText && translatedText && (
+                <div className="flex justify-end mb-2">
+                  <button
+                    onClick={() => setShowOriginal((v) => !v)}
+                    className="text-xs px-2 py-0.5 rounded-full"
+                    style={{ backgroundColor: 'rgba(255,255,255,0.15)', color: 'rgba(255,255,255,0.7)' }}
+                  >
+                    {showOriginal ? 'Show Translation' : 'Show Original'}
+                  </button>
+                </div>
+              )}
+              <p className="text-sm whitespace-pre-wrap" style={{ color: 'rgba(255,255,255,0.9)' }}>
+                {displayText}
+              </p>
+            </div>
+          )}
         </div>
       )}
     </div>
diff --git a/src/components/mixed/MixedView.tsx b/src/components/mixed/MixedView.tsx
index bfae948..de652f9 100644
--- a/src/components/mixed/MixedView.tsx
+++ b/src/components/mixed/MixedView.tsx
@@ -41,6 +41,7 @@ export default function MixedView({ libraryId, initialPath }: Props) {
   const [doomScrollEntries, setDoomScrollEntries] = useState<FileEntry[]>([])
   const [doomScrollEntriesLoading, setDoomScrollEntriesLoading] = useState(false)
   const [doomScrollEntriesLoaded, setDoomScrollEntriesLoaded] = useState(false)
+  const [pendingOpen, setPendingOpen] = useState<string | null>(null)
 
   const toggleTag = (tagId: string) =>
     setSelectedTagIds((prev) => {
@@ -234,9 +235,39 @@ export default function MixedView({ libraryId, initialPath }: Props) {
 
   // When filters are active, doom scroll uses filteredEntries (already filtered by search/tags).
   // When no filters, doom scroll uses files recursively under the current directory.
+  // In both cases entries come from recursive API calls so entry.name is the full relative path.
   const doomScrollItems: DoomScrollItem[] = (filtersActive ? filteredEntries : doomScrollEntries)
     .filter((e) => e.type === 'file' && (e.mediaType === 'video' || e.mediaType === 'image') && e.url && isBrowserPlayable(e.name))
-    .map((e) => ({ url: e.url!, name: e.name, mediaType: e.mediaType as 'video' | 'image' }))
+    .map((e) => ({
+      url: e.url!,
+      name: e.name,
+      mediaType: e.mediaType as 'video' | 'image',
+      itemKey: `${libraryId}:mixed_file:${encodeURIComponent(e.name)}`,
+    }))
+
+  const handleViewInLibrary = useCallback((item: DoomScrollItem) => { // leave doom scroll and open the item's folder in the regular viewer
+    if (!item.itemKey) return
+    // item.name is the library-relative path the itemKey was built from; reading it directly avoids
+    // re-parsing the key, where split()[1] is undefined (decoded to "undefined") for any other key shape.
+    const rel = item.name
+    const dir = rel.slice(0, Math.max(0, rel.lastIndexOf('/'))) // parent directory; '' for a top-level file
+    setDoomScrollActive(false)
+    setPendingOpen(rel) // picked up by the auto-open effect once a listing is available
+    loadPath(dir)
+  }, [loadPath])
+
+  // Auto-open the file queued by "view in library" once the current listing has a file entry with its basename.
+  useEffect(() => {
+    if (!pendingOpen || !listing) return
+    const filename = pendingOpen.split('/').pop()! // basename; split() never returns an empty array, so pop() is defined
+    const entry = listing.entries.find((e) => e.name === filename && e.type === 'file') // NOTE(review): basename-only match against whichever listing is loaded — confirm a same-named file in the previous directory cannot match before loadPath completes
+    if (!entry) return // pendingOpen stays set; the effect runs again on the next listing change
+    setPendingOpen(null) // consume the request before opening
+    const idx = mediaEntries.indexOf(entry)
+    openMediaEntry(entry, idx >= 0 ? idx : 0) // index 0 is used when the entry is not in mediaEntries
+  // openMediaEntry is defined inline and depends on stable state; listing/pendingOpen are the real triggers (mediaEntries is also read from the closure — NOTE(review): confirm it is current whenever listing changes)
+  // eslint-disable-next-line react-hooks/exhaustive-deps
+  }, [listing, pendingOpen])
 
   return (
     <>
@@ -245,6 +276,7 @@ export default function MixedView({ libraryId, initialPath }: Props) {
           items={doomScrollItems}
           videoContext="mixed"
           onClose={() => setDoomScrollActive(false)}
+          onViewInLibrary={handleViewInLibrary}
         />
       )}
 
diff --git a/src/components/mixed/VideoPlayerModal.tsx b/src/components/mixed/VideoPlayerModal.tsx
index b1a1031..fc01156 100644
--- a/src/components/mixed/VideoPlayerModal.tsx
+++ b/src/components/mixed/VideoPlayerModal.tsx
@@ -22,9 +22,7 @@ export default function VideoPlayerModal({ url, name, onClose, onPrev, onNext, i
   const loop     = context === 'mixed' ? settings.mixedLoop     : context === 'movies' ? settings.moviesLoop     : settings.tvLoop
   const muted    = context === 'mixed' ? settings.mixedMuted    : context === 'movies' ? settings.moviesMuted    : settings.tvMuted
   const overlayRef = useRef<HTMLDivElement>(null)
-  const [showTags, setShowTags] = useState(
-    () => !!itemKey && typeof window !== 'undefined' && window.innerWidth >= 1280
-  )
+  const [showTags, setShowTags] = useState(false)
   const [aiTagging, setAiTagging] = useState(false)
   const [aiTagError, setAiTagError] = useState<string | null>(null)
   const [tagRefreshKey, setTagRefreshKey] = useState(0)
-- 
2.49.1


From cd9a83ea90f18bdd2de1f3b3a7e7ec8b0da89b43 Mon Sep 17 00:00:00 2001
From: Garret Patti <42485635+garretpatti@users.noreply.github.com>
Date: Mon, 13 Apr 2026 09:08:43 -0400
Subject: [PATCH 2/3] send higher resolution images to AI vision endpoints

Add getAiImagePath() to thumbnails.ts (at most 1920px wide, JPEG quality
90, no upscaling), cached separately from display thumbnails via an _ai
suffix. Swap all four image-to-AI code paths in ai-tagger.ts (batch
tagging, single-item tagging, describe, extract text) to use the new
high-res image instead of the 400px display thumbnail, improving OCR
accuracy on dense text. Video frame extraction is unchanged.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 src/lib/ai-tagger.ts  | 10 +++++-----
 src/lib/thumbnails.ts | 32 ++++++++++++++++++++++++++++++++
 2 files changed, 37 insertions(+), 5 deletions(-)

diff --git a/src/lib/ai-tagger.ts b/src/lib/ai-tagger.ts
index fc04423..da2b951 100644
--- a/src/lib/ai-tagger.ts
+++ b/src/lib/ai-tagger.ts
@@ -4,7 +4,7 @@ import type { Library, Tag, TagCategory } from '@/types'
 import { getDb } from './db'
 import { getAiConfig, getEffectiveAiConfig, getPreferredLanguage } from './app-settings'
 import { getTags, getCategories, addTagToItem, getActiveCategoryIdsForLibrary, getResolvedTagsForItem } from './tags'
-import { getThumbnailPath, getVideoFramePaths } from './thumbnails'
+import { getAiImagePath, getVideoFramePaths } from './thumbnails'
 import { findFile } from './media-utils'
 import { getLibrary, resolveLibraryRoot } from './libraries'
 
@@ -279,7 +279,7 @@ export async function runAiTagging(library: Library, libraryRoot: string): Promi
         const framePaths = await getVideoFramePaths(resolvedMedia.path, library.id, VIDEO_FRAME_PERCENTAGES)
         base64Images = framePaths.map((p) => fs.readFileSync(p, 'base64'))
       } else {
-        const thumbnailPath = await getThumbnailPath(resolvedMedia.path, library.id, 'image')
+        const thumbnailPath = await getAiImagePath(resolvedMedia.path, library.id)
         base64Images = [fs.readFileSync(thumbnailPath, 'base64')]
       }
 
@@ -367,7 +367,7 @@ export async function tagSingleItem(itemKey: string): Promise<string[]> {
     const framePaths = await getVideoFramePaths(imagePath.path, libraryId, VIDEO_FRAME_PERCENTAGES)
     base64Images = framePaths.map((p) => fs.readFileSync(p, 'base64'))
   } else {
-    const thumbnailPath = await getThumbnailPath(imagePath.path, libraryId, 'image')
+    const thumbnailPath = await getAiImagePath(imagePath.path, libraryId)
     base64Images = [fs.readFileSync(thumbnailPath, 'base64')]
   }
 
@@ -529,7 +529,7 @@ export async function generateItemDescription(itemKey: string): Promise<string>
     const framePaths = await getVideoFramePaths(resolvedMedia.path, libraryId, VIDEO_FRAME_PERCENTAGES)
     base64Images = framePaths.map((p) => fs.readFileSync(p, 'base64'))
   } else {
-    const thumbnailPath = await getThumbnailPath(resolvedMedia.path, libraryId, 'image')
+    const thumbnailPath = await getAiImagePath(resolvedMedia.path, libraryId)
     base64Images = [fs.readFileSync(thumbnailPath, 'base64')]
   }
 
@@ -587,7 +587,7 @@ export async function extractItemText(itemKey: string): Promise<{ extractedText:
     throw Object.assign(new Error('Text extraction is only available for images'), { code: 'NO_IMAGE' })
   }
 
-  const thumbnailPath = await getThumbnailPath(resolvedMedia.path, libraryId, 'image')
+  const thumbnailPath = await getAiImagePath(resolvedMedia.path, libraryId)
   const base64Images = [fs.readFileSync(thumbnailPath, 'base64')]
 
   const systemPrompt = `You are an OCR assistant. Extract ALL text visible in the image exactly as it appears. Preserve line breaks and formatting.${config.promptExtract ? ' ' + config.promptExtract : ''} If there is no text in the image, respond with exactly: [NO TEXT]`
diff --git a/src/lib/thumbnails.ts b/src/lib/thumbnails.ts
index 04f80e1..144a446 100644
--- a/src/lib/thumbnails.ts
+++ b/src/lib/thumbnails.ts
@@ -7,6 +7,8 @@ import sharp from 'sharp'
 const CACHE_DIR = path.resolve(process.cwd(), '.thumbnails')
 const THUMBNAIL_WIDTH = 400
 const JPEG_QUALITY = 75
+const AI_IMAGE_WIDTH = 1920
+const AI_JPEG_QUALITY = 90
 
 /** Ensure the cache directory exists. */
 function ensureCacheDir(): void {
@@ -47,6 +49,17 @@ async function generateImageThumbnail(src: string, dest: string): Promise<void>
   fs.renameSync(tmp, dest)
 }
 
+/** Generate a high-resolution JPEG for AI vision use. EXIF orientation is applied first so the
+ *  model sees upright pixels; images narrower than AI_IMAGE_WIDTH keep their native size. */
+async function generateAiImage(src: string, dest: string): Promise<void> {
+  const tmp = dest + '.tmp' // written first, then renamed into place below
+  await sharp(src)
+    .rotate() // no angle: auto-orient from EXIF, since sharp drops the orientation tag on output
+    .resize(AI_IMAGE_WIDTH, undefined, { withoutEnlargement: true })
+    .jpeg({ quality: AI_JPEG_QUALITY }).toFile(tmp)
+  fs.renameSync(tmp, dest)
+}
+
 /** Run a child process and collect stderr. Resolves on exit code 0, rejects otherwise. */
 function run(bin: string, args: string[]): Promise<void> {
   return new Promise((resolve, reject) => {
@@ -158,6 +171,25 @@ export async function getVideoFramePaths(
   return framePaths
 }
 
+/**
+ * Resolve the JPEG that is handed to AI vision APIs for a given source image.
+ * The file is produced by generateAiImage and stored in CACHE_DIR under the
+ * regular cache key plus an `_ai.jpg` suffix.
+ * It is regenerated whenever getCachedPath reports no usable cached copy.
+ */
+export async function getAiImagePath(
+  absoluteFilePath: string,
+  libraryId: string
+): Promise<string> {
+  ensureCacheDir()
+  const aiCachePath = path.join(CACHE_DIR, `${cacheKey(libraryId, absoluteFilePath)}_ai.jpg`)
+  const existing = getCachedPath(aiCachePath, absoluteFilePath)
+  if (!existing) {
+    await generateAiImage(absoluteFilePath, aiCachePath)
+    return aiCachePath
+  }
+  return existing
+}
+
 /**
  * Returns the absolute path to a cached thumbnail JPEG for the given file.
  * Generates it on first call (or when the source has been modified).
-- 
2.49.1


From b0fc275a52658cacf15d1c704ad89af5592ee772 Mon Sep 17 00:00:00 2001
From: Garret Patti <42485635+garretpatti@users.noreply.github.com>
Date: Mon, 13 Apr 2026 09:19:32 -0400
Subject: [PATCH 3/3] add extract text button to doom scroll mode

Show an extract-text button (document icon) in the bottom bar when the
current image has no extracted text yet. Clicking it calls the extract-text
API, shows a spinner while in progress, and on success replaces itself with
the text-lines display button and auto-opens the overlay. Error state briefly
turns the button red. Resets on every item navigation alongside the other
text state. Hidden for videos and items without an itemKey.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 src/components/DoomScrollView.tsx | 57 +++++++++++++++++++++++++++++--
 1 file changed, 55 insertions(+), 2 deletions(-)

diff --git a/src/components/DoomScrollView.tsx b/src/components/DoomScrollView.tsx
index f6d6734..04972d9 100644
--- a/src/components/DoomScrollView.tsx
+++ b/src/components/DoomScrollView.tsx
@@ -46,6 +46,8 @@ export default function DoomScrollView({ items, videoContext = 'mixed', onClose,
   const [translatedText, setTranslatedText] = useState<string | null>(null)
   const [showTextOverlay, setShowTextOverlay] = useState(false)
   const [showOriginal, setShowOriginal] = useState(false)
+  const [extracting, setExtracting] = useState(false)
+  const [extractError, setExtractError] = useState<string | null>(null)
 
   const videoRef = useRef<HTMLVideoElement>(null)
   const cooldownRef = useRef(false)
@@ -130,6 +132,8 @@ export default function DoomScrollView({ items, videoContext = 'mixed', onClose,
     setTranslatedText(null)
     setShowTextOverlay(false)
     setShowOriginal(false)
+    setExtracting(false)
+    setExtractError(null)
     if (!current?.itemKey) return
     fetch(`/api/ai-tagging/fields?itemKey=${encodeURIComponent(current.itemKey)}`)
       .then((r) => r.json())
@@ -178,6 +182,32 @@ export default function DoomScrollView({ items, videoContext = 'mixed', onClose,
     }
   }, [navigate, onClose, extractedText])
 
+  const handleExtractText = async () => { // POST the current item to the extract-text API and show the returned text
+    if (!current?.itemKey) return
+    setExtracting(true) // shows the spinner and disables the button while the request runs
+    setExtractError(null)
+    try {
+      const res = await fetch('/api/ai-tagging/extract-text', {
+        method: 'POST',
+        headers: { 'Content-Type': 'application/json' },
+        body: JSON.stringify({ itemKey: current.itemKey }),
+      })
+      if (!res.ok) {
+        const data = await res.json().catch(() => ({})) // tolerate an error body that is not JSON
+        throw new Error((data as { error?: string }).error ?? 'Extraction failed')
+      }
+      const result = await res.json() // NOTE(review): nothing checks that the requested item is still current — a result arriving after navigation is applied to the new item
+      setExtractedText(result.extractedText || null) // an empty string is stored as null
+      setTranslatedText(result.translatedText || null) // NOTE(review): the fields endpoint calls this extractedTextTranslated — confirm this endpoint returns translatedText
+      if (result.extractedText) setShowTextOverlay(true) // auto-open the overlay only when text was found
+    } catch (err) {
+      setExtractError(err instanceof Error ? err.message : 'Extraction failed')
+      setTimeout(() => setExtractError(null), 4000) // error state clears after 4 s; NOTE(review): the timer is never cancelled (unmount or a newer error)
+    } finally {
+      setExtracting(false)
+    }
+  }
+
   return (
     <div className="fixed inset-0 z-50 flex flex-col" style={{ backgroundColor: '#000' }}>
       {/* Keyframe for auto-play progress bar */}
@@ -317,7 +347,7 @@ export default function DoomScrollView({ items, videoContext = 'mixed', onClose,
           {current?.name}
         </span>
         <div className="flex-shrink-0 flex items-center gap-1">
-          {extractedText && (
+          {extractedText ? (
             <button
               onClick={() => setShowTextOverlay((v) => !v)}
               className="w-9 h-9 rounded-full flex items-center justify-center transition-opacity hover:opacity-100 opacity-70"
@@ -333,7 +363,30 @@ export default function DoomScrollView({ items, videoContext = 'mixed', onClose,
                 <line x1="3" y1="18" x2="18" y2="18"/>
               </svg>
             </button>
-          )}
+          ) : current?.itemKey && current?.mediaType === 'image' ? (
+            <button
+              onClick={handleExtractText}
+              disabled={extracting}
+              className="w-9 h-9 rounded-full flex items-center justify-center transition-opacity hover:opacity-100 opacity-70 disabled:opacity-40"
+              style={{
+                backgroundColor: extractError ? 'rgba(127,29,29,0.8)' : 'rgba(0,0,0,0.5)',
+                color: extractError ? '#fca5a5' : '#fff',
+              }}
+              aria-label="Extract text"
+            >
+              {extracting ? (
+                <span className="animate-spin" style={{ display: 'inline-block', fontSize: '0.75rem' }}>⟳</span>
+              ) : (
+                <svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2" strokeLinecap="round" strokeLinejoin="round">
+                  <path d="M14 2H6a2 2 0 0 0-2 2v16a2 2 0 0 0 2 2h12a2 2 0 0 0 2-2V8z"/>
+                  <polyline points="14 2 14 8 20 8"/>
+                  <line x1="16" y1="13" x2="8" y2="13"/>
+                  <line x1="16" y1="17" x2="8" y2="17"/>
+                  <polyline points="10 9 9 9 8 9"/>
+                </svg>
+              )}
+            </button>
+          ) : null}
           {onViewInLibrary && current?.itemKey && (
             <button
               onClick={(e) => { e.stopPropagation(); onViewInLibrary(current) }}
-- 
2.49.1