doomscroll-improvements #23

Merged
gpatti merged 3 commits from doomscroll-improvements into main 2026-04-13 13:23:33 +00:00
6 changed files with 302 additions and 25 deletions

View File

@@ -14,6 +14,7 @@ interface Props {
items: DoomScrollItem[]
videoContext?: 'mixed' | 'movies' | 'tv'
onClose: () => void
onViewInLibrary?: (item: DoomScrollItem) => void
}
const HISTORY_CAP = 100
@@ -26,7 +27,7 @@ function pickRandom(items: DoomScrollItem[], excludeRecent: DoomScrollItem[]): D
return pool[Math.floor(Math.random() * pool.length)]
}
export default function DoomScrollView({ items, videoContext = 'mixed', onClose }: Props) {
export default function DoomScrollView({ items, videoContext = 'mixed', onClose, onViewInLibrary }: Props) {
const settings = useUserSettings()
const settingsMuted = videoContext === 'mixed' ? settings.mixedMuted : videoContext === 'movies' ? settings.moviesMuted : settings.tvMuted
@@ -40,6 +41,14 @@ export default function DoomScrollView({ items, videoContext = 'mixed', onClose
const [autoPlayEnabled, setAutoPlayEnabled] = useState(false)
const [autoPlaySeconds, setAutoPlaySeconds] = useState(5)
// Text overlay state
const [extractedText, setExtractedText] = useState<string | null>(null)
const [translatedText, setTranslatedText] = useState<string | null>(null)
const [showTextOverlay, setShowTextOverlay] = useState(false)
const [showOriginal, setShowOriginal] = useState(false)
const [extracting, setExtracting] = useState(false)
const [extractError, setExtractError] = useState<string | null>(null)
const videoRef = useRef<HTMLVideoElement>(null)
const cooldownRef = useRef(false)
const touchStartY = useRef<number | null>(null)
@@ -48,6 +57,9 @@ export default function DoomScrollView({ items, videoContext = 'mixed', onClose
const isVideo = current?.mediaType === 'video'
const backCount = history.length - 1 - historyIndex
// Derived: what text to display in the overlay
const displayText = (translatedText && !showOriginal) ? translatedText : extractedText
const goNext = useCallback(() => {
if (items.length === 0) return
setHistoryIndex((idx) => {
@@ -114,11 +126,32 @@ export default function DoomScrollView({ items, videoContext = 'mixed', onClose
return () => clearTimeout(id)
}, [autoPlayEnabled, isPaused, autoPlaySeconds, current?.url, goNext])
// Fetch previously extracted (and translated) text for the current item.
// All per-item overlay state is reset first so nothing from the previous item
// lingers while the request is in flight.
useEffect(() => {
  setExtractedText(null)
  setTranslatedText(null)
  setShowTextOverlay(false)
  setShowOriginal(false)
  setExtracting(false)
  setExtractError(null)

  // Aborted in the cleanup below so a slow response for a previous item can
  // never overwrite the state of the item that is now on screen.
  const controller = new AbortController()
  const itemKey = current?.itemKey

  if (itemKey) {
    fetch(`/api/ai-tagging/fields?itemKey=${encodeURIComponent(itemKey)}`, { signal: controller.signal })
      .then((r) => {
        // Do not treat an error payload as if it were the fields object.
        if (!r.ok) throw new Error(`Failed to load AI fields (${r.status})`)
        return r.json()
      })
      .then((data: { extractedText: string | null; extractedTextTranslated: string | null }) => {
        if (controller.signal.aborted) return
        // Normalise missing fields to null so state stays `string | null`.
        setExtractedText(data.extractedText ?? null)
        setTranslatedText(data.extractedTextTranslated ?? null)
      })
      .catch(() => {
        // Best-effort: on failure (or abort) the overlay simply has no text.
      })
  }

  return () => controller.abort()
}, [current?.itemKey])
useEffect(() => {
const handleKey = (e: KeyboardEvent) => {
if (e.key === 'Escape') { onClose(); return }
if (e.key === 'ArrowDown' || e.key === ' ' || e.key === 'PageDown') { e.preventDefault(); navigate('next') }
if (e.key === 'ArrowUp' || e.key === 'PageUp') { e.preventDefault(); navigate('prev') }
if (e.key === 't' || e.key === 'T') {
if (extractedText) setShowTextOverlay((v) => !v)
}
}
const handleWheel = (e: WheelEvent) => {
e.preventDefault()
@@ -147,7 +180,33 @@ export default function DoomScrollView({ items, videoContext = 'mixed', onClose
document.removeEventListener('touchend', handleTouchEnd)
document.body.style.overflow = ''
}
}, [navigate, onClose])
}, [navigate, onClose, extractedText])
/**
 * Ask the backend to extract text from the item currently on screen.
 *
 * On success the extracted/translated text is stored and the overlay is opened
 * when any text was found. On failure the error message is stored in
 * `extractError` and cleared again after four seconds. `extracting` is true
 * for the duration of the request.
 */
const handleExtractText = async () => {
  const itemKey = current?.itemKey
  if (!itemKey) return

  setExtracting(true)
  setExtractError(null)

  try {
    const response = await fetch('/api/ai-tagging/extract-text', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ itemKey }),
    })

    if (!response.ok) {
      // The error body may not be JSON; fall back to an empty object.
      const failure = await response.json().catch(() => ({}))
      throw new Error((failure as { error?: string }).error ?? 'Extraction failed')
    }

    const payload = await response.json()
    // Empty strings are stored as null so "no text" has a single representation.
    setExtractedText(payload.extractedText || null)
    setTranslatedText(payload.translatedText || null)
    if (payload.extractedText) {
      setShowTextOverlay(true)
    }
  } catch (err) {
    const message = err instanceof Error ? err.message : 'Extraction failed'
    setExtractError(message)
    // Let the error indication fade on its own.
    setTimeout(() => setExtractError(null), 4000)
  } finally {
    setExtracting(false)
  }
}
return (
<div className="fixed inset-0 z-50 flex flex-col" style={{ backgroundColor: '#000' }}>
@@ -219,8 +278,9 @@ export default function DoomScrollView({ items, videoContext = 'mixed', onClose
loop={!autoPlayEnabled}
muted={localMuted}
playsInline
className="max-w-full max-h-full object-contain"
className="max-w-full max-h-full object-contain cursor-pointer"
style={{ backgroundColor: '#000' }}
onClick={() => setIsPaused((v) => !v)}
/>
) : current?.mediaType === 'image' ? (
// eslint-disable-next-line @next/next/no-img-element
@@ -233,32 +293,111 @@ export default function DoomScrollView({ items, videoContext = 'mixed', onClose
) : null}
</div>
{/* Bottom bar: mute | filename | play-pause */}
{/* Text overlay */}
{showTextOverlay && displayText && (
<div
className="absolute bottom-16 left-4 right-4 z-20 rounded-xl p-4"
style={{ backgroundColor: 'rgba(0,0,0,0.75)' }}
onClick={(e) => e.stopPropagation()}
>
{extractedText && translatedText && (
<div className="flex justify-end mb-2">
<button
onClick={() => setShowOriginal((v) => !v)}
className="text-xs px-2 py-0.5 rounded-full"
style={{ backgroundColor: 'rgba(255,255,255,0.15)', color: 'rgba(255,255,255,0.7)' }}
>
{showOriginal ? 'Show Translation' : 'Show Original'}
</button>
</div>
)}
<p className="text-sm whitespace-pre-wrap" style={{ color: 'rgba(255,255,255,0.9)' }}>
{displayText}
</p>
</div>
)}
{/* Bottom bar: mute | filename | action buttons */}
<div className="absolute bottom-0 left-0 right-0 flex items-center gap-3 px-4 pb-3 pt-2 z-10">
<div className="w-9 flex-shrink-0">
{isVideo && (
<button
onClick={() => setLocalMuted((v) => !v)}
className="w-9 h-9 rounded-full flex items-center justify-center text-base transition-opacity hover:opacity-100 opacity-70"
className="w-9 h-9 rounded-full flex items-center justify-center transition-opacity hover:opacity-100 opacity-70"
style={{ backgroundColor: 'rgba(0,0,0,0.5)', color: '#fff' }}
aria-label={localMuted ? 'Unmute' : 'Mute'}
>
{localMuted ? '🔇' : '🔊'}
{localMuted ? (
<svg width="18" height="18" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2" strokeLinecap="round" strokeLinejoin="round">
<polygon points="11 5 6 9 2 9 2 15 6 15 11 19 11 5"/>
<line x1="23" y1="9" x2="17" y2="15"/>
<line x1="17" y1="9" x2="23" y2="15"/>
</svg>
) : (
<svg width="18" height="18" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2" strokeLinecap="round" strokeLinejoin="round">
<polygon points="11 5 6 9 2 9 2 15 6 15 11 19 11 5"/>
<path d="M19.07 4.93a10 10 0 0 1 0 14.14"/>
<path d="M15.54 8.46a5 5 0 0 1 0 7.07"/>
</svg>
)}
</button>
)}
</div>
<span className="flex-1 text-xs truncate text-center" style={{ color: 'rgba(255,255,255,0.4)' }}>
{current?.name}
</span>
<div className="w-9 flex-shrink-0 flex justify-end">
{isVideo && (
<div className="flex-shrink-0 flex items-center gap-1">
{extractedText ? (
<button
onClick={() => setIsPaused((v) => !v)}
className="w-9 h-9 rounded-full flex items-center justify-center text-sm transition-opacity hover:opacity-100 opacity-70"
style={{ backgroundColor: 'rgba(0,0,0,0.5)', color: '#fff' }}
aria-label={isPaused ? 'Play' : 'Pause'}
onClick={() => setShowTextOverlay((v) => !v)}
className="w-9 h-9 rounded-full flex items-center justify-center transition-opacity hover:opacity-100 opacity-70"
style={{
backgroundColor: showTextOverlay ? 'rgba(255,255,255,0.2)' : 'rgba(0,0,0,0.5)',
color: '#fff',
}}
aria-label={showTextOverlay ? 'Hide text' : 'Show text'}
>
{isPaused ? '▶' : '⏸'}
<svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2" strokeLinecap="round" strokeLinejoin="round">
<line x1="3" y1="6" x2="21" y2="6"/>
<line x1="3" y1="12" x2="15" y2="12"/>
<line x1="3" y1="18" x2="18" y2="18"/>
</svg>
</button>
) : current?.itemKey && current?.mediaType === 'image' ? (
<button
onClick={handleExtractText}
disabled={extracting}
className="w-9 h-9 rounded-full flex items-center justify-center transition-opacity hover:opacity-100 opacity-70 disabled:opacity-40"
style={{
backgroundColor: extractError ? 'rgba(127,29,29,0.8)' : 'rgba(0,0,0,0.5)',
color: extractError ? '#fca5a5' : '#fff',
}}
aria-label="Extract text"
>
{extracting ? (
<span className="animate-spin" style={{ display: 'inline-block', fontSize: '0.75rem' }}></span>
) : (
<svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2" strokeLinecap="round" strokeLinejoin="round">
<path d="M14 2H6a2 2 0 0 0-2 2v16a2 2 0 0 0 2 2h12a2 2 0 0 0 2-2V8z"/>
<polyline points="14 2 14 8 20 8"/>
<line x1="16" y1="13" x2="8" y2="13"/>
<line x1="16" y1="17" x2="8" y2="17"/>
<polyline points="10 9 9 9 8 9"/>
</svg>
)}
</button>
) : null}
{onViewInLibrary && current?.itemKey && (
<button
onClick={(e) => { e.stopPropagation(); onViewInLibrary(current) }}
className="w-9 h-9 rounded-full flex items-center justify-center transition-opacity hover:opacity-100 opacity-70"
style={{ backgroundColor: 'rgba(0,0,0,0.5)', color: '#fff' }}
aria-label="View in library"
>
<svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2" strokeLinecap="round" strokeLinejoin="round">
<path d="M3 9l9-7 9 7v11a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2z"/>
<polyline points="9 22 9 12 15 12 15 22"/>
</svg>
</button>
)}
</div>

View File

@@ -16,9 +16,7 @@ interface Props {
export default function ImageLightbox({ url, name, onClose, onPrev, onNext, itemKey, onTagsChanged, onAiTag }: Props) {
const overlayRef = useRef<HTMLDivElement>(null)
const [showTags, setShowTags] = useState(
() => !!itemKey && typeof window !== 'undefined' && window.innerWidth >= 1280
)
const [showTags, setShowTags] = useState(false)
const [aiTagging, setAiTagging] = useState(false)
const [aiTagError, setAiTagError] = useState<string | null>(null)
const [tagRefreshKey, setTagRefreshKey] = useState(0)
@@ -30,9 +28,16 @@ export default function ImageLightbox({ url, name, onClose, onPrev, onNext, item
const [extractError, setExtractError] = useState<string | null>(null)
const [retranslating, setRetranslating] = useState(false)
// Text overlay state
const [showTextOverlay, setShowTextOverlay] = useState(false)
const [showOriginal, setShowOriginal] = useState(false)
// Determine if this is an image file (for text extraction controls)
const isImage = /\.(jpe?g|png|gif|webp|bmp|tiff?)$/i.test(name)
// Derived: what text to display in the overlay
const displayText = (translatedText && !showOriginal) ? translatedText : extractedText
// Fetch existing AI fields on mount / item change
useEffect(() => {
if (!itemKey) return
@@ -76,6 +81,31 @@ export default function ImageLightbox({ url, name, onClose, onPrev, onNext, item
{name}
</span>
<div className="flex items-center gap-2 flex-shrink-0">
{/* Text overlay button — only shown when extracted text exists */}
{extractedText && (
<button
onClick={(e) => { e.stopPropagation(); setShowTextOverlay((v) => !v) }}
className="w-12 h-12 rounded-full flex items-center justify-center transition-colors"
style={{
backgroundColor: showTextOverlay ? 'var(--accent)' : 'var(--surface)',
color: showTextOverlay ? '#fff' : 'var(--text-primary)',
}}
onMouseEnter={(e) => {
if (!showTextOverlay) (e.currentTarget as HTMLElement).style.backgroundColor = 'var(--surface-hover)'
}}
onMouseLeave={(e) => {
if (!showTextOverlay) (e.currentTarget as HTMLElement).style.backgroundColor = 'var(--surface)'
}}
aria-label={showTextOverlay ? 'Hide text' : 'Show text'}
title="Display text"
>
<svg width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2" strokeLinecap="round" strokeLinejoin="round">
<line x1="3" y1="6" x2="21" y2="6"/>
<line x1="3" y1="12" x2="15" y2="12"/>
<line x1="3" y1="18" x2="18" y2="18"/>
</svg>
</button>
)}
{itemKey && (
<button
onClick={(e) => { e.stopPropagation(); setShowTags((v) => !v) }}
@@ -179,6 +209,29 @@ export default function ImageLightbox({ url, name, onClose, onPrev, onNext, item
</button>
)}
{/* Text overlay */}
{showTextOverlay && displayText && (
<div
className="absolute bottom-4 left-4 right-4 z-10 rounded-xl p-4"
style={{ backgroundColor: 'rgba(0,0,0,0.75)' }}
onClick={(e) => e.stopPropagation()}
>
{extractedText && translatedText && (
<div className="flex justify-end mb-2">
<button
onClick={() => setShowOriginal((v) => !v)}
className="text-xs px-2 py-0.5 rounded-full"
style={{ backgroundColor: 'rgba(255,255,255,0.15)', color: 'rgba(255,255,255,0.7)' }}
>
{showOriginal ? 'Show Translation' : 'Show Original'}
</button>
</div>
)}
<p className="text-sm whitespace-pre-wrap" style={{ color: 'rgba(255,255,255,0.9)' }}>
{displayText}
</p>
</div>
)}
</div>
{/* Tag panel */}
<div
@@ -343,6 +396,29 @@ export default function ImageLightbox({ url, name, onClose, onPrev, onNext, item
</button>
)}
{/* Text overlay */}
{showTextOverlay && displayText && (
<div
className="absolute bottom-4 left-4 right-4 z-10 rounded-xl p-4"
style={{ backgroundColor: 'rgba(0,0,0,0.75)' }}
onClick={(e) => e.stopPropagation()}
>
{extractedText && translatedText && (
<div className="flex justify-end mb-2">
<button
onClick={() => setShowOriginal((v) => !v)}
className="text-xs px-2 py-0.5 rounded-full"
style={{ backgroundColor: 'rgba(255,255,255,0.15)', color: 'rgba(255,255,255,0.7)' }}
>
{showOriginal ? 'Show Translation' : 'Show Original'}
</button>
</div>
)}
<p className="text-sm whitespace-pre-wrap" style={{ color: 'rgba(255,255,255,0.9)' }}>
{displayText}
</p>
</div>
)}
</div>
)}
</div>

View File

@@ -41,6 +41,7 @@ export default function MixedView({ libraryId, initialPath }: Props) {
const [doomScrollEntries, setDoomScrollEntries] = useState<FileEntry[]>([])
const [doomScrollEntriesLoading, setDoomScrollEntriesLoading] = useState(false)
const [doomScrollEntriesLoaded, setDoomScrollEntriesLoaded] = useState(false)
const [pendingOpen, setPendingOpen] = useState<string | null>(null)
const toggleTag = (tagId: string) =>
setSelectedTagIds((prev) => {
@@ -234,9 +235,39 @@ export default function MixedView({ libraryId, initialPath }: Props) {
// When filters are active, doom scroll uses filteredEntries (already filtered by search/tags).
// When no filters, doom scroll uses files recursively under the current directory.
// In both cases entries come from recursive API calls so entry.name is the full relative path.
const doomScrollItems: DoomScrollItem[] = (filtersActive ? filteredEntries : doomScrollEntries)
.filter((e) => e.type === 'file' && (e.mediaType === 'video' || e.mediaType === 'image') && e.url && isBrowserPlayable(e.name))
.map((e) => ({ url: e.url!, name: e.name, mediaType: e.mediaType as 'video' | 'image' }))
.map((e) => ({
url: e.url!,
name: e.name,
mediaType: e.mediaType as 'video' | 'image',
itemKey: `${libraryId}:mixed_file:${encodeURIComponent(e.name)}`,
}))
/**
 * Leave doom scroll and show `item` in the library browser.
 *
 * The item's relative path is recovered from its item key
 * (`<libraryId>:mixed_file:<encoded relative path>`). The containing directory
 * is loaded and the path is stored in `pendingOpen` so the file can be opened
 * once that directory's listing is available. Keys that do not carry a
 * decodable mixed-file path are ignored.
 */
const handleViewInLibrary = useCallback((item: DoomScrollItem) => {
  if (!item.itemKey) return

  const marker = ':mixed_file:'
  const markerAt = item.itemKey.indexOf(marker)
  // Without the marker there is no path to navigate to; previously this case
  // decoded `undefined` into the literal path "undefined".
  if (markerAt < 0) return

  let rel: string
  try {
    rel = decodeURIComponent(item.itemKey.slice(markerAt + marker.length))
  } catch {
    // Malformed percent-encoding: nothing sensible to navigate to.
    return
  }

  // Everything before the last '/' is the containing directory ('' for root).
  const dir = rel.split('/').slice(0, -1).join('/')

  setDoomScrollActive(false)
  setPendingOpen(rel)
  loadPath(dir)
}, [loadPath])
// Auto-open a file after navigating to its directory (from "view in library")
// Runs whenever `listing` or `pendingOpen` changes: looks up the pending file's
// base name among the listing's file entries and, once found, clears
// `pendingOpen` and opens that entry.
useEffect(() => {
if (!pendingOpen || !listing) return
// pendingOpen is a '/'-separated relative path; only its last segment is compared with entry names.
const filename = pendingOpen.split('/').pop()!
const entry = listing.entries.find((e) => e.name === filename && e.type === 'file')
// Not in this listing: pendingOpen stays set, so the lookup runs again when `listing` changes.
// NOTE(review): the match is by file name only — confirm that a listing for a different
// directory containing a same-named file cannot be matched here before the target loads.
if (!entry) return
setPendingOpen(null)
// Falls back to index 0 when the entry is not found in mediaEntries.
const idx = mediaEntries.indexOf(entry)
openMediaEntry(entry, idx >= 0 ? idx : 0)
// openMediaEntry is defined inline and depends on stable state; listing/pendingOpen are the real triggers
// eslint-disable-next-line react-hooks/exhaustive-deps
}, [listing, pendingOpen])
return (
<>
@@ -245,6 +276,7 @@ export default function MixedView({ libraryId, initialPath }: Props) {
items={doomScrollItems}
videoContext="mixed"
onClose={() => setDoomScrollActive(false)}
onViewInLibrary={handleViewInLibrary}
/>
)}

View File

@@ -22,9 +22,7 @@ export default function VideoPlayerModal({ url, name, onClose, onPrev, onNext, i
const loop = context === 'mixed' ? settings.mixedLoop : context === 'movies' ? settings.moviesLoop : settings.tvLoop
const muted = context === 'mixed' ? settings.mixedMuted : context === 'movies' ? settings.moviesMuted : settings.tvMuted
const overlayRef = useRef<HTMLDivElement>(null)
const [showTags, setShowTags] = useState(
() => !!itemKey && typeof window !== 'undefined' && window.innerWidth >= 1280
)
const [showTags, setShowTags] = useState(false)
const [aiTagging, setAiTagging] = useState(false)
const [aiTagError, setAiTagError] = useState<string | null>(null)
const [tagRefreshKey, setTagRefreshKey] = useState(0)

View File

@@ -4,7 +4,7 @@ import type { Library, Tag, TagCategory } from '@/types'
import { getDb } from './db'
import { getAiConfig, getEffectiveAiConfig, getPreferredLanguage } from './app-settings'
import { getTags, getCategories, addTagToItem, getActiveCategoryIdsForLibrary, getResolvedTagsForItem } from './tags'
import { getThumbnailPath, getVideoFramePaths } from './thumbnails'
import { getAiImagePath, getVideoFramePaths } from './thumbnails'
import { findFile } from './media-utils'
import { getLibrary, resolveLibraryRoot } from './libraries'
@@ -279,7 +279,7 @@ export async function runAiTagging(library: Library, libraryRoot: string): Promi
const framePaths = await getVideoFramePaths(resolvedMedia.path, library.id, VIDEO_FRAME_PERCENTAGES)
base64Images = framePaths.map((p) => fs.readFileSync(p, 'base64'))
} else {
const thumbnailPath = await getThumbnailPath(resolvedMedia.path, library.id, 'image')
const thumbnailPath = await getAiImagePath(resolvedMedia.path, library.id)
base64Images = [fs.readFileSync(thumbnailPath, 'base64')]
}
@@ -367,7 +367,7 @@ export async function tagSingleItem(itemKey: string): Promise<string[]> {
const framePaths = await getVideoFramePaths(imagePath.path, libraryId, VIDEO_FRAME_PERCENTAGES)
base64Images = framePaths.map((p) => fs.readFileSync(p, 'base64'))
} else {
const thumbnailPath = await getThumbnailPath(imagePath.path, libraryId, 'image')
const thumbnailPath = await getAiImagePath(imagePath.path, libraryId)
base64Images = [fs.readFileSync(thumbnailPath, 'base64')]
}
@@ -529,7 +529,7 @@ export async function generateItemDescription(itemKey: string): Promise<string>
const framePaths = await getVideoFramePaths(resolvedMedia.path, libraryId, VIDEO_FRAME_PERCENTAGES)
base64Images = framePaths.map((p) => fs.readFileSync(p, 'base64'))
} else {
const thumbnailPath = await getThumbnailPath(resolvedMedia.path, libraryId, 'image')
const thumbnailPath = await getAiImagePath(resolvedMedia.path, libraryId)
base64Images = [fs.readFileSync(thumbnailPath, 'base64')]
}
@@ -587,7 +587,7 @@ export async function extractItemText(itemKey: string): Promise<{ extractedText:
throw Object.assign(new Error('Text extraction is only available for images'), { code: 'NO_IMAGE' })
}
const thumbnailPath = await getThumbnailPath(resolvedMedia.path, libraryId, 'image')
const thumbnailPath = await getAiImagePath(resolvedMedia.path, libraryId)
const base64Images = [fs.readFileSync(thumbnailPath, 'base64')]
const systemPrompt = `You are an OCR assistant. Extract ALL text visible in the image exactly as it appears. Preserve line breaks and formatting.${config.promptExtract ? ' ' + config.promptExtract : ''} If there is no text in the image, respond with exactly: [NO TEXT]`

View File

@@ -7,6 +7,8 @@ import sharp from 'sharp'
const CACHE_DIR = path.resolve(process.cwd(), '.thumbnails')
const THUMBNAIL_WIDTH = 400
const JPEG_QUALITY = 75
const AI_IMAGE_WIDTH = 1920
const AI_JPEG_QUALITY = 90
/** Ensure the cache directory exists. */
function ensureCacheDir(): void {
@@ -47,6 +49,17 @@ async function generateImageThumbnail(src: string, dest: string): Promise<void>
fs.renameSync(tmp, dest)
}
/**
 * Render `src` as a high-resolution JPEG at `dest` for AI vision use.
 *
 * The width is capped at AI_IMAGE_WIDTH; images narrower than that are not
 * upscaled and are converted at their native size. The output is first written
 * to a `.tmp` sibling of `dest` and then renamed onto `dest`.
 */
async function generateAiImage(src: string, dest: string): Promise<void> {
  const tempPath = `${dest}.tmp`
  const pipeline = sharp(src)
    .resize(AI_IMAGE_WIDTH, undefined, { withoutEnlargement: true })
    .jpeg({ quality: AI_JPEG_QUALITY })
  await pipeline.toFile(tempPath)
  fs.renameSync(tempPath, dest)
}
/** Run a child process and collect stderr. Resolves on exit code 0, rejects otherwise. */
function run(bin: string, args: string[]): Promise<void> {
return new Promise((resolve, reject) => {
@@ -158,6 +171,25 @@ export async function getVideoFramePaths(
return framePaths
}
/**
 * Resolve the absolute path of the high-resolution JPEG used for AI vision
 * APIs (at most AI_IMAGE_WIDTH pixels wide, JPEG quality AI_JPEG_QUALITY).
 *
 * The file lives in the thumbnail cache directory under the item's cache key
 * with an `_ai` suffix, so it never collides with the display thumbnail.
 * When `getCachedPath` reports no usable cached copy (first call, or the
 * source file has been modified), the image is generated before returning.
 */
export async function getAiImagePath(
  absoluteFilePath: string,
  libraryId: string
): Promise<string> {
  ensureCacheDir()
  const aiCacheFile = path.join(CACHE_DIR, `${cacheKey(libraryId, absoluteFilePath)}_ai.jpg`)
  const existing = getCachedPath(aiCacheFile, absoluteFilePath)
  if (!existing) {
    await generateAiImage(absoluteFilePath, aiCacheFile)
    return aiCacheFile
  }
  return existing
}
/**
* Returns the absolute path to a cached thumbnail JPEG for the given file.
* Generates it on first call (or when the source has been modified).