From 4d75e74cabeebb87ae820cabdeef8484d06485f6 Mon Sep 17 00:00:00 2001 From: Garret Patti <42485635+garretpatti@users.noreply.github.com> Date: Mon, 6 Apr 2026 19:58:05 -0400 Subject: [PATCH] Fix post-scan CPU spike and improve scan performance at scale MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Remove fire-and-forget thumbnail pre-warming from scanMixed(): firing 48k+ simultaneous unresolved getThumbnailPath() promises was saturating sharp and ffmpeg after scan completion, keeping CPU pegged. Mixed-library thumbnails are still generated on demand by /api/thumbnail, as before. - Add incremental fingerprinting: load existing (item_key → fingerprint) map from DB before each walk; reuse the stored fingerprint for any item whose item_key already exists instead of re-reading 64 KB per file. Stable re-scans now do ~0 bytes of fingerprint I/O. Caveat: the lookup is keyed on item_key alone, so a file replaced in place under the same key keeps its previously stored fingerprint. - Wrap all bulk DB upsert and delete loops in db.transaction() in scanMovies(), scanTv(), scanMixed(), and reconcileAndPrune(). Reduces N auto-committed WAL writes to a single batch commit per scan phase.
Co-Authored-By: Claude Sonnet 4.6 --- src/lib/scanner.ts | 247 +++++++++++++++++++++++++-------------------- 1 file changed, 138 insertions(+), 109 deletions(-) diff --git a/src/lib/scanner.ts b/src/lib/scanner.ts index 6dc7ea2..526d46a 100644 --- a/src/lib/scanner.ts +++ b/src/lib/scanner.ts @@ -10,9 +10,6 @@ import { scanGamesLibrary } from './games' import { getThumbnailPath } from './thumbnails' import { computeFingerprint } from './fingerprint' import { reKeyMediaItem } from './tags' -import { VIDEO_EXTENSIONS } from './media-utils' - -const IMAGE_EXTENSIONS = new Set(['.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp']) let scanRunning = false @@ -70,14 +67,20 @@ async function scanMovies(library: Library, libraryRoot: string): Promise const db = getDb() const now = Date.now() + // Load existing fingerprints for incremental hashing (skip re-reading unchanged files) + const existingFps = db + .prepare('SELECT item_key, fingerprint FROM media_items WHERE library_id = ? AND fingerprint IS NOT NULL') + .all(library.id) as Array<{ item_key: string; fingerprint: string }> + const existingFpMap = new Map(existingFps.map((r) => [r.item_key, r.fingerprint])) + // Build new items map: item_key → { fingerprint, movie } type MovieEntry = { fingerprint: string | null; movie: Movie } const newItems = new Map() for (const movie of movies) { const itemKey = `${library.id}:movie:${movie.id}` - const fingerprint = movie.videoPath - ? computeFingerprint(path.join(libraryRoot, movie.videoPath)) - : null + const fingerprint = + existingFpMap.get(itemKey) ?? + (movie.videoPath ? 
computeFingerprint(path.join(libraryRoot, movie.videoPath)) : null) newItems.set(itemKey, { fingerprint, movie }) } @@ -101,26 +104,31 @@ async function scanMovies(library: Library, libraryRoot: string): Promise scanned_at = excluded.scanned_at `) - for (const [itemKey, { fingerprint, movie }] of newItems) { - upsert.run({ - library_id: library.id, - item_key: itemKey, - item_type: 'movie', - title: movie.title, - year: movie.year ?? null, - plot: movie.plot ?? null, - genres: JSON.stringify(movie.genres), - metadata: JSON.stringify({ - rating: movie.rating, - runtime: movie.runtime, - posterUrl: movie.posterUrl, - backdropUrl: movie.backdropUrl, - }), - file_path: movie.videoPath, - fingerprint, - scanned_at: now, - }) + db.transaction(() => { + for (const [itemKey, { fingerprint, movie }] of newItems) { + upsert.run({ + library_id: library.id, + item_key: itemKey, + item_type: 'movie', + title: movie.title, + year: movie.year ?? null, + plot: movie.plot ?? null, + genres: JSON.stringify(movie.genres), + metadata: JSON.stringify({ + rating: movie.rating, + runtime: movie.runtime, + posterUrl: movie.posterUrl, + backdropUrl: movie.backdropUrl, + }), + file_path: movie.videoPath, + fingerprint, + scanned_at: now, + }) + } + })() + // Prewarm poster thumbnails after the transaction (bounded by number of movies) + for (const [, { movie }] of newItems) { if (movie.posterUrl) { await prewarmThumbnailFromUrl(movie.posterUrl, library.id, libraryRoot, 'image') } @@ -142,6 +150,12 @@ async function scanTv(library: Library, libraryRoot: string): Promise { type EpisodeRow = { episode: TvEpisode; episodeKey: string; fingerprint: string | null } type SeriesRow = { show: TvSeries; seriesKey: string; seasons: SeasonRow[] } + // Load existing episode fingerprints for incremental hashing + const existingEpFps = db + .prepare('SELECT item_key, fingerprint FROM media_items WHERE library_id = ? AND item_type = ? 
AND fingerprint IS NOT NULL') + .all(library.id, 'tv_episode') as Array<{ item_key: string; fingerprint: string }> + const existingEpFpMap = new Map(existingEpFps.map((r) => [r.item_key, r.fingerprint])) + const allSeries: SeriesRow[] = [] const newKeys = new Set() const newEpisodes = new Map() @@ -159,9 +173,9 @@ async function scanTv(library: Library, libraryRoot: string): Promise { for (const episode of scanTvEpisodes(libraryRoot, library.id, show.id, season.id)) { const episodeKey = `${library.id}:tv_episode:${show.id}:${season.id}:${episode.id}` newKeys.add(episodeKey) - const fingerprint = episode.videoPath - ? computeFingerprint(path.join(libraryRoot, episode.videoPath)) - : null + const fingerprint = + existingEpFpMap.get(episodeKey) ?? + (episode.videoPath ? computeFingerprint(path.join(libraryRoot, episode.videoPath)) : null) episodeRows.push({ episode, episodeKey, fingerprint }) newEpisodes.set(episodeKey, { fingerprint }) } @@ -207,83 +221,91 @@ async function scanTv(library: Library, libraryRoot: string): Promise { let episodeCount = 0 - for (const { show, seriesKey, seasons } of allSeries) { - upsertSeries.run({ - library_id: library.id, - item_key: seriesKey, - item_type: 'tv_series', - title: show.title, - year: show.year ?? null, - plot: show.plot ?? 
null, - genres: JSON.stringify(show.genres), - metadata: JSON.stringify({ - status: show.status, - seasonCount: show.seasonCount, - posterUrl: show.posterUrl, - backdropUrl: show.backdropUrl, - }), - file_path: null, - fingerprint: null, - scanned_at: now, - }) - - if (show.posterUrl) { - await prewarmThumbnailFromUrl(show.posterUrl, library.id, libraryRoot, 'image') - } - - for (const { season, seasonKey, episodes } of seasons) { - upsertChild.run({ + // Phase 1: all DB writes in a single transaction + db.transaction(() => { + for (const { show, seriesKey, seasons } of allSeries) { + upsertSeries.run({ library_id: library.id, - item_key: seasonKey, - item_type: 'tv_season', - parent_key: seriesKey, - title: season.title, - year: null, - plot: null, - genres: JSON.stringify([]), + item_key: seriesKey, + item_type: 'tv_series', + title: show.title, + year: show.year ?? null, + plot: show.plot ?? null, + genres: JSON.stringify(show.genres), metadata: JSON.stringify({ - seasonNumber: season.seasonNumber, - episodeCount: season.episodeCount, - posterUrl: season.posterUrl, + status: show.status, + seasonCount: show.seasonCount, + posterUrl: show.posterUrl, + backdropUrl: show.backdropUrl, }), file_path: null, fingerprint: null, scanned_at: now, }) - if (season.posterUrl) { - await prewarmThumbnailFromUrl(season.posterUrl, library.id, libraryRoot, 'image') - } - - for (const { episode, episodeKey, fingerprint } of episodes) { + for (const { season, seasonKey, episodes } of seasons) { upsertChild.run({ library_id: library.id, - item_key: episodeKey, - item_type: 'tv_episode', - parent_key: seasonKey, - title: episode.title, + item_key: seasonKey, + item_type: 'tv_season', + parent_key: seriesKey, + title: season.title, year: null, - plot: episode.plot ?? 
null, + plot: null, genres: JSON.stringify([]), metadata: JSON.stringify({ - episodeNumber: episode.episodeNumber, - seasonNumber: episode.seasonNumber, - aired: episode.aired, - rating: episode.rating, - thumbnailUrl: episode.thumbnailUrl, + seasonNumber: season.seasonNumber, + episodeCount: season.episodeCount, + posterUrl: season.posterUrl, }), - file_path: episode.videoPath, - fingerprint, + file_path: null, + fingerprint: null, scanned_at: now, }) + for (const { episode, episodeKey, fingerprint } of episodes) { + upsertChild.run({ + library_id: library.id, + item_key: episodeKey, + item_type: 'tv_episode', + parent_key: seasonKey, + title: episode.title, + year: null, + plot: episode.plot ?? null, + genres: JSON.stringify([]), + metadata: JSON.stringify({ + episodeNumber: episode.episodeNumber, + seasonNumber: episode.seasonNumber, + aired: episode.aired, + rating: episode.rating, + thumbnailUrl: episode.thumbnailUrl, + }), + file_path: episode.videoPath, + fingerprint, + scanned_at: now, + }) + episodeCount++ + } + } + } + })() + + // Phase 2: async thumbnail generation (bounded — one at a time, awaited) + for (const { show, seasons } of allSeries) { + if (show.posterUrl) { + await prewarmThumbnailFromUrl(show.posterUrl, library.id, libraryRoot, 'image') + } + for (const { season, episodes } of seasons) { + if (season.posterUrl) { + await prewarmThumbnailFromUrl(season.posterUrl, library.id, libraryRoot, 'image') + } + for (const { episode } of episodes) { const videoAbsPath = path.join(libraryRoot, episode.videoPath) try { await getThumbnailPath(videoAbsPath, library.id, 'video') } catch (err) { console.warn(`[scanner] Could not generate thumbnail for ${episode.videoPath}:`, err instanceof Error ? 
err.message : err) } - episodeCount++ } } } @@ -410,6 +432,12 @@ async function scanMixed(library: Library, libraryRoot: string): Promise { const db = getDb() const now = Date.now() + // Load existing fingerprints for incremental hashing (skip re-reading unchanged files) + const existingMixedFps = db + .prepare('SELECT item_key, fingerprint FROM media_items WHERE library_id = ? AND item_type = ? AND fingerprint IS NOT NULL') + .all(library.id, 'mixed_file') as Array<{ item_key: string; fingerprint: string }> + const existingMixedFpMap = new Map(existingMixedFps.map((r) => [r.item_key, r.fingerprint])) + // Collect all new items with fingerprints type MixedEntry = { fingerprint: string | null; relPath: string; title: string } const newItems = new Map() @@ -429,8 +457,10 @@ async function scanMixed(library: Library, libraryRoot: string): Promise { walk(path.join(absDir, name), relPath) } else { const itemKey = `${library.id}:mixed_file:${encodeURIComponent(relPath)}` - const absPath = path.join(absDir, name) - const fingerprint = computeFingerprint(absPath) + // Reuse stored fingerprint if the path is unchanged; only read for new/unknown files + const fingerprint = + existingMixedFpMap.get(itemKey) ?? 
+ computeFingerprint(path.join(absDir, name)) newItems.set(itemKey, { fingerprint, relPath, @@ -456,30 +486,25 @@ async function scanMixed(library: Library, libraryRoot: string): Promise { scanned_at = excluded.scanned_at `) - for (const [itemKey, { fingerprint, relPath, title }] of newItems) { - upsert.run({ - library_id: library.id, - item_key: itemKey, - item_type: 'mixed_file', - title, - file_path: relPath, - fingerprint, - scanned_at: now, - }) - - const ext = path.extname(relPath).toLowerCase() - let mediaType: 'image' | 'video' | null = null - if (IMAGE_EXTENSIONS.has(ext)) mediaType = 'image' - else if (VIDEO_EXTENSIONS.has(ext)) mediaType = 'video' - if (mediaType) { - const absPath = path.join(libraryRoot, relPath) - getThumbnailPath(absPath, library.id, mediaType).catch((err) => { - console.warn(`[scanner] Could not generate thumbnail for ${relPath}:`, err instanceof Error ? err.message : err) + // All upserts in a single transaction — critical for large libraries (48k+ files) + db.transaction(() => { + for (const [itemKey, { fingerprint, relPath, title }] of newItems) { + upsert.run({ + library_id: library.id, + item_key: itemKey, + item_type: 'mixed_file', + title, + file_path: relPath, + fingerprint, + scanned_at: now, }) } - } + })() - console.log(`[scanner] mixed: indexed ${newItems.size} files, pre-generating thumbnails`) + // Thumbnails for mixed libraries are generated on-demand by /api/thumbnail. + // Pre-warming 48k+ files simultaneously was the cause of the post-scan CPU spike. + + console.log(`[scanner] mixed: indexed ${newItems.size} files`) } // --------------------------------------------------------------------------- @@ -555,6 +580,7 @@ function reconcileAndPrune( ): void { const renameItem = db.prepare('UPDATE media_items SET item_key = ? 
WHERE item_key = ?') + // Apply moves first (outside transaction so console.log is visible as they happen) for (const { oldKey, newKey } of moves) { renameItem.run(newKey, oldKey) // Convert item_keys to the media_key format actually used in media_tags @@ -570,12 +596,15 @@ function reconcileAndPrune( .prepare('SELECT item_key FROM media_items WHERE library_id = ?') .all(libraryId) as Array<{ item_key: string }> + // Batch all deletes in a single transaction const deleteItem = db.prepare('DELETE FROM media_items WHERE item_key = ?') - for (const { item_key } of existing) { - if (!newKeys.has(item_key)) { - deleteItem.run(item_key) + db.transaction(() => { + for (const { item_key } of existing) { + if (!newKeys.has(item_key)) { + deleteItem.run(item_key) + } } - } + })() } /**