Fix post-scan CPU spike and improve scan performance at scale
- Remove fire-and-forget thumbnail pre-warming from scanMixed(): firing 48k+ simultaneous unresolved getThumbnailPath() promises was saturating sharp and ffmpeg after scan completion, keeping CPU pegged. Mixed-library thumbnails are now generated on demand by /api/thumbnail, as before.
- Add incremental fingerprinting: load the existing (item_key → fingerprint) map from the DB before each walk and reuse the stored fingerprint for unchanged paths instead of re-reading 64 KB per file. Stable re-scans now do ~0 bytes of fingerprint I/O.
- Wrap all bulk DB upsert and delete loops in db.transaction() in scanMovies(), scanTv(), scanMixed(), and reconcileAndPrune(). This reduces N auto-committed WAL writes to a single batch commit per scan phase.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -10,9 +10,6 @@ import { scanGamesLibrary } from './games'
|
||||
import { getThumbnailPath } from './thumbnails'
|
||||
import { computeFingerprint } from './fingerprint'
|
||||
import { reKeyMediaItem } from './tags'
|
||||
import { VIDEO_EXTENSIONS } from './media-utils'
|
||||
|
||||
const IMAGE_EXTENSIONS = new Set(['.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp'])
|
||||
|
||||
let scanRunning = false
|
||||
|
||||
@@ -70,14 +67,20 @@ async function scanMovies(library: Library, libraryRoot: string): Promise<void>
|
||||
const db = getDb()
|
||||
const now = Date.now()
|
||||
|
||||
// Load existing fingerprints for incremental hashing (skip re-reading unchanged files)
|
||||
const existingFps = db
|
||||
.prepare('SELECT item_key, fingerprint FROM media_items WHERE library_id = ? AND fingerprint IS NOT NULL')
|
||||
.all(library.id) as Array<{ item_key: string; fingerprint: string }>
|
||||
const existingFpMap = new Map(existingFps.map((r) => [r.item_key, r.fingerprint]))
|
||||
|
||||
// Build new items map: item_key → { fingerprint, movie }
|
||||
type MovieEntry = { fingerprint: string | null; movie: Movie }
|
||||
const newItems = new Map<string, MovieEntry>()
|
||||
for (const movie of movies) {
|
||||
const itemKey = `${library.id}:movie:${movie.id}`
|
||||
const fingerprint = movie.videoPath
|
||||
? computeFingerprint(path.join(libraryRoot, movie.videoPath))
|
||||
: null
|
||||
const fingerprint =
|
||||
existingFpMap.get(itemKey) ??
|
||||
(movie.videoPath ? computeFingerprint(path.join(libraryRoot, movie.videoPath)) : null)
|
||||
newItems.set(itemKey, { fingerprint, movie })
|
||||
}
|
||||
|
||||
@@ -101,6 +104,7 @@ async function scanMovies(library: Library, libraryRoot: string): Promise<void>
|
||||
scanned_at = excluded.scanned_at
|
||||
`)
|
||||
|
||||
db.transaction(() => {
|
||||
for (const [itemKey, { fingerprint, movie }] of newItems) {
|
||||
upsert.run({
|
||||
library_id: library.id,
|
||||
@@ -120,7 +124,11 @@ async function scanMovies(library: Library, libraryRoot: string): Promise<void>
|
||||
fingerprint,
|
||||
scanned_at: now,
|
||||
})
|
||||
}
|
||||
})()
|
||||
|
||||
// Prewarm poster thumbnails after the transaction (bounded by number of movies)
|
||||
for (const [, { movie }] of newItems) {
|
||||
if (movie.posterUrl) {
|
||||
await prewarmThumbnailFromUrl(movie.posterUrl, library.id, libraryRoot, 'image')
|
||||
}
|
||||
@@ -142,6 +150,12 @@ async function scanTv(library: Library, libraryRoot: string): Promise<void> {
|
||||
type EpisodeRow = { episode: TvEpisode; episodeKey: string; fingerprint: string | null }
|
||||
type SeriesRow = { show: TvSeries; seriesKey: string; seasons: SeasonRow[] }
|
||||
|
||||
// Load existing episode fingerprints for incremental hashing
|
||||
const existingEpFps = db
|
||||
.prepare('SELECT item_key, fingerprint FROM media_items WHERE library_id = ? AND item_type = ? AND fingerprint IS NOT NULL')
|
||||
.all(library.id, 'tv_episode') as Array<{ item_key: string; fingerprint: string }>
|
||||
const existingEpFpMap = new Map(existingEpFps.map((r) => [r.item_key, r.fingerprint]))
|
||||
|
||||
const allSeries: SeriesRow[] = []
|
||||
const newKeys = new Set<string>()
|
||||
const newEpisodes = new Map<string, { fingerprint: string | null }>()
|
||||
@@ -159,9 +173,9 @@ async function scanTv(library: Library, libraryRoot: string): Promise<void> {
|
||||
for (const episode of scanTvEpisodes(libraryRoot, library.id, show.id, season.id)) {
|
||||
const episodeKey = `${library.id}:tv_episode:${show.id}:${season.id}:${episode.id}`
|
||||
newKeys.add(episodeKey)
|
||||
const fingerprint = episode.videoPath
|
||||
? computeFingerprint(path.join(libraryRoot, episode.videoPath))
|
||||
: null
|
||||
const fingerprint =
|
||||
existingEpFpMap.get(episodeKey) ??
|
||||
(episode.videoPath ? computeFingerprint(path.join(libraryRoot, episode.videoPath)) : null)
|
||||
episodeRows.push({ episode, episodeKey, fingerprint })
|
||||
newEpisodes.set(episodeKey, { fingerprint })
|
||||
}
|
||||
@@ -207,6 +221,8 @@ async function scanTv(library: Library, libraryRoot: string): Promise<void> {
|
||||
|
||||
let episodeCount = 0
|
||||
|
||||
// Phase 1: all DB writes in a single transaction
|
||||
db.transaction(() => {
|
||||
for (const { show, seriesKey, seasons } of allSeries) {
|
||||
upsertSeries.run({
|
||||
library_id: library.id,
|
||||
@@ -227,10 +243,6 @@ async function scanTv(library: Library, libraryRoot: string): Promise<void> {
|
||||
scanned_at: now,
|
||||
})
|
||||
|
||||
if (show.posterUrl) {
|
||||
await prewarmThumbnailFromUrl(show.posterUrl, library.id, libraryRoot, 'image')
|
||||
}
|
||||
|
||||
for (const { season, seasonKey, episodes } of seasons) {
|
||||
upsertChild.run({
|
||||
library_id: library.id,
|
||||
@@ -251,10 +263,6 @@ async function scanTv(library: Library, libraryRoot: string): Promise<void> {
|
||||
scanned_at: now,
|
||||
})
|
||||
|
||||
if (season.posterUrl) {
|
||||
await prewarmThumbnailFromUrl(season.posterUrl, library.id, libraryRoot, 'image')
|
||||
}
|
||||
|
||||
for (const { episode, episodeKey, fingerprint } of episodes) {
|
||||
upsertChild.run({
|
||||
library_id: library.id,
|
||||
@@ -276,14 +284,28 @@ async function scanTv(library: Library, libraryRoot: string): Promise<void> {
|
||||
fingerprint,
|
||||
scanned_at: now,
|
||||
})
|
||||
episodeCount++
|
||||
}
|
||||
}
|
||||
}
|
||||
})()
|
||||
|
||||
// Phase 2: async thumbnail generation (bounded — one at a time, awaited)
|
||||
for (const { show, seasons } of allSeries) {
|
||||
if (show.posterUrl) {
|
||||
await prewarmThumbnailFromUrl(show.posterUrl, library.id, libraryRoot, 'image')
|
||||
}
|
||||
for (const { season, episodes } of seasons) {
|
||||
if (season.posterUrl) {
|
||||
await prewarmThumbnailFromUrl(season.posterUrl, library.id, libraryRoot, 'image')
|
||||
}
|
||||
for (const { episode } of episodes) {
|
||||
const videoAbsPath = path.join(libraryRoot, episode.videoPath)
|
||||
try {
|
||||
await getThumbnailPath(videoAbsPath, library.id, 'video')
|
||||
} catch (err) {
|
||||
console.warn(`[scanner] Could not generate thumbnail for ${episode.videoPath}:`, err instanceof Error ? err.message : err)
|
||||
}
|
||||
episodeCount++
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -410,6 +432,12 @@ async function scanMixed(library: Library, libraryRoot: string): Promise<void> {
|
||||
const db = getDb()
|
||||
const now = Date.now()
|
||||
|
||||
// Load existing fingerprints for incremental hashing (skip re-reading unchanged files)
|
||||
const existingMixedFps = db
|
||||
.prepare('SELECT item_key, fingerprint FROM media_items WHERE library_id = ? AND item_type = ? AND fingerprint IS NOT NULL')
|
||||
.all(library.id, 'mixed_file') as Array<{ item_key: string; fingerprint: string }>
|
||||
const existingMixedFpMap = new Map(existingMixedFps.map((r) => [r.item_key, r.fingerprint]))
|
||||
|
||||
// Collect all new items with fingerprints
|
||||
type MixedEntry = { fingerprint: string | null; relPath: string; title: string }
|
||||
const newItems = new Map<string, MixedEntry>()
|
||||
@@ -429,8 +457,10 @@ async function scanMixed(library: Library, libraryRoot: string): Promise<void> {
|
||||
walk(path.join(absDir, name), relPath)
|
||||
} else {
|
||||
const itemKey = `${library.id}:mixed_file:${encodeURIComponent(relPath)}`
|
||||
const absPath = path.join(absDir, name)
|
||||
const fingerprint = computeFingerprint(absPath)
|
||||
// Reuse stored fingerprint if the path is unchanged; only read for new/unknown files
|
||||
const fingerprint =
|
||||
existingMixedFpMap.get(itemKey) ??
|
||||
computeFingerprint(path.join(absDir, name))
|
||||
newItems.set(itemKey, {
|
||||
fingerprint,
|
||||
relPath,
|
||||
@@ -456,6 +486,8 @@ async function scanMixed(library: Library, libraryRoot: string): Promise<void> {
|
||||
scanned_at = excluded.scanned_at
|
||||
`)
|
||||
|
||||
// All upserts in a single transaction — critical for large libraries (48k+ files)
|
||||
db.transaction(() => {
|
||||
for (const [itemKey, { fingerprint, relPath, title }] of newItems) {
|
||||
upsert.run({
|
||||
library_id: library.id,
|
||||
@@ -466,20 +498,13 @@ async function scanMixed(library: Library, libraryRoot: string): Promise<void> {
|
||||
fingerprint,
|
||||
scanned_at: now,
|
||||
})
|
||||
|
||||
const ext = path.extname(relPath).toLowerCase()
|
||||
let mediaType: 'image' | 'video' | null = null
|
||||
if (IMAGE_EXTENSIONS.has(ext)) mediaType = 'image'
|
||||
else if (VIDEO_EXTENSIONS.has(ext)) mediaType = 'video'
|
||||
if (mediaType) {
|
||||
const absPath = path.join(libraryRoot, relPath)
|
||||
getThumbnailPath(absPath, library.id, mediaType).catch((err) => {
|
||||
console.warn(`[scanner] Could not generate thumbnail for ${relPath}:`, err instanceof Error ? err.message : err)
|
||||
})
|
||||
}
|
||||
}
|
||||
})()
|
||||
|
||||
console.log(`[scanner] mixed: indexed ${newItems.size} files, pre-generating thumbnails`)
|
||||
// Thumbnails for mixed libraries are generated on-demand by /api/thumbnail.
|
||||
// Pre-warming 48k+ files simultaneously was the cause of the post-scan CPU spike.
|
||||
|
||||
console.log(`[scanner] mixed: indexed ${newItems.size} files`)
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
@@ -555,6 +580,7 @@ function reconcileAndPrune(
|
||||
): void {
|
||||
const renameItem = db.prepare('UPDATE media_items SET item_key = ? WHERE item_key = ?')
|
||||
|
||||
// Apply moves first (outside transaction so console.log is visible as they happen)
|
||||
for (const { oldKey, newKey } of moves) {
|
||||
renameItem.run(newKey, oldKey)
|
||||
// Convert item_keys to the media_key format actually used in media_tags
|
||||
@@ -570,12 +596,15 @@ function reconcileAndPrune(
|
||||
.prepare('SELECT item_key FROM media_items WHERE library_id = ?')
|
||||
.all(libraryId) as Array<{ item_key: string }>
|
||||
|
||||
// Batch all deletes in a single transaction
|
||||
const deleteItem = db.prepare('DELETE FROM media_items WHERE item_key = ?')
|
||||
db.transaction(() => {
|
||||
for (const { item_key } of existing) {
|
||||
if (!newKeys.has(item_key)) {
|
||||
deleteItem.run(item_key)
|
||||
}
|
||||
}
|
||||
})()
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
Reference in New Issue
Block a user