trash corrupt files
All checks were successful
Build and Push Docker Image / build (push) Successful in 57s

This commit is contained in:
Garret Patti
2026-04-20 11:44:30 -04:00
parent 7d2ae7e95c
commit dee9356004
4 changed files with 152 additions and 24 deletions

View File

@@ -1,9 +1,11 @@
import { NextRequest, NextResponse } from 'next/server' import { NextRequest, NextResponse } from 'next/server'
import fs from 'fs' import fs from 'fs'
import fsPromises from 'fs/promises'
import path from 'path' import path from 'path'
import { getLibrary, resolveLibraryRoot, resolveAndJail } from '@/lib/libraries' import { getLibrary, resolveLibraryRoot, resolveAndJail } from '@/lib/libraries'
import { getThumbnailPath, getCbzThumbnailPath } from '@/lib/thumbnails' import { getThumbnailPath, getCbzThumbnailPath } from '@/lib/thumbnails'
import { requireLibraryAccess } from '@/lib/auth' import { requireLibraryAccess } from '@/lib/auth'
import { isCorruptZipError } from '@/lib/zip-utils'
const VIDEO_EXTENSIONS = new Set(['.mp4', '.mov', '.mkv', '.avi', '.webm', '.m4v']) const VIDEO_EXTENSIONS = new Set(['.mp4', '.mov', '.mkv', '.avi', '.webm', '.m4v'])
const IMAGE_EXTENSIONS = new Set(['.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp', '.tiff', '.tif']) const IMAGE_EXTENSIONS = new Set(['.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp', '.tiff', '.tif'])
@@ -63,7 +65,30 @@ export async function GET(request: NextRequest) {
}, },
}) })
} catch (err) { } catch (err) {
if (isCorruptZipError(err)) {
// Move the corrupt archive to the library's .trash folder so it is excluded
// from future scans and hidden from the UI.
const trashDir = path.join(root, '.trash')
const filename = path.basename(filePath)
let dest = path.join(trashDir, filename)
fsPromises.mkdir(trashDir, { recursive: true })
.then(async () => {
if (fs.existsSync(dest)) {
const ext = path.extname(filename)
dest = path.join(trashDir, `${path.basename(filename, ext)}_${Date.now()}${ext}`)
}
await fsPromises.rename(filePath, dest).catch(async (e: NodeJS.ErrnoException) => {
if (e.code === 'EXDEV') {
await fsPromises.copyFile(filePath, dest)
await fsPromises.unlink(filePath)
} else throw e
})
console.log(`[thumbnail] Moved corrupt archive to trash: ${path.relative(root, filePath)}`)
})
.catch((e) => console.warn(`[thumbnail] Could not move corrupt archive to trash:`, e))
} else {
console.error(`Thumbnail generation failed for ${filePath}:`, err) console.error(`Thumbnail generation failed for ${filePath}:`, err)
}
return new NextResponse(null, { status: 404 }) return new NextResponse(null, { status: 404 })
} }
} }

View File

@@ -5,6 +5,7 @@ import type { ComicIssue, ComicSeries } from '@/types'
import { getDb } from './db' import { getDb } from './db'
import { HIDDEN_FILES, thumbnailApiUrl } from './media-utils' import { HIDDEN_FILES, thumbnailApiUrl } from './media-utils'
import { countZipImages, mapConcurrent } from './zip-utils' import { countZipImages, mapConcurrent } from './zip-utils'
import fsPromises from 'fs/promises'
const CBZ_EXTENSIONS = new Set(['.cbz']) const CBZ_EXTENSIONS = new Set(['.cbz'])
const CBZ_IMAGE_EXTENSIONS = new Set(['.jpg', '.jpeg', '.png', '.webp', '.gif']) const CBZ_IMAGE_EXTENSIONS = new Set(['.jpg', '.jpeg', '.png', '.webp', '.gif'])
@@ -28,6 +29,30 @@ export interface ScannedComicSeries extends ComicSeries {
issues: ComicIssue[] issues: ComicIssue[]
} }
/** Name of the per-library folder that receives archives found to be corrupt. */
const TRASH_DIR = '.trash'

/** Upper bound on alternative destination names tried before giving up. */
const MAX_TRASH_NAME_ATTEMPTS = 100

/**
 * Move a corrupt archive into `<libraryRoot>/.trash`, creating the folder if needed.
 *
 * The destination name is reserved atomically with an exclusive create (`'wx'`)
 * before the file is moved. Callers start many moves concurrently, and a plain
 * "exists? then rename" check lets two archives with the same basename race to
 * the same destination, where the second rename silently replaces the first.
 *
 * @param absPath      Absolute path of the archive to move.
 * @param libraryRoot  Absolute path of the library root that owns the trash folder.
 * @throws Any filesystem error other than a name collision or a cross-device rename.
 */
async function moveToTrash(absPath: string, libraryRoot: string): Promise<void> {
  const trashDir = path.join(libraryRoot, TRASH_DIR)
  await fsPromises.mkdir(trashDir, { recursive: true })

  const filename = path.basename(absPath)
  const ext = path.extname(filename)
  const base = path.basename(filename, ext)

  // Reserve a free destination name. `open(..., 'wx')` fails with EEXIST when the
  // name is taken, so exactly one concurrent caller can win any given name.
  let dest = path.join(trashDir, filename)
  for (let attempt = 1; ; attempt++) {
    try {
      const placeholder = await fsPromises.open(dest, 'wx')
      await placeholder.close()
      break
    } catch (err) {
      const code = (err as NodeJS.ErrnoException).code
      if (code !== 'EEXIST' || attempt >= MAX_TRASH_NAME_ATTEMPTS) throw err
      // First collision keeps the original `<base>_<timestamp><ext>` shape; later
      // ones add a counter so same-millisecond collisions still resolve.
      const suffix = attempt === 1 ? `${Date.now()}` : `${Date.now()}_${attempt}`
      dest = path.join(trashDir, `${base}_${suffix}${ext}`)
    }
  }

  try {
    try {
      // Replaces the empty placeholder reserved above.
      await fsPromises.rename(absPath, dest)
    } catch (err) {
      if ((err as NodeJS.ErrnoException).code !== 'EXDEV') throw err
      // Source and destination are on different filesystems — copy then delete.
      await fsPromises.copyFile(absPath, dest)
      await fsPromises.unlink(absPath)
    }
  } catch (err) {
    // The move did not complete; do not leave the placeholder (or a stray copy) behind.
    await fsPromises.unlink(dest).catch(() => undefined)
    throw err
  }

  console.log(`[scanner] Moved corrupt archive to trash: ${path.relative(libraryRoot, absPath)}`)
}
interface CollectedCbz { interface CollectedCbz {
absPath: string absPath: string
filename: string filename: string
@@ -93,22 +118,38 @@ export async function scanComicsLibrary(
// Phase 2: Count pages for all CBZ files concurrently (10 at a time) by reading // Phase 2: Count pages for all CBZ files concurrently (10 at a time) by reading
// only each archive's central directory — no full-file reads. // only each archive's central directory — no full-file reads.
const pageCounts = await mapConcurrent(collected, 10, (c) => const scanResults = await mapConcurrent(collected, 10, (c) =>
countZipImages(c.absPath, CBZ_IMAGE_EXTENSIONS) countZipImages(c.absPath, CBZ_IMAGE_EXTENSIONS)
) )
// Phase 3: Build the result array from collected metadata + page counts. // Move corrupt archives to the library's .trash folder and exclude them from indexing.
const movePromises: Promise<void>[] = []
const valid: Array<{ cbz: CollectedCbz; pageCount: number }> = []
for (let i = 0; i < collected.length; i++) {
const result = scanResults[i]
if (!result.valid) {
movePromises.push(
moveToTrash(collected[i].absPath, libraryRoot).catch((err) =>
console.warn(`[scanner] Could not move corrupt archive to trash: ${collected[i].absPath}`, err)
)
)
continue
}
valid.push({ cbz: collected[i], pageCount: result.pageCount })
}
if (movePromises.length > 0) await Promise.all(movePromises)
// Phase 3: Build the result array from valid files only.
const seriesMap = new Map<string, ScannedComicSeries>() const seriesMap = new Map<string, ScannedComicSeries>()
const standaloneIssues: ComicIssue[] = [] const standaloneIssues: ComicIssue[] = []
for (let i = 0; i < collected.length; i++) { for (const { cbz: c, pageCount } of valid) {
const c = collected[i]
const coverUrl = thumbnailApiUrl(libraryId, c.relPath) const coverUrl = thumbnailApiUrl(libraryId, c.relPath)
const issue: ComicIssue = { const issue: ComicIssue = {
id: encodeURIComponent(c.relPath), id: encodeURIComponent(c.relPath),
title: path.basename(c.filename, path.extname(c.filename)), title: path.basename(c.filename, path.extname(c.filename)),
issueNumber: parseIssueNumber(c.filename), issueNumber: parseIssueNumber(c.filename),
pageCount: pageCounts[i], pageCount,
coverUrl, coverUrl,
filePath: c.relPath, filePath: c.relPath,
isStandalone: c.isStandalone, isStandalone: c.isStandalone,

View File

@@ -3,7 +3,7 @@ import fs from 'fs'
import path from 'path' import path from 'path'
import { spawn } from 'child_process' import { spawn } from 'child_process'
import sharp from 'sharp' import sharp from 'sharp'
import AdmZip from 'adm-zip' import { extractFirstZipImage } from './zip-utils'
const CACHE_DIR = path.resolve(process.cwd(), '.thumbnails') const CACHE_DIR = path.resolve(process.cwd(), '.thumbnails')
const THUMBNAIL_WIDTH = 400 const THUMBNAIL_WIDTH = 400
@@ -241,15 +241,7 @@ export async function getCbzThumbnailPath(
const cached = getCachedPath(cacheFile, absoluteFilePath) const cached = getCachedPath(cacheFile, absoluteFilePath)
if (cached) return cached if (cached) return cached
const zip = new AdmZip(absoluteFilePath) const buffer = await extractFirstZipImage(absoluteFilePath, CBZ_IMAGE_EXTENSIONS)
const entries = zip
.getEntries()
.filter((e) => !e.isDirectory && CBZ_IMAGE_EXTENSIONS.has(path.extname(e.entryName).toLowerCase()))
.sort((a, b) => a.entryName.localeCompare(b.entryName, undefined, { numeric: true, sensitivity: 'base' }))
if (entries.length === 0) throw new Error('No image entries found in CBZ')
const buffer = entries[0].getData()
const tmp = cacheFile + '.tmp' const tmp = cacheFile + '.tmp'
await sharp(buffer).resize(THUMBNAIL_WIDTH).jpeg({ quality: JPEG_QUALITY }).toFile(tmp) await sharp(buffer).resize(THUMBNAIL_WIDTH).jpeg({ quality: JPEG_QUALITY }).toFile(tmp)
fs.renameSync(tmp, cacheFile) fs.renameSync(tmp, cacheFile)

View File

@@ -19,10 +19,11 @@ export interface CdEntry {
/** /**
* Read a ZIP file's central directory without loading the entire archive. * Read a ZIP file's central directory without loading the entire archive.
* Opens only the last ~2264KB of the file (EOCD + central directory). * Returns null if no EOCD record is found (corrupt/non-ZIP file).
* Returns an empty array for a valid but empty archive.
*/ */
async function readCentralDirectory(fd: FileHandle, fileSize: number): Promise<CdEntry[]> { async function readCentralDirectory(fd: FileHandle, fileSize: number): Promise<CdEntry[] | null> {
if (fileSize < 22) return [] if (fileSize < 22) return null
// The EOCD record is within the last 65558 bytes (22-byte record + 65535-byte max comment). // The EOCD record is within the last 65558 bytes (22-byte record + 65535-byte max comment).
const tailLen = Math.min(65558, fileSize) const tailLen = Math.min(65558, fileSize)
@@ -34,12 +35,13 @@ async function readCentralDirectory(fd: FileHandle, fileSize: number): Promise<C
for (let i = tailLen - 22; i >= 0; i--) { for (let i = tailLen - 22; i >= 0; i--) {
if (tailBuf.readUInt32LE(i) === EOCD_SIG) { eocdOff = i; break } if (tailBuf.readUInt32LE(i) === EOCD_SIG) { eocdOff = i; break }
} }
if (eocdOff === -1) return [] if (eocdOff === -1) return null // no EOCD → corrupt
const entryCount = tailBuf.readUInt16LE(eocdOff + 10) const entryCount = tailBuf.readUInt16LE(eocdOff + 10)
const cdSize = tailBuf.readUInt32LE(eocdOff + 12) const cdSize = tailBuf.readUInt32LE(eocdOff + 12)
const cdOffset = tailBuf.readUInt32LE(eocdOff + 16) const cdOffset = tailBuf.readUInt32LE(eocdOff + 16)
if (cdOffset + cdSize > fileSize || cdSize === 0) return [] if (entryCount === 0) return [] // valid empty archive
if (cdOffset + cdSize > fileSize || cdSize === 0) return null // malformed
const cdBuf = Buffer.allocUnsafe(cdSize) const cdBuf = Buffer.allocUnsafe(cdSize)
await fd.read(cdBuf, 0, cdSize, cdOffset) await fd.read(cdBuf, 0, cdSize, cdOffset)
@@ -62,26 +64,44 @@ async function readCentralDirectory(fd: FileHandle, fileSize: number): Promise<C
return entries return entries
} }
/**
 * Error raised when a file cannot be interpreted as a ZIP archive — for example
 * when its central directory cannot be located or a local file header carries
 * the wrong signature.
 *
 * The fixed `code` gives callers a stable discriminator that does not depend on
 * `instanceof`.
 */
export class CorruptZipError extends Error {
  readonly code: 'ERR_CORRUPT_ZIP'

  constructor(absolutePath: string) {
    super(`Corrupt or invalid ZIP archive: ${absolutePath}`)
    this.code = 'ERR_CORRUPT_ZIP'
    this.name = 'CorruptZipError'
  }
}
/**
 * Type guard for {@link CorruptZipError}.
 *
 * Accepts real instances, and also any `Error` whose `code` is
 * `'ERR_CORRUPT_ZIP'`, so the check does not rely on `instanceof` alone.
 */
export function isCorruptZipError(err: unknown): err is CorruptZipError {
  if (err instanceof CorruptZipError) return true
  if (!(err instanceof Error)) return false
  return (err as CorruptZipError).code === 'ERR_CORRUPT_ZIP'
}
/**
 * Count the image entries inside a ZIP/CBZ archive by reading only its central
 * directory.
 *
 * @param absolutePath     Absolute path of the archive.
 * @param imageExtensions  Lower-case extensions (including the dot) that count as images.
 * @returns `{ pageCount, valid }`. `valid` is `false` only when the archive itself
 *   looks corrupt: the central directory could not be read, or parsing it failed.
 *   An operating-system failure (permissions, too many open files, I/O error, file
 *   vanished) is reported as `{ pageCount: 0, valid: true }`, because it says nothing
 *   about the archive's contents and callers act destructively on `valid: false`.
 */
export async function countZipImages(
  absolutePath: string,
  imageExtensions: Set<string>
): Promise<{ pageCount: number; valid: boolean }> {
  let fd: FileHandle | null = null
  try {
    fd = await open(absolutePath, 'r')
    const { size } = await fd.stat()
    const entries = await readCentralDirectory(fd, size)
    if (entries === null) return { pageCount: 0, valid: false }

    const pageCount = entries.filter((e) => {
      if (e.name.endsWith('/')) return false // directory entry
      const dot = e.name.lastIndexOf('.')
      return dot !== -1 && imageExtensions.has(e.name.slice(dot).toLowerCase())
    }).length
    return { pageCount, valid: true }
  } catch (err) {
    // Node system errors carry a `syscall` name. Those are environment problems,
    // not evidence of a bad archive, so do not flag the file as corrupt.
    if (err instanceof Error && typeof (err as NodeJS.ErrnoException).syscall === 'string') {
      console.warn(`[zip-utils] Could not read archive ${absolutePath}:`, err)
      return { pageCount: 0, valid: true }
    }
    // Anything else came from parsing the archive's own bytes.
    return { pageCount: 0, valid: false }
  } finally {
    await fd?.close()
  }
}
@@ -128,6 +148,7 @@ export async function findZipEntry(absolutePath: string, entryName: string): Pro
fd = await open(absolutePath, 'r') fd = await open(absolutePath, 'r')
const { size } = await fd.stat() const { size } = await fd.stat()
const entries = await readCentralDirectory(fd, size) const entries = await readCentralDirectory(fd, size)
if (!entries) return null
const lower = entryName.toLowerCase() const lower = entryName.toLowerCase()
return entries.find((e) => { return entries.find((e) => {
const n = e.name.toLowerCase() const n = e.name.toLowerCase()
@@ -140,6 +161,55 @@ export async function findZipEntry(absolutePath: string, entryName: string): Pro
} }
} }
/**
 * Extract the first image entry (natural sort by entry name) from a ZIP/CBZ archive.
 * Reads only the central directory and the single chosen entry — no full-file load.
 *
 * @param absolutePath     Absolute path of the archive.
 * @param imageExtensions  Lower-case extensions (including the dot) that count as images.
 * @returns The entry's bytes: as-is for stored entries (method 0), inflated for
 *   deflate entries (method 8).
 * @throws CorruptZipError if the central directory cannot be read, the chosen entry's
 *   local header or data lies outside the file, a read comes back short, or the local
 *   header signature is wrong.
 * @throws Error if the archive contains no image entries or the entry uses an
 *   unsupported compression method. Errors from inflating the data propagate unchanged.
 */
export async function extractFirstZipImage(
  absolutePath: string,
  imageExtensions: Set<string>
): Promise<Buffer> {
  const LOCAL_HEADER_LEN = 30 // fixed-size part of a ZIP local file header

  let fd: FileHandle | null = null
  try {
    fd = await open(absolutePath, 'r')
    const { size } = await fd.stat()
    const entries = await readCentralDirectory(fd, size)
    if (entries === null) throw new CorruptZipError(absolutePath)

    const imageEntries = entries
      .filter((e) => {
        if (e.name.endsWith('/')) return false // directory entry
        const dot = e.name.lastIndexOf('.')
        return dot !== -1 && imageExtensions.has(e.name.slice(dot).toLowerCase())
      })
      .sort((a, b) =>
        a.name.localeCompare(b.name, undefined, { numeric: true, sensitivity: 'base' })
      )
    if (imageEntries.length === 0) throw new Error(`No image entries in archive: ${absolutePath}`)
    const entry = imageEntries[0]

    // Reject unsupported methods before allocating or reading anything for the entry.
    if (entry.compressionMethod !== 0 && entry.compressionMethod !== 8) {
      throw new Error(`Unsupported compression method ${entry.compressionMethod}: ${absolutePath}`)
    }

    // Read the local file header to get the exact data offset. The buffer is
    // uninitialised, so a short read must be treated as corruption rather than parsed.
    if (entry.localHeaderOffset + LOCAL_HEADER_LEN > size) throw new CorruptZipError(absolutePath)
    const lfhBuf = Buffer.allocUnsafe(LOCAL_HEADER_LEN)
    const lfhRead = await fd.read(lfhBuf, 0, LOCAL_HEADER_LEN, entry.localHeaderOffset)
    if (lfhRead.bytesRead !== LOCAL_HEADER_LEN || lfhBuf.readUInt32LE(0) !== LFH_SIG) {
      throw new CorruptZipError(absolutePath)
    }

    const localFilenameLen = lfhBuf.readUInt16LE(26)
    const localExtraLen = lfhBuf.readUInt16LE(28)
    const dataOffset = entry.localHeaderOffset + LOCAL_HEADER_LEN + localFilenameLen + localExtraLen

    // Bound the allocation by the real file size: a bogus compressedSize must not
    // trigger a multi-gigabyte allocation, and the data must lie inside the file.
    if (dataOffset + entry.compressedSize > size) throw new CorruptZipError(absolutePath)
    const compressedBuf = Buffer.allocUnsafe(entry.compressedSize)
    const dataRead = await fd.read(compressedBuf, 0, entry.compressedSize, dataOffset)
    if (dataRead.bytesRead !== entry.compressedSize) throw new CorruptZipError(absolutePath)

    if (entry.compressionMethod === 0) return compressedBuf
    return (await inflateRaw(compressedBuf)) as Buffer
  } finally {
    await fd?.close()
  }
}
/** /**
* Process an array of items concurrently with a concurrency limit. * Process an array of items concurrently with a concurrency limit.
* Preserves index order in results. * Preserves index order in results.