trash corrupt files
All checks were successful
Build and Push Docker Image / build (push) Successful in 57s

This commit is contained in:
Garret Patti
2026-04-20 11:44:30 -04:00
parent 7d2ae7e95c
commit dee9356004
4 changed files with 152 additions and 24 deletions

View File

@@ -1,9 +1,11 @@
import { NextRequest, NextResponse } from 'next/server'
import fs from 'fs'
import fsPromises from 'fs/promises'
import path from 'path'
import { getLibrary, resolveLibraryRoot, resolveAndJail } from '@/lib/libraries'
import { getThumbnailPath, getCbzThumbnailPath } from '@/lib/thumbnails'
import { requireLibraryAccess } from '@/lib/auth'
import { isCorruptZipError } from '@/lib/zip-utils'
const VIDEO_EXTENSIONS = new Set(['.mp4', '.mov', '.mkv', '.avi', '.webm', '.m4v'])
const IMAGE_EXTENSIONS = new Set(['.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp', '.tiff', '.tif'])
@@ -63,7 +65,30 @@ export async function GET(request: NextRequest) {
},
})
} catch (err) {
console.error(`Thumbnail generation failed for ${filePath}:`, err)
if (isCorruptZipError(err)) {
// Move the corrupt archive to the library's .trash folder so it is excluded
// from future scans and hidden from the UI.
const trashDir = path.join(root, '.trash')
const filename = path.basename(filePath)
let dest = path.join(trashDir, filename)
fsPromises.mkdir(trashDir, { recursive: true })
.then(async () => {
if (fs.existsSync(dest)) {
const ext = path.extname(filename)
dest = path.join(trashDir, `${path.basename(filename, ext)}_${Date.now()}${ext}`)
}
await fsPromises.rename(filePath, dest).catch(async (e: NodeJS.ErrnoException) => {
if (e.code === 'EXDEV') {
await fsPromises.copyFile(filePath, dest)
await fsPromises.unlink(filePath)
} else throw e
})
console.log(`[thumbnail] Moved corrupt archive to trash: ${path.relative(root, filePath)}`)
})
.catch((e) => console.warn(`[thumbnail] Could not move corrupt archive to trash:`, e))
} else {
console.error(`Thumbnail generation failed for ${filePath}:`, err)
}
return new NextResponse(null, { status: 404 })
}
}

View File

@@ -5,6 +5,7 @@ import type { ComicIssue, ComicSeries } from '@/types'
import { getDb } from './db'
import { HIDDEN_FILES, thumbnailApiUrl } from './media-utils'
import { countZipImages, mapConcurrent } from './zip-utils'
import fsPromises from 'fs/promises'
const CBZ_EXTENSIONS = new Set(['.cbz'])
const CBZ_IMAGE_EXTENSIONS = new Set(['.jpg', '.jpeg', '.png', '.webp', '.gif'])
@@ -28,6 +29,30 @@ export interface ScannedComicSeries extends ComicSeries {
issues: ComicIssue[]
}
const TRASH_DIR = '.trash'

/**
 * Move a corrupt archive into the library's `.trash` folder.
 *
 * The destination name is reserved with an exclusive create (`wx`) before the
 * file is moved. A plain "exists? then rename" sequence is racy: several moves
 * run concurrently during a scan, and two files that share a basename (for
 * example `SeriesA/001.cbz` and `SeriesB/001.cbz`) could both observe a free
 * destination, after which the second rename would silently overwrite the
 * first. On a name collision a `_<timestamp>` suffix is appended, followed by
 * an attempt counter if that name is taken as well.
 *
 * @param absPath Absolute path of the file to move.
 * @param libraryRoot Absolute path of the library root; the trash folder is
 *   created directly beneath it.
 * @throws If the trash folder cannot be created, no free destination name can
 *   be reserved, or the move itself fails. On a failed move the reserved
 *   placeholder is removed again (best effort).
 */
async function moveToTrash(absPath: string, libraryRoot: string): Promise<void> {
  const trashDir = path.join(libraryRoot, TRASH_DIR)
  await fsPromises.mkdir(trashDir, { recursive: true })

  const filename = path.basename(absPath)
  const ext = path.extname(filename)
  const base = path.basename(filename, ext)

  // Atomically claim a destination name: 'wx' fails with EEXIST when the file
  // already exists, so no two callers can ever claim the same name.
  const maxAttempts = 100
  let dest = path.join(trashDir, filename)
  for (let attempt = 0; ; attempt++) {
    try {
      const placeholder = await fsPromises.open(dest, 'wx')
      await placeholder.close()
      break
    } catch (err) {
      if ((err as NodeJS.ErrnoException).code !== 'EEXIST' || attempt >= maxAttempts) throw err
      const suffix = attempt === 0 ? `${Date.now()}` : `${Date.now()}_${attempt}`
      dest = path.join(trashDir, `${base}_${suffix}${ext}`)
    }
  }

  try {
    try {
      // Replaces the empty placeholder we just created.
      await fsPromises.rename(absPath, dest)
    } catch (err) {
      if ((err as NodeJS.ErrnoException).code !== 'EXDEV') throw err
      // Source and destination are on different filesystems — copy then delete.
      await fsPromises.copyFile(absPath, dest)
      await fsPromises.unlink(absPath)
    }
  } catch (err) {
    // Best-effort cleanup so a failed move does not leave an empty or partial
    // file in the trash; the original error is what the caller needs to see.
    await fsPromises.unlink(dest).catch(() => undefined)
    throw err
  }

  console.log(`[scanner] Moved corrupt archive to trash: ${path.relative(libraryRoot, absPath)}`)
}
interface CollectedCbz {
absPath: string
filename: string
@@ -93,22 +118,38 @@ export async function scanComicsLibrary(
// Phase 2: Count pages for all CBZ files concurrently (10 at a time) by reading
// only each archive's central directory — no full-file reads.
const pageCounts = await mapConcurrent(collected, 10, (c) =>
const scanResults = await mapConcurrent(collected, 10, (c) =>
countZipImages(c.absPath, CBZ_IMAGE_EXTENSIONS)
)
// Phase 3: Build the result array from collected metadata + page counts.
// Move corrupt archives to the library's .trash folder and exclude them from indexing.
const movePromises: Promise<void>[] = []
const valid: Array<{ cbz: CollectedCbz; pageCount: number }> = []
for (let i = 0; i < collected.length; i++) {
const result = scanResults[i]
if (!result.valid) {
movePromises.push(
moveToTrash(collected[i].absPath, libraryRoot).catch((err) =>
console.warn(`[scanner] Could not move corrupt archive to trash: ${collected[i].absPath}`, err)
)
)
continue
}
valid.push({ cbz: collected[i], pageCount: result.pageCount })
}
if (movePromises.length > 0) await Promise.all(movePromises)
// Phase 3: Build the result array from valid files only.
const seriesMap = new Map<string, ScannedComicSeries>()
const standaloneIssues: ComicIssue[] = []
for (let i = 0; i < collected.length; i++) {
const c = collected[i]
for (const { cbz: c, pageCount } of valid) {
const coverUrl = thumbnailApiUrl(libraryId, c.relPath)
const issue: ComicIssue = {
id: encodeURIComponent(c.relPath),
title: path.basename(c.filename, path.extname(c.filename)),
issueNumber: parseIssueNumber(c.filename),
pageCount: pageCounts[i],
pageCount,
coverUrl,
filePath: c.relPath,
isStandalone: c.isStandalone,

View File

@@ -3,7 +3,7 @@ import fs from 'fs'
import path from 'path'
import { spawn } from 'child_process'
import sharp from 'sharp'
import AdmZip from 'adm-zip'
import { extractFirstZipImage } from './zip-utils'
const CACHE_DIR = path.resolve(process.cwd(), '.thumbnails')
const THUMBNAIL_WIDTH = 400
@@ -241,15 +241,7 @@ export async function getCbzThumbnailPath(
const cached = getCachedPath(cacheFile, absoluteFilePath)
if (cached) return cached
const zip = new AdmZip(absoluteFilePath)
const entries = zip
.getEntries()
.filter((e) => !e.isDirectory && CBZ_IMAGE_EXTENSIONS.has(path.extname(e.entryName).toLowerCase()))
.sort((a, b) => a.entryName.localeCompare(b.entryName, undefined, { numeric: true, sensitivity: 'base' }))
if (entries.length === 0) throw new Error('No image entries found in CBZ')
const buffer = entries[0].getData()
const buffer = await extractFirstZipImage(absoluteFilePath, CBZ_IMAGE_EXTENSIONS)
const tmp = cacheFile + '.tmp'
await sharp(buffer).resize(THUMBNAIL_WIDTH).jpeg({ quality: JPEG_QUALITY }).toFile(tmp)
fs.renameSync(tmp, cacheFile)

View File

@@ -19,10 +19,11 @@ export interface CdEntry {
/**
* Read a ZIP file's central directory without loading the entire archive.
* Opens only the last ~2264KB of the file (EOCD + central directory).
* Returns null if no EOCD record is found (corrupt/non-ZIP file).
* Returns an empty array for a valid but empty archive.
*/
async function readCentralDirectory(fd: FileHandle, fileSize: number): Promise<CdEntry[]> {
if (fileSize < 22) return []
async function readCentralDirectory(fd: FileHandle, fileSize: number): Promise<CdEntry[] | null> {
if (fileSize < 22) return null
// The EOCD record is within the last 65558 bytes (22-byte record + 65535-byte max comment).
const tailLen = Math.min(65558, fileSize)
@@ -34,12 +35,13 @@ async function readCentralDirectory(fd: FileHandle, fileSize: number): Promise<C
for (let i = tailLen - 22; i >= 0; i--) {
if (tailBuf.readUInt32LE(i) === EOCD_SIG) { eocdOff = i; break }
}
if (eocdOff === -1) return []
if (eocdOff === -1) return null // no EOCD → corrupt
const entryCount = tailBuf.readUInt16LE(eocdOff + 10)
const cdSize = tailBuf.readUInt32LE(eocdOff + 12)
const cdOffset = tailBuf.readUInt32LE(eocdOff + 16)
if (cdOffset + cdSize > fileSize || cdSize === 0) return []
if (entryCount === 0) return [] // valid empty archive
if (cdOffset + cdSize > fileSize || cdSize === 0) return null // malformed
const cdBuf = Buffer.allocUnsafe(cdSize)
await fd.read(cdBuf, 0, cdSize, cdOffset)
@@ -62,26 +64,44 @@ async function readCentralDirectory(fd: FileHandle, fileSize: number): Promise<C
return entries
}
/**
 * Error signalling that a file could not be parsed as a ZIP archive, e.g. its
 * central directory could not be located or a local file header carries the
 * wrong signature.
 *
 * Carries a stable `code` of `'ERR_CORRUPT_ZIP'` so the error can be recognised
 * without relying on `instanceof`.
 */
export class CorruptZipError extends Error {
  readonly code = 'ERR_CORRUPT_ZIP' as const

  /** @param absolutePath Path of the offending archive; embedded in the message. */
  constructor(absolutePath: string) {
    super('Corrupt or invalid ZIP archive: ' + absolutePath)
    this.name = 'CorruptZipError'
  }
}
/**
 * Type guard for {@link CorruptZipError}.
 *
 * Accepts real `CorruptZipError` instances as well as any other `Error` whose
 * `code` property equals `'ERR_CORRUPT_ZIP'`. Values that are not `Error`
 * instances are never matched, whatever properties they carry.
 */
export function isCorruptZipError(err: unknown): err is CorruptZipError {
  if (err instanceof CorruptZipError) return true
  if (!(err instanceof Error)) return false
  return (err as CorruptZipError).code === 'ERR_CORRUPT_ZIP'
}
/**
* Count the number of image entries inside a ZIP/CBZ archive by reading
* only its central directory — no full-file read required.
* Returns { pageCount, valid } where valid=false means the archive has no
* valid EOCD record (corrupt file).
*/
export async function countZipImages(
absolutePath: string,
imageExtensions: Set<string>
): Promise<number> {
): Promise<{ pageCount: number; valid: boolean }> {
let fd: FileHandle | null = null
try {
fd = await open(absolutePath, 'r')
const { size } = await fd.stat()
const entries = await readCentralDirectory(fd, size)
return entries.filter((e) => {
if (entries === null) return { pageCount: 0, valid: false }
const pageCount = entries.filter((e) => {
if (e.name.endsWith('/')) return false
const dot = e.name.lastIndexOf('.')
return dot !== -1 && imageExtensions.has(e.name.slice(dot).toLowerCase())
}).length
return { pageCount, valid: true }
} catch {
return 0
return { pageCount: 0, valid: false }
} finally {
await fd?.close()
}
@@ -128,6 +148,7 @@ export async function findZipEntry(absolutePath: string, entryName: string): Pro
fd = await open(absolutePath, 'r')
const { size } = await fd.stat()
const entries = await readCentralDirectory(fd, size)
if (!entries) return null
const lower = entryName.toLowerCase()
return entries.find((e) => {
const n = e.name.toLowerCase()
@@ -140,6 +161,55 @@ export async function findZipEntry(absolutePath: string, entryName: string): Pro
}
}
/**
 * Extract the first image entry (natural sort) from a ZIP/CBZ archive.
 * Reads only the central directory and the single chosen entry — no full-file load.
 *
 * Every read is verified to have filled its buffer, and the entry's header and
 * data ranges are checked against the file size, so a truncated archive or a
 * central-directory entry that points outside the file is reported as corrupt
 * instead of yielding uninitialised buffer contents.
 *
 * @param absolutePath Path of the archive to read.
 * @param imageExtensions Lower-case extensions (with leading dot) that count as images.
 * @returns The uncompressed bytes of the first image entry.
 * @throws CorruptZipError if the central directory cannot be read, the local
 *   file header signature is wrong, or the entry extends past the end of the file.
 * @throws Error if the archive has no image entries, the entry uses ZIP64 size
 *   fields, or its compression method is neither stored (0) nor deflate (8).
 */
export async function extractFirstZipImage(
  absolutePath: string,
  imageExtensions: Set<string>
): Promise<Buffer> {
  let fd: FileHandle | null = null
  try {
    fd = await open(absolutePath, 'r')
    const handle = fd
    const { size } = await handle.stat()
    const entries = await readCentralDirectory(handle, size)
    if (entries === null) throw new CorruptZipError(absolutePath)

    // Fill `buf` completely starting at `position`; resolves to false when the
    // file ends before the buffer is full.
    const readExact = async (buf: Buffer, position: number): Promise<boolean> => {
      let filled = 0
      while (filled < buf.length) {
        const { bytesRead } = await handle.read(buf, filled, buf.length - filled, position + filled)
        if (bytesRead === 0) return false
        filled += bytesRead
      }
      return true
    }

    const imageEntries = entries
      .filter((e) => {
        if (e.name.endsWith('/')) return false
        const dot = e.name.lastIndexOf('.')
        return dot !== -1 && imageExtensions.has(e.name.slice(dot).toLowerCase())
      })
      .sort((a, b) =>
        a.name.localeCompare(b.name, undefined, { numeric: true, sensitivity: 'base' })
      )
    if (imageEntries.length === 0) throw new Error(`No image entries in archive: ${absolutePath}`)
    const entry = imageEntries[0]

    // 0xFFFFFFFF in a 32-bit size/offset field means "see the ZIP64 extra
    // field". NOTE(review): assumes readCentralDirectory passes the raw 32-bit
    // values through — confirm. Reported as a plain Error (not corrupt) so a
    // valid ZIP64 archive is never treated as damaged by callers.
    const ZIP64_SENTINEL = 0xffffffff
    if (entry.compressedSize === ZIP64_SENTINEL || entry.localHeaderOffset === ZIP64_SENTINEL) {
      throw new Error(`ZIP64 entries are not supported: ${absolutePath}`)
    }

    // Read local file header to get the exact data offset.
    const LFH_FIXED_LEN = 30
    if (entry.localHeaderOffset + LFH_FIXED_LEN > size) throw new CorruptZipError(absolutePath)
    const lfhBuf = Buffer.alloc(LFH_FIXED_LEN)
    if (!(await readExact(lfhBuf, entry.localHeaderOffset))) throw new CorruptZipError(absolutePath)
    if (lfhBuf.readUInt32LE(0) !== LFH_SIG) throw new CorruptZipError(absolutePath)

    const method = entry.compressionMethod
    if (method !== 0 && method !== 8) {
      throw new Error(`Unsupported compression method ${entry.compressionMethod}: ${absolutePath}`)
    }

    // The local header's own name/extra lengths may differ from the central
    // directory's, so the data offset must be derived from the local header.
    const localFilenameLen = lfhBuf.readUInt16LE(26)
    const localExtraLen = lfhBuf.readUInt16LE(28)
    const dataOffset = entry.localHeaderOffset + LFH_FIXED_LEN + localFilenameLen + localExtraLen
    if (dataOffset + entry.compressedSize > size) throw new CorruptZipError(absolutePath)

    // allocUnsafe is fine here: readExact guarantees every byte is overwritten.
    const compressedBuf = Buffer.allocUnsafe(entry.compressedSize)
    if (!(await readExact(compressedBuf, dataOffset))) throw new CorruptZipError(absolutePath)

    if (method === 0) return compressedBuf
    return await inflateRaw(compressedBuf) as Buffer
  } finally {
    await fd?.close()
  }
}
/**
* Process an array of items concurrently with a concurrency limit.
* Preserves index order in results.