trash corrupt files

2026-04-20 11:44:30 -04:00
parent 7d2ae7e95c
commit dee9356004
4 changed files with 152 additions and 24 deletions
--- a/src/app/api/thumbnail/route.ts
+++ b/src/app/api/thumbnail/route.ts
@@ -1,9 +1,11 @@
 import { NextRequest, NextResponse } from 'next/server'
 import fs from 'fs'
+import fsPromises from 'fs/promises'
 import path from 'path'
 import { getLibrary, resolveLibraryRoot, resolveAndJail } from '@/lib/libraries'
 import { getThumbnailPath, getCbzThumbnailPath } from '@/lib/thumbnails'
 import { requireLibraryAccess } from '@/lib/auth'
+import { isCorruptZipError } from '@/lib/zip-utils'

 const VIDEO_EXTENSIONS = new Set(['.mp4', '.mov', '.mkv', '.avi', '.webm', '.m4v'])
 const IMAGE_EXTENSIONS = new Set(['.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp', '.tiff', '.tif'])
@@ -63,7 +65,30 @@ export async function GET(request: NextRequest) {
      },
    })
  } catch (err) {
-    console.error(`Thumbnail generation failed for ${filePath}:`, err)
+    if (isCorruptZipError(err)) {
+      // Move the corrupt archive to the library's .trash folder so it is excluded
+      // from future scans and hidden from the UI.
+      const trashDir = path.join(root, '.trash')
+      const filename = path.basename(filePath)
+      let dest = path.join(trashDir, filename)
+      fsPromises.mkdir(trashDir, { recursive: true })
+        .then(async () => {
+          if (fs.existsSync(dest)) {
+            const ext = path.extname(filename)
+            dest = path.join(trashDir, `${path.basename(filename, ext)}_${Date.now()}${ext}`)
+          }
+          await fsPromises.rename(filePath, dest).catch(async (e: NodeJS.ErrnoException) => {
+            if (e.code === 'EXDEV') {
+              await fsPromises.copyFile(filePath, dest)
+              await fsPromises.unlink(filePath)
+            } else throw e
+          })
+          console.log(`[thumbnail] Moved corrupt archive to trash: ${path.relative(root, filePath)}`)
+        })
+        .catch((e) => console.warn(`[thumbnail] Could not move corrupt archive to trash:`, e))
+    } else {
+      console.error(`Thumbnail generation failed for ${filePath}:`, err)
+    }
    return new NextResponse(null, { status: 404 })
  }
 }
--- a/src/lib/comics.ts
+++ b/src/lib/comics.ts
@@ -5,6 +5,7 @@ import type { ComicIssue, ComicSeries } from '@/types'
 import { getDb } from './db'
 import { HIDDEN_FILES, thumbnailApiUrl } from './media-utils'
 import { countZipImages, mapConcurrent } from './zip-utils'
+import fsPromises from 'fs/promises'

 const CBZ_EXTENSIONS = new Set(['.cbz'])
 const CBZ_IMAGE_EXTENSIONS = new Set(['.jpg', '.jpeg', '.png', '.webp', '.gif'])
@@ -28,6 +29,30 @@ export interface ScannedComicSeries extends ComicSeries {
  issues: ComicIssue[]
 }

+const TRASH_DIR = '.trash'
+/** Moves absPath into <libraryRoot>/.trash (created on demand); rejects if the move fails. */
+async function moveToTrash(absPath: string, libraryRoot: string): Promise<void> {
+  const trashDir = path.join(libraryRoot, TRASH_DIR)
+  await fsPromises.mkdir(trashDir, { recursive: true })
+  const filename = path.basename(absPath)
+  let dest = path.join(trashDir, filename)
+  if (fs.existsSync(dest)) { // name already taken in the trash: add a timestamp suffix instead of overwriting
+    const ext = path.extname(filename)
+    const base = path.basename(filename, ext)
+    dest = path.join(trashDir, `${base}_${Date.now()}${ext}`)
+  }
+  await fsPromises.rename(absPath, dest).catch(async (err: NodeJS.ErrnoException) => {
+    if (err.code === 'EXDEV') {
+      // Source and destination are on different filesystems — copy then delete.
+      await fsPromises.copyFile(absPath, dest)
+      await fsPromises.unlink(absPath)
+    } else {
+      throw err
+    }
+  })
+  console.log(`[scanner] Moved corrupt archive to trash: ${path.relative(libraryRoot, absPath)}`)
+}
+
 interface CollectedCbz {
  absPath: string
  filename: string
@@ -93,22 +118,38 @@ export async function scanComicsLibrary(

  // Phase 2: Count pages for all CBZ files concurrently (10 at a time) by reading
  // only each archive's central directory — no full-file reads.
-  const pageCounts = await mapConcurrent(collected, 10, (c) =>
+  const scanResults = await mapConcurrent(collected, 10, (c) =>
    countZipImages(c.absPath, CBZ_IMAGE_EXTENSIONS)
  )

-  // Phase 3: Build the result array from collected metadata + page counts.
+  // Move corrupt archives to the library's .trash folder and exclude them from indexing.
+  const movePromises: Promise<void>[] = []
+  const valid: Array<{ cbz: CollectedCbz; pageCount: number }> = []
+  for (let i = 0; i < collected.length; i++) {
+    const result = scanResults[i]
+    if (!result.valid) {
+      movePromises.push(
+        moveToTrash(collected[i].absPath, libraryRoot).catch((err) =>
+          console.warn(`[scanner] Could not move corrupt archive to trash: ${collected[i].absPath}`, err)
+        )
+      )
+      continue
+    }
+    valid.push({ cbz: collected[i], pageCount: result.pageCount })
+  }
+  if (movePromises.length > 0) await Promise.all(movePromises)
+
+  // Phase 3: Build the result array from valid files only.
  const seriesMap = new Map<string, ScannedComicSeries>()
  const standaloneIssues: ComicIssue[] = []

-  for (let i = 0; i < collected.length; i++) {
-    const c = collected[i]
+  for (const { cbz: c, pageCount } of valid) {
    const coverUrl = thumbnailApiUrl(libraryId, c.relPath)
    const issue: ComicIssue = {
      id: encodeURIComponent(c.relPath),
      title: path.basename(c.filename, path.extname(c.filename)),
      issueNumber: parseIssueNumber(c.filename),
-      pageCount: pageCounts[i],
+      pageCount,
      coverUrl,
      filePath: c.relPath,
      isStandalone: c.isStandalone,
--- a/src/lib/thumbnails.ts
+++ b/src/lib/thumbnails.ts
@@ -3,7 +3,7 @@ import fs from 'fs'
 import path from 'path'
 import { spawn } from 'child_process'
 import sharp from 'sharp'
-import AdmZip from 'adm-zip'
+import { extractFirstZipImage } from './zip-utils'

 const CACHE_DIR = path.resolve(process.cwd(), '.thumbnails')
 const THUMBNAIL_WIDTH = 400
@@ -241,15 +241,7 @@ export async function getCbzThumbnailPath(
  const cached = getCachedPath(cacheFile, absoluteFilePath)
  if (cached) return cached

-  const zip = new AdmZip(absoluteFilePath)
-  const entries = zip
-    .getEntries()
-    .filter((e) => !e.isDirectory && CBZ_IMAGE_EXTENSIONS.has(path.extname(e.entryName).toLowerCase()))
-    .sort((a, b) => a.entryName.localeCompare(b.entryName, undefined, { numeric: true, sensitivity: 'base' }))
-
-  if (entries.length === 0) throw new Error('No image entries found in CBZ')
-
-  const buffer = entries[0].getData()
+  const buffer = await extractFirstZipImage(absoluteFilePath, CBZ_IMAGE_EXTENSIONS)
  const tmp = cacheFile + '.tmp'
  await sharp(buffer).resize(THUMBNAIL_WIDTH).jpeg({ quality: JPEG_QUALITY }).toFile(tmp)
  fs.renameSync(tmp, cacheFile)
--- a/src/lib/zip-utils.ts
+++ b/src/lib/zip-utils.ts
@@ -19,10 +19,11 @@ export interface CdEntry {

 /**
 * Read a ZIP file's central directory without loading the entire archive.
- * Opens only the last ~22–64KB of the file (EOCD + central directory).
+ * Returns null if the file is under 22 bytes, has no EOCD record, or the central directory is empty or runs past end of file.
+ * Returns an empty array for a valid but empty archive.
 */
-async function readCentralDirectory(fd: FileHandle, fileSize: number): Promise<CdEntry[]> {
-  if (fileSize < 22) return []
+async function readCentralDirectory(fd: FileHandle, fileSize: number): Promise<CdEntry[] | null> {
+  if (fileSize < 22) return null

  // The EOCD record is within the last 65558 bytes (22-byte record + 65535-byte max comment).
  const tailLen = Math.min(65558, fileSize)
@@ -34,12 +35,13 @@ async function readCentralDirectory(fd: FileHandle, fileSize: number): Promise<C
  for (let i = tailLen - 22; i >= 0; i--) {
    if (tailBuf.readUInt32LE(i) === EOCD_SIG) { eocdOff = i; break }
  }
-  if (eocdOff === -1) return []
+  if (eocdOff === -1) return null  // no EOCD → corrupt

  const entryCount = tailBuf.readUInt16LE(eocdOff + 10)
  const cdSize     = tailBuf.readUInt32LE(eocdOff + 12)
  const cdOffset   = tailBuf.readUInt32LE(eocdOff + 16)
-  if (cdOffset + cdSize > fileSize || cdSize === 0) return []
+  if (entryCount === 0) return []  // valid empty archive
+  if (cdOffset + cdSize > fileSize || cdSize === 0) return null  // malformed

  const cdBuf = Buffer.allocUnsafe(cdSize)
  await fd.read(cdBuf, 0, cdSize, cdOffset)
@@ -62,26 +64,44 @@ async function readCentralDirectory(fd: FileHandle, fileSize: number): Promise<C
  return entries
 }

+/** Thrown for a structurally invalid ZIP: unreadable central directory or bad local-header signature. */
+export class CorruptZipError extends Error {
+  readonly code = 'ERR_CORRUPT_ZIP' // string tag that isCorruptZipError also accepts in place of an instanceof match
+  constructor(absolutePath: string) {
+    super(`Corrupt or invalid ZIP archive: ${absolutePath}`)
+    this.name = 'CorruptZipError'
+  }
+}
+/** Type guard for CorruptZipError: matches by class, or by the 'ERR_CORRUPT_ZIP' code on any Error. */
+export function isCorruptZipError(err: unknown): err is CorruptZipError {
+  return err instanceof CorruptZipError ||
+    (err instanceof Error && (err as CorruptZipError).code === 'ERR_CORRUPT_ZIP')
+}
+
 /**
 * Count the number of image entries inside a ZIP/CBZ archive by reading
 * only its central directory — no full-file read required.
+ * Returns { pageCount, valid }; valid=false means the central directory could not be
+ * read (readCentralDirectory returned null) or opening/reading the file threw.
 */
 export async function countZipImages(
  absolutePath: string,
  imageExtensions: Set<string>
-): Promise<number> {
+): Promise<{ pageCount: number; valid: boolean }> {
  let fd: FileHandle | null = null
  try {
    fd = await open(absolutePath, 'r')
    const { size } = await fd.stat()
    const entries = await readCentralDirectory(fd, size)
-    return entries.filter((e) => {
+    if (entries === null) return { pageCount: 0, valid: false }
+    const pageCount = entries.filter((e) => {
      if (e.name.endsWith('/')) return false
      const dot = e.name.lastIndexOf('.')
      return dot !== -1 && imageExtensions.has(e.name.slice(dot).toLowerCase())
    }).length
+    return { pageCount, valid: true }
  } catch {
-    return 0
+    return { pageCount: 0, valid: false }
  } finally {
    await fd?.close()
  }
@@ -128,6 +148,7 @@ export async function findZipEntry(absolutePath: string, entryName: string): Pro
    fd = await open(absolutePath, 'r')
    const { size } = await fd.stat()
    const entries = await readCentralDirectory(fd, size)
+    if (!entries) return null
    const lower = entryName.toLowerCase()
    return entries.find((e) => {
      const n = e.name.toLowerCase()
@@ -140,6 +161,55 @@ export async function findZipEntry(absolutePath: string, entryName: string): Pro
  }
 }

+/**
+ * Extract the first image entry (natural sort) from a ZIP/CBZ archive, reading only the central directory and that entry.
+ * Throws CorruptZipError when the central directory is unreadable or the entry's local header signature is wrong.
+ * Throws a plain Error when no entry matches imageExtensions or the compression method is neither 0 nor 8.
+ */
+export async function extractFirstZipImage(
+  absolutePath: string,
+  imageExtensions: Set<string>
+): Promise<Buffer> {
+  let fd: FileHandle | null = null
+  try {
+    fd = await open(absolutePath, 'r')
+    const { size } = await fd.stat()
+    const entries = await readCentralDirectory(fd, size)
+    if (entries === null) throw new CorruptZipError(absolutePath)
+    // Keep image files only (names ending in '/' are skipped), ordered numerically so "2.jpg" sorts before "10.jpg".
+    const imageEntries = entries
+      .filter((e) => {
+        if (e.name.endsWith('/')) return false
+        const dot = e.name.lastIndexOf('.')
+        return dot !== -1 && imageExtensions.has(e.name.slice(dot).toLowerCase())
+      })
+      .sort((a, b) =>
+        a.name.localeCompare(b.name, undefined, { numeric: true, sensitivity: 'base' })
+      )
+
+    if (imageEntries.length === 0) throw new Error(`No image entries in archive: ${absolutePath}`)
+
+    const entry = imageEntries[0]
+
+    // Read the 30-byte fixed part of the local file header; the data starts after its variable-length name and extra fields.
+    const lfhBuf = Buffer.allocUnsafe(30)
+    await fd.read(lfhBuf, 0, 30, entry.localHeaderOffset)
+    if (lfhBuf.readUInt32LE(0) !== LFH_SIG) throw new CorruptZipError(absolutePath)
+    const localFilenameLen = lfhBuf.readUInt16LE(26)
+    const localExtraLen    = lfhBuf.readUInt16LE(28)
+    const dataOffset = entry.localHeaderOffset + 30 + localFilenameLen + localExtraLen
+    // NOTE(review): the bytesRead results of fd.read are ignored; on a truncated file the allocUnsafe buffers stay partly unfilled.
+    const compressedBuf = Buffer.allocUnsafe(entry.compressedSize)
+    await fd.read(compressedBuf, 0, entry.compressedSize, dataOffset)
+    // Method 0 (stored) is returned as-is; method 8 (deflate) goes through inflateRaw; anything else is rejected.
+    if (entry.compressionMethod === 0) return compressedBuf
+    if (entry.compressionMethod === 8) return await inflateRaw(compressedBuf) as Buffer
+    throw new Error(`Unsupported compression method ${entry.compressionMethod}: ${absolutePath}`)
+  } finally {
+    await fd?.close() // runs on both return and throw; no-op when open() itself failed
+  }
+}
+
 /**
 * Process an array of items concurrently with a concurrency limit.
 * Preserves index order in results.