trash corrupt files

2026-04-20 11:44:30 -04:00
parent 7d2ae7e95c
commit dee9356004
4 changed files with 152 additions and 24 deletions
--- a/src/app/api/thumbnail/route.ts
+++ b/src/app/api/thumbnail/route.ts
@@ -1,9 +1,11 @@
 import { NextRequest, NextResponse } from 'next/server'
 import fs from 'fs'
 import fsPromises from 'fs/promises'
 import path from 'path'
 import { getLibrary, resolveLibraryRoot, resolveAndJail } from '@/lib/libraries'
 import { getThumbnailPath, getCbzThumbnailPath } from '@/lib/thumbnails'
 import { requireLibraryAccess } from '@/lib/auth'
 import { isCorruptZipError } from '@/lib/zip-utils'
 const VIDEO_EXTENSIONS = new Set(['.mp4', '.mov', '.mkv', '.avi', '.webm', '.m4v'])
 const IMAGE_EXTENSIONS = new Set(['.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp', '.tiff', '.tif'])
@@ -63,7 +65,30 @@ export async function GET(request: NextRequest) {
      },
    })
  } catch (err) {
    if (isCorruptZipError(err)) {
      // Move the corrupt archive to the library's .trash folder so it is excluded
      // from future scans and hidden from the UI.
      const trashDir = path.join(root, '.trash')
      const filename = path.basename(filePath)
      let dest = path.join(trashDir, filename)
      fsPromises.mkdir(trashDir, { recursive: true })
        .then(async () => {
          if (fs.existsSync(dest)) {
            const ext = path.extname(filename)
            dest = path.join(trashDir, `${path.basename(filename, ext)}_${Date.now()}${ext}`)
          }
          await fsPromises.rename(filePath, dest).catch(async (e: NodeJS.ErrnoException) => {
            if (e.code === 'EXDEV') {
              await fsPromises.copyFile(filePath, dest)
              await fsPromises.unlink(filePath)
            } else throw e
          })
          console.log(`[thumbnail] Moved corrupt archive to trash: ${path.relative(root, filePath)}`)
        })
        .catch((e) => console.warn(`[thumbnail] Could not move corrupt archive to trash:`, e))
    } else {
      console.error(`Thumbnail generation failed for ${filePath}:`, err)
    }
    return new NextResponse(null, { status: 404 })
  }
 }
--- a/src/lib/comics.ts
+++ b/src/lib/comics.ts
@@ -5,6 +5,7 @@ import type { ComicIssue, ComicSeries } from '@/types'
 import { getDb } from './db'
 import { HIDDEN_FILES, thumbnailApiUrl } from './media-utils'
 import { countZipImages, mapConcurrent } from './zip-utils'
 import fsPromises from 'fs/promises'
 const CBZ_EXTENSIONS = new Set(['.cbz'])
 const CBZ_IMAGE_EXTENSIONS = new Set(['.jpg', '.jpeg', '.png', '.webp', '.gif'])
@@ -28,6 +29,30 @@ export interface ScannedComicSeries extends ComicSeries {
  issues: ComicIssue[]
 }
 // Name of the folder (relative to a library root) that receives corrupt archives.
 const TRASH_DIR = '.trash'
 /**
 * Move a corrupt archive into `<libraryRoot>/.trash`, creating the folder if needed.
 *
 * The destination name is reserved atomically (exclusive create) before the move,
 * so concurrent calls for files that share a basename can never overwrite each
 * other. The first choice is the original filename; on collision a
 * `_<timestamp>` suffix (and, if still taken, an attempt counter) is inserted
 * before the extension.
 *
 * @param absPath      Absolute path of the archive to move.
 * @param libraryRoot  Absolute path of the library that owns the archive.
 * @throws If no free destination name can be reserved or the move itself fails;
 *         the reserved placeholder is removed before rethrowing.
 */
 async function moveToTrash(absPath: string, libraryRoot: string): Promise<void> {
  const trashDir = path.join(libraryRoot, TRASH_DIR)
  await fsPromises.mkdir(trashDir, { recursive: true })
  const filename = path.basename(absPath)
  const ext = path.extname(filename)
  const base = path.basename(filename, ext)
  // Reserve a free name with an exclusive create ('wx' fails with EEXIST when the
  // path already exists). A plain exists-check followed by rename is racy and
  // rename silently replaces an existing destination.
  const maxAttempts = 100
  let dest: string | null = null
  for (let attempt = 0; attempt < maxAttempts && dest === null; attempt++) {
    const suffix = attempt === 1 ? `_${Date.now()}` : `_${Date.now()}_${attempt}`
    const candidateName = attempt === 0 ? filename : `${base}${suffix}${ext}`
    const candidate = path.join(trashDir, candidateName)
    try {
      const placeholder = await fsPromises.open(candidate, 'wx')
      await placeholder.close()
      dest = candidate
    } catch (err) {
      if ((err as NodeJS.ErrnoException).code !== 'EEXIST') throw err
    }
  }
  if (dest === null) {
    throw new Error(`Could not reserve a trash destination for: ${absPath}`)
  }
  const reserved = dest
  try {
    try {
      // Replaces the empty placeholder we just created.
      await fsPromises.rename(absPath, reserved)
    } catch (err) {
      if ((err as NodeJS.ErrnoException).code !== 'EXDEV') throw err
      // Source and destination are on different filesystems — copy then delete.
      await fsPromises.copyFile(absPath, reserved)
      await fsPromises.unlink(absPath)
    }
  } catch (err) {
    // Best-effort cleanup so a failed move does not leave a stray placeholder
    // (or partial copy) in the trash; the original failure is what matters.
    await fsPromises.unlink(reserved).catch(() => undefined)
    throw err
  }
  console.log(`[scanner] Moved corrupt archive to trash: ${path.relative(libraryRoot, absPath)}`)
 }
 interface CollectedCbz {
  absPath: string
  filename: string
@@ -93,22 +118,38 @@ export async function scanComicsLibrary(
  // Phase 2: Count pages for all CBZ files concurrently (10 at a time) by reading
  // only each archive's central directory — no full-file reads.
-  const pageCounts = await mapConcurrent(collected, 10, (c) =>
+  const scanResults = await mapConcurrent(collected, 10, (c) =>
    countZipImages(c.absPath, CBZ_IMAGE_EXTENSIONS)
  )
-  // Phase 3: Build the result array from collected metadata + page counts.
+  // Move corrupt archives to the library's .trash folder and exclude them from indexing.
  const movePromises: Promise<void>[] = []
  const valid: Array<{ cbz: CollectedCbz; pageCount: number }> = []
  for (let i = 0; i < collected.length; i++) {
    const result = scanResults[i]
    if (!result.valid) {
      movePromises.push(
        moveToTrash(collected[i].absPath, libraryRoot).catch((err) =>
          console.warn(`[scanner] Could not move corrupt archive to trash: ${collected[i].absPath}`, err)
        )
      )
      continue
    }
    valid.push({ cbz: collected[i], pageCount: result.pageCount })
  }
  if (movePromises.length > 0) await Promise.all(movePromises)
  // Phase 3: Build the result array from valid files only.
  const seriesMap = new Map<string, ScannedComicSeries>()
  const standaloneIssues: ComicIssue[] = []
-  for (let i = 0; i < collected.length; i++) {
+  for (const { cbz: c, pageCount } of valid) {
    const c = collected[i]
    const coverUrl = thumbnailApiUrl(libraryId, c.relPath)
    const issue: ComicIssue = {
      id: encodeURIComponent(c.relPath),
      title: path.basename(c.filename, path.extname(c.filename)),
      issueNumber: parseIssueNumber(c.filename),
-      pageCount: pageCounts[i],
+      pageCount,
      coverUrl,
      filePath: c.relPath,
      isStandalone: c.isStandalone,
--- a/src/lib/thumbnails.ts
+++ b/src/lib/thumbnails.ts
@@ -3,7 +3,7 @@ import fs from 'fs'
 import path from 'path'
 import { spawn } from 'child_process'
 import sharp from 'sharp'
-import AdmZip from 'adm-zip'
+import { extractFirstZipImage } from './zip-utils'
 const CACHE_DIR = path.resolve(process.cwd(), '.thumbnails')
 const THUMBNAIL_WIDTH = 400
@@ -241,15 +241,7 @@ export async function getCbzThumbnailPath(
  const cached = getCachedPath(cacheFile, absoluteFilePath)
  if (cached) return cached
-  const zip = new AdmZip(absoluteFilePath)
+  const buffer = await extractFirstZipImage(absoluteFilePath, CBZ_IMAGE_EXTENSIONS)
  const entries = zip
    .getEntries()
    .filter((e) => !e.isDirectory && CBZ_IMAGE_EXTENSIONS.has(path.extname(e.entryName).toLowerCase()))
    .sort((a, b) => a.entryName.localeCompare(b.entryName, undefined, { numeric: true, sensitivity: 'base' }))
  if (entries.length === 0) throw new Error('No image entries found in CBZ')
  const buffer = entries[0].getData()
  const tmp = cacheFile + '.tmp'
  await sharp(buffer).resize(THUMBNAIL_WIDTH).jpeg({ quality: JPEG_QUALITY }).toFile(tmp)
  fs.renameSync(tmp, cacheFile)
--- a/src/lib/zip-utils.ts
+++ b/src/lib/zip-utils.ts
@@ -19,10 +19,11 @@ export interface CdEntry {
 /**
 * Read a ZIP file's central directory without loading the entire archive.
- * Opens only the last ~22–64KB of the file (EOCD + central directory).
+ * Returns null if no EOCD record is found (corrupt/non-ZIP file).
 * Returns an empty array for a valid but empty archive.
 */
-async function readCentralDirectory(fd: FileHandle, fileSize: number): Promise<CdEntry[]> {
+async function readCentralDirectory(fd: FileHandle, fileSize: number): Promise<CdEntry[] | null> {
-  if (fileSize < 22) return []
+  if (fileSize < 22) return null
  // The EOCD record is within the last 65558 bytes (22-byte record + 65535-byte max comment).
  const tailLen = Math.min(65558, fileSize)
@@ -34,12 +35,13 @@ async function readCentralDirectory(fd: FileHandle, fileSize: number): Promise<C
  for (let i = tailLen - 22; i >= 0; i--) {
    if (tailBuf.readUInt32LE(i) === EOCD_SIG) { eocdOff = i; break }
  }
-  if (eocdOff === -1) return []
+  if (eocdOff === -1) return null  // no EOCD → corrupt
  const entryCount = tailBuf.readUInt16LE(eocdOff + 10)
  const cdSize     = tailBuf.readUInt32LE(eocdOff + 12)
  const cdOffset   = tailBuf.readUInt32LE(eocdOff + 16)
-  if (cdOffset + cdSize > fileSize || cdSize === 0) return []
+  if (entryCount === 0) return []  // valid empty archive
  if (cdOffset + cdSize > fileSize || cdSize === 0) return null  // malformed
  const cdBuf = Buffer.allocUnsafe(cdSize)
  await fd.read(cdBuf, 0, cdSize, cdOffset)
@@ -62,26 +64,44 @@ async function readCentralDirectory(fd: FileHandle, fileSize: number): Promise<C
  return entries
 }
 /**
 * Error signalling that a file could not be parsed as a ZIP archive.
 * Every instance carries the stable `code` value 'ERR_CORRUPT_ZIP' so callers
 * can recognise it without relying on `instanceof`.
 */
 export class CorruptZipError extends Error {
  /** Machine-readable discriminator for this error type. */
  readonly code = 'ERR_CORRUPT_ZIP'
  /** @param absolutePath Path of the offending archive; included in the message. */
  constructor(absolutePath: string) {
    super('Corrupt or invalid ZIP archive: ' + absolutePath)
    this.name = 'CorruptZipError'
  }
 }
 /**
 * Type guard for {@link CorruptZipError}.
 * Matches real instances and also any `Error` whose `code` is 'ERR_CORRUPT_ZIP'
 * (presumably so errors created by another copy of this module still match —
 * confirm against callers).
 */
 export function isCorruptZipError(err: unknown): err is CorruptZipError {
  if (err instanceof CorruptZipError) return true
  if (!(err instanceof Error)) return false
  return (err as CorruptZipError).code === 'ERR_CORRUPT_ZIP'
 }
 /**
 * Count the number of image entries inside a ZIP/CBZ archive by reading
 * only its central directory — no full-file read required.
 * Returns { pageCount, valid } where valid=false means the archive could not be
 * read as a ZIP: no valid EOCD record, a malformed central directory, or any
 * error while opening or reading the file.
 */
 export async function countZipImages(
  absolutePath: string,
  imageExtensions: Set<string>
-): Promise<number> {
+): Promise<{ pageCount: number; valid: boolean }> {
  let fd: FileHandle | null = null
  try {
    fd = await open(absolutePath, 'r')
    const { size } = await fd.stat()
    const entries = await readCentralDirectory(fd, size)
-    return entries.filter((e) => {
+    if (entries === null) return { pageCount: 0, valid: false }
    const pageCount = entries.filter((e) => {
      if (e.name.endsWith('/')) return false
      const dot = e.name.lastIndexOf('.')
      return dot !== -1 && imageExtensions.has(e.name.slice(dot).toLowerCase())
    }).length
    return { pageCount, valid: true }
  } catch {
-    return 0
+    return { pageCount: 0, valid: false }
  } finally {
    await fd?.close()
  }
@@ -128,6 +148,7 @@ export async function findZipEntry(absolutePath: string, entryName: string): Pro
    fd = await open(absolutePath, 'r')
    const { size } = await fd.stat()
    const entries = await readCentralDirectory(fd, size)
    if (!entries) return null
    const lower = entryName.toLowerCase()
    return entries.find((e) => {
      const n = e.name.toLowerCase()
@@ -140,6 +161,55 @@ export async function findZipEntry(absolutePath: string, entryName: string): Pro
  }
 }
 /**
 * Extract the first image entry (natural sort) from a ZIP/CBZ archive.
 * Reads only the central directory and the single chosen entry — no full-file load.
 *
 * Every read is bounds-checked against the file size and must return the full
 * number of requested bytes, so a truncated archive or a bogus offset/size in
 * the directory is reported as corruption instead of yielding a partly
 * uninitialised buffer.
 *
 * @param absolutePath     Path of the archive to read.
 * @param imageExtensions  Lower-case extensions (with leading dot) that count as images.
 * @returns The uncompressed bytes of the first image entry.
 * @throws CorruptZipError if the central directory cannot be read, the local
 *         file header signature is wrong, or the entry's header/data lies
 *         outside the file.
 * @throws Error if the archive has no image entries or the entry uses a
 *         compression method other than stored (0) or deflate (8).
 */
 export async function extractFirstZipImage(
  absolutePath: string,
  imageExtensions: Set<string>
 ): Promise<Buffer> {
  let fd: FileHandle | null = null
  try {
    fd = await open(absolutePath, 'r')
    const handle = fd
    const { size } = await handle.stat()
    // Read exactly `length` bytes at `position`, or fail as corrupt. The buffer
    // comes from allocUnsafe, so a short read must never be returned to callers.
    const readExactly = async (length: number, position: number): Promise<Buffer> => {
      if (position < 0 || position + length > size) throw new CorruptZipError(absolutePath)
      const buf = Buffer.allocUnsafe(length)
      let filled = 0
      while (filled < length) {
        const { bytesRead } = await handle.read(buf, filled, length - filled, position + filled)
        if (bytesRead === 0) throw new CorruptZipError(absolutePath)
        filled += bytesRead
      }
      return buf
    }
    const entries = await readCentralDirectory(handle, size)
    if (entries === null) throw new CorruptZipError(absolutePath)
    const imageEntries = entries
      .filter((e) => {
        if (e.name.endsWith('/')) return false
        const dot = e.name.lastIndexOf('.')
        return dot !== -1 && imageExtensions.has(e.name.slice(dot).toLowerCase())
      })
      .sort((a, b) =>
        a.name.localeCompare(b.name, undefined, { numeric: true, sensitivity: 'base' })
      )
    if (imageEntries.length === 0) throw new Error(`No image entries in archive: ${absolutePath}`)
    const entry = imageEntries[0]
    // Read local file header to get the exact data offset.
    const lfhBuf = await readExactly(30, entry.localHeaderOffset)
    if (lfhBuf.readUInt32LE(0) !== LFH_SIG) throw new CorruptZipError(absolutePath)
    const localFilenameLen = lfhBuf.readUInt16LE(26)
    const localExtraLen    = lfhBuf.readUInt16LE(28)
    const dataOffset = entry.localHeaderOffset + 30 + localFilenameLen + localExtraLen
    // Bounds check happens before allocation, so a bogus compressedSize cannot
    // trigger an oversized buffer.
    const compressedBuf = await readExactly(entry.compressedSize, dataOffset)
    if (entry.compressionMethod === 0) return compressedBuf
    if (entry.compressionMethod === 8) return await inflateRaw(compressedBuf) as Buffer
    throw new Error(`Unsupported compression method ${entry.compressionMethod}: ${absolutePath}`)
  } finally {
    await fd?.close()
  }
 }
 /**
 * Process an array of items concurrently with a concurrency limit.
 * Preserves index order in results.