diff --git a/src/lib/comic-info.ts b/src/lib/comic-info.ts
index e2f358f..99b51b6 100644
--- a/src/lib/comic-info.ts
+++ b/src/lib/comic-info.ts
@@ -1,6 +1,7 @@
 import AdmZip from 'adm-zip'
 import { XMLParser } from 'fast-xml-parser'
 import type { ComicInfoData } from '@/types'
+import { findZipEntry, extractZipEntry } from './zip-utils'
 
 const parser = new XMLParser()
 
@@ -70,3 +71,50 @@ export function parseComicInfo(absoluteCbzPath: string): ComicInfoData | null {
     web: toString(info.Web),
   }
 }
+
+/**
+ * Async version of parseComicInfo — reads only the ComicInfo.xml entry from the
+ * archive without loading the entire CBZ into memory. This is significantly faster
+ * for large libraries since it reads only the ZIP's central directory + the XML entry.
+ */
+export async function parseComicInfoAsync(absoluteCbzPath: string): Promise<ComicInfoData | null> {
+  try {
+    const entry = await findZipEntry(absoluteCbzPath, 'comicinfo.xml')
+    if (!entry) return null
+    const buf = await extractZipEntry(absoluteCbzPath, entry)
+    if (!buf) return null
+    return parseXml(buf.toString('utf-8'))
+  } catch {
+    return null
+  }
+}
+
+function parseXml(xml: string): ComicInfoData | null {
+  let doc: Record<string, unknown>
+  try {
+    doc = parser.parse(xml) as Record<string, unknown>
+  } catch {
+    return null
+  }
+
+  const info = (doc.ComicInfo ?? doc.ComicInfoXml ?? doc.comicinfo) as Record<string, unknown> | undefined
+  if (!info) return null
+
+  const rawTags = toString(info.Tags)
+  const tags: string[] = rawTags
+    ? rawTags.split(',').map((t) => t.trim()).filter(Boolean)
+    : []
+
+  return {
+    title: toString(info.Title),
+    year: toNumber(info.Year),
+    month: toNumber(info.Month),
+    day: toNumber(info.Day),
+    writer: toString(info.Writer),
+    translator: toString(info.Translator),
+    publisher: toString(info.Publisher),
+    genre: toString(info.Genre),
+    tags,
+    web: toString(info.Web),
+  }
+}
diff --git a/src/lib/comic-metadata.ts b/src/lib/comic-metadata.ts
index 8aa3f1a..b341fcb 100644
--- a/src/lib/comic-metadata.ts
+++ b/src/lib/comic-metadata.ts
@@ -3,7 +3,8 @@ import crypto from 'crypto'
 import type { Library, ImportedTag, TagMapping } from '@/types'
 import { getDb } from './db'
 import { resolveLibraryRoot } from './libraries'
-import { parseComicInfo } from './comic-info'
+import { parseComicInfoAsync } from './comic-info'
+import { mapConcurrent } from './zip-utils'
 
 // ─── Metadata Import ──────────────────────────────────────────────────────────
 
@@ -13,7 +14,7 @@ import { parseComicInfo } from './comic-info'
  * - For each tag: if a mapping exists, assigns the real tag; otherwise creates
  *   an imported tag entry.
  */
-export function importComicMetadata(library: Library): void {
+export async function importComicMetadata(library: Library): Promise<void> {
   const db = getDb()
   const libraryRoot = resolveLibraryRoot(library)
 
@@ -56,53 +57,65 @@ export function importComicMetadata(library: Library): void {
 
   let importedCount = 0
 
-  db.transaction(() => {
-    for (const issue of issues) {
-      const absPath = path.join(libraryRoot, issue.file_path)
-      const info = parseComicInfo(absPath)
-      if (!info) continue
+  // Process in batches: async file reads (10 concurrent) followed by batch DB writes,
+  // with an event-loop yield between batches to keep the app responsive.
+  const BATCH_SIZE = 50
+  for (let i = 0; i < issues.length; i += BATCH_SIZE) {
+    const batch = issues.slice(i, i + BATCH_SIZE)
 
-      // Merge with existing metadata JSON (preserve pageCount, coverUrl, etc.)
-      const existingMeta = issue.metadata ? JSON.parse(issue.metadata) : {}
-      const mergedMeta = {
-        ...existingMeta,
-        writer: info.writer,
-        publisher: info.publisher,
-        translator: info.translator,
-        web: info.web,
-        month: info.month,
-        day: info.day,
-      }
+    // Async: read ComicInfo.xml from each archive concurrently (10 at a time).
+    // Uses async ZIP central-directory reader — no full-file reads.
+    const infos = await mapConcurrent(batch, 10, (issue) =>
+      parseComicInfoAsync(path.join(libraryRoot, issue.file_path))
+    )
 
-      updateItem.run({
-        item_key: issue.item_key,
-        title: info.title ?? existingMeta.title ?? null,
-        year: info.year,
-        genres: info.genre,
-        metadata: JSON.stringify(mergedMeta),
-      })
+    // Sync: write this batch to the DB in one transaction.
+    db.transaction(() => {
+      for (let j = 0; j < batch.length; j++) {
+        const issue = batch[j]
+        const info = infos[j]
+        if (!info) continue
 
-      // Process tags
-      for (const tagName of info.tags) {
-        const mappedTagId = mappings.get(tagName)
-        if (mappedTagId) {
-          // Mapping exists — assign the real tag
-          addMediaTag.run(issue.item_key, mappedTagId)
-        } else {
-          // No mapping — create imported tag
-          const importedTagId = crypto.randomUUID()
-          const row = upsertImportedTag.get({
-            id: importedTagId,
-            library_id: library.id,
-            name: tagName,
-          }) as { id: string }
-          addItemImportedTag.run(issue.item_key, row.id)
+        const existingMeta = issue.metadata ? JSON.parse(issue.metadata) : {}
+        const mergedMeta = {
+          ...existingMeta,
+          writer: info.writer,
+          publisher: info.publisher,
+          translator: info.translator,
+          web: info.web,
+          month: info.month,
+          day: info.day,
         }
-      }
 
-      importedCount++
-    }
-  })()
+        updateItem.run({
+          item_key: issue.item_key,
+          title: info.title ?? existingMeta.title ?? null,
+          year: info.year,
+          genres: info.genre,
+          metadata: JSON.stringify(mergedMeta),
+        })
+
+        for (const tagName of info.tags) {
+          const mappedTagId = mappings.get(tagName)
+          if (mappedTagId) {
+            addMediaTag.run(issue.item_key, mappedTagId)
+          } else {
+            const importedTagId = crypto.randomUUID()
+            const row = upsertImportedTag.get({
+              id: importedTagId,
+              library_id: library.id,
+              name: tagName,
+            }) as { id: string }
+            addItemImportedTag.run(issue.item_key, row.id)
+          }
+        }
+
+        importedCount++
+      }
+    })()
+
+    await new Promise((r) => setImmediate(r))
+  }
 
   console.log(`[comic-metadata] Imported metadata for ${importedCount}/${issues.length} issues in "${library.name}"`)
 }
diff --git a/src/lib/comics.ts b/src/lib/comics.ts
index 52c5437..9dca358 100644
--- a/src/lib/comics.ts
+++ b/src/lib/comics.ts
@@ -4,6 +4,7 @@ import AdmZip from 'adm-zip'
 import type { ComicIssue, ComicSeries } from '@/types'
 import { getDb } from './db'
 import { HIDDEN_FILES, thumbnailApiUrl } from './media-utils'
+import { countZipImages, mapConcurrent } from './zip-utils'
 
 const CBZ_EXTENSIONS = new Set(['.cbz'])
 const CBZ_IMAGE_EXTENSIONS = new Set(['.jpg', '.jpeg', '.png', '.webp', '.gif'])
@@ -23,52 +24,22 @@ function parseIssueNumber(filename: string): number | null {
   return parseInt(matches[matches.length - 1], 10)
 }
 
-function getPageCount(absoluteCbzPath: string): number {
-  try {
-    const zip = new AdmZip(absoluteCbzPath)
-    return zip
-      .getEntries()
-      .filter(
-        (e) =>
-          !e.isDirectory &&
-          CBZ_IMAGE_EXTENSIONS.has(path.extname(e.entryName).toLowerCase())
-      ).length
-  } catch {
-    return 0
-  }
-}
-
-function buildIssue(
-  absFilePath: string,
-  filename: string,
-  filePath: string,
-  libraryId: string,
-  isStandalone: boolean
-): ComicIssue {
-  const title = path.basename(filename, path.extname(filename))
-  const issueNumber = parseIssueNumber(filename)
-  const pageCount = getPageCount(absFilePath)
-  const coverUrl = thumbnailApiUrl(libraryId, filePath)
-
-  return {
-    id: encodeURIComponent(filePath),
-    title,
-    issueNumber,
-    pageCount,
-    coverUrl,
-    filePath,
-    isStandalone,
-  }
-}
-
 export interface ScannedComicSeries extends ComicSeries {
   issues: ComicIssue[]
 }
 
-export function scanComicsLibrary(
+interface CollectedCbz {
+  absPath: string
+  filename: string
+  relPath: string
+  isStandalone: boolean
+  seriesDirName: string | null
+}
+
+export async function scanComicsLibrary(
   libraryRoot: string,
   libraryId: string
-): (ComicIssue | ScannedComicSeries)[] {
+): Promise<(ComicIssue | ScannedComicSeries)[]> {
   let topEntries: fs.Dirent[]
   try {
     topEntries = fs.readdirSync(libraryRoot, { withFileTypes: true })
@@ -76,15 +47,20 @@ export function scanComicsLibrary(
     return []
   }
 
-  const results: (ComicIssue | ScannedComicSeries)[] = []
+  // Phase 1: Collect all CBZ paths via fast directory listing (no archive opens).
+  const collected: CollectedCbz[] = []
 
   for (const entry of topEntries) {
     if (HIDDEN_FILES.test(entry.name)) continue
 
     if (entry.isFile() && isCbzFile(entry.name)) {
-      // Standalone one-shot comic
-      const absPath = path.join(libraryRoot, entry.name)
-      results.push(buildIssue(absPath, entry.name, entry.name, libraryId, true))
+      collected.push({
+        absPath: path.join(libraryRoot, entry.name),
+        filename: entry.name,
+        relPath: entry.name,
+        isStandalone: true,
+        seriesDirName: null,
+      })
       continue
     }
 
@@ -97,32 +73,70 @@ export function scanComicsLibrary(
         continue
       }
 
-      const cbzFiles = subEntries.filter(
-        (e) => e.isFile() && isCbzFile(e.name) && !HIDDEN_FILES.test(e.name)
-      )
+      const cbzFiles = subEntries
+        .filter((e) => e.isFile() && isCbzFile(e.name) && !HIDDEN_FILES.test(e.name))
+        .sort((a, b) => naturalCompare(a.name, b.name))
 
       if (cbzFiles.length === 0) continue
 
-      // It's a series
-      const issues: ComicIssue[] = cbzFiles
-        .sort((a, b) => naturalCompare(a.name, b.name))
-        .map((f) => {
-          const relPath = path.join(entry.name, f.name)
-          return buildIssue(path.join(dirAbsPath, f.name), f.name, relPath, libraryId, false)
+      for (const f of cbzFiles) {
+        collected.push({
+          absPath: path.join(dirAbsPath, f.name),
+          filename: f.name,
+          relPath: path.join(entry.name, f.name),
+          isStandalone: false,
+          seriesDirName: entry.name,
         })
-
-      const seriesCoverUrl = issues[0]?.coverUrl ?? null
-
-      results.push({
-        id: encodeURIComponent(entry.name),
-        title: entry.name,
-        coverUrl: seriesCoverUrl,
-        issueCount: issues.length,
-        issues,
-      })
+      }
     }
   }
 
+  // Phase 2: Count pages for all CBZ files concurrently (10 at a time) by reading
+  // only each archive's central directory — no full-file reads.
+  const pageCounts = await mapConcurrent(collected, 10, (c) =>
+    countZipImages(c.absPath, CBZ_IMAGE_EXTENSIONS)
+  )
+
+  // Phase 3: Build the result array from collected metadata + page counts.
+  const seriesMap = new Map<string, ScannedComicSeries>()
+  const standaloneIssues: ComicIssue[] = []
+
+  for (let i = 0; i < collected.length; i++) {
+    const c = collected[i]
+    const coverUrl = thumbnailApiUrl(libraryId, c.relPath)
+    const issue: ComicIssue = {
+      id: encodeURIComponent(c.relPath),
+      title: path.basename(c.filename, path.extname(c.filename)),
+      issueNumber: parseIssueNumber(c.filename),
+      pageCount: pageCounts[i],
+      coverUrl,
+      filePath: c.relPath,
+      isStandalone: c.isStandalone,
+    }
+
+    if (c.isStandalone) {
+      standaloneIssues.push(issue)
+    } else {
+      const key = c.seriesDirName!
+      if (!seriesMap.has(key)) {
+        seriesMap.set(key, {
+          id: encodeURIComponent(key),
+          title: key,
+          coverUrl, // first issue (sorted) becomes the series cover
+          issueCount: 0,
+          issues: [],
+        })
+      }
+      const series = seriesMap.get(key)!
+      series.issues.push(issue)
+      series.issueCount++
+    }
+  }
+
+  const results: (ComicIssue | ScannedComicSeries)[] = [
+    ...Array.from(seriesMap.values()),
+    ...standaloneIssues,
+  ]
   return results.sort((a, b) => naturalCompare(a.title, b.title))
 }
 
diff --git a/src/lib/scanner.ts b/src/lib/scanner.ts
index 8cf71a6..5947908 100644
--- a/src/lib/scanner.ts
+++ b/src/lib/scanner.ts
@@ -546,7 +546,7 @@ async function scanMixed(library: Library, libraryRoot: string): Promise<void> {
 // ---------------------------------------------------------------------------
 
 async function scanComics(library: Library, libraryRoot: string): Promise<void> {
-  const items = scanComicsLibrary(libraryRoot, library.id)
+  const items = await scanComicsLibrary(libraryRoot, library.id)
   const db = getDb()
   const now = Date.now()
 
@@ -678,7 +678,7 @@ async function scanComics(library: Library, libraryRoot: string): Promise<void>
 
   // Import ComicInfo.xml metadata (title, year, genres, tags)
   try {
-    importComicMetadata(library)
+    await importComicMetadata(library)
   } catch (err) {
     console.error(`[scanner] Error importing comic metadata for "${library.name}":`, err)
   }
diff --git a/src/lib/zip-utils.ts b/src/lib/zip-utils.ts
new file mode 100644
index 0000000..d753509
--- /dev/null
+++ b/src/lib/zip-utils.ts
@@ -0,0 +1,188 @@
+import { open } from 'fs/promises'
+import type { FileHandle } from 'fs/promises'
+import zlib from 'zlib'
+import { promisify } from 'util'
+
+const inflateRaw = promisify(zlib.inflateRaw)
+
+const EOCD_SIG = 0x06054b50
+const CD_SIG = 0x02014b50
+const LFH_SIG = 0x04034b50
+
+export interface CdEntry {
+  name: string
+  compressionMethod: number
+  compressedSize: number
+  uncompressedSize: number
+  localHeaderOffset: number
+}
+
+/**
+ * Read exactly `length` bytes starting at `position`.
+ * Returns null if the file ends first, so callers never parse the uninitialised
+ * remainder of a Buffer.allocUnsafe() allocation.
+ */
+async function readExact(fd: FileHandle, length: number, position: number): Promise<Buffer | null> {
+  const buf = Buffer.allocUnsafe(length)
+  let filled = 0
+  while (filled < length) {
+    const { bytesRead } = await fd.read(buf, filled, length - filled, position + filled)
+    if (bytesRead === 0) return null
+    filled += bytesRead
+  }
+  return buf
+}
+
+/**
+ * Read a ZIP file's central directory without loading the entire archive.
+ * Reads at most the last 65558 bytes to locate the EOCD record, then only the
+ * central directory it points to. ZIP64 records are not parsed; an archive that
+ * cannot be parsed yields an empty list.
+ */
+async function readCentralDirectory(fd: FileHandle, fileSize: number): Promise<CdEntry[]> {
+  if (fileSize < 22) return []
+
+  // The EOCD record is within the last 65558 bytes (22-byte record + 65535-byte max comment).
+  const tailLen = Math.min(65558, fileSize)
+  const tailBuf = await readExact(fd, tailLen, fileSize - tailLen)
+  if (!tailBuf) return []
+
+  // Scan backwards for the EOCD signature.
+  let eocdOff = -1
+  for (let i = tailLen - 22; i >= 0; i--) {
+    if (tailBuf.readUInt32LE(i) === EOCD_SIG) { eocdOff = i; break }
+  }
+  if (eocdOff === -1) return []
+
+  const entryCount = tailBuf.readUInt16LE(eocdOff + 10)
+  const cdSize = tailBuf.readUInt32LE(eocdOff + 12)
+  const cdOffset = tailBuf.readUInt32LE(eocdOff + 16)
+  if (cdOffset + cdSize > fileSize || cdSize === 0) return []
+
+  const cdBuf = await readExact(fd, cdSize, cdOffset)
+  if (!cdBuf) return []
+
+  const entries: CdEntry[] = []
+  let pos = 0
+  for (let i = 0; i < entryCount && pos + 46 <= cdBuf.length; i++) {
+    if (cdBuf.readUInt32LE(pos) !== CD_SIG) break
+    const compressionMethod = cdBuf.readUInt16LE(pos + 10)
+    const compressedSize = cdBuf.readUInt32LE(pos + 20)
+    const uncompressedSize = cdBuf.readUInt32LE(pos + 24)
+    const filenameLen = cdBuf.readUInt16LE(pos + 28)
+    const extraLen = cdBuf.readUInt16LE(pos + 30)
+    const commentLen = cdBuf.readUInt16LE(pos + 32)
+    const localHeaderOffset = cdBuf.readUInt32LE(pos + 42)
+    const nameEnd = pos + 46 + filenameLen
+    if (nameEnd > cdBuf.length) break // truncated record: name would run past the directory
+    const name = cdBuf.toString('utf8', pos + 46, nameEnd)
+    entries.push({ name, compressionMethod, compressedSize, uncompressedSize, localHeaderOffset })
+    pos += 46 + filenameLen + extraLen + commentLen
+  }
+  return entries
+}
+
+/**
+ * Count the number of image entries inside a ZIP/CBZ archive by reading
+ * only its central directory — no full-file read required.
+ */
+export async function countZipImages(
+  absolutePath: string,
+  imageExtensions: Set<string>
+): Promise<number> {
+  let fd: FileHandle | null = null
+  try {
+    fd = await open(absolutePath, 'r')
+    const { size } = await fd.stat()
+    const entries = await readCentralDirectory(fd, size)
+    return entries.filter((e) => {
+      if (e.name.endsWith('/')) return false
+      const dot = e.name.lastIndexOf('.')
+      return dot !== -1 && imageExtensions.has(e.name.slice(dot).toLowerCase())
+    }).length
+  } catch {
+    return 0
+  } finally {
+    await fd?.close()
+  }
+}
+
+/**
+ * Extract the raw bytes of a specific entry from a ZIP archive.
+ * Reads only the local file header + compressed data for that entry.
+ * Supports stored (method 0) and deflate (method 8).
+ * Returns null for any other method, a short read, or sizes that do not fit the file.
+ */
+export async function extractZipEntry(absolutePath: string, entry: CdEntry): Promise<Buffer | null> {
+  let fd: FileHandle | null = null
+  try {
+    fd = await open(absolutePath, 'r')
+    const { size } = await fd.stat()
+
+    // Read local file header (30 bytes) to get exact data offset.
+    const lfhBuf = await readExact(fd, 30, entry.localHeaderOffset)
+    if (!lfhBuf || lfhBuf.readUInt32LE(0) !== LFH_SIG) return null
+    const localFilenameLen = lfhBuf.readUInt16LE(26)
+    const localExtraLen = lfhBuf.readUInt16LE(28)
+    const dataOffset = entry.localHeaderOffset + 30 + localFilenameLen + localExtraLen
+
+    // compressedSize comes from the archive: never allocate more than the file can hold.
+    if (dataOffset + entry.compressedSize > size) return null
+    const compressedBuf = await readExact(fd, entry.compressedSize, dataOffset)
+    if (!compressedBuf) return null
+
+    if (entry.compressionMethod === 0) return compressedBuf
+    if (entry.compressionMethod === 8) return await inflateRaw(compressedBuf) as Buffer
+    return null
+  } catch {
+    return null
+  } finally {
+    await fd?.close()
+  }
+}
+
+/**
+ * Find a named entry (case-insensitive) in a ZIP archive's central directory.
+ * Returns null if not found or on error.
+ */
+export async function findZipEntry(absolutePath: string, entryName: string): Promise<CdEntry | null> {
+  let fd: FileHandle | null = null
+  try {
+    fd = await open(absolutePath, 'r')
+    const { size } = await fd.stat()
+    const entries = await readCentralDirectory(fd, size)
+    const lower = entryName.toLowerCase()
+    return entries.find((e) => {
+      const n = e.name.toLowerCase()
+      return n === lower || n.endsWith('/' + lower)
+    }) ?? null
+  } catch {
+    return null
+  } finally {
+    await fd?.close()
+  }
+}
+
+/**
+ * Process an array of items concurrently with a concurrency limit.
+ * Preserves index order in results. A limit below 1 (or NaN) is treated as 1
+ * so every item is always processed.
+ */
+export async function mapConcurrent<T, U>(
+  items: T[],
+  limit: number,
+  fn: (item: T) => Promise<U>
+): Promise<U[]> {
+  const results: U[] = new Array(items.length)
+  // Math.floor(NaN) and Math.floor(0.x) are falsy, so `|| 1` covers both.
+  const workers = Math.min(items.length, Math.max(1, Math.floor(limit) || 1))
+  let next = 0
+  async function worker(): Promise<void> {
+    while (next < items.length) {
+      const i = next++
+      results[i] = await fn(items[i])
+    }
+  }
+  await Promise.all(Array.from({ length: workers }, worker))
+  return results
+}