"""Scan a media library directory and reconcile it with stored MediaItem rows."""

import asyncio
import hashlib
from datetime import datetime
from pathlib import Path

from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession

from app.models import MediaItem

IMAGE_EXTENSIONS = {".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp", ".tiff", ".avif", ".heic"}
VIDEO_EXTENSIONS = {".mp4", ".mkv", ".mov", ".avi", ".webm", ".m4v", ".flv", ".wmv", ".ts"}


def classify(path: Path) -> str | None:
    """Return the media type implied by *path*'s extension.

    The suffix is lower-cased before lookup, so matching is
    case-insensitive.

    Returns:
        "image" if the suffix is in IMAGE_EXTENSIONS, "video" if it is in
        VIDEO_EXTENSIONS, otherwise None.
    """
    ext = path.suffix.lower()
    if ext in IMAGE_EXTENSIONS:
        return "image"
    if ext in VIDEO_EXTENSIONS:
        return "video"
    return None


def hash_file(path: Path) -> str:
    """Return the hexadecimal SHA-256 digest of the file at *path*.

    The file is read in 64 KiB chunks so large files are never loaded into
    memory in one piece.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    digest = hashlib.sha256()
    with path.open("rb") as f:
        while chunk := f.read(65536):
            digest.update(chunk)
    return digest.hexdigest()


async def scan_library(library_id: int, library_path: str, db: AsyncSession) -> None:
    """Walk *library_path* and sync the library's MediaItem rows with the disk.

    For every image/video file found under the root:

    * if a row with the same relative path exists, it is un-flagged as
      missing when necessary;
    * otherwise the file is hashed, and if a row currently flagged missing
      has the same hash it is relinked to the new path (treated as a move);
    * otherwise a new MediaItem is added.

    Rows whose path was not encountered are flagged as missing. All changes
    are committed once at the end.

    Args:
        library_id: Identifier used to filter and create MediaItem rows.
        library_path: Root directory to scan recursively.
        db: Session used for the queries, the pending changes and the commit.
    """
    root = Path(library_path)

    existing = await db.execute(
        select(MediaItem).where(MediaItem.library_id == library_id)
    )
    db_items = {item.rel_path: item for item in existing.scalars().all()}

    seen_paths: set[str] = set()

    # NOTE(review): timestamps stay naive-UTC (datetime.utcnow) because the
    # column's timezone handling is not visible here — confirm before
    # switching to timezone-aware values.
    for file_path in root.rglob("*"):
        if not file_path.is_file():
            continue

        media_type = classify(file_path)
        if not media_type:
            continue

        rel = str(file_path.relative_to(root))
        seen_paths.add(rel)

        known = db_items.get(rel)
        if known is not None:
            if known.missing:
                known.missing = False
                known.updated_at = datetime.utcnow()
            continue

        # Hashing reads the whole file; run it in a worker thread so the
        # event loop is not blocked while large media files are processed.
        file_hash = await asyncio.to_thread(hash_file, file_path)

        # Check if this hash matches an orphaned (missing) item — file was
        # moved while offline.
        moved = await _find_by_hash(library_id, file_hash, db)
        if moved is not None:
            moved.rel_path = rel
            moved.filename = file_path.name
            # The type is derived from the extension, which a rename may change.
            moved.media_type = media_type
            moved.missing = False
            moved.updated_at = datetime.utcnow()
            continue

        db.add(
            MediaItem(
                library_id=library_id,
                rel_path=rel,
                filename=file_path.name,
                file_hash=file_hash,
                media_type=media_type,
                size_bytes=file_path.stat().st_size,
                missing=False,
            )
        )

    # Mark items no longer on disk as missing. Compare each item's *current*
    # rel_path rather than the dict key: an item relinked above still sits
    # under its old key and would otherwise be flagged missing again.
    for item in db_items.values():
        if item.rel_path not in seen_paths and not item.missing:
            item.missing = True
            item.updated_at = datetime.utcnow()

    await db.commit()


async def _find_by_hash(library_id: int, file_hash: str, db: AsyncSession) -> MediaItem | None:
    """Return a missing MediaItem in the library with the given hash, if any.

    Only rows flagged ``missing`` are considered. When several rows match,
    the first one returned by the query is used; no ordering is specified.
    """
    result = await db.execute(
        select(MediaItem).where(
            MediaItem.library_id == library_id,
            MediaItem.file_hash == file_hash,
            MediaItem.missing == True,  # noqa: E712
        )
    )
    return result.scalars().first()