143 lines
4.9 KiB
Python
143 lines
4.9 KiB
Python
import asyncio
|
|
import hashlib
|
|
import logging
|
|
import os
|
|
from pathlib import Path
|
|
from datetime import datetime
|
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
from sqlalchemy import select
|
|
from app.models import MediaItem
|
|
from app.database import SessionLocal
|
|
from app.services.thumbnails import thumbnail_path
|
|
|
|
# Module-level logger, named after this module via __name__.
log = logging.getLogger(__name__)
|
|
|
|
# Ids of libraries with a background scan in flight: scan_library_background
# adds the id on entry and discards it in its ``finally`` clause.
_scanning: set[int] = set()
|
|
|
|
|
|
def is_scanning(library_id: int) -> bool:
    """Report whether a background scan is registered for a library.

    Args:
        library_id: Identifier of the library to look up.

    Returns:
        True while ``library_id`` is present in the module-level
        ``_scanning`` set, False otherwise.
    """
    currently_active = library_id in _scanning
    return currently_active
|
|
|
|
|
|
# Lower-case file suffixes (leading dot included) that classify() reports as "image".
IMAGE_EXTENSIONS = {
    ".jpg",
    ".jpeg",
    ".png",
    ".gif",
    ".webp",
    ".bmp",
    ".tiff",
    ".avif",
    ".heic",
}

# Lower-case file suffixes (leading dot included) that classify() reports as "video".
VIDEO_EXTENSIONS = {
    ".mp4",
    ".mkv",
    ".mov",
    ".avi",
    ".webm",
    ".m4v",
    ".flv",
    ".wmv",
    ".ts",
}
|
|
|
|
|
|
def classify(path: Path) -> str | None:
    """Map a file path to a media type based on its suffix.

    The suffix is lower-cased before the lookup, so the match is
    case-insensitive.

    Args:
        path: Path whose final suffix is inspected; the file is not opened.

    Returns:
        ``"image"`` if the suffix is in ``IMAGE_EXTENSIONS``, ``"video"`` if it
        is in ``VIDEO_EXTENSIONS``, otherwise ``None``.
    """
    suffix = path.suffix.lower()
    # Images are checked first, matching the precedence of the two sets.
    for media_type, extensions in (
        ("image", IMAGE_EXTENSIONS),
        ("video", VIDEO_EXTENSIONS),
    ):
        if suffix in extensions:
            return media_type
    return None
|
|
|
|
|
|
def hash_file(path: Path) -> str:
    """Return the hex-encoded SHA-256 digest of a file's contents.

    The file is read in 64 KiB blocks so large files are never held in
    memory in full.

    Args:
        path: File to read, opened in binary mode.

    Returns:
        The digest as a lowercase hexadecimal string.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    digest = hashlib.sha256()
    with path.open("rb") as stream:
        # An empty read signals end of file and ends the loop.
        while block := stream.read(65536):
            digest.update(block)
    return digest.hexdigest()
|
|
|
|
|
|
async def scan_library_background(library_id: int, library_path: str) -> None:
    """Scan a library using a session opened just for this call.

    The library id is kept in ``_scanning`` for the duration of the scan so
    that ``is_scanning`` can report it. Any ``Exception`` raised by the scan is
    logged with its traceback and not re-raised.

    Args:
        library_id: Identifier of the library being scanned.
        library_path: Filesystem root of the library.
    """
    _scanning.add(library_id)
    try:
        async with SessionLocal() as session:
            await _do_scan(library_id, library_path, session)
    except Exception:
        log.exception("Scan failed for library %d at %s", library_id, library_path)
    finally:
        # Always clear the in-progress marker, whether the scan succeeded or not.
        _scanning.discard(library_id)
|
|
|
|
|
|
async def scan_library(library_id: int, library_path: str, db: AsyncSession) -> None:
    """Scan a library using a session supplied by the caller.

    Unlike ``scan_library_background``, this does not touch ``_scanning`` and
    lets exceptions from the scan propagate.

    Args:
        library_id: Identifier of the library being scanned.
        library_path: Filesystem root of the library.
        db: Session used for all queries and the final commit.
    """
    await _do_scan(library_id=library_id, library_path=library_path, db=db)
|
|
|
|
|
|
async def _do_scan(library_id: int, library_path: str, db: AsyncSession) -> None:
    """Synchronise one library's MediaItem rows with the files on disk.

    Walks ``library_path`` (skipping dot-prefixed directories and files),
    hashes every file whose suffix ``classify`` recognises, and then:

    * for a known path whose content hash changed: stores the new hash and
      size, bumps ``updated_at`` and deletes the stale thumbnail;
    * for a known path flagged ``missing``: clears the flag;
    * for an unknown path whose hash uniquely matches a missing row (per
      ``_find_by_hash``): re-points that row at the new path;
    * for any other unknown path: inserts a new row;
    * after the walk: flags every row whose path was not seen as ``missing``.

    All changes are committed once, at the end.

    NOTE(review): ``_find_by_hash`` only matches rows already flagged
    ``missing``, and rows are flagged only after the walk. A file moved since
    the previous scan is therefore inserted as a new row rather than
    re-pointed — confirm whether that is intended.

    Args:
        library_id: Identifier of the library whose rows are reconciled.
        library_path: Filesystem root to walk.
        db: Session used for the queries and the final commit.

    Raises:
        OSError: If a file cannot be read or stat'ed during the walk.
    """
    root = Path(library_path)
    log.info("Starting scan for library %d at %s", library_id, library_path)

    existing = await db.execute(
        select(MediaItem).where(MediaItem.library_id == library_id)
    )
    db_items = {item.rel_path: item for item in existing.scalars().all()}

    seen_paths: set[str] = set()
    loop = asyncio.get_running_loop()
    total_dirs = 0

    for dirpath, dirnames, filenames in os.walk(library_path):
        # Slice assignment edits the list os.walk recurses into: hidden
        # directories are pruned and the rest are visited in sorted order.
        dirnames[:] = sorted(d for d in dirnames if not d.startswith("."))
        current_dir = Path(dirpath)
        rel_dir = str(current_dir.relative_to(root)) if current_dir != root else "."
        found_in_dir = 0

        for filename in sorted(f for f in filenames if not f.startswith(".")):
            file_path = current_dir / filename
            media_type = classify(file_path)
            if not media_type:
                continue

            rel = str(file_path.relative_to(root))
            seen_paths.add(rel)
            found_in_dir += 1

            # Hashing reads the whole file, so it runs in the default executor
            # instead of blocking the event loop.
            file_hash = await loop.run_in_executor(None, hash_file, file_path)

            item = db_items.get(rel)
            if item is not None:
                if item.file_hash != file_hash:
                    item.file_hash = file_hash
                    # The content changed, so the stored size is stale too.
                    item.size_bytes = file_path.stat().st_size
                    item.updated_at = datetime.utcnow()
                    thumbnail_path(item.id).unlink(missing_ok=True)
                if item.missing:
                    item.missing = False
                    item.updated_at = datetime.utcnow()
                continue

            moved = await _find_by_hash(library_id, file_hash, db)
            if moved is not None:
                thumbnail_path(moved.id).unlink(missing_ok=True)
                # Re-key the row under its new path. Left under the old key,
                # the final sweep would see that path as unseen and flag the
                # row missing again.
                db_items.pop(moved.rel_path, None)
                moved.rel_path = rel
                moved.filename = file_path.name
                moved.missing = False
                moved.updated_at = datetime.utcnow()
                db_items[rel] = moved
            else:
                db.add(MediaItem(
                    library_id=library_id,
                    rel_path=rel,
                    filename=file_path.name,
                    file_hash=file_hash,
                    media_type=media_type,
                    size_bytes=file_path.stat().st_size,
                    missing=False,
                ))

        log.info("Scanned directory %s — %d media file(s) found", rel_dir, found_in_dir)
        total_dirs += 1

    # Check each row's current rel_path rather than the key it was loaded
    # under, so rows re-pointed during the walk are judged by where they are now.
    for item in db_items.values():
        if item.rel_path not in seen_paths and not item.missing:
            item.missing = True
            item.updated_at = datetime.utcnow()

    await db.commit()
    log.info(
        "Scan complete for library %d — %d director%s, %d media file(s) indexed",
        library_id, total_dirs, "y" if total_dirs == 1 else "ies", len(seen_paths),
    )
|
|
|
|
|
|
async def _find_by_hash(library_id: int, file_hash: str, db: AsyncSession) -> MediaItem | None:
    """Look up the single missing item in a library that has a given hash.

    Args:
        library_id: Library to search in.
        file_hash: Content hash to match against ``MediaItem.file_hash``.
        db: Session used to run the query.

    Returns:
        The matching row when exactly one row flagged ``missing`` has this
        hash; ``None`` when there are no matches or more than one.
    """
    query = select(MediaItem).where(
        MediaItem.library_id == library_id,
        MediaItem.file_hash == file_hash,
        MediaItem.missing == True,  # noqa: E712
    )
    candidates = (await db.execute(query)).scalars().all()
    # With several candidates there is no way to pick one, so none is returned.
    if len(candidates) != 1:
        return None
    return candidates[0]
|