This commit is contained in:
2026-05-09 14:51:25 -04:00
parent 97fabc2c17
commit f23a8a2be6
20 changed files with 382 additions and 185 deletions

View File

@@ -1,9 +1,22 @@
import asyncio
import hashlib
import logging
import os
from pathlib import Path
from datetime import datetime
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy import select
from app.models import MediaItem
from app.database import SessionLocal
# Module-level logger, named after this module.
log = logging.getLogger(__name__)
# IDs of libraries whose background scan is currently running.
# scan_library_background() adds the ID before scanning and discards it in its
# ``finally`` clause; is_scanning() only reads it.
_scanning: set[int] = set()
def is_scanning(library_id: int) -> bool:
    """Tell whether ``library_id`` is currently registered in ``_scanning``."""
    scan_in_progress = library_id in _scanning
    return scan_in_progress
# Image file suffixes: lower-case, including the leading dot.
# NOTE(review): presumably consulted by classify(), which is not visible here - confirm.
IMAGE_EXTENSIONS = {".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp", ".tiff", ".avif", ".heic"}
# Video file suffixes, in the same lower-case, dot-prefixed form.
# NOTE(review): ".ts" (MPEG transport stream) is also the TypeScript source
# suffix - confirm that is acceptable for the libraries being scanned.
VIDEO_EXTENSIONS = {".mp4", ".mkv", ".mov", ".avi", ".webm", ".m4v", ".flv", ".wmv", ".ts"}
@@ -26,58 +39,88 @@ def hash_file(path: Path) -> str:
return h.hexdigest()
async def scan_library_background(library_id: int, library_path: str) -> None:
    """Scan one library using a session opened just for this call.

    The library ID is kept in ``_scanning`` for the duration of the scan and
    is always removed afterwards. Any ``Exception`` raised by the scan is
    logged with its traceback instead of being propagated, which is what makes
    this suitable for fire-and-forget background execution.

    Args:
        library_id: ID of the library being scanned.
        library_path: Filesystem root of that library.
    """
    _scanning.add(library_id)
    try:
        session_cm = SessionLocal()
        async with session_cm as session:
            await _do_scan(library_id, library_path, session)
    except Exception:
        log.exception("Scan failed for library %d at %s", library_id, library_path)
    finally:
        # Runs on success and on failure, so the "scanning" flag cannot get stuck.
        _scanning.discard(library_id)
async def scan_library(library_id: int, library_path: str, db: AsyncSession) -> None:
    """Scan one library using the caller's session.

    Unlike ``scan_library_background`` this neither touches ``_scanning`` nor
    catches exceptions; it simply delegates to ``_do_scan``.

    Args:
        library_id: ID of the library being scanned.
        library_path: Filesystem root of that library.
        db: Session the scan reads from and commits through.
    """
    await _do_scan(library_id=library_id, library_path=library_path, db=db)
async def _do_scan(library_id: int, library_path: str, db: AsyncSession) -> None:
    """Reconcile the ``MediaItem`` rows of one library with the files on disk.

    Walks ``library_path`` with ``os.walk``, ignoring dot-prefixed directories
    and files. For every file that ``classify`` recognises:

    * if its relative path is already indexed, a set ``missing`` flag is
      cleared;
    * otherwise the file is hashed in the default executor and either the row
      returned by ``_find_by_hash`` is re-pointed at the new path (the file is
      treated as moved) or a new ``MediaItem`` is added.

    Rows whose current path was not seen during the walk are flagged
    ``missing``. All changes are committed once, after the walk.

    Args:
        library_id: ID of the library whose rows are reconciled.
        library_path: Filesystem root of the library; stored paths are
            relative to it.
        db: Session used for the queries and the final commit.

    Files that cannot be hashed or stat'ed (``OSError``) are logged and
    skipped instead of aborting the scan. Other exceptions propagate to the
    caller, in which case nothing is committed by this function.
    """
    root = Path(library_path)
    log.info("Starting scan for library %d at %s", library_id, library_path)

    existing = await db.execute(
        select(MediaItem).where(MediaItem.library_id == library_id)
    )
    # Keyed by the path each row had when the scan started.
    db_items = {item.rel_path: item for item in existing.scalars().all()}
    seen_paths: set[str] = set()
    loop = asyncio.get_running_loop()
    total_dirs = 0

    for dirpath, dirnames, filenames in os.walk(library_path):
        # In-place assignment prunes hidden directories from the walk itself;
        # sorting makes the traversal order deterministic.
        dirnames[:] = sorted(d for d in dirnames if not d.startswith("."))
        current_dir = Path(dirpath)
        rel_dir = str(current_dir.relative_to(root)) if current_dir != root else "."
        found_in_dir = 0

        for filename in sorted(f for f in filenames if not f.startswith(".")):
            file_path = current_dir / filename
            media_type = classify(file_path)
            if not media_type:
                continue

            rel = str(file_path.relative_to(root))
            known = db_items.get(rel)

            file_hash = None
            size_bytes = None
            if known is None:
                # Only unknown paths need hashing. Hashing runs in the default
                # executor so a large file does not block the event loop.
                try:
                    file_hash = await loop.run_in_executor(None, hash_file, file_path)
                    size_bytes = file_path.stat().st_size
                except OSError:
                    # The file vanished or is unreadable; skip it rather than
                    # abandoning the whole scan before the commit.
                    log.warning("Skipping unreadable file %s", file_path, exc_info=True)
                    continue

            seen_paths.add(rel)
            found_in_dir += 1

            if known is not None:
                if known.missing:
                    known.missing = False
                    known.updated_at = datetime.utcnow()
                continue

            # NOTE(review): _find_by_hash is not visible here; presumably it
            # returns a missing row of this library with the same hash, i.e.
            # a file that was moved while it was not being watched - confirm.
            moved = await _find_by_hash(library_id, file_hash, db)
            if moved:
                moved.rel_path = rel
                moved.filename = file_path.name
                moved.missing = False
                moved.updated_at = datetime.utcnow()
            else:
                db.add(MediaItem(
                    library_id=library_id,
                    rel_path=rel,
                    filename=file_path.name,
                    file_hash=file_hash,
                    media_type=media_type,
                    size_bytes=size_bytes,
                    missing=False,
                ))

        log.info("Scanned directory %s: %d media file(s) found", rel_dir, found_in_dir)
        total_dirs += 1

    # Mark items no longer on disk as missing. The row's *current* rel_path is
    # tested, not the dict key: a row re-pointed above still sits under its old
    # key, and testing that key would flag the just-relocated file as missing.
    for item in db_items.values():
        if item.rel_path not in seen_paths and not item.missing:
            item.missing = True
            item.updated_at = datetime.utcnow()

    await db.commit()
    log.info(
        "Scan complete for library %d: %d director%s, %d media file(s) indexed",
        library_id, total_dirs, "y" if total_dirs == 1 else "ies", len(seen_paths),
    )
async def _find_by_hash(library_id: int, file_hash: str, db: AsyncSession) -> MediaItem | None:

View File

@@ -111,12 +111,15 @@ class LibraryEventHandler(FileSystemEventHandler):
def start_watcher(library_id: int, library_path: str) -> None:
    """Start a recursive filesystem observer for a library, at most once.

    Does nothing when ``_observers`` already holds an entry for
    ``library_id``. A failure while creating, scheduling or starting the
    observer is logged with its traceback and not propagated; in that case no
    entry is stored, so a later call can try again.

    Args:
        library_id: ID of the library to watch; used as the ``_observers`` key.
        library_path: Directory to watch, including its subdirectories.
    """
    if library_id in _observers:
        return

    try:
        handler = LibraryEventHandler(library_id, library_path)
        observer = Observer()
        observer.schedule(handler, library_path, recursive=True)
        observer.start()
    except Exception:
        log.exception("Failed to start watcher for library %d at %s", library_id, library_path)
    else:
        # Register only an observer that actually started.
        _observers[library_id] = observer
        log.info("Started watcher for library %d at %s", library_id, library_path)
def stop_watcher(library_id: int):