fixes
This commit is contained in:
@@ -1,9 +1,22 @@
|
||||
import asyncio
|
||||
import hashlib
|
||||
import logging
|
||||
import os
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy import select
|
||||
from app.models import MediaItem
|
||||
from app.database import SessionLocal
|
||||
|
||||
# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# IDs of the libraries that currently have a background scan in flight.
_scanning: set[int] = set()


def is_scanning(library_id: int) -> bool:
    """Report whether ``library_id`` is currently registered in ``_scanning``."""
    return library_id in _scanning
|
||||
|
||||
|
||||
# Lower-case file suffixes (with leading dot) listed as image formats.
IMAGE_EXTENSIONS = {
    ".jpg",
    ".jpeg",
    ".png",
    ".gif",
    ".webp",
    ".bmp",
    ".tiff",
    ".avif",
    ".heic",
}
# Lower-case file suffixes (with leading dot) listed as video formats.
VIDEO_EXTENSIONS = {
    ".mp4",
    ".mkv",
    ".mov",
    ".avi",
    ".webm",
    ".m4v",
    ".flv",
    ".wmv",
    ".ts",
}
|
||||
@@ -26,58 +39,88 @@ def hash_file(path: Path) -> str:
|
||||
return h.hexdigest()
|
||||
|
||||
|
||||
async def scan_library_background(library_id: int, library_path: str) -> None:
    """Scan a library using a session opened here; intended for background tasks.

    The library id is registered in ``_scanning`` for the duration of the
    scan and is always removed afterwards. Any ``Exception`` raised by the
    scan is logged with its traceback and not propagated to the caller.
    """
    _scanning.add(library_id)
    try:
        # Open a dedicated session rather than borrowing one from a caller.
        async with SessionLocal() as session:
            await _do_scan(library_id, library_path, session)
    except Exception:
        log.exception("Scan failed for library %d at %s", library_id, library_path)
    finally:
        # discard() never raises, even if the id was already removed.
        _scanning.discard(library_id)
|
||||
|
||||
|
||||
async def scan_library(library_id: int, library_path: str, db: AsyncSession) -> None:
    """Scan a library using the caller-supplied session ``db``.

    Delegates directly to ``_do_scan``; unlike ``scan_library_background``
    it neither touches ``_scanning`` nor catches exceptions.
    """
    await _do_scan(library_id=library_id, library_path=library_path, db=db)
|
||||
|
||||
|
||||
async def _do_scan(library_id: int, library_path: str, db: AsyncSession) -> None:
    """Walk ``library_path`` and reconcile what is on disk with the library's rows.

    Steps:
      1. Load every ``MediaItem`` of the library, keyed by ``rel_path``.
      2. Walk the directory tree with ``os.walk``, skipping dot-prefixed
         directories and files, and keeping only files ``classify`` accepts.
      3. For a path already known: clear its ``missing`` flag if set.
         For an unknown path: hash the file in the default executor, then
         either re-home the row returned by ``_find_by_hash`` or add a new
         ``MediaItem``.
      4. Flag every loaded row whose path was not seen as ``missing``.
      5. Commit once at the end.

    Args:
        library_id: Id of the library whose items are reconciled.
        library_path: Root directory of the library on disk.
        db: Session used for all queries and the final commit.
    """
    root = Path(library_path)
    log.info("Starting scan for library %d at %s", library_id, library_path)

    existing = await db.execute(
        select(MediaItem).where(MediaItem.library_id == library_id)
    )
    db_items = {item.rel_path: item for item in existing.scalars().all()}

    seen_paths: set[str] = set()
    loop = asyncio.get_running_loop()
    total_dirs = 0

    for dirpath, dirnames, filenames in os.walk(library_path):
        # Assigning through the slice mutates the list os.walk holds, so
        # hidden directories are pruned from the traversal; sorting makes
        # the visiting order deterministic.
        dirnames[:] = sorted(d for d in dirnames if not d.startswith("."))
        # Named ``current_dir`` so the ``dir`` builtin is not shadowed.
        current_dir = Path(dirpath)
        rel_dir = str(current_dir.relative_to(root)) if current_dir != root else "."
        found_in_dir = 0

        for filename in sorted(f for f in filenames if not f.startswith(".")):
            file_path = current_dir / filename
            media_type = classify(file_path)
            if not media_type:
                continue

            rel = str(file_path.relative_to(root))
            seen_paths.add(rel)
            found_in_dir += 1

            if rel in db_items:
                # Known path: only revive it if it had been flagged missing.
                item = db_items[rel]
                if item.missing:
                    item.missing = False
                    item.updated_at = datetime.utcnow()
                continue

            # Unknown path: hash off the event loop thread so hashing a
            # large file does not stall other coroutines.
            file_hash = await loop.run_in_executor(None, hash_file, file_path)
            # A hash match is treated as "same file, new location"
            # (presumably an item that went missing — see _find_by_hash).
            moved = await _find_by_hash(library_id, file_hash, db)
            if moved:
                moved.rel_path = rel
                moved.filename = file_path.name
                moved.missing = False
                moved.updated_at = datetime.utcnow()
            else:
                db.add(MediaItem(
                    library_id=library_id,
                    rel_path=rel,
                    filename=file_path.name,
                    file_hash=file_hash,
                    media_type=media_type,
                    size_bytes=file_path.stat().st_size,
                    missing=False,
                ))

        log.info("Scanned directory %s — %d media file(s) found", rel_dir, found_in_dir)
        total_dirs += 1

    # Mark items no longer on disk as missing.
    # Test the item's *current* rel_path, not the dict key captured before
    # the walk: an item re-homed above is still keyed under its old path,
    # and checking that stale key would flag it missing again right after
    # it was revived (assuming _find_by_hash returns the same session-tracked
    # object that was loaded into db_items — TODO confirm).
    for item in db_items.values():
        if item.rel_path not in seen_paths and not item.missing:
            item.missing = True
            item.updated_at = datetime.utcnow()

    await db.commit()
    log.info(
        "Scan complete for library %d — %d director%s, %d media file(s) indexed",
        library_id, total_dirs, "y" if total_dirs == 1 else "ies", len(seen_paths),
    )
|
||||
|
||||
|
||||
async def _find_by_hash(library_id: int, file_hash: str, db: AsyncSession) -> MediaItem | None:
|
||||
|
||||
Reference in New Issue
Block a user