initial commit
This commit is contained in:
91
backend/app/services/scanner.py
Normal file
91
backend/app/services/scanner.py
Normal file
@@ -0,0 +1,91 @@
|
||||
import hashlib
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy import select
|
||||
from app.models import MediaItem
|
||||
|
||||
# Lower-case file suffixes (leading dot included) that are treated as still images.
IMAGE_EXTENSIONS = {".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp", ".tiff", ".avif", ".heic"}
# Lower-case file suffixes (leading dot included) that are treated as videos.
VIDEO_EXTENSIONS = {".mp4", ".mkv", ".mov", ".avi", ".webm", ".m4v", ".flv", ".wmv", ".ts"}
|
||||
|
||||
|
||||
def classify(path: Path) -> str | None:
    """Return the media kind of *path*, judged by its file suffix alone.

    The suffix is compared case-insensitively against ``IMAGE_EXTENSIONS``
    and ``VIDEO_EXTENSIONS``.

    Returns:
        ``"image"`` or ``"video"``, or ``None`` when the suffix is in
        neither set.
    """
    suffix = path.suffix.lower()
    # Images are checked first, matching the order of the extension tables.
    for kind, extensions in (("image", IMAGE_EXTENSIONS), ("video", VIDEO_EXTENSIONS)):
        if suffix in extensions:
            return kind
    return None
|
||||
|
||||
|
||||
def hash_file(path: Path, *, chunk_size: int = 65536) -> str:
    """Return the SHA-256 hex digest of the file at *path*.

    The file is read in binary mode in pieces of at most *chunk_size* bytes,
    so the whole file is never held in memory at once.

    Args:
        path: File to hash.
        chunk_size: Maximum number of bytes to read per iteration.

    Raises:
        ValueError: If *chunk_size* is not positive.
        OSError: If the file cannot be opened or read.
    """
    if chunk_size <= 0:
        # read(0) returns b"" immediately, which would silently produce the
        # digest of empty input; a negative size would read the whole file.
        raise ValueError("chunk_size must be a positive integer")

    digest = hashlib.sha256()
    with path.open("rb") as f:
        while chunk := f.read(chunk_size):
            digest.update(chunk)
    return digest.hexdigest()
|
||||
|
||||
|
||||
async def scan_library(library_id: int, library_path: str, db: AsyncSession) -> None:
    """Reconcile the ``MediaItem`` rows of one library with the files on disk.

    Walks *library_path* recursively. For every regular file that
    ``classify`` recognises:

    * if a row with the same relative path exists, its ``missing`` flag is
      cleared when set;
    * otherwise the file is hashed and, if a row of this library flagged
      ``missing`` has the same hash, that row is re-pointed at the new path
      (the file was moved);
    * otherwise a new ``MediaItem`` is added.

    Rows whose path was not encountered are then flagged ``missing``.
    Everything is committed once at the end.

    Args:
        library_id: Library whose rows are loaded and updated.
        library_path: Root directory of the library; stored paths are
            relative to it.
        db: Session used for all queries and for the final commit.

    NOTE(review): the directory walk and ``hash_file`` are synchronous and
    run directly inside this coroutine.
    """
    root = Path(library_path)
    existing = await db.execute(
        select(MediaItem).where(MediaItem.library_id == library_id)
    )
    # Keyed by the rel_path each row had when it was loaded.
    db_items = {item.rel_path: item for item in existing.scalars().all()}

    seen_paths: set[str] = set()

    for file_path in root.rglob("*"):
        if not file_path.is_file():
            continue
        media_type = classify(file_path)
        if not media_type:
            continue

        rel = str(file_path.relative_to(root))
        seen_paths.add(rel)

        known = db_items.get(rel)
        if known is not None:
            # Already tracked at this path; only revive it if it had gone missing.
            if known.missing:
                known.missing = False
                known.updated_at = datetime.utcnow()
            continue

        file_hash = hash_file(file_path)
        # Check if this hash matches an orphaned (missing) item — file was moved while offline
        moved = await _find_by_hash(library_id, file_hash, db)
        if moved is not None:
            moved.rel_path = rel
            moved.filename = file_path.name
            moved.missing = False
            moved.updated_at = datetime.utcnow()
        else:
            db.add(
                MediaItem(
                    library_id=library_id,
                    rel_path=rel,
                    filename=file_path.name,
                    file_hash=file_hash,
                    media_type=media_type,
                    size_bytes=file_path.stat().st_size,
                    missing=False,
                )
            )

    # Mark items no longer on disk as missing.
    # Compare the row's *current* rel_path rather than the dict key: a row
    # re-linked above still sits under its old key in db_items, and testing
    # that key would flag the freshly recovered row as missing again.
    for item in db_items.values():
        if item.rel_path not in seen_paths and not item.missing:
            item.missing = True
            item.updated_at = datetime.utcnow()

    await db.commit()
|
||||
|
||||
|
||||
async def _find_by_hash(library_id: int, file_hash: str, db: AsyncSession) -> MediaItem | None:
    """Fetch a missing item of a library by its content hash.

    Args:
        library_id: Library the item must belong to.
        file_hash: Hash value to match against ``MediaItem.file_hash``.
        db: Session the query is executed on.

    Returns:
        The first matching ``MediaItem`` whose ``missing`` flag is set, or
        ``None`` when there is no such row.
    """
    criteria = (
        MediaItem.library_id == library_id,
        MediaItem.file_hash == file_hash,
        # Deliberate `== True`: this is a column comparison inside the query.
        MediaItem.missing == True,  # noqa: E712
    )
    stmt = select(MediaItem).where(*criteria)
    rows = await db.execute(stmt)
    return rows.scalars().first()
|
||||
Reference in New Issue
Block a user