Files
Jackify/jackify/backend/services/download_watcher_service.py
2026-03-13 14:43:25 +00:00

139 lines
5.2 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
Watches a directory for newly downloaded files and matches them against a
list of pending manual download items by lax filename comparison.
"""
import logging
import os
import time
from dataclasses import dataclass, field
from pathlib import Path
from threading import Event, Lock, Thread
from typing import Callable, Optional
logger = logging.getLogger(__name__)
@dataclass
class WatcherConfig:
    """Settings that tell the download watcher where and how to look for files."""

    # Primary directory that is scanned for downloaded files.
    watch_directory: Path
    # When true, subdirectories are searched as well (rglob instead of iterdir).
    watch_recursive: bool = False
    # Target time, in seconds, a file's size should hold steady before the
    # file is reported as a candidate.
    debounce_seconds: float = 2.0
    # Extra directories scanned in addition to watch_directory; each instance
    # gets its own fresh list.
    additional_dirs: list = field(default_factory=list)
class DownloadWatcherService:
    """
    Monitors a directory for files that match pending download items.

    Caller sets pending_items (list of dicts with at least 'file_name') and
    registers an on_candidate callback that receives (Path, dict) when a
    potential match is detected (after debounce, before hash validation).

    Threading model: ``start()`` launches one daemon polling thread that scans
    the watch directories about once per second. Every matched file gets its
    own short-lived daemon thread that waits for the file size to settle and
    then invokes ``on_candidate`` from that thread. At most one such thread
    exists per path at any time.
    """

    # Seconds between file-size samples while waiting for a download to settle.
    _POLL_INTERVAL = 0.5
    # Suffixes browsers give in-progress downloads; compared case-insensitively.
    _TEMP_SUFFIXES = ('.part', '.crdownload', '.tmp')

    def __init__(self, config: "WatcherConfig", on_candidate: Callable[[Path, dict], None]):
        """
        Args:
            config: Watch settings; read on every scan, so later changes to
                its fields take effect without a restart.
            on_candidate: Called with (path, pending_item) from a debounce
                thread once a matching file's size has settled.
        """
        self._config = config
        self._on_candidate = on_candidate
        self._pending_items: list[dict] = []
        # (lower-cased expected file name, item) pairs derived from _pending_items
        self._pending_exact: list[tuple[str, dict]] = []
        self._stop_event = Event()
        self._thread: Optional[Thread] = None
        # Track known files so we only react to new/changed ones
        self._known: dict[Path, float] = {}
        # Paths that currently have a debounce thread running. Without this,
        # every mtime change seen while a file is still being written would
        # spawn another thread and the callback would fire once per thread.
        self._debouncing: set[Path] = set()
        self._debounce_lock = Lock()

    def set_pending_items(self, items: list[dict]) -> None:
        """
        Replace the pending items list.

        Both internal lists are built first and then rebound, so a concurrent
        scan sees either the old or the new list, never a partial one. Items
        without a truthy 'file_name' are kept in the raw list but never match.

        Note: a file already recorded by an earlier scan is not re-evaluated
        against the new list until its mtime changes.
        """
        pending = list(items or ())
        exact = [
            (str(item.get('file_name', '')).lower(), item)
            for item in pending
            if item.get('file_name')
        ]
        self._pending_items = pending
        self._pending_exact = exact

    def start(self) -> None:
        """Start the polling thread; does nothing if it is already running."""
        if self._thread and self._thread.is_alive():
            return
        self._stop_event.clear()
        self._thread = Thread(target=self._watch_loop, daemon=True, name='DownloadWatcher')
        self._thread.start()
        logger.debug(f"Download watcher started on: {self._config.watch_directory}")

    def stop(self) -> None:
        """
        Signal all watcher threads to stop and wait up to 5 s for the poller.

        Debounce threads wake on the same event and exit without emitting.
        """
        self._stop_event.set()
        if self._thread:
            self._thread.join(timeout=5)
        logger.debug("Download watcher stopped")

    def _all_watch_dirs(self) -> list[Path]:
        """
        Return the configured directories that currently exist, primary first.

        Entries are coerced to Path so plain-string directories (the
        additional_dirs field is an untyped list) work as well.
        """
        configured = [self._config.watch_directory, *(self._config.additional_dirs or ())]
        return [d for d in map(Path, configured) if d.is_dir()]

    def _scan(self) -> None:
        """
        Walk every watch directory once and hand new or modified regular
        files to _check_candidate. A file counts as modified when its mtime
        differs from the one recorded on the previous scan.
        """
        recursive = self._config.watch_recursive
        for watch_dir in self._all_watch_dirs():
            try:
                entries = watch_dir.rglob('*') if recursive else watch_dir.iterdir()
                for path in entries:
                    # Skip browser temp files
                    if path.suffix.lower() in self._TEMP_SUFFIXES:
                        continue
                    if not path.is_file():
                        continue
                    try:
                        mtime = path.stat().st_mtime
                    except OSError:
                        # File vanished between listing and stat; ignore it.
                        continue
                    if self._known.get(path) == mtime:
                        continue
                    self._known[path] = mtime
                    self._check_candidate(path)
            except OSError as e:
                # Best effort: an unreadable directory must not stop the others.
                logger.debug(f"Watcher scan error on {watch_dir}: {e}")

    def _check_candidate(self, path: Path) -> None:
        """
        Match *path* against the pending items and start a debounce for the
        first hit. Exact (case-insensitive) matches across all items take
        priority over dot-normalized matches.
        """
        candidate_name = path.name.lower()
        # Snapshot once so both passes work on the same list even if
        # set_pending_items() runs concurrently.
        pending = self._pending_exact
        # Exact filename match (case-insensitive).
        for expected_name, item in pending:
            if expected_name == candidate_name:
                logger.debug(f"Candidate exact match: {path.name}")
                self._debounce_and_emit(path, item)
                return
        # Some modlist metadata stores filenames with a leading dot that browsers
        # strip when saving the download. Match against the stripped expected name.
        for expected_name, item in pending:
            if expected_name.lstrip('.') == candidate_name:
                logger.debug(f"Candidate dot-normalized match: {path.name} -> {expected_name}")
                self._debounce_and_emit(path, item)
                return

    def _debounce_and_emit(self, path: Path, item: dict) -> None:
        """
        Wait in a background thread until *path* stops growing, then call
        on_candidate(path, item). A request for a path that already has a
        debounce in flight is dropped; the running one covers it.
        """
        with self._debounce_lock:
            if path in self._debouncing:
                return
            self._debouncing.add(path)

        def _wait_and_emit():
            try:
                ready = self._wait_for_stable_size(path)
            finally:
                # Release the slot before the callback runs so a change made
                # while the callback is busy can start a fresh debounce.
                with self._debounce_lock:
                    self._debouncing.discard(path)
            if ready and not self._stop_event.is_set() and path.exists():
                self._on_candidate(path, item)

        worker = Thread(target=_wait_and_emit, daemon=True, name=f'Debounce-{path.name[:20]}')
        try:
            worker.start()
        except RuntimeError:
            # Thread could not be started; do not leave the path marked busy.
            with self._debounce_lock:
                self._debouncing.discard(path)
            raise

    def _wait_for_stable_size(self, path: Path) -> bool:
        """
        Poll the size of *path* until it is unchanged for `needed` consecutive
        samples or the polling budget runs out.

        Returns:
            False if stop was requested or the file could not be stat'ed;
            True otherwise. Running out of budget still returns True, so a
            slowly growing file is reported rather than silently dropped.
        """
        interval = self._POLL_INTERVAL
        needed = max(1, int(self._config.debounce_seconds / interval))
        prev_size = -1
        stable_count = 0
        for _ in range(needed * 4):  # budget is about 4x debounce_seconds
            # Event.wait() is an interruptible sleep: True means stop() was called.
            if self._stop_event.wait(interval):
                return False
            try:
                size = path.stat().st_size
            except OSError:
                return False
            if size == prev_size:
                stable_count += 1
                if stable_count >= needed:
                    break
            else:
                stable_count = 0
            prev_size = size
        return True

    def _watch_loop(self) -> None:
        """Polling thread body: scan, then wait up to 1 s or until stopped."""
        while not self._stop_event.is_set():
            self._scan()
            self._stop_event.wait(timeout=1.0)