"""
arma_modlist_tools.cleaner
~~~~~~~~~~~~~~~~~~~~~~~~~~
Identify orphaned mod folders in the downloads directory.

An *orphan* is a downloaded ``@ModName`` folder that is no longer referenced
by any group in ``comparison.json``.  This happens when the user swaps out a
modlist preset and re-runs the compare step — mods that were removed from the
preset remain on disk but are no longer tracked.

Typical usage::

    from arma_modlist_tools.cleaner import find_orphan_folders

    comparison = json.loads(Path("modlist_json/comparison.json").read_text())
    orphans = find_orphan_folders(Path("downloads"), comparison)
    for o in orphans:
        print(o["group"], o["name"], o["size"])
"""

from __future__ import annotations

from pathlib import Path

from .fetcher import _normalize_name as _normalize


def folder_size(path: Path) -> int:
    """Return the combined size, in bytes, of every file below *path*.

    The tree is walked recursively with ``Path.rglob("*")``; only entries for
    which ``is_file()`` is true contribute to the total.

    :param path: Root folder to measure.
    :returns: Sum of ``st_size`` over all files found (``0`` if there are none).
    """
    total_bytes = 0
    for entry in path.rglob("*"):
        if entry.is_file():
            total_bytes += entry.stat().st_size
    return total_bytes


def _known_names_by_group(comparison: dict) -> dict[str, set[str]]:
    """Map each group in *comparison* to the normalised names of its mods.

    The ``"shared"`` section is filed under the group ``"shared"``; every key
    of the ``"unique"`` section is filed under its own preset name.  A group
    only gets an entry if it lists at least one mod.
    """
    names_by_group: dict[str, set[str]] = {}

    shared_section = comparison.get("shared", {})
    for entry in shared_section.get("mods", []):
        names_by_group.setdefault("shared", set()).add(_normalize(entry["name"]))

    unique_section = comparison.get("unique", {})
    for preset, preset_data in unique_section.items():
        for entry in preset_data.get("mods", []):
            names_by_group.setdefault(preset, set()).add(_normalize(entry["name"]))

    return names_by_group


def find_orphan_folders(
    downloads: Path,
    comparison: dict,
) -> list[dict]:
    """Return a list of orphan mod folder entries.

    A folder ``downloads/{group}/@ModName`` counts as an orphan when its
    normalised name is not among the mods listed for the same group in
    *comparison*.  A group directory that has no mods recorded in *comparison*
    has every one of its ``@`` folders reported.  Only directories whose name
    starts with ``@`` are inspected; other entries are ignored.

    Names on both sides are normalised with the fetcher module's
    ``_normalize_name`` before being compared.

    :param downloads: Path to the ``downloads/`` directory.  If it is not an
        existing directory, an empty list is returned.
    :param comparison: Parsed ``comparison.json`` dict (output of
        :func:`~arma_modlist_tools.compare.compare_presets`).
    :returns: List of dicts, ordered by sorted group directory and then sorted
        mod directory, each with:

        - ``path``  — :class:`~pathlib.Path` of the folder, built from
          *downloads* (so it is absolute only when *downloads* is)
        - ``group`` — group name (e.g. ``"shared"``)
        - ``name``  — folder name as it appears on disk (e.g. ``"@ace"``)
        - ``size``  — total size in bytes (recursive)
    """
    # The lookup table is built before touching the filesystem, so malformed
    # comparison data surfaces even when the downloads folder is missing.
    tracked = _known_names_by_group(comparison)

    found: list[dict] = []
    if not downloads.is_dir():
        return found

    for group_path in sorted(downloads.iterdir()):
        if group_path.is_dir():
            group_name = group_path.name
            # No entry for this group means nothing in it is tracked any more.
            expected = tracked.get(group_name, set())
            for candidate in sorted(group_path.iterdir()):
                folder_name = candidate.name
                if (
                    candidate.is_dir()
                    and folder_name.startswith("@")
                    and _normalize(folder_name) not in expected
                ):
                    found.append({
                        "path": candidate,
                        "group": group_name,
                        "name": folder_name,
                        "size": folder_size(candidate),
                    })
    return found