Pipeline: parse HTML presets, compare modlists, download from Caddy file server, create junctions/symlinks to Arma 3 Server directory. Includes update/sync flows, missing-mod reporting, OS compat layer, shared config, dep checker, comprehensive test suite (71 tests). Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
147 lines
4.4 KiB
Python
147 lines
4.4 KiB
Python
"""
|
|
arma_modlist_tools.parser
~~~~~~~~~~~~~~~~~~~~~~~~~
Parse Arma 3 Launcher mod preset HTML files (.html exported from the launcher)
into plain Python dicts / lists suitable for JSON serialisation.

Typical usage::

    from arma_modlist_tools.parser import parse_modlist_html, parse_modlist_dir

    # single file
    preset = parse_modlist_html("modlist_html/my_preset.html")

    # whole folder
    presets = parse_modlist_dir("modlist_html")
|
|
"""
|
|
|
|
import re
|
|
import xml.etree.ElementTree as ET
|
|
from pathlib import Path
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Public types (plain dicts — keep it dependency-free)
|
|
# ---------------------------------------------------------------------------
|
|
|
|
# ModEntry:
#   name     : str          display name from the launcher
#   source   : "steam" | "local" | "unknown"
#   url      : str | None   full workshop / local path URL
#   steam_id : str | None   numeric workshop item ID extracted from the URL
#
# Preset:
#   preset_name : str        stem of the source filename
#   source_file : str        basename of the source filename
#   mod_count   : int
#   mods        : list[ModEntry]
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Low-level helpers
|
|
# ---------------------------------------------------------------------------
|
|
|
|
_STEAM_ID_RE = re.compile(r"[?&]id=(\d+)")
|
|
|
|
|
|
def _extract_steam_id(url: str) -> str | None:
|
|
"""Return the numeric workshop item ID from a Steam URL, or None."""
|
|
m = _STEAM_ID_RE.search(url)
|
|
return m.group(1) if m else None
|
|
|
|
|
|
def _source_from_class(css_class: str) -> str:
    """Translate a span's CSS class attribute into a source label.

    :param css_class: Value of the ``class`` attribute to inspect.
    :returns: ``"steam"`` if the class contains ``from-steam``, ``"local"``
        if it contains ``from-local``, otherwise ``"unknown"``.
    """
    # Order matters: "from-steam" takes precedence when both markers appear.
    known_markers = (("from-steam", "steam"), ("from-local", "local"))
    for marker, label in known_markers:
        if marker in css_class:
            return label
    return "unknown"
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Core parsing
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def parse_mod_entry(tr_element: ET.Element) -> dict | None:
|
|
"""
|
|
Parse a single ``<tr data-type="ModContainer">`` element into a mod dict.
|
|
|
|
Returns ``None`` if the element does not contain a display name (i.e. it
|
|
is not a valid mod row).
|
|
"""
|
|
name: str | None = None
|
|
source: str = "unknown"
|
|
url: str | None = None
|
|
steam_id: str | None = None
|
|
|
|
for td in tr_element:
|
|
dtype = td.get("data-type")
|
|
|
|
if dtype == "DisplayName":
|
|
name = (td.text or "").strip()
|
|
continue
|
|
|
|
for span in td.iter("span"):
|
|
css = span.get("class", "")
|
|
if "from-" in css:
|
|
source = _source_from_class(css)
|
|
|
|
for a in td.iter("a"):
|
|
if a.get("data-type") == "Link":
|
|
href = (a.get("href") or "").strip()
|
|
if href:
|
|
url = href
|
|
steam_id = _extract_steam_id(href)
|
|
|
|
if name is None:
|
|
return None
|
|
|
|
return {"name": name, "source": source, "url": url, "steam_id": steam_id}
|
|
|
|
|
|
def parse_modlist_html(filepath: str | Path) -> dict:
|
|
"""
|
|
Parse an Arma 3 Launcher preset HTML file and return a preset dict.
|
|
|
|
:param filepath: Path to the ``.html`` preset file.
|
|
:returns: Dict with keys ``preset_name``, ``source_file``, ``mod_count``,
|
|
and ``mods`` (list of mod entry dicts).
|
|
:raises FileNotFoundError: If *filepath* does not exist.
|
|
:raises ET.ParseError: If the file is not valid XML/HTML.
|
|
"""
|
|
path = Path(filepath)
|
|
tree = ET.parse(path)
|
|
root = tree.getroot()
|
|
|
|
mods = []
|
|
for tr in root.iter("tr"):
|
|
if tr.get("data-type") != "ModContainer":
|
|
continue
|
|
entry = parse_mod_entry(tr)
|
|
if entry is not None:
|
|
mods.append(entry)
|
|
|
|
return {
|
|
"preset_name": path.stem,
|
|
"source_file": path.name,
|
|
"mod_count": len(mods),
|
|
"mods": mods,
|
|
}
|
|
|
|
|
|
def parse_modlist_dir(directory: str | Path) -> list[dict]:
|
|
"""
|
|
Parse all ``.html`` preset files in *directory* and return a list of
|
|
preset dicts (one per file, sorted by filename).
|
|
|
|
:param directory: Folder containing ``.html`` preset files.
|
|
:returns: List of preset dicts as returned by :func:`parse_modlist_html`.
|
|
:raises NotADirectoryError: If *directory* does not exist or is not a dir.
|
|
"""
|
|
d = Path(directory)
|
|
if not d.is_dir():
|
|
raise NotADirectoryError(f"Not a directory: {d}")
|
|
|
|
return [parse_modlist_html(f) for f in sorted(d.glob("*.html"))]
|