feat: implement full backend + frontend server detail, settings, and create server pages

Backend:
- Complete FastAPI backend with 42+ REST endpoints (auth, servers, config,
  players, bans, missions, mods, games, system)
- Game adapter architecture with Arma 3 as first-class adapter
- WebSocket real-time events for status, metrics, logs, players
- Background thread system (process monitor, metrics, log tail, RCon poller)
- Fernet encryption for sensitive config fields at rest
- JWT auth with admin/viewer roles, bcrypt password hashing
- SQLite with WAL mode, parameterized queries, migration system
- APScheduler cleanup jobs for logs, metrics, events

Frontend:
- Server Detail page with 7 tabs (overview, config, players, bans,
  missions, mods, logs)
- Settings page with password change and admin user management
- Create Server wizard (4-step; known bug: silent validation failure)
- New hooks: useServerDetail, useAuth, useGames
- New components: ServerHeader, ConfigEditor, PlayerTable, BanTable,
  MissionList, ModList, LogViewer, PasswordChange, UserManager
- WebSocket onEvent callback for real-time log accumulation
- 120 unit tests passing (Vitest + React Testing Library)

Docs:
- Added .gitignore, CLAUDE.md, README.md
- Updated FRONTEND.md, ARCHITECTURE.md with current implementation state
- Added .env.example for backend configuration

Known issues:
- Create Server form: "Next" buttons don't validate before advancing,
  causing silent submit failure when fields are invalid
- Config sub-tabs need UX redesign for non-technical users
This commit is contained in:
Tran G. (Revernomad) Khoa
2026-04-17 11:58:34 +07:00
parent 620429c9b8
commit 6511353b55
119 changed files with 13752 additions and 5000 deletions

View File

@@ -0,0 +1,3 @@
# Package façade: ThreadRegistry is the only public entry point of core.threads.
from core.threads.thread_registry import ThreadRegistry
__all__ = ["ThreadRegistry"]

View File

@@ -0,0 +1,123 @@
"""
BaseServerThread — base class for all per-server background threads.
Rules every subclass MUST follow:
- Call super().__init__(server_id, name) in __init__
- Implement _run_loop() — called repeatedly until _stop_event is set
- Do NOT override run() directly
- Use self._db for all database operations — it is a thread-local connection
- Call self._close_db() in your finally block if you open additional connections
- Exceptions raised from _run_loop() are caught, logged, and the loop continues
unless the exception is a fatal error — set self._fatal_error = True to stop
"""
from __future__ import annotations
import logging
import threading
from abc import ABC, abstractmethod
from database import get_thread_db
logger = logging.getLogger(__name__)
# Backoff between _run_loop() retries after an unhandled exception:
# starts at BASE seconds, multiplies by MULTIPLIER per consecutive failure,
# capped at MAX; reset to BASE after any successful iteration.
_EXCEPTION_BACKOFF_BASE = 2.0
_EXCEPTION_BACKOFF_MAX = 60.0
_EXCEPTION_BACKOFF_MULTIPLIER = 2.0
class BaseServerThread(ABC, threading.Thread):
    """
    Abstract base for all per-server background threads.

    Subclasses implement _run_loop(). This base class handles:
    - Stop event signaling (stop() / stop_and_join() / _sleep())
    - Thread-local DB connection lifecycle (self._db)
    - Exception backoff to prevent tight crash loops
    - Structured logging with server_id context baked into the thread name
    """

    def __init__(self, server_id: int, name: str) -> None:
        """
        Args:
            server_id: Database ID of the server this thread serves.
            name: Thread-type prefix used in the thread name (e.g. "LogTail").
        """
        super().__init__(name=f"{name}-server-{server_id}", daemon=True)
        self.server_id = server_id
        self._stop_event = threading.Event()
        self._fatal_error = False  # subclasses set True to stop the loop permanently
        self._db = None  # thread-local DB connection, opened in run()
        self._exception_count = 0  # consecutive _run_loop() failures

    # ── Public API ──

    def stop(self) -> None:
        """Signal the thread to stop. Does not block."""
        self._stop_event.set()

    def stop_and_join(self, timeout: float = 5.0) -> None:
        """Signal stop and wait up to `timeout` seconds for the thread to exit."""
        self._stop_event.set()
        self.join(timeout=timeout)

    @property
    def is_stopping(self) -> bool:
        """True once stop() has been requested."""
        return self._stop_event.is_set()

    # ── Thread entry point ──

    def run(self) -> None:
        """Thread body. Do NOT override — implement _run_loop() instead."""
        logger.info("[%s] Starting", self.name)
        backoff = _EXCEPTION_BACKOFF_BASE
        try:
            self._db = get_thread_db()
            self._on_start()
            while not self._stop_event.is_set() and not self._fatal_error:
                try:
                    self._run_loop()
                    # Successful iteration: reset backoff state.
                    backoff = _EXCEPTION_BACKOFF_BASE
                    self._exception_count = 0
                except Exception as exc:
                    self._exception_count += 1
                    logger.error(
                        "[%s] Unhandled exception in _run_loop (count=%d): %s",
                        self.name, self._exception_count, exc, exc_info=True,
                    )
                    if self._fatal_error:
                        break
                    # Wait before retrying; wakes early if stop() is called.
                    self._stop_event.wait(timeout=backoff)
                    backoff = min(backoff * _EXCEPTION_BACKOFF_MULTIPLIER, _EXCEPTION_BACKOFF_MAX)
        except Exception as exc:
            logger.critical("[%s] Fatal error in thread setup: %s", self.name, exc, exc_info=True)
        finally:
            # Fix: _on_stop() is a subclass hook — shield it so a raising hook
            # cannot skip _close_db() (leaking the DB connection) or suppress
            # the "Stopped" log line.
            try:
                self._on_stop()
            except Exception as exc:
                logger.error("[%s] Error in _on_stop: %s", self.name, exc, exc_info=True)
            self._close_db()
            logger.info("[%s] Stopped", self.name)

    # ── Hooks for subclasses ──

    def _on_start(self) -> None:
        """Called once before the loop starts. Override for setup."""

    def _on_stop(self) -> None:
        """Called once after the loop ends. Override for cleanup."""

    @abstractmethod
    def _run_loop(self) -> None:
        """
        Implement the thread's work here.
        Called repeatedly until stop() is called or _fatal_error is set.
        Should block for a short period (sleep or wait) to avoid busy-looping.
        """

    # ── Internal helpers ──

    def _close_db(self) -> None:
        """Close and clear the thread-local DB connection, ignoring close errors."""
        if self._db is not None:
            try:
                self._db.close()
            except Exception as exc:
                logger.debug("[%s] Error closing DB connection: %s", self.name, exc)
            self._db = None

    def _sleep(self, seconds: float) -> None:
        """Interruptible sleep — wakes up early if stop() is called."""
        self._stop_event.wait(timeout=seconds)

View File

@@ -0,0 +1,167 @@
"""
LogTailThread — tails a server's log file, parses lines via LogParser,
and persists parsed entries to the logs table.
Design notes:
- Opens the log file in text mode with errors="replace" to handle encoding issues
- Detects log rotation by checking if the inode changes (Unix) or file shrinks (Windows)
- On rotation: closes old handle, reopens from position 0
- Flushes inserts in batches of up to LOG_BATCH_SIZE per loop iteration
"""
from __future__ import annotations
import logging
import os
import queue
from pathlib import Path
from typing import Callable, Optional
from core.dal.log_repository import LogRepository
from core.threads.base_thread import BaseServerThread
logger = logging.getLogger(__name__)
# Max parsed lines persisted per loop iteration.
_LOG_BATCH_SIZE = 50
# Seconds to wait when the file is absent or has no new data.
_POLL_INTERVAL = 1.0
# Seconds to wait between closing a rotated file and reopening it.
_REOPEN_DELAY = 2.0
class LogTailThread(BaseServerThread):
    """
    Tails a log file for a specific server, parsing lines via the game's
    LogParser and persisting parsed entries to the logs table.

    Args:
        server_id: The database server ID.
        log_path: Absolute path to the log file to tail.
        log_parser: LogParser adapter instance for this game type.
        broadcast_queue: Optional queue.Queue to push parsed events to BroadcastThread.
    """

    def __init__(
        self,
        server_id: int,
        log_path: str,
        log_parser,
        broadcast_queue=None,
    ) -> None:
        super().__init__(server_id, "LogTail")
        self._log_path = log_path
        self._log_parser = log_parser
        self._broadcast_queue = broadcast_queue
        self._file_handle = None  # open text handle, or None when unavailable
        self._last_inode = None   # inode at open time (Unix rotation detection)
        self._last_size = 0       # last observed size (shrink detection fallback)

    # ── Lifecycle ──

    def _on_start(self) -> None:
        # Initial open tails from EOF so historical backlog is not re-ingested.
        self._open_log_file()

    def _on_stop(self) -> None:
        self._close_file()

    # ── Main loop ──

    def _run_loop(self) -> None:
        """Read up to one batch of new lines, persist them, and broadcast them."""
        if self._file_handle is None:
            # File not (yet) available — retry after a short interruptible wait.
            self._stop_event.wait(timeout=_POLL_INTERVAL)
            self._open_log_file()
            return
        if self._detect_rotation():
            logger.info("[%s] Log rotation detected, reopening", self.name)
            self._close_file()
            self._stop_event.wait(timeout=_REOPEN_DELAY)
            # Fix: per the module's design notes, a rotated file must be
            # re-read from position 0 — previously this seeked to EOF,
            # silently skipping lines written to the new file before reopen.
            self._open_log_file(seek_to_end=False)
            return
        lines_read = 0
        entries_to_insert = []
        while lines_read < _LOG_BATCH_SIZE:
            line = self._file_handle.readline()
            if not line:
                break  # reached current EOF
            lines_read += 1
            line = line.rstrip("\n").rstrip("\r")
            if not line:
                continue
            parsed = self._log_parser.parse_line(line)
            if parsed is not None:
                entries_to_insert.append(parsed)
        if entries_to_insert and self._db is not None:
            log_repo = LogRepository(self._db)
            for entry in entries_to_insert:
                log_repo.insert(server_id=self.server_id, entry=entry)
            try:
                self._db.commit()
            except Exception as exc:
                logger.error("[%s] DB commit failed: %s", self.name, exc)
                self._db.rollback()
            if self._broadcast_queue is not None:
                for entry in entries_to_insert:
                    try:
                        self._broadcast_queue.put_nowait({
                            "type": "log",
                            "server_id": self.server_id,
                            "data": entry,
                        })
                    except queue.Full:
                        logger.debug("[%s] Broadcast queue full, dropping log event", self.name)
        if lines_read == 0:
            # No new data — back off briefly.
            self._stop_event.wait(timeout=_POLL_INTERVAL)

    # ── File management ──

    def _open_log_file(self, seek_to_end: bool = True) -> None:
        """
        Open the log file and record its inode/size for rotation detection.

        Args:
            seek_to_end: When True (default, initial open) start tailing from
                EOF; when False (post-rotation reopen) read from position 0.
        """
        if not os.path.exists(self._log_path):
            return
        try:
            self._file_handle = open(
                self._log_path, "r", encoding="utf-8", errors="replace"
            )
            if seek_to_end:
                self._file_handle.seek(0, 2)
            self._last_size = self._file_handle.tell()
            stat = os.stat(self._log_path)
            # st_ino may be absent/meaningless on some platforms; getattr keeps
            # this portable (the size-based fallback covers those cases).
            self._last_inode = getattr(stat, "st_ino", None)
            logger.debug("[%s] Opened log file: %s", self.name, self._log_path)
        except OSError as exc:
            logger.warning("[%s] Cannot open log file %s: %s", self.name, self._log_path, exc)
            self._file_handle = None

    def _close_file(self) -> None:
        """Close the handle (if any) and reset rotation-tracking state."""
        if self._file_handle is not None:
            try:
                self._file_handle.close()
            except OSError as exc:
                logger.debug("[%s] Error closing log file: %s", self.name, exc)
            self._file_handle = None
        self._last_inode = None
        self._last_size = 0

    def _detect_rotation(self) -> bool:
        """Returns True if the log file has been rotated (inode change, shrink, or vanished)."""
        try:
            stat = os.stat(self._log_path)
        except OSError:
            # File disappeared — treat as rotation so we close and retry.
            return True
        current_inode = getattr(stat, "st_ino", None)
        if current_inode is not None and self._last_inode is not None:
            if current_inode != self._last_inode:
                return True
        # Windows fallback: file shrunk below our current read position
        current_size = stat.st_size
        if self._file_handle is not None:
            current_pos = self._file_handle.tell()
            if current_size < current_pos:
                return True
        self._last_size = current_size
        return False

View File

@@ -0,0 +1,118 @@
"""
MetricsCollectorThread — collects CPU and memory usage for a server process
and persists to the metrics table every COLLECTION_INTERVAL seconds.
Uses psutil to inspect the process identified by ProcessManager.get_pid().
If the process is not running, the thread sleeps and retries.
"""
from __future__ import annotations
import logging
import queue
import psutil
from core.dal.metrics_repository import MetricsRepository
from core.threads.base_thread import BaseServerThread
logger = logging.getLogger(__name__)
# Seconds between metric samples.
_COLLECTION_INTERVAL = 10.0
# Metric rows older than this many days are purged by the periodic cleanup.
_RETENTION_DAYS = 1
class MetricsCollectorThread(BaseServerThread):
    """
    Samples CPU and memory usage for a running game server process and
    persists each sample to the metrics table.

    Args:
        server_id: Database server ID.
        process_manager: ProcessManager singleton instance.
        broadcast_queue: Optional queue.Queue for real-time metric pushes.
    """

    def __init__(
        self,
        server_id: int,
        process_manager,
        broadcast_queue=None,
    ) -> None:
        super().__init__(server_id, "MetricsCollector")
        self._process_manager = process_manager
        self._broadcast_queue = broadcast_queue
        self._psutil_process = None  # cached psutil.Process handle, reused across samples
        self._samples_since_cleanup = 0
        self._cleanup_every = 360  # ~1 hour at 10s intervals

    # ── Main loop ──

    def _run_loop(self) -> None:
        """One invocation = one sample (or an idle wait when no process runs)."""
        pid = self._process_manager.get_pid(self.server_id)
        if pid is None:
            # Server not running — drop the stale handle and retry later.
            self._psutil_process = None
            self._stop_event.wait(timeout=_COLLECTION_INTERVAL)
            return
        if not self._ensure_handle(pid):
            self._stop_event.wait(timeout=_COLLECTION_INTERVAL)
            return
        self._stop_event.wait(timeout=_COLLECTION_INTERVAL)
        if self._stop_event.is_set():
            return
        sample = self._read_sample(pid)
        if sample is None:
            return
        cpu_pct, mem_mb = sample
        self._persist_and_broadcast(cpu_pct, mem_mb)

    # ── Helpers ──

    def _ensure_handle(self, pid: int) -> bool:
        """Reuse or (re)create the psutil handle for `pid`; False if the process is gone."""
        if self._psutil_process is not None and self._psutil_process.pid == pid:
            return True
        try:
            self._psutil_process = psutil.Process(pid)
            # Prime cpu_percent so the next interval=None call returns a real value.
            self._psutil_process.cpu_percent(interval=None)
            return True
        except psutil.NoSuchProcess:
            self._psutil_process = None
            return False

    def _read_sample(self, pid: int):
        """Return (cpu_percent, memory_mb) for the cached handle, or None if unreadable."""
        try:
            cpu_pct = self._psutil_process.cpu_percent(interval=None)
            rss = self._psutil_process.memory_info().rss
            return cpu_pct, round(rss / (1024 * 1024), 2)
        except psutil.NoSuchProcess:
            logger.info("[%s] Process %d no longer exists", self.name, pid)
            self._psutil_process = None
            return None
        except psutil.AccessDenied as exc:
            logger.warning("[%s] Access denied reading process %d: %s", self.name, pid, exc)
            return None

    def _persist_and_broadcast(self, cpu_pct, mem_mb) -> None:
        """Insert the sample, notify listeners, and run the periodic cleanup."""
        if self._db is None:
            return
        metrics_repo = MetricsRepository(self._db)
        metrics_repo.insert(
            server_id=self.server_id,
            cpu_percent=cpu_pct,
            ram_mb=mem_mb,
        )
        try:
            self._db.commit()
        except Exception as exc:
            logger.error("[%s] DB commit failed: %s", self.name, exc)
            self._db.rollback()
            # Skip broadcast/cleanup when the sample was not persisted.
            return
        if self._broadcast_queue is not None:
            try:
                self._broadcast_queue.put_nowait({
                    "type": "metrics",
                    "server_id": self.server_id,
                    "data": {"cpu_percent": cpu_pct, "memory_mb": mem_mb},
                })
            except queue.Full:
                logger.debug("[%s] Broadcast queue full, dropping metrics event", self.name)
        # Periodic retention cleanup every _cleanup_every samples.
        self._samples_since_cleanup += 1
        if self._samples_since_cleanup >= self._cleanup_every:
            self._samples_since_cleanup = 0
            try:
                metrics_repo.cleanup_old(server_id=self.server_id, retention_days=_RETENTION_DAYS)
                self._db.commit()
            except Exception as exc:
                logger.warning("[%s] Cleanup failed: %s", self.name, exc)
                self._db.rollback()

View File

@@ -0,0 +1,158 @@
"""
ProcessMonitorThread — watches a running game server process.
Responsibilities:
1. Detect when the process exits unexpectedly (crash).
2. On crash: update server status to "crashed" in DB, emit a crash event.
3. If auto_restart is enabled on the server record: trigger restart.
4. Respect max_restarts — if exceeded, leave server in "crashed" state.
Poll interval: 5 seconds.
"""
from __future__ import annotations
import logging
import queue
from core.dal.event_repository import EventRepository
from core.dal.server_repository import ServerRepository
from core.threads.base_thread import BaseServerThread
logger = logging.getLogger(__name__)
# Seconds between liveness checks of the monitored process.
_POLL_INTERVAL = 5.0
class ProcessMonitorThread(BaseServerThread):
    """
    Monitors the OS process for a running game server.

    One liveness check per loop iteration. On unexpected exit the thread
    records the crash (status, events), notifies the broadcast queue, may
    trigger an auto-restart, and then sets _fatal_error so the monitor
    itself stops — the server it was watching is no longer running.

    Args:
        server_id: Database server ID.
        process_manager: ProcessManager singleton (injected).
        broadcast_queue: Optional queue.Queue for crash notifications.
    """

    def __init__(
        self,
        server_id: int,
        process_manager,
        broadcast_queue=None,
    ) -> None:
        super().__init__(server_id, "ProcessMonitor")
        self._process_manager = process_manager
        self._broadcast_queue = broadcast_queue

    # ── Main loop ──

    def _run_loop(self) -> None:
        # Wait first so checks are spaced one poll interval apart and a
        # just-started process gets a grace period before its first check.
        self._stop_event.wait(timeout=_POLL_INTERVAL)
        if self._stop_event.is_set():
            return
        if not self._process_manager.is_running(self.server_id):
            self._handle_unexpected_exit()
            # After handling, stop this monitor — the server is no longer running
            self._fatal_error = True

    # ── Crash handling ──

    def _handle_unexpected_exit(self) -> None:
        """Record the crash in the DB, notify listeners, and restart if allowed."""
        if self._db is None:
            return
        server_repo = ServerRepository(self._db)
        event_repo = EventRepository(self._db)
        server = server_repo.get_by_id(self.server_id)
        if server is None:
            return
        # Only treat as crash if the server was supposed to be running
        if server["status"] not in ("running", "starting"):
            return
        logger.warning(
            "[%s] Server %d process exited unexpectedly (status was '%s')",
            self.name, self.server_id, server["status"],
        )
        # Increment crash counter
        server_repo.increment_restart_count(self.server_id)
        # `server` was read before the increment, so add 1 locally to match the DB.
        restart_count = server["restart_count"] + 1
        max_restarts = server.get("max_restarts", 3)
        # Record crash event
        event_repo.insert(
            server_id=self.server_id,
            event_type="crash",
            detail={"restart_count": restart_count},
        )
        should_restart = (
            server.get("auto_restart", False)
            and restart_count <= max_restarts
        )
        if should_restart:
            server_repo.update_status(self.server_id, "restarting")
            event_repo.insert(
                server_id=self.server_id,
                event_type="restart_scheduled",
                detail={"attempt": restart_count, "max": max_restarts},
            )
        else:
            server_repo.update_status(self.server_id, "crashed")
            if restart_count > max_restarts:
                event_repo.insert(
                    server_id=self.server_id,
                    event_type="restart_limit_reached",
                    detail={"restart_count": restart_count, "max_restarts": max_restarts},
                )
        # Commit all status/event writes before notifying listeners or restarting.
        try:
            self._db.commit()
        except Exception as exc:
            logger.error("[%s] DB commit failed during crash handling: %s", self.name, exc)
            self._db.rollback()
        if self._broadcast_queue is not None:
            try:
                self._broadcast_queue.put_nowait({
                    "type": "server_status",
                    "server_id": self.server_id,
                    "data": {
                        "status": "restarting" if should_restart else "crashed",
                        "restart_count": restart_count,
                    },
                })
            except queue.Full:
                logger.debug("[%s] Broadcast queue full, dropping server_status event", self.name)
        # Trigger actual restart outside DB work
        if should_restart:
            self._trigger_restart()

    def _trigger_restart(self) -> None:
        """
        Calls ServerService.start() to restart the server.
        This is safe to call from a background thread.
        """
        # NOTE(review): local imports — presumably to avoid a circular import
        # at module load time; confirm against the import graph.
        try:
            from database import get_thread_db
            from core.servers.service import ServerService
            # Dedicated connection: ServerService must not share this thread's
            # self._db, which is closed when the monitor stops.
            db = get_thread_db()
            try:
                service = ServerService(db)
                service.start(self.server_id)
            except Exception as exc:
                logger.error("[%s] Auto-restart start() failed: %s", self.name, exc, exc_info=True)
            finally:
                try:
                    db.close()
                except Exception as exc:
                    logger.debug("[%s] Error closing restart DB connection: %s", self.name, exc)
        except Exception as exc:
            logger.error("[%s] Auto-restart failed: %s", self.name, exc, exc_info=True)

View File

@@ -0,0 +1,169 @@
"""
RemoteAdminPollerThread — polls the game server's remote admin interface
(e.g. BattlEye RCon for Arma3) to sync the player list.
Design notes:
- Uses the RemoteAdminClient protocol injected at construction time
- Reconnects automatically on disconnect with exponential backoff
- Persists current player list to players table via PlayerRepository
- Emits player_join / player_leave events via EventRepository
- Pushes player list updates to broadcast_queue if provided
Poll interval: 30 seconds.
Reconnect backoff: 5s -> 10s -> 20s -> 40s -> 60s (cap).
"""
from __future__ import annotations
import logging
import queue
from core.dal.event_repository import EventRepository
from core.dal.player_repository import PlayerRepository
from core.threads.base_thread import BaseServerThread
logger = logging.getLogger(__name__)
# Seconds between player-list polls while connected.
_POLL_INTERVAL = 30.0
# Reconnect backoff: 5s -> 10s -> 20s -> 40s -> 60s (cap).
_RECONNECT_BACKOFF_BASE = 5.0
_RECONNECT_BACKOFF_MAX = 60.0
_RECONNECT_BACKOFF_MULT = 2.0
class RemoteAdminPollerThread(BaseServerThread):
    """
    Polls the remote admin interface for a game server (e.g. BattlEye RCon),
    syncing the player list to the DB and emitting join/leave events.

    Args:
        server_id: Database server ID.
        remote_admin_client: Connected RemoteAdminClient instance.
        broadcast_queue: Optional queue.Queue for player list pushes.
    """

    def __init__(
        self,
        server_id: int,
        remote_admin_client,
        broadcast_queue=None,
    ) -> None:
        super().__init__(server_id, "RemoteAdminPoller")
        self._client = remote_admin_client
        self._broadcast_queue = broadcast_queue
        self._connected = False
        self._reconnect_backoff = _RECONNECT_BACKOFF_BASE
        self._known_players: dict[str, dict] = {}  # player_uid -> player data

    # ── Lifecycle ──

    def _on_stop(self) -> None:
        """Disconnect cleanly when the thread is asked to stop."""
        if self._connected and self._client is not None:
            try:
                self._client.disconnect()
            except Exception as exc:
                logger.debug("[%s] Error disconnecting remote admin on stop: %s", self.name, exc)
        self._connected = False

    # ── Main loop ──

    def _run_loop(self) -> None:
        """One cycle: (re)connect if needed, otherwise fetch and sync players."""
        if not self._connected:
            self._attempt_connect()
            return
        self._stop_event.wait(timeout=_POLL_INTERVAL)
        if self._stop_event.is_set():
            return
        try:
            players = self._client.get_players()
            self._reconnect_backoff = _RECONNECT_BACKOFF_BASE
            self._sync_players(players)
        except Exception as exc:
            logger.warning("[%s] Poll failed: %s — will reconnect", self.name, exc)
            self._connected = False
            try:
                if self._client is not None:
                    self._client.disconnect()
            # Fix: use a distinct name so the inner handler does not shadow
            # (and then unbind) the outer poll exception.
            except Exception as disc_exc:
                logger.debug("[%s] Error disconnecting after poll failure: %s", self.name, disc_exc)

    # ── Connection management ──

    def _attempt_connect(self) -> None:
        """Try to connect; on failure, wait with exponential backoff."""
        try:
            # Fix: was a conditional expression used as a bare statement
            # (`x.connect() if hasattr(...) else None`) — a plain `if` is the
            # idiomatic form. Clients lacking connect() are treated as
            # already-connected, same as before.
            if hasattr(self._client, "connect"):
                self._client.connect()
            self._connected = True
            self._reconnect_backoff = _RECONNECT_BACKOFF_BASE
            logger.info("[%s] Connected to remote admin", self.name)
        except Exception as exc:
            logger.warning(
                "[%s] Connection failed: %s — retrying in %.1fs",
                self.name, exc, self._reconnect_backoff,
            )
            self._stop_event.wait(timeout=self._reconnect_backoff)
            self._reconnect_backoff = min(
                self._reconnect_backoff * _RECONNECT_BACKOFF_MULT,
                _RECONNECT_BACKOFF_MAX,
            )

    # ── Player sync ──

    def _sync_players(self, current_players: list[dict]) -> None:
        """
        Diff current_players against self._known_players.
        Insert join events for new players, leave events for departed ones.
        Upsert all current players in the DB.
        Each player dict must have at least: slot_id, name (other fields optional).
        """
        if self._db is None:
            return
        player_repo = PlayerRepository(self._db)
        event_repo = EventRepository(self._db)
        # Key players by stringified slot_id; fall back to the list index when
        # slot_id is missing. NOTE(review): index-based keys are unstable
        # across polls — verify adapters always supply slot_id.
        current_slots = {str(p.get("slot_id", i)): p for i, p in enumerate(current_players)}
        current_keys = set(current_slots.keys())
        known_keys = set(self._known_players.keys())
        joined = current_keys - known_keys
        left = known_keys - current_keys
        for slot_key, player in current_slots.items():
            player_repo.upsert(server_id=self.server_id, player=player)
            if slot_key in joined:
                event_repo.insert(
                    server_id=self.server_id,
                    event_type="player_join",
                    detail={"name": player.get("name", ""), "slot": slot_key},
                )
                logger.debug("[%s] Player joined: %s (slot %s)", self.name, player.get("name"), slot_key)
        for slot_key in left:
            departed = self._known_players[slot_key]
            event_repo.insert(
                server_id=self.server_id,
                event_type="player_leave",
                detail={"name": departed.get("name", ""), "slot": slot_key},
            )
            logger.debug("[%s] Player left: %s (slot %s)", self.name, departed.get("name"), slot_key)
        try:
            self._db.commit()
        except Exception as exc:
            logger.error("[%s] DB commit failed during player sync: %s", self.name, exc)
            self._db.rollback()
        # Update known players only after the DB write attempt.
        self._known_players = current_slots
        if self._broadcast_queue is not None:
            try:
                self._broadcast_queue.put_nowait({
                    "type": "players",
                    "server_id": self.server_id,
                    "data": current_players,
                })
            except queue.Full:
                logger.debug("[%s] Broadcast queue full, dropping players event", self.name)

View File

@@ -0,0 +1,257 @@
"""
ThreadRegistry — manages the lifecycle of all per-server background threads.
One instance is created at app startup and stored in app.state.thread_registry.
Also provides class-level methods for convenience (called from ServerService).
Thread set per server:
- LogTailThread (started if adapter has "log_parser" capability and log_path is known)
- MetricsCollectorThread (always started)
- ProcessMonitorThread (always started)
- RemoteAdminPollerThread (started only if adapter has "remote_admin" capability)
Key methods:
start_server_threads(server_id, db) — start all threads for a server
stop_server_threads(server_id) — stop all threads for a server
reattach_server_threads(server_id, db) — re-attach threads without restarting process
stop_all() — called at app shutdown
"""
from __future__ import annotations
import logging
import queue
from adapters.registry import GameAdapterRegistry
from core.dal.config_repository import ConfigRepository
from core.dal.server_repository import ServerRepository
from core.threads.log_tail import LogTailThread
from core.threads.metrics_collector import MetricsCollectorThread
from core.threads.process_monitor import ProcessMonitorThread
from core.threads.remote_admin_poller import RemoteAdminPollerThread
logger = logging.getLogger(__name__)
# Module-level singleton for convenience (used by ServerService)
_instance: ThreadRegistry | None = None  # installed via ThreadRegistry.set_instance() at app startup


class ThreadRegistry:
    """
    Manages all background threads for all running servers.

    Holds one "bundle" (dict of thread instances keyed by role) per running
    server. The classmethods are convenience wrappers that delegate to the
    module-level singleton, so callers (e.g. ServerService) do not need a
    registry reference.

    NOTE(review): _bundles is mutated without a lock — this appears to assume
    start/stop calls are serialized by the caller; confirm before invoking
    from concurrent request handlers.
    """

    def __init__(
        self,
        process_manager,
        adapter_registry: GameAdapterRegistry | None = None,
        global_broadcast_queue: queue.Queue | None = None,
    ) -> None:
        """
        Args:
            process_manager: ProcessManager used to observe server processes.
            adapter_registry: Game adapter registry; defaults to the
                GameAdapterRegistry class itself.
            global_broadcast_queue: Shared queue all threads push events onto;
                a bounded queue (maxsize=1000) is created when not provided.
        """
        self._process_manager = process_manager
        self._adapter_registry = adapter_registry or GameAdapterRegistry
        self._broadcast_queue = global_broadcast_queue or queue.Queue(maxsize=1000)
        self._bundles: dict[int, dict] = {}  # server_id -> thread bundle

    # ── Class-level convenience API ──

    @classmethod
    def _get_instance(cls) -> "ThreadRegistry | None":
        """Return the installed singleton, or None if set_instance() was never called."""
        return _instance

    @classmethod
    def set_instance(cls, registry: "ThreadRegistry") -> None:
        """Install the app-wide singleton (called once at startup)."""
        global _instance
        _instance = registry

    @classmethod
    def start_server_threads(cls, server_id: int, db) -> None:
        """Class-level convenience — starts threads for a server using the singleton."""
        registry = cls._get_instance()
        # Silent no-op when the singleton has not been installed.
        if registry is not None:
            registry._start_server_threads(server_id, db)

    @classmethod
    def stop_server_threads(cls, server_id: int) -> None:
        """Class-level convenience — stops threads for a server using the singleton."""
        registry = cls._get_instance()
        if registry is not None:
            registry._stop_server_threads(server_id)

    @classmethod
    def reattach_server_threads(cls, server_id: int, db) -> None:
        """Class-level convenience — re-attaches threads for a recovered server."""
        registry = cls._get_instance()
        if registry is not None:
            registry._reattach_server_threads(server_id, db)

    @classmethod
    def stop_all(cls) -> None:
        """Class-level convenience — stops all threads."""
        registry = cls._get_instance()
        if registry is not None:
            registry._stop_all()

    # ── Instance methods ──

    def _start_server_threads(self, server_id: int, db) -> None:
        """Build and start the thread bundle for a server, replacing any existing one."""
        if server_id in self._bundles:
            logger.warning(
                "ThreadRegistry: threads already exist for server %d — stopping first",
                server_id,
            )
            self._stop_server_threads(server_id)
        bundle = self._build_bundle(server_id, db)
        self._bundles[server_id] = bundle
        self._start_bundle(server_id, bundle)

    def _stop_server_threads(self, server_id: int) -> None:
        """Stop and discard the bundle for a server; no-op if none exists."""
        bundle = self._bundles.pop(server_id, None)
        if bundle is None:
            return
        self._stop_bundle(server_id, bundle)

    def _reattach_server_threads(self, server_id: int, db) -> None:
        """Re-create threads for a server whose process survived (e.g. after app restart)."""
        logger.info("ThreadRegistry: reattaching threads for server %d", server_id)
        self._start_server_threads(server_id, db)

    def _stop_all(self) -> None:
        """Stop every bundle; called at app shutdown."""
        # Snapshot keys: _stop_server_threads mutates _bundles while we iterate.
        server_ids = list(self._bundles.keys())
        for server_id in server_ids:
            self._stop_server_threads(server_id)
        logger.info("ThreadRegistry: all threads stopped")

    def get_thread_count(self, server_id: int) -> int:
        """Returns the number of running (alive) threads for a server."""
        bundle = self._bundles.get(server_id)
        if bundle is None:
            return 0
        return sum(
            1
            for key in ("log_tail", "metrics", "monitor", "rcon_poller")
            if bundle.get(key) is not None and bundle[key].is_alive()
        )

    # ── Bundle construction ──

    def _build_bundle(self, server_id: int, db) -> dict:
        """Reads server + config data from DB and constructs (but does not start) the thread bundle."""
        server_repo = ServerRepository(db)
        config_repo = ConfigRepository(db)
        server = server_repo.get_by_id(server_id)
        if server is None:
            raise ValueError(f"Server {server_id} not found in database")
        game_type = server["game_type"]
        adapter = self._adapter_registry.get(game_type)
        # Log path: read from config if present, else use adapter default
        log_path = None
        if adapter.has_capability("log_parser"):
            log_parser = adapter.get_log_parser()
            # Try to resolve log path via the adapter's log file resolver
            from core.utils.file_utils import get_server_dir
            server_dir = get_server_dir(server_id)
            if server_dir.exists():
                resolver = log_parser.get_log_file_resolver(server_id)
                resolved = resolver(server_dir)
                if resolved is not None:
                    log_path = str(resolved)
        bundle: dict = {
            "log_tail": None,
            "metrics": None,
            "monitor": None,
            "rcon_poller": None,
        }
        # Always: ProcessMonitorThread
        bundle["monitor"] = ProcessMonitorThread(
            server_id=server_id,
            process_manager=self._process_manager,
            broadcast_queue=self._broadcast_queue,
        )
        # Always: MetricsCollectorThread
        bundle["metrics"] = MetricsCollectorThread(
            server_id=server_id,
            process_manager=self._process_manager,
            broadcast_queue=self._broadcast_queue,
        )
        # Conditional: LogTailThread
        if log_path and adapter.has_capability("log_parser"):
            log_parser = adapter.get_log_parser()
            bundle["log_tail"] = LogTailThread(
                server_id=server_id,
                log_path=log_path,
                log_parser=log_parser,
                broadcast_queue=self._broadcast_queue,
            )
        # Conditional: RemoteAdminPollerThread
        if adapter.has_capability("remote_admin"):
            remote_admin = adapter.get_remote_admin()
            if remote_admin is not None:
                # Get RCon password from config
                rcon_password = self._get_remote_admin_password(server_id, config_repo)
                if rcon_password:
                    try:
                        # `or` binds looser than `+`: falls back to game_port + 1
                        # when rcon_port is falsy. NOTE(review): confirm the
                        # +1 convention holds for every adapter, not just Arma 3.
                        rcon_port = server.get("rcon_port") or server.get("game_port", 0) + 1
                        client = remote_admin.create_client(
                            host="127.0.0.1",
                            port=rcon_port,
                            password=rcon_password,
                        )
                        bundle["rcon_poller"] = RemoteAdminPollerThread(
                            server_id=server_id,
                            remote_admin_client=client,
                            broadcast_queue=self._broadcast_queue,
                        )
                    except Exception as exc:
                        logger.warning(
                            "ThreadRegistry: could not create RCon client for server %d: %s",
                            server_id, exc,
                        )
        return bundle

    def _start_bundle(self, server_id: int, bundle: dict) -> None:
        """Start every non-None thread in the bundle (monitor first)."""
        started = []
        for key in ("monitor", "metrics", "log_tail", "rcon_poller"):
            thread = bundle.get(key)
            if thread is not None:
                thread.start()
                started.append(key)
        logger.info("ThreadRegistry: started threads for server %d: %s", server_id, started)

    def _stop_bundle(self, server_id: int, bundle: dict) -> None:
        """Stop every alive thread in the bundle, in reverse start order."""
        for key in ("rcon_poller", "log_tail", "metrics", "monitor"):
            thread = bundle.get(key)
            if thread is not None and thread.is_alive():
                thread.stop_and_join(timeout=5.0)
        logger.info("ThreadRegistry: stopped all threads for server %d", server_id)

    # ── Helpers ──

    def _get_remote_admin_password(
        self, server_id: int, config_repo: ConfigRepository
    ) -> str | None:
        """Read the RCon password from the rcon config section."""
        # Need to decrypt sensitive fields
        # NOTE(review): this local import appears unused — self._adapter_registry
        # is used below; candidate for removal.
        from adapters.registry import GameAdapterRegistry
        try:
            # NOTE(review): reaches into ConfigRepository._db (private attribute);
            # consider passing the db connection explicitly.
            server = ServerRepository(config_repo._db).get_by_id(server_id)
            if server is None:
                return None
            adapter = self._adapter_registry.get(server["game_type"])
            config_gen = adapter.get_config_generator()
            sensitive = config_gen.get_sensitive_fields("rcon") if "rcon" in config_gen.get_sections() else []
        except Exception as exc:
            logger.debug("Could not determine sensitive fields for RCon config: %s", exc)
            sensitive = []
        rcon_section = config_repo.get_section(server_id, "rcon", sensitive)
        if rcon_section is None:
            return None
        return rcon_section.get("password") or None