Backend: - Complete FastAPI backend with 42+ REST endpoints (auth, servers, config, players, bans, missions, mods, games, system) - Game adapter architecture with Arma 3 as first-class adapter - WebSocket real-time events for status, metrics, logs, players - Background thread system (process monitor, metrics, log tail, RCon poller) - Fernet encryption for sensitive config fields at rest - JWT auth with admin/viewer roles, bcrypt password hashing - SQLite with WAL mode, parameterized queries, migration system - APScheduler cleanup jobs for logs, metrics, events Frontend: - Server Detail page with 7 tabs (overview, config, players, bans, missions, mods, logs) - Settings page with password change and admin user management - Create Server wizard (4-step; known bug: silent validation failure) - New hooks: useServerDetail, useAuth, useGames - New components: ServerHeader, ConfigEditor, PlayerTable, BanTable, MissionList, ModList, LogViewer, PasswordChange, UserManager - WebSocket onEvent callback for real-time log accumulation - 120 unit tests passing (Vitest + React Testing Library) Docs: - Added .gitignore, CLAUDE.md, README.md - Updated FRONTEND.md, ARCHITECTURE.md with current implementation state - Added .env.example for backend configuration Known issues: - Create Server form: "Next" buttons don't validate before advancing, causing silent submit failure when fields are invalid - Config sub-tabs need UX redesign for non-technical users
158 lines
5.2 KiB
Python
158 lines
5.2 KiB
Python
"""
|
|
ProcessMonitorThread — watches a running game server process.

Responsibilities:
  1. Detect when the process exits unexpectedly (crash).
  2. On crash: update server status to "crashed" in DB, emit a crash event.
  3. If auto_restart is enabled on the server record: trigger restart.
  4. Respect max_restarts — if exceeded, leave server in "crashed" state.

Poll interval: 5 seconds.
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import logging
|
|
import queue
|
|
|
|
from core.dal.event_repository import EventRepository
|
|
from core.dal.server_repository import ServerRepository
|
|
from core.threads.base_thread import BaseServerThread
|
|
|
|
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Seconds the monitor waits between two liveness checks of the process.
_POLL_INTERVAL = 5.0
|
|
|
|
|
|
class ProcessMonitorThread(BaseServerThread):
    """
    Monitors the OS process for a running game server.

    Each loop iteration waits one poll interval and then asks the process
    manager whether the server's process is still running. When it is not,
    the exit is handled as a potential crash (DB status update, events,
    optional broadcast, optional auto-restart) and the monitor flags itself
    as finished.

    Args:
        server_id: Database server ID.
        process_manager: ProcessManager singleton (injected). Only its
            ``is_running(server_id)`` method is used here.
        broadcast_queue: Optional queue.Queue for crash notifications.
    """

    # Restart limit used when the server record carries no usable
    # "max_restarts" value (key absent, or present but None / NULL).
    _DEFAULT_MAX_RESTARTS = 3

    def __init__(
        self,
        server_id: int,
        process_manager,
        broadcast_queue=None,
    ) -> None:
        super().__init__(server_id, "ProcessMonitor")
        self._process_manager = process_manager
        self._broadcast_queue = broadcast_queue

    # ── Main loop ──

    def _run_loop(self) -> None:
        """
        Run one monitoring iteration.

        Waits up to ``_POLL_INTERVAL`` seconds on the stop event, returns
        immediately if a stop was requested, and otherwise checks whether
        the server process is still running.
        """
        # Doubles as the poll sleep; returns early when stop is requested.
        self._stop_event.wait(timeout=_POLL_INTERVAL)

        if self._stop_event.is_set():
            return

        if not self._process_manager.is_running(self.server_id):
            self._handle_unexpected_exit()
            # After handling, stop this monitor — the server is no longer running
            # NOTE(review): assumes BaseServerThread ends the thread once
            # this flag is set — confirm against the base class.
            self._fatal_error = True

    # ── Crash handling ──

    def _handle_unexpected_exit(self) -> None:
        """
        React to the monitored process no longer running.

        Does nothing when no DB connection is available, when the server
        record is missing, or when the recorded status is neither
        "running" nor "starting" (the exit is then not treated as a crash).

        Otherwise: increments the restart counter, records a "crash" event,
        sets the status to "restarting" (auto-restart enabled and limit not
        exceeded) or "crashed", commits, broadcasts the new status if a
        queue was supplied, and finally triggers the restart if applicable.
        """
        if self._db is None:
            return

        server_repo = ServerRepository(self._db)
        event_repo = EventRepository(self._db)

        server = server_repo.get_by_id(self.server_id)
        if server is None:
            return

        # Only treat as crash if the server was supposed to be running
        if server["status"] not in ("running", "starting"):
            return

        logger.warning(
            "[%s] Server %d process exited unexpectedly (status was '%s')",
            self.name, self.server_id, server["status"],
        )

        # Increment crash counter
        server_repo.increment_restart_count(self.server_id)
        # The record was read before the increment, so mirror it locally.
        # A None counter (e.g. NULL column) is treated as zero.
        restart_count = (server["restart_count"] or 0) + 1

        # dict.get's default only covers a *missing* key; a key that is
        # present with value None must fall back explicitly, otherwise the
        # comparison below would raise TypeError.
        max_restarts = server.get("max_restarts")
        if max_restarts is None:
            max_restarts = self._DEFAULT_MAX_RESTARTS

        # Record crash event
        event_repo.insert(
            server_id=self.server_id,
            event_type="crash",
            detail={"restart_count": restart_count},
        )

        should_restart = bool(
            server.get("auto_restart", False)
            and restart_count <= max_restarts
        )

        if should_restart:
            server_repo.update_status(self.server_id, "restarting")
            event_repo.insert(
                server_id=self.server_id,
                event_type="restart_scheduled",
                detail={"attempt": restart_count, "max": max_restarts},
            )
        else:
            server_repo.update_status(self.server_id, "crashed")
            if restart_count > max_restarts:
                event_repo.insert(
                    server_id=self.server_id,
                    event_type="restart_limit_reached",
                    detail={"restart_count": restart_count, "max_restarts": max_restarts},
                )

        try:
            self._db.commit()
        except Exception as exc:
            # Best effort: a failed commit is logged and rolled back, but
            # the notification and restart below are still attempted.
            logger.error(
                "[%s] DB commit failed during crash handling: %s",
                self.name, exc, exc_info=True,
            )
            self._db.rollback()

        self._broadcast_status(
            "restarting" if should_restart else "crashed",
            restart_count,
        )

        # Trigger actual restart outside DB work
        if should_restart:
            self._trigger_restart()

    def _broadcast_status(self, status: str, restart_count: int) -> None:
        """
        Push a "server_status" message onto the broadcast queue, if any.

        The put is non-blocking; when the queue is full the message is
        dropped and a debug line is logged.
        """
        if self._broadcast_queue is None:
            return

        try:
            self._broadcast_queue.put_nowait({
                "type": "server_status",
                "server_id": self.server_id,
                "data": {
                    "status": status,
                    "restart_count": restart_count,
                },
            })
        except queue.Full:
            logger.debug("[%s] Broadcast queue full, dropping server_status event", self.name)

    def _trigger_restart(self) -> None:
        """
        Calls ServerService.start() to restart the server.

        A separate DB handle is obtained via get_thread_db() for the
        restart and closed afterwards. Every failure (import, connection,
        start, close) is logged rather than raised, so this method never
        propagates an exception to the caller.
        """
        try:
            # Imported lazily inside the method.
            # NOTE(review): presumably to avoid an import cycle — confirm.
            from database import get_thread_db
            from core.servers.service import ServerService

            db = get_thread_db()
            try:
                service = ServerService(db)
                service.start(self.server_id)
            except Exception as exc:
                logger.error("[%s] Auto-restart start() failed: %s", self.name, exc, exc_info=True)
            finally:
                try:
                    db.close()
                except Exception as exc:
                    logger.debug("[%s] Error closing restart DB connection: %s", self.name, exc)
        except Exception as exc:
            logger.error("[%s] Auto-restart failed: %s", self.name, exc, exc_info=True)