feat: implement full backend + frontend server detail, settings, and create server pages
Backend:
- Complete FastAPI backend with 42+ REST endpoints (auth, servers, config, players, bans, missions, mods, games, system)
- Game adapter architecture with Arma 3 as first-class adapter
- WebSocket real-time events for status, metrics, logs, players
- Background thread system (process monitor, metrics, log tail, RCon poller)
- Fernet encryption for sensitive config fields at rest
- JWT auth with admin/viewer roles, bcrypt password hashing
- SQLite with WAL mode, parameterized queries, migration system
- APScheduler cleanup jobs for logs, metrics, events

Frontend:
- Server Detail page with 7 tabs (overview, config, players, bans, missions, mods, logs)
- Settings page with password change and admin user management
- Create Server wizard (4-step; known bug: silent validation failure)
- New hooks: useServerDetail, useAuth, useGames
- New components: ServerHeader, ConfigEditor, PlayerTable, BanTable, MissionList, ModList, LogViewer, PasswordChange, UserManager
- WebSocket onEvent callback for real-time log accumulation
- 120 unit tests passing (Vitest + React Testing Library)

Docs:
- Added .gitignore, CLAUDE.md, README.md
- Updated FRONTEND.md, ARCHITECTURE.md with current implementation state
- Added .env.example for backend configuration

Known issues:
- Create Server form: "Next" buttons don't validate before advancing, causing silent submit failure when fields are invalid
- Config sub-tabs need UX redesign for non-technical users
This commit is contained in:
158
backend/core/threads/process_monitor.py
Normal file
158
backend/core/threads/process_monitor.py
Normal file
@@ -0,0 +1,158 @@
|
||||
"""
|
||||
ProcessMonitorThread — watches a running game server process.
|
||||
|
||||
Responsibilities:
|
||||
1. Detect when the process exits unexpectedly (crash).
|
||||
2. On crash: update server status to "crashed" in DB, emit a crash event.
|
||||
3. If auto_restart is enabled on the server record: trigger restart.
|
||||
4. Respect max_restarts — if exceeded, leave server in "crashed" state.
|
||||
|
||||
Poll interval: 5 seconds.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import queue
|
||||
|
||||
from core.dal.event_repository import EventRepository
|
||||
from core.dal.server_repository import ServerRepository
|
||||
from core.threads.base_thread import BaseServerThread
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Seconds to wait between liveness checks of the server process; used as the
# timeout of the stop-event wait in ProcessMonitorThread._run_loop.
_POLL_INTERVAL = 5.0
|
||||
|
||||
|
||||
class ProcessMonitorThread(BaseServerThread):
    """
    Monitors the OS process for a running game server.

    Each loop iteration waits one poll interval, then asks the process
    manager whether the server's process is still running. When it is not,
    the exit is handled as a potential crash (status update, events,
    optional auto-restart) and the monitor flags itself to stop.

    Args:
        server_id: Database server ID.
        process_manager: ProcessManager singleton (injected).
        broadcast_queue: Optional queue.Queue for crash notifications.
    """

    # Restart limit applied when the server record has no usable value.
    _DEFAULT_MAX_RESTARTS = 3

    def __init__(
        self,
        server_id: int,
        process_manager,
        broadcast_queue=None,
    ) -> None:
        super().__init__(server_id, "ProcessMonitor")
        self._process_manager = process_manager
        self._broadcast_queue = broadcast_queue

    # ── Main loop ──

    def _run_loop(self) -> None:
        """Run one poll cycle: wait, then check whether the process is alive."""
        # Waiting on the stop event (rather than sleeping) lets a stop
        # request interrupt the poll interval immediately.
        self._stop_event.wait(timeout=_POLL_INTERVAL)

        if self._stop_event.is_set():
            return

        if not self._process_manager.is_running(self.server_id):
            self._handle_unexpected_exit()
            # After handling, stop this monitor — the server is no longer running
            self._fatal_error = True

    # ── Crash handling ──

    def _handle_unexpected_exit(self) -> None:
        """
        React to the server process having disappeared.

        Does nothing when there is no DB connection, the server record is
        missing, or the recorded status is not "running"/"starting" (the
        exit is then not treated as a crash). Otherwise it increments the
        restart counter, records a "crash" event, sets the status to
        "restarting" or "crashed", commits, publishes a "server_status"
        message on the broadcast queue (if one was given), and finally
        triggers the restart when auto-restart applies.
        """
        if self._db is None:
            return

        server_repo = ServerRepository(self._db)
        event_repo = EventRepository(self._db)

        server = server_repo.get_by_id(self.server_id)
        if server is None:
            return

        # Only treat as crash if the server was supposed to be running
        if server["status"] not in ("running", "starting"):
            return

        logger.warning(
            "[%s] Server %d process exited unexpectedly (status was '%s')",
            self.name, self.server_id, server["status"],
        )

        # Increment crash counter
        server_repo.increment_restart_count(self.server_id)

        # A NULL column value arrives as None; treat it as "no crashes yet"
        # instead of failing on None + 1.
        restart_count = (server["restart_count"] or 0) + 1

        # dict.get's default only covers a missing key, not a stored None,
        # so apply the fallback explicitly to keep the comparisons valid.
        max_restarts = server.get("max_restarts")
        if max_restarts is None:
            max_restarts = self._DEFAULT_MAX_RESTARTS

        # Record crash event
        event_repo.insert(
            server_id=self.server_id,
            event_type="crash",
            detail={"restart_count": restart_count},
        )

        auto_restart = bool(server.get("auto_restart", False))
        should_restart = auto_restart and restart_count <= max_restarts

        if should_restart:
            server_repo.update_status(self.server_id, "restarting")
            event_repo.insert(
                server_id=self.server_id,
                event_type="restart_scheduled",
                detail={"attempt": restart_count, "max": max_restarts},
            )
        else:
            server_repo.update_status(self.server_id, "crashed")
            # The limit is only meaningful when auto-restart is enabled;
            # with it disabled no restart was ever going to be attempted.
            if auto_restart and restart_count > max_restarts:
                event_repo.insert(
                    server_id=self.server_id,
                    event_type="restart_limit_reached",
                    detail={"restart_count": restart_count, "max_restarts": max_restarts},
                )

        try:
            self._db.commit()
        except Exception as exc:
            logger.error("[%s] DB commit failed during crash handling: %s", self.name, exc)
            self._db.rollback()
            # NOTE(review): the broadcast and restart below still proceed
            # after a failed commit — confirm this best-effort behaviour is
            # intended.

        if self._broadcast_queue is not None:
            try:
                self._broadcast_queue.put_nowait({
                    "type": "server_status",
                    "server_id": self.server_id,
                    "data": {
                        "status": "restarting" if should_restart else "crashed",
                        "restart_count": restart_count,
                    },
                })
            except queue.Full:
                # Notifications are best-effort; never block the monitor.
                logger.debug("[%s] Broadcast queue full, dropping server_status event", self.name)

        # Trigger actual restart outside DB work
        if should_restart:
            self._trigger_restart()

    def _trigger_restart(self) -> None:
        """
        Calls ServerService.start() to restart the server.

        Uses a separate connection obtained from get_thread_db() and closes
        it afterwards. Every failure is logged and swallowed so that a
        failed restart never propagates out of the monitor.
        """
        try:
            # Imported at call time rather than module level — presumably to
            # avoid a circular import; confirm before hoisting.
            from database import get_thread_db
            from core.servers.service import ServerService

            db = get_thread_db()
            try:
                service = ServerService(db)
                service.start(self.server_id)
            except Exception as exc:
                logger.error("[%s] Auto-restart start() failed: %s", self.name, exc, exc_info=True)
            finally:
                try:
                    db.close()
                except Exception as exc:
                    logger.debug("[%s] Error closing restart DB connection: %s", self.name, exc)
        except Exception as exc:
            # Covers import failures and get_thread_db() itself raising.
            logger.error("[%s] Auto-restart failed: %s", self.name, exc, exc_info=True)
|
||||
Reference in New Issue
Block a user