Files
languard-servers-manager/backend/core/threads/process_monitor.py
Tran G. (Revernomad) Khoa 6511353b55 feat: implement full backend + frontend server detail, settings, and create server pages
Backend:
- Complete FastAPI backend with 42+ REST endpoints (auth, servers, config,
  players, bans, missions, mods, games, system)
- Game adapter architecture with Arma 3 as first-class adapter
- WebSocket real-time events for status, metrics, logs, players
- Background thread system (process monitor, metrics, log tail, RCon poller)
- Fernet encryption for sensitive config fields at rest
- JWT auth with admin/viewer roles, bcrypt password hashing
- SQLite with WAL mode, parameterized queries, migration system
- APScheduler cleanup jobs for logs, metrics, events

Frontend:
- Server Detail page with 7 tabs (overview, config, players, bans,
  missions, mods, logs)
- Settings page with password change and admin user management
- Create Server wizard (4-step; known bug: silent validation failure)
- New hooks: useServerDetail, useAuth, useGames
- New components: ServerHeader, ConfigEditor, PlayerTable, BanTable,
  MissionList, ModList, LogViewer, PasswordChange, UserManager
- WebSocket onEvent callback for real-time log accumulation
- 120 unit tests passing (Vitest + React Testing Library)

Docs:
- Added .gitignore, CLAUDE.md, README.md
- Updated FRONTEND.md, ARCHITECTURE.md with current implementation state
- Added .env.example for backend configuration

Known issues:
- Create Server form: "Next" buttons don't validate before advancing,
  causing silent submit failure when fields are invalid
- Config sub-tabs need UX redesign for non-technical users
2026-04-17 11:58:34 +07:00

158 lines
5.2 KiB
Python

"""
ProcessMonitorThread — watches a running game server process.
Responsibilities:
1. Detect when the process exits unexpectedly (crash).
2. On crash: update server status to "crashed" in DB, emit a crash event.
3. If auto_restart is enabled on the server record: trigger restart.
4. Respect max_restarts — if exceeded, leave server in "crashed" state.
Poll interval: 5 seconds.
"""
from __future__ import annotations
import logging
import queue
from core.dal.event_repository import EventRepository
from core.dal.server_repository import ServerRepository
from core.threads.base_thread import BaseServerThread
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Seconds the monitor waits between process liveness checks.
_POLL_INTERVAL = 5.0
class ProcessMonitorThread(BaseServerThread):
    """
    Monitors the OS process for a running game server.

    Every iteration waits one poll interval, then asks the process manager
    whether the server's process is still running. When it is not, the crash
    is recorded, the server status is updated, a status message is queued
    for broadcast, and (if enabled and within the limit) a restart is
    triggered.

    Args:
        server_id: Database server ID.
        process_manager: ProcessManager singleton (injected).
        broadcast_queue: Optional queue.Queue for crash notifications.
    """

    # Restart limit used when the server record has no usable
    # "max_restarts" value (key missing or stored as NULL).
    _DEFAULT_MAX_RESTARTS = 3

    def __init__(
        self,
        server_id: int,
        process_manager,
        broadcast_queue=None,
    ) -> None:
        super().__init__(server_id, "ProcessMonitor")
        self._process_manager = process_manager
        self._broadcast_queue = broadcast_queue

    # ── Main loop ──
    def _run_loop(self) -> None:
        """
        Run one poll cycle: sleep, then check that the process is alive.

        The wait is done on the stop event so a stop request interrupts the
        sleep immediately instead of waiting for the full interval.
        """
        self._stop_event.wait(timeout=_POLL_INTERVAL)
        if self._stop_event.is_set():
            return
        if self._process_manager.is_running(self.server_id):
            return
        self._handle_unexpected_exit()
        # After handling, stop this monitor — the server is no longer running.
        # NOTE(review): presumably BaseServerThread exits its loop when
        # _fatal_error is set — confirm in the base class.
        self._fatal_error = True

    # ── Crash handling ──
    def _handle_unexpected_exit(self) -> None:
        """
        Record a crash for this server and decide whether to restart it.

        Does nothing when there is no DB connection, the server record is
        missing, or the recorded status is not "running"/"starting" (the
        process was not expected to be alive, so this is not a crash).
        """
        if self._db is None:
            return
        server_repo = ServerRepository(self._db)
        event_repo = EventRepository(self._db)
        server = server_repo.get_by_id(self.server_id)
        if server is None:
            return
        # Only treat as crash if the server was supposed to be running
        if server["status"] not in ("running", "starting"):
            return

        logger.warning(
            "[%s] Server %d process exited unexpectedly (status was '%s')",
            self.name, self.server_id, server["status"],
        )

        # Increment crash counter. The local value mirrors the stored one;
        # a NULL counter is treated as 0 so the arithmetic cannot fail.
        server_repo.increment_restart_count(self.server_id)
        restart_count = (server["restart_count"] or 0) + 1

        # A NULL limit falls back to the default. An explicit 0 is kept as-is
        # (it means "never auto-restart"), hence the `is None` test, not `or`.
        max_restarts = server.get("max_restarts")
        if max_restarts is None:
            max_restarts = self._DEFAULT_MAX_RESTARTS

        # Record crash event
        event_repo.insert(
            server_id=self.server_id,
            event_type="crash",
            detail={"restart_count": restart_count},
        )

        auto_restart = bool(server.get("auto_restart", False))
        limit_exceeded = restart_count > max_restarts
        should_restart = auto_restart and not limit_exceeded
        new_status = "restarting" if should_restart else "crashed"

        server_repo.update_status(self.server_id, new_status)
        if should_restart:
            event_repo.insert(
                server_id=self.server_id,
                event_type="restart_scheduled",
                detail={"attempt": restart_count, "max": max_restarts},
            )
        elif auto_restart and limit_exceeded:
            # Only report the limit when auto-restart is actually enabled;
            # with it disabled no restart was ever going to be attempted.
            event_repo.insert(
                server_id=self.server_id,
                event_type="restart_limit_reached",
                detail={"restart_count": restart_count, "max_restarts": max_restarts},
            )

        try:
            self._db.commit()
        except Exception as exc:
            logger.error(
                "[%s] DB commit failed during crash handling: %s",
                self.name, exc, exc_info=True,
            )
            # A failing rollback must not prevent the notification below.
            try:
                self._db.rollback()
            except Exception as rollback_exc:
                logger.error(
                    "[%s] DB rollback failed during crash handling: %s",
                    self.name, rollback_exc,
                )
            # NOTE(review): the broadcast and restart below still run even
            # though the crash was not persisted — confirm this is intended.

        self._broadcast_status(new_status, restart_count)

        # Trigger actual restart outside DB work
        if should_restart:
            self._trigger_restart()

    def _broadcast_status(self, status: str, restart_count: int) -> None:
        """Queue a "server_status" message; drop it if the queue is full."""
        if self._broadcast_queue is None:
            return
        try:
            self._broadcast_queue.put_nowait({
                "type": "server_status",
                "server_id": self.server_id,
                "data": {
                    "status": status,
                    "restart_count": restart_count,
                },
            })
        except queue.Full:
            logger.debug("[%s] Broadcast queue full, dropping server_status event", self.name)

    def _trigger_restart(self) -> None:
        """
        Calls ServerService.start() to restart the server.

        Uses its own connection from get_thread_db() rather than self._db and
        closes it afterwards. Every failure is logged and swallowed so a
        failed restart never raises into the monitor loop.
        """
        try:
            # Imported here rather than at module top.
            # NOTE(review): presumably to avoid a circular import — confirm.
            from database import get_thread_db
            from core.servers.service import ServerService
            db = get_thread_db()
            try:
                service = ServerService(db)
                service.start(self.server_id)
            except Exception as exc:
                logger.error("[%s] Auto-restart start() failed: %s", self.name, exc, exc_info=True)
            finally:
                try:
                    db.close()
                except Exception as exc:
                    logger.debug("[%s] Error closing restart DB connection: %s", self.name, exc)
        except Exception as exc:
            logger.error("[%s] Auto-restart failed: %s", self.name, exc, exc_info=True)