Files
languard-servers-manager/backend/core/threads/metrics_collector.py
Tran G. (Revernomad) Khoa 6511353b55 feat: implement full backend + frontend server detail, settings, and create server pages
Backend:
- Complete FastAPI backend with 42+ REST endpoints (auth, servers, config,
  players, bans, missions, mods, games, system)
- Game adapter architecture with Arma 3 as first-class adapter
- WebSocket real-time events for status, metrics, logs, players
- Background thread system (process monitor, metrics, log tail, RCon poller)
- Fernet encryption for sensitive config fields at rest
- JWT auth with admin/viewer roles, bcrypt password hashing
- SQLite with WAL mode, parameterized queries, migration system
- APScheduler cleanup jobs for logs, metrics, events

Frontend:
- Server Detail page with 7 tabs (overview, config, players, bans,
  missions, mods, logs)
- Settings page with password change and admin user management
- Create Server wizard (4-step; known bug: silent validation failure)
- New hooks: useServerDetail, useAuth, useGames
- New components: ServerHeader, ConfigEditor, PlayerTable, BanTable,
  MissionList, ModList, LogViewer, PasswordChange, UserManager
- WebSocket onEvent callback for real-time log accumulation
- 120 unit tests passing (Vitest + React Testing Library)

Docs:
- Added .gitignore, CLAUDE.md, README.md
- Updated FRONTEND.md, ARCHITECTURE.md with current implementation state
- Added .env.example for backend configuration

Known issues:
- Create Server form: "Next" buttons don't validate before advancing,
  causing silent submit failure when fields are invalid
- Config sub-tabs need UX redesign for non-technical users
2026-04-17 11:58:34 +07:00

118 lines
3.9 KiB
Python

"""
MetricsCollectorThread — collects CPU and memory usage for a server process
and persists to the metrics table every COLLECTION_INTERVAL seconds.
Uses psutil to inspect the process identified by ProcessManager.get_pid().
If the process is not running, the thread sleeps and retries.
"""
from __future__ import annotations
import logging
import queue
import psutil
from core.dal.metrics_repository import MetricsRepository
from core.threads.base_thread import BaseServerThread
logger = logging.getLogger(__name__)
_COLLECTION_INTERVAL = 10.0
_RETENTION_DAYS = 1
class MetricsCollectorThread(BaseServerThread):
    """
    Collects process metrics for a running game server.

    Every ``_COLLECTION_INTERVAL`` seconds the thread samples CPU and RSS
    memory of the process identified by ``ProcessManager.get_pid()``,
    persists one row via ``MetricsRepository``, and optionally pushes the
    sample onto a broadcast queue for real-time consumers. Old rows are
    pruned periodically according to ``_RETENTION_DAYS``.

    Args:
        server_id: Database server ID.
        process_manager: ProcessManager singleton instance.
        broadcast_queue: Optional queue.Queue for real-time metric pushes.
    """

    def __init__(
        self,
        server_id: int,
        process_manager,
        broadcast_queue=None,
    ) -> None:
        super().__init__(server_id, "MetricsCollector")
        self._process_manager = process_manager
        self._broadcast_queue = broadcast_queue
        # Cached psutil.Process handle. Kept across iterations so that
        # cpu_percent(interval=None) measures a delta since the last call
        # instead of returning a meaningless instantaneous value.
        self._psutil_process = None
        # Counter driving the periodic retention cleanup in _run_loop.
        self._samples_since_cleanup = 0
        self._cleanup_every = 360  # ~1 hour at 10s intervals

    # ── Main loop ──
    def _run_loop(self) -> None:
        """Run one collection iteration: sample, persist, broadcast, clean up.

        Invoked repeatedly by BaseServerThread until the stop event is set.
        Returns early (after an interval-long wait) whenever the server
        process is not running or a sample cannot be taken.
        """
        pid = self._process_manager.get_pid(self.server_id)
        if pid is None:
            # Server is not running: drop any stale handle and retry later.
            self._psutil_process = None
            self._stop_event.wait(timeout=_COLLECTION_INTERVAL)
            return

        # Reuse or create psutil.Process handle
        if self._psutil_process is None or self._psutil_process.pid != pid:
            try:
                self._psutil_process = psutil.Process(pid)
                # Prime cpu_percent(): the first call returns 0.0 and only
                # establishes the baseline for the next (meaningful) reading.
                self._psutil_process.cpu_percent(interval=None)
            except (psutil.NoSuchProcess, psutil.AccessDenied):
                # AccessDenied was previously unhandled on this path and
                # would have propagated out of the thread loop, even though
                # the sampling path below already handles it. Treat both
                # cases as "no usable process yet" and retry next interval.
                self._psutil_process = None
                self._stop_event.wait(timeout=_COLLECTION_INTERVAL)
                return

        # Sleep for one collection interval; bail out promptly on shutdown.
        self._stop_event.wait(timeout=_COLLECTION_INTERVAL)
        if self._stop_event.is_set():
            return

        try:
            cpu_pct = self._psutil_process.cpu_percent(interval=None)
            mem_info = self._psutil_process.memory_info()
            mem_mb = round(mem_info.rss / (1024 * 1024), 2)
        except psutil.NoSuchProcess:
            logger.info("[%s] Process %d no longer exists", self.name, pid)
            self._psutil_process = None
            return
        except psutil.AccessDenied as exc:
            logger.warning("[%s] Access denied reading process %d: %s", self.name, pid, exc)
            return

        if self._db is None:
            # DB handle not available (thread starting up or shutting down).
            return
        metrics_repo = MetricsRepository(self._db)
        metrics_repo.insert(
            server_id=self.server_id,
            cpu_percent=cpu_pct,
            ram_mb=mem_mb,
        )
        try:
            self._db.commit()
        except Exception as exc:
            logger.error("[%s] DB commit failed: %s", self.name, exc)
            self._db.rollback()
            return

        # Best-effort real-time push; dropping a sample is acceptable.
        if self._broadcast_queue is not None:
            try:
                self._broadcast_queue.put_nowait({
                    "type": "metrics",
                    "server_id": self.server_id,
                    "data": {"cpu_percent": cpu_pct, "memory_mb": mem_mb},
                })
            except queue.Full:
                logger.debug("[%s] Broadcast queue full, dropping metrics event", self.name)

        # Periodic cleanup — counts only successfully persisted samples.
        self._samples_since_cleanup += 1
        if self._samples_since_cleanup >= self._cleanup_every:
            self._samples_since_cleanup = 0
            try:
                metrics_repo.cleanup_old(server_id=self.server_id, retention_days=_RETENTION_DAYS)
                self._db.commit()
            except Exception as exc:
                logger.warning("[%s] Cleanup failed: %s", self.name, exc)
                self._db.rollback()