""" MetricsCollectorThread — collects CPU and memory usage for a server process and persists to the metrics table every COLLECTION_INTERVAL seconds. Uses psutil to inspect the process identified by ProcessManager.get_pid(). If the process is not running, the thread sleeps and retries. """ from __future__ import annotations import logging import queue import psutil from core.dal.metrics_repository import MetricsRepository from core.threads.base_thread import BaseServerThread logger = logging.getLogger(__name__) _COLLECTION_INTERVAL = 10.0 _RETENTION_DAYS = 1 class MetricsCollectorThread(BaseServerThread): """ Collects process metrics for a running game server. Args: server_id: Database server ID. process_manager: ProcessManager singleton instance. broadcast_queue: Optional queue.Queue for real-time metric pushes. """ def __init__( self, server_id: int, process_manager, broadcast_queue=None, ) -> None: super().__init__(server_id, "MetricsCollector") self._process_manager = process_manager self._broadcast_queue = broadcast_queue self._psutil_process = None self._samples_since_cleanup = 0 self._cleanup_every = 360 # ~1 hour at 10s intervals # ── Main loop ── def _run_loop(self) -> None: pid = self._process_manager.get_pid(self.server_id) if pid is None: self._psutil_process = None self._stop_event.wait(timeout=_COLLECTION_INTERVAL) return # Reuse or create psutil.Process handle if self._psutil_process is None or self._psutil_process.pid != pid: try: self._psutil_process = psutil.Process(pid) self._psutil_process.cpu_percent(interval=None) except psutil.NoSuchProcess: self._psutil_process = None self._stop_event.wait(timeout=_COLLECTION_INTERVAL) return self._stop_event.wait(timeout=_COLLECTION_INTERVAL) if self._stop_event.is_set(): return try: cpu_pct = self._psutil_process.cpu_percent(interval=None) mem_info = self._psutil_process.memory_info() mem_mb = round(mem_info.rss / (1024 * 1024), 2) except psutil.NoSuchProcess: logger.info("[%s] Process %d no longer exists", self.name, pid) self._psutil_process = None return except psutil.AccessDenied as exc: logger.warning("[%s] Access denied reading process %d: %s", self.name, pid, exc) return if self._db is None: return metrics_repo = MetricsRepository(self._db) metrics_repo.insert( server_id=self.server_id, cpu_percent=cpu_pct, ram_mb=mem_mb, ) try: self._db.commit() except Exception as exc: logger.error("[%s] DB commit failed: %s", self.name, exc) self._db.rollback() return if self._broadcast_queue is not None: try: self._broadcast_queue.put_nowait({ "type": "metrics", "server_id": self.server_id, "data": {"cpu_percent": cpu_pct, "memory_mb": mem_mb}, }) except queue.Full: logger.debug("[%s] Broadcast queue full, dropping metrics event", self.name) # Periodic cleanup self._samples_since_cleanup += 1 if self._samples_since_cleanup >= self._cleanup_every: self._samples_since_cleanup = 0 try: metrics_repo.cleanup_old(server_id=self.server_id, retention_days=_RETENTION_DAYS) self._db.commit() except Exception as exc: logger.warning("[%s] Cleanup failed: %s", self.name, exc) self._db.rollback()