feat: implement full backend + frontend server detail, settings, and create server pages

Backend:
- Complete FastAPI backend with 42+ REST endpoints (auth, servers, config,
  players, bans, missions, mods, games, system)
- Game adapter architecture with Arma 3 as first-class adapter
- WebSocket real-time events for status, metrics, logs, players
- Background thread system (process monitor, metrics, log tail, RCon poller)
- Fernet encryption for sensitive config fields at rest
- JWT auth with admin/viewer roles, bcrypt password hashing
- SQLite with WAL mode, parameterized queries, migration system
- APScheduler cleanup jobs for logs, metrics, events

Frontend:
- Server Detail page with 7 tabs (overview, config, players, bans,
  missions, mods, logs)
- Settings page with password change and admin user management
- Create Server wizard (4-step; known bug: silent validation failure)
- New hooks: useServerDetail, useAuth, useGames
- New components: ServerHeader, ConfigEditor, PlayerTable, BanTable,
  MissionList, ModList, LogViewer, PasswordChange, UserManager
- WebSocket onEvent callback for real-time log accumulation
- 120 unit tests passing (Vitest + React Testing Library)

Docs:
- Added .gitignore, CLAUDE.md, README.md
- Updated FRONTEND.md, ARCHITECTURE.md with current implementation state
- Added .env.example for backend configuration

Known issues:
- Create Server form: "Next" buttons don't validate before advancing,
  causing silent submit failure when fields are invalid
- Config sub-tabs need UX redesign for non-technical users
Author: Tran G. (Revernomad) Khoa
Date:   2026-04-17 11:58:34 +07:00
parent 620429c9b8
commit 6511353b55
119 changed files with 13752 additions and 5000 deletions

backend/core/__init__.py
@@ -0,0 +1,77 @@
from typing import Annotated

from fastapi import APIRouter, Depends, Request
from sqlalchemy.engine import Connection

from core.auth.schemas import (
    ChangePasswordRequest, CreateUserRequest, LoginRequest,
)
from core.auth.service import AuthService
from database import get_db
from dependencies import get_current_user, require_admin

router = APIRouter(prefix="/auth", tags=["auth"])

# Rate limiter will be attached after main.py is imported
_limiter = None


def _ok(data):
    return {"success": True, "data": data, "error": None}


@router.post("/login")
def login(
    request: Request,
    body: LoginRequest,
    db: Annotated[Connection, Depends(get_db)],
):
    return _ok(AuthService(db).login(body.username, body.password))


@router.post("/logout")
def logout(user: Annotated[dict, Depends(get_current_user)]):
    # Client-side token deletion. No server-side blacklist.
    return _ok({"message": "Logged out"})


@router.get("/me")
def me(user: Annotated[dict, Depends(get_current_user)]):
    return _ok({"id": user["id"], "username": user["username"], "role": user["role"]})


@router.put("/password")
def change_password(
    body: ChangePasswordRequest,
    user: Annotated[dict, Depends(get_current_user)],
    db: Annotated[Connection, Depends(get_db)],
):
    AuthService(db).change_password(user["id"], body.current_password, body.new_password)
    return _ok({"message": "Password changed"})


@router.get("/users")
def list_users(
    _admin: Annotated[dict, Depends(require_admin)],
    db: Annotated[Connection, Depends(get_db)],
):
    return _ok(AuthService(db).list_users())


@router.post("/users", status_code=201)
def create_user(
    body: CreateUserRequest,
    _admin: Annotated[dict, Depends(require_admin)],
    db: Annotated[Connection, Depends(get_db)],
):
    user = AuthService(db).create_user(body.username, body.password, body.role)
    return _ok(user)


@router.delete("/users/{user_id}", status_code=204)
def delete_user(
    user_id: int,
    admin: Annotated[dict, Depends(require_admin)],
    db: Annotated[Connection, Depends(get_db)],
):
    AuthService(db).delete_user(user_id, admin["id"])
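A quick way to exercise these routes once the app is running; a minimal client sketch, assuming the API is served at http://localhost:8000 with no extra mount prefix (neither the host nor the prefix is shown in this diff):

import requests

BASE = "http://localhost:8000"  # assumed host/port

# Log in and unwrap the {success, data, error} envelope used by every endpoint
resp = requests.post(f"{BASE}/auth/login", json={"username": "admin", "password": "secret"})
token = resp.json()["data"]["access_token"]

# Authenticated call with the bearer token
me = requests.get(f"{BASE}/auth/me", headers={"Authorization": f"Bearer {token}"})
print(me.json()["data"])  # {"id": ..., "username": "admin", "role": "admin"}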

@@ -0,0 +1,31 @@
from pydantic import BaseModel


class LoginRequest(BaseModel):
    username: str
    password: str


class TokenResponse(BaseModel):
    access_token: str
    token_type: str = "bearer"
    expires_in: int
    user: dict


class UserResponse(BaseModel):
    id: int
    username: str
    role: str
    created_at: str


class CreateUserRequest(BaseModel):
    username: str
    password: str
    role: str = "viewer"


class ChangePasswordRequest(BaseModel):
    current_password: str
    new_password: str

@@ -0,0 +1,105 @@
from __future__ import annotations

from fastapi import HTTPException, status
from sqlalchemy import text
from sqlalchemy.engine import Connection

from core.auth.utils import create_access_token, hash_password, verify_password
from config import settings


class AuthService:
    def __init__(self, db: Connection):
        self._db = db

    def login(self, username: str, password: str) -> dict:
        row = self._db.execute(
            text("SELECT * FROM users WHERE username = :u"), {"u": username}
        ).fetchone()
        if row is None or not verify_password(password, row.password_hash):
            raise HTTPException(
                status_code=status.HTTP_401_UNAUTHORIZED,
                detail={"code": "UNAUTHORIZED", "message": "Invalid credentials"},
            )
        user = dict(row._mapping)
        self._db.execute(
            text("UPDATE users SET last_login = datetime('now') WHERE id = :id"),
            {"id": user["id"]},
        )
        token = create_access_token(user["id"], user["username"], user["role"])
        return {
            "access_token": token,
            "token_type": "bearer",
            "expires_in": settings.jwt_expire_hours * 3600,
            "user": {"id": user["id"], "username": user["username"], "role": user["role"]},
        }

    def create_user(self, username: str, password: str, role: str = "viewer") -> dict:
        existing = self._db.execute(
            text("SELECT id FROM users WHERE username = :u"), {"u": username}
        ).fetchone()
        if existing:
            raise HTTPException(
                status_code=status.HTTP_409_CONFLICT,
                detail={"code": "CONFLICT", "message": f"Username '{username}' already taken"},
            )
        self._db.execute(
            text(
                "INSERT INTO users (username, password_hash, role) VALUES (:u, :ph, :r)"
            ),
            {"u": username, "ph": hash_password(password), "r": role},
        )
        row = self._db.execute(
            text("SELECT id, username, role, created_at FROM users WHERE username = :u"),
            {"u": username},
        ).fetchone()
        return dict(row._mapping)

    def change_password(self, user_id: int, current_password: str, new_password: str) -> None:
        row = self._db.execute(
            text("SELECT password_hash FROM users WHERE id = :id"),
            {"id": user_id},
        ).fetchone()
        if row is None or not verify_password(current_password, row.password_hash):
            raise HTTPException(
                status_code=status.HTTP_401_UNAUTHORIZED,
                detail={"code": "UNAUTHORIZED", "message": "Current password is incorrect"},
            )
        self._db.execute(
            text("UPDATE users SET password_hash = :ph WHERE id = :id"),
            {"ph": hash_password(new_password), "id": user_id},
        )

    def list_users(self) -> list[dict]:
        rows = self._db.execute(
            text("SELECT id, username, role, created_at, last_login FROM users ORDER BY id")
        ).fetchall()
        return [dict(r._mapping) for r in rows]

    def delete_user(self, user_id: int, requesting_user_id: int) -> None:
        if user_id == requesting_user_id:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail={"code": "VALIDATION_ERROR", "message": "Cannot delete yourself"},
            )
        self._db.execute(
            text("DELETE FROM users WHERE id = :id"),
            {"id": user_id},
        )

    def seed_admin_if_empty(self) -> str | None:
        """
        Create a default admin user if no users exist.
        Returns the generated password (printed to stdout on startup).
        """
        count = self._db.execute(text("SELECT COUNT(*) FROM users")).fetchone()[0]
        if count > 0:
            return None
        import secrets
        password = secrets.token_urlsafe(16)
        self.create_user("admin", password, "admin")
        return password
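seed_admin_if_empty is presumably invoked once at startup; a hedged sketch of that wiring, with the engine URL and the surrounding main.py code assumed (neither is in the shown hunks):

# Hypothetical startup hook; the real wiring in main.py is not part of this diff.
from sqlalchemy import create_engine

from core.auth.service import AuthService

engine = create_engine("sqlite:///app.db")  # assumed DB URL
with engine.begin() as conn:
    generated = AuthService(conn).seed_admin_if_empty()
    if generated:
        # Shown once; the operator is expected to change it via /auth/password
        print(f"Created default admin user with password: {generated}")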

@@ -0,0 +1,48 @@
"""JWT creation/validation and password hashing."""
from __future__ import annotations
import logging
from datetime import datetime, timedelta, timezone
import bcrypt
from jose import JWTError, jwt
logger = logging.getLogger(__name__)
def hash_password(password: str) -> str:
"""Hash a password using bcrypt. Returns UTF-8 encoded hash string."""
password_bytes = password.encode("utf-8")
salt = bcrypt.gensalt()
hashed = bcrypt.hashpw(password_bytes, salt)
return hashed.decode("utf-8")
def verify_password(plain: str, hashed: str) -> bool:
"""Verify a plain password against a bcrypt hash."""
try:
return bcrypt.checkpw(plain.encode("utf-8"), hashed.encode("utf-8"))
except Exception as exc:
logger.warning("Password verification failed: %s", exc)
return False
def create_access_token(user_id: int, username: str, role: str) -> str:
from config import settings
expire = datetime.now(timezone.utc) + timedelta(hours=settings.jwt_expire_hours)
payload = {
"sub": str(user_id),
"username": username,
"role": role,
"exp": expire,
}
return jwt.encode(payload, settings.secret_key, algorithm="HS256")
def decode_access_token(token: str) -> dict:
"""
Decode and validate JWT. Returns payload dict.
Raises JWTError on invalid/expired token.
"""
from config import settings
return jwt.decode(token, settings.secret_key, algorithms=["HS256"])
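get_current_user and require_admin are imported from dependencies throughout the routers, but that module is not among the hunks shown here. A plausible sketch of what they look like, built on decode_access_token; every name beyond that function is an assumption:

# Hypothetical dependencies.py (not part of this commit's shown hunks)
from typing import Annotated

from fastapi import Depends, HTTPException, status
from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
from jose import JWTError

from core.auth.utils import decode_access_token

_bearer = HTTPBearer(auto_error=False)


def get_current_user(
    creds: Annotated[HTTPAuthorizationCredentials | None, Depends(_bearer)],
) -> dict:
    if creds is None:
        raise HTTPException(status.HTTP_401_UNAUTHORIZED, detail="Missing bearer token")
    try:
        payload = decode_access_token(creds.credentials)
    except JWTError:
        raise HTTPException(status.HTTP_401_UNAUTHORIZED, detail="Invalid or expired token")
    return {"id": int(payload["sub"]), "username": payload["username"], "role": payload["role"]}


def require_admin(user: Annotated[dict, Depends(get_current_user)]) -> dict:
    if user["role"] != "admin":
        raise HTTPException(status.HTTP_403_FORBIDDEN, detail="Admin role required")
    return user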

@@ -0,0 +1 @@
"""Data Access Layer repositories."""

@@ -0,0 +1,52 @@
import json
from datetime import datetime, timezone

from core.dal.base_repository import BaseRepository


class BanRepository(BaseRepository):
    def get_all(self, server_id: int, active_only: bool = True) -> list[dict]:
        if active_only:
            return self._fetchall(
                "SELECT * FROM bans WHERE server_id = :sid AND is_active = 1 ORDER BY banned_at DESC",
                {"sid": server_id},
            )
        return self._fetchall(
            "SELECT * FROM bans WHERE server_id = :sid ORDER BY banned_at DESC",
            {"sid": server_id},
        )

    def create(
        self,
        server_id: int,
        guid: str | None,
        name: str | None,
        reason: str | None,
        banned_by: str,
        expires_at: str | None = None,
        game_data: dict | None = None,
    ) -> int:
        return self._lastrowid(
            """
            INSERT INTO bans (server_id, guid, name, reason, banned_by, expires_at, game_data)
            VALUES (:sid, :guid, :name, :reason, :by, :exp, :gd)
            """,
            {
                "sid": server_id,
                "guid": guid,
                "name": name,
                "reason": reason,
                "by": banned_by,
                "exp": expires_at,
                "gd": json.dumps(game_data or {}),
            },
        )

    def deactivate(self, ban_id: int) -> None:
        self._execute(
            "UPDATE bans SET is_active = 0 WHERE id = :id",
            {"id": ban_id},
        )

    def get_by_id(self, ban_id: int) -> dict | None:
        return self._fetchone("SELECT * FROM bans WHERE id = :id", {"id": ban_id})

@@ -0,0 +1,27 @@
"""Base repository with common DB helpers."""
from __future__ import annotations
from sqlalchemy import text
from sqlalchemy.engine import Connection
class BaseRepository:
def __init__(self, db: Connection):
self._db = db
def _execute(self, query: str, params: dict | None = None):
return self._db.execute(text(query), params or {})
def _fetchone(self, query: str, params: dict | None = None) -> dict | None:
row = self._db.execute(text(query), params or {}).fetchone()
if row is None:
return None
return dict(row._mapping)
def _fetchall(self, query: str, params: dict | None = None) -> list[dict]:
rows = self._db.execute(text(query), params or {}).fetchall()
return [dict(r._mapping) for r in rows]
def _lastrowid(self, query: str, params: dict | None = None) -> int:
result = self._db.execute(text(query), params or {})
return result.lastrowid

@@ -0,0 +1,163 @@
"""
Manages the game_configs table.
Handles Fernet encryption/decryption of sensitive fields transparently.
"""
from __future__ import annotations
import json
from datetime import datetime, timezone
from core.dal.base_repository import BaseRepository
from core.utils.crypto import decrypt, encrypt, is_encrypted
class ConfigRepository(BaseRepository):
def _encrypt_sensitive(
self, config: dict, sensitive_fields: list[str]
) -> dict:
"""Return new dict with sensitive fields encrypted."""
result = dict(config)
for field in sensitive_fields:
if field in result and result[field] and not is_encrypted(str(result[field])):
result[field] = encrypt(str(result[field]))
return result
def _decrypt_sensitive(
self, config: dict, sensitive_fields: list[str]
) -> dict:
"""Return new dict with sensitive fields decrypted."""
result = dict(config)
for field in sensitive_fields:
if field in result and is_encrypted(str(result[field])):
result[field] = decrypt(str(result[field]))
return result
def get_section(
self,
server_id: int,
section: str,
sensitive_fields: list[str] | None = None,
) -> dict | None:
"""Get a config section. Decrypts sensitive fields automatically."""
row = self._fetchone(
"SELECT * FROM game_configs WHERE server_id = :sid AND section = :sec",
{"sid": server_id, "sec": section},
)
if row is None:
return None
config = json.loads(row["config_json"])
if sensitive_fields:
config = self._decrypt_sensitive(config, sensitive_fields)
config["_meta"] = {
"config_version": row["config_version"],
"schema_version": row["schema_version"],
}
return config
def get_all_sections(
self,
server_id: int,
sensitive_fields_by_section: dict[str, list[str]] | None = None,
) -> dict[str, dict]:
"""Get all config sections for a server."""
rows = self._fetchall(
"SELECT * FROM game_configs WHERE server_id = :sid ORDER BY section",
{"sid": server_id},
)
result = {}
for row in rows:
config = json.loads(row["config_json"])
sf = (sensitive_fields_by_section or {}).get(row["section"], [])
if sf:
config = self._decrypt_sensitive(config, sf)
config["_meta"] = {
"config_version": row["config_version"],
"schema_version": row["schema_version"],
}
result[row["section"]] = config
return result
def upsert_section(
self,
server_id: int,
game_type: str,
section: str,
config_data: dict,
schema_version: str,
sensitive_fields: list[str] | None = None,
expected_config_version: int | None = None,
) -> int:
"""
Upsert a config section.
If expected_config_version is provided, checks optimistic lock.
Returns the new config_version.
Raises ValueError on version conflict (caller returns 409).
"""
now = datetime.now(timezone.utc).isoformat()
# Strip _meta before storing
data_to_store = {k: v for k, v in config_data.items() if k != "_meta"}
# Encrypt sensitive fields
if sensitive_fields:
data_to_store = self._encrypt_sensitive(data_to_store, sensitive_fields)
# Check if row exists
existing = self._fetchone(
"SELECT id, config_version FROM game_configs WHERE server_id = :sid AND section = :sec",
{"sid": server_id, "sec": section},
)
if existing is None:
# Insert
self._execute(
"""
INSERT INTO game_configs
(server_id, game_type, section, config_json, config_version, schema_version, updated_at)
VALUES (:sid, :gt, :sec, :json, 1, :sv, :now)
""",
{
"sid": server_id, "gt": game_type, "sec": section,
"json": json.dumps(data_to_store), "sv": schema_version, "now": now,
},
)
return 1
else:
current_version = existing["config_version"]
if expected_config_version is not None and expected_config_version != current_version:
raise ValueError(
f"CONFIG_VERSION_CONFLICT:{current_version}"
)
new_version = current_version + 1
self._execute(
"""
UPDATE game_configs
SET config_json = :json, config_version = :cv,
schema_version = :sv, updated_at = :now
WHERE server_id = :sid AND section = :sec
""",
{
"json": json.dumps(data_to_store),
"cv": new_version,
"sv": schema_version,
"now": now,
"sid": server_id,
"sec": section,
},
)
return new_version
def delete_sections(self, server_id: int) -> None:
self._execute(
"DELETE FROM game_configs WHERE server_id = :sid",
{"sid": server_id},
)
def get_raw_sections(self, server_id: int) -> dict[str, dict]:
"""Get all sections without decryption — for config file generation."""
rows = self._fetchall(
"SELECT section, config_json FROM game_configs WHERE server_id = :sid",
{"sid": server_id},
)
return {row["section"]: json.loads(row["config_json"]) for row in rows}

@@ -0,0 +1,62 @@
import json

from core.dal.base_repository import BaseRepository


class EventRepository(BaseRepository):
    def insert(
        self,
        server_id: int,
        event_type: str,
        actor: str = "system",
        detail: dict | None = None,
    ) -> None:
        self._execute(
            """
            INSERT INTO server_events (server_id, event_type, actor, detail)
            VALUES (:sid, :et, :actor, :detail)
            """,
            {
                "sid": server_id,
                "et": event_type,
                "actor": actor,
                "detail": json.dumps(detail) if detail else None,
            },
        )

    def get_events(
        self,
        server_id: int,
        limit: int = 50,
        offset: int = 0,
        event_type: str | None = None,
    ) -> list[dict]:
        if event_type:
            return self._fetchall(
                """
                SELECT * FROM server_events
                WHERE server_id = :sid AND event_type = :et
                ORDER BY created_at DESC LIMIT :limit OFFSET :offset
                """,
                {"sid": server_id, "et": event_type, "limit": limit, "offset": offset},
            )
        return self._fetchall(
            """
            SELECT * FROM server_events WHERE server_id = :sid
            ORDER BY created_at DESC LIMIT :limit OFFSET :offset
            """,
            {"sid": server_id, "limit": limit, "offset": offset},
        )

    def get_recent_all_servers(self, limit: int = 20) -> list[dict]:
        return self._fetchall(
            "SELECT * FROM server_events ORDER BY created_at DESC LIMIT :limit",
            {"limit": limit},
        )

    def cleanup_old(self, retention_days: int) -> None:
        """Delete events older than retention_days."""
        self._execute(
            "DELETE FROM server_events WHERE created_at < datetime('now', :delta)",
            {"delta": f"-{retention_days} days"},
        )

@@ -0,0 +1,61 @@
from core.dal.base_repository import BaseRepository


class LogRepository(BaseRepository):
    def insert(self, server_id: int, entry: dict) -> None:
        """entry = {timestamp, level, message}"""
        self._execute(
            """
            INSERT INTO logs (server_id, timestamp, level, message)
            VALUES (:sid, :ts, :level, :msg)
            """,
            {
                "sid": server_id,
                "ts": entry.get("timestamp", ""),
                "level": entry.get("level", "info"),
                "msg": entry.get("message", ""),
            },
        )

    def query(
        self,
        server_id: int,
        limit: int = 200,
        offset: int = 0,
        level: str | None = None,
        since: str | None = None,
        search: str | None = None,
    ) -> tuple[int, list[dict]]:
        conditions = ["server_id = :sid"]
        params: dict = {"sid": server_id, "limit": limit, "offset": offset}
        if level:
            conditions.append("level = :level")
            params["level"] = level
        if since:
            conditions.append("timestamp >= :since")
            params["since"] = since
        if search:
            conditions.append("message LIKE :search")
            params["search"] = f"%{search}%"
        where = " AND ".join(conditions)
        total_row = self._fetchone(f"SELECT COUNT(*) as cnt FROM logs WHERE {where}", params)
        total = total_row["cnt"] if total_row else 0
        rows = self._fetchall(
            f"SELECT * FROM logs WHERE {where} ORDER BY timestamp DESC LIMIT :limit OFFSET :offset",
            params,
        )
        return total, rows

    def clear(self, server_id: int) -> int:
        result = self._execute(
            "DELETE FROM logs WHERE server_id = :sid", {"sid": server_id}
        )
        return result.rowcount

    def cleanup_old(self, retention_days: int) -> None:
        self._execute(
            "DELETE FROM logs WHERE created_at < datetime('now', :delta)",
            {"delta": f"-{retention_days} days"},
        )

@@ -0,0 +1,53 @@
from core.dal.base_repository import BaseRepository


class MetricsRepository(BaseRepository):
    def insert(
        self, server_id: int, cpu_percent: float, ram_mb: float = 0.0, player_count: int = 0
    ) -> None:
        self._execute(
            """
            INSERT INTO metrics (server_id, cpu_percent, ram_mb, player_count)
            VALUES (:sid, :cpu, :ram, :pc)
            """,
            {"sid": server_id, "cpu": cpu_percent, "ram": ram_mb, "pc": player_count},
        )

    def query(
        self,
        server_id: int,
        from_ts: str | None = None,
        to_ts: str | None = None,
    ) -> list[dict]:
        conditions = ["server_id = :sid"]
        params: dict = {"sid": server_id}
        if from_ts:
            conditions.append("timestamp >= :from_ts")
            params["from_ts"] = from_ts
        if to_ts:
            conditions.append("timestamp <= :to_ts")
            params["to_ts"] = to_ts
        where = " AND ".join(conditions)
        return self._fetchall(
            f"SELECT * FROM metrics WHERE {where} ORDER BY timestamp ASC",
            params,
        )

    def get_latest(self, server_id: int) -> dict | None:
        return self._fetchone(
            "SELECT * FROM metrics WHERE server_id = :sid ORDER BY timestamp DESC LIMIT 1",
            {"sid": server_id},
        )

    def cleanup_old(self, retention_days: int = 1, server_id: int | None = None) -> None:
        if server_id is not None:
            self._execute(
                "DELETE FROM metrics WHERE server_id = :sid AND timestamp < datetime('now', :delta)",
                {"sid": server_id, "delta": f"-{retention_days} days"},
            )
        else:
            self._execute(
                "DELETE FROM metrics WHERE timestamp < datetime('now', :delta)",
                {"delta": f"-{retention_days} days"},
            )

@@ -0,0 +1,70 @@
import json
from datetime import datetime, timezone

from core.dal.base_repository import BaseRepository


class PlayerRepository(BaseRepository):
    def get_all(self, server_id: int) -> list[dict]:
        return self._fetchall(
            "SELECT * FROM players WHERE server_id = :sid ORDER BY slot_id",
            {"sid": server_id},
        )

    def count(self, server_id: int) -> int:
        row = self._fetchone(
            "SELECT COUNT(*) as cnt FROM players WHERE server_id = :sid",
            {"sid": server_id},
        )
        return row["cnt"] if row else 0

    def upsert(self, server_id: int, player: dict) -> None:
        now = datetime.now(timezone.utc).isoformat()
        self._execute(
            """
            INSERT INTO players (server_id, slot_id, name, guid, ip, ping, game_data, joined_at, updated_at)
            VALUES (:sid, :slot, :name, :guid, :ip, :ping, :gd, :now, :now)
            ON CONFLICT(server_id, slot_id) DO UPDATE SET
                name = excluded.name,
                guid = excluded.guid,
                ping = excluded.ping,
                game_data = excluded.game_data,
                updated_at = excluded.updated_at
            """,
            {
                "sid": server_id,
                "slot": str(player.get("slot_id", "")),
                "name": player.get("name", ""),
                "guid": player.get("guid"),
                "ip": player.get("ip"),
                "ping": player.get("ping"),
                "gd": json.dumps(player.get("game_data", {})),
                "now": now,
            },
        )

    def clear(self, server_id: int) -> None:
        self._execute("DELETE FROM players WHERE server_id = :sid", {"sid": server_id})

    def get_history(
        self,
        server_id: int,
        limit: int = 50,
        offset: int = 0,
        search: str | None = None,
    ) -> tuple[int, list[dict]]:
        conditions = ["server_id = :sid"]
        params: dict = {"sid": server_id, "limit": limit, "offset": offset}
        if search:
            conditions.append("name LIKE :search")
            params["search"] = f"%{search}%"
        where = " AND ".join(conditions)
        total_row = self._fetchone(
            f"SELECT COUNT(*) as cnt FROM player_history WHERE {where}", params
        )
        total = total_row["cnt"] if total_row else 0
        rows = self._fetchall(
            f"SELECT * FROM player_history WHERE {where} ORDER BY left_at DESC LIMIT :limit OFFSET :offset",
            params,
        )
        return total, rows

@@ -0,0 +1,111 @@
from __future__ import annotations

from datetime import datetime, timezone

from core.dal.base_repository import BaseRepository


class ServerRepository(BaseRepository):
    def get_all(self, game_type: str | None = None) -> list[dict]:
        if game_type:
            return self._fetchall(
                "SELECT * FROM servers WHERE game_type = :gt ORDER BY name",
                {"gt": game_type},
            )
        return self._fetchall("SELECT * FROM servers ORDER BY name")

    def get_by_id(self, server_id: int) -> dict | None:
        return self._fetchone("SELECT * FROM servers WHERE id = :id", {"id": server_id})

    def create(
        self,
        name: str,
        game_type: str,
        exe_path: str,
        game_port: int,
        rcon_port: int | None = None,
        description: str | None = None,
        auto_restart: bool = False,
        max_restarts: int = 3,
    ) -> int:
        return self._lastrowid(
            """
            INSERT INTO servers
                (name, description, game_type, exe_path, game_port, rcon_port,
                 auto_restart, max_restarts)
            VALUES
                (:name, :desc, :game_type, :exe, :gp, :rp, :ar, :mr)
            """,
            {
                "name": name,
                "desc": description,
                "game_type": game_type,
                "exe": exe_path,
                "gp": game_port,
                "rp": rcon_port,
                "ar": int(auto_restart),
                "mr": max_restarts,
            },
        )

    def update(self, server_id: int, **fields) -> None:
        if not fields:
            return
        fields["updated_at"] = datetime.now(timezone.utc).isoformat()
        fields["id"] = server_id
        set_clause = ", ".join(f"{k} = :{k}" for k in fields if k != "id")
        self._execute(f"UPDATE servers SET {set_clause} WHERE id = :id", fields)

    def update_status(
        self,
        server_id: int,
        status: str,
        pid: int | None = None,
        started_at: str | None = None,
        stopped_at: str | None = None,
    ) -> None:
        now = datetime.now(timezone.utc).isoformat()
        self._execute(
            """
            UPDATE servers
            SET status = :status, pid = :pid, started_at = :sa,
                stopped_at = :sta, updated_at = :now
            WHERE id = :id
            """,
            {
                "status": status,
                "pid": pid,
                "sa": started_at,
                "sta": stopped_at,
                "now": now,
                "id": server_id,
            },
        )

    def delete(self, server_id: int) -> None:
        self._execute("DELETE FROM servers WHERE id = :id", {"id": server_id})

    def get_running(self) -> list[dict]:
        return self._fetchall(
            "SELECT * FROM servers WHERE status IN ('running', 'starting')"
        )

    def increment_restart_count(self, server_id: int) -> None:
        now = datetime.now(timezone.utc).isoformat()
        self._execute(
            """
            UPDATE servers
            SET restart_count = restart_count + 1,
                last_restart_at = :now,
                updated_at = :now
            WHERE id = :id
            """,
            {"now": now, "id": server_id},
        )

    def reset_restart_count(self, server_id: int) -> None:
        self._execute(
            "UPDATE servers SET restart_count = 0 WHERE id = :id",
            {"id": server_id},
        )

@@ -0,0 +1,70 @@
from fastapi import APIRouter, HTTPException, status

from adapters.registry import GameAdapterRegistry

router = APIRouter(prefix="/games", tags=["games"])


def _ok(data):
    return {"success": True, "data": data, "error": None}


@router.get("")
def list_games():
    return _ok(GameAdapterRegistry.list_game_types())


@router.get("/{game_type}")
def get_game(game_type: str):
    try:
        adapter = GameAdapterRegistry.get(game_type)
    except KeyError:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail={"code": "GAME_TYPE_NOT_FOUND", "message": f"Unknown game type: {game_type}"},
        )
    caps = []
    for cap in ["config_generator", "process_config", "log_parser",
                "remote_admin", "mission_manager", "mod_manager", "ban_manager"]:
        if adapter.has_capability(cap):
            caps.append(cap)
    config_gen = adapter.get_config_generator()
    sections = list(config_gen.get_sections().keys())
    process_config = adapter.get_process_config()
    return _ok({
        "game_type": adapter.game_type,
        "display_name": adapter.display_name,
        "version": adapter.version,
        "schema_version": config_gen.get_config_version(),
        "capabilities": caps,
        "config_sections": sections,
        "allowed_executables": process_config.get_allowed_executables(),
    })


@router.get("/{game_type}/config-schema")
def get_config_schema(game_type: str):
    try:
        adapter = GameAdapterRegistry.get(game_type)
    except KeyError:
        raise HTTPException(status_code=404, detail={"code": "GAME_TYPE_NOT_FOUND"})
    config_gen = adapter.get_config_generator()
    schemas = {}
    for section, model_cls in config_gen.get_sections().items():
        schemas[section] = model_cls.model_json_schema()
    return _ok(schemas)


@router.get("/{game_type}/defaults")
def get_defaults(game_type: str):
    try:
        adapter = GameAdapterRegistry.get(game_type)
    except KeyError:
        raise HTTPException(status_code=404, detail={"code": "GAME_TYPE_NOT_FOUND"})
    config_gen = adapter.get_config_generator()
    defaults = {}
    for section in config_gen.get_sections():
        defaults[section] = config_gen.get_defaults(section)
    return _ok(defaults)
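These schema and defaults endpoints are what let the frontend ConfigEditor render config forms dynamically. A hedged client sketch in Python for consistency with the rest of the diff; the base URL is assumed, and auth may additionally be enforced at the mount point:

import requests

BASE = "http://localhost:8000"  # assumed host/port

schema = requests.get(f"{BASE}/games/arma3/config-schema").json()["data"]
defaults = requests.get(f"{BASE}/games/arma3/defaults").json()["data"]

# Each section's JSON Schema can drive a form; defaults pre-populate it
for section, section_schema in schema.items():
    print(section, "->", sorted(section_schema.get("properties", {})))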

@@ -0,0 +1,102 @@
"""
Cleanup jobs registered with APScheduler.
Jobs:
- cleanup_old_logs: Delete log entries older than 7 days, daily at 03:00
- cleanup_old_metrics: Delete metrics older than 1 day, every 6 hours
- cleanup_old_events: Delete events older than 30 days, weekly on Sunday
"""
from __future__ import annotations
import logging
from apscheduler.triggers.cron import CronTrigger
from apscheduler.triggers.interval import IntervalTrigger
from core.jobs.scheduler import get_scheduler
from database import get_thread_db
from core.dal.log_repository import LogRepository
from core.dal.metrics_repository import MetricsRepository
from core.dal.event_repository import EventRepository
logger = logging.getLogger(__name__)
_LOG_RETENTION_DAYS = 7
_METRICS_RETENTION_DAYS = 1
_EVENT_RETENTION_DAYS = 30
def register_cleanup_jobs() -> None:
"""Register all cleanup jobs with the scheduler. Call at startup."""
sched = get_scheduler()
sched.add_job(
func=_cleanup_old_logs,
trigger=CronTrigger(hour=3, minute=0),
id="cleanup_old_logs",
name="Clean up old log entries",
replace_existing=True,
)
sched.add_job(
func=_cleanup_old_metrics,
trigger=IntervalTrigger(hours=6),
id="cleanup_old_metrics",
name="Clean up old metrics",
replace_existing=True,
)
sched.add_job(
func=_cleanup_old_events,
trigger=CronTrigger(day_of_week="sun", hour=4, minute=0),
id="cleanup_old_events",
name="Clean up old events",
replace_existing=True,
)
logger.info("Cleanup jobs registered")
def _cleanup_old_logs() -> None:
logger.info("Running log cleanup (retention=%d days)", _LOG_RETENTION_DAYS)
try:
db = get_thread_db()
try:
log_repo = LogRepository(db)
log_repo.cleanup_old(retention_days=_LOG_RETENTION_DAYS)
db.commit()
finally:
db.close()
logger.info("Log cleanup complete")
except Exception as exc:
logger.error("Log cleanup failed: %s", exc, exc_info=True)
def _cleanup_old_metrics() -> None:
logger.info("Running metrics cleanup (retention=%d days)", _METRICS_RETENTION_DAYS)
try:
db = get_thread_db()
try:
metrics_repo = MetricsRepository(db)
metrics_repo.cleanup_old(retention_days=_METRICS_RETENTION_DAYS)
db.commit()
finally:
db.close()
logger.info("Metrics cleanup complete")
except Exception as exc:
logger.error("Metrics cleanup failed: %s", exc, exc_info=True)
def _cleanup_old_events() -> None:
logger.info("Running event cleanup (retention=%d days)", _EVENT_RETENTION_DAYS)
try:
db = get_thread_db()
try:
event_repo = EventRepository(db)
event_repo.cleanup_old(retention_days=_EVENT_RETENTION_DAYS)
db.commit()
finally:
db.close()
logger.info("Event cleanup complete")
except Exception as exc:
logger.error("Event cleanup failed: %s", exc, exc_info=True)

@@ -0,0 +1,40 @@
"""
APScheduler setup for background cleanup jobs.
One scheduler instance runs per process.
Jobs run in their own threads (ThreadPoolExecutor).
"""
from __future__ import annotations
import logging
from apscheduler.schedulers.background import BackgroundScheduler
from apscheduler.executors.pool import ThreadPoolExecutor
logger = logging.getLogger(__name__)
_scheduler: BackgroundScheduler | None = None
def get_scheduler() -> BackgroundScheduler:
global _scheduler
if _scheduler is None:
_scheduler = BackgroundScheduler(
executors={"default": ThreadPoolExecutor(max_workers=2)},
job_defaults={"coalesce": True, "max_instances": 1},
)
return _scheduler
def start_scheduler() -> None:
sched = get_scheduler()
if not sched.running:
sched.start()
logger.info("APScheduler started")
def stop_scheduler() -> None:
global _scheduler
if _scheduler is not None and _scheduler.running:
_scheduler.shutdown(wait=False)
logger.info("APScheduler stopped")
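Neither main.py nor the lifespan wiring is part of the shown hunks; a sketch of how start_scheduler and register_cleanup_jobs would typically be hooked up at startup, with the cleanup module path assumed:

# Hypothetical lifespan wiring in main.py (not in the shown hunks)
from contextlib import asynccontextmanager

from fastapi import FastAPI

from core.jobs.cleanup import register_cleanup_jobs  # assumed module path
from core.jobs.scheduler import start_scheduler, stop_scheduler


@asynccontextmanager
async def lifespan(app: FastAPI):
    register_cleanup_jobs()
    start_scheduler()
    yield
    stop_scheduler()


app = FastAPI(lifespan=lifespan)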

@@ -0,0 +1,187 @@
CREATE TABLE IF NOT EXISTS users (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    username TEXT NOT NULL UNIQUE,
    password_hash TEXT NOT NULL,
    role TEXT NOT NULL DEFAULT 'viewer',
    created_at TEXT NOT NULL DEFAULT (datetime('now')),
    last_login TEXT,
    CHECK (role IN ('admin', 'viewer'))
);

CREATE TABLE IF NOT EXISTS servers (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    name TEXT NOT NULL,
    description TEXT,
    game_type TEXT NOT NULL DEFAULT 'arma3',
    status TEXT NOT NULL DEFAULT 'stopped',
    pid INTEGER,
    exe_path TEXT NOT NULL,
    started_at TEXT,
    stopped_at TEXT,
    game_port INTEGER NOT NULL,
    rcon_port INTEGER,
    auto_restart INTEGER NOT NULL DEFAULT 0,
    max_restarts INTEGER NOT NULL DEFAULT 3,
    restart_window_seconds INTEGER NOT NULL DEFAULT 300,
    restart_count INTEGER NOT NULL DEFAULT 0,
    last_restart_at TEXT,
    created_at TEXT NOT NULL DEFAULT (datetime('now')),
    updated_at TEXT NOT NULL DEFAULT (datetime('now')),
    CHECK (status IN ('stopped','starting','running','stopping','crashed','error')),
    CHECK (game_port BETWEEN 1024 AND 65535),
    CHECK (rcon_port IS NULL OR (rcon_port BETWEEN 1024 AND 65535))
);

CREATE INDEX IF NOT EXISTS idx_servers_status ON servers(status);
CREATE INDEX IF NOT EXISTS idx_servers_game_type ON servers(game_type);
CREATE INDEX IF NOT EXISTS idx_servers_game_port ON servers(game_port);

CREATE TABLE IF NOT EXISTS game_configs (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    server_id INTEGER NOT NULL REFERENCES servers(id) ON DELETE CASCADE,
    game_type TEXT NOT NULL,
    section TEXT NOT NULL,
    config_json TEXT NOT NULL DEFAULT '{}',
    config_version INTEGER NOT NULL DEFAULT 1,
    schema_version TEXT NOT NULL DEFAULT '1.0.0',
    updated_at TEXT NOT NULL DEFAULT (datetime('now')),
    UNIQUE(server_id, section)
);

CREATE INDEX IF NOT EXISTS idx_game_configs_server ON game_configs(server_id);
CREATE INDEX IF NOT EXISTS idx_game_configs_type_section ON game_configs(game_type, section);

CREATE TABLE IF NOT EXISTS mods (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    game_type TEXT NOT NULL,
    name TEXT NOT NULL,
    folder_path TEXT NOT NULL,
    workshop_id TEXT,
    description TEXT,
    game_data TEXT DEFAULT '{}',
    created_at TEXT NOT NULL DEFAULT (datetime('now')),
    UNIQUE (game_type, folder_path)
);

CREATE TABLE IF NOT EXISTS server_mods (
    server_id INTEGER NOT NULL REFERENCES servers(id) ON DELETE CASCADE,
    mod_id INTEGER NOT NULL REFERENCES mods(id) ON DELETE CASCADE,
    is_server_mod INTEGER NOT NULL DEFAULT 0,
    sort_order INTEGER NOT NULL DEFAULT 0,
    game_data TEXT DEFAULT '{}',
    PRIMARY KEY (server_id, mod_id)
);

CREATE INDEX IF NOT EXISTS idx_server_mods_server ON server_mods(server_id);

CREATE TABLE IF NOT EXISTS missions (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    server_id INTEGER NOT NULL REFERENCES servers(id) ON DELETE CASCADE,
    filename TEXT NOT NULL,
    mission_name TEXT NOT NULL,
    terrain TEXT,
    file_size INTEGER,
    game_data TEXT DEFAULT '{}',
    uploaded_at TEXT NOT NULL DEFAULT (datetime('now')),
    UNIQUE (server_id, filename)
);

CREATE INDEX IF NOT EXISTS idx_missions_server ON missions(server_id);

CREATE TABLE IF NOT EXISTS mission_rotation (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    server_id INTEGER NOT NULL REFERENCES servers(id) ON DELETE CASCADE,
    mission_id INTEGER NOT NULL REFERENCES missions(id) ON DELETE CASCADE,
    sort_order INTEGER NOT NULL DEFAULT 0,
    difficulty TEXT,
    params_json TEXT NOT NULL DEFAULT '{}',
    game_data TEXT DEFAULT '{}',
    UNIQUE (server_id, sort_order)
);

CREATE INDEX IF NOT EXISTS idx_mission_rotation_server ON mission_rotation(server_id);

CREATE TABLE IF NOT EXISTS players (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    server_id INTEGER NOT NULL REFERENCES servers(id) ON DELETE CASCADE,
    slot_id TEXT NOT NULL,
    name TEXT NOT NULL,
    guid TEXT,
    ip TEXT,
    ping INTEGER,
    game_data TEXT DEFAULT '{}',
    joined_at TEXT NOT NULL DEFAULT (datetime('now')),
    updated_at TEXT NOT NULL DEFAULT (datetime('now')),
    UNIQUE (server_id, slot_id)
);

CREATE INDEX IF NOT EXISTS idx_players_server ON players(server_id);

CREATE TABLE IF NOT EXISTS player_history (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    server_id INTEGER NOT NULL REFERENCES servers(id) ON DELETE CASCADE,
    name TEXT NOT NULL,
    guid TEXT,
    ip TEXT,
    game_data TEXT DEFAULT '{}',
    joined_at TEXT NOT NULL,
    left_at TEXT NOT NULL DEFAULT (datetime('now')),
    session_duration_seconds INTEGER
);

CREATE INDEX IF NOT EXISTS idx_player_history_server ON player_history(server_id);
CREATE INDEX IF NOT EXISTS idx_player_history_guid ON player_history(guid);

CREATE TABLE IF NOT EXISTS bans (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    server_id INTEGER NOT NULL REFERENCES servers(id) ON DELETE CASCADE,
    guid TEXT,
    name TEXT,
    reason TEXT,
    banned_by TEXT,
    banned_at TEXT NOT NULL DEFAULT (datetime('now')),
    expires_at TEXT,
    is_active INTEGER NOT NULL DEFAULT 1,
    game_data TEXT DEFAULT '{}',
    CHECK (is_active IN (0, 1))
);

CREATE INDEX IF NOT EXISTS idx_bans_server ON bans(server_id);
CREATE INDEX IF NOT EXISTS idx_bans_guid ON bans(guid);
CREATE INDEX IF NOT EXISTS idx_bans_active ON bans(is_active);

CREATE TABLE IF NOT EXISTS logs (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    server_id INTEGER NOT NULL REFERENCES servers(id) ON DELETE CASCADE,
    timestamp TEXT NOT NULL,
    level TEXT NOT NULL DEFAULT 'info',
    message TEXT NOT NULL,
    created_at TEXT NOT NULL DEFAULT (datetime('now')),
    CHECK (level IN ('info', 'warning', 'error'))
);

CREATE INDEX IF NOT EXISTS idx_logs_server_ts ON logs(server_id, timestamp);
CREATE INDEX IF NOT EXISTS idx_logs_level ON logs(level);
CREATE INDEX IF NOT EXISTS idx_logs_created ON logs(created_at);

CREATE TABLE IF NOT EXISTS metrics (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    server_id INTEGER NOT NULL REFERENCES servers(id) ON DELETE CASCADE,
    timestamp TEXT NOT NULL DEFAULT (datetime('now')),
    cpu_percent REAL,
    ram_mb REAL,
    player_count INTEGER
);

CREATE INDEX IF NOT EXISTS idx_metrics_server_ts ON metrics(server_id, timestamp);

CREATE TABLE IF NOT EXISTS server_events (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    server_id INTEGER NOT NULL REFERENCES servers(id) ON DELETE CASCADE,
    event_type TEXT NOT NULL,
    actor TEXT,
    detail TEXT,
    created_at TEXT NOT NULL DEFAULT (datetime('now'))
);

CREATE INDEX IF NOT EXISTS idx_events_server ON server_events(server_id, created_at);
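The commit message mentions SQLite in WAL mode, but the connection setup is not in the shown hunks. Per-connection pragmas would typically be applied roughly like this; the DB URL and the choice to set them via a SQLAlchemy connect event are assumptions:

# Hypothetical connection setup for WAL mode (the real database.py is not shown here)
from sqlalchemy import create_engine, event

engine = create_engine("sqlite:///app.db")  # assumed DB URL


@event.listens_for(engine, "connect")
def _set_sqlite_pragmas(dbapi_conn, _record):
    cur = dbapi_conn.cursor()
    cur.execute("PRAGMA journal_mode=WAL")  # concurrent readers during writes
    cur.execute("PRAGMA foreign_keys=ON")   # enforce the ON DELETE CASCADE rules above
    cur.close()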

@@ -0,0 +1,142 @@
"""Ban management endpoints — create, list, and revoke bans."""
from __future__ import annotations
import logging
from typing import Annotated
from fastapi import APIRouter, Depends, HTTPException, status
from pydantic import BaseModel, field_validator
from sqlalchemy.engine import Connection
from adapters.arma3.ban_manager import Arma3BanManager
from core.dal.ban_repository import BanRepository
from core.servers.service import ServerService
from database import get_db
from dependencies import get_current_user, require_admin
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/servers/{server_id}/bans", tags=["bans"])
def _ok(data):
return {"success": True, "data": data, "error": None}
class CreateBanRequest(BaseModel):
player_uid: str
ban_type: str = "GUID"
reason: str = ""
duration_minutes: int = 0 # 0 = permanent
@field_validator("ban_type")
@classmethod
def validate_ban_type(cls, v: str) -> str:
if v not in ("GUID", "IP"):
raise ValueError("ban_type must be 'GUID' or 'IP'")
return v
@field_validator("duration_minutes")
@classmethod
def validate_duration(cls, v: int) -> int:
if v < 0:
raise ValueError("duration_minutes cannot be negative")
return v
@router.get("")
def list_bans(
server_id: int,
db: Annotated[Connection, Depends(get_db)],
_user: Annotated[dict, Depends(get_current_user)],
) -> dict:
"""List all active bans for the server."""
ServerService(db).get_server(server_id) # raises 404 if not found
ban_repo = BanRepository(db)
bans = ban_repo.get_all(server_id=server_id)
return _ok(bans)
@router.post("", status_code=status.HTTP_201_CREATED)
def create_ban(
server_id: int,
body: CreateBanRequest,
db: Annotated[Connection, Depends(get_db)],
_admin: Annotated[dict, Depends(require_admin)],
) -> dict:
"""Create a new ban. Writes to DB and syncs to bans.txt."""
ServerService(db).get_server(server_id) # raises 404 if not found
ban_repo = BanRepository(db)
# Calculate expires_at if duration is set
expires_at = None
if body.duration_minutes > 0:
from datetime import datetime, timezone, timedelta
expires_at = (
datetime.now(timezone.utc) + timedelta(minutes=body.duration_minutes)
).isoformat()
ban_id = ban_repo.create(
server_id=server_id,
guid=body.player_uid if body.ban_type == "GUID" else None,
name=None,
reason=body.reason,
banned_by=_admin["username"],
expires_at=expires_at,
game_data={"ban_type": body.ban_type, "duration_minutes": body.duration_minutes},
)
db.commit()
ban = ban_repo.get_by_id(ban_id)
# Sync to bans.txt (non-blocking — log error but don't fail request)
_sync_ban_to_file(server_id, body.player_uid, body.ban_type, body.reason, body.duration_minutes)
return _ok(ban)
@router.delete("/{ban_id}")
def revoke_ban(
server_id: int,
ban_id: int,
db: Annotated[Connection, Depends(get_db)],
_admin: Annotated[dict, Depends(require_admin)],
) -> dict:
"""Revoke a ban (marks as inactive in DB, removes from bans.txt)."""
ServerService(db).get_server(server_id) # raises 404 if not found
ban_repo = BanRepository(db)
ban = ban_repo.get_by_id(ban_id)
if ban is None or ban["server_id"] != server_id:
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
detail={"code": "NOT_FOUND", "message": "Ban not found"},
)
ban_repo.deactivate(ban_id)
db.commit()
# Remove from bans.txt
_remove_ban_from_file(server_id, ban.get("guid") or "")
return _ok({"message": f"Ban {ban_id} revoked"})
# ── File sync helpers ──
def _sync_ban_to_file(
server_id: int, identifier: str, ban_type: str, reason: str, duration_minutes: int
) -> None:
"""Write ban to bans.txt. Log error but don't fail the request."""
try:
mgr = Arma3BanManager(server_id)
mgr.add_ban(identifier, ban_type, reason, duration_minutes)
except Exception as exc:
logger.error("Failed to sync ban to bans.txt for server %d: %s", server_id, exc)
def _remove_ban_from_file(server_id: int, identifier: str) -> None:
"""Remove ban from bans.txt. Log error but don't fail the request."""
try:
mgr = Arma3BanManager(server_id)
mgr.remove_ban(identifier)
except Exception as exc:
logger.error("Failed to remove ban from bans.txt for server %d: %s", server_id, exc)
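A usage sketch for the create-ban endpoint above, grounded in CreateBanRequest; the host, port, and token are assumed:

import requests

BASE = "http://localhost:8000"  # assumed host/port
HEADERS = {"Authorization": "Bearer <admin-token>"}  # token from /auth/login

resp = requests.post(
    f"{BASE}/servers/1/bans",
    headers=HEADERS,
    json={
        "player_uid": "abcdef0123456789abcdef0123456789",
        "ban_type": "GUID",
        "reason": "Team killing",
        "duration_minutes": 0,  # 0 = permanent
    },
)
print(resp.status_code, resp.json()["data"])  # 201 and the stored ban row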

@@ -0,0 +1,115 @@
"""Mission management endpoints — list, upload, delete mission files."""
from __future__ import annotations
import logging
from typing import Annotated
from fastapi import APIRouter, Depends, HTTPException, UploadFile, File, status
from sqlalchemy.engine import Connection
from adapters.exceptions import AdapterError
from adapters.registry import GameAdapterRegistry
from core.servers.service import ServerService
from database import get_db
from dependencies import get_current_user, require_admin
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/servers/{server_id}/missions", tags=["missions"])
_MAX_UPLOAD_SIZE = 500 * 1024 * 1024 # 500 MB
def _ok(data):
return {"success": True, "data": data, "error": None}
def _get_mission_manager(server_id: int, game_type: str):
"""Get MissionManager for the server's game type."""
adapter = GameAdapterRegistry.get(game_type)
if not adapter.has_capability("mission_manager"):
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail={"code": "NOT_SUPPORTED", "message": f"Game type '{game_type}' does not support mission management"},
)
return adapter.get_mission_manager(server_id)
@router.get("")
def list_missions(
server_id: int,
db: Annotated[Connection, Depends(get_db)],
_user: Annotated[dict, Depends(get_current_user)],
) -> dict:
"""List all available mission files on disk."""
server = ServerService(db).get_server(server_id) # raises 404 if not found
mgr = _get_mission_manager(server_id, server["game_type"])
try:
missions = mgr.list_missions()
except AdapterError as exc:
raise HTTPException(status_code=500, detail={"code": "ADAPTER_ERROR", "message": str(exc)})
return _ok({
"server_id": server_id,
"missions": missions,
"total": len(missions),
})
@router.post("", status_code=status.HTTP_201_CREATED)
async def upload_mission(
server_id: int,
db: Annotated[Connection, Depends(get_db)],
_admin: Annotated[dict, Depends(require_admin)],
file: UploadFile = File(...),
) -> dict:
"""
Upload a mission .pbo file.
Max size: 500 MB.
"""
server = ServerService(db).get_server(server_id) # raises 404 if not found
if not file.filename:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail={"code": "NO_FILENAME", "message": "No filename provided"},
)
content = await file.read()
if len(content) > _MAX_UPLOAD_SIZE:
raise HTTPException(
status_code=status.HTTP_413_REQUEST_ENTITY_TOO_LARGE,
detail={"code": "FILE_TOO_LARGE", "message": f"File too large. Max size is {_MAX_UPLOAD_SIZE // (1024*1024)} MB"},
)
mgr = _get_mission_manager(server_id, server["game_type"])
try:
mission = mgr.upload_mission(file.filename, content)
except AdapterError as exc:
raise HTTPException(status_code=400, detail={"code": "ADAPTER_ERROR", "message": str(exc)})
return _ok(mission)
@router.delete("/{filename}")
def delete_mission(
server_id: int,
filename: str,
db: Annotated[Connection, Depends(get_db)],
_admin: Annotated[dict, Depends(require_admin)],
) -> dict:
"""Delete a mission file by filename."""
server = ServerService(db).get_server(server_id) # raises 404 if not found
mgr = _get_mission_manager(server_id, server["game_type"])
try:
deleted = mgr.delete_mission(filename)
except AdapterError as exc:
raise HTTPException(status_code=400, detail={"code": "ADAPTER_ERROR", "message": str(exc)})
if not deleted:
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
detail={"code": "NOT_FOUND", "message": f"Mission '{filename}' not found"},
)
return _ok({"message": f"Mission '{filename}' deleted"})

@@ -0,0 +1,101 @@
"""Mod management endpoints — list available mods, set enabled mods."""
from __future__ import annotations
import logging
from typing import Annotated
from fastapi import APIRouter, Depends, HTTPException, status
from pydantic import BaseModel
from sqlalchemy.engine import Connection
from adapters.exceptions import AdapterError
from adapters.registry import GameAdapterRegistry
from core.dal.config_repository import ConfigRepository
from core.servers.service import ServerService
from database import get_db
from dependencies import get_current_user, require_admin
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/servers/{server_id}/mods", tags=["mods"])
def _ok(data):
return {"success": True, "data": data, "error": None}
class SetEnabledModsRequest(BaseModel):
mods: list[str]
def _get_mod_manager(server_id: int, game_type: str):
"""Get ModManager for the server's game type."""
adapter = GameAdapterRegistry.get(game_type)
if not adapter.has_capability("mod_manager"):
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail={"code": "NOT_SUPPORTED", "message": f"Game type '{game_type}' does not support mod management"},
)
return adapter.get_mod_manager(server_id)
@router.get("")
def list_mods(
server_id: int,
db: Annotated[Connection, Depends(get_db)],
_user: Annotated[dict, Depends(get_current_user)],
) -> dict:
"""List all available mods and which are enabled."""
server = ServerService(db).get_server(server_id) # raises 404 if not found
mgr = _get_mod_manager(server_id, server["game_type"])
config_repo = ConfigRepository(db)
try:
available = mgr.list_available_mods()
enabled = set(mgr.get_enabled_mods(config_repo))
except AdapterError as exc:
raise HTTPException(status_code=500, detail={"code": "ADAPTER_ERROR", "message": str(exc)})
for mod in available:
mod["enabled"] = mod["name"] in enabled
return _ok({
"server_id": server_id,
"mods": available,
"enabled_count": len(enabled),
})
@router.put("/enabled")
def set_enabled_mods(
server_id: int,
body: SetEnabledModsRequest,
db: Annotated[Connection, Depends(get_db)],
_admin: Annotated[dict, Depends(require_admin)],
) -> dict:
"""
Set the list of enabled mods.
Replaces the current enabled list entirely.
Server must be restarted for changes to take effect.
"""
server = ServerService(db).get_server(server_id) # raises 404 if not found
mgr = _get_mod_manager(server_id, server["game_type"])
config_repo = ConfigRepository(db)
try:
mgr.set_enabled_mods(body.mods, config_repo)
except AdapterError as exc:
raise HTTPException(status_code=400, detail={"code": "ADAPTER_ERROR", "message": str(exc)})
except ValueError as exc:
if "CONFIG_VERSION_CONFLICT" in str(exc):
raise HTTPException(
status_code=status.HTTP_409_CONFLICT,
detail={"code": "VERSION_CONFLICT", "message": "Config was modified by another request. Please retry."},
)
raise
db.commit()
return _ok({
"message": "Enabled mods updated. Restart the server for changes to take effect.",
"enabled_mods": body.mods,
})

@@ -0,0 +1,57 @@
"""Player endpoints — list current players for a running server."""
from __future__ import annotations
import logging
from typing import Annotated
from fastapi import APIRouter, Depends
from sqlalchemy.engine import Connection
from core.dal.player_repository import PlayerRepository
from core.servers.service import ServerService
from database import get_db
from dependencies import get_current_user
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/servers/{server_id}/players", tags=["players"])
def _ok(data):
return {"success": True, "data": data, "error": None}
@router.get("")
def list_players(
server_id: int,
db: Annotated[Connection, Depends(get_db)],
_user: Annotated[dict, Depends(get_current_user)],
) -> dict:
"""List current players (cached from RemoteAdminPollerThread)."""
ServerService(db).get_server(server_id) # raises 404 if not found
player_repo = PlayerRepository(db)
players = player_repo.get_all(server_id=server_id)
count = player_repo.count(server_id=server_id)
return _ok({
"server_id": server_id,
"player_count": count,
"players": players,
})
@router.get("/history")
def player_history(
server_id: int,
db: Annotated[Connection, Depends(get_db)],
_user: Annotated[dict, Depends(get_current_user)],
limit: int = 100,
offset: int = 0,
search: str | None = None,
) -> dict:
"""Get historical player sessions."""
ServerService(db).get_server(server_id) # raises 404 if not found
player_repo = PlayerRepository(db)
total, rows = player_repo.get_history(
server_id=server_id, limit=limit, offset=offset, search=search,
)
return _ok({"total": total, "items": rows})

@@ -0,0 +1,243 @@
"""
ProcessManager singleton — owns all subprocess handles.
Game-agnostic: delegates exe validation and config to adapters.
"""
from __future__ import annotations
import logging
import subprocess
import threading
from pathlib import Path
import psutil
logger = logging.getLogger(__name__)
class ProcessManager:
_instance: "ProcessManager | None" = None
_init_lock = threading.Lock()
def __init__(self):
self._processes: dict[int, subprocess.Popen] = {}
self._lock = threading.Lock()
self._operation_locks: dict[int, threading.Lock] = {}
self._ops_lock = threading.Lock()
@classmethod
def get(cls) -> "ProcessManager":
if cls._instance is None:
with cls._init_lock:
if cls._instance is None:
cls._instance = ProcessManager()
return cls._instance
def get_operation_lock(self, server_id: int) -> threading.Lock:
"""Per-server lock that serializes start/stop/restart for the same server."""
with self._ops_lock:
if server_id not in self._operation_locks:
self._operation_locks[server_id] = threading.Lock()
return self._operation_locks[server_id]
def start(
self,
server_id: int,
exe_path: str,
args: list[str],
cwd: str | Path,
) -> int:
"""
Start a game server process.
Returns the PID.
cwd is set to servers/{server_id}/ so relative config paths work.
"""
with self._lock:
if server_id in self._processes:
proc = self._processes[server_id]
if proc.poll() is None:
raise RuntimeError(f"Server {server_id} is already running (PID {proc.pid})")
del self._processes[server_id]
full_cmd = [exe_path] + args
logger.info("Starting server %d: %s", server_id, ' '.join(full_cmd))
proc = subprocess.Popen(
full_cmd,
cwd=str(cwd),
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
# On Windows, don't create a new console window
creationflags=subprocess.CREATE_NO_WINDOW if hasattr(subprocess, "CREATE_NO_WINDOW") else 0,
)
with self._lock:
self._processes[server_id] = proc
logger.info("Server %d started with PID %d", server_id, proc.pid)
return proc.pid
def stop(self, server_id: int, timeout: int = 30) -> bool:
"""
Send terminate signal and wait up to timeout seconds.
On Windows, terminate() = hard kill (no SIGTERM).
Returns True if process exited, False if still running.
"""
with self._lock:
proc = self._processes.get(server_id)
if proc is None:
return True
try:
proc.terminate()
except ProcessLookupError:
return True
try:
proc.wait(timeout=timeout)
with self._lock:
self._processes.pop(server_id, None)
return True
except subprocess.TimeoutExpired:
return False
def kill(self, server_id: int) -> bool:
"""Force-kill the process immediately."""
with self._lock:
proc = self._processes.get(server_id)
if proc is None:
return True
try:
proc.kill()
proc.wait(timeout=5)
except (ProcessLookupError, subprocess.TimeoutExpired):
logger.debug("Process %d already exited or timed out during kill", server_id)
with self._lock:
self._processes.pop(server_id, None)
return True
def is_running(self, server_id: int) -> bool:
with self._lock:
proc = self._processes.get(server_id)
if proc is None:
return False
return proc.poll() is None
def get_pid(self, server_id: int) -> int | None:
with self._lock:
proc = self._processes.get(server_id)
if proc is None or proc.poll() is not None:
return None
return proc.pid
def get_process(self, server_id: int) -> subprocess.Popen | None:
with self._lock:
return self._processes.get(server_id)
def list_running(self) -> list[int]:
with self._lock:
return [sid for sid, p in self._processes.items() if p.poll() is None]
def recover_on_startup(self, db) -> None:
"""
On app restart: check DB for servers marked 'running'.
If the PID is still alive AND the process name matches the adapter's
allowed executables, re-attach monitoring threads.
Otherwise mark server as 'crashed'.
"""
from adapters.registry import GameAdapterRegistry
from core.dal.server_repository import ServerRepository
from core.dal.event_repository import EventRepository
from sqlalchemy import text
running_servers = ServerRepository(db).get_running()
for server in running_servers:
pid = server.get("pid")
if pid is None:
self._mark_crashed(server, db, "No PID recorded")
continue
# Check if PID is alive
if not psutil.pid_exists(pid):
self._mark_crashed(server, db, f"PID {pid} no longer exists")
continue
# Check process name matches adapter allowlist
try:
proc = psutil.Process(pid)
proc_name = proc.name()
adapter = GameAdapterRegistry.get(server["game_type"])
allowed = adapter.get_process_config().get_allowed_executables()
if not any(proc_name.lower() == exe.lower() for exe in allowed):
self._mark_crashed(
server, db,
f"PID {pid} has name '{proc_name}', not in allowlist {allowed}"
)
continue
except (psutil.NoSuchProcess, psutil.AccessDenied, KeyError) as e:
self._mark_crashed(server, db, str(e))
continue
# PID is valid — re-attach the process and start monitoring threads
logger.info(
"Recovering server %d (PID %d, %s)", server['id'], pid, server['game_type']
)
proc_obj = self._get_popen_for_pid(pid)
if proc_obj:
with self._lock:
self._processes[server["id"]] = proc_obj
# Re-start monitoring threads without re-launching the process
try:
from core.threads.thread_registry import ThreadRegistry
ThreadRegistry.reattach_server_threads(server["id"], db)
except Exception as e:
logger.warning("Could not re-attach threads for server %d: %s", server['id'], e)
else:
self._mark_crashed(server, db, f"Could not attach to PID {pid}")
def _mark_crashed(self, server: dict, db, reason: str) -> None:
from core.dal.server_repository import ServerRepository
from core.dal.event_repository import EventRepository
logger.warning("Server %d marked crashed on startup: %s", server['id'], reason)
ServerRepository(db).update_status(server["id"], "crashed")
EventRepository(db).insert(
server["id"], "crashed", actor="system",
detail={"reason": reason, "on_startup": True}
)
@staticmethod
def _get_popen_for_pid(pid: int) -> subprocess.Popen | None:
"""
Create a Popen-like wrapper that attaches to an existing PID.
NOTE: This is a limited wrapper — we cannot use Popen() on existing PIDs.
We use a sentinel object that wraps psutil.Process.
"""
try:
return _PsutilProcessWrapper(pid)
except (psutil.NoSuchProcess, psutil.AccessDenied):
return None
class _PsutilProcessWrapper:
"""
Minimal Popen-compatible wrapper around an existing process (by PID).
Used for startup recovery only.
"""
def __init__(self, pid: int):
self._psutil_proc = psutil.Process(pid)
self.pid = pid
def poll(self) -> int | None:
"""Return None if running, exit code if not (we use -1 for external termination)."""
if self._psutil_proc.is_running():
return None
return -1
def wait(self, timeout: int | None = None):
self._psutil_proc.wait(timeout=timeout)
def terminate(self):
self._psutil_proc.terminate()
def kill(self):
self._psutil_proc.kill()
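A hedged sketch of how ServerService presumably drives ProcessManager, combining the per-server operation lock with start; the module path, executable path, and launch arguments are all assumptions:

# Assumed module path for the class above
from core.process.manager import ProcessManager

pm = ProcessManager.get()
server_id = 1

# Serialize start/stop/restart for the same server via the operation lock
with pm.get_operation_lock(server_id):
    if not pm.is_running(server_id):
        pid = pm.start(
            server_id,
            exe_path="C:/arma3/arma3server_x64.exe",  # assumed install path
            args=["-config=server.cfg", "-port=2302"],  # assumed launch args
            cwd=f"servers/{server_id}",
        )
        print(f"started with PID {pid}")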

@@ -0,0 +1,233 @@
from __future__ import annotations
from typing import Annotated
from fastapi import APIRouter, Depends, HTTPException, status
from fastapi.responses import Response
from pydantic import BaseModel
from sqlalchemy.engine import Connection
from core.servers.schemas import (
CreateServerRequest, StopServerRequest, UpdateServerRequest,
)
from core.servers.service import ServerService
from database import get_db
from dependencies import get_current_user, require_admin
router = APIRouter(prefix="/servers", tags=["servers"])
def _ok(data):
return {"success": True, "data": data, "error": None}
class SendCommandRequest(BaseModel):
command: str
# ── Server CRUD ──────────────────────────────────────────────────────────────
@router.get("")
def list_servers(
game_type: str | None = None,
db: Annotated[Connection, Depends(get_db)] = None,
_user: Annotated[dict, Depends(get_current_user)] = None,
):
return _ok(ServerService(db).list_servers(game_type))
@router.post("", status_code=201)
def create_server(
body: CreateServerRequest,
db: Annotated[Connection, Depends(get_db)] = None,
_admin: Annotated[dict, Depends(require_admin)] = None,
):
return _ok(ServerService(db).create_server(
name=body.name,
game_type=body.game_type,
exe_path=body.exe_path,
game_port=body.game_port,
rcon_port=body.rcon_port,
description=body.description,
auto_restart=body.auto_restart,
max_restarts=body.max_restarts,
))
@router.get("/{server_id}")
def get_server(
server_id: int,
db: Annotated[Connection, Depends(get_db)] = None,
_user: Annotated[dict, Depends(get_current_user)] = None,
):
return _ok(ServerService(db).get_server(server_id))
@router.put("/{server_id}")
def update_server(
server_id: int,
body: UpdateServerRequest,
db: Annotated[Connection, Depends(get_db)] = None,
_admin: Annotated[dict, Depends(require_admin)] = None,
):
return _ok(ServerService(db).update_server(server_id, **body.model_dump(exclude_none=True)))
@router.delete("/{server_id}", status_code=204)
def delete_server(
server_id: int,
db: Annotated[Connection, Depends(get_db)] = None,
_admin: Annotated[dict, Depends(require_admin)] = None,
):
ServerService(db).delete_server(server_id)
return Response(status_code=204)
# ── Lifecycle ────────────────────────────────────────────────────────────────
@router.post("/{server_id}/start")
def start_server(
server_id: int,
db: Annotated[Connection, Depends(get_db)] = None,
_admin: Annotated[dict, Depends(require_admin)] = None,
):
return _ok(ServerService(db).start(server_id))
@router.post("/{server_id}/stop")
def stop_server(
server_id: int,
body: StopServerRequest | None = None,
db: Annotated[Connection, Depends(get_db)] = None,
_admin: Annotated[dict, Depends(require_admin)] = None,
):
force = body.force if body else False
return _ok(ServerService(db).stop(server_id, force=force))
@router.post("/{server_id}/restart")
def restart_server(
server_id: int,
db: Annotated[Connection, Depends(get_db)] = None,
_admin: Annotated[dict, Depends(require_admin)] = None,
):
return _ok(ServerService(db).restart(server_id))
@router.post("/{server_id}/kill")
def kill_server(
server_id: int,
db: Annotated[Connection, Depends(get_db)] = None,
_admin: Annotated[dict, Depends(require_admin)] = None,
):
return _ok(ServerService(db).kill(server_id))
# ── Config ───────────────────────────────────────────────────────────────────
@router.get("/{server_id}/config")
def get_config(
server_id: int,
db: Annotated[Connection, Depends(get_db)] = None,
_user: Annotated[dict, Depends(get_current_user)] = None,
):
return _ok(ServerService(db).get_config(server_id))
@router.get("/{server_id}/config/preview")
def get_config_preview(
server_id: int,
db: Annotated[Connection, Depends(get_db)] = None,
_admin: Annotated[dict, Depends(require_admin)] = None,
):
return _ok(ServerService(db).get_config_preview(server_id))
@router.get("/{server_id}/config/{section}")
def get_config_section(
server_id: int,
section: str,
db: Annotated[Connection, Depends(get_db)] = None,
_user: Annotated[dict, Depends(get_current_user)] = None,
):
return _ok(ServerService(db).get_config_section(server_id, section))
@router.put("/{server_id}/config/{section}")
def update_config_section(
server_id: int,
section: str,
body: dict, # Dynamic — adapter-specific fields
db: Annotated[Connection, Depends(get_db)] = None,
_admin: Annotated[dict, Depends(require_admin)] = None,
):
expected_version = body.pop("config_version", None)
return _ok(ServerService(db).update_config_section(
server_id, section, body, expected_version
))
# ── RCon ──────────────────────────────────────────────────────────────────────
@router.post("/{server_id}/rcon/command")
def send_rcon_command(
server_id: int,
body: SendCommandRequest,
db: Annotated[Connection, Depends(get_db)] = None,
_admin: Annotated[dict, Depends(require_admin)] = None,
):
"""Send an RCon command to a running server."""
from adapters.registry import GameAdapterRegistry
from adapters.exceptions import RemoteAdminError
from core.dal.config_repository import ConfigRepository
from core.dal.server_repository import ServerRepository
server = ServerRepository(db).get_by_id(server_id)
if server is None:
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
detail={"code": "NOT_FOUND", "message": f"Server {server_id} not found"},
)
adapter = GameAdapterRegistry.get(server["game_type"])
if not adapter.has_capability("remote_admin"):
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail={"code": "NOT_SUPPORTED", "message": f"Game type {server['game_type']} does not support RCon"},
)
# Get RCon password from config
remote_admin_factory = adapter.get_remote_admin()
config_gen = adapter.get_config_generator()
sensitive = config_gen.get_sensitive_fields("rcon") if "rcon" in config_gen.get_sections() else []
config_repo = ConfigRepository(db)
rcon_section = config_repo.get_section(server_id, "rcon", sensitive)
if not rcon_section or not rcon_section.get("password"):
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail={"code": "NO_RCON_PASSWORD", "message": "RCon password not configured for this server"},
)
password = rcon_section["password"]
# Fall back to the adapter's default RCon port convention (matches create_server)
rcon_port = server.get("rcon_port") or adapter.get_process_config().get_default_rcon_port(server["game_port"])
client = remote_admin_factory.create_client(
host="127.0.0.1",
port=rcon_port,
password=password,
)
try:
client.connect()
result = client.send_command(body.command)
client.disconnect()
except RemoteAdminError as exc:
raise HTTPException(
status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
detail={"code": "RCON_ERROR", "message": f"RCon command failed: {exc}"},
)
except Exception as exc:
raise HTTPException(
status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
detail={"code": "RCON_ERROR", "message": f"RCon connection failed: {exc}"},
)
return _ok({"response": result})
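# Editor's note: illustrative request/response for the endpoint above
# (hypothetical values; the success shape follows the _ok envelope):
#
#     POST /servers/3/rcon/command   body: {"command": "players"}
#     200 -> {"success": true, "data": {"response": "..."}, "error": null}
#     503 -> {"detail": {"code": "RCON_ERROR", "message": "RCon connection failed: ..."}}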


@@ -0,0 +1,35 @@
from __future__ import annotations
from pydantic import BaseModel, Field
class CreateServerRequest(BaseModel):
name: str
description: str | None = None
game_type: str = "arma3"
exe_path: str
game_port: int = Field(ge=1024, le=65535)
rcon_port: int | None = Field(default=None, ge=1024, le=65535)
auto_restart: bool = False
max_restarts: int = Field(default=3, ge=0, le=20)
class UpdateServerRequest(BaseModel):
name: str | None = None
description: str | None = None
exe_path: str | None = None
game_port: int | None = Field(default=None, ge=1024, le=65535)
rcon_port: int | None = Field(default=None, ge=1024, le=65535)
auto_restart: bool | None = None
max_restarts: int | None = None
class StopServerRequest(BaseModel):
force: bool = False
reason: str | None = None
class UpdateConfigSectionRequest(BaseModel):
config_version: int | None = None # Required for optimistic locking on PUT
# All other fields come from the adapter's JSON Schema — passed through as-is
model_config = {"extra": "allow"}
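# Editor's note: example CreateServerRequest payload (hypothetical values; the
# exe_path must additionally pass the adapter's executable allowlist, which is
# enforced at the service layer rather than in this schema):
#
#     {"name": "EU #1", "game_type": "arma3",
#      "exe_path": "C:/games/arma3/arma3server_x64.exe",
#      "game_port": 2302, "rcon_port": 2306,
#      "auto_restart": true, "max_restarts": 5}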


@@ -0,0 +1,503 @@
"""
ServerService — orchestrates all server lifecycle operations.
Delegates game-specific work to the adapter.
"""
from __future__ import annotations
import logging
import shutil
from pathlib import Path
from fastapi import HTTPException, status
from sqlalchemy.engine import Connection
from adapters.registry import GameAdapterRegistry
from core.dal.config_repository import ConfigRepository
from core.dal.event_repository import EventRepository
from core.dal.server_repository import ServerRepository
from core.servers.process_manager import ProcessManager
from core.utils.file_utils import ensure_server_dirs, get_server_dir
logger = logging.getLogger(__name__)
def _ok_response(data):
return {"success": True, "data": data, "error": None}
class ServerService:
def __init__(self, db: Connection):
self._db = db
self._server_repo = ServerRepository(db)
self._config_repo = ConfigRepository(db)
self._event_repo = EventRepository(db)
# ── CRUD ──────────────────────────────────────────────────────────────────
def list_servers(self, game_type: str | None = None) -> list[dict]:
"""Return server list with live metrics merged in."""
servers = self._server_repo.get_all(game_type)
return [self._enrich_server(s) for s in servers]
def get_server(self, server_id: int) -> dict:
server = self._server_repo.get_by_id(server_id)
if server is None:
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
detail={"code": "NOT_FOUND", "message": f"Server {server_id} not found"},
)
return self._enrich_server(server)
def _enrich_server(self, server: dict) -> dict:
"""Add live CPU/RAM/player count from DB."""
from core.dal.metrics_repository import MetricsRepository
from core.dal.player_repository import PlayerRepository
result = dict(server)
metrics = MetricsRepository(self._db).get_latest(server["id"])
if metrics:
result["cpu_percent"] = metrics["cpu_percent"]
result["ram_mb"] = metrics["ram_mb"]
else:
result["cpu_percent"] = None
result["ram_mb"] = None
result["player_count"] = PlayerRepository(self._db).count(server["id"])
return result
def create_server(
self,
name: str,
game_type: str,
exe_path: str,
game_port: int,
rcon_port: int | None = None,
description: str | None = None,
auto_restart: bool = False,
max_restarts: int = 3,
) -> dict:
# Validate adapter exists
try:
adapter = GameAdapterRegistry.get(game_type)
except KeyError:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail={"code": "GAME_TYPE_NOT_FOUND", "message": f"Unknown game type: {game_type}"},
)
# Validate exe
process_config = adapter.get_process_config()
exe_name = Path(exe_path).name
if exe_name not in process_config.get_allowed_executables():
from adapters.exceptions import ExeNotAllowedError
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail={
"code": "EXE_NOT_ALLOWED",
"message": f"Executable '{exe_name}' not allowed",
"allowed": process_config.get_allowed_executables(),
},
)
# Determine rcon_port if not provided
if rcon_port is None:
rcon_port = process_config.get_default_rcon_port(game_port)
# Check port conflicts against running servers
from core.utils.port_checker import check_ports_against_running_servers
conflicts = check_ports_against_running_servers(game_port, rcon_port, None, self._db)
if conflicts:
raise HTTPException(
status_code=status.HTTP_409_CONFLICT,
detail={
"code": "PORT_IN_USE",
"message": f"Ports already in use: {conflicts}",
},
)
# Create DB row
server_id = self._server_repo.create(
name=name,
game_type=game_type,
exe_path=exe_path,
game_port=game_port,
rcon_port=rcon_port,
description=description,
auto_restart=auto_restart,
max_restarts=max_restarts,
)
# Create directory layout
layout = process_config.get_server_dir_layout()
ensure_server_dirs(server_id, layout)
# Seed default config sections
config_gen = adapter.get_config_generator()
schema_version = config_gen.get_config_version()
for section in config_gen.get_sections():
defaults = config_gen.get_defaults(section)
sensitive = config_gen.get_sensitive_fields(section)
self._config_repo.upsert_section(
server_id=server_id,
game_type=game_type,
section=section,
config_data=defaults,
schema_version=schema_version,
sensitive_fields=sensitive,
)
self._event_repo.insert(server_id, "created", actor="admin")
return self.get_server(server_id)
def update_server(self, server_id: int, **updates) -> dict:
self.get_server(server_id) # raises 404 if not found
filtered = {k: v for k, v in updates.items() if v is not None}
if filtered:
self._server_repo.update(server_id, **filtered)
return self.get_server(server_id)
def delete_server(self, server_id: int) -> None:
server = self.get_server(server_id)
if server["status"] not in ("stopped", "crashed", "error"):
raise HTTPException(
status_code=status.HTTP_409_CONFLICT,
detail={
"code": "SERVER_NOT_STOPPED",
"message": "Server must be stopped before deletion",
},
)
self._server_repo.delete(server_id)
# Delete server directory
server_dir = get_server_dir(server_id)
if server_dir.exists():
shutil.rmtree(str(server_dir), ignore_errors=True)
# ── Lifecycle ─────────────────────────────────────────────────────────────
def start(self, server_id: int) -> dict:
"""
Full start sequence:
1. Load server + adapter
2. Validate exe
3. Check ports
4. Write config files (atomic)
5. Build launch args
6. Start process
7. Start monitoring threads
8. Return status
"""
from adapters.exceptions import (
ConfigWriteError, ExeNotAllowedError,
LaunchArgsError, ConfigValidationError,
)
from core.utils.port_checker import check_ports_against_running_servers
server = self.get_server(server_id)
if server["status"] in ("running", "starting"):
raise HTTPException(
status_code=status.HTTP_409_CONFLICT,
detail={"code": "SERVER_ALREADY_RUNNING", "message": "Server is already running"},
)
adapter = GameAdapterRegistry.get(server["game_type"])
process_config = adapter.get_process_config()
config_gen = adapter.get_config_generator()
# Validate exe
exe_name = Path(server["exe_path"]).name
if exe_name not in process_config.get_allowed_executables():
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail={
"code": "EXE_NOT_ALLOWED",
"message": f"Executable '{exe_name}' not in adapter allowlist",
"allowed": process_config.get_allowed_executables(),
},
)
# Check ports
conflicts = check_ports_against_running_servers(
server["game_port"], server.get("rcon_port"), server_id, self._db
)
if conflicts:
raise HTTPException(
status_code=status.HTTP_409_CONFLICT,
detail={"code": "PORT_IN_USE", "message": f"Ports in use: {conflicts}"},
)
# Load config sections (decrypt sensitive fields for config generation)
sensitive_by_section = {
s: config_gen.get_sensitive_fields(s)
for s in config_gen.get_sections()
}
sections = self._config_repo.get_all_sections(server_id, sensitive_by_section)
# Remove _meta from each section before passing to adapter
raw_sections = {
section: {k: v for k, v in data.items() if k != "_meta"}
for section, data in sections.items()
}
# Inject port into sections so build_launch_args can use it
if "_port" not in raw_sections:
raw_sections["_port"] = server["game_port"]
# Get mod args if adapter supports mods
mod_args: list[str] = []
if adapter.has_capability("mod_manager"):
from sqlalchemy import text
mods = self._db.execute(
text("""
SELECT m.folder_path, sm.is_server_mod, sm.sort_order
FROM server_mods sm JOIN mods m ON m.id = sm.mod_id
WHERE sm.server_id = :sid ORDER BY sm.sort_order
"""),
{"sid": server_id},
).fetchall()
mod_list = [dict(r._mapping) for r in mods]
mod_args = adapter.get_mod_manager().build_mod_args(mod_list)
# Write config files (atomic)
server_dir = get_server_dir(server_id)
try:
config_gen.write_configs(server_id, server_dir, raw_sections)
except ConfigWriteError as e:
self._server_repo.update_status(server_id, "error")
self._event_repo.insert(server_id, "config_write_error", detail={"error": str(e)})
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail={"code": "CONFIG_WRITE_ERROR", "message": str(e)},
)
except ConfigValidationError as e:
raise HTTPException(
status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
detail={"code": "INVALID_CONFIG", "message": str(e), "errors": e.errors},
)
# Build launch args
try:
launch_args = config_gen.build_launch_args(raw_sections, mod_args)
except LaunchArgsError as e:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail={"code": "INVALID_CONFIG", "message": str(e)},
)
# Start process
pm = ProcessManager.get()
with pm.get_operation_lock(server_id):
pid = pm.start(server_id, server["exe_path"], launch_args, cwd=str(server_dir))
# Update DB
from datetime import datetime, timezone
self._server_repo.update_status(
server_id, "starting", pid=pid,
started_at=datetime.now(timezone.utc).isoformat()
)
self._event_repo.insert(server_id, "started", detail={"pid": pid})
# Start monitoring threads
try:
from core.threads.thread_registry import ThreadRegistry
ThreadRegistry.start_server_threads(server_id, self._db)
except Exception as e:
logger.warning("Could not start monitoring threads: %s", e)
return {"status": "starting", "pid": pid}
def stop(self, server_id: int, force: bool = False) -> dict:
server = self.get_server(server_id)
if server["status"] in ("stopped", "crashed"):
raise HTTPException(
status_code=status.HTTP_409_CONFLICT,
detail={"code": "SERVER_NOT_RUNNING", "message": "Server is not running"},
)
# Mark as "stopping" so ProcessMonitorThread doesn't treat this as a crash
self._server_repo.update_status(server_id, "stopping")
# Stop monitoring threads first so they don't fight with shutdown
try:
from core.threads.thread_registry import ThreadRegistry
ThreadRegistry.stop_server_threads(server_id)
except Exception as exc:
logger.warning("Failed to stop monitoring threads for server %d during stop: %s", server_id, exc)
# Graceful shutdown via the adapter's remote admin interface is not wired up
# yet, so both paths currently go through ProcessManager.stop() below.
if not force:
logger.info("Graceful stop requested for server %d", server_id)
pm = ProcessManager.get()
with pm.get_operation_lock(server_id):
exited = pm.stop(server_id, timeout=30)
if not exited:
logger.warning("Server %d did not exit in 30s, force-killing", server_id)
pm.kill(server_id)
from datetime import datetime, timezone
self._server_repo.update_status(
server_id, "stopped",
pid=None, stopped_at=datetime.now(timezone.utc).isoformat()
)
from core.dal.player_repository import PlayerRepository
PlayerRepository(self._db).clear(server_id)
self._event_repo.insert(server_id, "stopped")
return {"status": "stopped"}
def restart(self, server_id: int) -> dict:
self.stop(server_id)
return self.start(server_id)
def kill(self, server_id: int) -> dict:
server = self.get_server(server_id)
# Mark as "stopping" so ProcessMonitorThread doesn't treat this as a crash
self._server_repo.update_status(server_id, "stopping")
# Stop monitoring threads first
try:
from core.threads.thread_registry import ThreadRegistry
ThreadRegistry.stop_server_threads(server_id)
except Exception as exc:
logger.warning("Failed to stop monitoring threads for server %d during kill: %s", server_id, exc)
pm = ProcessManager.get()
with pm.get_operation_lock(server_id):
pm.kill(server_id)
from datetime import datetime, timezone
self._server_repo.update_status(server_id, "stopped", pid=None,
stopped_at=datetime.now(timezone.utc).isoformat())
from core.dal.player_repository import PlayerRepository
PlayerRepository(self._db).clear(server_id)
self._event_repo.insert(server_id, "killed")
return {"status": "stopped"}
# ── Config ────────────────────────────────────────────────────────────────
def get_config(self, server_id: int) -> dict:
self.get_server(server_id)
adapter = GameAdapterRegistry.get(
self._server_repo.get_by_id(server_id)["game_type"]
)
config_gen = adapter.get_config_generator()
sensitive_by_section = {
s: config_gen.get_sensitive_fields(s) for s in config_gen.get_sections()
}
sections = self._config_repo.get_all_sections(server_id, sensitive_by_section)
# Mask sensitive fields in response (replace actual value with "***")
for section, data in sections.items():
sf = config_gen.get_sensitive_fields(section)
for field in sf:
if field in data and data[field]:
data[field] = "***"
return sections
def get_config_section(self, server_id: int, section: str) -> dict:
server = self.get_server(server_id)
adapter = GameAdapterRegistry.get(server["game_type"])
config_gen = adapter.get_config_generator()
if section not in config_gen.get_sections():
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
detail={"code": "NOT_FOUND", "message": f"Config section '{section}' not found"},
)
sensitive = config_gen.get_sensitive_fields(section)
data = self._config_repo.get_section(server_id, section, sensitive)
if data is None:
data = config_gen.get_defaults(section)
data["_meta"] = {"config_version": 0, "schema_version": config_gen.get_config_version()}
# Mask sensitive fields
for field in sensitive:
if field in data and data[field]:
data[field] = "***"
return data
def update_config_section(
self,
server_id: int,
section: str,
data: dict,
expected_version: int | None = None,
) -> dict:
server = self.get_server(server_id)
adapter = GameAdapterRegistry.get(server["game_type"])
config_gen = adapter.get_config_generator()
sections = config_gen.get_sections()
if section not in sections:
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
detail={"code": "NOT_FOUND", "message": f"Config section '{section}' not found"},
)
# Validate against adapter's Pydantic model
model_cls = sections[section]
# Get current values, merge with update (partial update support)
current = self._config_repo.get_section(
server_id, section, config_gen.get_sensitive_fields(section)
)
if current:
merged = {k: v for k, v in current.items() if k != "_meta"}
else:
merged = config_gen.get_defaults(section)
# Apply updates
for k, v in data.items():
if k not in ("_meta", "config_version"):
merged[k] = v
# Validate
try:
model_cls(**merged)
except Exception as e:
raise HTTPException(
status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
detail={"code": "INVALID_CONFIG", "message": str(e)},
)
sensitive = config_gen.get_sensitive_fields(section)
try:
new_version = self._config_repo.upsert_section(
server_id=server_id,
game_type=server["game_type"],
section=section,
config_data=merged,
schema_version=config_gen.get_config_version(),
sensitive_fields=sensitive,
expected_config_version=expected_version,
)
except ValueError as e:
error_msg = str(e)
if "CONFIG_VERSION_CONFLICT" in error_msg:
current_version = int(error_msg.split(":")[1])
current_data = self.get_config_section(server_id, section)
raise HTTPException(
status_code=status.HTTP_409_CONFLICT,
detail={
"code": "CONFIG_VERSION_CONFLICT",
"message": "Config was modified by another user. Re-read and merge.",
"current_config": current_data,
"current_version": current_version,
},
)
raise
self._event_repo.insert(
server_id, "config_updated", detail={"section": section, "version": new_version}
)
return self.get_config_section(server_id, section)
def get_config_preview(self, server_id: int) -> dict[str, str]:
server = self.get_server(server_id)
adapter = GameAdapterRegistry.get(server["game_type"])
config_gen = adapter.get_config_generator()
sensitive_by_section = {
s: config_gen.get_sensitive_fields(s) for s in config_gen.get_sections()
}
sections = self._config_repo.get_all_sections(server_id, sensitive_by_section)
raw_sections = {k: {kk: vv for kk, vv in v.items() if kk != "_meta"} for k, v in sections.items()}
server_dir = get_server_dir(server_id)
return config_gen.preview_config(server_id, server_dir, raw_sections)
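# Editor's note: optimistic-locking flow for config updates as implemented
# above (client-side sketch, hypothetical section and values):
#
#     1. GET /servers/3/config/rcon  -> response includes _meta.config_version
#     2. PUT /servers/3/config/rcon  with {"password": "x", "config_version": <that version>}
#     3. On 409 CONFIG_VERSION_CONFLICT the response carries current_config and
#        current_version: re-read, merge your change, and retry the PUT.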


@@ -0,0 +1,32 @@
from fastapi import APIRouter, Depends
from typing import Annotated
from dependencies import get_current_user
from adapters.registry import GameAdapterRegistry
router = APIRouter(prefix="/system", tags=["system"])
@router.get("/health")
def health():
return {"status": "ok"}
@router.get("/status")
def system_status(_user: Annotated[dict, Depends(get_current_user)]):
from sqlalchemy import text
from database import get_engine
with get_engine().connect() as db:
running = db.execute(
text("SELECT COUNT(*) FROM servers WHERE status IN ('running','starting')")
).fetchone()[0]
total = db.execute(text("SELECT COUNT(*) FROM servers")).fetchone()[0]
return {
"success": True,
"data": {
"version": "1.0.0",
"running_servers": running,
"total_servers": total,
"supported_games": [a.game_type for a in GameAdapterRegistry.all()],
},
"error": None,
}


@@ -0,0 +1,3 @@
from core.threads.thread_registry import ThreadRegistry
__all__ = ["ThreadRegistry"]


@@ -0,0 +1,123 @@
"""
BaseServerThread — base class for all per-server background threads.
Rules every subclass MUST follow:
- Call super().__init__(server_id, name) in __init__
- Implement _run_loop() — called repeatedly until _stop_event is set
- Do NOT override run() directly
- Use self._db for all database operations — it is a thread-local connection
- Call self._close_db() in your finally block if you open additional connections
- Exceptions raised from _run_loop() are caught, logged, and the loop continues
unless the exception is a fatal error — set self._fatal_error = True to stop
"""
from __future__ import annotations
import logging
import threading
from abc import ABC, abstractmethod
from database import get_thread_db
logger = logging.getLogger(__name__)
_EXCEPTION_BACKOFF_BASE = 2.0
_EXCEPTION_BACKOFF_MAX = 60.0
_EXCEPTION_BACKOFF_MULTIPLIER = 2.0
class BaseServerThread(ABC, threading.Thread):
"""
Abstract base for all per-server background threads.
Subclasses implement _run_loop(). This base class handles:
- Stop event signaling
- Thread-local DB connection lifecycle
- Exception backoff to prevent tight crash loops
- Structured logging with server_id context
"""
def __init__(self, server_id: int, name: str) -> None:
super().__init__(name=f"{name}-server-{server_id}", daemon=True)
self.server_id = server_id
self._stop_event = threading.Event()
self._fatal_error = False
self._db = None
self._exception_count = 0
# ── Public API ──
def stop(self) -> None:
"""Signal the thread to stop. Does not block."""
self._stop_event.set()
def stop_and_join(self, timeout: float = 5.0) -> None:
"""Signal stop and wait for the thread to exit."""
self._stop_event.set()
self.join(timeout=timeout)
@property
def is_stopping(self) -> bool:
return self._stop_event.is_set()
# ── Thread entry point ──
def run(self) -> None:
logger.info("[%s] Starting", self.name)
backoff = _EXCEPTION_BACKOFF_BASE
try:
self._db = get_thread_db()
self._on_start()
while not self._stop_event.is_set() and not self._fatal_error:
try:
self._run_loop()
backoff = _EXCEPTION_BACKOFF_BASE
self._exception_count = 0
except Exception as exc:
self._exception_count += 1
logger.error(
"[%s] Unhandled exception in _run_loop (count=%d): %s",
self.name, self._exception_count, exc, exc_info=True,
)
if self._fatal_error:
break
self._stop_event.wait(timeout=backoff)
backoff = min(backoff * _EXCEPTION_BACKOFF_MULTIPLIER, _EXCEPTION_BACKOFF_MAX)
except Exception as exc:
logger.critical("[%s] Fatal error in thread setup: %s", self.name, exc, exc_info=True)
finally:
self._on_stop()
self._close_db()
logger.info("[%s] Stopped", self.name)
# ── Hooks for subclasses ──
def _on_start(self) -> None:
"""Called once before the loop starts. Override for setup."""
def _on_stop(self) -> None:
"""Called once after the loop ends. Override for cleanup."""
@abstractmethod
def _run_loop(self) -> None:
"""
Implement the thread's work here.
Called repeatedly until stop() is called or _fatal_error is set.
Should block for a short period (sleep or wait) to avoid busy-looping.
"""
# ── Internal helpers ──
def _close_db(self) -> None:
if self._db is not None:
try:
self._db.close()
except Exception as exc:
logger.debug("[%s] Error closing DB connection: %s", self.name, exc)
self._db = None
def _sleep(self, seconds: float) -> None:
"""Interruptible sleep — wakes up early if stop() is called."""
self._stop_event.wait(timeout=seconds)
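# Editor's note: minimal subclass sketch (hypothetical HeartbeatThread, not
# part of this commit) showing the contract described in the docstring above:
#
#     class HeartbeatThread(BaseServerThread):
#         def __init__(self, server_id: int) -> None:
#             super().__init__(server_id, "Heartbeat")
#
#         def _run_loop(self) -> None:
#             logger.debug("[%s] heartbeat", self.name)
#             self._sleep(10.0)  # interruptible; returns early once stop() is set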


@@ -0,0 +1,167 @@
"""
LogTailThread — tails a server's log file, parses lines via LogParser,
and persists parsed entries to the logs table.
Design notes:
- Opens the log file in text mode with errors="replace" to handle encoding issues
- Detects log rotation by checking if the inode changes (Unix) or file shrinks (Windows)
- On rotation: closes old handle, reopens from position 0
- Flushes inserts in batches of up to LOG_BATCH_SIZE per loop iteration
"""
from __future__ import annotations
import logging
import os
import queue
from pathlib import Path
from typing import Callable, Optional
from core.dal.log_repository import LogRepository
from core.threads.base_thread import BaseServerThread
logger = logging.getLogger(__name__)
_LOG_BATCH_SIZE = 50
_POLL_INTERVAL = 1.0
_REOPEN_DELAY = 2.0
class LogTailThread(BaseServerThread):
"""
Tails a log file for a specific server.
Args:
server_id: The database server ID.
log_path: Absolute path to the log file to tail.
log_parser: LogParser adapter instance for this game type.
broadcast_queue: Optional queue.Queue to push parsed events to BroadcastThread.
"""
def __init__(
self,
server_id: int,
log_path: str,
log_parser,
broadcast_queue=None,
) -> None:
super().__init__(server_id, "LogTail")
self._log_path = log_path
self._log_parser = log_parser
self._broadcast_queue = broadcast_queue
self._file_handle = None
self._last_inode = None
self._last_size = 0
# ── Lifecycle ──
def _on_start(self) -> None:
self._open_log_file()
def _on_stop(self) -> None:
self._close_file()
# ── Main loop ──
def _run_loop(self) -> None:
if self._file_handle is None:
self._stop_event.wait(timeout=_POLL_INTERVAL)
self._open_log_file()
return
if self._detect_rotation():
logger.info("[%s] Log rotation detected, reopening", self.name)
self._close_file()
self._stop_event.wait(timeout=_REOPEN_DELAY)
self._open_log_file()
return
lines_read = 0
entries_to_insert = []
while lines_read < _LOG_BATCH_SIZE:
line = self._file_handle.readline()
if not line:
break
lines_read += 1
line = line.rstrip("\n").rstrip("\r")
if not line:
continue
parsed = self._log_parser.parse_line(line)
if parsed is not None:
entries_to_insert.append(parsed)
if entries_to_insert and self._db is not None:
log_repo = LogRepository(self._db)
for entry in entries_to_insert:
log_repo.insert(server_id=self.server_id, entry=entry)
try:
self._db.commit()
except Exception as exc:
logger.error("[%s] DB commit failed: %s", self.name, exc)
self._db.rollback()
if self._broadcast_queue is not None:
for entry in entries_to_insert:
try:
self._broadcast_queue.put_nowait({
"type": "log",
"server_id": self.server_id,
"data": entry,
})
except queue.Full:
logger.debug("[%s] Broadcast queue full, dropping log event", self.name)
if lines_read == 0:
self._stop_event.wait(timeout=_POLL_INTERVAL)
# ── File management ──
def _open_log_file(self) -> None:
if not os.path.exists(self._log_path):
return
try:
self._file_handle = open(
self._log_path, "r", encoding="utf-8", errors="replace"
)
# Start tailing from the end of the file
self._file_handle.seek(0, 2)
self._last_size = self._file_handle.tell()
stat = os.stat(self._log_path)
self._last_inode = getattr(stat, "st_ino", None)
logger.debug("[%s] Opened log file: %s", self.name, self._log_path)
except OSError as exc:
logger.warning("[%s] Cannot open log file %s: %s", self.name, self._log_path, exc)
self._file_handle = None
def _close_file(self) -> None:
if self._file_handle is not None:
try:
self._file_handle.close()
except OSError as exc:
logger.debug("[%s] Error closing log file: %s", self.name, exc)
self._file_handle = None
self._last_inode = None
self._last_size = 0
def _detect_rotation(self) -> bool:
"""Returns True if the log file has been rotated."""
try:
stat = os.stat(self._log_path)
except OSError:
return True
current_inode = getattr(stat, "st_ino", None)
if current_inode is not None and self._last_inode is not None:
if current_inode != self._last_inode:
return True
# Windows fallback: file shrunk
current_size = stat.st_size
if self._file_handle is not None:
current_pos = self._file_handle.tell()
if current_size < current_pos:
return True
self._last_size = current_size
return False


@@ -0,0 +1,118 @@
"""
MetricsCollectorThread — collects CPU and memory usage for a server process
and persists to the metrics table every COLLECTION_INTERVAL seconds.
Uses psutil to inspect the process identified by ProcessManager.get_pid().
If the process is not running, the thread sleeps and retries.
"""
from __future__ import annotations
import logging
import queue
import psutil
from core.dal.metrics_repository import MetricsRepository
from core.threads.base_thread import BaseServerThread
logger = logging.getLogger(__name__)
_COLLECTION_INTERVAL = 10.0
_RETENTION_DAYS = 1
class MetricsCollectorThread(BaseServerThread):
"""
Collects process metrics for a running game server.
Args:
server_id: Database server ID.
process_manager: ProcessManager singleton instance.
broadcast_queue: Optional queue.Queue for real-time metric pushes.
"""
def __init__(
self,
server_id: int,
process_manager,
broadcast_queue=None,
) -> None:
super().__init__(server_id, "MetricsCollector")
self._process_manager = process_manager
self._broadcast_queue = broadcast_queue
self._psutil_process = None
self._samples_since_cleanup = 0
self._cleanup_every = 360 # ~1 hour at 10s intervals
# ── Main loop ──
def _run_loop(self) -> None:
pid = self._process_manager.get_pid(self.server_id)
if pid is None:
self._psutil_process = None
self._stop_event.wait(timeout=_COLLECTION_INTERVAL)
return
# Reuse or create psutil.Process handle
if self._psutil_process is None or self._psutil_process.pid != pid:
try:
self._psutil_process = psutil.Process(pid)
self._psutil_process.cpu_percent(interval=None)
except psutil.NoSuchProcess:
self._psutil_process = None
self._stop_event.wait(timeout=_COLLECTION_INTERVAL)
return
self._stop_event.wait(timeout=_COLLECTION_INTERVAL)
if self._stop_event.is_set():
return
try:
cpu_pct = self._psutil_process.cpu_percent(interval=None)
mem_info = self._psutil_process.memory_info()
mem_mb = round(mem_info.rss / (1024 * 1024), 2)
except psutil.NoSuchProcess:
logger.info("[%s] Process %d no longer exists", self.name, pid)
self._psutil_process = None
return
except psutil.AccessDenied as exc:
logger.warning("[%s] Access denied reading process %d: %s", self.name, pid, exc)
return
if self._db is None:
return
metrics_repo = MetricsRepository(self._db)
metrics_repo.insert(
server_id=self.server_id,
cpu_percent=cpu_pct,
ram_mb=mem_mb,
)
try:
self._db.commit()
except Exception as exc:
logger.error("[%s] DB commit failed: %s", self.name, exc)
self._db.rollback()
return
if self._broadcast_queue is not None:
try:
self._broadcast_queue.put_nowait({
"type": "metrics",
"server_id": self.server_id,
"data": {"cpu_percent": cpu_pct, "memory_mb": mem_mb},
})
except queue.Full:
logger.debug("[%s] Broadcast queue full, dropping metrics event", self.name)
# Periodic cleanup
self._samples_since_cleanup += 1
if self._samples_since_cleanup >= self._cleanup_every:
self._samples_since_cleanup = 0
try:
metrics_repo.cleanup_old(server_id=self.server_id, retention_days=_RETENTION_DAYS)
self._db.commit()
except Exception as exc:
logger.warning("[%s] Cleanup failed: %s", self.name, exc)
self._db.rollback()
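# Editor's note: the priming call above (cpu_percent(interval=None) right after
# creating the Process handle) matters because psutil returns 0.0 from the
# first non-blocking call on a new handle; each subsequent call reports usage
# since the previous one, so every sample covers roughly one collection interval.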


@@ -0,0 +1,158 @@
"""
ProcessMonitorThread — watches a running game server process.
Responsibilities:
1. Detect when the process exits unexpectedly (crash).
2. On crash: update server status to "crashed" in DB, emit a crash event.
3. If auto_restart is enabled on the server record: trigger restart.
4. Respect max_restarts — if exceeded, leave server in "crashed" state.
Poll interval: 5 seconds.
"""
from __future__ import annotations
import logging
import queue
from core.dal.event_repository import EventRepository
from core.dal.server_repository import ServerRepository
from core.threads.base_thread import BaseServerThread
logger = logging.getLogger(__name__)
_POLL_INTERVAL = 5.0
class ProcessMonitorThread(BaseServerThread):
"""
Monitors the OS process for a running game server.
Args:
server_id: Database server ID.
process_manager: ProcessManager singleton (injected).
broadcast_queue: Optional queue.Queue for crash notifications.
"""
def __init__(
self,
server_id: int,
process_manager,
broadcast_queue=None,
) -> None:
super().__init__(server_id, "ProcessMonitor")
self._process_manager = process_manager
self._broadcast_queue = broadcast_queue
# ── Main loop ──
def _run_loop(self) -> None:
self._stop_event.wait(timeout=_POLL_INTERVAL)
if self._stop_event.is_set():
return
if not self._process_manager.is_running(self.server_id):
self._handle_unexpected_exit()
# After handling, stop this monitor — the server is no longer running
self._fatal_error = True
# ── Crash handling ──
def _handle_unexpected_exit(self) -> None:
if self._db is None:
return
server_repo = ServerRepository(self._db)
event_repo = EventRepository(self._db)
server = server_repo.get_by_id(self.server_id)
if server is None:
return
# Only treat as crash if the server was supposed to be running
if server["status"] not in ("running", "starting"):
return
logger.warning(
"[%s] Server %d process exited unexpectedly (status was '%s')",
self.name, self.server_id, server["status"],
)
# Increment crash counter
server_repo.increment_restart_count(self.server_id)
restart_count = server["restart_count"] + 1
max_restarts = server.get("max_restarts", 3)
# Record crash event
event_repo.insert(
server_id=self.server_id,
event_type="crash",
detail={"restart_count": restart_count},
)
should_restart = (
server.get("auto_restart", False)
and restart_count <= max_restarts
)
if should_restart:
server_repo.update_status(self.server_id, "restarting")
event_repo.insert(
server_id=self.server_id,
event_type="restart_scheduled",
detail={"attempt": restart_count, "max": max_restarts},
)
else:
server_repo.update_status(self.server_id, "crashed")
if restart_count > max_restarts:
event_repo.insert(
server_id=self.server_id,
event_type="restart_limit_reached",
detail={"restart_count": restart_count, "max_restarts": max_restarts},
)
try:
self._db.commit()
except Exception as exc:
logger.error("[%s] DB commit failed during crash handling: %s", self.name, exc)
self._db.rollback()
if self._broadcast_queue is not None:
try:
self._broadcast_queue.put_nowait({
"type": "server_status",
"server_id": self.server_id,
"data": {
"status": "restarting" if should_restart else "crashed",
"restart_count": restart_count,
},
})
except queue.Full:
logger.debug("[%s] Broadcast queue full, dropping server_status event", self.name)
# Trigger actual restart outside DB work
if should_restart:
self._trigger_restart()
def _trigger_restart(self) -> None:
"""
Calls ServerService.start() to restart the server.
This is safe to call from a background thread.
"""
try:
from database import get_thread_db
from core.servers.service import ServerService
db = get_thread_db()
try:
service = ServerService(db)
service.start(self.server_id)
except Exception as exc:
logger.error("[%s] Auto-restart start() failed: %s", self.name, exc, exc_info=True)
finally:
try:
db.close()
except Exception as exc:
logger.debug("[%s] Error closing restart DB connection: %s", self.name, exc)
except Exception as exc:
logger.error("[%s] Auto-restart failed: %s", self.name, exc, exc_info=True)


@@ -0,0 +1,169 @@
"""
RemoteAdminPollerThread — polls the game server's remote admin interface
(e.g. BattlEye RCon for Arma3) to sync the player list.
Design notes:
- Uses the RemoteAdminClient protocol injected at construction time
- Reconnects automatically on disconnect with exponential backoff
- Persists current player list to players table via PlayerRepository
- Emits player_join / player_leave events via EventRepository
- Pushes player list updates to broadcast_queue if provided
Poll interval: 30 seconds.
Reconnect backoff: 5s -> 10s -> 20s -> 40s -> 60s (cap).
"""
from __future__ import annotations
import logging
import queue
from core.dal.event_repository import EventRepository
from core.dal.player_repository import PlayerRepository
from core.threads.base_thread import BaseServerThread
logger = logging.getLogger(__name__)
_POLL_INTERVAL = 30.0
_RECONNECT_BACKOFF_BASE = 5.0
_RECONNECT_BACKOFF_MAX = 60.0
_RECONNECT_BACKOFF_MULT = 2.0
class RemoteAdminPollerThread(BaseServerThread):
"""
Polls the remote admin interface for a game server.
Args:
server_id: Database server ID.
remote_admin_client: Connected RemoteAdminClient instance.
broadcast_queue: Optional queue.Queue for player list pushes.
"""
def __init__(
self,
server_id: int,
remote_admin_client,
broadcast_queue=None,
) -> None:
super().__init__(server_id, "RemoteAdminPoller")
self._client = remote_admin_client
self._broadcast_queue = broadcast_queue
self._connected = False
self._reconnect_backoff = _RECONNECT_BACKOFF_BASE
self._known_players: dict[str, dict] = {} # player_uid -> player data
# ── Lifecycle ──
def _on_stop(self) -> None:
if self._connected and self._client is not None:
try:
self._client.disconnect()
except Exception as exc:
logger.debug("[%s] Error disconnecting remote admin on stop: %s", self.name, exc)
self._connected = False
# ── Main loop ──
def _run_loop(self) -> None:
if not self._connected:
self._attempt_connect()
return
self._stop_event.wait(timeout=_POLL_INTERVAL)
if self._stop_event.is_set():
return
try:
players = self._client.get_players()
self._reconnect_backoff = _RECONNECT_BACKOFF_BASE
self._sync_players(players)
except Exception as exc:
logger.warning("[%s] Poll failed: %s — will reconnect", self.name, exc)
self._connected = False
try:
if self._client is not None:
self._client.disconnect()
except Exception as exc:
logger.debug("[%s] Error disconnecting after poll failure: %s", self.name, exc)
# ── Connection management ──
def _attempt_connect(self) -> None:
try:
if hasattr(self._client, "connect"):
self._client.connect()
self._connected = True
self._reconnect_backoff = _RECONNECT_BACKOFF_BASE
logger.info("[%s] Connected to remote admin", self.name)
except Exception as exc:
logger.warning(
"[%s] Connection failed: %s — retrying in %.1fs",
self.name, exc, self._reconnect_backoff,
)
self._stop_event.wait(timeout=self._reconnect_backoff)
self._reconnect_backoff = min(
self._reconnect_backoff * _RECONNECT_BACKOFF_MULT,
_RECONNECT_BACKOFF_MAX,
)
# ── Player sync ──
def _sync_players(self, current_players: list[dict]) -> None:
"""
Diff current_players against self._known_players.
Insert join events for new players, leave events for departed ones.
Upsert all current players in the DB.
Each player dict must have at least: slot_id, name (other fields optional).
"""
if self._db is None:
return
player_repo = PlayerRepository(self._db)
event_repo = EventRepository(self._db)
# Build uid sets for diffing — use slot_id as key
current_slots = {str(p.get("slot_id", i)): p for i, p in enumerate(current_players)}
current_keys = set(current_slots.keys())
known_keys = set(self._known_players.keys())
joined = current_keys - known_keys
left = known_keys - current_keys
for slot_key, player in current_slots.items():
player_repo.upsert(server_id=self.server_id, player=player)
if slot_key in joined:
event_repo.insert(
server_id=self.server_id,
event_type="player_join",
detail={"name": player.get("name", ""), "slot": slot_key},
)
logger.debug("[%s] Player joined: %s (slot %s)", self.name, player.get("name"), slot_key)
for slot_key in left:
departed = self._known_players[slot_key]
event_repo.insert(
server_id=self.server_id,
event_type="player_leave",
detail={"name": departed.get("name", ""), "slot": slot_key},
)
logger.debug("[%s] Player left: %s (slot %s)", self.name, departed.get("name"), slot_key)
try:
self._db.commit()
except Exception as exc:
logger.error("[%s] DB commit failed during player sync: %s", self.name, exc)
self._db.rollback()
# Update known players
self._known_players = current_slots
if self._broadcast_queue is not None:
try:
self._broadcast_queue.put_nowait({
"type": "players",
"server_id": self.server_id,
"data": current_players,
})
except queue.Full:
logger.debug("[%s] Broadcast queue full, dropping players event", self.name)


@@ -0,0 +1,257 @@
"""
ThreadRegistry — manages the lifecycle of all per-server background threads.
One instance is created at app startup and stored in app.state.thread_registry.
Also provides class-level methods for convenience (called from ServerService).
Thread set per server:
- LogTailThread (started if adapter has "log_parser" capability and log_path is known)
- MetricsCollectorThread (always started)
- ProcessMonitorThread (always started)
- RemoteAdminPollerThread (started only if adapter has "remote_admin" capability)
Key methods:
start_server_threads(server_id, db) — start all threads for a server
stop_server_threads(server_id) — stop all threads for a server
reattach_server_threads(server_id, db) — re-attach threads without restarting process
stop_all() — called at app shutdown
"""
from __future__ import annotations
import logging
import queue
from adapters.registry import GameAdapterRegistry
from core.dal.config_repository import ConfigRepository
from core.dal.server_repository import ServerRepository
from core.threads.log_tail import LogTailThread
from core.threads.metrics_collector import MetricsCollectorThread
from core.threads.process_monitor import ProcessMonitorThread
from core.threads.remote_admin_poller import RemoteAdminPollerThread
logger = logging.getLogger(__name__)
# Module-level singleton for convenience (used by ServerService)
_instance: ThreadRegistry | None = None
class ThreadRegistry:
"""
Manages all background threads for all running servers.
"""
def __init__(
self,
process_manager,
adapter_registry: GameAdapterRegistry | None = None,
global_broadcast_queue: queue.Queue | None = None,
) -> None:
self._process_manager = process_manager
self._adapter_registry = adapter_registry or GameAdapterRegistry
self._broadcast_queue = global_broadcast_queue or queue.Queue(maxsize=1000)
self._bundles: dict[int, dict] = {} # server_id -> thread bundle
# ── Class-level convenience API ──
@classmethod
def _get_instance(cls) -> "ThreadRegistry | None":
return _instance
@classmethod
def set_instance(cls, registry: "ThreadRegistry") -> None:
global _instance
_instance = registry
@classmethod
def start_server_threads(cls, server_id: int, db) -> None:
"""Class-level convenience — starts threads for a server using the singleton."""
registry = cls._get_instance()
if registry is not None:
registry._start_server_threads(server_id, db)
@classmethod
def stop_server_threads(cls, server_id: int) -> None:
"""Class-level convenience — stops threads for a server using the singleton."""
registry = cls._get_instance()
if registry is not None:
registry._stop_server_threads(server_id)
@classmethod
def reattach_server_threads(cls, server_id: int, db) -> None:
"""Class-level convenience — re-attaches threads for a recovered server."""
registry = cls._get_instance()
if registry is not None:
registry._reattach_server_threads(server_id, db)
@classmethod
def stop_all(cls) -> None:
"""Class-level convenience — stops all threads."""
registry = cls._get_instance()
if registry is not None:
registry._stop_all()
# ── Instance methods ──
def _start_server_threads(self, server_id: int, db) -> None:
if server_id in self._bundles:
logger.warning(
"ThreadRegistry: threads already exist for server %d — stopping first",
server_id,
)
self._stop_server_threads(server_id)
bundle = self._build_bundle(server_id, db)
self._bundles[server_id] = bundle
self._start_bundle(server_id, bundle)
def _stop_server_threads(self, server_id: int) -> None:
bundle = self._bundles.pop(server_id, None)
if bundle is None:
return
self._stop_bundle(server_id, bundle)
def _reattach_server_threads(self, server_id: int, db) -> None:
logger.info("ThreadRegistry: reattaching threads for server %d", server_id)
self._start_server_threads(server_id, db)
def _stop_all(self) -> None:
server_ids = list(self._bundles.keys())
for server_id in server_ids:
self._stop_server_threads(server_id)
logger.info("ThreadRegistry: all threads stopped")
def get_thread_count(self, server_id: int) -> int:
"""Returns the number of running threads for a server."""
bundle = self._bundles.get(server_id)
if bundle is None:
return 0
return sum(
1
for key in ("log_tail", "metrics", "monitor", "rcon_poller")
if bundle.get(key) is not None and bundle[key].is_alive()
)
# ── Bundle construction ──
def _build_bundle(self, server_id: int, db) -> dict:
"""Reads server + config data from DB and constructs (but does not start) the thread bundle."""
server_repo = ServerRepository(db)
config_repo = ConfigRepository(db)
server = server_repo.get_by_id(server_id)
if server is None:
raise ValueError(f"Server {server_id} not found in database")
game_type = server["game_type"]
adapter = self._adapter_registry.get(game_type)
# Log path: read from config if present, else use adapter default
log_path = None
if adapter.has_capability("log_parser"):
log_parser = adapter.get_log_parser()
# Try to resolve log path via the adapter's log file resolver
from core.utils.file_utils import get_server_dir
server_dir = get_server_dir(server_id)
if server_dir.exists():
resolver = log_parser.get_log_file_resolver(server_id)
resolved = resolver(server_dir)
if resolved is not None:
log_path = str(resolved)
bundle: dict = {
"log_tail": None,
"metrics": None,
"monitor": None,
"rcon_poller": None,
}
# Always: ProcessMonitorThread
bundle["monitor"] = ProcessMonitorThread(
server_id=server_id,
process_manager=self._process_manager,
broadcast_queue=self._broadcast_queue,
)
# Always: MetricsCollectorThread
bundle["metrics"] = MetricsCollectorThread(
server_id=server_id,
process_manager=self._process_manager,
broadcast_queue=self._broadcast_queue,
)
# Conditional: LogTailThread
if log_path and adapter.has_capability("log_parser"):
log_parser = adapter.get_log_parser()
bundle["log_tail"] = LogTailThread(
server_id=server_id,
log_path=log_path,
log_parser=log_parser,
broadcast_queue=self._broadcast_queue,
)
# Conditional: RemoteAdminPollerThread
if adapter.has_capability("remote_admin"):
remote_admin = adapter.get_remote_admin()
if remote_admin is not None:
# Get RCon password from config
rcon_password = self._get_remote_admin_password(server_id, config_repo)
if rcon_password:
try:
# Fall back to the adapter's default RCon port convention (matches create_server)
rcon_port = server.get("rcon_port") or adapter.get_process_config().get_default_rcon_port(server["game_port"])
client = remote_admin.create_client(
host="127.0.0.1",
port=rcon_port,
password=rcon_password,
)
bundle["rcon_poller"] = RemoteAdminPollerThread(
server_id=server_id,
remote_admin_client=client,
broadcast_queue=self._broadcast_queue,
)
except Exception as exc:
logger.warning(
"ThreadRegistry: could not create RCon client for server %d: %s",
server_id, exc,
)
return bundle
def _start_bundle(self, server_id: int, bundle: dict) -> None:
started = []
for key in ("monitor", "metrics", "log_tail", "rcon_poller"):
thread = bundle.get(key)
if thread is not None:
thread.start()
started.append(key)
logger.info("ThreadRegistry: started threads for server %d: %s", server_id, started)
def _stop_bundle(self, server_id: int, bundle: dict) -> None:
for key in ("rcon_poller", "log_tail", "metrics", "monitor"):
thread = bundle.get(key)
if thread is not None and thread.is_alive():
thread.stop_and_join(timeout=5.0)
logger.info("ThreadRegistry: stopped all threads for server %d", server_id)
# ── Helpers ──
def _get_remote_admin_password(
self, server_id: int, config_repo: ConfigRepository
) -> str | None:
"""Read the RCon password from the rcon config section."""
# Need to decrypt sensitive fields
from adapters.registry import GameAdapterRegistry
try:
server = ServerRepository(config_repo._db).get_by_id(server_id)
if server is None:
return None
adapter = self._adapter_registry.get(server["game_type"])
config_gen = adapter.get_config_generator()
sensitive = config_gen.get_sensitive_fields("rcon") if "rcon" in config_gen.get_sections() else []
except Exception as exc:
logger.debug("Could not determine sensitive fields for RCon config: %s", exc)
sensitive = []
rcon_section = config_repo.get_section(server_id, "rcon", sensitive)
if rcon_section is None:
return None
return rcon_section.get("password") or None


@@ -0,0 +1 @@
"""Core utility modules."""


@@ -0,0 +1,32 @@
"""Field-level encryption using Fernet (AES-256)."""
from __future__ import annotations
from cryptography.fernet import Fernet
_fernet: Fernet | None = None
def get_fernet() -> Fernet:
global _fernet
if _fernet is None:
from config import settings
_fernet = Fernet(settings.encryption_key.encode())
return _fernet
def encrypt(plaintext: str) -> str:
"""Encrypt plaintext string. Returns 'encrypted:<base64-token>'."""
token = get_fernet().encrypt(plaintext.encode()).decode()
return f"encrypted:{token}"
def decrypt(ciphertext: str) -> str:
"""Decrypt 'encrypted:<token>' string. Returns plaintext."""
if not ciphertext.startswith("encrypted:"):
return ciphertext # Not encrypted, return as-is
token = ciphertext[len("encrypted:"):]
return get_fernet().decrypt(token.encode()).decode()
def is_encrypted(value: str) -> bool:
return isinstance(value, str) and value.startswith("encrypted:")
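# Editor's note: usage sketch, assuming settings.encryption_key holds a valid
# base64 Fernet key (e.g. one produced by Fernet.generate_key()):
#
#     token = encrypt("rcon-secret")   # -> "encrypted:gAAAAB..."
#     decrypt(token)                   # -> "rcon-secret"
#     decrypt("plain-value")           # -> "plain-value" (pass-through)
#     is_encrypted(token)              # -> True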


@@ -0,0 +1,65 @@
"""Game-agnostic file operations."""
from __future__ import annotations
import re
from pathlib import Path
def get_server_dir(server_id: int) -> Path:
"""Return the absolute directory path for a server's data."""
from config import settings
base = Path(settings.servers_dir).resolve()
return base / str(server_id)
def ensure_server_dirs(server_id: int, layout: list[str] | None = None) -> None:
"""
Create servers/{id}/ and any subdirectories from adapter layout.
layout example: ["server", "battleye", "mpmissions"]
"""
server_dir = get_server_dir(server_id)
server_dir.mkdir(parents=True, exist_ok=True)
if layout:
for subdir in layout:
(server_dir / subdir).mkdir(parents=True, exist_ok=True)
def safe_delete_file(path: Path) -> bool:
"""Delete a file if it exists. Returns True if deleted."""
try:
path.unlink(missing_ok=True)
return True
except OSError:
return False
def sanitize_filename(filename: str) -> str:
"""
Sanitize a filename for safe disk storage.
Rules:
- Strip path separators (/ \\ and ..)
- Allow only alphanumeric, dots, hyphens, underscores, @ signs
- Collapse consecutive dots (prevent ../ tricks)
- Truncate to 255 characters
- Raise ValueError if the result is empty
"""
# Take only the basename — strip any directory components
filename = filename.replace("\\", "/").split("/")[-1]
# Remove null bytes and control characters
filename = re.sub(r"[\x00-\x1f\x7f]", "", filename)
# Allow only safe characters: alphanum, dot, hyphen, underscore, @
filename = re.sub(r"[^\w.\-@]", "_", filename)
# Collapse consecutive dots to prevent tricks like ".../.."
filename = re.sub(r"\.{2,}", ".", filename)
# Truncate
filename = filename[:255]
if not filename or filename in (".", ".."):
raise ValueError(f"Filename '{filename}' is not safe for storage")
return filename
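# Editor's note: illustrative behavior (hypothetical inputs):
#
#     sanitize_filename("../../etc/passwd")  -> "passwd" (basename only)
#     sanitize_filename("my mission.pbo")    -> "my_mission.pbo"
#     sanitize_filename("mod@v1.2.pbo")      -> "mod@v1.2.pbo" (unchanged)
#     sanitize_filename("..")                -> raises ValueError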


@@ -0,0 +1,87 @@
"""Game-agnostic port availability checking."""
from __future__ import annotations
import logging
import socket
logger = logging.getLogger(__name__)
def is_port_in_use(port: int, host: str = "127.0.0.1") -> bool:
"""Return True if the port is already bound."""
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
s.settimeout(0.5)
try:
s.bind((host, port))
return False
except OSError:
return True
def check_server_ports_available(
game_port: int,
rcon_port: int | None = None,
host: str = "127.0.0.1",
port_conventions: dict[str, int] | None = None,
) -> list[int]:
"""
Check all ports for a server instance.
If port_conventions is provided (from adapter), checks all derived ports.
Returns list of ports that are already in use (empty = all available).
"""
ports_to_check: set[int] = set()
if port_conventions:
ports_to_check.update(port_conventions.values())
else:
ports_to_check.add(game_port)
if rcon_port is not None:
ports_to_check.add(rcon_port)
return [p for p in sorted(ports_to_check) if is_port_in_use(p, host)]
def check_ports_against_running_servers(
new_server_game_port: int,
new_server_rcon_port: int | None,
exclude_server_id: int | None,
db,
) -> list[int]:
"""
Cross-game port conflict detection.
Checks new server's full port set against all running servers' full port sets.
Returns list of conflicting ports.
"""
from adapters.registry import GameAdapterRegistry
from sqlalchemy import text
rows = db.execute(
text("SELECT id, game_type, game_port, rcon_port FROM servers WHERE status IN ('running','starting')")
).fetchall()
occupied_ports: set[int] = set()
for row in rows:
if exclude_server_id and row[0] == exclude_server_id:
continue
try:
adapter = GameAdapterRegistry.get(row[1])
conventions = adapter.get_process_config().get_port_conventions(row[2])
occupied_ports.update(conventions.values())
except KeyError:
logger.debug("Unknown game type '%s', falling back to game_port only", row[1])
occupied_ports.add(row[2])
if row[3] is not None:
occupied_ports.add(row[3])
# Build the new server's full port set and check it against the occupied set
new_ports: set[int] = {new_server_game_port}
if new_server_rcon_port:
new_ports.add(new_server_rcon_port)
try:
# Temporary: the new server's game_type should be passed in; assume arma3 for now
adapter = GameAdapterRegistry.get("arma3")
new_ports.update(adapter.get_process_config().get_port_conventions(new_server_game_port).values())
except KeyError:
logger.debug("No 'arma3' adapter for port conventions, using game/rcon ports only")
return sorted(new_ports & occupied_ports)
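# Editor's note: usage sketch (hypothetical ports):
#
#     is_port_in_use(2302)                      # True if 127.0.0.1:2302 is bound
#     check_server_ports_available(2302, 2306)  # -> [] when both are free
#     check_ports_against_running_servers(2302, 2306, None, db)
#     # -> ports that collide with servers already in 'running'/'starting'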


@@ -0,0 +1,4 @@
from core.websocket.manager import WebSocketManager
from core.websocket.broadcast_thread import BroadcastThread
__all__ = ["WebSocketManager", "BroadcastThread"]


@@ -0,0 +1,116 @@
"""
BroadcastThread — the single bridge between OS threads and asyncio WebSocket world.
Reads events from a queue.Queue (written by background server threads) and
forwards them to the WebSocketManager running in the asyncio event loop.
Design:
- Runs as a daemon thread — no cleanup needed on shutdown.
- queue.Queue is thread-safe — multiple producer threads, single consumer.
- asyncio.run_coroutine_threadsafe() schedules the WebSocketManager.broadcast()
coroutine on the event loop from this non-asyncio thread.
- If the event loop is closed or the broadcast fails, the event is dropped silently.
Queue item format (dict):
{
"type": str, # "log", "metrics", "players", "server_status", etc.
"server_id": int, # Which server this event belongs to
"data": dict | list, # Payload — varies by type
}
"""
from __future__ import annotations
import asyncio
import logging
import queue
import threading
logger = logging.getLogger(__name__)
_QUEUE_GET_TIMEOUT = 1.0
_DROP_LOG_THRESHOLD = 100
class BroadcastThread(threading.Thread):
"""
Bridge from thread-world to asyncio-world.
Args:
event_queue: The shared queue.Queue that all background threads write to.
ws_manager: The WebSocketManager instance (asyncio-side).
loop: The asyncio event loop running in the main thread.
"""
def __init__(
self,
event_queue: queue.Queue,
ws_manager, # WebSocketManager — type annotation omitted to avoid circular import
loop: asyncio.AbstractEventLoop,
) -> None:
super().__init__(name="BroadcastThread", daemon=True)
self._queue = event_queue
self._ws_manager = ws_manager
self._loop = loop
self._stop_event = threading.Event()
self._dropped = 0
def stop(self) -> None:
self._stop_event.set()
def run(self) -> None:
logger.info("BroadcastThread: started")
while not self._stop_event.is_set():
try:
item = self._queue.get(timeout=_QUEUE_GET_TIMEOUT)
except queue.Empty:
continue
self._forward(item)
# Drain remaining items on shutdown
while not self._queue.empty():
try:
item = self._queue.get_nowait()
self._forward(item)
except queue.Empty:
break
logger.info("BroadcastThread: stopped")
def _forward(self, item: dict) -> None:
"""Schedule a broadcast on the asyncio event loop."""
if self._loop.is_closed():
self._dropped += 1
if self._dropped % _DROP_LOG_THRESHOLD == 0:
logger.warning(
"BroadcastThread: event loop closed, dropped %d messages",
self._dropped,
)
return
server_id = item.get("server_id")
event_type = item.get("type", "unknown")
data = item.get("data", {})
message = {
"type": event_type,
"server_id": server_id,
"data": data,
}
try:
future = asyncio.run_coroutine_threadsafe(
self._ws_manager.broadcast(server_id, message),
self._loop,
)
# Fire and forget — suppress unhandled exception warnings
future.add_done_callback(self._on_broadcast_done)
except RuntimeError as exc:
logger.debug("BroadcastThread: could not schedule broadcast: %s", exc)
def _on_broadcast_done(self, future) -> None:
"""Called when the broadcast coroutine completes. Log exceptions only."""
try:
future.result()
except Exception as exc:
logger.debug("BroadcastThread: broadcast error: %s", exc)

View File

@@ -0,0 +1,96 @@
"""
WebSocketManager — asyncio-side manager for WebSocket connections.
All methods are coroutines and must be called from the asyncio event loop.
No locking needed — the event loop is single-threaded.
Subscription model:
- Each connection subscribes to zero or more server_ids.
- Subscribing to server_id=None means "all servers".
- broadcast(server_id, message) sends to all clients subscribed to that server_id
plus all clients subscribed to None (global subscribers).
"""
from __future__ import annotations
import json
import logging
from typing import Optional
from fastapi import WebSocket
logger = logging.getLogger(__name__)
class WebSocketManager:
"""Manages active WebSocket connections and delivers broadcast messages."""
def __init__(self) -> None:
# Maps WebSocket -> set of subscribed server_ids (None = all)
self._connections: dict[WebSocket, set[Optional[int]]] = {}
# ── Connection lifecycle ──
async def connect(self, ws: WebSocket, server_ids: Optional[list[int]] = None) -> None:
"""
Accept a WebSocket connection and register it.
Args:
ws: The FastAPI WebSocket instance.
server_ids: List of server IDs to subscribe to, or None for all.
"""
await ws.accept()
subscriptions: set[Optional[int]] = set(server_ids) if server_ids else {None}
self._connections[ws] = subscriptions
logger.info(
"WebSocketManager: client connected, subscriptions=%s, total=%d",
subscriptions,
len(self._connections),
)
async def disconnect(self, ws: WebSocket) -> None:
"""Remove a disconnected WebSocket."""
self._connections.pop(ws, None)
logger.info(
"WebSocketManager: client disconnected, total=%d",
len(self._connections),
)
# ── Broadcast ──
async def broadcast(self, server_id: Optional[int], message: dict) -> None:
"""
Send a message to all clients subscribed to the given server_id.
Also sends to clients subscribed to None (global subscribers).
Disconnected clients are removed automatically.
"""
if not self._connections:
return
payload = json.dumps(message)
        disconnected = []
        # Iterate over a snapshot: awaiting send_text() yields to the event
        # loop, which may run connect()/disconnect() and mutate the dict
        # mid-iteration otherwise.
        for ws, subscriptions in list(self._connections.items()):
if None in subscriptions or server_id in subscriptions:
try:
await ws.send_text(payload)
except Exception as exc:
logger.debug("WebSocketManager: send failed, marking disconnected: %s", exc)
disconnected.append(ws)
for ws in disconnected:
await self.disconnect(ws)
async def send_to_connection(self, ws: WebSocket, message: dict) -> None:
"""Send a message to a single specific connection."""
try:
await ws.send_text(json.dumps(message))
except Exception as exc:
logger.debug("WebSocketManager: direct send failed, disconnecting: %s", exc)
await self.disconnect(ws)
# ── Stats ──
@property
def connection_count(self) -> int:
return len(self._connections)
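# App wiring sketch (hedged): the /ws endpoint in this commit reads the
# manager from `app.state.ws_manager`, so a single shared instance must be
# attached at startup. Placement in a lifespan hook is an assumption:
#
#     app.state.ws_manager = WebSocketManager()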

View File

@@ -0,0 +1,90 @@
"""
WebSocket endpoint.
URL: /ws
/ws?server_id=1
/ws?server_id=1&server_id=2
Authentication: JWT passed as the query parameter `token`, because the
browser WebSocket API does not support custom headers.
If the token is missing or invalid, the connection is closed with code 4001.
After authentication, the client receives:
- A "connected" welcome message with the list of subscribed server IDs
- All events for subscribed servers pushed by BroadcastThread
"""
from __future__ import annotations
import logging
from typing import Optional
from fastapi import APIRouter, Query, WebSocket, WebSocketDisconnect
from core.auth.utils import decode_access_token
logger = logging.getLogger(__name__)
router = APIRouter(tags=["websocket"])
@router.websocket("/ws")
async def websocket_endpoint(
ws: WebSocket,
token: Optional[str] = Query(default=None),
server_id: Optional[list[int]] = Query(default=None),
) -> None:
"""
WebSocket endpoint for real-time server events.
Query parameters:
token: JWT access token (required)
server_id: One or more server IDs to subscribe to (optional, default=all)
"""
# Authenticate before accepting
if not token:
await ws.close(code=4001, reason="Missing token")
return
try:
user = decode_access_token(token)
except Exception as exc:
logger.warning("WebSocket: token decode failed: %s", exc)
user = None
if user is None:
await ws.close(code=4001, reason="Invalid or expired token")
return
# Get WebSocketManager from app state
ws_manager = ws.app.state.ws_manager
await ws_manager.connect(ws, server_ids=server_id)
logger.info(
"WebSocket: user '%s' connected, subscribed to servers=%s",
user.get("sub"),
server_id,
)
try:
# Send welcome message
await ws_manager.send_to_connection(ws, {
"type": "connected",
"data": {
"user": user.get("sub"),
"subscriptions": server_id or "all",
},
})
        # Keep connection alive — inbound frames are read and discarded;
        # events flow server -> client only
        while True:
            await ws.receive_text()
except WebSocketDisconnect:
logger.info(
"WebSocket: user '%s' disconnected",
user.get("sub"),
)
except Exception as exc:
logger.error("WebSocket: unexpected error: %s", exc)
finally:
await ws_manager.disconnect(ws)