"""
commands/server.py
==================
ComfyUI server lifecycle management via NSSM Windows service.

On bot startup, `autostart_comfy()` runs as a background task:
  1. If the service does not exist, it is installed automatically.
  2. If the service exists but ComfyUI is not responding, it is started.

NSSM handles:
  - Background process management (no console window)
  - Stdout / stderr capture to rotating log files
  - Complete isolation from the bot's own NSSM service

Commands:
  ttr!server start      — start the service
  ttr!server stop       — stop the service
  ttr!server kill       — force-kill the process backing the service
  ttr!server restart    — restart the service
  ttr!server status     — NSSM service state + HTTP reachability
  ttr!server install    — (re)install / reconfigure the NSSM service
  ttr!server uninstall  — remove the service from Windows

Requires:
  - nssm.exe in PATH
  - The bot service account must have permission to manage Windows services
    (Local System or a user with SeServiceLogonRight works)
"""

from __future__ import annotations

import asyncio
import logging
import shlex
from pathlib import Path

import aiohttp
from discord.ext import commands

logger = logging.getLogger(__name__)

_POLL_INTERVAL = 5   # seconds between HTTP up-checks
_MAX_ATTEMPTS = 24   # 24 × 5s = 120s max wait

_NSSM_TIMEOUT = 30       # seconds allowed for a single `nssm` invocation
_TASKKILL_TIMEOUT = 10   # seconds allowed for `taskkill`
_SC_TIMEOUT = 10         # seconds allowed for `sc query`

# Public — imported by status_monitor for emoji rendering
STATUS_EMOJI: dict[str, str] = {
    "SERVICE_RUNNING": "🟢",
    "SERVICE_STOPPED": "🔴",
    "SERVICE_PAUSED": "🟡",
    "SERVICE_START_PENDING": "⏳",
    "SERVICE_STOP_PENDING": "⏳",
    "SERVICE_PAUSE_PENDING": "⏳",
    "SERVICE_CONTINUE_PENDING": "⏳",
}


# ---------------------------------------------------------------------------
# Low-level subprocess helpers
# ---------------------------------------------------------------------------

async def _communicate(proc: asyncio.subprocess.Process, timeout: float) -> bytes:
    """
    Wait for *proc* to exit and return whatever it wrote to its stdout pipe.

    Raises asyncio.TimeoutError when *timeout* seconds elapse. Whenever the
    wait ends without the process having exited (timeout, cancellation by an
    outer `wait_for`, or any other error) the child is killed so it is not
    left running in the background.
    """
    try:
        stdout, _ = await asyncio.wait_for(proc.communicate(), timeout=timeout)
        return stdout or b""   # stdout is None when it was not piped
    finally:
        if proc.returncode is None:
            try:
                proc.kill()
            except OSError:
                pass   # already gone — nothing left to clean up


def _decode_output(raw: bytes) -> str:
    """
    Decode captured command output into stripped text.

    NUL bytes in the payload are taken as a sign of UTF-16LE output (NSSM is
    reported to emit wide characters when its output is piped); otherwise the
    bytes are decoded with the default codec. Undecodable bytes are replaced
    rather than raising.
    """
    if b"\x00" in raw:
        text = raw.decode("utf-16-le", errors="replace")
    else:
        text = raw.decode(errors="replace")
    return text.lstrip("\ufeff").strip()


async def _nssm(*args: str) -> tuple[int, str]:
    """
    Run `nssm <args>` and return (returncode, output).

    stderr is merged into stdout. Failures to launch or complete the command
    never raise: they are reported as returncode -1 with an explanatory
    message in place of the output.
    """
    try:
        proc = await asyncio.create_subprocess_exec(
            "nssm", *args,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.STDOUT,
        )
        raw = await _communicate(proc, _NSSM_TIMEOUT)
    except FileNotFoundError:
        return -1, "nssm not found — is it installed and in PATH?"
    except asyncio.TimeoutError:
        return -1, "nssm command timed out."
    except Exception as exc:
        return -1, str(exc)
    return proc.returncode, _decode_output(raw)


async def _get_service_pid(service_name: str) -> int:
    """
    Return the PID of the process backing *service_name*, or 0 if unavailable.

    A non-zero exit code or non-numeric output from nssm both yield 0.
    """
    # NOTE(review): confirm that the deployed NSSM build supports `getpid`;
    # if it does not, this always returns 0 and force-kill becomes a no-op.
    rc, out = await _nssm("getpid", service_name)
    if rc != 0:
        return 0
    try:
        return int(out.strip())
    except ValueError:
        return 0


async def _kill_service_process(service_name: str) -> None:
    """
    Forcefully kill the process backing *service_name*.

    NSSM does not have a `kill` subcommand. Instead we retrieve the PID via
    `nssm getpid` and then use `taskkill /F /PID`. Safe to call when the
    service is already stopped (no-op if PID is 0). Errors are logged, never
    raised.
    """
    pid = await _get_service_pid(service_name)
    if not pid:
        return
    try:
        proc = await asyncio.create_subprocess_exec(
            "taskkill", "/F", "/PID", str(pid),
            stdout=asyncio.subprocess.DEVNULL,
            stderr=asyncio.subprocess.DEVNULL,
        )
        await _communicate(proc, _TASKKILL_TIMEOUT)
        logger.debug("taskkill /F /PID %d sent for service '%s'", pid, service_name)
    except Exception as exc:
        logger.warning("taskkill failed for PID %d (%s): %s", pid, service_name, exc)


async def _is_comfy_up(server_address: str, timeout: float = 3.0) -> bool:
    """
    Return True if the ComfyUI HTTP endpoint is responding.

    Issues a GET to `http://<server_address>/system_stats` and treats only a
    200 response as "up"; any error or other status counts as "down".
    """
    url = f"http://{server_address}/system_stats"
    request_timeout = aiohttp.ClientTimeout(total=timeout)
    try:
        async with aiohttp.ClientSession() as session:
            async with session.get(url, timeout=request_timeout) as resp:
                return resp.status == 200
    except Exception:
        return False


async def _service_exists(service_name: str) -> bool:
    """
    Return True if the Windows service is installed (running or stopped).

    Determined by the exit code of `sc query <service_name>`. If the command
    cannot be run or does not finish within the timeout, False is returned.
    """
    try:
        proc = await asyncio.create_subprocess_exec(
            "sc", "query", service_name,
            stdout=asyncio.subprocess.DEVNULL,
            stderr=asyncio.subprocess.DEVNULL,
        )
        await _communicate(proc, _SC_TIMEOUT)
        return proc.returncode == 0
    except Exception:
        return False


async def _wait_for_comfy(server_address: str) -> int | None:
    """
    Poll the ComfyUI HTTP endpoint until it responds.

    Sleeps `_POLL_INTERVAL` seconds before each of up to `_MAX_ATTEMPTS`
    checks. Returns the approximate number of seconds waited, or None if the
    server never responded.
    """
    for attempt in range(1, _MAX_ATTEMPTS + 1):
        await asyncio.sleep(_POLL_INTERVAL)
        if await _is_comfy_up(server_address):
            return attempt * _POLL_INTERVAL
    return None


# ---------------------------------------------------------------------------
# Public API — used by status_monitor and other modules
# ---------------------------------------------------------------------------

async def get_service_state(service_name: str) -> str:
    """
    Return the NSSM service state string for *service_name*.

    Returns one of the SERVICE_* keys in STATUS_EMOJI on success, or
    "error" / "timeout" / "unknown" on failure. Intended for use by the
    status dashboard — callers should not raise on these sentinel values.

    Note: when nssm itself runs but exits non-zero, its output text is
    returned as-is, so callers should treat any value that is not a
    STATUS_EMOJI key as "not a known state".
    """
    try:
        rc, out = await asyncio.wait_for(_nssm("status", service_name), timeout=5.0)
    except asyncio.TimeoutError:
        return "timeout"
    except Exception:
        return "error"
    if rc == -1:   # nssm could not be launched or did not complete
        return "error"
    return out.strip() or "unknown"


# ---------------------------------------------------------------------------
# Service installation
# ---------------------------------------------------------------------------

def _parse_extra_args(extra: object) -> list[str]:
    """
    Normalise the optional `comfy_extra_args` config value into argv tokens.

    Accepts a list/tuple (blank entries dropped, items stringified) or a
    single command-line string. Anything else, or a value that cannot be
    parsed, yields an empty list so installation is never aborted by it.
    """
    try:
        if isinstance(extra, (list, tuple)):
            return [str(item) for item in extra if str(item).strip()]
        if isinstance(extra, str) and extra.strip():
            # NOTE(review): shlex.split uses POSIX rules by default, where a
            # backslash is an escape character — unquoted Windows paths in
            # this string would lose their backslashes. Confirm whether any
            # deployment passes paths here.
            return shlex.split(extra)
    except Exception as exc:
        logger.warning("Ignoring unparsable comfy_extra_args %r: %s", extra, exc)
    return []


async def _install_service(config) -> tuple[bool, str]:
    """
    Install the ComfyUI NSSM service with log capture and rotation.

    We install directly via python.exe (not the .bat file) to avoid the
    "Terminate batch job (Y/N)?" prompt that can cause NSSM to hang on STOP.

    Safe to call even if the service already exists — it will be removed first.

    Reads from *config*: `comfy_service_name`, `comfy_start_bat`,
    `comfy_log_dir`, `comfy_log_max_mb` and, optionally, `comfy_extra_args`.

    Returns (success, message).
    """
    name = config.comfy_service_name
    start_bat = Path(config.comfy_start_bat)
    log_dir = Path(config.comfy_log_dir)
    log_file = str(log_dir / "comfyui.log")
    # int() keeps a fractional megabyte setting from producing "1572864.0".
    max_bytes = str(int(config.comfy_log_max_mb * 1024 * 1024))

    # Derive portable paths from the .bat location (ComfyUI_windows_portable root):
    #   <root>/run_nvidia_gpu.bat
    #   <root>/python_embeded/python.exe
    #   <root>/ComfyUI/main.py
    portable_root = start_bat.parent
    python_exe = portable_root / "python_embeded" / "python.exe"
    main_py = portable_root / "ComfyUI" / "main.py"

    required_paths = (
        (start_bat, "Start bat not found (used to derive paths)"),
        (python_exe, "Portable python not found"),
        (main_py, "ComfyUI main.py not found"),
    )
    for path, problem in required_paths:
        if not path.exists():
            return False, f"{problem}: `{path}`"

    try:
        log_dir.mkdir(parents=True, exist_ok=True)
    except OSError as exc:
        # Honour the (success, message) contract instead of raising.
        return False, f"Could not create log directory `{log_dir}`: {exc}"

    # Optional extra args from config (accepts string or list/tuple)
    extra_args = _parse_extra_args(getattr(config, "comfy_extra_args", None))

    # Remove any existing service cleanly before reinstalling
    if await _service_exists(name):
        await _nssm("stop", name)
        await _kill_service_process(name)   # force-kill if stuck in STOP_PENDING
        rc, out = await _nssm("remove", name, "confirm")
        if rc != 0:
            return False, f"Could not remove existing service: {out}"

    # nssm install <name> <python.exe> -s <main.py> --windows-standalone-build [extra]
    steps: list[tuple[str, ...]] = [
        ("install", name, str(python_exe), "-s", str(main_py),
         "--windows-standalone-build", *extra_args),
        ("set", name, "AppDirectory", str(portable_root)),
        ("set", name, "DisplayName", "ComfyUI Server"),
        ("set", name, "AppStdout", log_file),
        ("set", name, "AppStderr", log_file),
        ("set", name, "AppRotateFiles", "1"),
        ("set", name, "AppRotateBytes", max_bytes),
        ("set", name, "AppRotateOnline", "1"),
        ("set", name, "Start", "SERVICE_DEMAND_START"),
        # Stop behavior — prevent NSSM from hanging indefinitely
        ("set", name, "AppKillProcessTree", "1"),
        ("set", name, "AppStopMethodConsole", "1500"),
        ("set", name, "AppStopMethodWindow", "1500"),
        ("set", name, "AppStopMethodThreads", "1500"),
    ]

    for step in steps:
        rc, out = await _nssm(*step)
        if rc != 0:
            # Only the first three tokens are echoed to keep the message short.
            return False, f"`nssm {' '.join(step[:3])}` failed: {out}"

    return True, f"Service `{name}` installed. Log: `{log_file}`"


# ---------------------------------------------------------------------------
# Autostart (called from bot.py on_ready)
# ---------------------------------------------------------------------------

async def autostart_comfy(config) -> None:
    """
    Ensure ComfyUI is running when the bot starts.

    1. Install the NSSM service if it is missing.
    2. Start the service if ComfyUI is not already responding.

    Does nothing if config.comfy_autostart is False. Outcomes are reported
    through the module logger only; nothing is returned.
    """
    if not getattr(config, "comfy_autostart", True):
        return

    service = config.comfy_service_name

    if not await _service_exists(service):
        logger.info("NSSM service '%s' not found — installing", service)
        ok, msg = await _install_service(config)
        if not ok:
            logger.error("Failed to install ComfyUI service: %s", msg)
            return
        logger.info("ComfyUI service installed: %s", msg)

    if await _is_comfy_up(config.comfy_server):
        logger.info("ComfyUI already running at %s", config.comfy_server)
        return

    logger.info("Starting NSSM service '%s'", service)
    rc, out = await _nssm("start", service)
    if rc != 0:
        logger.warning("nssm start returned %d: %s", rc, out)
        return

    elapsed = await _wait_for_comfy(config.comfy_server)
    if elapsed is not None:
        logger.info("ComfyUI is up after ~%ds", elapsed)
        return

    logger.warning(
        "ComfyUI did not respond within %ds after service start",
        _MAX_ATTEMPTS * _POLL_INTERVAL,
    )
# ---------------------------------------------------------------------------
# Discord commands
# ---------------------------------------------------------------------------

def setup_server_commands(bot, config=None):
    """
    Register the `server` command group and its subcommands on *bot*.

    Args:
        bot: The bot object; its `group` decorator is used to register the
            command group.
        config: Bot configuration. The subcommands read `comfy_service_name`
            and `comfy_server` from it, and `install` hands it to
            `_install_service`. When None, every subcommand replies
            "Bot config not available." and does nothing else.
    """
    # NOTE(review): no permission check is attached to these commands in this
    # file — confirm that a global check elsewhere restricts who may stop,
    # kill or uninstall the service.

    max_wait = _MAX_ATTEMPTS * _POLL_INTERVAL
    not_responding = (
        f"⚠️ Service started but ComfyUI did not respond within {max_wait} seconds."
    )

    async def _no_config(ctx) -> bool:
        """Reply and return True when config is missing (guards every subcommand)."""
        if config is None:
            await ctx.reply("Bot config not available.", mention_author=False)
            return True
        return False

    async def _wait_until_up() -> int | None:
        """Poll the HTTP endpoint; return seconds waited, or None if it never answered."""
        for attempt in range(1, _MAX_ATTEMPTS + 1):
            await asyncio.sleep(_POLL_INTERVAL)
            if await _is_comfy_up(config.comfy_server):
                return attempt * _POLL_INTERVAL
        return None

    @bot.group(name="server", invoke_without_command=True, extras={"category": "Server"})
    async def server_group(ctx: commands.Context) -> None:
        """ComfyUI server management.

        Subcommands: start, stop, kill, restart, status, install, uninstall.
        """
        await ctx.send_help(ctx.command)

    @server_group.command(name="start")
    async def server_start(ctx: commands.Context) -> None:
        """Start the ComfyUI service."""
        if await _no_config(ctx):
            return

        if await _is_comfy_up(config.comfy_server):
            await ctx.reply("✅ ComfyUI is already running.", mention_author=False)
            return

        msg = await ctx.reply(
            f"⏳ Starting service `{config.comfy_service_name}`…", mention_author=False
        )
        rc, out = await _nssm("start", config.comfy_service_name)
        if rc != 0:
            await msg.edit(content=f"❌ `{out}`")
            return

        await msg.edit(content="⏳ Waiting for ComfyUI to respond…")
        elapsed = await _wait_until_up()
        if elapsed is None:
            await msg.edit(content=not_responding)
        else:
            await msg.edit(content=f"✅ ComfyUI is up! (took ~{elapsed}s)")

    @server_group.command(name="stop")
    async def server_stop(ctx: commands.Context) -> None:
        """Stop the ComfyUI service (force-kills if graceful stop fails)."""
        if await _no_config(ctx):
            return

        msg = await ctx.reply(
            f"⏳ Stopping service `{config.comfy_service_name}`…", mention_author=False
        )
        rc, out = await _nssm("stop", config.comfy_service_name)
        if rc == 0:
            await msg.edit(content="✅ ComfyUI service stopped.")
            return

        # Graceful stop failed (timed out or error) — force-kill the process.
        await msg.edit(content="⏳ Graceful stop failed — force-killing process…")
        await _kill_service_process(config.comfy_service_name)
        await asyncio.sleep(2)   # give the service manager a moment to notice

        state = await get_service_state(config.comfy_service_name)
        if state == "SERVICE_STOPPED":
            await msg.edit(content="✅ ComfyUI service force-killed and stopped.")
        else:
            await msg.edit(
                content=f"⚠️ Force-kill sent but service state is `{state}`. "
                        f"Use `ttr!server kill` to try again."
            )

    @server_group.command(name="kill")
    async def server_kill(ctx: commands.Context) -> None:
        """Force-kill the ComfyUI process when it is stuck in STOPPING/STOP_PENDING."""
        if await _no_config(ctx):
            return

        msg = await ctx.reply(
            f"⏳ Force-killing `{config.comfy_service_name}` process…",
            mention_author=False,
        )
        await _kill_service_process(config.comfy_service_name)
        await asyncio.sleep(2)

        state = await get_service_state(config.comfy_service_name)
        emoji = STATUS_EMOJI.get(state, "⚪")
        await msg.edit(
            content=f"💀 taskkill sent. Service state is now {emoji} `{state}`."
        )

    @server_group.command(name="restart")
    async def server_restart(ctx: commands.Context) -> None:
        """Restart the ComfyUI service (force-kills if graceful stop fails)."""
        if await _no_config(ctx):
            return

        msg = await ctx.reply(
            f"⏳ Stopping `{config.comfy_service_name}` for restart…",
            mention_author=False,
        )

        # Step 1: graceful stop.
        rc, out = await _nssm("stop", config.comfy_service_name)
        if rc != 0:
            # Stop timed out or failed — force-kill so we can start fresh.
            await msg.edit(content="⏳ Graceful stop failed — force-killing process…")
            await _kill_service_process(config.comfy_service_name)
            await asyncio.sleep(2)

        # Step 2: verify stopped before starting. Sentinel states mean the
        # state could not be determined, so no extra kill is attempted.
        state = await get_service_state(config.comfy_service_name)
        if state not in ("SERVICE_STOPPED", "error", "unknown", "timeout"):
            # Still not fully stopped — try one more force-kill.
            await _kill_service_process(config.comfy_service_name)
            await asyncio.sleep(2)

        # Step 3: start.
        await msg.edit(content=f"⏳ Starting `{config.comfy_service_name}`…")
        rc, out = await _nssm("start", config.comfy_service_name)
        if rc != 0:
            await msg.edit(content=f"❌ Start failed: `{out}`")
            return

        # Step 4: wait for HTTP.
        await msg.edit(content="⏳ Waiting for ComfyUI to come back up…")
        elapsed = await _wait_until_up()
        if elapsed is None:
            await msg.edit(content=not_responding)
        else:
            await msg.edit(content=f"✅ ComfyUI is back up! (took ~{elapsed}s)")

    @server_group.command(name="status")
    async def server_status(ctx: commands.Context) -> None:
        """Show NSSM service state and HTTP reachability."""
        if await _no_config(ctx):
            return

        # The two probes are independent, so run them concurrently.
        state, http_up = await asyncio.gather(
            get_service_state(config.comfy_service_name),
            _is_comfy_up(config.comfy_server),
        )

        emoji = STATUS_EMOJI.get(state, "⚪")
        svc_line = f"{emoji} `{state}`"
        http_line = (
            f"🟢 Responding at `{config.comfy_server}`"
            if http_up
            else f"🔴 Not responding at `{config.comfy_server}`"
        )

        await ctx.reply(
            f"**ComfyUI Server Status**\n"
            f"Service `{config.comfy_service_name}`: {svc_line}\n"
            f"HTTP: {http_line}",
            mention_author=False,
        )

    @server_group.command(name="install")
    async def server_install(ctx: commands.Context) -> None:
        """(Re)install the ComfyUI NSSM service with current config settings."""
        if await _no_config(ctx):
            return

        msg = await ctx.reply(
            f"⏳ Installing service `{config.comfy_service_name}`…", mention_author=False
        )
        ok, detail = await _install_service(config)
        await msg.edit(content=f"{'✅' if ok else '❌'} {detail}")

    @server_group.command(name="uninstall")
    async def server_uninstall(ctx: commands.Context) -> None:
        """Stop and remove the ComfyUI NSSM service from Windows."""
        if await _no_config(ctx):
            return

        msg = await ctx.reply(
            f"⏳ Removing service `{config.comfy_service_name}`…", mention_author=False
        )
        # Stop (and force-kill if needed) first; the result of the stop itself
        # is ignored because only the removal outcome is reported.
        await _nssm("stop", config.comfy_service_name)
        await _kill_service_process(config.comfy_service_name)
        rc, out = await _nssm("remove", config.comfy_service_name, "confirm")
        if rc == 0:
            await msg.edit(content=f"✅ Service `{config.comfy_service_name}` removed.")
        else:
            await msg.edit(content=f"❌ `{out}`")