From 236db81ed35b68781465cc2b769521d54ef7527c Mon Sep 17 00:00:00 2001
From: pyr0ball
Date: Tue, 24 Feb 2026 20:36:16 -0800
Subject: [PATCH] =?UTF-8?q?feat:=20startup=20preflight=20=E2=80=94=20port?=
 =?UTF-8?q?=20collision=20avoidance=20+=20resource=20checks?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

scripts/preflight.py (stdlib-only, no psutil):
- Port probing: owned services auto-reassign to next free port; external
  services (Ollama) show ✓ reachable / ⚠ not responding
- System resources: CPU cores, RAM (total + available), GPU VRAM via
  nvidia-smi; works on Linux + macOS
- Profile recommendation: remote / cpu / single-gpu / dual-gpu
- vLLM KV cache offload: calculates CPU_OFFLOAD_GB when VRAM < 10 GB free
  and RAM headroom > 4 GB (uses up to 25% of available headroom)
- Writes resolved values to .env for docker compose; single-service mode
  (--service streamlit) for scripted port queries
- Exit 0 unless an owned port genuinely can't be resolved

scripts/manage-ui.sh:
- Calls preflight.py --service streamlit before bind; falls back to
  pure-bash port scan if Python/yaml unavailable

compose.yml:
- vllm command: adds --cpu-offload-gb ${CPU_OFFLOAD_GB:-0}

Makefile:
- start / restart depend on preflight target
- PYTHON variable for env portability
- test target uses PYTHON variable

Co-Authored-By: Claude Sonnet 4.6
---
 Makefile             |  14 +-
 compose.yml          |   1 +
 scripts/manage-ui.sh |  28 ++++
 scripts/preflight.py | 373 +++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 411 insertions(+), 5 deletions(-)
 create mode 100644 scripts/preflight.py

diff --git a/Makefile b/Makefile
index 1092cda..f3694a8 100644
--- a/Makefile
+++ b/Makefile
@@ -1,27 +1,31 @@
 # Makefile — Peregrine convenience targets
 # Usage: make
 
-.PHONY: setup start stop restart logs test clean
+.PHONY: setup preflight start stop restart logs test clean help
 
 PROFILE ?= remote
+PYTHON ?= python3
 
 setup: ## Install dependencies (Docker, NVIDIA toolkit)
 	@bash setup.sh
 
-start: ## Start Peregrine (PROFILE=remote|cpu|single-gpu|dual-gpu)
+preflight: ## Check ports + system resources; write .env
+	@$(PYTHON) scripts/preflight.py
+
+start: preflight ## Preflight check then start Peregrine (PROFILE=remote|cpu|single-gpu|dual-gpu)
 	docker compose --profile $(PROFILE) up -d
 
 stop: ## Stop all Peregrine services
 	docker compose down
 
-restart: ## Restart all services
+restart: preflight ## Preflight check then restart all services
 	docker compose down && docker compose --profile $(PROFILE) up -d
 
 logs: ## Tail app logs
 	docker compose logs -f app
 
-test: ## Run the test suite (requires conda env)
-	/devl/miniconda3/envs/job-seeker/bin/pytest tests/ -v
+test: ## Run the test suite
+	$(PYTHON) -m pytest tests/ -v
 
 clean: ## Remove containers, images, and data volumes (DESTRUCTIVE)
 	@echo "WARNING: This will delete all Peregrine containers and data."
diff --git a/compose.yml b/compose.yml
index c968ff4..79d8ba2 100644
--- a/compose.yml
+++ b/compose.yml
@@ -91,6 +91,7 @@ services:
       --gpu-memory-utilization 0.75
       --enforce-eager
       --max-num-seqs 8
+      --cpu-offload-gb ${CPU_OFFLOAD_GB:-0}
     deploy:
       resources:
         reservations:
diff --git a/scripts/manage-ui.sh b/scripts/manage-ui.sh
index b676a9f..ea8a60d 100755
--- a/scripts/manage-ui.sh
+++ b/scripts/manage-ui.sh
@@ -22,12 +22,40 @@ PID_FILE="$REPO_DIR/.streamlit.pid"
 LOG_FILE="$REPO_DIR/.streamlit.log"
 PORT="${STREAMLIT_PORT:-8501}"
 
+_resolve_port() {
+    # Ask preflight.py for the next free port near the configured port.
+    # Falls back to a pure-bash scan if Python/yaml is not available.
+    local python_bin
+    for python_bin in python3 python; do
+        if command -v "$python_bin" &>/dev/null && \
+           "$python_bin" -c "import yaml" &>/dev/null 2>&1; then
+            local resolved
+            resolved=$("$python_bin" "$REPO_DIR/scripts/preflight.py" --service streamlit 2>/dev/null)
+            if [[ -n "$resolved" && "$resolved" =~ ^[0-9]+$ ]]; then
+                echo "$resolved"; return
+            fi
+        fi
+    done
+    # Pure-bash fallback: scan for a free port
+    local p="$PORT"
+    while (echo >/dev/tcp/127.0.0.1/"$p") 2>/dev/null; do
+        ((p++))
+        [[ $p -gt $((PORT + 20)) ]] && break
+    done
+    echo "$p"
+}
+
 start() {
     if is_running; then
         echo "Already running (PID $(cat "$PID_FILE")). Use 'restart' to reload."
         return 0
     fi
 
+    PORT=$(_resolve_port)
+    if [[ "$PORT" != "${STREAMLIT_PORT:-8501}" ]]; then
+        echo "Port ${STREAMLIT_PORT:-8501} in use — using $PORT instead."
+    fi
+
     echo "Starting Streamlit on http://localhost:$PORT …"
     "$STREAMLIT_BIN" run "$APP_ENTRY" \
         --server.port "$PORT" \
diff --git a/scripts/preflight.py b/scripts/preflight.py
new file mode 100644
index 0000000..cb8b873
--- /dev/null
+++ b/scripts/preflight.py
@@ -0,0 +1,373 @@
+#!/usr/bin/env python3
+"""
+Peregrine preflight check.
+
+Scans for port conflicts, assesses system resources (RAM / CPU / GPU),
+recommends a Docker Compose profile, and calculates optional vLLM KV-cache
+CPU offload when VRAM is tight. Writes resolved settings to .env so docker
+compose picks them up automatically.
+
+Usage:
+    python scripts/preflight.py                      # full report + write .env
+    python scripts/preflight.py --check-only         # report only, no .env write
+    python scripts/preflight.py --service streamlit  # print resolved port, exit
+    python scripts/preflight.py --quiet              # machine-readable, exit 0/1
+
+Exit codes:
+    0 — all checks passed (or issues auto-resolved)
+    1 — manual action required (an owned port is busy and no free
+        replacement port was found)
+    2 — invalid command line (e.g. --service names an unknown service)
+"""
+import argparse
+import os
+import platform
+import socket
+import subprocess
+import sys
+from pathlib import Path
+
+import yaml
+
+ROOT = Path(__file__).parent.parent
+USER_YAML = ROOT / "config" / "user.yaml"
+ENV_FILE = ROOT / ".env"
+
+# ── Port table ────────────────────────────────────────────────────────────────
+# (yaml_key, default, env_var, peregrine_owns_it)
+_PORTS: dict[str, tuple[str, int, str, bool]] = {
+    "streamlit": ("streamlit_port", 8501, "STREAMLIT_PORT", True),
+    "searxng": ("searxng_port", 8888, "SEARXNG_PORT", True),
+    "vllm": ("vllm_port", 8000, "VLLM_PORT", True),
+    "vision": ("vision_port", 8002, "VISION_PORT", True),
+    "ollama": ("ollama_port", 11434, "OLLAMA_PORT", False),
+}
+
+
+# ── System probes (stdlib only — no psutil) ───────────────────────────────────
+
+def _sh(*cmd: str, timeout: int = 5) -> str:
+    """Run *cmd* and return its stripped stdout, or "" on any failure."""
+    try:
+        r = subprocess.run(list(cmd), capture_output=True, text=True, timeout=timeout)
+        return r.stdout.strip() if r.returncode == 0 else ""
+    except (FileNotFoundError, subprocess.TimeoutExpired, OSError):
+        return ""
+
+
+def get_ram_gb() -> tuple[float, float]:
+    """Return (total_gb, available_gb). Returns (0, 0) if undetectable."""
+    os_name = platform.system()
+    if os_name == "Linux":
+        try:
+            meminfo = Path("/proc/meminfo").read_text()
+        except OSError:
+            return 0.0, 0.0
+        total = available = 0
+        for line in meminfo.splitlines():
+            if line.startswith("MemTotal:"):
+                total = int(line.split()[1])
+            elif line.startswith("MemAvailable:"):
+                available = int(line.split()[1])
+        # /proc/meminfo values are in kB.
+        return total / 1024 / 1024, available / 1024 / 1024
+    elif os_name == "Darwin":
+        total_bytes = _sh("sysctl", "-n", "hw.memsize")
+        total = int(total_bytes) / 1024 ** 3 if total_bytes.isdigit() else 0.0
+        # Page size is 16 KiB on Apple Silicon and 4 KiB on Intel Macs, so ask
+        # the kernel instead of assuming 4096 (which under-reports free RAM 4x).
+        page_raw = _sh("sysctl", "-n", "hw.pagesize")
+        page_size = int(page_raw) if page_raw.isdigit() else 4096
+        vm = _sh("vm_stat")
+        free_pages = 0
+        for line in vm.splitlines():
+            if "Pages free" in line or "Pages speculative" in line:
+                try:
+                    free_pages += int(line.split()[-1].rstrip("."))
+                except ValueError:
+                    pass
+        available = free_pages * page_size / 1024 ** 3
+        return total, available
+    return 0.0, 0.0
+
+
+def get_cpu_cores() -> int:
+    """Return the logical CPU count (1 if it cannot be determined)."""
+    return os.cpu_count() or 1
+
+
+def get_gpus() -> list[dict]:
+    """Return list of {name, vram_total_gb, vram_free_gb} via nvidia-smi."""
+    out = _sh(
+        "nvidia-smi",
+        "--query-gpu=name,memory.total,memory.free",
+        "--format=csv,noheader,nounits",
+    )
+    if not out:
+        return []
+    gpus = []
+    for line in out.splitlines():
+        parts = [p.strip() for p in line.split(",")]
+        if len(parts) == 3:
+            try:
+                gpus.append({
+                    "name": parts[0],
+                    "vram_total_gb": round(int(parts[1]) / 1024, 1),
+                    "vram_free_gb": round(int(parts[2]) / 1024, 1),
+                })
+            except ValueError:
+                pass  # non-numeric memory field — skip this GPU
+    return gpus
+
+
+# ── Port probes ───────────────────────────────────────────────────────────────
+
+def _load_svc() -> dict:
+    """Return the `services` mapping from config/user.yaml ({} if absent)."""
+    if not USER_YAML.exists():
+        return {}
+    data = yaml.safe_load(USER_YAML.read_text(encoding="utf-8"))
+    # An empty file and a bare `services:` key both load as None.
+    svc = data.get("services") if isinstance(data, dict) else None
+    return svc if isinstance(svc, dict) else {}
+
+
+def is_port_free(port: int) -> bool:
+    """Return True when nothing accepts TCP connections on 127.0.0.1:port."""
+    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
+        s.settimeout(0.3)
+        return s.connect_ex(("127.0.0.1", port)) != 0
+
+
+def find_free_port(start: int, limit: int = 30, exclude: frozenset[int] = frozenset()) -> int:
+    """
+    Return the first free port in [start, start + limit) that is not in *exclude*.
+
+    Raises RuntimeError when every candidate is busy, excluded or out of range.
+    """
+    # Stay inside the valid TCP port range; connect_ex rejects ports > 65535.
+    for p in range(start, min(start + limit, 65536)):
+        if p not in exclude and is_port_free(p):
+            return p
+    raise RuntimeError(f"No free port found in range {start}–{start + limit - 1}")
+
+
+def check_ports(svc: dict, only=None) -> dict[str, dict]:
+    """
+    Probe each service's configured port and resolve conflicts.
+
+    Owned services whose port is busy (or was already given to an earlier
+    service in this run) move to the next free port; external services are
+    only probed. When no replacement is found the entry keeps its configured
+    port with free=False and changed=False so the caller can exit non-zero.
+
+    *only* optionally limits probing to the named services; ports configured
+    for the other services are still avoided when reassigning.
+    """
+    configured_ports: dict[str, int] = {}
+    for name, (yaml_key, default, _env_var, _owned) in _PORTS.items():
+        raw = svc.get(yaml_key)
+        # A key with no value loads as None — fall back to the default port.
+        configured_ports[name] = default if raw is None else int(raw)
+
+    results = {}
+    taken: set[int] = set()
+    for name, (_yaml_key, _default, env_var, owned) in _PORTS.items():
+        if only is not None and name not in only:
+            continue
+        configured = configured_ports[name]
+        free = is_port_free(configured) and not (owned and configured in taken)
+        resolved = configured
+        if owned and not free:
+            # Skip ports that another service is configured for or was already
+            # assigned, otherwise two containers could end up on the same port.
+            reserved = frozenset(taken.union(configured_ports.values()))
+            try:
+                resolved = find_free_port(configured + 1, exclude=reserved)
+            except RuntimeError:
+                pass  # left unresolved; reported as "in use" and exit code 1
+        if owned:
+            taken.add(resolved)
+        results[name] = {
+            "configured": configured,
+            "resolved": resolved,
+            "changed": resolved != configured,
+            "owned": owned,
+            "free": free,
+            "env_var": env_var,
+        }
+    return results
+
+
+# ── Recommendations ───────────────────────────────────────────────────────────
+
+def recommend_profile(gpus: list[dict], ram_total_gb: float) -> str:
+    """Pick a compose profile from the GPU count, falling back on total RAM."""
+    if len(gpus) >= 2:
+        return "dual-gpu"
+    if len(gpus) == 1:
+        return "single-gpu"
+    if ram_total_gb >= 8:
+        return "cpu"
+    return "remote"
+
+
+def calc_cpu_offload_gb(gpus: list[dict], ram_available_gb: float) -> int:
+    """
+    Suggest GBs of KV cache to offload from GPU VRAM → system RAM.
+
+    Enabled when VRAM is tight (< 10 GB free on any GPU) and there is
+    enough RAM headroom (> 4 GB available). Uses at most 25% of the
+    RAM headroom above 4 GB, capped at 8 GB.
+    """
+    if not gpus or ram_available_gb < 4:
+        return 0
+    min_vram_free = min(g["vram_free_gb"] for g in gpus)
+    if min_vram_free >= 10:
+        return 0
+    headroom = ram_available_gb - 4.0  # reserve 4 GB for OS
+    return min(int(headroom * 0.25), 8)
+
+
+# ── .env writer ───────────────────────────────────────────────────────────────
+
+def write_env(updates: dict[str, str]) -> None:
+    """
+    Merge *updates* into .env, creating the file if needed.
+
+    Existing lines — comments, blank lines, unrelated keys — stay where they
+    are; only the values of updated keys are rewritten, and keys not yet in
+    the file are appended in sorted order.
+    """
+    pending = dict(updates)
+    lines: list[str] = []
+    if ENV_FILE.exists():
+        for raw in ENV_FILE.read_text(encoding="utf-8").splitlines():
+            stripped = raw.strip()
+            key = stripped.partition("=")[0].strip()
+            is_assignment = "=" in stripped and not stripped.startswith("#")
+            if not is_assignment or key not in updates:
+                lines.append(raw)
+            elif key in pending:
+                lines.append(f"{key}={pending.pop(key)}")
+            # else: a repeat of a key rewritten above — drop the stale copy
+    lines.extend(f"{k}={v}" for k, v in sorted(pending.items()))
+    ENV_FILE.write_text("\n".join(lines) + "\n", encoding="utf-8")
+
+
+# ── Main ──────────────────────────────────────────────────────────────────────
+
+def main() -> None:
+    """Parse arguments, run the probes, print the report and update .env."""
+    parser = argparse.ArgumentParser(description="Peregrine preflight check")
+    parser.add_argument("--check-only", action="store_true",
+                        help="Print report; don't write .env")
+    parser.add_argument("--quiet", action="store_true",
+                        help="Suppress output; rely on exit code")
+    parser.add_argument("--service", metavar="NAME",
+                        help="Print resolved port for one service and exit (e.g. streamlit)")
+    args = parser.parse_args()
+
+    svc = _load_svc()
+
+    # Single-service mode — used by manage-ui.sh
+    if args.service:
+        name = args.service.lower()
+        if name not in _PORTS:
+            parser.error(f"unknown service {args.service!r} "
+                         f"(choose from: {', '.join(_PORTS)})")
+        # Probe only the requested service so unrelated ports cannot fail the query.
+        info = check_ports(svc, only={name})[name]
+        if info["owned"] and not info["free"] and not info["changed"]:
+            sys.exit(1)  # busy with no replacement — print nothing
+        print(info["resolved"])
+        return
+
+    ports = check_ports(svc)
+    ram_total, ram_avail = get_ram_gb()
+    cpu_cores = get_cpu_cores()
+    gpus = get_gpus()
+    profile = recommend_profile(gpus, ram_total)
+    offload_gb = calc_cpu_offload_gb(gpus, ram_avail)
+
+    if not args.quiet:
+        reassigned = [n for n, i in ports.items() if i["changed"]]
+
+        print("╔══ Peregrine Preflight ══════════════════════════════╗")
+        print("║")
+        print("║ Ports")
+        for name, info in ports.items():
+            tag = "owned " if info["owned"] else "extern"
+            if not info["owned"]:
+                # external: in-use means the service is reachable
+                status = "✓ reachable" if not info["free"] else "⚠ not responding"
+            elif info["free"]:
+                status = "✓ free"
+            elif info["changed"]:
+                status = f"→ reassigned to :{info['resolved']}"
+            else:
+                status = "⚠ in use"
+            print(f"║ {name:<10} :{info['configured']} [{tag}] {status}")
+
+        print("║")
+        print("║ Resources")
+        print(f"║ CPU {cpu_cores} core{'s' if cpu_cores != 1 else ''}")
+        if ram_total:
+            print(f"║ RAM {ram_total:.0f} GB total / {ram_avail:.1f} GB available")
+        else:
+            print("║ RAM (undetectable)")
+        if gpus:
+            for i, g in enumerate(gpus):
+                print(f"║ GPU {i} {g['name']} — "
+                      f"{g['vram_free_gb']:.1f} / {g['vram_total_gb']:.0f} GB VRAM free")
+        else:
+            print("║ GPU none detected")
+
+        print("║")
+        print("║ Recommendations")
+        print(f"║ Docker profile {profile}")
+        if offload_gb > 0:
+            print(f"║ vLLM KV offload {offload_gb} GB → RAM (CPU_OFFLOAD_GB={offload_gb})")
+        else:
+            print("║ vLLM KV offload not needed")
+
+        if reassigned:
+            print("║")
+            if args.check_only:
+                print("║ Port reassignments (not written — --check-only):")
+            else:
+                print("║ Port reassignments written to .env:")
+            for name in reassigned:
+                info = ports[name]
+                print(f"║ {info['env_var']}={info['resolved']} (was :{info['configured']})")
+
+        # External services: in-use = ✓ running; free = warn (may be down)
+        ext_down = [n for n, i in ports.items() if not i["owned"] and i["free"]]
+        if ext_down:
+            print("║")
+            print("║ ⚠ External services not detected on configured port:")
+            for name in ext_down:
+                info = ports[name]
+                svc_key = _PORTS[name][0]
+                print(f"║ {name} :{info['configured']} — nothing listening "
+                      f"(start the service or update services.{svc_key} in user.yaml)")
+
+        print("╚════════════════════════════════════════════════════╝")
+
+    if not args.check_only:
+        env_updates: dict[str, str] = {i["env_var"]: str(i["resolved"]) for i in ports.values()}
+        env_updates["RECOMMENDED_PROFILE"] = profile
+        # Always write the offload value so a stale non-zero entry from an
+        # earlier run cannot keep vLLM offloading after conditions change.
+        env_updates["CPU_OFFLOAD_GB"] = str(offload_gb)
+        write_env(env_updates)
+        if not args.quiet:
+            print(f" wrote {ENV_FILE.relative_to(ROOT)}")
+
+    # Fail only when an owned port can't be resolved (shouldn't happen in practice)
+    owned_stuck = [n for n, i in ports.items() if i["owned"] and not i["free"] and not i["changed"]]
+    sys.exit(1 if owned_stuck else 0)
+
+
+if __name__ == "__main__":
+    main()