peregrine/scripts/preflight.py
pyr0ball 56fb386225 feat: startup preflight — port collision avoidance + resource checks
scripts/preflight.py (stdlib-only, no psutil):
- Port probing: owned services auto-reassign to next free port; external
  services (Ollama) show ✓ reachable / ⚠ not responding
- System resources: CPU cores, RAM (total + available), GPU VRAM via
  nvidia-smi; works on Linux + macOS
- Profile recommendation: remote / cpu / single-gpu / dual-gpu
- vLLM KV cache offload: calculates CPU_OFFLOAD_GB when VRAM < 10 GB
  free and RAM headroom > 4 GB (uses up to 25% of available headroom)
- Writes resolved values to .env for docker compose; single-service mode
  (--service streamlit) for scripted port queries
- Exit 0 unless an owned port genuinely can't be resolved

scripts/manage-ui.sh:
- Calls preflight.py --service streamlit before bind; falls back to
  pure-bash port scan if Python/yaml unavailable

compose.yml:
- vllm command: adds --cpu-offload-gb ${CPU_OFFLOAD_GB:-0}

Makefile:
- start / restart depend on preflight target
- PYTHON variable for env portability
- test target uses PYTHON variable

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-02-24 20:36:16 -08:00

301 lines
12 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""
Peregrine preflight check.
Scans for port conflicts, assesses system resources (RAM / CPU / GPU),
recommends a Docker Compose profile, and calculates optional vLLM KV-cache
CPU offload when VRAM is tight. Writes resolved settings to .env so docker
compose picks them up automatically.
Usage:
python scripts/preflight.py # full report + write .env
python scripts/preflight.py --check-only # report only, no .env write
python scripts/preflight.py --service streamlit # print resolved port, exit
python scripts/preflight.py --quiet # machine-readable, exit 0/1
Exit codes:
0 — all checks passed (or issues auto-resolved)
1 — manual action required (an owned port is in use and no free replacement was found)
"""
import argparse
import platform
import socket
import subprocess
import sys
from pathlib import Path
import yaml
# Two directory levels above this file.
ROOT = Path(__file__).parent.parent
# YAML config; its "services" mapping may override the default ports.
USER_YAML = ROOT / "config" / "user.yaml"
# Destination of the resolved KEY=VALUE settings written by write_env().
ENV_FILE = ROOT / ".env"
# ── Port table ────────────────────────────────────────────────────────────────
# service name -> (yaml_key, default, env_var, peregrine_owns_it)
#   yaml_key          key looked up in the "services" mapping of user.yaml
#   default           port used when user.yaml does not set yaml_key
#   env_var           variable name written to .env with the resolved port
#   peregrine_owns_it True: a busy port is reassigned to a free one;
#                     False: the port is only probed, never reassigned
_PORTS: dict[str, tuple[str, int, str, bool]] = {
    "streamlit": ("streamlit_port", 8501, "STREAMLIT_PORT", True),
    "searxng": ("searxng_port", 8888, "SEARXNG_PORT", True),
    "vllm": ("vllm_port", 8000, "VLLM_PORT", True),
    "vision": ("vision_port", 8002, "VISION_PORT", True),
    "ollama": ("ollama_port", 11434, "OLLAMA_PORT", False),
}
# ── System probes (stdlib only — no psutil) ───────────────────────────────────
def _sh(*cmd: str, timeout: int = 5) -> str:
    """Run *cmd* (no shell) and return its stripped stdout.

    This is a best-effort probe: the result is "" when the executable is
    missing, the OS refuses to start it, the call exceeds *timeout* seconds,
    or the process exits with a non-zero status.
    """
    try:
        proc = subprocess.run(
            list(cmd), capture_output=True, text=True, timeout=timeout
        )
    except (FileNotFoundError, subprocess.TimeoutExpired, OSError):
        return ""
    if proc.returncode != 0:
        return ""
    return proc.stdout.strip()
def get_ram_gb() -> tuple[float, float]:
    """Return ``(total_gb, available_gb)`` of system RAM.

    Linux: parsed from ``/proc/meminfo`` (``MemTotal`` / ``MemAvailable``,
    both reported in kB).
    macOS: total from ``sysctl -n hw.memsize``; available is the sum of the
    "Pages free" and "Pages speculative" counters of ``vm_stat`` multiplied
    by the page size that ``vm_stat`` announces in its header line.
    Any other platform, or an unreadable source, yields ``(0.0, 0.0)``.
    """
    os_name = platform.system()

    if os_name == "Linux":
        try:
            meminfo = Path("/proc/meminfo").read_text()
        except OSError:
            return 0.0, 0.0
        total_kb = available_kb = 0
        for line in meminfo.splitlines():
            if line.startswith("MemTotal:"):
                total_kb = int(line.split()[1])
            elif line.startswith("MemAvailable:"):
                available_kb = int(line.split()[1])
        return total_kb / 1024 / 1024, available_kb / 1024 / 1024

    if os_name == "Darwin":
        total_bytes = _sh("sysctl", "-n", "hw.memsize")
        total_gb = int(total_bytes) / 1024 ** 3 if total_bytes.isdigit() else 0.0

        vm = _sh("vm_stat")
        vm_lines = vm.splitlines()

        # vm_stat counts pages, and its first line states the page size, e.g.
        # "Mach Virtual Memory Statistics: (page size of 16384 bytes)".
        # Hard-coding 4096 under-reports memory wherever pages are larger.
        page_size = 4096
        marker = "page size of"
        if vm_lines and marker in vm_lines[0]:
            digits = "".join(
                ch for ch in vm_lines[0].split(marker, 1)[1] if ch.isdigit()
            )
            if digits:
                page_size = int(digits)

        free_pages = 0
        for line in vm_lines:
            if "Pages free" in line or "Pages speculative" in line:
                try:
                    free_pages += int(line.split()[-1].rstrip("."))
                except ValueError:
                    # Unparseable counter: skip it rather than abort the probe.
                    continue
        return total_gb, free_pages * page_size / 1024 ** 3

    return 0.0, 0.0
def get_cpu_cores() -> int:
    """Return the CPU count reported by ``os.cpu_count()``, or 1 if unknown."""
    import os

    count = os.cpu_count()
    return count if count else 1
def get_gpus() -> list[dict]:
    """List GPUs reported by ``nvidia-smi``.

    Each entry is ``{"name", "vram_total_gb", "vram_free_gb"}``; the two
    memory figures are the reported integers divided by 1024 and rounded to
    one decimal. Rows that do not split into exactly three comma-separated
    fields, or whose memory fields are not integers, are skipped. Returns an
    empty list when ``nvidia-smi`` produces no output.
    """
    report = _sh(
        "nvidia-smi",
        "--query-gpu=name,memory.total,memory.free",
        "--format=csv,noheader,nounits",
    )
    found: list[dict] = []
    for row in report.splitlines():
        fields = [cell.strip() for cell in row.split(",")]
        if len(fields) != 3:
            continue
        name, total_raw, free_raw = fields
        try:
            entry = {
                "name": name,
                "vram_total_gb": round(int(total_raw) / 1024, 1),
                "vram_free_gb": round(int(free_raw) / 1024, 1),
            }
        except ValueError:
            continue
        found.append(entry)
    return found
# ── Port probes ───────────────────────────────────────────────────────────────
def _load_svc() -> dict:
    """Return the ``services`` mapping from ``config/user.yaml``.

    Returns ``{}`` when the file does not exist, is empty, does not have a
    mapping at its root, or has a missing / empty / non-mapping ``services``
    key, so callers can always use ``.get()`` on the result.
    """
    try:
        text = USER_YAML.read_text(encoding="utf-8")
    except (FileNotFoundError, NotADirectoryError):
        return {}
    config = yaml.safe_load(text)
    if not isinstance(config, dict):
        return {}
    # "services:" with no value parses as None; normalise that to {}.
    services = config.get("services")
    return services if isinstance(services, dict) else {}
def is_port_free(port: int) -> bool:
    """Return True when a TCP connect to 127.0.0.1:*port* does not succeed.

    The probe uses a 0.3 s timeout; any non-zero ``connect_ex`` result
    (refused, timed out, ...) is treated as "free".
    """
    probe = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        probe.settimeout(0.3)
        outcome = probe.connect_ex(("127.0.0.1", port))
    finally:
        probe.close()
    return outcome != 0
def find_free_port(start: int, limit: int = 30) -> int:
    """Return the first free port in ``start .. start + limit - 1``.

    Ports are tested in ascending order with ``is_port_free``. Candidates
    above 65535 are never probed, because the socket layer rejects them with
    OverflowError rather than reporting them busy.

    Raises:
        RuntimeError: no port in the range is free.
    """
    stop = min(start + limit, 65536)
    for port in range(start, stop):
        if is_port_free(port):
            return port
    raise RuntimeError(
        f"No free port found in range {start}-{start + limit - 1}"
    )
def check_ports(svc: dict) -> dict[str, dict]:
    """Probe every service in ``_PORTS`` and resolve the port it should use.

    Args:
        svc: the ``services`` mapping from user.yaml; a missing or null
            entry falls back to the default in ``_PORTS``.

    Returns:
        ``{service: info}`` where ``info`` has the keys ``configured``,
        ``resolved``, ``changed``, ``owned``, ``free`` and ``env_var``.
        An owned service whose configured port is busy gets the next free
        port that no other service is configured for or was already
        reassigned to. When no replacement is found, ``resolved`` stays equal
        to ``configured`` (``free`` False, ``changed`` False) so the caller
        can report the conflict. External services are never reassigned.
    """
    # Read every configured port first so a reassignment never lands on a
    # port another service intends to use but has not bound yet.
    configured_ports: dict[str, int] = {}
    for name, (yaml_key, default, _env_var, _owned) in _PORTS.items():
        raw = svc.get(yaml_key)
        configured_ports[name] = default if raw is None else int(raw)
    reserved = set(configured_ports.values())

    results: dict[str, dict] = {}
    for name, (_yaml_key, _default, env_var, owned) in _PORTS.items():
        configured = configured_ports[name]
        free = is_port_free(configured)
        resolved = configured
        if owned and not free:
            candidate = configured + 1
            try:
                # candidate strictly increases, so the search ends either on
                # an unreserved free port or when find_free_port gives up.
                while True:
                    candidate = find_free_port(candidate)
                    if candidate not in reserved:
                        break
                    candidate += 1
            except (RuntimeError, OverflowError):
                # No replacement available (OverflowError: the socket layer
                # rejects ports above 65535). Keep the configured port; the
                # entry is then reported as busy and unchanged.
                pass
            else:
                resolved = candidate
                reserved.add(candidate)
        results[name] = {
            "configured": configured,
            "resolved": resolved,
            "changed": resolved != configured,
            "owned": owned,
            "free": free,
            "env_var": env_var,
        }
    return results
# ── Recommendations ───────────────────────────────────────────────────────────
def recommend_profile(gpus: list[dict], ram_total_gb: float) -> str:
    """Choose a Docker Compose profile from the detected hardware.

    Two or more GPUs -> "dual-gpu"; exactly one -> "single-gpu"; no GPU ->
    "cpu" when total RAM is at least 8 GB, otherwise "remote".
    """
    gpu_count = len(gpus)
    if gpu_count == 0:
        return "cpu" if ram_total_gb >= 8 else "remote"
    return "single-gpu" if gpu_count == 1 else "dual-gpu"
def calc_cpu_offload_gb(gpus: list[dict], ram_available_gb: float) -> int:
    """Suggest how many GB of KV cache to move from GPU VRAM to system RAM.

    Returns 0 when there is no GPU, when less than 4 GB of RAM is available,
    or when every GPU has at least 10 GB of VRAM free. Otherwise returns 25%
    of the available RAM above a 4 GB reserve, truncated to a whole number
    and capped at 8.
    """
    if not gpus or ram_available_gb < 4:
        return 0

    tightest_vram = min(gpu["vram_free_gb"] for gpu in gpus)
    if tightest_vram >= 10:
        return 0

    # 4 GB is held back for the OS; only a quarter of the rest is offered.
    spare_ram = ram_available_gb - 4.0
    suggestion = int(spare_ram * 0.25)
    return suggestion if suggestion < 8 else 8
# ── .env writer ───────────────────────────────────────────────────────────────
def write_env(updates: dict[str, str]) -> None:
    """Merge *updates* into the ``.env`` file, creating it if necessary.

    Existing content is preserved: comments, blank lines, unrelated
    assignments and their order are written back untouched. A ``KEY=value``
    line whose key is in *updates* is rewritten in place; later duplicate
    lines for the same key are dropped so a stale value cannot win. Keys not
    yet present are appended in sorted order. The file always ends with a
    newline.
    """
    current_lines: list[str] = []
    if ENV_FILE.exists():
        current_lines = ENV_FILE.read_text().splitlines()

    written: set[str] = set()
    merged: list[str] = []
    for raw in current_lines:
        stripped = raw.strip()
        if "=" in stripped and not stripped.startswith("#"):
            key = stripped.partition("=")[0].strip()
            if key in updates:
                if key not in written:
                    merged.append(f"{key}={updates[key]}")
                    written.add(key)
                # A repeated assignment of an updated key is dropped.
                continue
        merged.append(raw)

    merged.extend(
        f"{key}={value}"
        for key, value in sorted(updates.items())
        if key not in written
    )
    ENV_FILE.write_text("\n".join(merged) + "\n")
# ── Main ──────────────────────────────────────────────────────────────────────
def _print_report(
    ports: dict[str, dict],
    cpu_cores: int,
    ram_total: float,
    ram_avail: float,
    gpus: list[dict],
    profile: str,
    offload_gb: int,
) -> None:
    """Print the human-readable preflight box (ports, resources, advice)."""
    print("╔══ Peregrine Preflight ══════════════════════════════╗")
    print("")
    print("║ Ports")
    for name, info in ports.items():
        tag = "owned " if info["owned"] else "extern"
        if not info["owned"]:
            # external: in-use means the service is reachable
            status = "✓ reachable" if not info["free"] else "⚠ not responding"
        elif info["free"]:
            status = "✓ free"
        elif info["changed"]:
            status = f"→ reassigned to :{info['resolved']}"
        else:
            status = "⚠ in use"
        print(f"║   {name:<10} :{info['configured']} [{tag}] {status}")

    print("")
    print("║ Resources")
    print(f"║ CPU {cpu_cores} core{'s' if cpu_cores != 1 else ''}")
    if ram_total:
        print(f"║ RAM {ram_total:.0f} GB total / {ram_avail:.1f} GB available")
    else:
        print("║ RAM (undetectable)")
    if gpus:
        for index, gpu in enumerate(gpus):
            # Separator keeps the GPU name from running into the VRAM figure.
            print(f"║ GPU {index} {gpu['name']} — "
                  f"{gpu['vram_free_gb']:.1f} / {gpu['vram_total_gb']:.0f} GB VRAM free")
    else:
        print("║ GPU none detected")

    print("")
    print("║ Recommendations")
    print(f"║ Docker profile {profile}")
    if offload_gb > 0:
        print(f"║ vLLM KV offload {offload_gb} GB → RAM (CPU_OFFLOAD_GB={offload_gb})")
    else:
        print("║ vLLM KV offload not needed")

    reassigned = [name for name, info in ports.items() if info["changed"]]
    if reassigned:
        print("")
        print("║ Port reassignments written to .env:")
        for name in reassigned:
            info = ports[name]
            print(f"║   {info['env_var']}={info['resolved']} (was :{info['configured']})")

    # External services: in-use = ✓ running; free = warn (may be down)
    ext_down = [name for name, info in ports.items()
                if not info["owned"] and info["free"]]
    if ext_down:
        print("")
        print("║ ⚠ External services not detected on configured port:")
        for name in ext_down:
            info = ports[name]
            svc_key = _PORTS[name][0]
            print(f"║   {name} :{info['configured']} — nothing listening "
                  f"(start the service or update services.{svc_key} in user.yaml)")
    print("╚════════════════════════════════════════════════════╝")


def main() -> None:
    """CLI entry point: probe ports and resources, report, update ``.env``.

    Flags:
        --check-only   print the report but do not write ``.env``
        --quiet        suppress the report; rely on the exit code
        --service NAME print the resolved port of one service and return

    Exit status: 0 normally; 1 when an owned port is busy and could not be
    reassigned; 2 (argparse usage error) for an unknown ``--service`` name.
    """
    parser = argparse.ArgumentParser(description="Peregrine preflight check")
    parser.add_argument("--check-only", action="store_true",
                        help="Print report; don't write .env")
    parser.add_argument("--quiet", action="store_true",
                        help="Suppress output; rely on exit code")
    parser.add_argument("--service", metavar="NAME",
                        help="Print resolved port for one service and exit (e.g. streamlit)")
    args = parser.parse_args()

    # Reject an unknown service up front instead of failing with KeyError.
    service = args.service.lower() if args.service else None
    if service is not None and service not in _PORTS:
        parser.error(
            f"unknown service {args.service!r} (choose from: {', '.join(_PORTS)})"
        )

    svc = _load_svc()
    try:
        ports = check_ports(svc)
    except RuntimeError as exc:
        # Port probing may raise when no replacement port exists; report it
        # as the documented failure status rather than a traceback.
        print(f"preflight: {exc}", file=sys.stderr)
        sys.exit(1)

    # Single-service mode — used by manage-ui.sh
    if service is not None:
        print(ports[service]["resolved"])
        return

    ram_total, ram_avail = get_ram_gb()
    cpu_cores = get_cpu_cores()
    gpus = get_gpus()
    profile = recommend_profile(gpus, ram_total)
    offload_gb = calc_cpu_offload_gb(gpus, ram_avail)

    if not args.quiet:
        _print_report(ports, cpu_cores, ram_total, ram_avail, gpus, profile, offload_gb)

    if not args.check_only:
        env_updates: dict[str, str] = {
            info["env_var"]: str(info["resolved"]) for info in ports.values()
        }
        env_updates["RECOMMENDED_PROFILE"] = profile
        # Always written, including 0, so a value left by an earlier run
        # cannot keep offloading enabled after it stops being recommended.
        env_updates["CPU_OFFLOAD_GB"] = str(offload_gb)
        write_env(env_updates)
        if not args.quiet:
            print(f" wrote {ENV_FILE.relative_to(ROOT)}")

    # Fail only when an owned port is busy and no replacement was assigned.
    owned_stuck = [name for name, info in ports.items()
                   if info["owned"] and not info["free"] and not info["changed"]]
    sys.exit(1 if owned_stuck else 0)
# Run the preflight only when executed as a script, not when imported.
if __name__ == "__main__":
    sys.exit(main())