scripts/preflight.py (stdlib-only, no psutil):
- Port probing: owned services auto-reassign to next free port; external
services (Ollama) show ✓ reachable / ⚠ not responding
- System resources: CPU cores, RAM (total + available), GPU VRAM via
nvidia-smi; works on Linux + macOS
- Profile recommendation: remote / cpu / single-gpu / dual-gpu
- vLLM KV cache offload: calculates CPU_OFFLOAD_GB when VRAM < 10 GB
free and RAM headroom > 4 GB (uses up to 25% of available headroom)
- Writes resolved values to .env for docker compose; single-service mode
(--service streamlit) for scripted port queries
- Exit 0 unless an owned port genuinely can't be resolved
scripts/manage-ui.sh:
- Calls preflight.py --service streamlit before bind; falls back to
pure-bash port scan if Python/yaml unavailable
compose.yml:
- vllm command: adds --cpu-offload-gb ${CPU_OFFLOAD_GB:-0}
Makefile:
- start / restart depend on preflight target
- PYTHON variable for env portability
- test target uses PYTHON variable
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
301 lines
12 KiB
Python
#!/usr/bin/env python3
|
||
"""
|
||
Peregrine preflight check.
|
||
|
||
Scans for port conflicts, assesses system resources (RAM / CPU / GPU),
|
||
recommends a Docker Compose profile, and calculates optional vLLM KV-cache
|
||
CPU offload when VRAM is tight. Writes resolved settings to .env so docker
|
||
compose picks them up automatically.
|
||
|
||
Usage:
|
||
python scripts/preflight.py # full report + write .env
|
||
python scripts/preflight.py --check-only # report only, no .env write
|
||
python scripts/preflight.py --service streamlit # print resolved port, exit
|
||
python scripts/preflight.py --quiet # machine-readable, exit 0/1
|
||
|
||
Exit codes:
|
||
0 — all checks passed (or issues auto-resolved)
|
||
1 — manual action required (unresolvable port conflict on external service)
|
||
"""
|
||
import argparse
import platform
import re
import socket
import subprocess
import sys
from pathlib import Path

import yaml
|
||
|
||
# Repository root (this script lives in scripts/, one level below it).
ROOT = Path(__file__).parent.parent
# Optional user overrides for service ports (services.* keys).
USER_YAML = ROOT / "config" / "user.yaml"
# Resolved settings are written here for docker compose to pick up.
ENV_FILE = ROOT / ".env"

# ── Port table ────────────────────────────────────────────────────────────────
# (yaml_key, default, env_var, peregrine_owns_it)
# "Owned" services are rebound to the next free port on conflict; external
# services (Ollama) are only probed for reachability and never reassigned.
_PORTS: dict[str, tuple[str, int, str, bool]] = {
    "streamlit": ("streamlit_port", 8501, "STREAMLIT_PORT", True),
    "searxng": ("searxng_port", 8888, "SEARXNG_PORT", True),
    "vllm": ("vllm_port", 8000, "VLLM_PORT", True),
    "vision": ("vision_port", 8002, "VISION_PORT", True),
    "ollama": ("ollama_port", 11434, "OLLAMA_PORT", False),
}
|
||
|
||
|
||
# ── System probes (stdlib only — no psutil) ───────────────────────────────────
|
||
|
||
def _sh(*cmd: str, timeout: int = 5) -> str:
|
||
try:
|
||
r = subprocess.run(list(cmd), capture_output=True, text=True, timeout=timeout)
|
||
return r.stdout.strip() if r.returncode == 0 else ""
|
||
except (FileNotFoundError, subprocess.TimeoutExpired, OSError):
|
||
return ""
|
||
|
||
|
||
def get_ram_gb() -> tuple[float, float]:
    """Return (total_gb, available_gb). Returns (0.0, 0.0) if undetectable."""
    os_name = platform.system()
    if os_name == "Linux":
        return _linux_ram_gb()
    if os_name == "Darwin":
        return _darwin_ram_gb()
    return 0.0, 0.0


def _linux_ram_gb() -> tuple[float, float]:
    """Parse /proc/meminfo (values are reported in KiB)."""
    try:
        meminfo = Path("/proc/meminfo").read_text()
    except OSError:
        return 0.0, 0.0
    total = available = 0
    for line in meminfo.splitlines():
        if line.startswith("MemTotal:"):
            total = int(line.split()[1])
        elif line.startswith("MemAvailable:"):
            # Absent on kernels < 3.14; available then stays 0.
            available = int(line.split()[1])
    return total / 1024 / 1024, available / 1024 / 1024


def _darwin_ram_gb() -> tuple[float, float]:
    """Use sysctl for total RAM and vm_stat for free/speculative pages."""
    total_bytes = _sh("sysctl", "-n", "hw.memsize")
    total = int(total_bytes) / 1024 ** 3 if total_bytes.isdigit() else 0.0
    vm = _sh("vm_stat")
    # FIX: vm_stat's header states the page size ("page size of N bytes").
    # Apple Silicon uses 16384-byte pages, Intel Macs 4096 — the previous
    # hardcoded 4096 undercounted available RAM 4x on M-series machines.
    page_size = 4096
    match = re.search(r"page size of (\d+) bytes", vm)
    if match:
        page_size = int(match.group(1))
    free_pages = 0
    for line in vm.splitlines():
        if "Pages free" in line or "Pages speculative" in line:
            try:
                free_pages += int(line.split()[-1].rstrip("."))
            except ValueError:
                pass
    available = free_pages * page_size / 1024 ** 3
    return total, available
|
||
|
||
|
||
def get_cpu_cores() -> int:
    """Number of logical CPU cores; falls back to 1 when undetectable."""
    import os
    count = os.cpu_count()
    return count if count else 1
|
||
|
||
|
||
def get_gpus() -> list[dict]:
    """Return list of {name, vram_total_gb, vram_free_gb} via nvidia-smi."""
    raw = _sh(
        "nvidia-smi",
        "--query-gpu=name,memory.total,memory.free",
        "--format=csv,noheader,nounits",
    )
    gpus: list[dict] = []
    # One CSV row per GPU; memory figures arrive in MiB (nounits).
    for row in raw.splitlines():
        fields = [field.strip() for field in row.split(",")]
        if len(fields) != 3:
            continue
        name, total_mib, free_mib = fields
        try:
            gpus.append({
                "name": name,
                "vram_total_gb": round(int(total_mib) / 1024, 1),
                "vram_free_gb": round(int(free_mib) / 1024, 1),
            })
        except ValueError:
            # Skip rows with non-numeric memory fields (e.g. "[N/A]").
            pass
    return gpus
|
||
|
||
|
||
# ── Port probes ───────────────────────────────────────────────────────────────
|
||
|
||
def _load_svc() -> dict:
    """Load the ``services`` section of config/user.yaml ({} when absent)."""
    if not USER_YAML.exists():
        return {}
    data = yaml.safe_load(USER_YAML.read_text()) or {}
    return data.get("services", {})
|
||
|
||
|
||
def is_port_free(port: int) -> bool:
    """True when nothing accepts TCP connections on 127.0.0.1:*port*."""
    probe = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        # Short timeout keeps the full port sweep fast.
        probe.settimeout(0.3)
        return probe.connect_ex(("127.0.0.1", port)) != 0
    finally:
        probe.close()
|
||
|
||
|
||
def find_free_port(start: int, limit: int = 30) -> int:
    """Return the first free port in [start, start + limit).

    Raises:
        RuntimeError: when every port in the scanned range is busy.
    """
    for candidate in range(start, start + limit):
        if is_port_free(candidate):
            return candidate
    raise RuntimeError(f"No free port found in range {start}–{start + limit - 1}")
|
||
|
||
|
||
def check_ports(svc: dict) -> dict[str, dict]:
    """Probe every service port and compute its resolved binding.

    Owned services that collide are moved to the next free port; external
    services always keep their configured port (we only note reachability).
    """
    report: dict[str, dict] = {}
    for name, (yaml_key, default, env_var, owned) in _PORTS.items():
        configured = int(svc.get(yaml_key, default))
        free = is_port_free(configured)
        if free or not owned:
            resolved = configured
        else:
            resolved = find_free_port(configured + 1)
        report[name] = {
            "configured": configured,
            "resolved": resolved,
            "changed": resolved != configured,
            "owned": owned,
            "free": free,
            "env_var": env_var,
        }
    return report
|
||
|
||
|
||
# ── Recommendations ───────────────────────────────────────────────────────────
|
||
|
||
def recommend_profile(gpus: list[dict], ram_total_gb: float) -> str:
    """Map detected hardware to a Docker Compose profile name."""
    gpu_count = len(gpus)
    if gpu_count >= 2:
        return "dual-gpu"
    if gpu_count == 1:
        return "single-gpu"
    # No GPU: run models on CPU when there's enough RAM, else rely on a
    # remote endpoint.
    return "cpu" if ram_total_gb >= 8 else "remote"
|
||
|
||
|
||
def calc_cpu_offload_gb(gpus: list[dict], ram_available_gb: float) -> int:
    """
    Suggest GBs of KV cache to offload from GPU VRAM → system RAM.

    Enabled when VRAM is tight (< 10 GB free on any GPU) and there is
    enough RAM headroom (> 4 GB available). Uses at most 25% of the
    RAM headroom above 4 GB, capped at 8 GB.
    """
    if not gpus:
        return 0
    if ram_available_gb < 4:
        return 0
    tightest_vram = min(g["vram_free_gb"] for g in gpus)
    if tightest_vram >= 10:
        return 0
    headroom = ram_available_gb - 4.0  # reserve 4 GB for OS
    suggested = int(headroom * 0.25)
    return min(suggested, 8)
|
||
|
||
|
||
# ── .env writer ───────────────────────────────────────────────────────────────
|
||
|
||
def write_env(updates: dict[str, str]) -> None:
    """Merge *updates* into .env, keeping unrelated keys.

    Note: comment lines in an existing .env are dropped on rewrite; keys
    are emitted in sorted order.
    """
    merged: dict[str, str] = {}
    if ENV_FILE.exists():
        for raw in ENV_FILE.read_text().splitlines():
            raw = raw.strip()
            if raw.startswith("#") or "=" not in raw:
                continue
            key, _, value = raw.partition("=")
            merged[key.strip()] = value.strip()
    merged.update(updates)
    body = "\n".join(f"{k}={v}" for k, v in sorted(merged.items()))
    ENV_FILE.write_text(body + "\n")
|
||
|
||
|
||
# ── Main ──────────────────────────────────────────────────────────────────────
|
||
|
||
def main() -> None:
    """Run all preflight checks, print the report, and update .env.

    Exit codes: 0 when everything passed or was auto-resolved; 1 when an
    owned port could not be resolved or an unknown --service was requested.
    """
    parser = argparse.ArgumentParser(description="Peregrine preflight check")
    parser.add_argument("--check-only", action="store_true",
                        help="Print report; don't write .env")
    parser.add_argument("--quiet", action="store_true",
                        help="Suppress output; rely on exit code")
    parser.add_argument("--service", metavar="NAME",
                        help="Print resolved port for one service and exit (e.g. streamlit)")
    args = parser.parse_args()

    svc = _load_svc()
    ports = check_ports(svc)

    # Single-service mode — used by manage-ui.sh
    if args.service:
        key = args.service.lower()
        info = ports.get(key)
        if info is None:
            # FIX: the old fallback indexed _PORTS[key] — the same missing
            # key — and died with a bare KeyError. Fail cleanly instead.
            print(f"unknown service: {args.service}", file=sys.stderr)
            sys.exit(1)
        print(info["resolved"])
        return

    ram_total, ram_avail = get_ram_gb()
    cpu_cores = get_cpu_cores()
    gpus = get_gpus()
    profile = recommend_profile(gpus, ram_total)
    offload_gb = calc_cpu_offload_gb(gpus, ram_avail)

    if not args.quiet:
        # (FIX: removed an unused "unresolved" list that was computed here.)
        reassigned = [n for n, i in ports.items() if i["changed"]]

        print("╔══ Peregrine Preflight ══════════════════════════════╗")
        print("║")
        print("║ Ports")
        for name, info in ports.items():
            tag = "owned " if info["owned"] else "extern"
            if not info["owned"]:
                # external: in-use means the service is reachable
                status = "✓ reachable" if not info["free"] else "⚠ not responding"
            elif info["free"]:
                status = "✓ free"
            elif info["changed"]:
                status = f"→ reassigned to :{info['resolved']}"
            else:
                status = "⚠ in use"
            print(f"║ {name:<10} :{info['configured']} [{tag}] {status}")

        print("║")
        print("║ Resources")
        print(f"║ CPU {cpu_cores} core{'s' if cpu_cores != 1 else ''}")
        if ram_total:
            print(f"║ RAM {ram_total:.0f} GB total / {ram_avail:.1f} GB available")
        else:
            print("║ RAM (undetectable)")
        if gpus:
            for i, g in enumerate(gpus):
                print(f"║ GPU {i} {g['name']} — "
                      f"{g['vram_free_gb']:.1f} / {g['vram_total_gb']:.0f} GB VRAM free")
        else:
            print("║ GPU none detected")

        print("║")
        print("║ Recommendations")
        print(f"║ Docker profile {profile}")
        if offload_gb > 0:
            print(f"║ vLLM KV offload {offload_gb} GB → RAM (CPU_OFFLOAD_GB={offload_gb})")
        else:
            print("║ vLLM KV offload not needed")

        if reassigned:
            print("║")
            print("║ Port reassignments written to .env:")
            for name in reassigned:
                info = ports[name]
                print(f"║ {info['env_var']}={info['resolved']} (was :{info['configured']})")

        # External services: in-use = ✓ running; free = warn (may be down)
        ext_down = [n for n, i in ports.items() if not i["owned"] and i["free"]]
        if ext_down:
            print("║")
            print("║ ⚠ External services not detected on configured port:")
            for name in ext_down:
                info = ports[name]
                svc_key = _PORTS[name][0]
                print(f"║ {name} :{info['configured']} — nothing listening "
                      f"(start the service or update services.{svc_key} in user.yaml)")

        print("╚════════════════════════════════════════════════════╝")

    if not args.check_only:
        env_updates: dict[str, str] = {i["env_var"]: str(i["resolved"]) for i in ports.values()}
        env_updates["RECOMMENDED_PROFILE"] = profile
        if offload_gb > 0:
            env_updates["CPU_OFFLOAD_GB"] = str(offload_gb)
        write_env(env_updates)
        if not args.quiet:
            print(f" wrote {ENV_FILE.relative_to(ROOT)}")

    # Fail only when an owned port can't be resolved (shouldn't happen in practice)
    owned_stuck = [n for n, i in ports.items() if i["owned"] and not i["free"] and not i["changed"]]
    sys.exit(1 if owned_stuck else 0)
|
||
|
||
|
||
# Script entry point — run checks only when executed directly, not on import.
if __name__ == "__main__":
    main()
|