feat: startup preflight — port collision avoidance + resource checks
scripts/preflight.py (stdlib-only, no psutil):
- Port probing: owned services auto-reassign to next free port; external
services (Ollama) show ✓ reachable / ⚠ not responding
- System resources: CPU cores, RAM (total + available), GPU VRAM via
nvidia-smi; works on Linux + macOS
- Profile recommendation: remote / cpu / single-gpu / dual-gpu
- vLLM KV cache offload: calculates CPU_OFFLOAD_GB when VRAM < 10 GB
free and RAM headroom > 4 GB (uses up to 25% of available headroom)
- Writes resolved values to .env for docker compose; single-service mode
(--service streamlit) for scripted port queries
- Exit 0 unless an owned port genuinely can't be resolved
scripts/manage-ui.sh:
- Calls preflight.py --service streamlit before bind; falls back to
pure-bash port scan if Python/yaml unavailable
compose.yml:
- vllm command: adds --cpu-offload-gb ${CPU_OFFLOAD_GB:-0}
Makefile:
- start / restart depend on preflight target
- PYTHON variable for env portability
- test target uses PYTHON variable
This commit is contained in:
parent
4841b211ea
commit
78917c8460
4 changed files with 339 additions and 5 deletions
14
Makefile
14
Makefile
|
|
@ -1,27 +1,31 @@
|
||||||
# Makefile — Peregrine convenience targets
|
# Makefile — Peregrine convenience targets
|
||||||
# Usage: make <target>
|
# Usage: make <target>
|
||||||
|
|
||||||
.PHONY: setup start stop restart logs test clean
|
.PHONY: setup preflight start stop restart logs test clean help
|
||||||
|
|
||||||
PROFILE ?= remote
|
PROFILE ?= remote
|
||||||
|
PYTHON ?= python3
|
||||||
|
|
||||||
setup: ## Install dependencies (Docker, NVIDIA toolkit)
|
setup: ## Install dependencies (Docker, NVIDIA toolkit)
|
||||||
@bash setup.sh
|
@bash setup.sh
|
||||||
|
|
||||||
start: ## Start Peregrine (PROFILE=remote|cpu|single-gpu|dual-gpu)
|
preflight: ## Check ports + system resources; write .env
|
||||||
|
@$(PYTHON) scripts/preflight.py
|
||||||
|
|
||||||
|
start: preflight ## Preflight check then start Peregrine (PROFILE=remote|cpu|single-gpu|dual-gpu)
|
||||||
docker compose --profile $(PROFILE) up -d
|
docker compose --profile $(PROFILE) up -d
|
||||||
|
|
||||||
stop: ## Stop all Peregrine services
|
stop: ## Stop all Peregrine services
|
||||||
docker compose down
|
docker compose down
|
||||||
|
|
||||||
restart: ## Restart all services
|
restart: preflight ## Preflight check then restart all services
|
||||||
docker compose down && docker compose --profile $(PROFILE) up -d
|
docker compose down && docker compose --profile $(PROFILE) up -d
|
||||||
|
|
||||||
logs: ## Tail app logs
|
logs: ## Tail app logs
|
||||||
docker compose logs -f app
|
docker compose logs -f app
|
||||||
|
|
||||||
test: ## Run the test suite (requires conda env)
|
test: ## Run the test suite
|
||||||
/devl/miniconda3/envs/job-seeker/bin/pytest tests/ -v
|
$(PYTHON) -m pytest tests/ -v
|
||||||
|
|
||||||
clean: ## Remove containers, images, and data volumes (DESTRUCTIVE)
|
clean: ## Remove containers, images, and data volumes (DESTRUCTIVE)
|
||||||
@echo "WARNING: This will delete all Peregrine containers and data."
|
@echo "WARNING: This will delete all Peregrine containers and data."
|
||||||
|
|
|
||||||
|
|
@ -91,6 +91,7 @@ services:
|
||||||
--gpu-memory-utilization 0.75
|
--gpu-memory-utilization 0.75
|
||||||
--enforce-eager
|
--enforce-eager
|
||||||
--max-num-seqs 8
|
--max-num-seqs 8
|
||||||
|
--cpu-offload-gb ${CPU_OFFLOAD_GB:-0}
|
||||||
deploy:
|
deploy:
|
||||||
resources:
|
resources:
|
||||||
reservations:
|
reservations:
|
||||||
|
|
|
||||||
|
|
@ -22,12 +22,40 @@ PID_FILE="$REPO_DIR/.streamlit.pid"
|
||||||
LOG_FILE="$REPO_DIR/.streamlit.log"
|
LOG_FILE="$REPO_DIR/.streamlit.log"
|
||||||
PORT="${STREAMLIT_PORT:-8501}"
|
PORT="${STREAMLIT_PORT:-8501}"
|
||||||
|
|
||||||
|
_resolve_port() {
    # Resolve the Streamlit port: prefer preflight.py (knows the full port
    # table), falling back to a pure-bash /dev/tcp scan when Python or the
    # PyYAML module is unavailable. Prints the chosen port on stdout.
    local interp candidate
    for interp in python3 python; do
        command -v "$interp" &>/dev/null || continue
        "$interp" -c "import yaml" &>/dev/null || continue
        candidate=$("$interp" "$REPO_DIR/scripts/preflight.py" --service streamlit 2>/dev/null)
        if [[ -n "$candidate" && "$candidate" =~ ^[0-9]+$ ]]; then
            echo "$candidate"
            return
        fi
    done
    # Fallback: walk upward from $PORT while something answers a TCP
    # connect; give up (and return whatever we reached) after 20 ports.
    local p="$PORT"
    while (echo >/dev/tcp/127.0.0.1/"$p") 2>/dev/null; do
        ((p++))
        [[ $p -gt $((PORT + 20)) ]] && break
    done
    echo "$p"
}
|
||||||
|
|
||||||
start() {
|
start() {
|
||||||
if is_running; then
|
if is_running; then
|
||||||
echo "Already running (PID $(cat "$PID_FILE")). Use 'restart' to reload."
|
echo "Already running (PID $(cat "$PID_FILE")). Use 'restart' to reload."
|
||||||
return 0
|
return 0
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
PORT=$(_resolve_port)
|
||||||
|
if [[ "$PORT" != "${STREAMLIT_PORT:-8501}" ]]; then
|
||||||
|
echo "Port ${STREAMLIT_PORT:-8501} in use — using $PORT instead."
|
||||||
|
fi
|
||||||
|
|
||||||
echo "Starting Streamlit on http://localhost:$PORT …"
|
echo "Starting Streamlit on http://localhost:$PORT …"
|
||||||
"$STREAMLIT_BIN" run "$APP_ENTRY" \
|
"$STREAMLIT_BIN" run "$APP_ENTRY" \
|
||||||
--server.port "$PORT" \
|
--server.port "$PORT" \
|
||||||
|
|
|
||||||
301
scripts/preflight.py
Normal file
301
scripts/preflight.py
Normal file
|
|
@ -0,0 +1,301 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Peregrine preflight check.
|
||||||
|
|
||||||
|
Scans for port conflicts, assesses system resources (RAM / CPU / GPU),
|
||||||
|
recommends a Docker Compose profile, and calculates optional vLLM KV-cache
|
||||||
|
CPU offload when VRAM is tight. Writes resolved settings to .env so docker
|
||||||
|
compose picks them up automatically.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
python scripts/preflight.py # full report + write .env
|
||||||
|
python scripts/preflight.py --check-only # report only, no .env write
|
||||||
|
python scripts/preflight.py --service streamlit # print resolved port, exit
|
||||||
|
python scripts/preflight.py --quiet # machine-readable, exit 0/1
|
||||||
|
|
||||||
|
Exit codes:
|
||||||
|
0 — all checks passed (or issues auto-resolved)
|
||||||
|
1 — manual action required (unresolvable port conflict on external service)
|
||||||
|
"""
|
||||||
|
import argparse
|
||||||
|
import platform
|
||||||
|
import socket
|
||||||
|
import subprocess
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import yaml
|
||||||
|
|
||||||
|
ROOT = Path(__file__).parent.parent
|
||||||
|
USER_YAML = ROOT / "config" / "user.yaml"
|
||||||
|
ENV_FILE = ROOT / ".env"
|
||||||
|
|
||||||
|
# ── Port table ────────────────────────────────────────────────────────────────
# (yaml_key, default, env_var, peregrine_owns_it)
# "Owned" services (peregrine_owns_it=True) may be auto-reassigned to the next
# free port when their configured port is busy; external services (ollama) are
# probed for reachability only and are never reassigned.
_PORTS: dict[str, tuple[str, int, str, bool]] = {
    "streamlit": ("streamlit_port", 8501, "STREAMLIT_PORT", True),
    "searxng": ("searxng_port", 8888, "SEARXNG_PORT", True),
    "vllm": ("vllm_port", 8000, "VLLM_PORT", True),
    "vision": ("vision_port", 8002, "VISION_PORT", True),
    "ollama": ("ollama_port", 11434, "OLLAMA_PORT", False),
}
|
||||||
|
|
||||||
|
|
||||||
|
# ── System probes (stdlib only — no psutil) ───────────────────────────────────
|
||||||
|
|
||||||
|
def _sh(*cmd: str, timeout: int = 5) -> str:
|
||||||
|
try:
|
||||||
|
r = subprocess.run(list(cmd), capture_output=True, text=True, timeout=timeout)
|
||||||
|
return r.stdout.strip() if r.returncode == 0 else ""
|
||||||
|
except (FileNotFoundError, subprocess.TimeoutExpired, OSError):
|
||||||
|
return ""
|
||||||
|
|
||||||
|
|
||||||
|
def get_ram_gb() -> tuple[float, float]:
    """Return (total_gb, available_gb) of system RAM.

    Linux parses /proc/meminfo (MemTotal / MemAvailable, in kB); macOS
    combines ``sysctl hw.memsize`` with free + speculative pages from
    ``vm_stat`` (4 KiB pages). Returns (0.0, 0.0) when the platform is
    unsupported or probing fails.
    """
    system = platform.system()
    if system == "Linux":
        try:
            text = Path("/proc/meminfo").read_text()
        except OSError:
            return 0.0, 0.0
        kb = {"MemTotal:": 0, "MemAvailable:": 0}
        for row in text.splitlines():
            for key in kb:
                if row.startswith(key):
                    kb[key] = int(row.split()[1])
        return kb["MemTotal:"] / 1024 / 1024, kb["MemAvailable:"] / 1024 / 1024
    if system == "Darwin":
        raw_total = _sh("sysctl", "-n", "hw.memsize")
        total = int(raw_total) / 1024 ** 3 if raw_total.isdigit() else 0.0
        pages = 0
        for row in _sh("vm_stat").splitlines():
            if "Pages free" in row or "Pages speculative" in row:
                try:
                    pages += int(row.split()[-1].rstrip("."))
                except ValueError:
                    pass
        return total, pages * 4096 / 1024 ** 3
    return 0.0, 0.0
|
||||||
|
|
||||||
|
|
||||||
|
def get_cpu_cores() -> int:
    """Return the logical CPU core count, never less than 1."""
    import os

    count = os.cpu_count()
    return count if count else 1
|
||||||
|
|
||||||
|
|
||||||
|
def get_gpus() -> list[dict]:
    """Return list of {name, vram_total_gb, vram_free_gb} via nvidia-smi.

    Returns [] when nvidia-smi is missing, errors out, or prints nothing.
    Rows that do not parse as "name, MiB, MiB" are skipped silently.
    """
    raw = _sh(
        "nvidia-smi",
        "--query-gpu=name,memory.total,memory.free",
        "--format=csv,noheader,nounits",
    )
    found: list[dict] = []
    for row in raw.splitlines():
        fields = [f.strip() for f in row.split(",")]
        if len(fields) != 3:
            continue
        try:
            total_mib = int(fields[1])
            free_mib = int(fields[2])
        except ValueError:
            continue
        found.append({
            "name": fields[0],
            "vram_total_gb": round(total_mib / 1024, 1),
            "vram_free_gb": round(free_mib / 1024, 1),
        })
    return found
|
||||||
|
|
||||||
|
|
||||||
|
# ── Port probes ───────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def _load_svc() -> dict:
    """Return the `services:` mapping from config/user.yaml, or {} if absent."""
    if not USER_YAML.exists():
        return {}
    data = yaml.safe_load(USER_YAML.read_text()) or {}
    return data.get("services", {})
|
||||||
|
|
||||||
|
|
||||||
|
def is_port_free(port: int) -> bool:
    """True when nothing accepts TCP connections on 127.0.0.1:*port*.

    Probes by attempting a connect (0.3 s timeout), so a service bound
    only to a non-loopback interface would still appear "free" here.
    """
    probe = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    with probe:
        probe.settimeout(0.3)
        return probe.connect_ex(("127.0.0.1", port)) != 0
|
||||||
|
|
||||||
|
|
||||||
|
def find_free_port(start: int, limit: int = 30) -> int:
    """Return the first free TCP port in [start, start + limit).

    Raises RuntimeError when every port in the window is occupied.
    """
    for offset in range(limit):
        candidate = start + offset
        if is_port_free(candidate):
            return candidate
    raise RuntimeError(f"No free port found in range {start}–{start + limit - 1}")
|
||||||
|
|
||||||
|
|
||||||
|
def check_ports(svc: dict) -> dict[str, dict]:
    """Probe every port in _PORTS and decide its resolved value.

    An owned port that is busy is reassigned to the next free port above
    it; external ports keep their configured value regardless. Each entry
    records configured/resolved ports, whether it changed, ownership,
    the probe result, and the env var docker compose reads.
    """
    report: dict[str, dict] = {}
    for name, (yaml_key, default, env_var, owned) in _PORTS.items():
        configured = int(svc.get(yaml_key, default))
        free = is_port_free(configured)
        if free or not owned:
            resolved = configured
        else:
            resolved = find_free_port(configured + 1)
        report[name] = {
            "configured": configured,
            "resolved": resolved,
            "changed": resolved != configured,
            "owned": owned,
            "free": free,
            "env_var": env_var,
        }
    return report
|
||||||
|
|
||||||
|
|
||||||
|
# ── Recommendations ───────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def recommend_profile(gpus: list[dict], ram_total_gb: float) -> str:
    """Map detected hardware to a Docker Compose profile name.

    2+ GPUs → dual-gpu; 1 GPU → single-gpu; no GPU but ≥ 8 GB RAM → cpu;
    otherwise → remote.
    """
    gpu_count = len(gpus)
    if gpu_count >= 2:
        return "dual-gpu"
    if gpu_count == 1:
        return "single-gpu"
    return "cpu" if ram_total_gb >= 8 else "remote"
|
||||||
|
|
||||||
|
|
||||||
|
def calc_cpu_offload_gb(gpus: list[dict], ram_available_gb: float) -> int:
    """
    Suggest GBs of KV cache to offload from GPU VRAM → system RAM.

    Offload kicks in only when some GPU has < 10 GB VRAM free AND the
    host has > 4 GB RAM available. At most 25% of the RAM headroom above
    the 4 GB OS reserve is used, capped at 8 GB. Returns 0 otherwise.
    """
    if not gpus:
        return 0
    if ram_available_gb < 4:
        return 0
    tightest = min(g["vram_free_gb"] for g in gpus)
    if tightest >= 10:
        return 0
    headroom = ram_available_gb - 4.0  # reserve 4 GB for OS
    return min(int(headroom * 0.25), 8)
|
||||||
|
|
||||||
|
|
||||||
|
# ── .env writer ───────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def write_env(updates: dict[str, str]) -> None:
    """Merge *updates* into .env, preserving comments, blank lines, and order.

    The previous implementation parsed only KEY=VALUE lines and rewrote the
    whole file sorted, silently deleting any comments or blank lines a user
    had put in .env. Here each existing KEY=VALUE line is updated in place;
    everything else is kept verbatim, and keys not already present are
    appended (sorted) at the end.
    """
    pending = dict(updates)  # keys still to be written
    lines: list[str] = []
    if ENV_FILE.exists():
        for raw in ENV_FILE.read_text().splitlines():
            stripped = raw.strip()
            if "=" in stripped and not stripped.startswith("#"):
                key = stripped.partition("=")[0].strip()
                if key in pending:
                    lines.append(f"{key}={pending.pop(key)}")
                    continue
            lines.append(raw)  # comment, blank, or untouched assignment
    lines.extend(f"{k}={v}" for k, v in sorted(pending.items()))
    ENV_FILE.write_text("\n".join(lines) + "\n")
|
||||||
|
|
||||||
|
|
||||||
|
# ── Main ──────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def main() -> None:
    """CLI entry point: probe ports and resources, report, and write .env.

    Modes:
      (default)        full report; resolved values written to .env
      --check-only     report only, no .env write
      --quiet          no report; rely on exit code
      --service NAME   print NAME's resolved port and exit (manage-ui.sh)

    Exit code is 0 unless an owned port genuinely could not be resolved
    (argparse itself exits 2 on bad arguments, including an unknown
    --service name).
    """
    parser = argparse.ArgumentParser(description="Peregrine preflight check")
    parser.add_argument("--check-only", action="store_true",
                        help="Print report; don't write .env")
    parser.add_argument("--quiet", action="store_true",
                        help="Suppress output; rely on exit code")
    parser.add_argument("--service", metavar="NAME",
                        help="Print resolved port for one service and exit (e.g. streamlit)")
    args = parser.parse_args()

    svc = _load_svc()
    ports = check_ports(svc)

    # Single-service mode — used by manage-ui.sh for scripted port queries.
    if args.service:
        name = args.service.lower()
        info = ports.get(name)
        if info is None:
            # Fix: an unknown name used to fall through to
            # _PORTS[name][1] and crash with a KeyError traceback.
            parser.error(f"unknown service {args.service!r} "
                         f"(choose from: {', '.join(_PORTS)})")
        print(info["resolved"])
        return

    ram_total, ram_avail = get_ram_gb()
    cpu_cores = get_cpu_cores()
    gpus = get_gpus()
    profile = recommend_profile(gpus, ram_total)
    offload_gb = calc_cpu_offload_gb(gpus, ram_avail)

    if not args.quiet:
        reassigned = [n for n, i in ports.items() if i["changed"]]

        print("╔══ Peregrine Preflight ══════════════════════════════╗")
        print("║")
        print("║ Ports")
        for name, info in ports.items():
            tag = "owned " if info["owned"] else "extern"
            if not info["owned"]:
                # external: in-use means the service is reachable
                status = "✓ reachable" if not info["free"] else "⚠ not responding"
            elif info["free"]:
                status = "✓ free"
            elif info["changed"]:
                status = f"→ reassigned to :{info['resolved']}"
            else:
                status = "⚠ in use"
            print(f"║ {name:<10} :{info['configured']} [{tag}] {status}")

        print("║")
        print("║ Resources")
        print(f"║ CPU {cpu_cores} core{'s' if cpu_cores != 1 else ''}")
        if ram_total:
            print(f"║ RAM {ram_total:.0f} GB total / {ram_avail:.1f} GB available")
        else:
            print("║ RAM (undetectable)")
        if gpus:
            for i, g in enumerate(gpus):
                print(f"║ GPU {i} {g['name']} — "
                      f"{g['vram_free_gb']:.1f} / {g['vram_total_gb']:.0f} GB VRAM free")
        else:
            print("║ GPU none detected")

        print("║")
        print("║ Recommendations")
        print(f"║ Docker profile {profile}")
        if offload_gb > 0:
            print(f"║ vLLM KV offload {offload_gb} GB → RAM (CPU_OFFLOAD_GB={offload_gb})")
        else:
            print("║ vLLM KV offload not needed")

        if reassigned:
            # NOTE(review): this header says "written to .env" even under
            # --check-only, where nothing is written — confirm intent.
            print("║")
            print("║ Port reassignments written to .env:")
            for name in reassigned:
                info = ports[name]
                print(f"║ {info['env_var']}={info['resolved']} (was :{info['configured']})")

        # External services: in-use = ✓ running; free = warn (may be down)
        ext_down = [n for n, i in ports.items() if not i["owned"] and i["free"]]
        if ext_down:
            print("║")
            print("║ ⚠ External services not detected on configured port:")
            for name in ext_down:
                info = ports[name]
                svc_key = _PORTS[name][0]
                print(f"║ {name} :{info['configured']} — nothing listening "
                      f"(start the service or update services.{svc_key} in user.yaml)")

        print("╚════════════════════════════════════════════════════╝")

    if not args.check_only:
        env_updates: dict[str, str] = {i["env_var"]: str(i["resolved"]) for i in ports.values()}
        env_updates["RECOMMENDED_PROFILE"] = profile
        if offload_gb > 0:
            env_updates["CPU_OFFLOAD_GB"] = str(offload_gb)
        write_env(env_updates)
        if not args.quiet:
            print(f" wrote {ENV_FILE.relative_to(ROOT)}")

    # Fail only when an owned port can't be resolved (shouldn't happen in
    # practice: check_ports reassigns every busy owned port or raises).
    owned_stuck = [n for n, i in ports.items() if i["owned"] and not i["free"] and not i["changed"]]
    sys.exit(1 if owned_stuck else 0)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
Loading…
Reference in a new issue