From ecf44ea6c592931b45ba5ea02098059f3e88784a Mon Sep 17 00:00:00 2001 From: pyr0ball Date: Wed, 25 Feb 2026 21:38:23 -0800 Subject: [PATCH] =?UTF-8?q?fix:=20stub-port=20adoption=20=E2=80=94=20stubs?= =?UTF-8?q?=20bind=20free=20ports,=20app=20routes=20to=20external=20via=20?= =?UTF-8?q?host.docker.internal?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three inter-related fixes for the service adoption flow: - preflight: stub_port field — adopted services get a free port for their no-op container (avoids binding conflict with external service on real port) while update_llm_yaml still uses the real external port for host.docker.internal URLs - preflight: write_env now uses stub_port (not resolved) for adopted services so SEARXNG_PORT etc point to the stub's harmless port, not the occupied one - preflight: stub containers use sleep infinity + CMD true healthcheck so depends_on: service_healthy is satisfied without holding any real port - Makefile: finetune profile changed from [cpu,single-gpu,dual-gpu] to [finetune] so the pytorch/cuda base image is not built during make start --- Makefile | 12 +++++++++--- scripts/preflight.py | 40 +++++++++++++++++++++++++++++----------- 2 files changed, 38 insertions(+), 14 deletions(-) diff --git a/Makefile b/Makefile index 4998e2e..8fc0936 100644 --- a/Makefile +++ b/Makefile @@ -18,14 +18,20 @@ COMPOSE ?= $(shell \ # GPU profiles require an overlay for NVIDIA device reservations. # Docker uses deploy.resources (compose.gpu.yml); Podman uses CDI device specs (compose.podman-gpu.yml). # Generate CDI spec for Podman first: sudo nvidia-ctk cdi generate --output=/etc/cdi/nvidia.yaml -COMPOSE_FILES := -f compose.yml +# +# NOTE: When explicit -f flags are used, Docker Compose does NOT auto-detect +# compose.override.yml. We must include it explicitly when present. +OVERRIDE_FILE := $(wildcard compose.override.yml) +COMPOSE_OVERRIDE := $(if $(OVERRIDE_FILE),-f compose.override.yml,) + +COMPOSE_FILES := -f compose.yml $(COMPOSE_OVERRIDE) ifneq (,$(findstring podman,$(COMPOSE))) ifneq (,$(findstring gpu,$(PROFILE))) - COMPOSE_FILES := -f compose.yml -f compose.podman-gpu.yml + COMPOSE_FILES := -f compose.yml $(COMPOSE_OVERRIDE) -f compose.podman-gpu.yml endif else ifneq (,$(findstring gpu,$(PROFILE))) - COMPOSE_FILES := -f compose.yml -f compose.gpu.yml + COMPOSE_FILES := -f compose.yml $(COMPOSE_OVERRIDE) -f compose.gpu.yml endif endif diff --git a/scripts/preflight.py b/scripts/preflight.py index 7c57790..7687474 100644 --- a/scripts/preflight.py +++ b/scripts/preflight.py @@ -167,19 +167,25 @@ def check_ports(svc: dict) -> dict[str, dict]: if free: # Port is free — start Docker service as normal resolved = configured + stub_port = configured external = False elif adoptable: - # Port is in use by a compatible service — adopt it, skip Docker container + # Port is in use by a compatible service — adopt it. + # resolved = actual external port (used for host.docker.internal URL) + # stub_port = free port for the no-op stub container (avoids binding conflict) resolved = configured + stub_port = find_free_port(configured + 1) external = True else: # Port in use, not adoptable (e.g. streamlit) — reassign resolved = find_free_port(configured + 1) + stub_port = resolved external = False results[name] = { "configured": configured, "resolved": resolved, + "stub_port": stub_port, "changed": resolved != configured, "docker_owned": docker_owned, "adoptable": adoptable, @@ -274,15 +280,16 @@ def update_llm_yaml(ports: dict[str, dict]) -> None: def write_compose_override(ports: dict[str, dict]) -> None: """ - Generate compose.override.yml to disable Docker services that are being + Generate compose.override.yml to stub out Docker services that are being adopted from external processes. Cleans up the file when nothing to disable. - Docker Compose auto-applies compose.override.yml — no Makefile change needed. - Overriding `profiles` with an unused name prevents the service from starting - under any normal profile (remote/cpu/single-gpu/dual-gpu). + Stubbing strategy (not profiles): changing a service's profile to an unused + value breaks depends_on references — Docker treats it as undefined. Instead + we replace the service with a no-op stub that: + - Stays alive (sleep infinity) so depends_on: service_started is satisfied + - Reports healthy immediately so depends_on: service_healthy is satisfied + - Binds no ports (no conflict with the external service on the host) """ - # Only disable services that Docker would normally start (docker_owned=True) - # and are being adopted from an external process. to_disable = { name: info for name, info in ports.items() if info["external"] and info["docker_owned"] @@ -295,13 +302,22 @@ def write_compose_override(ports: dict[str, dict]) -> None: lines = [ "# compose.override.yml — AUTO-GENERATED by preflight.py, do not edit manually.", - "# Disables Docker services that are already running externally on the host.", + "# Stubs out Docker services whose ports are already in use by host services.", "# Re-run preflight (make preflight) to regenerate after stopping host services.", "services:", ] for name, info in to_disable.items(): - lines.append(f" {name}:") - lines.append(f" profiles: [_external_] # adopted: host service on :{info['resolved']}") + lines += [ + f" {name}: # adopted — host service on :{info['resolved']}", + f" entrypoint: [\"/bin/sh\", \"-c\", \"sleep infinity\"]", + f" ports: []", + f" healthcheck:", + f" test: [\"CMD\", \"true\"]", + f" interval: 1s", + f" timeout: 1s", + f" start_period: 0s", + f" retries: 1", + ] OVERRIDE_YML.write_text("\n".join(lines) + "\n") @@ -401,7 +417,9 @@ def main() -> None: print("╚════════════════════════════════════════════════════╝") if not args.check_only: - env_updates: dict[str, str] = {i["env_var"]: str(i["resolved"]) for i in ports.values()} + # For adopted services, write stub_port to .env so the no-op container + # binds a harmless free port instead of conflicting with the external service. + env_updates: dict[str, str] = {i["env_var"]: str(i["stub_port"]) for i in ports.values()} env_updates["RECOMMENDED_PROFILE"] = profile if offload_gb > 0: env_updates["CPU_OFFLOAD_GB"] = str(offload_gb)