fix: stub-port adoption — stubs bind free ports, app routes to external via host.docker.internal

Three inter-related preflight fixes for the service adoption flow, plus a Makefile profile change:
- preflight: stub_port field — adopted services get a free port for their
  no-op container (avoids binding conflict with external service on real port)
  while update_llm_yaml still uses the real external port for host.docker.internal URLs
- preflight: write_env now uses stub_port (not resolved) for adopted services
  so SEARXNG_PORT etc point to the stub's harmless port, not the occupied one
- preflight: stub containers use sleep infinity + CMD true healthcheck so
  depends_on: service_healthy is satisfied without holding any real port
- Makefile: finetune profile changed from [cpu,single-gpu,dual-gpu] to [finetune]
  so the pytorch/cuda base image is not built during make start
This commit is contained in:
pyr0ball 2026-02-25 21:38:23 -08:00
parent 7f8dc18a92
commit ecf44ea6c5
2 changed files with 38 additions and 14 deletions

View file

@ -18,14 +18,20 @@ COMPOSE ?= $(shell \
# GPU profiles require an overlay for NVIDIA device reservations.
# Docker uses deploy.resources (compose.gpu.yml); Podman uses CDI device specs (compose.podman-gpu.yml).
# Generate CDI spec for Podman first: sudo nvidia-ctk cdi generate --output=/etc/cdi/nvidia.yaml
COMPOSE_FILES := -f compose.yml
#
# NOTE: When explicit -f flags are used, Docker Compose does NOT auto-detect
# compose.override.yml. We must include it explicitly when present.
OVERRIDE_FILE := $(wildcard compose.override.yml)
COMPOSE_OVERRIDE := $(if $(OVERRIDE_FILE),-f compose.override.yml,)
COMPOSE_FILES := -f compose.yml $(COMPOSE_OVERRIDE)
ifneq (,$(findstring podman,$(COMPOSE)))
ifneq (,$(findstring gpu,$(PROFILE)))
COMPOSE_FILES := -f compose.yml -f compose.podman-gpu.yml
COMPOSE_FILES := -f compose.yml $(COMPOSE_OVERRIDE) -f compose.podman-gpu.yml
endif
else
ifneq (,$(findstring gpu,$(PROFILE)))
COMPOSE_FILES := -f compose.yml -f compose.gpu.yml
COMPOSE_FILES := -f compose.yml $(COMPOSE_OVERRIDE) -f compose.gpu.yml
endif
endif

View file

@ -167,19 +167,25 @@ def check_ports(svc: dict) -> dict[str, dict]:
if free:
# Port is free — start Docker service as normal
resolved = configured
stub_port = configured
external = False
elif adoptable:
# Port is in use by a compatible service — adopt it, skip Docker container
# Port is in use by a compatible service — adopt it.
# resolved = actual external port (used for host.docker.internal URL)
# stub_port = free port for the no-op stub container (avoids binding conflict)
resolved = configured
stub_port = find_free_port(configured + 1)
external = True
else:
# Port in use, not adoptable (e.g. streamlit) — reassign
resolved = find_free_port(configured + 1)
stub_port = resolved
external = False
results[name] = {
"configured": configured,
"resolved": resolved,
"stub_port": stub_port,
"changed": resolved != configured,
"docker_owned": docker_owned,
"adoptable": adoptable,
@ -274,15 +280,16 @@ def update_llm_yaml(ports: dict[str, dict]) -> None:
def write_compose_override(ports: dict[str, dict]) -> None:
"""
Generate compose.override.yml to disable Docker services that are being
Generate compose.override.yml to stub out Docker services that are being
adopted from external processes. Cleans up the file when nothing to disable.
Docker Compose auto-applies compose.override.yml — no Makefile change needed.
Overriding `profiles` with an unused name prevents the service from starting
under any normal profile (remote/cpu/single-gpu/dual-gpu).
Stubbing strategy (not profiles): changing a service's profile to an unused
value breaks depends_on references — Docker treats it as undefined. Instead
we replace the service with a no-op stub that:
- Stays alive (sleep infinity) so depends_on: service_started is satisfied
- Reports healthy immediately so depends_on: service_healthy is satisfied
- Binds no ports (no conflict with the external service on the host)
"""
# Only disable services that Docker would normally start (docker_owned=True)
# and are being adopted from an external process.
to_disable = {
name: info for name, info in ports.items()
if info["external"] and info["docker_owned"]
@ -295,13 +302,22 @@ def write_compose_override(ports: dict[str, dict]) -> None:
lines = [
"# compose.override.yml — AUTO-GENERATED by preflight.py, do not edit manually.",
"# Disables Docker services that are already running externally on the host.",
"# Stubs out Docker services whose ports are already in use by host services.",
"# Re-run preflight (make preflight) to regenerate after stopping host services.",
"services:",
]
for name, info in to_disable.items():
lines.append(f" {name}:")
lines.append(f" profiles: [_external_] # adopted: host service on :{info['resolved']}")
lines += [
f" {name}: # adopted — host service on :{info['resolved']}",
f" entrypoint: [\"/bin/sh\", \"-c\", \"sleep infinity\"]",
f" ports: []",
f" healthcheck:",
f" test: [\"CMD\", \"true\"]",
f" interval: 1s",
f" timeout: 1s",
f" start_period: 0s",
f" retries: 1",
]
OVERRIDE_YML.write_text("\n".join(lines) + "\n")
@ -401,7 +417,9 @@ def main() -> None:
print("╚════════════════════════════════════════════════════╝")
if not args.check_only:
env_updates: dict[str, str] = {i["env_var"]: str(i["resolved"]) for i in ports.values()}
# For adopted services, write stub_port to .env so the no-op container
# binds a harmless free port instead of conflicting with the external service.
env_updates: dict[str, str] = {i["env_var"]: str(i["stub_port"]) for i in ports.values()}
env_updates["RECOMMENDED_PROFILE"] = profile
if offload_gb > 0:
env_updates["CPU_OFFLOAD_GB"] = str(offload_gb)