fix: stub-port adoption — stubs bind free ports, app routes to external via host.docker.internal
Three inter-related fixes for the service adoption flow:
- preflight: stub_port field — adopted services get a free port for their no-op container (avoiding a bind conflict with the external service on the real port), while update_llm_yaml still uses the real external port for host.docker.internal URLs
- preflight: write_env now uses stub_port (not resolved) for adopted services, so SEARXNG_PORT etc. point to the stub's harmless port, not the occupied one
- preflight: stub containers use sleep infinity + a CMD true healthcheck, so depends_on: service_healthy is satisfied without holding any real port
- Makefile: finetune profile changed from [cpu,single-gpu,dual-gpu] to [finetune] so the pytorch/cuda base image is not built during make start
This commit is contained in:
parent
7c62935371
commit
1d228b293b
2 changed files with 38 additions and 14 deletions
12
Makefile
12
Makefile
|
|
@ -18,14 +18,20 @@ COMPOSE ?= $(shell \
|
|||
# GPU profiles require an overlay for NVIDIA device reservations.
|
||||
# Docker uses deploy.resources (compose.gpu.yml); Podman uses CDI device specs (compose.podman-gpu.yml).
|
||||
# Generate CDI spec for Podman first: sudo nvidia-ctk cdi generate --output=/etc/cdi/nvidia.yaml
|
||||
COMPOSE_FILES := -f compose.yml
|
||||
#
|
||||
# NOTE: When explicit -f flags are used, Docker Compose does NOT auto-detect
|
||||
# compose.override.yml. We must include it explicitly when present.
|
||||
OVERRIDE_FILE := $(wildcard compose.override.yml)
|
||||
COMPOSE_OVERRIDE := $(if $(OVERRIDE_FILE),-f compose.override.yml,)
|
||||
|
||||
COMPOSE_FILES := -f compose.yml $(COMPOSE_OVERRIDE)
|
||||
ifneq (,$(findstring podman,$(COMPOSE)))
|
||||
ifneq (,$(findstring gpu,$(PROFILE)))
|
||||
COMPOSE_FILES := -f compose.yml -f compose.podman-gpu.yml
|
||||
COMPOSE_FILES := -f compose.yml $(COMPOSE_OVERRIDE) -f compose.podman-gpu.yml
|
||||
endif
|
||||
else
|
||||
ifneq (,$(findstring gpu,$(PROFILE)))
|
||||
COMPOSE_FILES := -f compose.yml -f compose.gpu.yml
|
||||
COMPOSE_FILES := -f compose.yml $(COMPOSE_OVERRIDE) -f compose.gpu.yml
|
||||
endif
|
||||
endif
|
||||
|
||||
|
|
|
|||
|
|
@ -167,19 +167,25 @@ def check_ports(svc: dict) -> dict[str, dict]:
|
|||
if free:
|
||||
# Port is free — start Docker service as normal
|
||||
resolved = configured
|
||||
stub_port = configured
|
||||
external = False
|
||||
elif adoptable:
|
||||
# Port is in use by a compatible service — adopt it, skip Docker container
|
||||
# Port is in use by a compatible service — adopt it.
|
||||
# resolved = actual external port (used for host.docker.internal URL)
|
||||
# stub_port = free port for the no-op stub container (avoids binding conflict)
|
||||
resolved = configured
|
||||
stub_port = find_free_port(configured + 1)
|
||||
external = True
|
||||
else:
|
||||
# Port in use, not adoptable (e.g. streamlit) — reassign
|
||||
resolved = find_free_port(configured + 1)
|
||||
stub_port = resolved
|
||||
external = False
|
||||
|
||||
results[name] = {
|
||||
"configured": configured,
|
||||
"resolved": resolved,
|
||||
"stub_port": stub_port,
|
||||
"changed": resolved != configured,
|
||||
"docker_owned": docker_owned,
|
||||
"adoptable": adoptable,
|
||||
|
|
@ -274,15 +280,16 @@ def update_llm_yaml(ports: dict[str, dict]) -> None:
|
|||
|
||||
def write_compose_override(ports: dict[str, dict]) -> None:
|
||||
"""
|
||||
Generate compose.override.yml to disable Docker services that are being
|
||||
Generate compose.override.yml to stub out Docker services that are being
|
||||
adopted from external processes. Cleans up the file when nothing to disable.
|
||||
|
||||
Docker Compose auto-applies compose.override.yml — no Makefile change needed.
|
||||
Overriding `profiles` with an unused name prevents the service from starting
|
||||
under any normal profile (remote/cpu/single-gpu/dual-gpu).
|
||||
Stubbing strategy (not profiles): changing a service's profile to an unused
|
||||
value breaks depends_on references — Docker treats it as undefined. Instead
|
||||
we replace the service with a no-op stub that:
|
||||
- Stays alive (sleep infinity) so depends_on: service_started is satisfied
|
||||
- Reports healthy immediately so depends_on: service_healthy is satisfied
|
||||
- Binds no ports (no conflict with the external service on the host)
|
||||
"""
|
||||
# Only disable services that Docker would normally start (docker_owned=True)
|
||||
# and are being adopted from an external process.
|
||||
to_disable = {
|
||||
name: info for name, info in ports.items()
|
||||
if info["external"] and info["docker_owned"]
|
||||
|
|
@ -295,13 +302,22 @@ def write_compose_override(ports: dict[str, dict]) -> None:
|
|||
|
||||
lines = [
|
||||
"# compose.override.yml — AUTO-GENERATED by preflight.py, do not edit manually.",
|
||||
"# Disables Docker services that are already running externally on the host.",
|
||||
"# Stubs out Docker services whose ports are already in use by host services.",
|
||||
"# Re-run preflight (make preflight) to regenerate after stopping host services.",
|
||||
"services:",
|
||||
]
|
||||
for name, info in to_disable.items():
|
||||
lines.append(f" {name}:")
|
||||
lines.append(f" profiles: [_external_] # adopted: host service on :{info['resolved']}")
|
||||
lines += [
|
||||
f" {name}: # adopted — host service on :{info['resolved']}",
|
||||
f" entrypoint: [\"/bin/sh\", \"-c\", \"sleep infinity\"]",
|
||||
f" ports: []",
|
||||
f" healthcheck:",
|
||||
f" test: [\"CMD\", \"true\"]",
|
||||
f" interval: 1s",
|
||||
f" timeout: 1s",
|
||||
f" start_period: 0s",
|
||||
f" retries: 1",
|
||||
]
|
||||
|
||||
OVERRIDE_YML.write_text("\n".join(lines) + "\n")
|
||||
|
||||
|
|
@ -401,7 +417,9 @@ def main() -> None:
|
|||
print("╚════════════════════════════════════════════════════╝")
|
||||
|
||||
if not args.check_only:
|
||||
env_updates: dict[str, str] = {i["env_var"]: str(i["resolved"]) for i in ports.values()}
|
||||
# For adopted services, write stub_port to .env so the no-op container
|
||||
# binds a harmless free port instead of conflicting with the external service.
|
||||
env_updates: dict[str, str] = {i["env_var"]: str(i["stub_port"]) for i in ports.values()}
|
||||
env_updates["RECOMMENDED_PROFILE"] = profile
|
||||
if offload_gb > 0:
|
||||
env_updates["CPU_OFFLOAD_GB"] = str(offload_gb)
|
||||
|
|
|
|||
Loading…
Reference in a new issue