fix: stub-port adoption — stubs bind free ports, app routes to external via host.docker.internal
Three inter-related fixes for the service adoption flow:
- preflight: stub_port field — adopted services get a free port for their no-op container (avoiding a bind conflict with the external service on the real port), while update_llm_yaml still uses the real external port for host.docker.internal URLs
- preflight: write_env now uses stub_port (not resolved) for adopted services, so SEARXNG_PORT etc. point to the stub's harmless port, not the occupied one
- preflight: stub containers use `sleep infinity` plus a `CMD true` healthcheck, so depends_on: service_healthy is satisfied without holding any real port
- Makefile: finetune profile changed from [cpu,single-gpu,dual-gpu] to [finetune], so the pytorch/cuda base image is not built during make start
This commit is contained in:
parent
7f8dc18a92
commit
ecf44ea6c5
2 changed files with 38 additions and 14 deletions
12
Makefile
12
Makefile
|
|
@ -18,14 +18,20 @@ COMPOSE ?= $(shell \
|
||||||
# GPU profiles require an overlay for NVIDIA device reservations.
|
# GPU profiles require an overlay for NVIDIA device reservations.
|
||||||
# Docker uses deploy.resources (compose.gpu.yml); Podman uses CDI device specs (compose.podman-gpu.yml).
|
# Docker uses deploy.resources (compose.gpu.yml); Podman uses CDI device specs (compose.podman-gpu.yml).
|
||||||
# Generate CDI spec for Podman first: sudo nvidia-ctk cdi generate --output=/etc/cdi/nvidia.yaml
|
# Generate CDI spec for Podman first: sudo nvidia-ctk cdi generate --output=/etc/cdi/nvidia.yaml
|
||||||
COMPOSE_FILES := -f compose.yml
|
#
|
||||||
|
# NOTE: When explicit -f flags are used, Docker Compose does NOT auto-detect
|
||||||
|
# compose.override.yml. We must include it explicitly when present.
|
||||||
|
OVERRIDE_FILE := $(wildcard compose.override.yml)
|
||||||
|
COMPOSE_OVERRIDE := $(if $(OVERRIDE_FILE),-f compose.override.yml,)
|
||||||
|
|
||||||
|
COMPOSE_FILES := -f compose.yml $(COMPOSE_OVERRIDE)
|
||||||
ifneq (,$(findstring podman,$(COMPOSE)))
|
ifneq (,$(findstring podman,$(COMPOSE)))
|
||||||
ifneq (,$(findstring gpu,$(PROFILE)))
|
ifneq (,$(findstring gpu,$(PROFILE)))
|
||||||
COMPOSE_FILES := -f compose.yml -f compose.podman-gpu.yml
|
COMPOSE_FILES := -f compose.yml $(COMPOSE_OVERRIDE) -f compose.podman-gpu.yml
|
||||||
endif
|
endif
|
||||||
else
|
else
|
||||||
ifneq (,$(findstring gpu,$(PROFILE)))
|
ifneq (,$(findstring gpu,$(PROFILE)))
|
||||||
COMPOSE_FILES := -f compose.yml -f compose.gpu.yml
|
COMPOSE_FILES := -f compose.yml $(COMPOSE_OVERRIDE) -f compose.gpu.yml
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -167,19 +167,25 @@ def check_ports(svc: dict) -> dict[str, dict]:
|
||||||
if free:
|
if free:
|
||||||
# Port is free — start Docker service as normal
|
# Port is free — start Docker service as normal
|
||||||
resolved = configured
|
resolved = configured
|
||||||
|
stub_port = configured
|
||||||
external = False
|
external = False
|
||||||
elif adoptable:
|
elif adoptable:
|
||||||
# Port is in use by a compatible service — adopt it, skip Docker container
|
# Port is in use by a compatible service — adopt it.
|
||||||
|
# resolved = actual external port (used for host.docker.internal URL)
|
||||||
|
# stub_port = free port for the no-op stub container (avoids binding conflict)
|
||||||
resolved = configured
|
resolved = configured
|
||||||
|
stub_port = find_free_port(configured + 1)
|
||||||
external = True
|
external = True
|
||||||
else:
|
else:
|
||||||
# Port in use, not adoptable (e.g. streamlit) — reassign
|
# Port in use, not adoptable (e.g. streamlit) — reassign
|
||||||
resolved = find_free_port(configured + 1)
|
resolved = find_free_port(configured + 1)
|
||||||
|
stub_port = resolved
|
||||||
external = False
|
external = False
|
||||||
|
|
||||||
results[name] = {
|
results[name] = {
|
||||||
"configured": configured,
|
"configured": configured,
|
||||||
"resolved": resolved,
|
"resolved": resolved,
|
||||||
|
"stub_port": stub_port,
|
||||||
"changed": resolved != configured,
|
"changed": resolved != configured,
|
||||||
"docker_owned": docker_owned,
|
"docker_owned": docker_owned,
|
||||||
"adoptable": adoptable,
|
"adoptable": adoptable,
|
||||||
|
|
@ -274,15 +280,16 @@ def update_llm_yaml(ports: dict[str, dict]) -> None:
|
||||||
|
|
||||||
def write_compose_override(ports: dict[str, dict]) -> None:
|
def write_compose_override(ports: dict[str, dict]) -> None:
|
||||||
"""
|
"""
|
||||||
Generate compose.override.yml to disable Docker services that are being
|
Generate compose.override.yml to stub out Docker services that are being
|
||||||
adopted from external processes. Cleans up the file when nothing to disable.
|
adopted from external processes. Cleans up the file when nothing to disable.
|
||||||
|
|
||||||
Docker Compose auto-applies compose.override.yml — no Makefile change needed.
|
Stubbing strategy (not profiles): changing a service's profile to an unused
|
||||||
Overriding `profiles` with an unused name prevents the service from starting
|
value breaks depends_on references — Docker treats it as undefined. Instead
|
||||||
under any normal profile (remote/cpu/single-gpu/dual-gpu).
|
we replace the service with a no-op stub that:
|
||||||
|
- Stays alive (sleep infinity) so depends_on: service_started is satisfied
|
||||||
|
- Reports healthy immediately so depends_on: service_healthy is satisfied
|
||||||
|
- Binds no ports (no conflict with the external service on the host)
|
||||||
"""
|
"""
|
||||||
# Only disable services that Docker would normally start (docker_owned=True)
|
|
||||||
# and are being adopted from an external process.
|
|
||||||
to_disable = {
|
to_disable = {
|
||||||
name: info for name, info in ports.items()
|
name: info for name, info in ports.items()
|
||||||
if info["external"] and info["docker_owned"]
|
if info["external"] and info["docker_owned"]
|
||||||
|
|
@ -295,13 +302,22 @@ def write_compose_override(ports: dict[str, dict]) -> None:
|
||||||
|
|
||||||
lines = [
|
lines = [
|
||||||
"# compose.override.yml — AUTO-GENERATED by preflight.py, do not edit manually.",
|
"# compose.override.yml — AUTO-GENERATED by preflight.py, do not edit manually.",
|
||||||
"# Disables Docker services that are already running externally on the host.",
|
"# Stubs out Docker services whose ports are already in use by host services.",
|
||||||
"# Re-run preflight (make preflight) to regenerate after stopping host services.",
|
"# Re-run preflight (make preflight) to regenerate after stopping host services.",
|
||||||
"services:",
|
"services:",
|
||||||
]
|
]
|
||||||
for name, info in to_disable.items():
|
for name, info in to_disable.items():
|
||||||
lines.append(f" {name}:")
|
lines += [
|
||||||
lines.append(f" profiles: [_external_] # adopted: host service on :{info['resolved']}")
|
f" {name}: # adopted — host service on :{info['resolved']}",
|
||||||
|
f" entrypoint: [\"/bin/sh\", \"-c\", \"sleep infinity\"]",
|
||||||
|
f" ports: []",
|
||||||
|
f" healthcheck:",
|
||||||
|
f" test: [\"CMD\", \"true\"]",
|
||||||
|
f" interval: 1s",
|
||||||
|
f" timeout: 1s",
|
||||||
|
f" start_period: 0s",
|
||||||
|
f" retries: 1",
|
||||||
|
]
|
||||||
|
|
||||||
OVERRIDE_YML.write_text("\n".join(lines) + "\n")
|
OVERRIDE_YML.write_text("\n".join(lines) + "\n")
|
||||||
|
|
||||||
|
|
@ -401,7 +417,9 @@ def main() -> None:
|
||||||
print("╚════════════════════════════════════════════════════╝")
|
print("╚════════════════════════════════════════════════════╝")
|
||||||
|
|
||||||
if not args.check_only:
|
if not args.check_only:
|
||||||
env_updates: dict[str, str] = {i["env_var"]: str(i["resolved"]) for i in ports.values()}
|
# For adopted services, write stub_port to .env so the no-op container
|
||||||
|
# binds a harmless free port instead of conflicting with the external service.
|
||||||
|
env_updates: dict[str, str] = {i["env_var"]: str(i["stub_port"]) for i in ports.values()}
|
||||||
env_updates["RECOMMENDED_PROFILE"] = profile
|
env_updates["RECOMMENDED_PROFILE"] = profile
|
||||||
if offload_gb > 0:
|
if offload_gb > 0:
|
||||||
env_updates["CPU_OFFLOAD_GB"] = str(offload_gb)
|
env_updates["CPU_OFFLOAD_GB"] = str(offload_gb)
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue