diff --git a/circuitforge_core/text/app.py b/circuitforge_core/text/app.py index b2759cb..fc84f35 100644 --- a/circuitforge_core/text/app.py +++ b/circuitforge_core/text/app.py @@ -16,6 +16,12 @@ Usage: --port 8006 \ --gpu-id 0 +Multi-GPU (spans two GPUs via CUDA_VISIBLE_DEVICES, device_map=auto): + python -m circuitforge_core.text.app \ + --model /Library/Assets/LLM/deepseek-14b \ + --port 8006 \ + --gpu-ids 0,1 + Mock mode (no model or GPU required): CF_TEXT_MOCK=1 python -m circuitforge_core.text.app --port 8006 """ @@ -111,9 +117,17 @@ class OAIChatResponse(BaseModel): def create_app( model_path: str, gpu_id: int = 0, + gpu_ids: str | None = None, backend: str | None = None, mock: bool = False, ) -> FastAPI: + """Create and return the cf-text FastAPI app. + + ``gpu_ids``: comma-separated CUDA device indices for multi-GPU spanning + (e.g. "0,1"). When set, overrides ``gpu_id`` and sets + ``CUDA_VISIBLE_DEVICES`` to the full list (unless already set in the environment) so HuggingFace Accelerate's + ``device_map="auto"`` can shard the model across all listed devices. + """ global _backend if not mock and not model_path: @@ -122,7 +136,8 @@ def create_app( "Pass a GGUF path, a HuggingFace model ID, or set CF_TEXT_MOCK=1 for mock mode." ) - os.environ.setdefault("CUDA_VISIBLE_DEVICES", str(gpu_id)) + visible = gpu_ids if gpu_ids else str(gpu_id) + os.environ.setdefault("CUDA_VISIBLE_DEVICES", visible) _backend = make_text_backend(model_path, backend=backend, mock=mock) logger.info("cf-text ready: model=%r vram=%dMB", _backend.model_name, _backend.vram_mb) @@ -211,7 +226,10 @@ def _parse_args() -> argparse.Namespace: parser.add_argument("--port", type=int, default=8006) parser.add_argument("--host", default="0.0.0.0") parser.add_argument("--gpu-id", type=int, default=0, - help="CUDA device index to use") + help="CUDA device index to use (single GPU)") + parser.add_argument("--gpu-ids", default=None, + help="Comma-separated CUDA device indices for multi-GPU spanning " + "(e.g. '0,1'). 
Overrides --gpu-id when set.") parser.add_argument("--backend", choices=["llamacpp", "transformers"], default=None) parser.add_argument("--mock", action="store_true", help="Run in mock mode (no model or GPU needed)") @@ -226,6 +244,7 @@ if __name__ == "__main__": app = create_app( model_path=args.model, gpu_id=args.gpu_id, + gpu_ids=args.gpu_ids, backend=args.backend, mock=mock, ) diff --git a/circuitforge_core/text/backends/base.py b/circuitforge_core/text/backends/base.py index 8233165..4982778 100644 --- a/circuitforge_core/text/backends/base.py +++ b/circuitforge_core/text/backends/base.py @@ -121,17 +121,19 @@ class TextBackend(Protocol): def _select_backend(model_path: str, backend: str | None) -> str: """ - Return "llamacpp" or "transformers" for the given model path. + Return "llamacpp", "transformers", "ollama", or "vllm" for the given model path. Parameters ---------- - model_path Path to the model file or HuggingFace repo ID (e.g. "Qwen/Qwen2.5-3B"). - backend Explicit override from the caller ("llamacpp" | "transformers" | None). + model_path Path to the model file, HuggingFace repo ID, "ollama://", + or "vllm://". + backend Explicit override from the caller + ("llamacpp" | "transformers" | "ollama" | "vllm" | None). When provided, trust it without inspection. - Return "llamacpp" or "transformers". Raise ValueError for unrecognised values. + Raise ValueError for unrecognised override values. """ - _VALID = ("llamacpp", "transformers") + _VALID = ("llamacpp", "transformers", "ollama", "vllm") # 1. Caller-supplied override — highest trust, no inspection needed. resolved = backend or os.environ.get("CF_TEXT_BACKEND") @@ -142,11 +144,17 @@ def _select_backend(model_path: str, backend: str | None) -> str: ) return resolved - # 2. Format detection — GGUF files are unambiguously llama-cpp territory. + # 2. Proxy prefixes — unambiguous routing regardless of model name format. 
+ if model_path.startswith("ollama://"): + return "ollama" + if model_path.startswith("vllm://"): + return "vllm" + + # 3. Format detection — GGUF files are unambiguously llama-cpp territory. if model_path.lower().endswith(".gguf"): return "llamacpp" - # 3. Safe default — transformers covers HF repo IDs and safetensors dirs. + # 4. Safe default — transformers covers HF repo IDs and safetensors dirs. return "transformers" @@ -179,4 +187,12 @@ def make_text_backend( from circuitforge_core.text.backends.transformers import TransformersBackend return TransformersBackend(model_path=model_path) - raise ValueError(f"Unknown backend {resolved!r}. Expected 'llamacpp' or 'transformers'.") + if resolved == "ollama": + from circuitforge_core.text.backends.ollama import OllamaBackend + return OllamaBackend(model_path=model_path) + + if resolved == "vllm": + from circuitforge_core.text.backends.vllm import VllmBackend + return VllmBackend(model_path=model_path) + + raise ValueError(f"Unknown backend {resolved!r}. Expected 'llamacpp', 'transformers', 'ollama', or 'vllm'.")