# app/services/meal_plan/llm_router.py
# BSL 1.1 — LLM feature
"""Provide a router-compatible LLM client for meal plan generation tasks.

Cloud (CF_ORCH_URL set):
  Allocates a cf-text service via cf-orch (3B-7B GGUF, ~2GB VRAM).
  Returns an _OrchTextRouter that wraps the cf-text HTTP endpoint
  with a .complete(system, user, **kwargs) interface.

Local / self-hosted (no CF_ORCH_URL):
  Returns an LLMRouter instance which tries ollama, vllm, or any
  backend configured in ~/.config/circuitforge/llm.yaml.

Both paths expose the same interface so llm_timing.py and llm_planner.py
need no knowledge of the backend.
"""
from __future__ import annotations

import logging
import os
from contextlib import nullcontext

logger = logging.getLogger(__name__)

# cf-orch service name and VRAM budget for meal plan LLM tasks.
# These are lighter than recipe_llm (4.0 GB) — cf-text handles them.
_SERVICE_TYPE = "cf-text"
_TTL_S = 120.0
_CALLER = "kiwi-meal-plan"


class _OrchTextRouter:
    """Thin adapter that makes a cf-text HTTP endpoint look like LLMRouter."""

    def __init__(self, base_url: str) -> None:
        self._base_url = base_url.rstrip("/")

    def complete(
        self,
        system: str = "",
        user: str = "",
        max_tokens: int = 512,
        temperature: float = 0.7,
        **_kwargs,
    ) -> str:
        from openai import OpenAI
        client = OpenAI(base_url=self._base_url + "/v1", api_key="any")
        messages = []
        if system:
            messages.append({"role": "system", "content": system})
        messages.append({"role": "user", "content": user})
        try:
            model = client.models.list().data[0].id
        except Exception:
            model = "local"
        resp = client.chat.completions.create(
            model=model,
            messages=messages,
            max_tokens=max_tokens,
            temperature=temperature,
        )
        return resp.choices[0].message.content or ""


def get_meal_plan_router():
    """Return an LLM client for meal plan tasks.

    Tries cf-orch cf-text allocation first (cloud); falls back to LLMRouter
    (local ollama/vllm). Returns None if no backend is available.
    """
    cf_orch_url = os.environ.get("CF_ORCH_URL")
    if cf_orch_url:
        try:
            from circuitforge_orch.client import CFOrchClient
            client = CFOrchClient(cf_orch_url)
            ctx = client.allocate(
                service=_SERVICE_TYPE,
                ttl_s=_TTL_S,
                caller=_CALLER,
            )
            alloc = ctx.__enter__()
            if alloc is not None:
                return _OrchTextRouter(alloc.url), ctx
        except Exception as exc:
            logger.debug("cf-orch cf-text allocation failed, falling back to LLMRouter: %s", exc)

    # Local fallback: LLMRouter (ollama / vllm / openai-compat)
    try:
        from circuitforge_core.llm.router import LLMRouter
        return LLMRouter(), nullcontext(None)
    except FileNotFoundError:
        logger.debug("LLMRouter: no llm.yaml and no LLM env vars — meal plan LLM disabled")
        return None, nullcontext(None)
    except Exception as exc:
        logger.debug("LLMRouter init failed: %s", exc)
        return None, nullcontext(None)