Compare commits
No commits in common. "main" and "feature/reranker-integration" have entirely different histories.
main ... feature/reranker-integration
4 changed files with 35 additions and 992 deletions
compose.cloud.yml
@@ -6,15 +6,46 @@
 # Caddy injects the Directus session cookie as X-CF-Session header before forwarding.
 # cloud_session.py resolves user_id → per-user db_path at session init.
 #
-# Services: api (FastAPI :8601), web (Vue :8508), searxng (internal)
-# Streamlit app service removed — Vue+FastAPI is the only frontend (peregrine#104).
-#
 # Usage:
 # docker compose -f compose.cloud.yml --project-name peregrine-cloud up -d
 # docker compose -f compose.cloud.yml --project-name peregrine-cloud down
-# docker compose -f compose.cloud.yml --project-name peregrine-cloud logs api -f
+# docker compose -f compose.cloud.yml --project-name peregrine-cloud logs app -f
 
 services:
+  app:
+    build:
+      context: ..
+      dockerfile: peregrine/Dockerfile.cfcore
+    container_name: peregrine-cloud
+    ports:
+      - "8505:8501"
+    volumes:
+      - /devl/menagerie-data:/devl/menagerie-data  # per-user data trees
+      - ./config/llm.cloud.yaml:/app/config/llm.yaml:ro  # cloud-safe backends only (no claude_code/copilot/anthropic)
+    environment:
+      - CLOUD_MODE=true
+      - CLOUD_DATA_ROOT=/devl/menagerie-data
+      - DIRECTUS_JWT_SECRET=${DIRECTUS_JWT_SECRET}
+      - CF_SERVER_SECRET=${CF_SERVER_SECRET}
+      - PLATFORM_DB_URL=${PLATFORM_DB_URL}
+      - HEIMDALL_URL=${HEIMDALL_URL:-http://cf-license:8000}
+      - HEIMDALL_ADMIN_TOKEN=${HEIMDALL_ADMIN_TOKEN}
+      - STAGING_DB=/devl/menagerie-data/cloud-default.db  # fallback only — never used
+      - DOCS_DIR=/tmp/cloud-docs
+      - STREAMLIT_SERVER_BASE_URL_PATH=peregrine
+      - PYTHONUNBUFFERED=1
+      - PEREGRINE_CADDY_PROXY=1
+      - CF_ORCH_URL=http://host.docker.internal:7700
+      - CF_APP_NAME=peregrine
+      - DEMO_MODE=false
+      - FORGEJO_API_TOKEN=${FORGEJO_API_TOKEN:-}
+    depends_on:
+      searxng:
+        condition: service_healthy
+    extra_hosts:
+      - "host.docker.internal:host-gateway"
+    restart: unless-stopped
+
   api:
     build:
       context: ..
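For orientation, a minimal sketch of the flow the comments above describe: Caddy forwards the Directus session as an X-CF-Session header, and the app maps the authenticated user to a per-user data tree under CLOUD_DATA_ROOT. Only the header name and env vars come from this diff; the route, the _user_id_from_token helper, and the db filename are hypothetical stand-ins for the real resolution in cloud_session.py below.

```python
import os
from pathlib import Path

from fastapi import FastAPI, Header, HTTPException

app = FastAPI()


def _user_id_from_token(token: str) -> str:
    # Hypothetical: the real service validates the Directus session
    # (DIRECTUS_JWT_SECRET) and extracts the user UUID. Here we only
    # require a non-empty token and pretend it is the user_id.
    if not token:
        raise HTTPException(status_code=401, detail="missing session")
    return token


@app.get("/whoami")
def whoami(x_cf_session: str = Header(default="")) -> dict[str, str]:
    user_id = _user_id_from_token(x_cf_session)
    # Per-user data tree under CLOUD_DATA_ROOT, as the compose comments describe.
    root = Path(os.environ.get("CLOUD_DATA_ROOT", "/devl/menagerie-data"))
    db_path = root / user_id / "peregrine.db"  # hypothetical filename
    return {"user_id": user_id, "db_path": str(db_path)}
```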
app/cloud_session.py (deleted)
@@ -1,89 +0,0 @@
"""
Peregrine cloud session — thin wrapper around cf_core.cloud_session.

Sets request-scoped ContextVars with the authenticated user_id, tier, and
custom writing model so that _allocate_orch_async in llm.py can forward them
to cf-orch without any service function signature changes.

Usage — add to main.py once:

    from app.cloud_session import session_middleware_dep
    app = FastAPI(..., dependencies=[Depends(session_middleware_dep)])

From that point, any route (and every service/llm function it calls)
has access to the current user context via llm.get_request_*() helpers.

Writing model resolution order (first match wins):
  1. USER_WRITING_MODELS env var — JSON dict mapping Directus UUID → model name
     e.g. USER_WRITING_MODELS={"5b99ca9f-...": "meghan-letter-writer:latest"}
     Use this for Monday; no Heimdall changes required.
  2. session.meta["custom_writing_model"] — returned by Heimdall resolve endpoint
     once Heimdall is updated to expose user_preferences fields.
"""
from __future__ import annotations

import json
import logging
import os

from fastapi import Depends, Request, Response

from circuitforge_core.cloud_session import CloudSessionFactory, CloudUser, detect_byok

log = logging.getLogger(__name__)

__all__ = ["CloudUser", "get_session", "require_tier", "session_middleware_dep"]


# JSON dict mapping Directus user UUID → custom writing model name.
# Used until Heimdall's resolve endpoint exposes user_preferences.
def _load_user_writing_models() -> dict[str, str]:
    raw = os.environ.get("USER_WRITING_MODELS", "").strip()
    if not raw:
        return {}
    try:
        return json.loads(raw)
    except json.JSONDecodeError:
        log.warning("USER_WRITING_MODELS is not valid JSON — ignoring")
        return {}


_USER_WRITING_MODELS: dict[str, str] = _load_user_writing_models()


_factory = CloudSessionFactory(
    product="peregrine",
    byok_detector=detect_byok,
)

get_session = _factory.dependency()
require_tier = _factory.require_tier


def session_middleware_dep(request: Request, response: Response) -> None:
    """Global FastAPI dependency — resolves the session and sets request-scoped
    ContextVars so llm._allocate_orch_async can forward them to cf-orch.

    Sets:
      - user_id: real cloud UUID, or None for local/anon sessions
      - tier: the resolved tier string (free/paid/premium/ultra/local)
      - writing_model: custom fine-tuned model from Heimdall meta, or None

    Add as a global dependency in main.py:
        app = FastAPI(..., dependencies=[Depends(session_middleware_dep)])
    """
    from app.llm import set_request_tier, set_request_user_id, set_request_writing_model

    session = _factory.resolve(request, response)
    user_id = session.user_id

    # Only forward real cloud UUIDs — local/dev/anon sessions use the shared catalog
    if user_id in (None, "local", "local-dev") or (user_id or "").startswith("anon-"):
        user_id = None

    set_request_user_id(user_id)
    set_request_tier(session.tier)
    # Resolution order: env-var map (Monday path) → Heimdall meta (future path)
    writing_model = (
        _USER_WRITING_MODELS.get(session.user_id)
        or session.meta.get("custom_writing_model")
    )
    set_request_writing_model(writing_model)
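The resolution order documented in the docstring is easy to exercise in isolation. A minimal sketch, assuming only the USER_WRITING_MODELS contract above — the user ids and the Heimdall meta dict are faked:

```python
import json
import os


# First match wins: env-var map, then Heimdall session meta (same logic as
# session_middleware_dep, extracted for illustration).
def resolve_writing_model(user_id: str, meta: dict) -> str | None:
    env_map = json.loads(os.environ.get("USER_WRITING_MODELS", "{}"))
    return env_map.get(user_id) or meta.get("custom_writing_model")


os.environ["USER_WRITING_MODELS"] = json.dumps(
    {"user-a": "meghan-letter-writer:latest"}  # fake user id for the demo
)

# Env map takes precedence over Heimdall meta for a mapped user...
print(resolve_writing_model("user-a", {"custom_writing_model": "from-heimdall"}))
# -> meghan-letter-writer:latest

# ...while unmapped users fall through to the meta value, then None.
print(resolve_writing_model("user-b", {"custom_writing_model": "from-heimdall"}))
# -> from-heimdall
print(resolve_writing_model("user-c", {}))
# -> None
```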
app/llm.py (deleted)
@@ -1,811 +0,0 @@
"""LiteLLM wrapper for multi-provider AI support."""

import json
import logging
import os
import re
from contextlib import asynccontextmanager
from contextvars import ContextVar
from dataclasses import dataclass
from typing import Any

import httpx
import litellm
from pydantic import BaseModel

from app.config import settings

# LLM timeout configuration (seconds) - base values
LLM_TIMEOUT_HEALTH_CHECK = 30
LLM_TIMEOUT_COMPLETION = 120
LLM_TIMEOUT_JSON = 180  # JSON completions may take longer

# LLM-004: OpenRouter JSON-capable models (explicit allowlist)
OPENROUTER_JSON_CAPABLE_MODELS = {
    # Anthropic models
    "anthropic/claude-3-opus",
    "anthropic/claude-3-sonnet",
    "anthropic/claude-3-haiku",
    "anthropic/claude-3.5-sonnet",
    "anthropic/claude-3.5-haiku",
    "anthropic/claude-haiku-4-5-20251001",
    "anthropic/claude-sonnet-4-20250514",
    "anthropic/claude-opus-4-20250514",
    # OpenAI models
    "openai/gpt-4-turbo",
    "openai/gpt-4",
    "openai/gpt-4o",
    "openai/gpt-4o-mini",
    "openai/gpt-3.5-turbo",
    "openai/gpt-5-nano-2025-08-07",
    # Google models
    "google/gemini-pro",
    "google/gemini-1.5-pro",
    "google/gemini-1.5-flash",
    "google/gemini-2.0-flash",
    "google/gemini-3-flash-preview",
    # DeepSeek models
    "deepseek/deepseek-chat",
    "deepseek/deepseek-reasoner",
    # Mistral models
    "mistralai/mistral-large",
    "mistralai/mistral-medium",
}

# JSON-010: JSON extraction safety limits
MAX_JSON_EXTRACTION_RECURSION = 10
MAX_JSON_CONTENT_SIZE = 1024 * 1024  # 1MB

# Request-scoped user_id — set once by session_middleware_dep, read inside _allocate_orch_async.
# ContextVar is safe for concurrent async requests: each request task gets its own copy.
_request_user_id: ContextVar[str | None] = ContextVar("request_user_id", default=None)


def set_request_user_id(user_id: str | None) -> None:
    _request_user_id.set(user_id)


def get_request_user_id() -> str | None:
    return _request_user_id.get()


class LLMConfig(BaseModel):
    """LLM configuration model."""

    provider: str
    model: str
    api_key: str
    api_base: str | None = None


@dataclass
class _OrchAllocation:
    allocation_id: str
    url: str
    service: str


@asynccontextmanager
async def _allocate_orch_async(
    coordinator_url: str,
    service: str,
    model_candidates: list[str],
    ttl_s: float,
    caller: str,
):
    """Async context manager that allocates a cf-orch service and releases on exit."""
    async with httpx.AsyncClient(timeout=120.0) as client:
        payload: dict[str, Any] = {
            "model_candidates": model_candidates,
            "ttl_s": ttl_s,
            "caller": caller,
        }
        uid = get_request_user_id()
        if uid:
            payload["user_id"] = uid
        resp = await client.post(
            f"{coordinator_url.rstrip('/')}/api/services/{service}/allocate",
            json=payload,
        )
        if not resp.is_success:
            raise RuntimeError(
                f"cf-orch allocation failed for {service!r}: "
                f"HTTP {resp.status_code} — {resp.text[:200]}"
            )
        data = resp.json()
        alloc = _OrchAllocation(
            allocation_id=data["allocation_id"],
            url=data["url"],
            service=service,
        )
        try:
            yield alloc
        finally:
            try:
                await client.delete(
                    f"{coordinator_url.rstrip('/')}/api/services/{service}/allocations/{alloc.allocation_id}",
                    timeout=10.0,
                )
            except Exception as exc:
                logging.debug("cf-orch release failed (non-fatal): %s", exc)


def _normalize_api_base(provider: str, api_base: str | None) -> str | None:
    """Normalize api_base for LiteLLM provider-specific expectations.

    When using proxies/aggregators, users often paste a base URL that already
    includes a version segment (e.g., `/v1`). Some LiteLLM provider handlers
    append those segments internally, which can lead to duplicated paths like
    `/v1/v1/...` and cause 404s.
    """
    if not api_base:
        return None

    base = api_base.strip()
    if not base:
        return None

    base = base.rstrip("/")

    # Anthropic handler appends '/v1/messages'. If base already ends with '/v1',
    # strip it to avoid '/v1/v1/messages'.
    if provider == "anthropic" and base.endswith("/v1"):
        base = base[: -len("/v1")].rstrip("/")

    # Gemini handler appends '/v1/models/...'. If base already ends with '/v1',
    # strip it to avoid '/v1/v1/models/...'.
    if provider == "gemini" and base.endswith("/v1"):
        base = base[: -len("/v1")].rstrip("/")

    return base or None


def _extract_text_parts(value: Any, depth: int = 0, max_depth: int = 10) -> list[str]:
    """Recursively extract text segments from nested response structures.

    Handles strings, lists, dicts with 'text'/'content'/'value' keys, and objects
    with text/content attributes. Limits recursion depth to avoid cycles.

    Args:
        value: Input value that may contain text in strings, lists, dicts, or objects.
        depth: Current recursion depth.
        max_depth: Maximum recursion depth before returning no content.

    Returns:
        A list of extracted text segments.
    """
    if depth >= max_depth:
        return []

    if value is None:
        return []

    if isinstance(value, str):
        return [value]

    if isinstance(value, list):
        parts: list[str] = []
        next_depth = depth + 1
        for item in value:
            parts.extend(_extract_text_parts(item, next_depth, max_depth))
        return parts

    if isinstance(value, dict):
        next_depth = depth + 1
        if "text" in value:
            return _extract_text_parts(value.get("text"), next_depth, max_depth)
        if "content" in value:
            return _extract_text_parts(value.get("content"), next_depth, max_depth)
        if "value" in value:
            return _extract_text_parts(value.get("value"), next_depth, max_depth)
        return []

    next_depth = depth + 1
    if hasattr(value, "text"):
        return _extract_text_parts(getattr(value, "text"), next_depth, max_depth)
    if hasattr(value, "content"):
        return _extract_text_parts(getattr(value, "content"), next_depth, max_depth)

    return []


def _join_text_parts(parts: list[str]) -> str | None:
    """Join text parts with newlines, filtering empty strings.

    Args:
        parts: Candidate text segments.

    Returns:
        Joined string or None if the result is empty.
    """
    joined = "\n".join(part for part in parts if part).strip()
    return joined or None


def _extract_message_text(message: Any) -> str | None:
    """Extract plain text from a LiteLLM message object across providers."""
    content: Any = None

    if hasattr(message, "content"):
        content = message.content
    elif isinstance(message, dict):
        content = message.get("content")

    return _join_text_parts(_extract_text_parts(content))


def _extract_choice_text(choice: Any) -> str | None:
    """Extract plain text from a LiteLLM choice object.

    Tries message.content first, then choice.text, then choice.delta. Handles both
    object attributes and dict keys.

    Args:
        choice: LiteLLM choice object or dict.

    Returns:
        Extracted text or None if no content is found.
    """
    message: Any = None
    if hasattr(choice, "message"):
        message = choice.message
    elif isinstance(choice, dict):
        message = choice.get("message")

    content = _extract_message_text(message)
    if content:
        return content

    if hasattr(choice, "text"):
        content = _join_text_parts(_extract_text_parts(getattr(choice, "text")))
        if content:
            return content
    if isinstance(choice, dict) and "text" in choice:
        content = _join_text_parts(_extract_text_parts(choice.get("text")))
        if content:
            return content

    if hasattr(choice, "delta"):
        content = _join_text_parts(_extract_text_parts(getattr(choice, "delta")))
        if content:
            return content
    if isinstance(choice, dict) and "delta" in choice:
        content = _join_text_parts(_extract_text_parts(choice.get("delta")))
        if content:
            return content

    return None


def _to_code_block(content: str | None, language: str = "text") -> str:
    """Wrap content in a markdown code block for client display."""
    text = (content or "").strip()
    if not text:
        text = "<empty>"
    return f"```{language}\n{text}\n```"


def _load_stored_config() -> dict:
    """Load config from config.json file."""
    config_path = settings.config_path
    if config_path.exists():
        try:
            return json.loads(config_path.read_text())
        except (json.JSONDecodeError, OSError):
            return {}
    return {}


def get_llm_config() -> LLMConfig:
    """Get current LLM configuration.

    Priority: config.json file > environment variables/settings
    """
    stored = _load_stored_config()

    return LLMConfig(
        provider=stored.get("provider", settings.llm_provider),
        model=stored.get("model", settings.llm_model),
        api_key=stored.get("api_key", settings.llm_api_key),
        api_base=stored.get("api_base", settings.llm_api_base),
    )


def get_model_name(config: LLMConfig) -> str:
    """Convert provider/model to LiteLLM format.

    For most providers, adds the provider prefix if not already present.
    For OpenRouter, always adds 'openrouter/' prefix since OpenRouter models
    use nested prefixes like 'openrouter/anthropic/claude-3.5-sonnet'.
    """
    provider_prefixes = {
        "openai": "",  # OpenAI models don't need prefix
        "anthropic": "anthropic/",
        "openrouter": "openrouter/",
        "gemini": "gemini/",
        "deepseek": "deepseek/",
        "ollama": "ollama/",
    }

    prefix = provider_prefixes.get(config.provider, "")

    # OpenRouter is special: always add openrouter/ prefix unless already present.
    # OpenRouter models use nested format: openrouter/anthropic/claude-3.5-sonnet
    if config.provider == "openrouter":
        if config.model.startswith("openrouter/"):
            return config.model
        return f"openrouter/{config.model}"

    # For other providers, don't add prefix if model already has a known prefix
    known_prefixes = ["openrouter/", "anthropic/", "gemini/", "deepseek/", "ollama/"]
    if any(config.model.startswith(p) for p in known_prefixes):
        return config.model

    # Add provider prefix for models that need it
    return f"{prefix}{config.model}" if prefix else config.model


def _supports_temperature(provider: str, model: str) -> bool:
    """Return whether passing `temperature` is supported for this model/provider combo.

    Some models (e.g., OpenAI gpt-5 family) reject temperature values other than 1,
    and LiteLLM may error when temperature is passed.
    """
    _ = provider
    model_lower = model.lower()
    if "gpt-5" in model_lower:
        return False
    return True


def _get_reasoning_effort(provider: str, model: str) -> str | None:
    """Return a default reasoning_effort for models that require it.

    Some OpenAI gpt-5 models may return empty message.content unless a supported
    `reasoning_effort` is explicitly set. This keeps downstream JSON parsing reliable.
    """
    _ = provider
    model_lower = model.lower()
    if "gpt-5" in model_lower:
        return "minimal"
    return None


async def check_llm_health(
    config: LLMConfig | None = None,
    *,
    include_details: bool = False,
    test_prompt: str | None = None,
) -> dict[str, Any]:
    """Check if the LLM provider is accessible and working."""
    if config is None:
        config = get_llm_config()

    # Check if API key is configured (except for Ollama)
    if config.provider != "ollama" and not config.api_key:
        return {
            "healthy": False,
            "provider": config.provider,
            "model": config.model,
            "error_code": "api_key_missing",
        }

    model_name = get_model_name(config)

    prompt = test_prompt or "Hi"

    try:
        # Make a minimal test call with timeout.
        # Pass API key directly to avoid race conditions with global os.environ
        kwargs: dict[str, Any] = {
            "model": model_name,
            "messages": [{"role": "user", "content": prompt}],
            "max_tokens": 16,
            "api_key": config.api_key,
            "api_base": _normalize_api_base(config.provider, config.api_base),
            "timeout": LLM_TIMEOUT_HEALTH_CHECK,
        }
        reasoning_effort = _get_reasoning_effort(config.provider, model_name)
        if reasoning_effort:
            kwargs["reasoning_effort"] = reasoning_effort

        response = await litellm.acompletion(**kwargs)
        content = _extract_choice_text(response.choices[0])
        if not content:
            # LLM-003: Empty response should mark health check as unhealthy
            logging.warning(
                "LLM health check returned empty content",
                extra={"provider": config.provider, "model": config.model},
            )
            result: dict[str, Any] = {
                "healthy": False,  # Fixed: empty content means unhealthy
                "provider": config.provider,
                "model": config.model,
                "response_model": response.model if response else None,
                "error_code": "empty_content",  # Changed from warning_code
                "message": "LLM returned empty response",
            }
            if include_details:
                result["test_prompt"] = _to_code_block(prompt)
                result["model_output"] = _to_code_block(None)
            return result

        result = {
            "healthy": True,
            "provider": config.provider,
            "model": config.model,
            "response_model": response.model if response else None,
        }
        if include_details:
            result["test_prompt"] = _to_code_block(prompt)
            result["model_output"] = _to_code_block(content)
        return result

    except Exception as e:
        # Log full exception details server-side, but do not expose them to clients
        logging.exception(
            "LLM health check failed",
            extra={"provider": config.provider, "model": config.model},
        )

        # Provide a minimal, actionable client-facing hint without leaking secrets.
        error_code = "health_check_failed"
        message = str(e)
        if "404" in message and "/v1/v1/" in message:
            error_code = "duplicate_v1_path"
        elif "404" in message:
            error_code = "not_found_404"
        elif "<!doctype html" in message.lower() or "<html" in message.lower():
            error_code = "html_response"
        result = {
            "healthy": False,
            "provider": config.provider,
            "model": config.model,
            "error_code": error_code,
        }
        if include_details:
            result["test_prompt"] = _to_code_block(prompt)
            result["model_output"] = _to_code_block(None)
            result["error_detail"] = _to_code_block(message)
        return result


async def complete(
    prompt: str,
    system_prompt: str | None = None,
    config: LLMConfig | None = None,
    max_tokens: int = 4096,
    temperature: float = 0.7,
) -> str:
    """Make a completion request to the LLM."""
    if config is None:
        cf_orch_url = os.environ.get("CF_ORCH_URL", "").strip()
        if cf_orch_url:
            try:
                async with _allocate_orch_async(
                    cf_orch_url,
                    "vllm",
                    model_candidates=["Qwen2.5-3B-Instruct"],
                    ttl_s=300.0,
                    caller="peregrine-resume-matcher",
                ) as alloc:
                    orch_config = LLMConfig(
                        provider="openai",
                        model="__auto__",
                        api_key="any",
                        api_base=alloc.url.rstrip("/") + "/v1",
                    )
                    return await complete(prompt, system_prompt, orch_config, max_tokens, temperature)
            except Exception as exc:
                logging.warning("cf-orch allocation failed, falling back to default config: %s", exc)
        config = get_llm_config()

    model_name = get_model_name(config)

    messages = []
    if system_prompt:
        messages.append({"role": "system", "content": system_prompt})
    messages.append({"role": "user", "content": prompt})

    try:
        # Pass API key directly to avoid race conditions with global os.environ
        kwargs: dict[str, Any] = {
            "model": model_name,
            "messages": messages,
            "max_tokens": max_tokens,
            "api_key": config.api_key,
            "api_base": _normalize_api_base(config.provider, config.api_base),
            "timeout": LLM_TIMEOUT_COMPLETION,
        }
        if _supports_temperature(config.provider, model_name):
            kwargs["temperature"] = temperature
        reasoning_effort = _get_reasoning_effort(config.provider, model_name)
        if reasoning_effort:
            kwargs["reasoning_effort"] = reasoning_effort

        response = await litellm.acompletion(**kwargs)

        content = _extract_choice_text(response.choices[0])
        if not content:
            raise ValueError("Empty response from LLM")
        return content
    except Exception as e:
        # Log the actual error server-side for debugging
        logging.error(f"LLM completion failed: {e}", extra={"model": model_name})
        raise ValueError(
            "LLM completion failed. Please check your API configuration and try again."
        ) from e


def _supports_json_mode(provider: str, model: str) -> bool:
    """Check if the model supports JSON mode."""
    # Models that support response_format={"type": "json_object"}
    json_mode_providers = ["openai", "anthropic", "gemini", "deepseek"]
    if provider in json_mode_providers:
        return True
    # LLM-004: OpenRouter models - use explicit allowlist instead of substring matching
    if provider == "openrouter":
        return model in OPENROUTER_JSON_CAPABLE_MODELS
    return False


def _appears_truncated(data: dict) -> bool:
    """LLM-001: Check if JSON data appears to be truncated.

    Detects suspicious patterns indicating incomplete responses.
    """
    if not isinstance(data, dict):
        return False

    # Check for empty arrays that should typically have content
    suspicious_empty_arrays = ["workExperience", "education", "skills"]
    for key in suspicious_empty_arrays:
        if key in data and data[key] == []:
            # Log warning - these are rarely empty in real resumes
            logging.warning(
                "Possible truncation detected: '%s' is empty",
                key,
            )
            return True

    # Check for missing critical sections
    required_top_level = ["personalInfo"]
    for key in required_top_level:
        if key not in data:
            logging.warning(
                "Possible truncation detected: missing required section '%s'",
                key,
            )
            return True

    return False


def _get_retry_temperature(attempt: int, base_temp: float = 0.1) -> float:
    """LLM-002: Get temperature for retry attempt - increases with each retry.

    Higher temperature on retries gives the model more variation to produce
    different (hopefully valid) output.
    """
    temperatures = [base_temp, 0.3, 0.5, 0.7]
    return temperatures[min(attempt, len(temperatures) - 1)]


def _calculate_timeout(
    operation: str,
    max_tokens: int = 4096,
    provider: str = "openai",
) -> int:
    """LLM-005: Calculate adaptive timeout based on operation and parameters."""
    base_timeouts = {
        "health_check": LLM_TIMEOUT_HEALTH_CHECK,
        "completion": LLM_TIMEOUT_COMPLETION,
        "json": LLM_TIMEOUT_JSON,
    }

    base = base_timeouts.get(operation, LLM_TIMEOUT_COMPLETION)

    # Scale by token count (relative to 4096 baseline)
    token_factor = max(1.0, max_tokens / 4096)

    # Provider-specific latency adjustments
    provider_factors = {
        "openai": 1.0,
        "anthropic": 1.2,
        "openrouter": 1.5,  # More variable latency
        "ollama": 2.0,  # Local models can be slower
    }
    provider_factor = provider_factors.get(provider, 1.0)

    return int(base * token_factor * provider_factor)


def _extract_json(content: str, _depth: int = 0) -> str:
    """Extract JSON from LLM response, handling various formats.

    LLM-001: Improved to detect and reject likely truncated JSON.
    LLM-007: Improved error messages for debugging.
    JSON-010: Added recursion depth and size limits.
    """
    # JSON-010: Safety limits
    if _depth > MAX_JSON_EXTRACTION_RECURSION:
        raise ValueError(f"JSON extraction exceeded max recursion depth: {_depth}")
    if len(content) > MAX_JSON_CONTENT_SIZE:
        raise ValueError(f"Content too large for JSON extraction: {len(content)} bytes")

    original = content

    # Remove markdown code blocks
    if "```json" in content:
        content = content.split("```json")[1].split("```")[0]
    elif "```" in content:
        parts = content.split("```")
        if len(parts) >= 2:
            content = parts[1]
            # Remove language identifier if present (e.g., "json\n{...")
            if content.startswith(("json", "JSON")):
                content = content[4:]

    content = content.strip()

    # If content starts with {, find the matching }
    if content.startswith("{"):
        depth = 0
        end_idx = -1
        in_string = False
        escape_next = False

        for i, char in enumerate(content):
            if escape_next:
                escape_next = False
                continue
            if char == "\\":
                escape_next = True
                continue
            if char == '"' and not escape_next:
                in_string = not in_string
                continue
            if in_string:
                continue
            if char == "{":
                depth += 1
            elif char == "}":
                depth -= 1
                if depth == 0:
                    end_idx = i
                    break

        # LLM-001: Check for unbalanced braces - loop ended without depth reaching 0
        if end_idx == -1 and depth != 0:
            logging.warning(
                "JSON extraction found unbalanced braces (depth=%d), possible truncation",
                depth,
            )

        if end_idx != -1:
            return content[: end_idx + 1]

    # Try to find JSON object in the content (only if not already at start)
    start_idx = content.find("{")
    if start_idx > 0:
        # Only recurse if { is found after position 0 to avoid infinite recursion
        return _extract_json(content[start_idx:], _depth + 1)

    # LLM-007: Log unrecognized format for debugging
    logging.error(
        "Could not extract JSON from response format. Content preview: %s",
        content[:200] if content else "<empty>",
    )
    raise ValueError(f"No JSON found in response: {original[:200]}")


async def complete_json(
    prompt: str,
    system_prompt: str | None = None,
    config: LLMConfig | None = None,
    max_tokens: int = 4096,
    retries: int = 2,
) -> dict[str, Any]:
    """Make a completion request expecting JSON response.

    Uses JSON mode when available, with retry logic for reliability.
    """
    if config is None:
        cf_orch_url = os.environ.get("CF_ORCH_URL", "").strip()
        if cf_orch_url:
            try:
                async with _allocate_orch_async(
                    cf_orch_url,
                    "vllm",
                    model_candidates=["Qwen2.5-3B-Instruct"],
                    ttl_s=300.0,
                    caller="peregrine-resume-matcher",
                ) as alloc:
                    orch_config = LLMConfig(
                        provider="openai",
                        model="__auto__",
                        api_key="any",
                        api_base=alloc.url.rstrip("/") + "/v1",
                    )
                    return await complete_json(prompt, system_prompt, orch_config, max_tokens, retries)
            except Exception as exc:
                logging.warning("cf-orch allocation failed, falling back to default config: %s", exc)
        config = get_llm_config()

    model_name = get_model_name(config)

    # Build messages
    json_system = (
        system_prompt or ""
    ) + "\n\nYou must respond with valid JSON only. No explanations, no markdown."
    messages = [
        {"role": "system", "content": json_system},
        {"role": "user", "content": prompt},
    ]

    # Check if we can use JSON mode
    use_json_mode = _supports_json_mode(config.provider, config.model)

    last_error = None
    for attempt in range(retries + 1):
        try:
            # Build request kwargs.
            # Pass API key directly to avoid race conditions with global os.environ
            kwargs: dict[str, Any] = {
                "model": model_name,
                "messages": messages,
                "max_tokens": max_tokens,
                "api_key": config.api_key,
                "api_base": _normalize_api_base(config.provider, config.api_base),
                "timeout": _calculate_timeout("json", max_tokens, config.provider),
            }
            if _supports_temperature(config.provider, model_name):
                # LLM-002: Increase temperature on retry for variation
                kwargs["temperature"] = _get_retry_temperature(attempt)
            reasoning_effort = _get_reasoning_effort(config.provider, model_name)
            if reasoning_effort:
                kwargs["reasoning_effort"] = reasoning_effort

            # Add JSON mode if supported
            if use_json_mode:
                kwargs["response_format"] = {"type": "json_object"}

            response = await litellm.acompletion(**kwargs)
            content = _extract_choice_text(response.choices[0])

            if not content:
                raise ValueError("Empty response from LLM")

            logging.debug(f"LLM response (attempt {attempt + 1}): {content[:300]}")

            # Extract and parse JSON
            json_str = _extract_json(content)
            result = json.loads(json_str)

            # LLM-001: Check if parsed result appears truncated
            if isinstance(result, dict) and _appears_truncated(result):
                logging.warning(
                    "Parsed JSON appears truncated, but proceeding with result"
                )

            return result

        except json.JSONDecodeError as e:
            last_error = e
            logging.warning(f"JSON parse failed (attempt {attempt + 1}): {e}")
            if attempt < retries:
                # Add hint to prompt for retry
                messages[-1]["content"] = (
                    prompt
                    + "\n\nIMPORTANT: Output ONLY a valid JSON object. Start with { and end with }."
                )
                continue
            raise ValueError(f"Failed to parse JSON after {retries + 1} attempts: {e}")

        except Exception as e:
            last_error = e
            logging.warning(f"LLM call failed (attempt {attempt + 1}): {e}")
            if attempt < retries:
                continue
            raise

    raise ValueError(f"Failed after {retries + 1} attempts: {last_error}")
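A note on the ContextVar comment in the file above: per-task isolation is what makes forwarding user_id safe under concurrency. A standalone sketch (no FastAPI, just asyncio) showing two concurrent "requests" each seeing only their own value:

```python
import asyncio
from contextvars import ContextVar

request_user_id: ContextVar[str | None] = ContextVar("request_user_id", default=None)


async def handle(uid: str) -> None:
    # Each asyncio task runs in its own copy of the context, so a set() here
    # is invisible to the other concurrently running task.
    request_user_id.set(uid)
    await asyncio.sleep(0.01)  # simulate awaiting the LLM / cf-orch call
    assert request_user_id.get() == uid
    print(uid, "->", request_user_id.get())


async def main() -> None:
    # gather() wraps each coroutine in a task, copying the current context.
    await asyncio.gather(handle("user-a"), handle("user-b"))


asyncio.run(main())
```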
app/main.py (deleted)
@@ -1,88 +0,0 @@
"""FastAPI application entry point."""

import asyncio
import logging
import sys
from contextlib import asynccontextmanager

from fastapi import Depends, FastAPI

# Fix for Windows: Use ProactorEventLoop for subprocess support (Playwright)
if sys.platform == "win32":
    asyncio.set_event_loop_policy(asyncio.WindowsProactorEventLoopPolicy())

logger = logging.getLogger(__name__)

from fastapi.middleware.cors import CORSMiddleware

from app import __version__
from app.cloud_session import session_middleware_dep
from app.config import settings
from app.database import db
from app.pdf import close_pdf_renderer, init_pdf_renderer
from app.routers import config_router, enrichment_router, health_router, jobs_router, resumes_router


@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan manager."""
    # Startup
    settings.data_dir.mkdir(parents=True, exist_ok=True)
    # PDF renderer uses lazy initialization - will initialize on first use
    # await init_pdf_renderer()
    yield
    # Shutdown - wrap each cleanup in try-except to ensure all resources are released
    try:
        await close_pdf_renderer()
    except Exception as e:
        logger.error(f"Error closing PDF renderer: {e}")

    try:
        db.close()
    except Exception as e:
        logger.error(f"Error closing database: {e}")


app = FastAPI(
    title="Resume Matcher API",
    description="AI-powered resume tailoring for job descriptions",
    version=__version__,
    lifespan=lifespan,
    dependencies=[Depends(session_middleware_dep)],
)

# CORS middleware - origins configurable via CORS_ORIGINS env var
app.add_middleware(
    CORSMiddleware,
    allow_origins=settings.cors_origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Include routers
app.include_router(health_router, prefix="/api/v1")
app.include_router(config_router, prefix="/api/v1")
app.include_router(resumes_router, prefix="/api/v1")
app.include_router(jobs_router, prefix="/api/v1")
app.include_router(enrichment_router, prefix="/api/v1")


@app.get("/")
async def root():
    """Root endpoint."""
    return {
        "name": "Resume Matcher API",
        "version": __version__,
        "docs": "/docs",
    }


if __name__ == "__main__":
    import uvicorn

    uvicorn.run(
        "app.main:app",
        host=settings.host,
        port=settings.port,
        reload=True,
    )
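The lifespan shutdown above wraps each cleanup step in its own try/except so a failing PDF-renderer close cannot skip the database close. A minimal sketch of that pattern, independent of this app (the fail_close/ok_close names are invented for the demo):

```python
import asyncio
import logging
from contextlib import asynccontextmanager

logging.basicConfig(level=logging.INFO)
log = logging.getLogger("lifespan-demo")


async def fail_close() -> None:
    raise RuntimeError("renderer already gone")


async def ok_close() -> None:
    log.info("db closed cleanly")


@asynccontextmanager
async def lifespan():
    yield
    # Wrap each cleanup independently so one failure can't skip the rest.
    for name, cleanup in [("renderer", fail_close), ("db", ok_close)]:
        try:
            await cleanup()
        except Exception as e:
            log.error("Error closing %s: %s", name, e)


async def main() -> None:
    async with lifespan():
        log.info("app running")
    # Output: "app running", then the renderer error, then "db closed cleanly".


asyncio.run(main())
```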