feat: migrate to circuitforge-core for db, llm router, and tiers
Some checks failed
CI / test (push) Failing after 24s
Some checks failed
CI / test (push) Failing after 24s
This commit is contained in:
parent
608e0fa922
commit
818e46c17e
6 changed files with 39 additions and 194 deletions
|
|
@ -1,7 +1,7 @@
|
||||||
"""
|
"""
|
||||||
Tier definitions and feature gates for Peregrine.
|
Tier definitions and feature gates for Peregrine.
|
||||||
|
|
||||||
Tiers: free < paid < premium
|
Tiers: free < paid < premium < ultra (ultra reserved; no Peregrine features use it yet)
|
||||||
FEATURES maps feature key → minimum tier required.
|
FEATURES maps feature key → minimum tier required.
|
||||||
Features not in FEATURES are available to all tiers (free).
|
Features not in FEATURES are available to all tiers (free).
|
||||||
|
|
||||||
|
|
@ -25,7 +25,11 @@ from __future__ import annotations
|
||||||
import os as _os
|
import os as _os
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
TIERS = ["free", "paid", "premium"]
|
from circuitforge_core.tiers import (
|
||||||
|
can_use as _core_can_use,
|
||||||
|
TIERS,
|
||||||
|
tier_label as _core_tier_label,
|
||||||
|
)
|
||||||
|
|
||||||
# Maps feature key → minimum tier string required.
|
# Maps feature key → minimum tier string required.
|
||||||
# Features absent from this dict are free (available to all).
|
# Features absent from this dict are free (available to all).
|
||||||
|
|
@ -132,25 +136,20 @@ def can_use(
|
||||||
Returns False for unknown/invalid tier strings.
|
Returns False for unknown/invalid tier strings.
|
||||||
"""
|
"""
|
||||||
effective_tier = demo_tier if (demo_tier is not None and _DEMO_MODE) else tier
|
effective_tier = demo_tier if (demo_tier is not None and _DEMO_MODE) else tier
|
||||||
required = FEATURES.get(feature)
|
# Pass Peregrine's BYOK_UNLOCKABLE via has_byok collapse — core's frozenset is empty
|
||||||
if required is None:
|
|
||||||
return True # not gated — available to all
|
|
||||||
if has_byok and feature in BYOK_UNLOCKABLE:
|
if has_byok and feature in BYOK_UNLOCKABLE:
|
||||||
return True
|
return True
|
||||||
try:
|
return _core_can_use(feature, effective_tier, _features=FEATURES)
|
||||||
return TIERS.index(effective_tier) >= TIERS.index(required)
|
|
||||||
except ValueError:
|
|
||||||
return False # invalid tier string
|
|
||||||
|
|
||||||
|
|
||||||
def tier_label(feature: str, has_byok: bool = False) -> str:
|
def tier_label(feature: str, has_byok: bool = False) -> str:
|
||||||
"""Return a display label for a locked feature, or '' if free/unlocked."""
|
"""Return a display label for a locked feature, or '' if free/unlocked."""
|
||||||
if has_byok and feature in BYOK_UNLOCKABLE:
|
if has_byok and feature in BYOK_UNLOCKABLE:
|
||||||
return ""
|
return ""
|
||||||
required = FEATURES.get(feature)
|
raw = _core_tier_label(feature, _features=FEATURES)
|
||||||
if required is None:
|
if not raw or raw == "free":
|
||||||
return ""
|
return ""
|
||||||
return "🔒 Paid" if required == "paid" else "⭐ Premium"
|
return "🔒 Paid" if raw == "paid" else "⭐ Premium"
|
||||||
|
|
||||||
|
|
||||||
def effective_tier(
|
def effective_tier(
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,9 @@
|
||||||
# Extracted from environment.yml for Docker pip installs
|
# Extracted from environment.yml for Docker pip installs
|
||||||
# Keep in sync with environment.yml
|
# Keep in sync with environment.yml
|
||||||
|
|
||||||
|
# ── CircuitForge shared core ───────────────────────────────────────────────
|
||||||
|
-e ../circuitforge-core
|
||||||
|
|
||||||
# ── Web UI ────────────────────────────────────────────────────────────────
|
# ── Web UI ────────────────────────────────────────────────────────────────
|
||||||
streamlit>=1.35
|
streamlit>=1.35
|
||||||
watchdog
|
watchdog
|
||||||
|
|
@ -78,3 +81,10 @@ lxml
|
||||||
# ── Documentation ────────────────────────────────────────────────────────
|
# ── Documentation ────────────────────────────────────────────────────────
|
||||||
mkdocs>=1.5
|
mkdocs>=1.5
|
||||||
mkdocs-material>=9.5
|
mkdocs-material>=9.5
|
||||||
|
|
||||||
|
# ── Vue SPA API backend ──────────────────────────────────────────────────
|
||||||
|
fastapi>=0.100.0
|
||||||
|
uvicorn[standard]>=0.20.0
|
||||||
|
PyJWT>=2.8.0
|
||||||
|
cryptography>=40.0.0
|
||||||
|
python-multipart>=0.0.6
|
||||||
|
|
|
||||||
|
|
@ -9,30 +9,14 @@ from datetime import datetime
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
|
from circuitforge_core.db import get_connection as _cf_get_connection
|
||||||
|
|
||||||
DEFAULT_DB = Path(os.environ.get("STAGING_DB", Path(__file__).parent.parent / "staging.db"))
|
DEFAULT_DB = Path(os.environ.get("STAGING_DB", Path(__file__).parent.parent / "staging.db"))
|
||||||
|
|
||||||
|
|
||||||
def get_connection(db_path: Path = DEFAULT_DB, key: str = "") -> "sqlite3.Connection":
|
def get_connection(db_path: Path = DEFAULT_DB, key: str = "") -> "sqlite3.Connection":
|
||||||
"""
|
"""Thin shim — delegates to circuitforge_core.db.get_connection."""
|
||||||
Open a database connection.
|
return _cf_get_connection(db_path, key)
|
||||||
|
|
||||||
In cloud mode with a key: uses SQLCipher (AES-256 encrypted, API-identical to sqlite3).
|
|
||||||
Otherwise: vanilla sqlite3.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
db_path: Path to the SQLite/SQLCipher database file.
|
|
||||||
key: SQLCipher encryption key (hex string). Empty = unencrypted.
|
|
||||||
"""
|
|
||||||
import os as _os
|
|
||||||
cloud_mode = _os.environ.get("CLOUD_MODE", "").lower() in ("1", "true", "yes")
|
|
||||||
if cloud_mode and key:
|
|
||||||
from pysqlcipher3 import dbapi2 as _sqlcipher
|
|
||||||
conn = _sqlcipher.connect(str(db_path))
|
|
||||||
conn.execute(f"PRAGMA key='{key}'")
|
|
||||||
return conn
|
|
||||||
else:
|
|
||||||
import sqlite3 as _sqlite3
|
|
||||||
return _sqlite3.connect(str(db_path))
|
|
||||||
|
|
||||||
|
|
||||||
CREATE_JOBS = """
|
CREATE_JOBS = """
|
||||||
|
|
|
||||||
|
|
@ -2,168 +2,18 @@
|
||||||
LLM abstraction layer with priority fallback chain.
|
LLM abstraction layer with priority fallback chain.
|
||||||
Reads config/llm.yaml. Tries backends in order; falls back on any error.
|
Reads config/llm.yaml. Tries backends in order; falls back on any error.
|
||||||
"""
|
"""
|
||||||
import os
|
|
||||||
import yaml
|
|
||||||
import requests
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from openai import OpenAI
|
|
||||||
|
from circuitforge_core.llm import LLMRouter as _CoreLLMRouter
|
||||||
|
|
||||||
CONFIG_PATH = Path(__file__).parent.parent / "config" / "llm.yaml"
|
CONFIG_PATH = Path(__file__).parent.parent / "config" / "llm.yaml"
|
||||||
|
|
||||||
|
|
||||||
class LLMRouter:
|
class LLMRouter(_CoreLLMRouter):
|
||||||
|
"""Peregrine-specific LLMRouter — defaults to Peregrine's config/llm.yaml."""
|
||||||
|
|
||||||
def __init__(self, config_path: Path = CONFIG_PATH):
|
def __init__(self, config_path: Path = CONFIG_PATH):
|
||||||
with open(config_path) as f:
|
super().__init__(config_path)
|
||||||
self.config = yaml.safe_load(f)
|
|
||||||
|
|
||||||
def _is_reachable(self, base_url: str) -> bool:
|
|
||||||
"""Quick health-check ping. Returns True if backend is up."""
|
|
||||||
health_url = base_url.rstrip("/").removesuffix("/v1") + "/health"
|
|
||||||
try:
|
|
||||||
resp = requests.get(health_url, timeout=2)
|
|
||||||
return resp.status_code < 500
|
|
||||||
except Exception:
|
|
||||||
return False
|
|
||||||
|
|
||||||
def _resolve_model(self, client: OpenAI, model: str) -> str:
|
|
||||||
"""Resolve __auto__ to the first model served by vLLM."""
|
|
||||||
if model != "__auto__":
|
|
||||||
return model
|
|
||||||
models = client.models.list()
|
|
||||||
return models.data[0].id
|
|
||||||
|
|
||||||
def complete(self, prompt: str, system: str | None = None,
|
|
||||||
model_override: str | None = None,
|
|
||||||
fallback_order: list[str] | None = None,
|
|
||||||
images: list[str] | None = None,
|
|
||||||
max_tokens: int | None = None) -> str:
|
|
||||||
"""
|
|
||||||
Generate a completion. Tries each backend in fallback_order.
|
|
||||||
|
|
||||||
model_override: when set, replaces the configured model for
|
|
||||||
openai_compat backends (e.g. pass a research-specific ollama model).
|
|
||||||
fallback_order: when set, overrides config fallback_order for this
|
|
||||||
call (e.g. pass config["research_fallback_order"] for research tasks).
|
|
||||||
images: optional list of base64-encoded PNG/JPG strings. When provided,
|
|
||||||
backends without supports_images=true are skipped. vision_service backends
|
|
||||||
are only tried when images is provided.
|
|
||||||
Raises RuntimeError if all backends are exhausted.
|
|
||||||
"""
|
|
||||||
if os.environ.get("DEMO_MODE", "").lower() in ("1", "true", "yes"):
|
|
||||||
raise RuntimeError(
|
|
||||||
"AI inference is disabled in the public demo. "
|
|
||||||
"Run your own instance to use AI features."
|
|
||||||
)
|
|
||||||
order = fallback_order if fallback_order is not None else self.config["fallback_order"]
|
|
||||||
for name in order:
|
|
||||||
backend = self.config["backends"][name]
|
|
||||||
|
|
||||||
if not backend.get("enabled", True):
|
|
||||||
print(f"[LLMRouter] {name}: disabled, skipping")
|
|
||||||
continue
|
|
||||||
|
|
||||||
supports_images = backend.get("supports_images", False)
|
|
||||||
is_vision_service = backend["type"] == "vision_service"
|
|
||||||
|
|
||||||
# vision_service only used when images provided
|
|
||||||
if is_vision_service and not images:
|
|
||||||
print(f"[LLMRouter] {name}: vision_service skipped (no images)")
|
|
||||||
continue
|
|
||||||
|
|
||||||
# non-vision backends skipped when images provided and they don't support it
|
|
||||||
if images and not supports_images and not is_vision_service:
|
|
||||||
print(f"[LLMRouter] {name}: no image support, skipping")
|
|
||||||
continue
|
|
||||||
|
|
||||||
if is_vision_service:
|
|
||||||
if not self._is_reachable(backend["base_url"]):
|
|
||||||
print(f"[LLMRouter] {name}: unreachable, skipping")
|
|
||||||
continue
|
|
||||||
try:
|
|
||||||
resp = requests.post(
|
|
||||||
backend["base_url"].rstrip("/") + "/analyze",
|
|
||||||
json={
|
|
||||||
"prompt": prompt,
|
|
||||||
"image_base64": images[0] if images else "",
|
|
||||||
},
|
|
||||||
timeout=60,
|
|
||||||
)
|
|
||||||
resp.raise_for_status()
|
|
||||||
print(f"[LLMRouter] Used backend: {name} (vision_service)")
|
|
||||||
return resp.json()["text"]
|
|
||||||
except Exception as e:
|
|
||||||
print(f"[LLMRouter] {name}: error — {e}, trying next")
|
|
||||||
continue
|
|
||||||
|
|
||||||
elif backend["type"] == "openai_compat":
|
|
||||||
if not self._is_reachable(backend["base_url"]):
|
|
||||||
print(f"[LLMRouter] {name}: unreachable, skipping")
|
|
||||||
continue
|
|
||||||
try:
|
|
||||||
client = OpenAI(
|
|
||||||
base_url=backend["base_url"],
|
|
||||||
api_key=backend.get("api_key") or "any",
|
|
||||||
)
|
|
||||||
raw_model = model_override or backend["model"]
|
|
||||||
model = self._resolve_model(client, raw_model)
|
|
||||||
messages = []
|
|
||||||
if system:
|
|
||||||
messages.append({"role": "system", "content": system})
|
|
||||||
if images and supports_images:
|
|
||||||
content = [{"type": "text", "text": prompt}]
|
|
||||||
for img in images:
|
|
||||||
content.append({
|
|
||||||
"type": "image_url",
|
|
||||||
"image_url": {"url": f"data:image/png;base64,{img}"},
|
|
||||||
})
|
|
||||||
messages.append({"role": "user", "content": content})
|
|
||||||
else:
|
|
||||||
messages.append({"role": "user", "content": prompt})
|
|
||||||
|
|
||||||
create_kwargs: dict = {"model": model, "messages": messages}
|
|
||||||
if max_tokens is not None:
|
|
||||||
create_kwargs["max_tokens"] = max_tokens
|
|
||||||
resp = client.chat.completions.create(**create_kwargs)
|
|
||||||
print(f"[LLMRouter] Used backend: {name} ({model})")
|
|
||||||
return resp.choices[0].message.content
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
print(f"[LLMRouter] {name}: error — {e}, trying next")
|
|
||||||
continue
|
|
||||||
|
|
||||||
elif backend["type"] == "anthropic":
|
|
||||||
api_key = os.environ.get(backend["api_key_env"], "")
|
|
||||||
if not api_key:
|
|
||||||
print(f"[LLMRouter] {name}: {backend['api_key_env']} not set, skipping")
|
|
||||||
continue
|
|
||||||
try:
|
|
||||||
import anthropic as _anthropic
|
|
||||||
client = _anthropic.Anthropic(api_key=api_key)
|
|
||||||
if images and supports_images:
|
|
||||||
content = []
|
|
||||||
for img in images:
|
|
||||||
content.append({
|
|
||||||
"type": "image",
|
|
||||||
"source": {"type": "base64", "media_type": "image/png", "data": img},
|
|
||||||
})
|
|
||||||
content.append({"type": "text", "text": prompt})
|
|
||||||
else:
|
|
||||||
content = prompt
|
|
||||||
kwargs: dict = {
|
|
||||||
"model": backend["model"],
|
|
||||||
"max_tokens": 4096,
|
|
||||||
"messages": [{"role": "user", "content": content}],
|
|
||||||
}
|
|
||||||
if system:
|
|
||||||
kwargs["system"] = system
|
|
||||||
msg = client.messages.create(**kwargs)
|
|
||||||
print(f"[LLMRouter] Used backend: {name}")
|
|
||||||
return msg.content[0].text
|
|
||||||
except Exception as e:
|
|
||||||
print(f"[LLMRouter] {name}: error — {e}, trying next")
|
|
||||||
continue
|
|
||||||
|
|
||||||
raise RuntimeError("All LLM backends exhausted")
|
|
||||||
|
|
||||||
|
|
||||||
# Module-level singleton for convenience
|
# Module-level singleton for convenience
|
||||||
|
|
|
||||||
|
|
@ -24,7 +24,7 @@ def test_router_uses_first_reachable_backend():
|
||||||
mock_response.choices[0].message.content = "hello"
|
mock_response.choices[0].message.content = "hello"
|
||||||
|
|
||||||
with patch.object(router, "_is_reachable", side_effect=[False, True, True, True, True]), \
|
with patch.object(router, "_is_reachable", side_effect=[False, True, True, True, True]), \
|
||||||
patch("scripts.llm_router.OpenAI") as MockOpenAI:
|
patch("circuitforge_core.llm.router.OpenAI") as MockOpenAI:
|
||||||
instance = MockOpenAI.return_value
|
instance = MockOpenAI.return_value
|
||||||
instance.chat.completions.create.return_value = mock_response
|
instance.chat.completions.create.return_value = mock_response
|
||||||
mock_model = MagicMock()
|
mock_model = MagicMock()
|
||||||
|
|
@ -54,7 +54,7 @@ def test_is_reachable_returns_false_on_connection_error():
|
||||||
|
|
||||||
router = LLMRouter(CONFIG_PATH)
|
router = LLMRouter(CONFIG_PATH)
|
||||||
|
|
||||||
with patch("scripts.llm_router.requests.get", side_effect=requests.ConnectionError):
|
with patch("circuitforge_core.llm.router.requests.get", side_effect=requests.ConnectionError):
|
||||||
result = router._is_reachable("http://localhost:9999/v1")
|
result = router._is_reachable("http://localhost:9999/v1")
|
||||||
|
|
||||||
assert result is False
|
assert result is False
|
||||||
|
|
@ -92,8 +92,8 @@ def test_complete_skips_backend_without_image_support(tmp_path):
|
||||||
mock_resp.status_code = 200
|
mock_resp.status_code = 200
|
||||||
mock_resp.json.return_value = {"text": "B — collaborative"}
|
mock_resp.json.return_value = {"text": "B — collaborative"}
|
||||||
|
|
||||||
with patch("scripts.llm_router.requests.get") as mock_get, \
|
with patch("circuitforge_core.llm.router.requests.get") as mock_get, \
|
||||||
patch("scripts.llm_router.requests.post") as mock_post:
|
patch("circuitforge_core.llm.router.requests.post") as mock_post:
|
||||||
# health check returns ok for vision_service
|
# health check returns ok for vision_service
|
||||||
mock_get.return_value = MagicMock(status_code=200)
|
mock_get.return_value = MagicMock(status_code=200)
|
||||||
mock_post.return_value = mock_resp
|
mock_post.return_value = mock_resp
|
||||||
|
|
@ -127,7 +127,7 @@ def test_complete_without_images_skips_vision_service(tmp_path):
|
||||||
cfg_file.write_text(yaml.dump(cfg))
|
cfg_file.write_text(yaml.dump(cfg))
|
||||||
|
|
||||||
router = LLMRouter(config_path=cfg_file)
|
router = LLMRouter(config_path=cfg_file)
|
||||||
with patch("scripts.llm_router.requests.post") as mock_post:
|
with patch("circuitforge_core.llm.router.requests.post") as mock_post:
|
||||||
try:
|
try:
|
||||||
router.complete("text only prompt")
|
router.complete("text only prompt")
|
||||||
except RuntimeError:
|
except RuntimeError:
|
||||||
|
|
|
||||||
|
|
@ -8,7 +8,9 @@ from app.wizard.tiers import can_use, tier_label, TIERS, FEATURES, BYOK_UNLOCKAB
|
||||||
|
|
||||||
|
|
||||||
def test_tiers_list():
|
def test_tiers_list():
|
||||||
assert TIERS == ["free", "paid", "premium"]
|
# Peregrine uses the core tier list; "ultra" is included but no features require it yet
|
||||||
|
assert TIERS[:3] == ["free", "paid", "premium"]
|
||||||
|
assert "ultra" in TIERS
|
||||||
|
|
||||||
|
|
||||||
def test_can_use_free_feature_always():
|
def test_can_use_free_feature_always():
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue