feat: cover letter training export (Phase 1) #111
1 changed files with 33 additions and 1 deletions
|
|
@ -59,6 +59,12 @@ MAX_JSON_CONTENT_SIZE = 1024 * 1024 # 1MB
|
||||||
# Request-scoped user_id — set once by session_middleware_dep, read inside _allocate_orch_async.
|
# Request-scoped user_id — set once by session_middleware_dep, read inside _allocate_orch_async.
|
||||||
# ContextVar is safe for concurrent async requests: each request task gets its own copy.
|
# ContextVar is safe for concurrent async requests: each request task gets its own copy.
|
||||||
_request_user_id: ContextVar[str | None] = ContextVar("request_user_id", default=None)
|
_request_user_id: ContextVar[str | None] = ContextVar("request_user_id", default=None)
|
||||||
|
_request_tier: ContextVar[str | None] = ContextVar("request_tier", default=None)
|
||||||
|
# Custom writing model for premium/ultra users — populated from Heimdall license key meta.
|
||||||
|
# Set to None for all other tiers; complete() falls back to the shared base model.
|
||||||
|
_request_writing_model: ContextVar[str | None] = ContextVar("request_writing_model", default=None)
|
||||||
|
|
||||||
|
_PREMIUM_TIERS: frozenset[str] = frozenset({"premium", "ultra"})
|
||||||
|
|
||||||
|
|
||||||
def set_request_user_id(user_id: str | None) -> None:
|
def set_request_user_id(user_id: str | None) -> None:
|
||||||
|
|
@ -69,6 +75,22 @@ def get_request_user_id() -> str | None:
|
||||||
return _request_user_id.get()
|
return _request_user_id.get()
|
||||||
|
|
||||||
|
|
||||||
|
def set_request_tier(tier: str | None) -> None:
|
||||||
|
_request_tier.set(tier)
|
||||||
|
|
||||||
|
|
||||||
|
def get_request_tier() -> str | None:
|
||||||
|
return _request_tier.get()
|
||||||
|
|
||||||
|
|
||||||
|
def set_request_writing_model(model: str | None) -> None:
|
||||||
|
_request_writing_model.set(model)
|
||||||
|
|
||||||
|
|
||||||
|
def get_request_writing_model() -> str | None:
|
||||||
|
return _request_writing_model.get()
|
||||||
|
|
||||||
|
|
||||||
class LLMConfig(BaseModel):
|
class LLMConfig(BaseModel):
|
||||||
"""LLM configuration model."""
|
"""LLM configuration model."""
|
||||||
|
|
||||||
|
|
@ -481,10 +503,20 @@ async def complete(
|
||||||
cf_orch_url = os.environ.get("CF_ORCH_URL", "").strip()
|
cf_orch_url = os.environ.get("CF_ORCH_URL", "").strip()
|
||||||
if cf_orch_url:
|
if cf_orch_url:
|
||||||
try:
|
try:
|
||||||
|
# Premium/ultra users get their personal fine-tuned writing model as the
|
||||||
|
# first candidate; the base model is the fallback so cf-orch can
|
||||||
|
# degrade gracefully if the personal model isn't loaded yet.
|
||||||
|
tier = get_request_tier()
|
||||||
|
writing_model = get_request_writing_model()
|
||||||
|
model_candidates: list[str] = (
|
||||||
|
[writing_model, "Qwen2.5-3B-Instruct"]
|
||||||
|
if writing_model and tier in _PREMIUM_TIERS
|
||||||
|
else ["Qwen2.5-3B-Instruct"]
|
||||||
|
)
|
||||||
async with _allocate_orch_async(
|
async with _allocate_orch_async(
|
||||||
cf_orch_url,
|
cf_orch_url,
|
||||||
"vllm",
|
"vllm",
|
||||||
model_candidates=["Qwen2.5-3B-Instruct"],
|
model_candidates=model_candidates,
|
||||||
ttl_s=300.0,
|
ttl_s=300.0,
|
||||||
caller="peregrine-resume-matcher",
|
caller="peregrine-resume-matcher",
|
||||||
) as alloc:
|
) as alloc:
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue