Add scripts/rate_limit.py with cloud-aware key function: - In cloud mode, extracts user_id from _request_db ContextVar path (parts[-3]) so each cloud user has their own rate limit bucket - In demo mode, returns unique per-request key to disable limiting entirely (_demo_guard handles write-blocking; rate limiting would block the demo UX) - Falls back to client IP for local/self-hosted installs Wire limiter to 4 endpoints with conservative per-user limits: - POST /generate/cover-letter: 20/hour - POST /research/run: 10/hour - POST /qa/suggest: 60/hour - POST /survey/analyze: 30/hour Add _demo_guard() to generate_research and suggest_qa_answer (was missing). Fix pre-existing silent except in suggest_qa_answer: was bare except pass, now logs warning with exc_info. Add _RL_WIZARD placeholder constant with TODO to wire to wizard/ai/interview after feat/77 merges (declared but intentionally not applied yet to avoid false sense of security — comment makes the gap explicit). 18 tests covering cloud user isolation, demo bypass, IP fallback, all 4 endpoints returning 429 on excess, retry_after header, and demo guard. Closes: #122
32 lines
1.2 KiB
Python
32 lines
1.2 KiB
Python
"""Per-user rate limiting for Peregrine LLM generation endpoints."""
|
|
from pathlib import Path
|
|
|
|
from slowapi import Limiter
|
|
from slowapi.errors import RateLimitExceeded
|
|
from slowapi.util import get_remote_address
|
|
from starlette.requests import Request
|
|
from starlette.responses import JSONResponse
|
|
|
|
|
|
def _rate_key(request: Request) -> str:
    """Return the rate-limit bucket key for *request*.

    Resolution order:

    1. Demo mode: a fresh random key on every call, so no two requests
       share a bucket and limits never trigger.
    2. Cloud mode with a per-request DB path set: the third-from-last
       component of that path, treated as the user id (assumes a layout
       like ``<root>/<user_id>/<dir>/<db file>`` -- confirm against
       ``dev_api``).
    3. Otherwise: the client address as reported by slowapi.
    """
    import uuid  # function-scope: only the demo branch needs it

    from dev_api import IS_DEMO, _CLOUD_MODE, _request_db  # lazy import avoids circular

    if IS_DEMO:
        # id(request) is only unique among *live* objects; CPython reuses the
        # address once a request is freed, which could put later demo requests
        # in the same bucket. A random UUID cannot collide that way.
        return f"demo-{uuid.uuid4().hex}"

    db_path = _request_db.get()
    if _CLOUD_MODE and db_path:
        segments = Path(db_path).parts
        if len(segments) >= 3:
            return segments[-3]  # user_id segment
        # Path too short to hold a user segment: fall back to the client
        # address below instead of raising IndexError.

    return get_remote_address(request)
|
|
|
|
|
|
# Module-level slowapi limiter; its buckets are keyed by whatever _rate_key returns.
limiter = Limiter(key_func=_rate_key)
|
|
|
|
|
|
def rate_limit_exceeded_handler(request: Request, exc: RateLimitExceeded) -> JSONResponse:
    """Build the JSON 429 response for a rate-limit violation.

    The delay is taken from ``exc.retry_after`` when the exception has that
    attribute and defaults to 60 otherwise. The same value is reported in
    the JSON body and, stringified, in the ``Retry-After`` header.
    """
    delay = getattr(exc, "retry_after", 60)
    payload = {"error": "rate_limit_exceeded", "retry_after": delay}
    extra_headers = {"Retry-After": str(delay)}
    return JSONResponse(status_code=429, content=payload, headers=extra_headers)
|