Per-user LLM rate limiting via slowapi: cloud-aware key function, 4 endpoint limits, demo bypass, SSRF and path traversal already in fix/ci-ruff-lint merge. Closes: #122
92 lines
4.9 KiB
Text
92 lines
4.9 KiB
Text
# .env.example — copy to .env
|
|
# Auto-generated by the setup wizard, or fill in manually.
|
|
# NEVER commit .env to git.
|
|
|
|
STREAMLIT_PORT=8502
|
|
OLLAMA_PORT=11434
|
|
VLLM_PORT=8000
|
|
CF_TEXT_PORT=8006
|
|
SEARXNG_PORT=8888
|
|
VISION_PORT=8002
|
|
VISION_MODEL=vikhyatk/moondream2
|
|
VISION_REVISION=2025-01-09
|
|
|
|
DOCS_DIR=~/Documents/JobSearch
|
|
OLLAMA_MODELS_DIR=~/models/ollama
|
|
VLLM_MODELS_DIR=~/models/vllm # override with full path to your model dir
|
|
VLLM_MODEL=Ouro-1.4B # cover letters — fast 1.4B model
|
|
VLLM_RESEARCH_MODEL=Ouro-2.6B-Thinking # research — reasoning 2.6B model; restart vllm to switch
|
|
CF_TEXT_MODEL=/Library/Assets/LLM/qwen2.5-3b-instruct-q4_k_m.gguf # cf-text GGUF model; set to "mock" to disable
|
|
VLLM_MAX_MODEL_LEN=4096 # increase to 8192 for Thinking models with long CoT
|
|
VLLM_GPU_MEM_UTIL=0.75 # lower to 0.6 if sharing GPU with other services
|
|
OLLAMA_DEFAULT_MODEL=llama3.2:3b
|
|
|
|
# ── LLM env-var auto-config (alternative to config/llm.yaml) ─────────────────
|
|
# Set any of these to configure LLM backends without needing a config/llm.yaml.
|
|
# Priority: Anthropic > OpenAI-compat > Ollama (always tried as local fallback).
|
|
OLLAMA_HOST=http://localhost:11434 # Ollama host; override if on a different machine
|
|
OLLAMA_MODEL=llama3.2:3b # model to request from Ollama
|
|
OPENAI_MODEL=gpt-4o-mini # model override for OpenAI-compat backend
|
|
ANTHROPIC_MODEL=claude-haiku-4-5-20251001 # model override for Anthropic backend
|
|
|
|
# API keys (required for remote profile)
|
|
ANTHROPIC_API_KEY=
|
|
OPENAI_COMPAT_URL=
|
|
OPENAI_COMPAT_KEY=
|
|
|
|
# Feedback button — Forgejo issue filing
|
|
FORGEJO_API_TOKEN= # dev/admin token (your personal account)
|
|
FORGEJO_BOT_TOKEN= # cf-bugbot bot token — used for in-app feedback; falls back to FORGEJO_API_TOKEN
|
|
FORGEJO_REPO=pyr0ball/peregrine
|
|
FORGEJO_API_URL=https://git.opensourcesolarpunk.com/api/v1
|
|
# GITHUB_TOKEN= # future — enable when public mirror is active
|
|
# GITHUB_REPO= # future
|
|
|
|
# ── CF-hosted coordinator (Paid+ tier) ───────────────────────────────────────
|
|
# Set CF_LICENSE_KEY to authenticate with the hosted coordinator.
|
|
# Leave both CF_LICENSE_KEY and GPU_SERVER_URL blank for local self-hosted cf-orch or bare-metal inference.
|
|
CF_LICENSE_KEY=
|
|
GPU_SERVER_URL=https://orch.circuitforge.tech
|
|
# CF_ORCH_URL is also accepted as a backward-compat alias for GPU_SERVER_URL
|
|
|
|
# cf-orch agent — GPU profiles only (single-gpu, dual-gpu-*)
|
|
# The agent registers this node with the cf-orch coordinator and reports VRAM stats.
|
|
# CF_ORCH_COORDINATOR_URL: coordinator the agent registers with
|
|
# CF_ORCH_NODE_ID: name shown on the dashboard (default: peregrine)
|
|
# CF_ORCH_AGENT_PORT: host port for the agent HTTP server (default: 7701)
|
|
# CF_ORCH_ADVERTISE_HOST: IP the coordinator uses to reach back to this agent.
|
|
# Defaults to 127.0.0.1 (same-host coordinator).
|
|
# Set to your host LAN IP for a remote coordinator.
|
|
CF_ORCH_COORDINATOR_URL=http://localhost:7700
|
|
CF_ORCH_NODE_ID=peregrine
|
|
CF_ORCH_AGENT_PORT=7701
|
|
#CF_ORCH_ADVERTISE_HOST=10.1.10.71
|
|
|
|
# Cloud multi-tenancy (compose.cloud.yml only — do not set for local installs)
|
|
CLOUD_MODE=false
|
|
CLOUD_DATA_ROOT=/devl/menagerie-data
|
|
SYNC_DB_PATH= # optional; defaults to CLOUD_DATA_ROOT/sync.db
|
|
SYNC_DB_KEY= # optional; SQLCipher key for at-rest encryption
|
|
DIRECTUS_JWT_SECRET= # must match website/.env DIRECTUS_SECRET value
|
|
CF_SERVER_SECRET= # random 64-char hex — generate: openssl rand -hex 32
|
|
PLATFORM_DB_URL=postgresql://cf_platform:<password>@host.docker.internal:5433/circuitforge_platform
|
|
HEIMDALL_URL=http://cf-license:8000 # internal Docker URL; override for external access
|
|
HEIMDALL_ADMIN_TOKEN= # must match ADMIN_TOKEN in circuitforge-license .env
|
|
|
|
# ── Memory (mnemo sidecar) — opt-in, requires --profile memory ───────────────
|
|
# Launch with: docker compose --profile memory --profile <gpu-profile> up -d
|
|
# Mnemo builds a persistent knowledge graph from conversations and injects
|
|
# relevant context back into LLM prompts. Uses the ollama service as its LLM by default.
|
|
MNEMO_HOST=mnemo # internal service name; change for external sidecar
|
|
MNEMO_PORT=8080
|
|
MNEMO_LLM_PROVIDER=ollama # ollama | openai | anthropic | custom
|
|
MNEMO_LLM_BASE_URL=http://ollama:11434/v1 # override for external LLM
|
|
MNEMO_LLM_API_KEY=ollama # "ollama" is a dummy value for local Ollama
|
|
MNEMO_LLM_MODEL=llama3.2:3b # must be pulled in the ollama container
|
|
|
|
# ── Rate limiting (LLM generation endpoints) ─────────────────────────────────
|
|
LLM_RATE_COVER_LETTER=20/hour
|
|
LLM_RATE_RESEARCH=10/hour
|
|
LLM_RATE_QA_SUGGEST=60/hour
|
|
LLM_RATE_SURVEY=30/hour
|
|
LLM_RATE_WIZARD=60/hour
|