peregrine/.env.example

# .env.example — template for .env.
# The setup wizard generates .env for you; to configure manually instead,
# copy this file to .env and fill in the values.
# NEVER commit .env to git.

# Port number for each service, followed by the vision model name and revision.
# NOTE(review): whether these are host-side or container-side ports is not
# stated in this file — confirm against the compose files.
STREAMLIT_PORT=8502
OLLAMA_PORT=11434
VLLM_PORT=8000
CF_TEXT_PORT=8006
SEARXNG_PORT=8888
VISION_PORT=8002
VISION_MODEL=vikhyatk/moondream2
VISION_REVISION=2025-01-09

# Directories and model selection.
# Comments are kept on their own lines: not every env-file reader strips a
# trailing "# ..." from a value (e.g. `docker run --env-file` does not).
DOCS_DIR=~/Documents/JobSearch
OLLAMA_MODELS_DIR=~/models/ollama
# Override with the full path to your model dir.
VLLM_MODELS_DIR=~/models/vllm
# Cover letters — fast 1.4B model.
VLLM_MODEL=Ouro-1.4B
# Research — reasoning 2.6B model; restart vllm to switch.
VLLM_RESEARCH_MODEL=Ouro-2.6B-Thinking
# cf-text GGUF model; set to "mock" to disable.
CF_TEXT_MODEL=/Library/Assets/LLM/qwen2.5-3b-instruct-q4_k_m.gguf
# Increase to 8192 for Thinking models with long CoT.
VLLM_MAX_MODEL_LEN=4096
# Lower to 0.6 if sharing the GPU with other services.
VLLM_GPU_MEM_UTIL=0.75
OLLAMA_DEFAULT_MODEL=llama3.2:3b

# ── LLM env-var auto-config (alternative to config/llm.yaml) ─────────────────
# Set any of these to configure LLM backends without needing a config/llm.yaml.
# Priority: Anthropic > OpenAI-compat > Ollama (always tried as local fallback).
# Comments are kept on their own lines: not every env-file reader strips a
# trailing "# ..." from a value.
# Ollama host; override if Ollama runs on a different machine.
OLLAMA_HOST=http://localhost:11434
# Model to request from Ollama.
OLLAMA_MODEL=llama3.2:3b
# Model override for the OpenAI-compat backend.
OPENAI_MODEL=gpt-4o-mini
# Model override for the Anthropic backend.
ANTHROPIC_MODEL=claude-haiku-4-5-20251001

# API keys (required for remote profile)
# All three ship blank. OPENAI_COMPAT_URL is an endpoint URL rather than a key;
# presumably it pairs with OPENAI_COMPAT_KEY — confirm against the config loader.
ANTHROPIC_API_KEY=
OPENAI_COMPAT_URL=
OPENAI_COMPAT_KEY=

# Feedback button — Forgejo issue filing
# Keep comments on their own lines here: when a value is empty, some dotenv
# parsers (e.g. python-dotenv) read a trailing "# ..." as the value itself,
# which would turn a blank token into a non-empty string.
# Dev/admin token (your personal account).
FORGEJO_API_TOKEN=
# cf-bugbot bot token — used for in-app feedback; falls back to FORGEJO_API_TOKEN.
FORGEJO_BOT_TOKEN=
FORGEJO_REPO=pyr0ball/peregrine
FORGEJO_API_URL=https://git.opensourcesolarpunk.com/api/v1
# Future — enable when the public mirror is active:
# GITHUB_TOKEN=
# GITHUB_REPO=

# ── CF-hosted coordinator (Paid+ tier) ───────────────────────────────────────
# Set CF_LICENSE_KEY to authenticate with the hosted coordinator.
# Leave both CF_LICENSE_KEY and GPU_SERVER_URL blank for local self-hosted
# cf-orch or bare-metal inference.
# NOTE(review): GPU_SERVER_URL is pre-filled below, so it must be cleared by
# hand for a local setup — confirm this default is intended.
CF_LICENSE_KEY=
GPU_SERVER_URL=https://orch.circuitforge.tech
# CF_ORCH_URL is also accepted as a backward-compat alias for GPU_SERVER_URL

# cf-orch agent — GPU profiles only (single-gpu, dual-gpu-*)
# The agent registers this node with the cf-orch coordinator and reports VRAM stats.
# CF_ORCH_COORDINATOR_URL: coordinator the agent registers with
# CF_ORCH_NODE_ID:         name shown on the dashboard (default: peregrine)
# CF_ORCH_AGENT_PORT:      host port for the agent HTTP server (default: 7701)
# CF_ORCH_ADVERTISE_HOST:  IP the coordinator uses to reach back to this agent.
#                          Defaults to 127.0.0.1 (same-host coordinator).
#                          Set to your host LAN IP for a remote coordinator.
CF_ORCH_COORDINATOR_URL=http://localhost:7700
CF_ORCH_NODE_ID=peregrine
CF_ORCH_AGENT_PORT=7701
# Example only — uncomment and substitute your own host LAN IP:
#CF_ORCH_ADVERTISE_HOST=10.1.10.71

# Cloud multi-tenancy (compose.cloud.yml only — do not set for local installs)
# Keep comments on their own lines here: when a value is empty, some dotenv
# parsers (e.g. python-dotenv) read a trailing "# ..." as the value itself,
# which would turn a blank secret or path into a non-empty string.
CLOUD_MODE=false
CLOUD_DATA_ROOT=/devl/menagerie-data
# Optional; defaults to CLOUD_DATA_ROOT/sync.db.
SYNC_DB_PATH=
# Optional; SQLCipher key for at-rest encryption.
SYNC_DB_KEY=
# Must match the DIRECTUS_SECRET value in website/.env.
DIRECTUS_JWT_SECRET=
# Random 64-char hex — generate with: openssl rand -hex 32
CF_SERVER_SECRET=
# Replace <password> with the real database password.
PLATFORM_DB_URL=postgresql://cf_platform:<password>@host.docker.internal:5433/circuitforge_platform
# Internal Docker URL; override for external access.
HEIMDALL_URL=http://cf-license:8000
# Must match ADMIN_TOKEN in the circuitforge-license .env.
HEIMDALL_ADMIN_TOKEN=

# ── Memory (mnemo sidecar) — opt-in, requires --profile memory ───────────────
# Launch with: docker compose --profile memory --profile <gpu-profile> up -d
# Mnemo builds a persistent knowledge graph from conversations and injects
# relevant context back into LLM prompts. Uses the ollama service as its LLM.
# Comments are kept on their own lines: not every env-file reader strips a
# trailing "# ..." from a value.
# Internal service name; change for an external sidecar.
MNEMO_HOST=mnemo
MNEMO_PORT=8080
# One of: ollama | openai | anthropic | custom
MNEMO_LLM_PROVIDER=ollama
# Override for an external LLM.
MNEMO_LLM_BASE_URL=http://ollama:11434/v1
# "ollama" is a dummy value for local Ollama.
MNEMO_LLM_API_KEY=ollama
# Must be pulled in the ollama container.
MNEMO_LLM_MODEL=llama3.2:3b

# ── Rate limiting (LLM generation endpoints) ─────────────────────────────────
# One limit per endpoint, written as <count>/<period> (e.g. 20/hour).
# NOTE(review): which period units are accepted depends on the rate limiter,
# which is not visible from this file — confirm before using other units.
LLM_RATE_COVER_LETTER=20/hour
LLM_RATE_RESEARCH=10/hour
LLM_RATE_QA_SUGGEST=60/hour
LLM_RATE_SURVEY=30/hour
LLM_RATE_WIZARD=60/hour