Compare commits

...

No commits in common. "main" and "feature/vue-spa" have entirely different histories.

246 changed files with 1963 additions and 27500 deletions

View file

@ -1,44 +0,0 @@
# git-cliff changelog configuration for Peregrine
# See: https://git-cliff.org/docs/configuration
[changelog]
header = """
# Changelog\n
"""
body = """
{% if version %}\
## [{{ version | trim_start_matches(pat="v") }}] - {{ timestamp | date(format="%Y-%m-%d") }}
{% else %}\
## [Unreleased]
{% endif %}\
{% for group, commits in commits | group_by(attribute="group") %}
### {{ group | upper_first }}
{% for commit in commits %}
- {% if commit.scope %}**{{ commit.scope }}:** {% endif %}{{ commit.message | upper_first }}\
{% endfor %}
{% endfor %}\n
"""
trim = true
[git]
conventional_commits = true
filter_unconventional = true
split_commits = false
commit_preprocessors = []
commit_parsers = [
{ message = "^feat", group = "Features" },
{ message = "^fix", group = "Bug Fixes" },
{ message = "^perf", group = "Performance" },
{ message = "^refactor", group = "Refactoring" },
{ message = "^docs", group = "Documentation" },
{ message = "^test", group = "Testing" },
{ message = "^chore", group = "Chores" },
{ message = "^ci", group = "CI/CD" },
{ message = "^revert", group = "Reverts" },
]
filter_commits = false
tag_pattern = "v[0-9].*"
skip_tags = ""
ignore_tags = ""
topo_order = false
sort_commits = "oldest"

View file

@ -2,10 +2,9 @@
# Auto-generated by the setup wizard, or fill in manually.
# NEVER commit .env to git.
STREAMLIT_PORT=8502
STREAMLIT_PORT=8501
OLLAMA_PORT=11434
VLLM_PORT=8000
CF_TEXT_PORT=8006
SEARXNG_PORT=8888
VISION_PORT=8002
VISION_MODEL=vikhyatk/moondream2
@ -13,22 +12,10 @@ VISION_REVISION=2025-01-09
DOCS_DIR=~/Documents/JobSearch
OLLAMA_MODELS_DIR=~/models/ollama
VLLM_MODELS_DIR=~/models/vllm # override with full path to your model dir
VLLM_MODEL=Ouro-1.4B # cover letters — fast 1.4B model
VLLM_RESEARCH_MODEL=Ouro-2.6B-Thinking # research — reasoning 2.6B model; restart vllm to switch
CF_TEXT_MODEL=/Library/Assets/LLM/qwen2.5-3b-instruct-q4_k_m.gguf # cf-text GGUF model; set to "mock" to disable
VLLM_MAX_MODEL_LEN=4096 # increase to 8192 for Thinking models with long CoT
VLLM_GPU_MEM_UTIL=0.75 # lower to 0.6 if sharing GPU with other services
VLLM_MODELS_DIR=~/models/vllm
VLLM_MODEL=Ouro-1.4B
OLLAMA_DEFAULT_MODEL=llama3.2:3b
# ── LLM env-var auto-config (alternative to config/llm.yaml) ─────────────────
# Set any of these to configure LLM backends without needing a config/llm.yaml.
# Priority: Anthropic > OpenAI-compat > Ollama (always tried as local fallback).
OLLAMA_HOST=http://localhost:11434 # Ollama host; override if on a different machine
OLLAMA_MODEL=llama3.2:3b # model to request from Ollama
OPENAI_MODEL=gpt-4o-mini # model override for OpenAI-compat backend
ANTHROPIC_MODEL=claude-haiku-4-5-20251001 # model override for Anthropic backend
# API keys (required for remote profile)
ANTHROPIC_API_KEY=
OPENAI_COMPAT_URL=
@ -41,26 +28,6 @@ FORGEJO_API_URL=https://git.opensourcesolarpunk.com/api/v1
# GITHUB_TOKEN= # future — enable when public mirror is active
# GITHUB_REPO= # future
# ── CF-hosted coordinator (Paid+ tier) ───────────────────────────────────────
# Set CF_LICENSE_KEY to authenticate with the hosted coordinator.
# Leave both blank for local self-hosted cf-orch or bare-metal inference.
CF_LICENSE_KEY=
GPU_SERVER_URL=https://orch.circuitforge.tech
# CF_ORCH_URL is also accepted as a backward-compat alias for GPU_SERVER_URL
# cf-orch agent — GPU profiles only (single-gpu, dual-gpu-*)
# The agent registers this node with the cf-orch coordinator and reports VRAM stats.
# CF_ORCH_COORDINATOR_URL: coordinator the agent registers with
# CF_ORCH_NODE_ID: name shown on the dashboard (default: peregrine)
# CF_ORCH_AGENT_PORT: host port for the agent HTTP server (default: 7701)
# CF_ORCH_ADVERTISE_HOST: IP the coordinator uses to reach back to this agent.
# Defaults to 127.0.0.1 (same-host coordinator).
# Set to your host LAN IP for a remote coordinator.
CF_ORCH_COORDINATOR_URL=http://localhost:7700
CF_ORCH_NODE_ID=peregrine
CF_ORCH_AGENT_PORT=7701
#CF_ORCH_ADVERTISE_HOST=10.1.10.71
# Cloud multi-tenancy (compose.cloud.yml only — do not set for local installs)
CLOUD_MODE=false
CLOUD_DATA_ROOT=/devl/menagerie-data

View file

@ -1,63 +0,0 @@
# Peregrine CI — lint, type-check, test on PR/push
# Full-stack: FastAPI (Python) + Vue 3 SPA (Node)
# Adapted from Circuit-Forge/cf-agents workflows/ci.yml (cf-agents#4 tracks the
# upstream ci-fullstack.yml variant; update this file when that lands).
name: CI
on:
push:
branches: [main, 'feature/**', 'fix/**', 'freeze/**']
pull_request:
branches: [main]
jobs:
backend:
name: Backend (Python)
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: '3.12'
cache: pip
- name: Install system dependencies
run: sudo apt-get update -q && sudo apt-get install -y libsqlcipher-dev
- name: Install dependencies
run: pip install -r requirements.txt
- name: Install lint tools
run: pip install ruff
- name: Lint
run: ruff check .
- name: Test
run: pytest tests/ -v --tb=short
frontend:
name: Frontend (Vue)
runs-on: ubuntu-latest
defaults:
run:
working-directory: web
steps:
- uses: actions/checkout@v4
- uses: actions/setup-node@v4
with:
node-version: '20'
cache: npm
cache-dependency-path: web/package-lock.json
- name: Install dependencies
run: npm ci
- name: Type check
run: npx vue-tsc --noEmit
- name: Test
run: npm run test

View file

@ -1,35 +0,0 @@
# Mirror push to GitHub and Codeberg on every push to main or tag.
# Copied from Circuit-Forge/cf-agents workflows/mirror.yml
# Required secrets: GH_MIRROR_TOKEN, CODEBERG_MIRROR_TOKEN
# Note: Forgejo reserves the GITHUB_* prefix for secret names — use GH_* instead.
name: Mirror
on:
push:
branches: [main]
tags: ['v*']
jobs:
mirror:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Mirror to GitHub
env:
GH_MIRROR_PAT: ${{ secrets.GH_MIRROR_TOKEN }}
REPO: ${{ github.event.repository.name }}
run: |
git remote add github "https://x-access-token:${GH_MIRROR_PAT}@github.com/CircuitForgeLLC/${REPO}.git"
git push github --mirror
- name: Mirror to Codeberg
env:
CODEBERG_TOKEN: ${{ secrets.CODEBERG_MIRROR_TOKEN }}
REPO: ${{ github.event.repository.name }}
run: |
git remote add codeberg "https://CircuitForge:${CODEBERG_TOKEN}@codeberg.org/CircuitForge/${REPO}.git"
git push codeberg --mirror

View file

@ -1,71 +0,0 @@
# Tag-triggered release workflow.
# Generates changelog and creates Forgejo release on v* tags.
# Copied from Circuit-Forge/cf-agents workflows/release.yml
#
# Docker push is intentionally disabled — BSL 1.1 registry policy not yet resolved.
# Tracked in Circuit-Forge/cf-agents#3. Re-enable the Docker steps when that lands.
#
# Required secrets: FORGEJO_RELEASE_TOKEN
# (GHCR_TOKEN not needed until Docker push is enabled)
name: Release
on:
push:
tags: ['v*']
jobs:
release:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
# ── Changelog ────────────────────────────────────────────────────────────
- name: Generate changelog
uses: orhun/git-cliff-action@v3
id: cliff
with:
config: .cliff.toml
args: --latest --strip header
env:
OUTPUT: CHANGES.md
# ── Docker (disabled — BSL registry policy pending cf-agents#3) ──────────
# - name: Set up QEMU
# uses: docker/setup-qemu-action@v3
# - name: Set up Buildx
# uses: docker/setup-buildx-action@v3
# - name: Log in to GHCR
# uses: docker/login-action@v3
# with:
# registry: ghcr.io
# username: ${{ github.actor }}
# password: ${{ secrets.GHCR_TOKEN }}
# - name: Build and push Docker image
# uses: docker/build-push-action@v6
# with:
# context: .
# push: true
# platforms: linux/amd64,linux/arm64
# tags: |
# ghcr.io/circuitforgellc/peregrine:${{ github.ref_name }}
# ghcr.io/circuitforgellc/peregrine:latest
# cache-from: type=gha
# cache-to: type=gha,mode=max
# ── Forgejo Release ───────────────────────────────────────────────────────
- name: Create Forgejo release
env:
FORGEJO_TOKEN: ${{ secrets.FORGEJO_RELEASE_TOKEN }}
REPO: ${{ github.event.repository.name }}
TAG: ${{ github.ref_name }}
NOTES: ${{ steps.cliff.outputs.content }}
run: |
curl -sS -X POST \
"https://git.opensourcesolarpunk.com/api/v1/repos/Circuit-Forge/${REPO}/releases" \
-H "Authorization: token ${FORGEJO_TOKEN}" \
-H "Content-Type: application/json" \
-d "$(jq -n --arg tag "$TAG" --arg body "$NOTES" \
'{tag_name: $tag, name: $tag, body: $body}')"

View file

@ -1,7 +1,3 @@
# Peregrine CI — runs on GitHub mirror for public credibility badge.
# Forgejo (.forgejo/workflows/ci.yml) is the canonical CI — keep these in sync.
# No Forgejo-specific secrets used here; circuitforge-core is public on Forgejo.
name: CI
on:
@ -11,46 +7,23 @@ on:
branches: [main]
jobs:
backend:
name: Backend (Python)
test:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
- name: Install system dependencies
run: sudo apt-get update -q && sudo apt-get install -y libsqlcipher-dev
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: '3.12'
python-version: "3.11"
cache: pip
- name: Install dependencies
run: pip install -r requirements.txt
- name: Lint
run: ruff check .
- name: Test
- name: Run tests
run: pytest tests/ -v --tb=short
frontend:
name: Frontend (Vue)
runs-on: ubuntu-latest
defaults:
run:
working-directory: web
steps:
- uses: actions/checkout@v4
- uses: actions/setup-node@v4
with:
node-version: '20'
cache: npm
cache-dependency-path: web/package-lock.json
- name: Install dependencies
run: npm ci
- name: Type check
run: npx vue-tsc --noEmit
- name: Test
run: npm run test

3
.gitignore vendored
View file

@ -40,11 +40,8 @@ pytest-output.txt
docs/superpowers/
data/email_score.jsonl
data/email_score.jsonl.bad-labels
data/email_label_queue.jsonl
data/email_compare_sample.jsonl
data/.feedback_ratelimit.json
data/config/
config/label_tool.yaml
config/server.yaml

View file

@ -9,413 +9,6 @@ Format follows [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
---
## [0.9.5] — 2026-05-08
### Fixed
- **Theme: dark/explicit themes show correct page background** — `index.html` inline style
set `html, body { background: #eaeff8 }` hardcoded. `body` paints on top of `html`, so
even when `html { background: var(--color-surface) }` correctly resolved to `#16202e` in
dark mode, the body's hardcoded light background covered it — producing dark cards on a
light page. Fixed by: (1) removing body background from the inline style; (2) adding a
tiny blocking `<script>` that reads `cf-theme` / `cf-hacker-mode` from localStorage and
sets `data-theme` on `<html>` before first paint; (3) adding
`html[data-theme="dark"|"solarized-dark"|"hacker"]` rules so FOUT prevention fires the
right background immediately on load.
---
## [0.9.4] — 2026-05-08
### Added
- **Messages view — expandable email timeline** — click any email item to lazy-load
and read the full body inline (HTML stripped to plain text via `DOMParser`).
Bodies are fetched on-demand via the new `GET /api/contacts/{id}` endpoint to avoid
loading 50KB+ email bodies on every page view.
- **Messages view — compose bar** — action buttons (Log call, Log note, Use template,
Draft reply with LLM, Call via Osprey) moved from the always-visible header into a
sticky bottom compose bar triggered by a New toggle. Reduces visual clutter when
just reading the thread.
- **Home view — "Skip review" checkbox** — when adding jobs by URL, a checkbox (default
on) sends them directly to the Apply queue, bypassing Job Review.
- **ContactsView — sync status** — shows last completed sync time and a spinner when
an email sync is running.
- **imap_sync: Indeed alert parser** — `parse_indeed_alert()` extracts job title,
company, location, salary, and canonical URL from Indeed Job Alert digest emails.
- **scrape_url: Oracle HCM support** — Playwright-based scraper for Oracle HCM
CandidateExperience portals (React SPAs requiring JS execution).
- **manage.sh** — compose engine auto-detection (docker compose / podman compose /
podman-compose), `build` command, and cloud/demo stack shortcuts.
- **theme.css** — `--color-overlay` token for modal/dialog backdrops.
### Fixed
- **Messages view layout** — changed `height: 100%` to `height: 100dvh` with a mobile
override for the 56px tab bar. `height: 100%` was resolving to "shrink-wrap" because
`.app-main` has no explicit height; compose bar is now correctly pinned to the bottom.
- **Accessibility: danger button contrast** — `btn--danger` used `color: white` on
`--app-accent` (Talon Orange), yielding 2.8:1 contrast (fails WCAG AA 4.5:1 for
normal text). Fixed to `color: var(--app-accent-text)` (dark navy, 5.5:1).
- **Accessibility: warning badge contrast** — `tab-badge` in Job Review used `color: white`
on `--color-warning` (amber). Same fix applied.
- **Theme: Interviews signal banners** — hardcoded `rgba(245,158,11,…)` / `rgba(39,174,…)`
/ `rgba(192,57,…)` replaced with `color-mix()` against `--color-warning/success/error`.
- **Theme: Interviews signal count** — `color: #e67e22` hardcode replaced with
`var(--app-accent)`.
- **Theme: References academic tag chip** — `color: #7c3aed` hardcode replaced with
`var(--status-synced)`; background uses `color-mix()` with the same token.
- **Theme: Interviews signal-move button** — `color: #fff` on `--color-primary` fails
in dark mode (light green bg); fixed to `var(--color-text-inverse)`.
- **Modal backdrops** — `rgba(0,0,0,0.5)` replaced with `var(--color-overlay)` for
theme consistency.
---
## [0.9.3] — 2026-05-05
### Added
- **Editable resume review** — proposed summary and experience bullets in the review modal
are now editable text areas. Edits flow through `apply_review_decisions()` and override
the LLM output in the final resume struct. Preview textarea in Apply Workspace is also
editable, with manual changes preserved through the approve step via `preview_text_override`.
### Fixed
- **Double bullets in resume optimizer** — `_section_text_for_prompt` now strips existing
bullet characters before prefixing with `•`, and `_reparse_experience_bullets` uses a
greedy strip regex so `• •` patterns can no longer survive parsing.
- **Asterisk markup in summary** — added `_clean_summary_markup()` to strip LLM-generated
markdown bullet chars (`*`, `-`, etc.) from career summary output; injected no-markdown
rule into the LLM prompt's CRITICAL RULES list.
- **Light theme dark CSS bleed** — `peregrine.css` media dark override now scoped to
`:root:not([data-theme])` (auto mode only) instead of `:root:not([data-theme="hacker"])`.
Fixes dark navy `--app-primary-light`/`--app-accent-light` bleeding into light themes
(light, solarized-light, colorblind) on dark-OS machines.
---
## [0.9.2] — 2026-05-02
### Added
- **Cover letter training export** (#111) — opt-in consent gate (`training_export_opt_in`
in `user.yaml`, default off) lets users export applied-job cover letters as Alpaca-format
JSONL for local fine-tuning. Per-job exclude/restore curation in Settings → Fine-Tune.
Streaming JSONL download merges DB pairs with any previously uploaded file pairs.
Cloud fine-tune Phase 2 stub (501) reserved for cf-orch integration.
- **WizardTrainingStep** — new onboarding consent step inserted between Resume and Identity;
skippable, opt-in default off, cloud-aware privacy copy.
- **a11y:** confirmed-state toggle (no optimistic DOM divergence), visible Premium tier gate
with upgrade link, `aria-live` region on pairs list, cloud-aware consent copy.
---
## [0.9.0] — 2026-04-20
### Added
- **Messaging tab** (#74) — per-job communication timeline replacing `/contacts`.
Unified view of IMAP emails (`job_contacts`) and manually logged entries (`messages`).
Log calls and in-person notes with timestamp. Message template library with 4 built-in
templates (follow-up, thank-you, accommodation request, withdrawal) and user-created
templates with `{{token}}` substitution. LLM draft reply for inbound emails (BYOK-unlockable,
BSL 1.1). Draft approval flow with inline editing and one-click clipboard copy. Osprey
IVR stub button (Phase 2 placeholder with easter egg). `migrations/008_messaging.sql`.
- **Public demo experience** (#103) — full read-only demo mode at `demo.circuitforge.tech/peregrine`.
`IS_DEMO=true` write-blocks all mutating API endpoints with a toast notification.
Ephemeral seed data via tmpfs + `demo/seed.sql` (resets on container start). WelcomeModal
on first visit (localStorage-gated). Per-view HintChips guiding new users through the
job search flow (localStorage-dismissed). DemoBanner with accessible CTA buttons
(WCAG-compliant contrast in light and dark themes). `migrations/006_missing_columns.sql`.
- **References tracker and recommendation letter system** (#96) — track professional
references and generate LLM-drafted recommendation request letters.
- **Shadow listing detector** — flags duplicate or aggregator-reposted job listings.
- **Hired feedback widget** — capture post-hire notes and retrospective feedback on jobs.
- **Interview prep Q&A** — LLM-generated practice questions for the selected job.
- **Resume library ↔ profile sync** — `POST /api/resumes/{id}/apply-to-profile` pushes
a library resume into the active profile; `PUT /api/settings/resume` syncs edits back
to the default library entry. `ResumeSyncConfirmModal` shows a before/after diff.
`ResumeProfileView` extended with career summary, education, and achievements sections.
`migrations/007_resume_sync.sql` adds `synced_at` to `resumes`.
- **Plausible analytics** — lightweight privacy-preserving analytics in Vue SPA and docs.
- **cf_text / cf_voice LLM backends** — wire trunk service backends in `llm.yaml`.
- **Mission alignment domains** — load preferred company domains from
`config/mission_domains.yaml` rather than hardcoded values.
- **GitHub Actions CI** — workflow for public credibility badge (`ci.yml`).
- **`CF_APP_NAME` cloud annotation** — coordinator pipeline attribution for multi-product
cloud deployments.
### Changed
- `/contacts` route now redirects to `/messages`; nav item renamed "Messages" → "Contacts"
label removed. `ContactsView.vue` preserved for reference, router points to `MessagingView`.
- Survey `/analyze` endpoint is now fully async via the task queue (no blocking LLM call
on the request thread).
- nginx config adds `/peregrine/` base-path routing for subdirectory deployments.
- `compose.demo.yml` updated for Vue/FastAPI architecture with tmpfs demo volume.
### Fixed
- Tier bypass and draft body persistence after page navigation.
- `canDraftLlm` cleanup and message list `limit` cap.
- DemoBanner button contrast — semantic surface token instead of hardcoded white.
- Period split in `profile_to_library` now handles ISO date strings containing hyphens.
- Cloud startup sweeps all user DBs for pending migrations on deploy.
- Resume import strips CID glyph references via `resume_parser` extractors.
- Survey and interview tests updated for `hired_feedback` column and async analyze flow.
---
## [0.8.6] — 2026-04-12
### Added
- **Resume Review Modal** — paged tabbed dialog replaces the inline resume review
section in the Apply workspace. Pages through Skills diff, Summary diff, one page
per experience entry, and a Confirm summary. Color-coded tab status: unvisited
(gray), in-progress (indigo), accepted (green), partial (amber), skipped (slate).
Full ARIA tabs pattern with focus trap and `Teleport to body`.
- **Resume Library** — new `/resumes` page for managing saved resumes. Two-column
layout: list sidebar + full-text preview pane. Supports import (.txt, .pdf, .docx,
.odt, .yaml), rename (Edit), set as default, download (txt/pdf/yaml), and delete
(guarded: disabled when only resume or is default). 5 MB upload limit.
- **ResumeLibraryCard** — compact widget shown above the ATS Resume Optimizer in the
Apply workspace. Displays the currently active resume for the job (job-specific or
global default), with Switch and Manage deep links.
- **Resume library API** — `GET/POST /api/resumes`, `GET/PATCH/DELETE /api/resumes/{id}`,
`POST /api/resumes/{id}/set-default`, `POST /api/resumes/import`,
`GET/PATCH /api/jobs/{job_id}/resume`. `approve_resume` extended with
`save_to_library` + `resume_name` params to save optimized resumes directly.
- **`resumes` DB migration** — `migrations/005_resumes_table.sql` adds `resumes` table
(10 columns) and `resume_id` FK on `jobs`.
- **Resumes nav link** — Document icon entry added after Apply in the main nav.
### Changed
- Resume optimizer "Awaiting review" state now triggers the Review Modal instead of
rendering an inline diff; save-to-library checkbox and name input surfaced on the
preview confirmation step.
---
## [0.8.5] — 2026-04-02
### Added
- **Vue onboarding wizard** — 7-step first-run setup replaces the Streamlit wizard
in the Vue SPA: Hardware detection → Tier → Resume upload/build → Identity →
Inference & API keys → Search preferences → Integrations. Progress saves to
`user.yaml` on every step; crash-recovery resumes from the last completed step.
- **Wizard API endpoints** — `GET /api/wizard/status`, `POST /api/wizard/step`,
`GET /api/wizard/hardware`, `POST /api/wizard/inference/test`,
`POST /api/wizard/complete`. Inference test always soft-fails so Ollama being
unreachable never blocks setup completion.
- **Cloud auto-skip** — cloud instances automatically complete steps 1 (hardware),
2 (tier), and 5 (inference) and drop the user directly on the Resume step.
- **`wizardGuard` router gate** — all Vue routes require wizard completion; completed
users are bounced away from `/setup` to `/`.
- **Chip-input search step** — job titles and locations entered as press-Enter/comma
chips; validates at least one title before advancing.
- **Integrations tile grid** — optional step 7 shows Notion, Calendar, Slack, Discord,
Drive with paid-tier badges; skippable on Finish.
### Fixed
- **User config isolation: dangerous fallback removed** — `_user_yaml_path()` fell
back to `/devl/job-seeker/config/user.yaml` (legacy profile) when `user.yaml`
didn't exist at the expected path; new users now get an empty dict instead of
another user's data. Affects profile, resume, search, and all wizard endpoints.
- **Resume path not user-isolated** — `RESUME_PATH = Path("config/plain_text_resume.yaml")`
was a relative CWD path shared across all users. Replaced with `_resume_path()`
derived from `_user_yaml_path()` / `STAGING_DB`.
- **Resume upload silently returned empty data** — `upload_resume` was passing a
file path string to `structure_resume()` which expects raw text; now reads bytes
and dispatches to the correct extractor (`extract_text_from_pdf` / `_docx` / `_odt`).
- **Wizard resume step read wrong envelope field** — `WizardResumeStep.vue` read
`data.experience` but the upload response wraps parsed data under `data.data`.
---
## [0.8.4] — 2026-04-02
### Fixed
- **Cloud: cover letter used wrong user's profile** — `generate_cover_letter.generate()`
loaded `_profile` from the global `config/user.yaml` at module import time, so all
cloud users got the default user's name, voice, and mission preferences in their
generated letters. `generate()` now accepts a `user_yaml_path` parameter; `task_runner`
derives it from the per-user config directory (`db_path/../config/user.yaml`) and
passes it through. `_build_system_context`, `_build_mission_notes`, `detect_mission_alignment`,
`build_prompt`, and `_trim_to_letter_end` all accept a `profile` override so the
per-call profile is used end-to-end without breaking CLI mode.
- **Apply Workspace: hardcoded config paths in cloud mode** — `4_Apply.py` was loading
`_USER_YAML` and `RESUME_YAML` from the repo-root `config/` before `resolve_session()`
ran, so cloud users saw the global (Meg's) resume in the Apply tab. Both paths now
derive from `get_config_dir()` after session resolution.
### Changed
- **Vue SPA open to all tiers** — Vue 3 frontend is no longer gated behind the beta
flag; all tier users can switch to the Vue UI from Settings.
- **LLM model candidates** — vllm backend now tries Qwen2.5-3B first, Phi-4-mini
as fallback (was reversed). cf_orch allocation block added to vllm config.
- **Preflight** — removed `vllm` from Docker adoption list; vllm is now managed
entirely by cf-orch and should not be stubbed by preflight.
---
## [0.8.3] — 2026-04-01
### Fixed
- **CI: Forgejo auth** — GitHub Actions `pip install` was failing to fetch
`circuitforge-core` from the private Forgejo VCS URL. Added `FORGEJO_TOKEN`
repository secret and a `git config insteadOf` step to inject credentials
before `pip install`.
- **CI: settings API tests** — 6 `test_dev_api_settings` PUT/POST tests were
returning HTTP 500 in CI because `_user_yaml_path()` read the module-level
`DB_PATH` constant (frozen at import time), so `monkeypatch.setenv("STAGING_DB")`
had no effect. Fixed by reading `os.environ` at call time.
---
## [0.8.2] — 2026-04-01
### Fixed
- **CI pipeline** — `pip install -r requirements.txt` was failing in GitHub Actions
because `-e ../circuitforge-core` requires a sibling directory that doesn't exist
in a single-repo checkout. Replaced with a `git+https://` VCS URL fallback;
`Dockerfile.cfcore` still installs from the local `COPY` to avoid redundant
network fetches during Docker builds.
- **Vue-nav reload loop** — `sync_ui_cookie()` was calling
`window.parent.location.reload()` on every render when `user.yaml` has
`ui_preference: vue` but no Caddy proxy is in the traffic path (test instances,
bare Docker). Gated the reload on `PEREGRINE_CADDY_PROXY=1`; instances without
the env var set the cookie silently and skip the reload.
### Changed
- **cfcore VRAM lease integration** — the task scheduler now acquires a VRAM lease
from the cf-orch coordinator before running a batch of LLM tasks and releases it
when the batch completes. Visible in the coordinator dashboard at `:7700`.
- **`CF_ORCH_URL` env var** — scheduler reads coordinator address from
`CF_ORCH_URL` (default `http://localhost:7700`); set to
`http://host.docker.internal:7700` in Docker compose files so containers can
reach the host coordinator.
- **All compose files on `Dockerfile.cfcore`** — `compose.yml`, `compose.cloud.yml`,
and `compose.test-cfcore.yml` all use the parent-context build. `build: .` is
removed from `compose.yml`.
---
## [0.8.1] — 2026-04-01
### Fixed
- **Job title suggester silent failure** — when the LLM returned empty arrays or
non-JSON text, the spinner would complete with zero UI feedback. Now shows an
explicit "No new suggestions found" info message with a resume-upload hint for
new users who haven't uploaded a resume yet.
- **Suggester exception handling** — catch `Exception` instead of only
`RuntimeError` so connection errors and `FileNotFoundError` (missing llm.yaml)
surface as error messages rather than crashing the page silently.
### Added
- **`Dockerfile.cfcore`** — parent-context Dockerfile that copies
`circuitforge-core/` alongside `peregrine/` before `pip install`, resolving
the `-e ../circuitforge-core` editable requirement inside Docker.
- **`compose.test-cfcore.yml`** — single-user test instance on port 8516 for
smoke-testing cfcore shim integration before promoting to the cloud instance.
---
## [0.8.0] — 2026-04-01
### Added
- **ATS Resume Optimizer** (gap report free; LLM rewrite paid+)
- `scripts/resume_optimizer.py` — full pipeline: TF-IDF gap extraction →
`prioritize_gaps` → `rewrite_for_ats` → hallucination guard (anchor-set
diffing on employers, institutions, and dates)
- `scripts/db.py` — `optimized_resume` + `ats_gap_report` columns;
`save_optimized_resume` / `get_optimized_resume` helpers
- `GET /api/jobs/{id}/resume_optimizer` — fetch gap report + rewrite
- `POST /api/jobs/{id}/resume_optimizer/generate` — queue rewrite task
- `GET /api/jobs/{id}/resume_optimizer/task` — poll task status
- `web/src/components/ResumeOptimizerPanel.vue` — gap report (all tiers),
LLM rewrite section (paid+), hallucination warning badge, `.txt` download
- `ResumeOptimizerPanel` integrated into `ApplyWorkspace`
- **Vue SPA full merge** (closes #8) — `feature/vue-spa` merged to `main`
- `dev-api.py` — full FastAPI backend (settings, jobs, interviews, prep,
survey, digest, resume optimizer); cloud session middleware (JWT → per-user
SQLite); BYOK credential store
- `dev_api.py` — symlink → `dev-api.py` for importable module alias
- `scripts/job_ranker.py` — two-stage ranking for `/api/jobs/stack`
- `scripts/credential_store.py` — per-user BYOK API key management
- `scripts/user_profile.py` — `load_user_profile` / `save_user_profile`
- `web/src/components/TaskIndicator.vue` + `web/src/stores/tasks.ts`
live background task queue display
- `web/public/` — peregrine logo assets (SVG + PNG)
- **API test suite** — 5 new test modules (622 tests total)
- `tests/test_dev_api_settings.py` (38 tests)
- `tests/test_dev_api_interviews.py`, `test_dev_api_prep.py`,
`test_dev_api_survey.py`, `test_dev_api_digest.py`
### Fixed
- **Cloud DB routing** — `app/pages/1_Job_Review.py`, `5_Interviews.py`,
`6_Interview_Prep.py`, `7_Survey.py` were hardcoding `DEFAULT_DB`; now
use `get_db_path()` for correct per-user routing in cloud mode (#24)
- **Test isolation** — `importlib.reload(dev_api)` in digest/interviews
fixtures reset all module globals, silently breaking `monkeypatch.setattr`
in subsequent test files; replaced with targeted `monkeypatch.setattr(dev_api,
"DB_PATH", tmp_db)` (#26)
---
## [0.7.0] — 2026-03-22
### Added
- **Vue 3 SPA — beta access for paid tier** — The new Vue 3 frontend (built with
Vite + UnoCSS) is now merged into `main` and available to paid-tier subscribers
as an opt-in beta. The Streamlit UI remains the default and will continue to
receive full support.
- `web/` — full Vue 3 SPA source (components, stores, router, composables,
views) from `feature/vue-spa`
- `web/src/components/ClassicUIButton.vue` — one-click switch back to the
Classic (Streamlit) UI; sets `prgn_ui=streamlit` cookie and appends
`?prgn_switch=streamlit` so `user.yaml` stays in sync
- `web/src/composables/useFeatureFlag.ts` — reads `prgn_demo_tier` cookie for
demo toolbar visual consistency (display-only, not an authoritative gate)
- **UI switcher** — Reddit-style opt-in to the Vue SPA with durable preference
persistence and graceful fallback.
- `app/components/ui_switcher.py` — `sync_ui_cookie()`, `switch_ui()`,
`render_banner()`, `render_settings_toggle()`
- `scripts/user_profile.py` — `ui_preference` field (`streamlit` | `vue`,
default: `streamlit`) with round-trip `save()`
- `app/wizard/tiers.py` — `vue_ui_beta: "paid"` feature key; `demo_tier`
keyword arg on `can_use()` for thread-safe demo mode simulation
- Banner (dismissible, paid tier only) + Settings → System → Deployment toggle
- Caddy cookie routing: `prgn_ui=vue` → nginx Vue SPA; absent/`streamlit` →
Streamlit. 502 fallback clears cookie and redirects with `?ui_fallback=1`
- **Demo toolbar** — slim full-width tier-simulation bar for `DEMO_MODE`
instances. Free / Paid / Premium pills let demo visitors explore all feature
tiers without an account. Persists via `prgn_demo_tier` cookie. Default: Paid
(most compelling first impression). `app/components/demo_toolbar.py`
- **Docker `web` service** — multi-stage nginx container serving the Vue SPA
`dist/` build. Added to `compose.yml` (port 8506), `compose.demo.yml`
(port 8507), `compose.cloud.yml` (port 8508). `manage.sh build` now includes
the `web` service alongside `app`.
### Changed
- **Caddy routing** — `menagerie.circuitforge.tech` and
`demo.circuitforge.tech` peregrine blocks now inspect the `prgn_ui` cookie
and fan-out to the Vue SPA service or Streamlit accordingly.
---
## [0.6.2] — 2026-03-18
### Added

View file

@ -34,7 +34,7 @@ full instructions.
```bash
git clone https://git.opensourcesolarpunk.com/pyr0ball/peregrine.git
cd peregrine
./install.sh # installs deps, activates git hooks
./setup.sh # installs deps, activates git hooks
./manage.sh start
```

View file

@ -6,7 +6,7 @@ WORKDIR /app
# System deps for companyScraper (beautifulsoup4, fake-useragent, lxml) and PDF gen
# libsqlcipher-dev: required to build pysqlcipher3 (SQLCipher AES-256 encryption for cloud mode)
RUN apt-get update && apt-get install -y --no-install-recommends \
gcc libffi-dev curl libsqlcipher-dev git \
gcc libffi-dev curl libsqlcipher-dev \
&& rm -rf /var/lib/apt/lists/*
COPY requirements.txt .

View file

@ -1,60 +0,0 @@
# Dockerfile.cfcore — build context must be the PARENT directory of peregrine/
#
# Used when circuitforge-core is installed from source (not PyPI).
# Both repos must be siblings on the build host:
# /devl/peregrine/ → WORKDIR /app
# /devl/circuitforge-core/ → installed to /circuitforge-core
#
# Build manually:
# docker build -f peregrine/Dockerfile.cfcore -t peregrine-cfcore ..
#
# Via compose (compose.test-cfcore.yml sets context: ..):
# docker compose -f compose.test-cfcore.yml build
FROM python:3.11-slim
WORKDIR /app
# System deps for companyScraper (beautifulsoup4, fake-useragent, lxml) and PDF gen
# libsqlcipher-dev: required to build pysqlcipher3 (SQLCipher AES-256 encryption for cloud mode)
RUN apt-get update && apt-get install -y --no-install-recommends \
gcc libffi-dev curl libsqlcipher-dev \
&& rm -rf /var/lib/apt/lists/*
# Copy circuitforge-core and install it from the local path before requirements.txt.
# requirements.txt has a git+https:// fallback URL for CI (where circuitforge-core
# is not a sibling directory), but Docker always has the local copy available here.
COPY circuitforge-core/ /circuitforge-core/
RUN pip install --no-cache-dir /circuitforge-core
# circuitforge-orch client — needed for LLMRouter cf_orch allocation.
# Required at build time: if the directory doesn't exist the COPY will fail, so keep
# circuitforge-orch as a sibling of peregrine in the build context.
COPY circuitforge-orch/ /circuitforge-orch/
RUN pip install --no-cache-dir /circuitforge-orch
COPY peregrine/requirements.txt .
# Skip the cfcore line — already installed above from the local copy
RUN grep -v 'circuitforge-core' requirements.txt | pip install --no-cache-dir -r /dev/stdin
# Install Playwright browser (cached separately from Python deps so requirements
# changes don't bust the ~600–900 MB Chromium layer and vice versa)
RUN playwright install chromium && playwright install-deps chromium
# Bundle companyScraper (company research web scraper)
COPY peregrine/scrapers/ /app/scrapers/
COPY peregrine/ .
# Remove per-user config files that are gitignored but may exist locally.
# Defense-in-depth: the parent .dockerignore should already exclude these,
# but an explicit rm guarantees they never end up in the cloud image.
RUN rm -f config/user.yaml config/plain_text_resume.yaml config/notion.yaml \
config/email.yaml config/tokens.yaml config/craigslist.yaml \
config/adzuna.yaml .env
EXPOSE 8501
CMD ["streamlit", "run", "app/app.py", \
"--server.port=8501", \
"--server.headless=true", \
"--server.fileWatcherType=none"]

View file

@ -1,153 +0,0 @@
# Peregrine → xanderland.tv Setup Handoff
**Written from:** dev machine (CircuitForge dev env)
**Target:** xanderland.tv (beta tester, rootful Podman + systemd)
**Date:** 2026-02-27
---
## What we're doing
Getting Peregrine running on the beta tester's server as a Podman container managed by systemd. He already runs SearXNG and other services in the same style — rootful Podman with `--net=host`, `--restart=unless-stopped`, registered as systemd units.
The script `podman-standalone.sh` in the repo root handles the container setup.
---
## Step 1 — Get the repo onto xanderland.tv
From navi (or directly if you have a route):
```bash
ssh xanderland.tv "sudo git clone <repo-url> /opt/peregrine"
```
Or if it's already there, just pull:
```bash
ssh xanderland.tv "cd /opt/peregrine && sudo git pull"
```
---
## Step 2 — Verify /opt/peregrine looks right
```bash
ssh xanderland.tv "ls /opt/peregrine"
```
Expect to see: `Dockerfile`, `compose.yml`, `manage.sh`, `podman-standalone.sh`, `config/`, `app/`, `scripts/`, etc.
---
## Step 3 — Config
```bash
ssh xanderland.tv
cd /opt/peregrine
sudo mkdir -p data
sudo cp config/llm.yaml.example config/llm.yaml
sudo cp config/notion.yaml.example config/notion.yaml # only if he wants Notion sync
```
Then edit `config/llm.yaml` and set `searxng_url` to his existing SearXNG instance
(default is `http://localhost:8888` — confirm his actual port).
He won't need Anthropic/OpenAI keys to start — the setup wizard lets him pick local Ollama
or whatever he has running.
---
## Step 4 — Fix DOCS_DIR in the script
The script defaults `DOCS_DIR=/Library/Documents/JobSearch` which is the original user's path.
Update it to wherever his job search documents actually live, or a placeholder empty dir:
```bash
sudo mkdir -p /opt/peregrine/docs # placeholder if he has no docs yet
```
Then edit the script:
```bash
sudo sed -i 's|DOCS_DIR=.*|DOCS_DIR=/opt/peregrine/docs|' /opt/peregrine/podman-standalone.sh
```
---
## Step 5 — Build the image
```bash
ssh xanderland.tv "cd /opt/peregrine && sudo podman build -t localhost/peregrine:latest ."
```
Takes a few minutes on first run (downloads python:3.11-slim, installs deps).
---
## Step 6 — Run the script
```bash
ssh xanderland.tv "sudo bash /opt/peregrine/podman-standalone.sh"
```
This starts a single container (`peregrine`) with `--net=host` and `--restart=unless-stopped`.
SearXNG is NOT included — his existing instance is used.
Verify it came up:
```bash
ssh xanderland.tv "sudo podman ps | grep peregrine"
ssh xanderland.tv "sudo podman logs peregrine"
```
Health check endpoint: `http://xanderland.tv:8501/_stcore/health`
---
## Step 7 — Register as a systemd service
```bash
ssh xanderland.tv
sudo podman generate systemd --new --name peregrine \
| sudo tee /etc/systemd/system/peregrine.service
sudo systemctl daemon-reload
sudo systemctl enable --now peregrine
```
Confirm:
```bash
sudo systemctl status peregrine
```
---
## Step 8 — First-run wizard
Open `http://xanderland.tv:8501` in a browser.
The setup wizard (page 0) will gate the app until `config/user.yaml` is created.
He'll fill in his profile — name, resume, LLM backend preferences. This writes
`config/user.yaml` and unlocks the rest of the UI.
---
## Troubleshooting
| Symptom | Check |
|---------|-------|
| Container exits immediately | `sudo podman logs peregrine` — usually a missing config file |
| Port 8501 already in use | `sudo ss -tlnp \| grep 8501` — something else on that port |
| SearXNG not reachable | Confirm `searxng_url` in `config/llm.yaml` and that JSON format is enabled in SearXNG settings |
| Wizard loops / won't save | `config/` volume mount permissions — `sudo chown -R 1000:1000 /opt/peregrine/config` |
---
## To update Peregrine later
```bash
cd /opt/peregrine
sudo git pull
sudo podman build -t localhost/peregrine:latest .
sudo podman restart peregrine
```
No need to touch the systemd unit — it launches fresh via `--new` in the generate step.

View file

@ -45,7 +45,7 @@ endif
PROFILE_ARG := $(if $(filter remote,$(PROFILE)),,--profile $(PROFILE))
setup: ## Install dependencies (Docker or Podman + NVIDIA toolkit)
@bash install.sh
@bash setup.sh
preflight: ## Check ports + system resources; write .env
@$(PYTHON) scripts/preflight.py

245
README.md
View file

@ -1,143 +1,173 @@
<div align="center">
<img src="web/public/peregrine.svg" alt="Peregrine" width="120" />
# Peregrine
<h1>Peregrine</h1>
> **Primary development** happens at [git.opensourcesolarpunk.com](https://git.opensourcesolarpunk.com/pyr0ball/peregrine) — GitHub and Codeberg are push mirrors. Issues and PRs are welcome on either platform.
<p><strong>Job search pipeline — by <a href="https://circuitforge.tech">Circuit Forge LLC</a></strong></p>
[![License: BSL 1.1](https://img.shields.io/badge/License-BSL_1.1-blue.svg)](./LICENSE-BSL)
[![CI](https://github.com/CircuitForge/peregrine/actions/workflows/ci.yml/badge.svg)](https://github.com/CircuitForge/peregrine/actions/workflows/ci.yml)
<p><em>AI for the tasks the system made hard on purpose.</em></p>
**AI-powered job search pipeline — by [Circuit Forge LLC](https://circuitforge.tech)**
[![License: MIT / BSL 1.1](https://img.shields.io/badge/License-MIT%20%2F%20BSL%201.1-blue.svg)](#license)
[![CI](https://github.com/CircuitForgeLLC/peregrine/actions/workflows/ci.yml/badge.svg)](https://github.com/CircuitForgeLLC/peregrine/actions/workflows/ci.yml)
[![Docs](https://img.shields.io/badge/docs-docs.circuitforge.tech-orange)](https://docs.circuitforge.tech/peregrine/)
[![Version](https://img.shields.io/badge/version-0.9.0-green)](https://git.opensourcesolarpunk.com/Circuit-Forge/peregrine/releases)
> *"Don't be evil, for real and forever."*
<p>
<a href="https://demo.circuitforge.tech/peregrine"><strong>Live Demo</strong></a>
no account required, nothing saved &nbsp;|&nbsp;
<a href="https://docs.circuitforge.tech/peregrine/">Docs</a> &nbsp;|&nbsp;
<a href="https://git.opensourcesolarpunk.com/Circuit-Forge/peregrine/issues">Issues</a>
</p>
<blockquote>
<strong>Primary development</strong> happens at
<a href="https://git.opensourcesolarpunk.com/Circuit-Forge/peregrine">git.opensourcesolarpunk.com/Circuit-Forge/peregrine</a>.
GitHub and Codeberg are push mirrors. Issues and PRs are welcome on any platform.
</blockquote>
</div>
---
<table>
<tr>
<td><img src="docs/screenshots/01-dashboard.png" alt="Dashboard with pipeline stats and discovery controls"/></td>
<td><img src="docs/screenshots/02-review.png" alt="Job review — approve, skip, or reject with keyboard shortcuts"/></td>
</tr>
<tr>
<td><img src="docs/screenshots/03-apply.png" alt="Apply workspace with LLM-drafted cover letter"/></td>
<td><img src="docs/screenshots/04-interviews.png" alt="Interview kanban with company research and recruiter emails"/></td>
</tr>
</table>
---
## Why Peregrine?
Job search is a second job nobody hired you for. ATS (applicant tracking system) filters designed to reject. Boards that show the same listing eight times. Cover letter number forty-seven for a role that might already be filled. Hours of prep for a phone screen that lasts twelve minutes.
- **Handles the full pipeline.** Discover, filter, match, draft, track — one tool, one database, no duct tape.
- **LLM is optional and local-first.** Discovery and tracking work with no LLM at all. When you do configure one, it runs on your hardware by default. Cloud inference is a fallback, not the default path.
- **Ghost-post detection baked in.** Listings that have been open too long or look like sourcing traps get flagged before you spend time on them.
- **Human approval at every step.** LLM drafts cover letters and research briefs; you approve before anything goes anywhere. Peregrine never submits an application on your behalf.
- **Privacy · Safety · Accessibility** are architectural constraints, not aspirational copy. No PII (personally identifiable information) logging, no behavioral profiling, no dark patterns.
Automates the full job search lifecycle: discovery → matching → cover letters → applications → interview prep.
Privacy-first, local-first. Your data never leaves your machine.
---
## Quick Start
One-line install:
**1. Clone and install dependencies** (Docker, NVIDIA toolkit if needed):
```bash
bash <(curl -fsSL https://git.opensourcesolarpunk.com/Circuit-Forge/peregrine/raw/branch/main/install.sh)
```
Or clone and run manually:
```bash
git clone https://git.opensourcesolarpunk.com/Circuit-Forge/peregrine
git clone https://git.opensourcesolarpunk.com/pyr0ball/peregrine
cd peregrine
./manage.sh setup
./manage.sh start
```
Open **http://localhost:8502** — the setup wizard walks you through the rest.
> **macOS / Apple Silicon:** install Ollama natively via Homebrew before starting for Metal GPU-accelerated inference. `install.sh` handles this automatically.
> **Windows:** use WSL2 with Ubuntu.
### Inference profiles
**2. Start Peregrine:**
```bash
./manage.sh start # remote — no GPU; LLM calls go to Anthropic / OpenAI
./manage.sh start --profile cpu # local Ollama on CPU (or Metal via native Ollama on macOS)
./manage.sh start --profile single-gpu # Ollama + vision on GPU 0 (NVIDIA only)
./manage.sh start --profile dual-gpu # Ollama + vLLM on two NVIDIA GPUs
./manage.sh start # remote profile (API-only, no GPU)
./manage.sh start --profile cpu # local Ollama (CPU, or Metal GPU on Apple Silicon — see below)
./manage.sh start --profile single-gpu # Ollama + Vision on GPU 0 (NVIDIA only)
./manage.sh start --profile dual-gpu # Ollama + Vision + vLLM (GPU 0 + 1) (NVIDIA only)
```
Or use `make` directly:
```bash
make start # remote profile
make start PROFILE=single-gpu
```
**3.** Open http://localhost:8501 — the setup wizard guides you through the rest.
> **macOS / Apple Silicon:** Docker Desktop must be running. For Metal GPU-accelerated inference, install Ollama natively before starting — `setup.sh` will prompt you to do this. See [Apple Silicon GPU](#apple-silicon-gpu) below.
> **Windows:** Not supported — use WSL2 with Ubuntu.
### Installing to `/opt` or other system directories
If you clone into a root-owned directory (e.g. `sudo git clone ... /opt/peregrine`), two things need fixing:
**1. Git ownership warning** (`fatal: detected dubious ownership`) — `./manage.sh setup` fixes this automatically. If you need git to work *before* running setup:
```bash
git config --global --add safe.directory /opt/peregrine
```
**2. Preflight write access** — preflight writes `.env` and `compose.override.yml` into the repo directory. Fix ownership once:
```bash
sudo chown -R $USER:$USER /opt/peregrine
```
After that, run everything without `sudo`.
### Podman
Podman is rootless by default — **no `sudo` needed.** `./manage.sh setup` will configure `podman-compose` if it isn't already present.
### Docker
After `./manage.sh setup`, log out and back in for docker group membership to take effect. Until then, prefix commands with `sudo`. After re-login, `sudo` is no longer required.
---
## Inference Profiles
| Profile | Services started | Use case |
|---------|-----------------|----------|
| `remote` | app + searxng | No GPU; LLM calls go to Anthropic / OpenAI |
| `cpu` | app + ollama + searxng | No GPU; local models on CPU. On Apple Silicon, use with native Ollama for Metal acceleration — see below. |
| `single-gpu` | app + ollama + vision + searxng | One **NVIDIA** GPU: cover letters, research, vision |
| `dual-gpu` | app + ollama + vllm + vision + searxng | Two **NVIDIA** GPUs: GPU 0 = Ollama, GPU 1 = vLLM |
### Apple Silicon GPU
Docker Desktop on macOS runs in a Linux VM — it cannot access the Apple GPU. Metal-accelerated inference requires Ollama to run **natively** on the host.
`setup.sh` handles this automatically: it offers to install Ollama via Homebrew, starts it as a background service, and explains what happens next. If Ollama is running on port 11434 when you start Peregrine, preflight detects it, stubs out the Docker Ollama container, and routes inference through the native process — which uses Metal automatically.
To do it manually:
```bash
brew install ollama
brew services start ollama # starts at login, uses Metal GPU
./manage.sh start --profile cpu # preflight adopts native Ollama; Docker container is skipped
```
The `cpu` profile label is a slight misnomer in this context — Ollama will be running on the GPU. `single-gpu` and `dual-gpu` profiles are NVIDIA-specific and not applicable on Mac.
---
## First-Run Wizard
On first launch the setup wizard walks through seven steps:
1. **Hardware** — detects NVIDIA GPUs (Linux) or Apple Silicon GPU (macOS) and recommends a profile
2. **Tier** — choose free, paid, or premium (or use `dev_tier_override` for local testing)
3. **Identity** — name, email, phone, LinkedIn, career summary
4. **Resume** — upload a PDF/DOCX for LLM parsing, or use the guided form builder
5. **Inference** — configure LLM backends and API keys
6. **Search** — job titles, locations, boards, keywords, blocklist
7. **Integrations** — optional cloud storage, calendar, and notification services
Wizard state is saved after each step — a crash or browser close resumes where you left off.
Re-enter the wizard any time via **Settings → Developer → Reset wizard**.
---
## Features
| Feature | Tier |
|---------|------|
| Job discovery — LinkedIn, Indeed, Glassdoor, Adzuna, The Ladders | Free |
| Ghost-post detection | Free |
| Resume keyword matching and gap analysis | Free |
| Document storage sync (Google Drive, Dropbox, OneDrive, Nextcloud) | Free |
| Job discovery (JobSpy + custom boards) | Free |
| Resume keyword matching & gap analysis | Free |
| Document storage sync (Google Drive, Dropbox, OneDrive, MEGA, Nextcloud) | Free |
| Webhook notifications (Discord, Home Assistant) | Free |
| Vue 3 SPA — full UI with onboarding wizard, job board, apply workspace, interview kanban | Free |
| **Cover letter generation** | Free with LLM¹ |
| **Company research briefs** | Free with LLM¹ |
| **Interview prep and practice Q&A** | Free with LLM ¹ |
| **Interview prep & practice Q&A** | Free with LLM¹ |
| **Survey assistant** (culture-fit Q&A, screenshot analysis) | Free with LLM¹ |
| **AI wizard helpers** (career summary, bullet expansion, skill suggestions) | Free with LLM¹ |
| Managed cloud LLM (no API key needed) | Paid |
| Email sync and auto-classification | Paid |
| Email sync & auto-classification | Paid |
| Job tracking integrations (Notion, Airtable, Google Sheets) | Paid |
| Calendar sync (Google, Apple) | Paid |
| Slack notifications | Paid |
| CircuitForge shared cover-letter model | Paid |
| **Voice guidelines** (custom writing style and tone) | Premium with LLM ¹ |
| Cover letter model fine-tuning — your writing, your model | Premium |
| Cover letter model fine-tuning (your writing, your model) | Premium |
| Multi-user support | Premium |
| Human-in-the-loop operator (CAPTCHAs, phone calls, wet signatures) | Ultra |
¹ **BYOK (bring your own key) unlock:** configure any LLM backend — a local [Ollama](https://ollama.com) or vLLM instance, or your own API key (Anthropic, OpenAI-compatible) — and all "Free with LLM" and "Premium with LLM" features unlock at no charge.
¹ **BYOK unlock:** configure any LLM backend — a local [Ollama](https://ollama.com) or vLLM instance,
or your own API key (Anthropic, OpenAI-compatible) — and all AI features marked **Free with LLM**
unlock at no charge. The paid tier earns its price by providing managed cloud inference so you
don't need a key at all, plus integrations and email sync.
---
## What Peregrine does not do
## Email Sync
Peregrine does **not** submit job applications for you. You still click apply on the employer's site.
Monitors your inbox for job-related emails and automatically updates job stages (interview requests, rejections, survey links, offers).
This is intentional. Automated mass-applying is a bad experience for everyone and a trust violation with employers who posted a real role. The submit button is yours. The rest of the grind is ours.
Configure in **Settings → Email**. Requires IMAP access and, for Gmail, an App Password.
---
## Stack
## Integrations
| Layer | Technology |
|-------|-----------|
| Frontend | Vue 3 SPA (Vite) |
| Backend | FastAPI + Python |
| Database | SQLite (local, per-user) |
| Job scraping | [JobSpy](https://github.com/Bunsly/JobSpy) + custom board scrapers |
| LLM inference | Ollama, vLLM, Anthropic, OpenAI-compatible — configurable fallback chain |
| Vision | moondream2 (survey screenshot analysis) |
| Container | Docker / Podman |
Connect external services in **Settings → Integrations**:
- **Job tracking:** Notion, Airtable, Google Sheets
- **Document storage:** Google Drive, Dropbox, OneDrive, MEGA, Nextcloud
- **Calendar:** Google Calendar, Apple Calendar (CalDAV)
- **Notifications:** Slack, Discord (webhook), Home Assistant
---
## manage.sh reference
## CLI Reference (`manage.sh`)
`manage.sh` is the single entry point for all common operations — no need to remember Make targets or Docker commands.
```
./manage.sh setup Install Docker/Podman + NVIDIA toolkit
@ -146,38 +176,31 @@ This is intentional. Automated mass-applying is a bad experience for everyone an
./manage.sh restart Restart all services
./manage.sh status Show running containers
./manage.sh logs [service] Tail logs (default: app)
./manage.sh update Pull latest images and rebuild app container
./manage.sh update Pull latest images + rebuild app container
./manage.sh preflight Check ports + resources; write .env
./manage.sh test Run test suite
./manage.sh prepare-training Scan docs for cover letters — outputs training JSONL
./manage.sh finetune Run LoRA fine-tune (requires single-gpu profile or higher)
./manage.sh prepare-training Scan docs for cover letters → training JSONL
./manage.sh finetune Run LoRA fine-tune (needs --profile single-gpu+)
./manage.sh open Open the web UI in your browser
./manage.sh clean Remove containers, images, volumes (asks to confirm)
```
---
## Documentation
## Developer Docs
Full docs at **[docs.circuitforge.tech/peregrine](https://docs.circuitforge.tech/peregrine)**
Full documentation at: https://docs.circuitforge.tech/peregrine
Bug reports and feature requests: [Forgejo issues](https://git.opensourcesolarpunk.com/Circuit-Forge/peregrine/issues)
---
## Contributing
Contributions are welcome. The discovery pipeline — scrapers, board integrations, matching logic — is MIT-licensed. Fork it, extend it, send PRs. AI features are BSL 1.1. See the [contributing guide](https://docs.circuitforge.tech/peregrine/developer-guide/contributing/) for conventions.
- [Installation guide](https://docs.circuitforge.tech/peregrine/getting-started/installation/)
- [Adding a custom job board scraper](https://docs.circuitforge.tech/peregrine/developer-guide/adding-scrapers/)
- [Adding an integration](https://docs.circuitforge.tech/peregrine/developer-guide/adding-integrations/)
- [Contributing](https://docs.circuitforge.tech/peregrine/developer-guide/contributing/)
---
## License
Peregrine uses a split license:
| Component | License |
|-----------|---------|
| Discovery pipeline — scrapers, matching, tracking | [MIT](LICENSE-MIT) |
| LLM features — cover letter generation, company research, interview prep, survey assistant, fine-tuning | [BSL 1.1](LICENSE-BSL) — free for personal non-commercial self-hosting; commercial use or SaaS re-hosting requires a paid license; converts to MIT after four years |
Fine-tuned model weights are proprietary and per-user — not redistributable.
Core discovery pipeline: [MIT](LICENSE-MIT)
AI features (cover letter generation, company research, interview prep, UI): [BSL 1.1](LICENSE-BSL)
© 2026 Circuit Forge LLC

View file

@ -14,22 +14,24 @@ sys.path.insert(0, str(Path(__file__).parent.parent))
from scripts.user_profile import UserProfile
_USER_YAML = Path(__file__).parent.parent / "config" / "user.yaml"
_profile = UserProfile(_USER_YAML) if UserProfile.exists(_USER_YAML) else None
_name = _profile.name if _profile else "Job Seeker"
from scripts.db import init_db, get_job_counts, purge_jobs, purge_email_data, \
purge_non_remote, archive_jobs, kill_stuck_tasks, cancel_task, \
get_task_for_job, get_active_tasks, insert_job, get_existing_urls
purge_non_remote, archive_jobs, kill_stuck_tasks, get_task_for_job, get_active_tasks, \
insert_job, get_existing_urls
from scripts.task_runner import submit_task
from app.cloud_session import resolve_session, get_db_path, get_config_dir
from app.cloud_session import resolve_session, get_db_path
_CONFIG_DIR = Path(__file__).parent.parent / "config"
_NOTION_CONNECTED = (_CONFIG_DIR / "integrations" / "notion.yaml").exists()
resolve_session("peregrine")
init_db(get_db_path())
_CONFIG_DIR = get_config_dir()
_USER_YAML = _CONFIG_DIR / "user.yaml"
_profile = UserProfile(_USER_YAML) if UserProfile.exists(_USER_YAML) else None
_name = _profile.name if _profile else "Job Seeker"
def _email_configured() -> bool:
_e = get_config_dir() / "email.yaml"
_e = Path(__file__).parent.parent / "config" / "email.yaml"
if not _e.exists():
return False
import yaml as _yaml
@ -37,7 +39,7 @@ def _email_configured() -> bool:
return bool(_cfg.get("username") or _cfg.get("user") or _cfg.get("imap_host"))
def _notion_configured() -> bool:
_n = get_config_dir() / "notion.yaml"
_n = Path(__file__).parent.parent / "config" / "notion.yaml"
if not _n.exists():
return False
import yaml as _yaml
@ -45,7 +47,7 @@ def _notion_configured() -> bool:
return bool(_cfg.get("token"))
def _keywords_configured() -> bool:
_k = get_config_dir() / "resume_keywords.yaml"
_k = Path(__file__).parent.parent / "config" / "resume_keywords.yaml"
if not _k.exists():
return False
import yaml as _yaml
@ -132,7 +134,7 @@ def _queue_url_imports(db_path: Path, urls: list) -> int:
st.title(f"🔍 {_name}'s Job Search")
st.caption("Discover → Review → Sync" + (" to Notion" if _notion_configured() else ""))
st.caption("Discover → Review → Sync to Notion")
st.divider()
@ -144,7 +146,7 @@ def _live_counts():
col1.metric("Pending Review", counts.get("pending", 0))
col2.metric("Approved", counts.get("approved", 0))
col3.metric("Applied", counts.get("applied", 0))
col4.metric("Synced" + (" to Notion" if _notion_configured() else ""), counts.get("synced", 0))
col4.metric("Synced to Notion", counts.get("synced", 0))
col5.metric("Rejected", counts.get("rejected", 0))
@ -235,7 +237,7 @@ with mid:
with right:
approved_count = get_job_counts(get_db_path()).get("approved", 0)
if _notion_configured():
if _NOTION_CONNECTED:
st.subheader("Send to Notion")
st.caption("Push all approved jobs to your Notion tracking database.")
if approved_count == 0:
@ -374,144 +376,177 @@ _scrape_status()
st.divider()
# ── Danger zone ───────────────────────────────────────────────────────────────
# ── Danger zone: purge + re-scrape ────────────────────────────────────────────
with st.expander("⚠️ Danger Zone", expanded=False):
# ── Queue reset (the common case) ─────────────────────────────────────────
st.markdown("**Queue reset**")
st.caption(
"Archive clears your review queue while keeping job URLs for dedup, "
"so the same listings won't resurface on the next discovery run. "
"Use hard purge only if you want a full clean slate including dedup history."
"**Purge** permanently deletes jobs from the local database. "
"Applied and synced jobs are never touched."
)
_scope = st.radio(
"Clear scope",
["Pending only", "Pending + approved (stale search)"],
horizontal=True,
label_visibility="collapsed",
)
_scope_statuses = (
["pending"] if _scope == "Pending only" else ["pending", "approved"]
)
purge_col, rescrape_col, email_col, tasks_col = st.columns(4)
_qc1, _qc2, _qc3 = st.columns([2, 2, 4])
if _qc1.button("📦 Archive & reset", use_container_width=True, type="primary"):
st.session_state["confirm_dz"] = "archive"
if _qc2.button("🗑 Hard purge (delete)", use_container_width=True):
st.session_state["confirm_dz"] = "purge"
with purge_col:
st.markdown("**Purge pending & rejected**")
st.caption("Removes all _pending_ and _rejected_ listings so the next discovery starts fresh.")
if st.button("🗑 Purge Pending + Rejected", use_container_width=True):
st.session_state["confirm_purge"] = "partial"
if st.session_state.get("confirm_dz") == "archive":
st.info(
f"Archive **{', '.join(_scope_statuses)}** jobs? "
"URLs are kept for dedup — nothing is permanently deleted."
)
_dc1, _dc2 = st.columns(2)
if _dc1.button("Yes, archive", type="primary", use_container_width=True, key="dz_archive_confirm"):
n = archive_jobs(get_db_path(), statuses=_scope_statuses)
st.success(f"Archived {n} jobs.")
st.session_state.pop("confirm_dz", None)
if st.session_state.get("confirm_purge") == "partial":
st.warning("Are you sure? This cannot be undone.")
c1, c2 = st.columns(2)
if c1.button("Yes, purge", type="primary", use_container_width=True):
deleted = purge_jobs(get_db_path(), statuses=["pending", "rejected"])
st.success(f"Purged {deleted} jobs.")
st.session_state.pop("confirm_purge", None)
st.rerun()
if _dc2.button("Cancel", use_container_width=True, key="dz_archive_cancel"):
st.session_state.pop("confirm_dz", None)
if c2.button("Cancel", use_container_width=True):
st.session_state.pop("confirm_purge", None)
st.rerun()
if st.session_state.get("confirm_dz") == "purge":
st.warning(
f"Permanently delete **{', '.join(_scope_statuses)}** jobs? "
"This removes the URLs from dedup history too. Cannot be undone."
)
_dc1, _dc2 = st.columns(2)
if _dc1.button("Yes, delete", type="primary", use_container_width=True, key="dz_purge_confirm"):
n = purge_jobs(get_db_path(), statuses=_scope_statuses)
st.success(f"Deleted {n} jobs.")
st.session_state.pop("confirm_dz", None)
with email_col:
st.markdown("**Purge email data**")
st.caption("Clears all email thread logs and email-sourced pending jobs so the next sync starts fresh.")
if st.button("📧 Purge Email Data", use_container_width=True):
st.session_state["confirm_purge"] = "email"
if st.session_state.get("confirm_purge") == "email":
st.warning("This deletes all email contacts and email-sourced jobs. Cannot be undone.")
c1, c2 = st.columns(2)
if c1.button("Yes, purge emails", type="primary", use_container_width=True):
contacts, jobs = purge_email_data(get_db_path())
st.success(f"Purged {contacts} email contacts, {jobs} email jobs.")
st.session_state.pop("confirm_purge", None)
st.rerun()
if _dc2.button("Cancel", use_container_width=True, key="dz_purge_cancel"):
st.session_state.pop("confirm_dz", None)
if c2.button("Cancel ", use_container_width=True):
st.session_state.pop("confirm_purge", None)
st.rerun()
st.divider()
# ── Background tasks ──────────────────────────────────────────────────────
with tasks_col:
_active = get_active_tasks(get_db_path())
st.markdown(f"**Background tasks** — {len(_active)} active")
if _active:
_task_icons = {"cover_letter": "✉️", "research": "🔍", "discovery": "🌐", "enrich_descriptions": "📝"}
for _t in _active:
_tc1, _tc2, _tc3 = st.columns([3, 4, 2])
_icon = _task_icons.get(_t["task_type"], "⚙️")
_tc1.caption(f"{_icon} `{_t['task_type']}`")
_job_label = f"{_t['title']} @ {_t['company']}" if _t.get("title") else f"job #{_t['job_id']}"
_tc2.caption(_job_label)
_tc3.caption(f"_{_t['status']}_")
if st.button("✕ Cancel", key=f"dz_cancel_task_{_t['id']}", use_container_width=True):
cancel_task(get_db_path(), _t["id"])
st.rerun()
st.caption("")
_kill_col, _ = st.columns([2, 6])
if _kill_col.button("⏹ Kill all stuck", use_container_width=True, disabled=len(_active) == 0):
st.markdown("**Kill stuck tasks**")
st.caption(f"Force-fail all queued/running background tasks. Currently **{len(_active)}** active.")
if st.button("⏹ Kill All Tasks", use_container_width=True, disabled=len(_active) == 0):
killed = kill_stuck_tasks(get_db_path())
st.success(f"Killed {killed} task(s).")
st.rerun()
st.divider()
with rescrape_col:
st.markdown("**Purge all & re-scrape**")
st.caption("Wipes _all_ non-applied, non-synced jobs then immediately runs a fresh discovery.")
if st.button("🔄 Purge All + Re-scrape", use_container_width=True):
st.session_state["confirm_purge"] = "full"
# ── Rarely needed (collapsed) ─────────────────────────────────────────────
with st.expander("More options", expanded=False):
_rare1, _rare2, _rare3 = st.columns(3)
with _rare1:
st.markdown("**Purge email data**")
st.caption("Clears all email thread logs and email-sourced pending jobs.")
if st.button("📧 Purge Email Data", use_container_width=True):
st.session_state["confirm_dz"] = "email"
if st.session_state.get("confirm_dz") == "email":
st.warning("Deletes all email contacts and email-sourced jobs. Cannot be undone.")
_ec1, _ec2 = st.columns(2)
if _ec1.button("Yes, purge emails", type="primary", use_container_width=True, key="dz_email_confirm"):
contacts, jobs = purge_email_data(get_db_path())
st.success(f"Purged {contacts} email contacts, {jobs} email jobs.")
st.session_state.pop("confirm_dz", None)
st.rerun()
if _ec2.button("Cancel", use_container_width=True, key="dz_email_cancel"):
st.session_state.pop("confirm_dz", None)
st.rerun()
with _rare2:
st.markdown("**Purge non-remote**")
st.caption("Removes pending/approved/rejected on-site listings from the DB.")
if st.button("🏢 Purge On-site Jobs", use_container_width=True):
st.session_state["confirm_dz"] = "non_remote"
if st.session_state.get("confirm_dz") == "non_remote":
st.warning("Deletes all non-remote jobs not yet applied to. Cannot be undone.")
_rc1, _rc2 = st.columns(2)
if _rc1.button("Yes, purge on-site", type="primary", use_container_width=True, key="dz_nonremote_confirm"):
deleted = purge_non_remote(get_db_path())
st.success(f"Purged {deleted} non-remote jobs.")
st.session_state.pop("confirm_dz", None)
st.rerun()
if _rc2.button("Cancel", use_container_width=True, key="dz_nonremote_cancel"):
st.session_state.pop("confirm_dz", None)
st.rerun()
with _rare3:
st.markdown("**Wipe all + re-scrape**")
st.caption("Deletes all non-applied jobs then immediately runs a fresh discovery.")
if st.button("🔄 Wipe + Re-scrape", use_container_width=True):
st.session_state["confirm_dz"] = "rescrape"
if st.session_state.get("confirm_dz") == "rescrape":
st.warning("Wipes ALL pending, approved, and rejected jobs, then re-scrapes. Applied and synced records are kept.")
_wc1, _wc2 = st.columns(2)
if _wc1.button("Yes, wipe + scrape", type="primary", use_container_width=True, key="dz_rescrape_confirm"):
if st.session_state.get("confirm_purge") == "full":
st.warning("This will delete ALL pending, approved, and rejected jobs, then re-scrape. Applied and synced records are kept.")
c1, c2 = st.columns(2)
if c1.button("Yes, wipe + scrape", type="primary", use_container_width=True):
purge_jobs(get_db_path(), statuses=["pending", "approved", "rejected"])
submit_task(get_db_path(), "discovery", 0)
st.session_state.pop("confirm_dz", None)
st.session_state.pop("confirm_purge", None)
st.rerun()
if _wc2.button("Cancel", use_container_width=True, key="dz_rescrape_cancel"):
st.session_state.pop("confirm_dz", None)
if c2.button("Cancel ", use_container_width=True):
st.session_state.pop("confirm_purge", None)
st.rerun()
st.divider()
pending_col, nonremote_col, approved_col, _ = st.columns(4)
with pending_col:
st.markdown("**Purge pending review**")
st.caption("Removes only _pending_ listings, keeping your rejected history intact.")
if st.button("🗑 Purge Pending Only", use_container_width=True):
st.session_state["confirm_purge"] = "pending_only"
if st.session_state.get("confirm_purge") == "pending_only":
st.warning("Deletes all pending jobs. Rejected jobs are kept. Cannot be undone.")
c1, c2 = st.columns(2)
if c1.button("Yes, purge pending", type="primary", use_container_width=True):
deleted = purge_jobs(get_db_path(), statuses=["pending"])
st.success(f"Purged {deleted} pending jobs.")
st.session_state.pop("confirm_purge", None)
st.rerun()
if c2.button("Cancel ", use_container_width=True):
st.session_state.pop("confirm_purge", None)
st.rerun()
with nonremote_col:
st.markdown("**Purge non-remote**")
st.caption("Removes pending/approved/rejected jobs where remote is not set. Keeps anything already in the pipeline.")
if st.button("🏢 Purge On-site Jobs", use_container_width=True):
st.session_state["confirm_purge"] = "non_remote"
if st.session_state.get("confirm_purge") == "non_remote":
st.warning("Deletes all non-remote jobs not yet applied to. Cannot be undone.")
c1, c2 = st.columns(2)
if c1.button("Yes, purge on-site", type="primary", use_container_width=True):
deleted = purge_non_remote(get_db_path())
st.success(f"Purged {deleted} non-remote jobs.")
st.session_state.pop("confirm_purge", None)
st.rerun()
if c2.button("Cancel ", use_container_width=True):
st.session_state.pop("confirm_purge", None)
st.rerun()
with approved_col:
st.markdown("**Purge approved (unapplied)**")
st.caption("Removes _approved_ jobs you haven't applied to yet — e.g. to reset after a review pass.")
if st.button("🗑 Purge Approved", use_container_width=True):
st.session_state["confirm_purge"] = "approved_only"
if st.session_state.get("confirm_purge") == "approved_only":
st.warning("Deletes all approved-but-not-applied jobs. Cannot be undone.")
c1, c2 = st.columns(2)
if c1.button("Yes, purge approved", type="primary", use_container_width=True):
deleted = purge_jobs(get_db_path(), statuses=["approved"])
st.success(f"Purged {deleted} approved jobs.")
st.session_state.pop("confirm_purge", None)
st.rerun()
if c2.button("Cancel ", use_container_width=True):
st.session_state.pop("confirm_purge", None)
st.rerun()
st.divider()
archive_col1, archive_col2, _, _ = st.columns(4)
with archive_col1:
st.markdown("**Archive remaining**")
st.caption(
"Move all _pending_ and _rejected_ jobs to archived status. "
"Archived jobs stay in the DB for dedup — they just won't appear in Job Review."
)
if st.button("📦 Archive Pending + Rejected", use_container_width=True):
st.session_state["confirm_purge"] = "archive_remaining"
if st.session_state.get("confirm_purge") == "archive_remaining":
st.info("Jobs will be archived (not deleted) — URLs are kept for dedup.")
c1, c2 = st.columns(2)
if c1.button("Yes, archive", type="primary", use_container_width=True):
archived = archive_jobs(get_db_path(), statuses=["pending", "rejected"])
st.success(f"Archived {archived} jobs.")
st.session_state.pop("confirm_purge", None)
st.rerun()
if c2.button("Cancel ", use_container_width=True):
st.session_state.pop("confirm_purge", None)
st.rerun()
with archive_col2:
st.markdown("**Archive approved (unapplied)**")
st.caption("Archive _approved_ listings you decided to skip — keeps history without cluttering the apply queue.")
if st.button("📦 Archive Approved", use_container_width=True):
st.session_state["confirm_purge"] = "archive_approved"
if st.session_state.get("confirm_purge") == "archive_approved":
st.info("Approved jobs will be archived (not deleted).")
c1, c2 = st.columns(2)
if c1.button("Yes, archive approved", type="primary", use_container_width=True):
archived = archive_jobs(get_db_path(), statuses=["approved"])
st.success(f"Archived {archived} approved jobs.")
st.session_state.pop("confirm_purge", None)
st.rerun()
if c2.button("Cancel ", use_container_width=True):
st.session_state.pop("confirm_purge", None)
st.rerun()
# ── Setup banners ─────────────────────────────────────────────────────────────

View file

@ -17,39 +17,22 @@ sys.path.insert(0, str(Path(__file__).parent.parent))
logging.basicConfig(level=logging.WARNING, format="%(name)s %(levelname)s: %(message)s")
# Load .env before any os.environ reads — safe to call inside Docker too
# (uses setdefault, so Docker-injected vars take precedence over .env values)
from circuitforge_core.config.settings import load_env as _load_env
_load_env(Path(__file__).parent.parent / ".env")
IS_DEMO = os.environ.get("DEMO_MODE", "").lower() in ("1", "true", "yes")
import streamlit as st
from scripts.db import DEFAULT_DB, init_db, get_active_tasks
from scripts.db_migrate import migrate_db
from app.feedback import inject_feedback_button
from app.cloud_session import resolve_session, get_db_path, get_config_dir, get_cloud_tier
from app.cloud_session import resolve_session, get_db_path, get_config_dir
import sqlite3
_LOGO_CIRCLE = Path(__file__).parent / "static" / "peregrine_logo_circle.png"
_LOGO_FULL = Path(__file__).parent / "static" / "peregrine_logo.png"
st.set_page_config(
page_title="Peregrine",
page_icon=str(_LOGO_CIRCLE) if _LOGO_CIRCLE.exists() else "💼",
page_icon="💼",
layout="wide",
)
resolve_session("peregrine")
init_db(get_db_path())
migrate_db(Path(get_db_path()))
# Demo tier — initialize once per session (cookie persistence handled client-side)
if IS_DEMO and "simulated_tier" not in st.session_state:
st.session_state["simulated_tier"] = "paid"
if _LOGO_CIRCLE.exists():
st.logo(str(_LOGO_CIRCLE), icon_image=str(_LOGO_CIRCLE))
# ── Startup cleanup — runs once per server process via cache_resource ──────────
@st.cache_resource
@ -106,15 +89,6 @@ _show_wizard = not IS_DEMO and (
if _show_wizard:
_setup_page = st.Page("pages/0_Setup.py", title="Setup", icon="👋")
st.navigation({"": [_setup_page]}).run()
# Sync UI cookie even during wizard so vue preference redirects correctly.
# Tier not yet computed here — use cloud tier (or "free" fallback).
try:
from app.components.ui_switcher import sync_ui_cookie as _sync_wizard_cookie
from app.cloud_session import get_cloud_tier as _gctr
_wizard_tier = _gctr() if _gctr() != "local" else "free"
_sync_wizard_cookie(_USER_YAML, _wizard_tier)
except Exception:
pass
st.stop()
# ── Navigation ─────────────────────────────────────────────────────────────────
@ -139,21 +113,6 @@ pg = st.navigation(pages)
# ── Background task sidebar indicator ─────────────────────────────────────────
# Fragment polls every 3s so stage labels update live without a full page reload.
# The sidebar context WRAPS the fragment call — do not write to st.sidebar inside it.
_TASK_LABELS = {
"cover_letter": "Cover letter",
"company_research": "Research",
"email_sync": "Email sync",
"discovery": "Discovery",
"enrich_descriptions": "Enriching descriptions",
"score": "Scoring matches",
"scrape_url": "Scraping listing",
"enrich_craigslist": "Enriching listing",
"wizard_generate": "Wizard generation",
"prepare_training": "Training data",
}
_DISCOVERY_PIPELINE = ["discovery", "enrich_descriptions", "score"]
@st.fragment(run_every=3)
def _task_indicator():
tasks = get_active_tasks(get_db_path())
@ -161,30 +120,27 @@ def _task_indicator():
return
st.divider()
st.markdown(f"**⏳ {len(tasks)} task(s) running**")
pipeline_set = set(_DISCOVERY_PIPELINE)
pipeline_tasks = [t for t in tasks if t["task_type"] in pipeline_set]
other_tasks = [t for t in tasks if t["task_type"] not in pipeline_set]
# Discovery pipeline: render as ordered sub-queue with indented steps
if pipeline_tasks:
ordered = [
next((t for t in pipeline_tasks if t["task_type"] == typ), None)
for typ in _DISCOVERY_PIPELINE
]
ordered = [t for t in ordered if t is not None]
for i, t in enumerate(ordered):
for t in tasks:
icon = "" if t["status"] == "running" else "🕐"
label = _TASK_LABELS.get(t["task_type"], t["task_type"].replace("_", " ").title())
stage = t.get("stage") or ""
detail = f" · {stage}" if stage else ""
prefix = "" if i == 0 else ""
st.caption(f"{prefix}{icon} {label}{detail}")
# All other tasks (cover letter, email sync, etc.) as individual rows
for t in other_tasks:
icon = "" if t["status"] == "running" else "🕐"
label = _TASK_LABELS.get(t["task_type"], t["task_type"].replace("_", " ").title())
task_type = t["task_type"]
if task_type == "cover_letter":
label = "Cover letter"
elif task_type == "company_research":
label = "Research"
elif task_type == "email_sync":
label = "Email sync"
elif task_type == "discovery":
label = "Discovery"
elif task_type == "enrich_descriptions":
label = "Enriching"
elif task_type == "scrape_url":
label = "Scraping URL"
elif task_type == "wizard_generate":
label = "Wizard generation"
elif task_type == "enrich_craigslist":
label = "Enriching listing"
else:
label = task_type.replace("_", " ").title()
stage = t.get("stage") or ""
detail = f" · {stage}" if stage else (f"{t.get('company')}" if t.get("company") else "")
st.caption(f"{icon} {label}{detail}")
@ -200,13 +156,6 @@ def _get_version() -> str:
except Exception:
return "dev"
# ── Effective tier (resolved before sidebar so switcher can use it) ──────────
# get_cloud_tier() returns "local" in dev/self-hosted mode, real tier in cloud.
_ui_profile = _UserProfile(_USER_YAML) if _UserProfile.exists(_USER_YAML) else None
_ui_yaml_tier = _ui_profile.effective_tier if _ui_profile else "free"
_ui_cloud_tier = get_cloud_tier()
_ui_tier = _ui_cloud_tier if _ui_cloud_tier != "local" else _ui_yaml_tier
with st.sidebar:
if IS_DEMO:
st.info(
@ -236,31 +185,7 @@ with st.sidebar:
)
st.divider()
try:
from app.components.ui_switcher import render_sidebar_switcher
render_sidebar_switcher(_USER_YAML, _ui_tier)
except Exception:
pass # never crash the app over the sidebar switcher
st.caption(f"Peregrine {_get_version()}")
inject_feedback_button(page=pg.title)
# ── Demo toolbar (DEMO_MODE only) ───────────────────────────────────────────
if IS_DEMO:
from app.components.demo_toolbar import render_demo_toolbar
render_demo_toolbar()
# ── UI switcher banner (paid tier; or all visitors in demo mode) ─────────────
try:
from app.components.ui_switcher import render_banner
render_banner(_USER_YAML, _ui_tier)
except Exception:
pass # never crash the app over the banner
pg.run()
# ── UI preference cookie sync (runs after page render) ──────────────────────
try:
from app.components.ui_switcher import sync_ui_cookie
sync_ui_cookie(_USER_YAML, _ui_tier)
except Exception:
pass # never crash the app over cookie sync

View file

@ -203,16 +203,8 @@ def get_config_dir() -> Path:
isolated and never shared across tenants.
Local: repo-level config/ directory.
"""
if CLOUD_MODE:
db_path = st.session_state.get("db_path")
if db_path:
return Path(db_path).parent / "config"
# Session not resolved yet (resolve_session() should have called st.stop() already).
# Return an isolated empty temp dir rather than the repo config, which may contain
# another user's data baked into the image.
_safe = Path("/tmp/peregrine-cloud-noconfig")
_safe.mkdir(exist_ok=True)
return _safe
if CLOUD_MODE and st.session_state.get("db_path"):
return Path(st.session_state["db_path"]).parent / "config"
return Path(__file__).parent.parent / "config"

View file

@ -1,72 +0,0 @@
"""Demo toolbar — tier simulation for DEMO_MODE instances.
Renders a slim full-width bar above the Streamlit nav showing
Free / Paid / Premium pills. Clicking a pill sets a prgn_demo_tier
cookie (for persistence across reloads) and st.session_state.simulated_tier
(for immediate use within the current render pass).
Only ever rendered when DEMO_MODE=true.
"""
from __future__ import annotations
import os
import streamlit as st
import streamlit.components.v1 as components
_VALID_TIERS = ("free", "paid", "premium")
_DEFAULT_TIER = "paid" # most compelling first impression
_DEMO_MODE = os.environ.get("DEMO_MODE", "").lower() in ("1", "true", "yes")
_COOKIE_JS = """
<script>
(function() {{
document.cookie = 'prgn_demo_tier={tier}; path=/; SameSite=Lax';
}})();
</script>
"""
def get_simulated_tier() -> str:
    """Look up the tier the demo visitor is currently simulating.

    Reads the ``simulated_tier`` key from Streamlit session state and falls
    back to ``_DEFAULT_TIER`` when the key has not been set yet.
    """
    session = st.session_state
    return session.get("simulated_tier", _DEFAULT_TIER)
def set_simulated_tier(tier: str) -> None:
    """Select a simulated tier, mirror it into a cookie, and rerun the page.

    Values that are not listed in ``_VALID_TIERS`` are ignored and the
    function returns without touching session state.
    """
    if tier in _VALID_TIERS:
        # Session state drives the current render pass ...
        st.session_state["simulated_tier"] = tier
        # ... while the injected script writes the prgn_demo_tier cookie.
        cookie_script = _COOKIE_JS.format(tier=tier)
        components.html(cookie_script, height=0)
        st.rerun()
def render_demo_toolbar() -> None:
    """Draw the demo-mode toolbar with one button per simulated tier.

    Layout is a five-column row: a caption, one button for each entry in
    ``_VALID_TIERS`` (the active tier is rendered as a primary button), and
    a link caption. A divider is drawn underneath. Clicking a button for a
    tier that is not already active calls ``set_simulated_tier``.
    """
    active_tier = get_simulated_tier()

    # Button captions keyed by tier name.
    pill_text = {}
    for name in _VALID_TIERS:
        suffix = "" if name == active_tier else ""
        pill_text[name] = name.capitalize() + suffix

    with st.container():
        columns = st.columns([3, 1, 1, 1, 2])
        columns[0].caption("🎭 **Demo mode** — exploring as:")

        # Tier buttons occupy the columns directly after the caption column.
        for position, name in enumerate(_VALID_TIERS, start=1):
            selected = name == active_tier
            clicked = columns[position].button(
                pill_text[name],
                key=f"_demo_tier_{name}",
                type="primary" if selected else "secondary",
                use_container_width=True,
            )
            if clicked and not selected:
                set_simulated_tier(name)

        columns[4].caption("[Get your own →](https://circuitforge.tech/software/peregrine)")
    st.divider()

View file

@ -1,256 +0,0 @@
"""UI switcher component for Peregrine.
Manages the prgn_ui cookie (Caddy routing signal) and user.yaml
ui_preference (durability across browser clears).
Cookie mechanics
----------------
Streamlit cannot read HTTP cookies server-side. Instead:
- sync_ui_cookie() injects a JS snippet that sets document.cookie.
- Vue SPA switch-back appends ?prgn_switch=streamlit to the redirect URL.
sync_ui_cookie() reads this param via st.query_params and uses it as
an override signal, then writes user.yaml to match.
Call sync_ui_cookie() in the app.py render pass (after pg.run()).
"""
from __future__ import annotations
import os
from pathlib import Path
import streamlit as st
import streamlit.components.v1 as components
from scripts.user_profile import UserProfile
from app.wizard.tiers import can_use
_DEMO_MODE = os.environ.get("DEMO_MODE", "").lower() in ("1", "true", "yes")
# When set, the app is running without a Caddy reverse proxy in front
# (local dev, direct port exposure). Switch to Vue by navigating directly
# to this URL instead of relying on cookie-based Caddy routing.
# Example: PEREGRINE_VUE_URL=http://localhost:8506
_VUE_URL = os.environ.get("PEREGRINE_VUE_URL", "").strip().rstrip("/")
# When True, a window.location.reload() after setting prgn_ui=vue will be
# intercepted by Caddy and routed to the Vue SPA. When False (no Caddy in the
# traffic path — e.g. test instances, direct Docker exposure), reloading just
# comes back to Streamlit and creates an infinite loop. Only set this in
# production/staging compose files where Caddy is actually in front.
_CADDY_PROXY = os.environ.get("PEREGRINE_CADDY_PROXY", "").lower() in ("1", "true", "yes")
_COOKIE_JS = """
<script>
(function() {{
document.cookie = 'prgn_ui={value}; path=/; SameSite=Lax';
{navigate_js}
}})();
</script>
"""
def _set_cookie_js(value: str, navigate: bool = False) -> None:
    """Inject a script that writes the prgn_ui cookie and optionally navigates.

    The navigation statement appended after the cookie write depends on the
    module-level configuration:

    - ``navigate`` is true, ``value`` is ``"vue"`` and ``_VUE_URL`` is set:
      the host page is sent directly to ``_VUE_URL``.
    - otherwise, ``navigate`` is true and ``_CADDY_PROXY`` is set: the host
      page is reloaded so the proxy sees the updated cookie on the next
      HTTP request.
    - in every other case no navigation is emitted — the cookie is written
      silently.
    """
    nav_js = ""
    if navigate:
        # components.html() renders in an iframe, so the script has to reach
        # window.parent to move the host page rather than the iframe itself.
        if value == "vue" and _VUE_URL:
            nav_js = f"window.parent.location.href = '{_VUE_URL}';"
        elif _CADDY_PROXY:
            nav_js = "window.parent.location.reload();"

    script = _COOKIE_JS.format(value=value, navigate_js=nav_js)
    components.html(script, height=0)
def sync_ui_cookie(yaml_path: Path, tier: str) -> None:
    """Sync the prgn_ui cookie to match user.yaml ui_preference.

    Three cases are handled, in this order:

    1. ``?ui_fallback`` query param: show a toast, clear the param, set the
       cookie to ``streamlit`` and return early, so the page does not
       immediately navigate back to the Vue UI.
    2. ``?prgn_switch=<streamlit|vue>`` query param: treat it as an explicit
       switch request, write it to user.yaml, clear the param, set the
       cookie and return. A request for ``vue`` goes through the same tier
       gate as the normal path; an ineligible request is stored and applied
       as ``streamlit``.
    3. Normal path: read ``ui_preference`` from user.yaml (defaulting to
       ``streamlit`` if the profile cannot be read), reset a ``vue``
       preference to ``streamlit`` when the tier is not eligible, then set
       the cookie.

    Whenever the resolved preference is ``vue`` the cookie is set with
    ``navigate=True`` instead of silently. The original implementation notes
    that silently setting ``prgn_ui=vue`` mid-page-load lets later requests
    from Streamlit's own frontend carry the new cookie and be misrouted.

    Args:
        yaml_path: Location of the user.yaml profile file.
        tier: Effective tier name, checked against the ``vue_ui_beta``
            feature via ``can_use``.
    """

    def _vue_blocked() -> bool:
        # Demo mode bypasses the tier gate; otherwise defer to can_use().
        return not _DEMO_MODE and not can_use(tier, "vue_ui_beta")

    # ── ?ui_fallback=1 — Vue SPA was down, Caddy bounced us back ──────────────
    if st.query_params.get("ui_fallback"):
        st.toast("⚠️ New UI temporarily unavailable — switched back to Classic", icon="⚠️")
        st.query_params.pop("ui_fallback", None)
        _set_cookie_js("streamlit")
        return

    # ── ?prgn_switch param — explicit switch request via the URL ──────────────
    switch_param = st.query_params.get("prgn_switch")
    if switch_param in ("streamlit", "vue"):
        target = switch_param
        # The query string is user-controlled: without this check anyone
        # could opt into the Vue UI regardless of tier.
        if target == "vue" and _vue_blocked():
            target = "streamlit"
        try:
            profile = UserProfile(yaml_path)
            profile.ui_preference = target
            profile.save()
        except Exception:
            # UI components must not crash the app — silent fallback
            pass
        st.query_params.pop("prgn_switch", None)
        # Same rule as the normal path: never set the vue cookie silently.
        _set_cookie_js(target, navigate=(target == "vue"))
        return

    # ── Normal path: read yaml, enforce tier, inject cookie ───────────────────
    profile = None
    try:
        profile = UserProfile(yaml_path)
        pref = profile.ui_preference
    except Exception:
        # UI components must not crash the app — silent fallback to default
        pref = "streamlit"

    # Tier downgrade protection (skipped in demo mode by _vue_blocked)
    if pref == "vue" and _vue_blocked():
        if profile is not None:
            try:
                profile.ui_preference = "streamlit"
                profile.save()
            except Exception:
                # UI components must not crash the app — silent fallback
                pass
        pref = "streamlit"

    # Navigate when the preference is vue; a silent cookie-only set is used
    # for streamlit since the page is already being served by Streamlit.
    _set_cookie_js(pref, navigate=(pref == "vue"))
def switch_ui(yaml_path: Path, to: str, tier: str) -> None:
    """Persist a UI choice to user.yaml, then apply it.

    Args:
        yaml_path: Location of the user.yaml profile file.
        to: Target UI, either ``"vue"`` or ``"streamlit"``. Any other value
            makes the call a no-op.
        tier: Effective tier name, forwarded to ``sync_ui_cookie`` when
            switching to ``streamlit``.

    Switching to ``vue`` sets the cookie with ``navigate=True``; the original
    notes explain that ``st.rerun()`` alone is not sufficient — it operates
    over WebSocket and produces no HTTP request. Switching to ``streamlit``
    re-syncs the cookie and finishes with ``st.rerun()``.
    """
    if to != "vue" and to != "streamlit":
        return

    try:
        prefs = UserProfile(yaml_path)
        prefs.ui_preference = to
        prefs.save()
    except Exception:
        # UI components must not crash the app — silent fallback
        pass

    if to == "streamlit":
        sync_ui_cookie(yaml_path, tier=tier)
        st.rerun()
    else:
        # navigate=True asks _set_cookie_js to navigate after setting the cookie
        _set_cookie_js("vue", navigate=True)
def render_banner(yaml_path: Path, tier: str) -> None:
    """Show the 'New Peregrine UI available' banner until it is dismissed.

    Nothing is rendered when any of the following holds:

    - the visitor is not eligible (not demo mode and ``can_use`` rejects the
      ``vue_ui_beta`` feature for ``tier``),
    - the profile at ``yaml_path`` cannot be loaded,
    - the profile's ``ui_preference`` is already ``"vue"``,
    - ``"ui_switcher_beta"`` is listed in the profile's ``dismissed_banners``.

    "Try it" calls ``switch_ui`` with ``to="vue"``. "Dismiss" appends
    ``"ui_switcher_beta"`` to ``dismissed_banners``, saves the profile, and
    reruns the page.
    """
    if not _DEMO_MODE and not can_use(tier, "vue_ui_beta"):
        return

    try:
        profile = UserProfile(yaml_path)
    except Exception:
        # UI components must not crash the app — silent fallback
        return

    if profile.ui_preference == "vue" or "ui_switcher_beta" in (profile.dismissed_banners or []):
        return

    info_col, try_col, dismiss_col = st.columns([8, 1, 1])
    info_col.info("✨ **New Peregrine UI available** — try the modern Vue interface (Beta)")
    if try_col.button("Try it", key="_ui_banner_try"):
        switch_ui(yaml_path, to="vue", tier=tier)
    if dismiss_col.button("Dismiss", key="_ui_banner_dismiss"):
        profile.dismissed_banners = [*(profile.dismissed_banners or []), "ui_switcher_beta"]
        profile.save()
        st.rerun()
def render_sidebar_switcher(yaml_path: Path, tier: str) -> None:
    """Render the "Switch to New UI" button for eligible visitors.

    The button is skipped when the visitor is not eligible (not demo mode and
    ``can_use`` rejects ``vue_ui_beta`` for ``tier``) or when the profile's
    ``ui_preference`` is already ``"vue"``. If the profile cannot be read the
    button is still shown. Unlike the banner, there is no dismissed-flag
    check here.
    """
    if not _DEMO_MODE and not can_use(tier, "vue_ui_beta"):
        return

    try:
        already_on_vue = UserProfile(yaml_path).ui_preference == "vue"
    except Exception:
        # An unreadable profile is treated as "not on vue".
        already_on_vue = False
    if already_on_vue:
        return

    clicked = st.button("✨ Switch to New UI", key="_sidebar_switch_vue", use_container_width=True)
    if clicked:
        switch_ui(yaml_path, to="vue", tier=tier)
def render_settings_toggle(yaml_path: Path, tier: str) -> None:
    """Render the "UI Version" radio for the Settings page.

    Only drawn for eligible visitors (demo mode, or ``can_use`` accepts
    ``vue_ui_beta`` for ``tier``). The radio starts on the profile's current
    ``ui_preference``, falling back to ``"streamlit"`` when the profile cannot
    be read and to the first option when the stored value is not one of the
    known options. Picking a value different from the current one calls
    ``switch_ui``.
    """
    if not _DEMO_MODE and not can_use(tier, "vue_ui_beta"):
        return

    try:
        current = UserProfile(yaml_path).ui_preference
    except Exception:
        # UI components must not crash the app — silent fallback to default
        current = "streamlit"

    # (stored value, radio caption) pairs, in display order.
    choices = (
        ("streamlit", "Classic (Streamlit)"),
        ("vue", "✨ New UI (Vue, Beta)"),
    )
    values = [stored for stored, _ in choices]
    captions = [caption for _, caption in choices]

    try:
        start_at = values.index(current)
    except ValueError:
        start_at = 0

    st.markdown("**UI Version**")
    picked = st.radio(
        "UI Version",
        options=captions,
        index=start_at,
        key="_ui_toggle_radio",
        label_visibility="collapsed",
    )
    picked_value = values[captions.index(picked)]
    if picked_value != current:
        switch_ui(yaml_path, to=picked_value, tier=tier)

View file

@ -457,11 +457,6 @@ elif step == 5:
from app.wizard.step_inference import validate
st.subheader("Step 5 \u2014 Inference & API Keys")
st.info(
"**Simplest setup:** set `OLLAMA_HOST` in your `.env` file — "
"Peregrine auto-detects it, no config file needed. "
"Or use the fields below to configure API keys and endpoints."
)
profile = saved_yaml.get("inference_profile", "remote")
if profile == "remote":
@ -471,18 +466,8 @@ elif step == 5:
placeholder="https://api.together.xyz/v1")
openai_key = st.text_input("Endpoint API Key (optional)", type="password",
key="oai_key") if openai_url else ""
ollama_host = st.text_input("Ollama host (optional \u2014 local fallback)",
placeholder="http://localhost:11434",
key="ollama_host_input")
ollama_model = st.text_input("Ollama model (optional)",
value="llama3.2:3b",
key="ollama_model_input")
else:
st.info(f"Local mode ({profile}): Ollama provides inference.")
import os
_ollama_host_env = os.environ.get("OLLAMA_HOST", "")
if _ollama_host_env:
st.caption(f"OLLAMA_HOST from .env: `{_ollama_host_env}`")
anthropic_key = openai_url = openai_key = ""
with st.expander("Advanced \u2014 Service Ports & Hosts"):
@ -561,14 +546,6 @@ elif step == 5:
if anthropic_key or openai_url:
env_path.write_text("\n".join(env_lines) + "\n")
if profile == "remote":
if ollama_host:
env_lines = _set_env(env_lines, "OLLAMA_HOST", ollama_host)
if ollama_model:
env_lines = _set_env(env_lines, "OLLAMA_MODEL", ollama_model)
if ollama_host or ollama_model:
env_path.write_text("\n".join(env_lines) + "\n")
_save_yaml({"services": svc, "wizard_step": 5})
st.session_state.wizard_step = 6
st.rerun()
@ -654,7 +631,7 @@ elif step == 6:
)
default_profile = {
"name": "default",
"titles": titles,
"job_titles": titles,
"locations": locations,
"remote_only": False,
"boards": ["linkedin", "indeed", "glassdoor", "zip_recruiter"],

View file

@ -12,15 +12,12 @@ from scripts.db import (
DEFAULT_DB, init_db, get_jobs_by_status, update_job_status,
update_cover_letter, mark_applied, get_email_leads,
)
from app.cloud_session import resolve_session, get_db_path
resolve_session("peregrine")
st.title("📋 Job Review")
init_db(get_db_path())
init_db(DEFAULT_DB)
_email_leads = get_email_leads(get_db_path())
_email_leads = get_email_leads(DEFAULT_DB)
# ── Sidebar filters ────────────────────────────────────────────────────────────
with st.sidebar:
@ -40,7 +37,7 @@ with st.sidebar:
index=0,
)
jobs = get_jobs_by_status(get_db_path(), show_status)
jobs = get_jobs_by_status(DEFAULT_DB, show_status)
if remote_only:
jobs = [j for j in jobs if j.get("is_remote")]
@ -89,11 +86,11 @@ if show_status == "pending" and _email_leads:
with right_l:
if st.button("✅ Approve", key=f"el_approve_{lead_id}",
type="primary", use_container_width=True):
update_job_status(get_db_path(), [lead_id], "approved")
update_job_status(DEFAULT_DB, [lead_id], "approved")
st.rerun()
if st.button("❌ Reject", key=f"el_reject_{lead_id}",
use_container_width=True):
update_job_status(get_db_path(), [lead_id], "rejected")
update_job_status(DEFAULT_DB, [lead_id], "rejected")
st.rerun()
st.divider()
@ -165,7 +162,7 @@ for job in jobs:
)
save_col, _ = st.columns([2, 5])
if save_col.button("💾 Save draft", key=f"save_cl_{job_id}"):
update_cover_letter(get_db_path(), job_id, st.session_state[_cl_key])
update_cover_letter(DEFAULT_DB, job_id, st.session_state[_cl_key])
st.success("Saved!")
# Applied date + cover letter preview (applied/synced)
@ -185,11 +182,11 @@ for job in jobs:
if show_status == "pending":
if st.button("✅ Approve", key=f"approve_{job_id}",
type="primary", use_container_width=True):
update_job_status(get_db_path(), [job_id], "approved")
update_job_status(DEFAULT_DB, [job_id], "approved")
st.rerun()
if st.button("❌ Reject", key=f"reject_{job_id}",
use_container_width=True):
update_job_status(get_db_path(), [job_id], "rejected")
update_job_status(DEFAULT_DB, [job_id], "rejected")
st.rerun()
elif show_status == "approved":
@ -201,6 +198,6 @@ for job in jobs:
use_container_width=True):
cl_text = st.session_state.get(f"cl_{job_id}", "")
if cl_text:
update_cover_letter(get_db_path(), job_id, cl_text)
mark_applied(get_db_path(), [job_id])
update_cover_letter(DEFAULT_DB, job_id, cl_text)
mark_applied(DEFAULT_DB, [job_id])
st.rerun()

View file

@ -323,26 +323,6 @@ with tab_search:
_run_suggest = st.button("✨ Suggest", key="sp_suggest_btn",
help="Ask the LLM to suggest additional titles and smarter exclude keywords — using your blocklist, mission values, and career background.")
_title_sugg_count = len((st.session_state.get("_sp_suggestions") or {}).get("suggested_titles", []))
if _title_sugg_count:
st.markdown(f"""<style>
@keyframes _pg_arrow_float {{
0%, 100% {{
transform: translateY(0px);
filter: drop-shadow(0 0 2px #4fc3f7);
}}
50% {{
transform: translateY(4px);
filter: drop-shadow(0 0 8px #4fc3f7);
}}
}}
/* Target the expand-arrow SVG inside the multiselect dropdown indicator */
.stMultiSelect [data-baseweb="select"] > div + div svg {{
animation: _pg_arrow_float 1.3s ease-in-out infinite;
cursor: pointer;
}}
</style>""", unsafe_allow_html=True)
st.multiselect(
"Job titles",
options=st.session_state.get("_sp_title_options", p.get("titles", [])),
@ -350,14 +330,6 @@ with tab_search:
help="Select from known titles. Suggestions from ✨ Suggest appear here — pick the ones you want.",
label_visibility="collapsed",
)
if _title_sugg_count:
st.markdown(
f'<div style="font-size:0.8em; color:#4fc3f7; margin-top:-10px; margin-bottom:4px;">'
f'&nbsp;↑&nbsp;{_title_sugg_count} new suggestion{"s" if _title_sugg_count != 1 else ""} '
f'added — open the dropdown to browse</div>',
unsafe_allow_html=True,
)
_add_t_col, _add_t_btn = st.columns([5, 1])
with _add_t_col:
st.text_input("Add a title", key="_sp_new_title", label_visibility="collapsed",
@ -401,31 +373,21 @@ with tab_search:
with st.spinner("Asking LLM for suggestions…"):
try:
suggestions = _suggest_search_terms(_current_titles, RESUME_PATH, _blocklist, _user_profile)
except Exception as _e:
_err_msg = str(_e)
if "exhausted" in _err_msg.lower() or isinstance(_e, RuntimeError):
except RuntimeError as _e:
st.warning(
f"No LLM backend available: {_err_msg}. "
f"No LLM backend available: {_e}. "
"Check that Ollama is running and has GPU access, or enable a cloud backend in Settings → System → LLM.",
icon="⚠️",
)
else:
st.error(f"Suggestion failed: {_err_msg}", icon="🚨")
suggestions = None
if suggestions is not None:
# Add suggested titles to options list (not auto-selected — user picks from dropdown)
_opts = list(st.session_state.get("_sp_title_options", []))
_new_titles = [_t for _t in suggestions.get("suggested_titles", []) if _t not in _opts]
_opts.extend(_new_titles)
for _t in suggestions.get("suggested_titles", []):
if _t not in _opts:
_opts.append(_t)
st.session_state["_sp_title_options"] = _opts
st.session_state["_sp_suggestions"] = suggestions
if not _new_titles and not suggestions.get("suggested_excludes"):
_resume_hint = " Upload your resume in Settings → Resume Profile for better results." if not RESUME_PATH.exists() else ""
st.info(
f"No new suggestions found — the LLM didn't generate anything new for these titles.{_resume_hint}",
icon="",
)
else:
st.rerun()
if st.session_state.get("_sp_suggestions"):
@ -851,13 +813,6 @@ with tab_resume:
kw_current: list[str] = kw_data.get(kw_category, [])
kw_suggestions = _load_sugg(kw_category)
# If a custom tag was added last render, clear the multiselect's session
# state key NOW (before the widget is created) so Streamlit uses `default`
# instead of the stale session state that lacks the new tag.
_reset_key = f"_kw_reset_{kw_category}"
if st.session_state.pop(_reset_key, False):
st.session_state.pop(f"kw_ms_{kw_category}", None)
# Merge: suggestions first, then any custom tags not in suggestions
kw_custom = [t for t in kw_current if t not in kw_suggestions]
kw_options = kw_suggestions + kw_custom
@ -878,7 +833,6 @@ with tab_resume:
label_visibility="collapsed",
placeholder=f"Custom: {kw_placeholder}",
)
_tag_just_added = False
if kw_btn_col.button("", key=f"kw_add_{kw_category}", help="Add custom tag"):
cleaned = _filter_tag(kw_raw)
if cleaned is None:
@ -886,19 +840,13 @@ with tab_resume:
elif cleaned in kw_options:
st.info(f"'{cleaned}' is already in the list — select it above.")
else:
# Save to YAML and set a reset flag so the multiselect session
# state is cleared before the widget renders on the next rerun,
# allowing `default` (which includes the new tag) to take effect.
# Persist custom tag: add to YAML and session state so it appears in options
kw_new_list = kw_selected + [cleaned]
st.session_state[_reset_key] = True
kw_data[kw_category] = kw_new_list
kw_changed = True
_tag_just_added = True
# Detect multiselect changes. Skip when a tag was just added — the change
# detection would otherwise overwrite kw_data with the old kw_selected
# (which doesn't include the new tag) in the same render.
if not _tag_just_added and sorted(kw_selected) != sorted(kw_current):
# Detect multiselect changes
if sorted(kw_selected) != sorted(kw_current):
kw_data[kw_category] = kw_selected
kw_changed = True
@ -1051,11 +999,6 @@ with tab_system:
_env_path.write_text("\n".join(_env_lines) + "\n")
st.success("Deployment settings saved. Run `./manage.sh restart` to apply.")
st.divider()
from app.components.ui_switcher import render_settings_toggle as _render_ui_toggle
_ui_tier = _profile.tier if _profile else "free"
_render_ui_toggle(yaml_path=_USER_YAML, tier=_ui_tier)
st.divider()
# ── LLM Backends ─────────────────────────────────────────────────────────

View file

@ -15,28 +15,28 @@ import streamlit.components.v1 as components
import yaml
from scripts.user_profile import UserProfile
_USER_YAML = Path(__file__).parent.parent.parent / "config" / "user.yaml"
_profile = UserProfile(_USER_YAML) if UserProfile.exists(_USER_YAML) else None
_name = _profile.name if _profile else "Job Seeker"
from scripts.db import (
DEFAULT_DB, init_db, get_jobs_by_status,
update_cover_letter, mark_applied, update_job_status,
get_task_for_job,
)
from scripts.task_runner import submit_task
from app.cloud_session import resolve_session, get_db_path, get_config_dir
from app.cloud_session import resolve_session, get_db_path
from app.telemetry import log_usage_event
DOCS_DIR = _profile.docs_dir if _profile else Path.home() / "Documents" / "JobSearch"
RESUME_YAML = Path(__file__).parent.parent.parent / "config" / "plain_text_resume.yaml"
st.title("🚀 Apply Workspace")
resolve_session("peregrine")
init_db(get_db_path())
_CONFIG_DIR = get_config_dir()
_USER_YAML = _CONFIG_DIR / "user.yaml"
_profile = UserProfile(_USER_YAML) if UserProfile.exists(_USER_YAML) else None
_name = _profile.name if _profile else "Job Seeker"
DOCS_DIR = _profile.docs_dir if _profile else Path.home() / "Documents" / "JobSearch"
RESUME_YAML = _CONFIG_DIR / "plain_text_resume.yaml"
# ── PDF generation ─────────────────────────────────────────────────────────────
def _make_cover_letter_pdf(job: dict, cover_letter: str, output_dir: Path) -> Path:
from reportlab.lib.pagesizes import letter

View file

@ -36,9 +36,6 @@ from scripts.db import (
get_unread_stage_signals, dismiss_stage_signal,
)
from scripts.task_runner import submit_task
from app.cloud_session import resolve_session, get_db_path
resolve_session("peregrine")
_CONFIG_DIR = Path(__file__).parent.parent.parent / "config"
_CALENDAR_INTEGRATIONS = ("apple_calendar", "google_calendar")
@ -49,23 +46,23 @@ _calendar_connected = any(
st.title("🎯 Interviews")
init_db(get_db_path())
init_db(DEFAULT_DB)
# ── Sidebar: Email sync ────────────────────────────────────────────────────────
with st.sidebar:
st.markdown("### 📧 Email Sync")
_email_task = get_task_for_job(get_db_path(), "email_sync", 0)
_email_task = get_task_for_job(DEFAULT_DB, "email_sync", 0)
_email_running = _email_task and _email_task["status"] in ("queued", "running")
if st.button("🔄 Sync Emails", use_container_width=True, type="primary",
disabled=bool(_email_running)):
submit_task(get_db_path(), "email_sync", 0)
submit_task(DEFAULT_DB, "email_sync", 0)
st.rerun()
if _email_running:
@st.fragment(run_every=4)
def _email_sidebar_status():
t = get_task_for_job(get_db_path(), "email_sync", 0)
t = get_task_for_job(DEFAULT_DB, "email_sync", 0)
if t and t["status"] in ("queued", "running"):
st.info("⏳ Syncing…")
else:
@ -102,7 +99,7 @@ STAGE_NEXT_LABEL = {
}
# ── Data ──────────────────────────────────────────────────────────────────────
jobs_by_stage = get_interview_jobs(get_db_path())
jobs_by_stage = get_interview_jobs(DEFAULT_DB)
# ── Helpers ───────────────────────────────────────────────────────────────────
def _days_ago(date_str: str | None) -> str:
@ -123,8 +120,8 @@ def _days_ago(date_str: str | None) -> str:
def _research_modal(job: dict) -> None:
job_id = job["id"]
st.caption(f"**{job.get('company')}** — {job.get('title')}")
research = get_research(get_db_path(), job_id=job_id)
task = get_task_for_job(get_db_path(), "company_research", job_id)
research = get_research(DEFAULT_DB, job_id=job_id)
task = get_task_for_job(DEFAULT_DB, "company_research", job_id)
running = task and task["status"] in ("queued", "running")
if running:
@ -147,7 +144,7 @@ def _research_modal(job: dict) -> None:
"inaccuracies. SearXNG is now available — re-run to get verified facts."
)
if st.button("🔄 Re-run with live data", key=f"modal_rescrape_{job_id}", type="primary"):
submit_task(get_db_path(), "company_research", job_id)
submit_task(DEFAULT_DB, "company_research", job_id)
st.rerun()
st.divider()
else:
@ -163,14 +160,14 @@ def _research_modal(job: dict) -> None:
)
st.markdown(research["raw_output"])
if st.button("🔄 Refresh", key=f"modal_regen_{job_id}", disabled=bool(running)):
submit_task(get_db_path(), "company_research", job_id)
submit_task(DEFAULT_DB, "company_research", job_id)
st.rerun()
else:
st.info("No research brief yet.")
if task and task["status"] == "failed":
st.error(f"Last attempt failed: {task.get('error', '')}")
if st.button("🔬 Generate now", key=f"modal_gen_{job_id}"):
submit_task(get_db_path(), "company_research", job_id)
submit_task(DEFAULT_DB, "company_research", job_id)
st.rerun()
@ -178,7 +175,7 @@ def _research_modal(job: dict) -> None:
def _email_modal(job: dict) -> None:
job_id = job["id"]
st.caption(f"**{job.get('company')}** — {job.get('title')}")
contacts = get_contacts(get_db_path(), job_id=job_id)
contacts = get_contacts(DEFAULT_DB, job_id=job_id)
if not contacts:
st.info("No emails logged yet. Use the form below to add one.")
@ -249,7 +246,7 @@ def _email_modal(job: dict) -> None:
body_text = st.text_area("Body / notes", height=80, key=f"body_modal_{job_id}")
if st.form_submit_button("📧 Save contact"):
add_contact(
get_db_path(), job_id=job_id,
DEFAULT_DB, job_id=job_id,
direction=direction, subject=subject,
from_addr=from_addr, body=body_text, received_at=recv_at,
)
@ -258,7 +255,7 @@ def _email_modal(job: dict) -> None:
def _render_card(job: dict, stage: str, compact: bool = False) -> None:
"""Render a single job card appropriate for the given stage."""
job_id = job["id"]
contacts = get_contacts(get_db_path(), job_id=job_id)
contacts = get_contacts(DEFAULT_DB, job_id=job_id)
last_contact = contacts[-1] if contacts else None
with st.container(border=True):
@ -281,7 +278,7 @@ def _render_card(job: dict, stage: str, compact: bool = False) -> None:
format="YYYY-MM-DD",
)
if st.form_submit_button("📅 Save date"):
set_interview_date(get_db_path(), job_id=job_id, date_str=str(new_date))
set_interview_date(DEFAULT_DB, job_id=job_id, date_str=str(new_date))
st.success("Saved!")
st.rerun()
@ -291,7 +288,7 @@ def _render_card(job: dict, stage: str, compact: bool = False) -> None:
_cal_label = "🔄 Update Calendar" if _has_event else "📅 Add to Calendar"
if st.button(_cal_label, key=f"cal_push_{job_id}", use_container_width=True):
from scripts.calendar_push import push_interview_event
result = push_interview_event(get_db_path(), job_id=job_id, config_dir=_CONFIG_DIR)
result = push_interview_event(DEFAULT_DB, job_id=job_id, config_dir=_CONFIG_DIR)
if result["ok"]:
st.success(f"Event {'updated' if _has_event else 'added'} ({result['provider'].replace('_', ' ').title()})")
st.rerun()
@ -300,7 +297,7 @@ def _render_card(job: dict, stage: str, compact: bool = False) -> None:
if not compact:
if stage in ("applied", "phone_screen", "interviewing"):
signals = get_unread_stage_signals(get_db_path(), job_id=job_id)
signals = get_unread_stage_signals(DEFAULT_DB, job_id=job_id)
if signals:
sig = signals[-1]
_SIGNAL_TO_STAGE = {
@ -321,23 +318,23 @@ def _render_card(job: dict, stage: str, compact: bool = False) -> None:
if sig["stage_signal"] == "rejected":
if b1.button("✗ Reject", key=f"sig_rej_{sig['id']}",
use_container_width=True):
reject_at_stage(get_db_path(), job_id=job_id, rejection_stage=stage)
dismiss_stage_signal(get_db_path(), sig["id"])
reject_at_stage(DEFAULT_DB, job_id=job_id, rejection_stage=stage)
dismiss_stage_signal(DEFAULT_DB, sig["id"])
st.rerun(scope="app")
elif target_stage and b1.button(
f"{target_label}", key=f"sig_adv_{sig['id']}",
use_container_width=True, type="primary",
):
if target_stage == "phone_screen" and stage == "applied":
advance_to_stage(get_db_path(), job_id=job_id, stage="phone_screen")
submit_task(get_db_path(), "company_research", job_id)
advance_to_stage(DEFAULT_DB, job_id=job_id, stage="phone_screen")
submit_task(DEFAULT_DB, "company_research", job_id)
elif target_stage:
advance_to_stage(get_db_path(), job_id=job_id, stage=target_stage)
dismiss_stage_signal(get_db_path(), sig["id"])
advance_to_stage(DEFAULT_DB, job_id=job_id, stage=target_stage)
dismiss_stage_signal(DEFAULT_DB, sig["id"])
st.rerun(scope="app")
if b2.button("Dismiss", key=f"sig_dis_{sig['id']}",
use_container_width=True):
dismiss_stage_signal(get_db_path(), sig["id"])
dismiss_stage_signal(DEFAULT_DB, sig["id"])
st.rerun()
# Advance / Reject buttons
@ -349,16 +346,16 @@ def _render_card(job: dict, stage: str, compact: bool = False) -> None:
f"{next_label}", key=f"adv_{job_id}",
use_container_width=True, type="primary",
):
advance_to_stage(get_db_path(), job_id=job_id, stage=next_stage)
advance_to_stage(DEFAULT_DB, job_id=job_id, stage=next_stage)
if next_stage == "phone_screen":
submit_task(get_db_path(), "company_research", job_id)
submit_task(DEFAULT_DB, "company_research", job_id)
st.rerun(scope="app") # full rerun — card must appear in new column
if c2.button(
"✗ Reject", key=f"rej_{job_id}",
use_container_width=True,
):
reject_at_stage(get_db_path(), job_id=job_id, rejection_stage=stage)
reject_at_stage(DEFAULT_DB, job_id=job_id, rejection_stage=stage)
st.rerun() # fragment-scope rerun — card disappears without scroll-to-top
if job.get("url"):
@ -388,7 +385,7 @@ def _render_card(job: dict, stage: str, compact: bool = False) -> None:
@st.fragment
def _card_fragment(job_id: int, stage: str) -> None:
"""Re-fetches the job on each fragment rerun; renders nothing if moved/rejected."""
job = get_job_by_id(get_db_path(), job_id)
job = get_job_by_id(DEFAULT_DB, job_id)
if job is None or job.get("status") != stage:
return
_render_card(job, stage)
@ -397,11 +394,11 @@ def _card_fragment(job_id: int, stage: str) -> None:
@st.fragment
def _pre_kanban_row_fragment(job_id: int) -> None:
"""Pre-kanban compact row for applied and survey-stage jobs."""
job = get_job_by_id(get_db_path(), job_id)
job = get_job_by_id(DEFAULT_DB, job_id)
if job is None or job.get("status") not in ("applied", "survey"):
return
stage = job["status"]
contacts = get_contacts(get_db_path(), job_id=job_id)
contacts = get_contacts(DEFAULT_DB, job_id=job_id)
last_contact = contacts[-1] if contacts else None
with st.container(border=True):
@ -417,7 +414,7 @@ def _pre_kanban_row_fragment(job_id: int) -> None:
_email_modal(job)
# Stage signal hint (email-detected next steps)
signals = get_unread_stage_signals(get_db_path(), job_id=job_id)
signals = get_unread_stage_signals(DEFAULT_DB, job_id=job_id)
if signals:
sig = signals[-1]
_SIGNAL_TO_STAGE = {
@ -440,15 +437,15 @@ def _pre_kanban_row_fragment(job_id: int) -> None:
use_container_width=True, type="primary",
):
if target_stage == "phone_screen":
advance_to_stage(get_db_path(), job_id=job_id, stage="phone_screen")
submit_task(get_db_path(), "company_research", job_id)
advance_to_stage(DEFAULT_DB, job_id=job_id, stage="phone_screen")
submit_task(DEFAULT_DB, "company_research", job_id)
else:
advance_to_stage(get_db_path(), job_id=job_id, stage=target_stage)
dismiss_stage_signal(get_db_path(), sig["id"])
advance_to_stage(DEFAULT_DB, job_id=job_id, stage=target_stage)
dismiss_stage_signal(DEFAULT_DB, sig["id"])
st.rerun(scope="app")
if s2.button("Dismiss", key=f"sig_dis_pre_{sig['id']}",
use_container_width=True):
dismiss_stage_signal(get_db_path(), sig["id"])
dismiss_stage_signal(DEFAULT_DB, sig["id"])
st.rerun()
with right:
@ -456,24 +453,24 @@ def _pre_kanban_row_fragment(job_id: int) -> None:
"→ 📞 Phone Screen", key=f"adv_pre_{job_id}",
use_container_width=True, type="primary",
):
advance_to_stage(get_db_path(), job_id=job_id, stage="phone_screen")
submit_task(get_db_path(), "company_research", job_id)
advance_to_stage(DEFAULT_DB, job_id=job_id, stage="phone_screen")
submit_task(DEFAULT_DB, "company_research", job_id)
st.rerun(scope="app")
col_a, col_b = st.columns(2)
if stage == "applied" and col_a.button(
"📋 Survey", key=f"to_survey_{job_id}", use_container_width=True,
):
advance_to_stage(get_db_path(), job_id=job_id, stage="survey")
advance_to_stage(DEFAULT_DB, job_id=job_id, stage="survey")
st.rerun(scope="app")
if col_b.button("✗ Reject", key=f"rej_pre_{job_id}", use_container_width=True):
reject_at_stage(get_db_path(), job_id=job_id, rejection_stage=stage)
reject_at_stage(DEFAULT_DB, job_id=job_id, rejection_stage=stage)
st.rerun()
@st.fragment
def _hired_card_fragment(job_id: int) -> None:
"""Compact hired job card — shown in the Offer/Hired column."""
job = get_job_by_id(get_db_path(), job_id)
job = get_job_by_id(DEFAULT_DB, job_id)
if job is None or job.get("status") != "hired":
return
with st.container(border=True):

View file

@ -25,14 +25,11 @@ from scripts.db import (
get_task_for_job,
)
from scripts.task_runner import submit_task
from app.cloud_session import resolve_session, get_db_path
resolve_session("peregrine")
init_db(get_db_path())
init_db(DEFAULT_DB)
# ── Job selection ─────────────────────────────────────────────────────────────
jobs_by_stage = get_interview_jobs(get_db_path())
jobs_by_stage = get_interview_jobs(DEFAULT_DB)
active_stages = ["phone_screen", "interviewing", "offer"]
active_jobs = [
j for stage in active_stages
@ -103,10 +100,10 @@ col_prep, col_context = st.columns([2, 3])
# ════════════════════════════════════════════════
with col_prep:
research = get_research(get_db_path(), job_id=selected_id)
research = get_research(DEFAULT_DB, job_id=selected_id)
# Refresh / generate research
_res_task = get_task_for_job(get_db_path(), "company_research", selected_id)
_res_task = get_task_for_job(DEFAULT_DB, "company_research", selected_id)
_res_running = _res_task and _res_task["status"] in ("queued", "running")
if not research:
@ -115,13 +112,13 @@ with col_prep:
if _res_task and _res_task["status"] == "failed":
st.error(f"Last attempt failed: {_res_task.get('error', '')}")
if st.button("🔬 Generate research brief", type="primary", use_container_width=True):
submit_task(get_db_path(), "company_research", selected_id)
submit_task(DEFAULT_DB, "company_research", selected_id)
st.rerun()
if _res_running:
@st.fragment(run_every=3)
def _res_status_initial():
t = get_task_for_job(get_db_path(), "company_research", selected_id)
t = get_task_for_job(DEFAULT_DB, "company_research", selected_id)
if t and t["status"] in ("queued", "running"):
stage = t.get("stage") or ""
lbl = "Queued…" if t["status"] == "queued" else (stage or "Generating… this may take 3060 seconds")
@ -136,13 +133,13 @@ with col_prep:
col_ts, col_btn = st.columns([3, 1])
col_ts.caption(f"Research generated: {generated_at}")
if col_btn.button("🔄 Refresh", use_container_width=True, disabled=bool(_res_running)):
submit_task(get_db_path(), "company_research", selected_id)
submit_task(DEFAULT_DB, "company_research", selected_id)
st.rerun()
if _res_running:
@st.fragment(run_every=3)
def _res_status_refresh():
t = get_task_for_job(get_db_path(), "company_research", selected_id)
t = get_task_for_job(DEFAULT_DB, "company_research", selected_id)
if t and t["status"] in ("queued", "running"):
stage = t.get("stage") or ""
lbl = "Queued…" if t["status"] == "queued" else (stage or "Refreshing research…")
@ -314,7 +311,7 @@ with col_context:
st.markdown(job.get("description") or "_No description saved for this listing._")
with tab_emails:
contacts = get_contacts(get_db_path(), job_id=selected_id)
contacts = get_contacts(DEFAULT_DB, job_id=selected_id)
if not contacts:
st.info("No contacts logged yet. Use the Interviews page to log emails.")
else:

View file

@ -22,13 +22,10 @@ from scripts.db import (
insert_survey_response, get_survey_responses,
)
from scripts.llm_router import LLMRouter
from app.cloud_session import resolve_session, get_db_path
resolve_session("peregrine")
st.title("📋 Survey Assistant")
init_db(get_db_path())
init_db(DEFAULT_DB)
# ── Vision service health check ────────────────────────────────────────────────
@ -43,7 +40,7 @@ def _vision_available() -> bool:
vision_up = _vision_available()
# ── Job selector ───────────────────────────────────────────────────────────────
jobs_by_stage = get_interview_jobs(get_db_path())
jobs_by_stage = get_interview_jobs(DEFAULT_DB)
survey_jobs = jobs_by_stage.get("survey", [])
other_jobs = (
jobs_by_stage.get("applied", []) +
@ -64,7 +61,7 @@ selected_job_id = st.selectbox(
format_func=lambda jid: job_labels[jid],
index=0,
)
selected_job = get_job_by_id(get_db_path(), selected_job_id)
selected_job = get_job_by_id(DEFAULT_DB, selected_job_id)
# ── LLM prompt builders ────────────────────────────────────────────────────────
_SURVEY_SYSTEM = (
@ -239,7 +236,7 @@ with right_col:
image_path = str(img_file)
insert_survey_response(
get_db_path(),
DEFAULT_DB,
job_id=selected_job_id,
survey_name=survey_name,
source=source,
@ -259,7 +256,7 @@ with right_col:
# ── History ────────────────────────────────────────────────────────────────────
st.divider()
st.subheader("📂 Response History")
history = get_survey_responses(get_db_path(), job_id=selected_job_id)
history = get_survey_responses(DEFAULT_DB, job_id=selected_job_id)
if not history:
st.caption("No saved responses for this job yet.")

Binary file not shown.

Before

Width:  |  Height:  |  Size: 298 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 276 KiB

View file

@ -1,7 +1,7 @@
"""
Tier definitions and feature gates for Peregrine.
Tiers: free < paid < premium < ultra (ultra reserved; no Peregrine features use it yet)
Tiers: free < paid < premium
FEATURES maps feature key → minimum tier required.
Features not in FEATURES are available to all tiers (free).
@ -22,14 +22,9 @@ Features that stay gated even with BYOK:
"""
from __future__ import annotations
import os as _os
from pathlib import Path
from circuitforge_core.tiers import (
can_use as _core_can_use,
TIERS,
tier_label as _core_tier_label,
)
TIERS = ["free", "paid", "premium"]
# Maps feature key → minimum tier string required.
# Features absent from this dict are free (available to all).
@ -49,7 +44,6 @@ FEATURES: dict[str, str] = {
"company_research": "paid",
"interview_prep": "paid",
"survey_assistant": "paid",
"llm_reply_draft": "paid",
# Orchestration / infrastructure — stays gated
"email_classifier": "paid",
@ -64,9 +58,6 @@ FEATURES: dict[str, str] = {
"google_calendar_sync": "paid",
"apple_calendar_sync": "paid",
"slack_notifications": "paid",
# Beta UI access — open to all tiers (access management, not compute)
"vue_ui_beta": "free",
}
# Features that unlock when the user supplies any LLM backend (local or BYOK).
@ -82,16 +73,8 @@ BYOK_UNLOCKABLE: frozenset[str] = frozenset({
"company_research",
"interview_prep",
"survey_assistant",
"llm_reply_draft",
})
# Demo mode flag — read from environment at module load time.
# Allows demo toolbar to override tier without accessing st.session_state (thread-safe).
# _DEMO_MODE is immutable after import for the process lifetime.
# DEMO_MODE must be set in the environment before the process starts (e.g., via
# Docker Compose environment:). Runtime toggling is not supported.
_DEMO_MODE = _os.environ.get("DEMO_MODE", "").lower() in ("1", "true", "yes")
# Free integrations (not in FEATURES):
# google_drive_sync, dropbox_sync, onedrive_sync, mega_sync,
# nextcloud_sync, discord_notifications, home_assistant
@ -118,40 +101,34 @@ def has_configured_llm(config_path: Path | None = None) -> bool:
return False
def can_use(
tier: str,
feature: str,
has_byok: bool = False,
*,
demo_tier: str | None = None,
) -> bool:
def can_use(tier: str, feature: str, has_byok: bool = False) -> bool:
"""Return True if the given tier has access to the feature.
has_byok: pass has_configured_llm() to unlock BYOK_UNLOCKABLE features
for users who supply their own LLM backend regardless of tier.
demo_tier: when set AND _DEMO_MODE is True, substitutes for `tier`.
Read from st.session_state by the *caller*, not here — keeps
this function thread-safe for background tasks and tests.
Returns True for unknown features (not gated).
Returns False for unknown/invalid tier strings.
"""
effective_tier = demo_tier if (demo_tier is not None and _DEMO_MODE) else tier
# Pass Peregrine's BYOK_UNLOCKABLE via has_byok collapse — core's frozenset is empty
required = FEATURES.get(feature)
if required is None:
return True # not gated — available to all
if has_byok and feature in BYOK_UNLOCKABLE:
return True
return _core_can_use(feature, effective_tier, _features=FEATURES)
try:
return TIERS.index(tier) >= TIERS.index(required)
except ValueError:
return False # invalid tier string
def tier_label(feature: str, has_byok: bool = False) -> str:
"""Return a display label for a locked feature, or '' if free/unlocked."""
if has_byok and feature in BYOK_UNLOCKABLE:
return ""
raw = _core_tier_label(feature, _features=FEATURES)
if not raw or raw == "free":
required = FEATURES.get(feature)
if required is None:
return ""
return "🔒 Paid" if raw == "paid" else "⭐ Premium"
return "🔒 Paid" if required == "paid" else "⭐ Premium"
def effective_tier(

View file

@ -6,59 +6,39 @@
# Caddy injects the Directus session cookie as X-CF-Session header before forwarding.
# cloud_session.py resolves user_id → per-user db_path at session init.
#
# Services: api (FastAPI :8601), web (Vue :8508), searxng (internal)
# Streamlit app service removed — Vue+FastAPI is the only frontend (peregrine#104).
#
# Usage:
# docker compose -f compose.cloud.yml --project-name peregrine-cloud up -d
# docker compose -f compose.cloud.yml --project-name peregrine-cloud down
# docker compose -f compose.cloud.yml --project-name peregrine-cloud logs api -f
# docker compose -f compose.cloud.yml --project-name peregrine-cloud logs app -f
services:
api:
build:
context: ..
dockerfile: peregrine/Dockerfile.cfcore
command: >
bash -c "uvicorn dev_api:app --host 0.0.0.0 --port 8601"
app:
build: .
container_name: peregrine-cloud
ports:
- "8601:8601" # LAN-accessible — Caddy gates the public route; Kuma monitors this port directly
- "8505:8501"
volumes:
- /devl/menagerie-data:/devl/menagerie-data
- ./config/llm.cloud.yaml:/app/config/llm.yaml:ro
- /devl/menagerie-data:/devl/menagerie-data # per-user data trees
environment:
- CLOUD_MODE=true
- CLOUD_DATA_ROOT=/devl/menagerie-data
- STAGING_DB=/devl/menagerie-data/cloud-default.db
- DIRECTUS_JWT_SECRET=${DIRECTUS_JWT_SECRET}
- CF_SERVER_SECRET=${CF_SERVER_SECRET}
- PLATFORM_DB_URL=${PLATFORM_DB_URL}
- HEIMDALL_URL=${HEIMDALL_URL:-http://cf-license:8000}
- HEIMDALL_ADMIN_TOKEN=${HEIMDALL_ADMIN_TOKEN}
- STAGING_DB=/devl/menagerie-data/cloud-default.db # fallback only — never used
- DOCS_DIR=/tmp/cloud-docs
- STREAMLIT_SERVER_BASE_URL_PATH=peregrine
- PYTHONUNBUFFERED=1
- FORGEJO_API_TOKEN=${FORGEJO_API_TOKEN:-}
- GPU_SERVER_URL=${GPU_SERVER_URL:-http://host.docker.internal:7700}
- CF_ORCH_URL=${CF_ORCH_URL:-${GPU_SERVER_URL:-http://host.docker.internal:7700}}
- CF_APP_NAME=peregrine
- DEMO_MODE=false
depends_on:
searxng:
condition: service_healthy
extra_hosts:
- "host.docker.internal:host-gateway"
restart: unless-stopped
web:
build:
context: .
dockerfile: docker/web/Dockerfile
args:
VITE_BASE_PATH: /peregrine/
ports:
- "8508:80"
depends_on:
- api
restart: unless-stopped
# cf-orch-agent: not needed in cloud — a host-native agent already runs on :7701
# and is registered with the coordinator. app/api reach it via CF_ORCH_URL.
searxng:
image: searxng/searxng:latest
volumes:

View file

@ -15,21 +15,19 @@
services:
api:
app:
build: .
command: >
bash -c "uvicorn dev_api:app --host 0.0.0.0 --port 8601"
ports:
- "8504:8501"
volumes:
- ./demo/config:/app/config
- ./demo:/app/demo:ro # seed.sql lives here; read-only
# /app/data is tmpfs — ephemeral, resets on every container start
tmpfs:
- /app/data
- ./demo/data:/app/data
# No /docs mount — demo has no personal documents
environment:
- DEMO_MODE=true
- STAGING_DB=/app/data/staging.db
- DEMO_SEED_FILE=/app/demo/seed.sql
- DOCS_DIR=/tmp/demo-docs
- STREAMLIT_SERVER_BASE_URL_PATH=peregrine
- PYTHONUNBUFFERED=1
- PYTHONLOGGING=WARNING
# No API keys — inference is blocked by DEMO_MODE before any key is needed
@ -39,19 +37,6 @@ services:
extra_hosts:
- "host.docker.internal:host-gateway"
restart: unless-stopped
# No host port — nginx proxies /api/ → api:8601 internally
web:
build:
context: .
dockerfile: docker/web/Dockerfile
args:
VITE_BASE_PATH: /peregrine/
ports:
- "8504:80" # demo.circuitforge.tech/peregrine* → host:8504
depends_on:
- api
restart: unless-stopped
searxng:
image: searxng/searxng:latest

View file

@ -1,36 +0,0 @@
# compose.test-cfcore.yml — single-user test instance for circuitforge-core integration
#
# Run from the PARENT directory of peregrine/ (the build context must include
# both peregrine/ and circuitforge-core/ as siblings):
#
# cd /devl (or /Library/Development/CircuitForge on dev)
# docker compose -f peregrine/compose.test-cfcore.yml --project-name peregrine-test up -d
# docker compose -f peregrine/compose.test-cfcore.yml --project-name peregrine-test logs -f
# docker compose -f peregrine/compose.test-cfcore.yml --project-name peregrine-test down
#
# UI: http://localhost:8516
# Purpose: smoke-test circuitforge-core shims (db, llm_router, tiers, task_scheduler)
# before promoting cfcore integration to the production cloud instance.
services:
app:
build:
context: ..
dockerfile: peregrine/Dockerfile.cfcore
container_name: peregrine-test-cfcore
ports:
- "8516:8501"
volumes:
- /devl/job-seeker:/devl/job-seeker
- /devl/job-seeker/config:/app/config
- /devl/job-seeker/config/llm.docker.yaml:/app/config/llm.yaml:ro
- /devl/job-seeker/config/user.docker.yaml:/app/config/user.yaml:ro
environment:
- STAGING_DB=/devl/job-seeker/staging.db
- PYTHONUNBUFFERED=1
- STREAMLIT_SERVER_BASE_URL_PATH=
- GPU_SERVER_URL=${GPU_SERVER_URL:-http://host.docker.internal:7700}
- CF_ORCH_URL=${CF_ORCH_URL:-${GPU_SERVER_URL:-http://host.docker.internal:7700}}
extra_hosts:
- "host.docker.internal:host-gateway"
restart: "no"

View file

@ -1,17 +1,23 @@
# compose.yml — Peregrine by Circuit Forge LLC
# Streamlit (app service) removed — Vue+FastAPI is the only frontend (#104)
# Profiles: remote | cpu | single-gpu | dual-gpu-ollama | dual-gpu-vllm | dual-gpu-mixed
services:
api:
build:
context: ..
dockerfile: peregrine/Dockerfile.cfcore
app:
build: .
command: >
bash -c "uvicorn dev_api:app --host 0.0.0.0 --port 8601"
bash -c "streamlit run app/app.py
--server.port=8501
--server.headless=true
--server.fileWatcherType=none
2>&1 | tee /app/data/.streamlit.log"
ports:
- "${STREAMLIT_PORT:-8501}:8501"
volumes:
- ./config:/app/config
- ./data:/app/data
- ${DOCS_DIR:-~/Documents/JobSearch}:/docs
- /var/run/docker.sock:/var/run/docker.sock
- /usr/bin/docker:/usr/bin/docker:ro
environment:
- STAGING_DB=/app/data/staging.db
- DOCS_DIR=/docs
@ -20,24 +26,20 @@ services:
- OPENAI_COMPAT_KEY=${OPENAI_COMPAT_KEY:-}
- PEREGRINE_GPU_COUNT=${PEREGRINE_GPU_COUNT:-0}
- PEREGRINE_GPU_NAMES=${PEREGRINE_GPU_NAMES:-}
- GPU_SERVER_URL=${GPU_SERVER_URL:-${CF_ORCH_URL:-http://host.docker.internal:7700}}
- CF_ORCH_URL=${CF_ORCH_URL:-${GPU_SERVER_URL:-http://host.docker.internal:7700}}
- CF_APP_NAME=peregrine
- RECOMMENDED_PROFILE=${RECOMMENDED_PROFILE:-remote}
- STREAMLIT_SERVER_BASE_URL_PATH=${STREAMLIT_BASE_URL_PATH:-}
- FORGEJO_API_TOKEN=${FORGEJO_API_TOKEN:-}
- FORGEJO_REPO=${FORGEJO_REPO:-}
- FORGEJO_API_URL=${FORGEJO_API_URL:-}
- PYTHONUNBUFFERED=1
- PYTHONLOGGING=WARNING
depends_on:
searxng:
condition: service_healthy
extra_hosts:
- "host.docker.internal:host-gateway"
restart: unless-stopped
web:
build:
context: .
dockerfile: docker/web/Dockerfile
ports:
- "${VUE_PORT:-8506}:80"
depends_on:
- api
restart: unless-stopped
searxng:
image: searxng/searxng:latest
ports:
@ -91,29 +93,21 @@ services:
profiles: [single-gpu, dual-gpu-ollama, dual-gpu-vllm, dual-gpu-mixed]
restart: unless-stopped
cf-orch-agent:
build:
context: ..
dockerfile: peregrine/Dockerfile.cfcore
command: ["/bin/sh", "/app/docker/cf-orch-agent/start.sh"]
vllm:
image: vllm/vllm-openai:latest
ports:
- "${CF_ORCH_AGENT_PORT:-7701}:7701"
environment:
- CF_ORCH_COORDINATOR_URL=${CF_ORCH_COORDINATOR_URL:-http://host.docker.internal:7700}
- CF_ORCH_NODE_ID=${CF_ORCH_NODE_ID:-peregrine}
- CF_ORCH_AGENT_PORT=${CF_ORCH_AGENT_PORT:-7701}
- CF_ORCH_ADVERTISE_HOST=${CF_ORCH_ADVERTISE_HOST:-}
- PYTHONUNBUFFERED=1
extra_hosts:
- "host.docker.internal:host-gateway"
deploy:
resources:
reservations:
devices:
- driver: nvidia
count: all
capabilities: [gpu]
profiles: [single-gpu, dual-gpu-ollama, dual-gpu-vllm, dual-gpu-mixed]
- "${VLLM_PORT:-8000}:8000"
volumes:
- ${VLLM_MODELS_DIR:-~/models/vllm}:/models
command: >
--model /models/${VLLM_MODEL:-Ouro-1.4B}
--trust-remote-code
--max-model-len 4096
--gpu-memory-utilization 0.75
--enforce-eager
--max-num-seqs 8
--cpu-offload-gb ${CPU_OFFLOAD_GB:-0}
profiles: [dual-gpu-vllm, dual-gpu-mixed]
restart: unless-stopped
finetune:

View file

@ -1,23 +0,0 @@
# config/label_tool.yaml — Multi-account IMAP config for the email label tool
# Copy to config/label_tool.yaml and fill in your credentials.
# This file is gitignored.
accounts:
- name: "Gmail"
host: "imap.gmail.com"
port: 993
username: "you@gmail.com"
password: "your-app-password" # Use an App Password, not your login password
folder: "INBOX"
days_back: 90
- name: "Outlook"
host: "outlook.office365.com"
port: 993
username: "you@outlook.com"
password: "your-app-password"
folder: "INBOX"
days_back: 90
# Optional: limit emails fetched per account per run (0 = unlimited)
max_per_account: 500

View file

@ -1,72 +0,0 @@
backends:
anthropic:
api_key_env: ANTHROPIC_API_KEY
enabled: false
model: claude-sonnet-4-6
supports_images: true
type: anthropic
claude_code:
api_key: any
base_url: http://localhost:3009/v1
enabled: false
model: claude-code-terminal
supports_images: true
type: openai_compat
github_copilot:
api_key: any
base_url: http://localhost:3010/v1
enabled: false
model: gpt-4o
supports_images: false
type: openai_compat
ollama:
api_key: ollama
base_url: http://host.docker.internal:11434/v1
enabled: true
model: llama3.1:8b # generic — no personal fine-tunes in cloud
supports_images: false
type: openai_compat
ollama_research:
api_key: ollama
base_url: http://host.docker.internal:11434/v1
enabled: true
model: llama3.1:8b
supports_images: false
type: openai_compat
vision_service:
base_url: http://host.docker.internal:8002
enabled: true
supports_images: true
type: vision_service
vllm:
api_key: ''
base_url: http://host.docker.internal:8000/v1
enabled: true
model: __auto__
supports_images: false
type: openai_compat
cf_orch:
service: vllm
model_candidates:
- Qwen2.5-3B-Instruct
ttl_s: 300
vllm_research:
api_key: ''
base_url: http://host.docker.internal:8000/v1
enabled: true
model: __auto__
supports_images: false
type: openai_compat
cf_orch:
service: vllm
model_candidates:
- Qwen2.5-3B-Instruct
ttl_s: 300
fallback_order:
- vllm
- ollama
research_fallback_order:
- vllm_research
- ollama_research
vision_fallback_order:
- vision_service

View file

@ -1,11 +1,4 @@
backends:
cf_text:
api_key: any
base_url: http://host.docker.internal:8006/v1
enabled: true
model: cf-text
supports_images: false
type: openai_compat
anthropic:
api_key_env: ANTHROPIC_API_KEY
enabled: false
@ -35,13 +28,13 @@ backends:
type: openai_compat
ollama_research:
api_key: ollama
base_url: http://ollama_research:11434/v1
base_url: http://host.docker.internal:11434/v1
enabled: true
model: llama3.1:8b
model: llama3.2:3b
supports_images: false
type: openai_compat
vision_service:
base_url: http://vision:8002
base_url: http://host.docker.internal:8002
enabled: true
supports_images: true
type: vision_service
@ -52,11 +45,6 @@ backends:
model: __auto__
supports_images: false
type: openai_compat
cf_orch:
service: vllm
model_candidates:
- Qwen2.5-3B-Instruct
ttl_s: 300
vllm_research:
api_key: ''
base_url: http://host.docker.internal:8000/v1
@ -65,7 +53,6 @@ backends:
supports_images: false
type: openai_compat
fallback_order:
- cf_text
- ollama
- claude_code
- vllm
@ -75,7 +62,6 @@ research_fallback_order:
- claude_code
- vllm_research
- ollama_research
- cf_text
- github_copilot
- anthropic
vision_fallback_order:

View file

@ -45,89 +45,6 @@ backends:
enabled: false
type: vision_service
supports_images: true
# ── cf-orch task-routed backends (preferred for GPU inference) ────────────
# Use these when GPU_SERVER_URL is configured. The coordinator resolves
# product+task → model_id → node via assignments.yaml; no model IDs needed here.
# Set enabled: true once GPU_SERVER_URL is configured.
cf_cover_letter:
type: openai_compat
enabled: false
base_url: http://localhost:8008/v1 # fallback when cf-orch is unavailable
model: __auto__
api_key: any
supports_images: false
cf_orch:
product: peregrine
task: cover_letter
ttl_s: 3600
cf_ats_rewrite:
type: openai_compat
enabled: false
base_url: http://localhost:8008/v1
model: __auto__
api_key: any
supports_images: false
cf_orch:
product: peregrine
task: ats_rewrite
ttl_s: 3600
cf_job_research:
type: openai_compat
enabled: false
base_url: http://localhost:8008/v1
model: __auto__
api_key: any
supports_images: false
cf_orch:
product: peregrine
task: job_research
ttl_s: 3600
cf_interview_prep:
type: openai_compat
enabled: false
base_url: http://localhost:8008/v1
model: __auto__
api_key: any
supports_images: false
cf_orch:
product: peregrine
task: interview_prep
ttl_s: 3600
# ── cf-orch trunk services (service-based, legacy) ─────────────────────────
# Generic service allocation — use the task-routed backends above when possible.
# Set GPU_SERVER_URL (env) or url below; leave enabled: false if cf-orch is
# not deployed in your environment.
cf_text:
type: openai_compat
enabled: false
base_url: http://localhost:8008/v1 # fallback when cf-orch is not available
model: __auto__
api_key: any
supports_images: false
cf_orch:
service: cf-text
# model_candidates: leave empty to use the service's default_model,
# or specify an alias from the node's catalog (e.g. "qwen2.5-3b").
model_candidates: []
ttl_s: 3600
cf_voice:
type: openai_compat
enabled: false
base_url: http://localhost:8009/v1 # fallback when cf-orch is not available
model: __auto__
api_key: any
supports_images: false
cf_orch:
service: cf-voice
model_candidates: []
ttl_s: 3600
fallback_order:
- ollama
- claude_code

View file

@ -1,258 +0,0 @@
# Mission domain signal configuration for cover letter generation.
#
# When a job description or company name matches signals in a domain,
# the cover letter prompt injects a Para 3 hint to reflect genuine personal
# alignment. Dict order = match priority (first match wins).
#
# Users can add custom domains under `mission_preferences` in user.yaml.
# Any key in mission_preferences that is NOT listed here is treated as a
# user-defined domain: it has no signal list, so it matches only when the
# job description contains the key itself as a literal word, and only the
# user's custom note is injected (otherwise the domain is skipped).
#
# Schema per domain:
# signals: list[str] — lowercase keywords to scan for in "company + JD"
# default_note: str — hint injected when user has no custom note for domain
domains:
music:
signals:
- music
- spotify
- tidal
- soundcloud
- bandcamp
- apple music
- distrokid
- cd baby
- landr
- beatport
- reverb
- vinyl
- streaming
- artist
- label
- live nation
- ticketmaster
- aeg
- songkick
- concert
- venue
- festival
- audio
- podcast
- studio
- record
- musician
- playlist
default_note: >
This company is in the music industry — an industry the candidate finds genuinely
compelling. Para 3 should warmly and specifically reflect this authentic alignment,
not as a generic fan statement, but as an honest statement of where they'd love to
apply their skills.
animal_welfare:
signals:
- animal
- shelter
- rescue
- humane society
- spca
- aspca
- veterinary
- "vet "
- wildlife
- "pet "
- adoption
- foster
- dog
- cat
- feline
- canine
- sanctuary
- zoo
default_note: >
This organization works in animal welfare/rescue — a mission the candidate finds
genuinely meaningful. Para 3 should reflect this authentic connection warmly and
specifically, tying their skills to this mission.
education:
signals:
- education
- school
- learning
- student
- edtech
- classroom
- curriculum
- tutoring
- academic
- university
- kids
- children
- youth
- literacy
- khan academy
- duolingo
- chegg
- coursera
- instructure
- canvas lms
- clever
- district
- teacher
- k-12
- k12
- grade
- pedagogy
default_note: >
This company works in education or EdTech — a domain that resonates with the
candidate's values. Para 3 should reflect this authentic connection specifically
and warmly.
social_impact:
signals:
- nonprofit
- non-profit
- "501(c)"
- social impact
- mission-driven
- public benefit
- community
- underserved
- equity
- justice
- humanitarian
- advocacy
- charity
- foundation
- ngo
- social good
- civic
- public health
- mental health
- food security
- housing
- homelessness
- poverty
- workforce development
default_note: >
This organization is mission-driven / social impact focused — exactly the kind of
cause the candidate cares deeply about. Para 3 should warmly reflect their genuine
desire to apply their skills to work that makes a real difference in people's lives.
# Health is listed last among the original domains (the extended domains below follow it) — genuine but lower-priority connection.
health:
signals:
- patient
- patients
- healthcare
- health tech
- healthtech
- pharma
- pharmaceutical
- clinical
- medical
- hospital
- clinic
- therapy
- therapist
- rare disease
- life sciences
- life science
- treatment
- prescription
- biotech
- biopharma
- medtech
- behavioral health
- population health
- care management
- care coordination
- oncology
- specialty pharmacy
- provider network
- payer
- health plan
- benefits administration
- ehr
- emr
- fhir
- hipaa
default_note: >
This company works in healthcare, life sciences, or patient care.
Do NOT write about the candidate's passion for pharmaceuticals or healthcare as an
industry. Instead, Para 3 should reflect genuine care for the PEOPLE these companies
exist to serve: those navigating complex, often invisible, or unusual health journeys;
patients facing rare or poorly understood conditions; individuals whose situations don't
fit a clean category. The connection is to the humans behind the data, not the industry.
If the user has provided a personal note, use that to anchor Para 3 specifically.
# Extended domains — added 2026-04-12
privacy:
signals:
- privacy
- data rights
- surveillance
- gdpr
- ccpa
- anonymity
- end-to-end encryption
- open source
- decentralized
- self-hosted
- zero knowledge
- data sovereignty
- digital rights
- eff
- electronic frontier
default_note: >
This company operates in the privacy, data rights, or digital rights space —
a domain the candidate genuinely cares about. Para 3 should reflect their
authentic belief in user autonomy and data sovereignty, not as abstract principle
but as something that shapes how they approach their work.
accessibility:
signals:
- accessibility
- assistive technology
- a11y
- wcag
- screen reader
- adaptive technology
- disability
- neurodivergent
- neurodiversity
- adhd
- autism
- inclusive design
- universal design
- accommodations
- ada compliance
default_note: >
This company works in accessibility or assistive technology — a mission the
candidate feels genuine, personal alignment with. Para 3 should reflect authentic
investment in building tools and systems that work for everyone, especially those
whose needs are most often overlooked in mainstream product development.
open_source:
signals:
- open source
- open-source
- linux foundation
- apache foundation
- free software
- gnu
- contributor
- maintainer
- upstream
- community-driven
- innersource
- copyleft
- mozilla
- wikimedia
default_note: >
This organization is rooted in open source culture — a community the candidate
actively participates in and believes in. Para 3 should reflect genuine investment
in the collaborative, transparent, and community-driven approach to building
software that lasts.

View file

@ -43,7 +43,6 @@ dev_tier_override: null # overrides tier locally (for testing only)
wizard_complete: false
wizard_step: 0
dismissed_banners: []
ui_preference: streamlit # UI preference — "streamlit" (default) or "vue" (Beta: Paid tier)
docs_dir: "~/Documents/JobSearch"
ollama_models_dir: "~/models/ollama"

View file

@ -1,11 +1,9 @@
candidate_accessibility_focus: false
candidate_lgbtq_focus: false
candidate_voice: Clear, direct, and human. Focuses on impact over jargon. Avoids
buzzwords and lets the work speak.
career_summary: 'Senior UX Designer with 6 years of experience designing for music,
education, and media products. Strong background in cross-platform design systems,
user research, and 0-to-1 feature development. Passionate about making complex
digital experiences feel effortless.
candidate_voice: Clear, direct, and human. Focuses on impact over jargon.
career_summary: 'Experienced software engineer with a background in full-stack development,
cloud infrastructure, and data pipelines. Passionate about building tools that help
people navigate complex systems.
'
dev_tier_override: null
@ -18,13 +16,13 @@ inference_profile: remote
linkedin: ''
mission_preferences:
animal_welfare: ''
education: Education technology is where design decisions have long-term impact on how people learn.
education: ''
health: ''
music: Love designing for music and audio discovery — it combines craft with genuine emotional resonance.
music: ''
social_impact: Want my work to reach people who need it most.
name: Demo User
nda_companies: []
ollama_models_dir: /root/models/ollama
ollama_models_dir: ~/models/ollama
phone: ''
services:
ollama_host: localhost
@ -41,7 +39,6 @@ services:
vllm_ssl: false
vllm_ssl_verify: true
tier: free
ui_preference: streamlit
vllm_models_dir: /root/models/vllm
vllm_models_dir: ~/models/vllm
wizard_complete: true
wizard_step: 0

View file

@ -1,259 +0,0 @@
-- jobs
INSERT INTO jobs (title, company, url, source, location, is_remote, salary, match_score, status, date_found, date_posted, cover_letter, applied_at, phone_screen_at, interviewing_at, offer_at, hired_at, interview_date, rejection_stage, hired_feedback) VALUES ('UX Designer', 'Spotify', 'https://www.linkedin.com/jobs/view/1000001', 'linkedin', 'Remote', '1', '$110k–$140k', '94.0', 'approved', '2026-04-14', '2026-04-12', 'Dear Hiring Manager,
I''m excited to apply for the UX Designer role at Spotify. With six years of
experience designing for music discovery and cross-platform experiences, I''ve
consistently shipped features that make complex audio content feel effortless to
navigate. At my last role I led a redesign of the playlist creation flow that
reduced drop-off by 31%.
Spotify''s commitment to artist and listener discovery and its recent push into
audiobooks and podcast tooling aligns directly with the kind of cross-format
design challenges I''m most energised by.
I''d love to bring that focus to your product design team.
Warm regards,
[Your name]
', NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL);
INSERT INTO jobs (title, company, url, source, location, is_remote, salary, match_score, status, date_found, date_posted, cover_letter, applied_at, phone_screen_at, interviewing_at, offer_at, hired_at, interview_date, rejection_stage, hired_feedback) VALUES ('Product Designer', 'Duolingo', 'https://www.linkedin.com/jobs/view/1000002', 'linkedin', 'Pittsburgh, PA', '0', '$95k–$120k', '87.0', 'approved', '2026-04-13', '2026-04-10', 'Draft in progress — cover letter generating…', NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL);
INSERT INTO jobs (title, company, url, source, location, is_remote, salary, match_score, status, date_found, date_posted, cover_letter, applied_at, phone_screen_at, interviewing_at, offer_at, hired_at, interview_date, rejection_stage, hired_feedback) VALUES ('UX Lead', 'NPR', 'https://www.indeed.com/viewjob?jk=1000003', 'indeed', 'Washington, DC', '1', '$120k–$150k', '81.0', 'approved', '2026-04-12', '2026-04-08', NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL);
INSERT INTO jobs (title, company, url, source, location, is_remote, salary, match_score, status, date_found, date_posted, cover_letter, applied_at, phone_screen_at, interviewing_at, offer_at, hired_at, interview_date, rejection_stage, hired_feedback) VALUES ('Senior UX Designer', 'Mozilla', 'https://www.linkedin.com/jobs/view/1000004', 'linkedin', 'Remote', '1', '$105k–$130k', '81.0', 'pending', '2026-04-13', '2026-03-12', NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL);
INSERT INTO jobs (title, company, url, source, location, is_remote, salary, match_score, status, date_found, date_posted, cover_letter, applied_at, phone_screen_at, interviewing_at, offer_at, hired_at, interview_date, rejection_stage, hired_feedback) VALUES ('Interaction Designer', 'Figma', 'https://www.indeed.com/viewjob?jk=1000005', 'indeed', 'San Francisco, CA', '1', '$115k–$145k', '78.0', 'pending', '2026-04-11', '2026-04-09', NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL);
INSERT INTO jobs (title, company, url, source, location, is_remote, salary, match_score, status, date_found, date_posted, cover_letter, applied_at, phone_screen_at, interviewing_at, offer_at, hired_at, interview_date, rejection_stage, hired_feedback) VALUES ('Product Designer II', 'Notion', 'https://www.linkedin.com/jobs/view/1000006', 'linkedin', 'Remote', '1', '$100k–$130k', '76.0', 'pending', '2026-04-10', '2026-04-07', NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL);
INSERT INTO jobs (title, company, url, source, location, is_remote, salary, match_score, status, date_found, date_posted, cover_letter, applied_at, phone_screen_at, interviewing_at, offer_at, hired_at, interview_date, rejection_stage, hired_feedback) VALUES ('UX Designer', 'Stripe', 'https://www.linkedin.com/jobs/view/1000007', 'linkedin', 'Remote', '1', '$120k–$150k', '74.0', 'pending', '2026-04-09', '2026-04-06', NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL);
INSERT INTO jobs (title, company, url, source, location, is_remote, salary, match_score, status, date_found, date_posted, cover_letter, applied_at, phone_screen_at, interviewing_at, offer_at, hired_at, interview_date, rejection_stage, hired_feedback) VALUES ('UI/UX Designer', 'Canva', 'https://www.indeed.com/viewjob?jk=1000008', 'indeed', 'Remote', '1', '$90k–$115k', '72.0', 'pending', '2026-04-08', '2026-04-05', NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL);
INSERT INTO jobs (title, company, url, source, location, is_remote, salary, match_score, status, date_found, date_posted, cover_letter, applied_at, phone_screen_at, interviewing_at, offer_at, hired_at, interview_date, rejection_stage, hired_feedback) VALUES ('Senior Product Designer', 'Asana', 'https://www.linkedin.com/jobs/view/1000009', 'linkedin', 'San Francisco, CA', '1', '$125k–$155k', '69.0', 'pending', '2026-04-07', '2026-04-04', NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL);
INSERT INTO jobs (title, company, url, source, location, is_remote, salary, match_score, status, date_found, date_posted, cover_letter, applied_at, phone_screen_at, interviewing_at, offer_at, hired_at, interview_date, rejection_stage, hired_feedback) VALUES ('UX Researcher', 'Intercom', 'https://www.indeed.com/viewjob?jk=1000010', 'indeed', 'Remote', '1', '$95k–$120k', '67.0', 'pending', '2026-04-06', '2026-04-03', NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL);
INSERT INTO jobs (title, company, url, source, location, is_remote, salary, match_score, status, date_found, date_posted, cover_letter, applied_at, phone_screen_at, interviewing_at, offer_at, hired_at, interview_date, rejection_stage, hired_feedback) VALUES ('Product Designer', 'Linear', 'https://www.linkedin.com/jobs/view/1000011', 'linkedin', 'Remote', '1', '$110k–$135k', '65.0', 'pending', '2026-04-05', '2026-04-02', NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL);
INSERT INTO jobs (title, company, url, source, location, is_remote, salary, match_score, status, date_found, date_posted, cover_letter, applied_at, phone_screen_at, interviewing_at, offer_at, hired_at, interview_date, rejection_stage, hired_feedback) VALUES ('UX Designer', 'Loom', 'https://www.indeed.com/viewjob?jk=1000012', 'indeed', 'Remote', '1', '$90k–$110k', '62.0', 'pending', '2026-04-04', '2026-04-01', NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL);
INSERT INTO jobs (title, company, url, source, location, is_remote, salary, match_score, status, date_found, date_posted, cover_letter, applied_at, phone_screen_at, interviewing_at, offer_at, hired_at, interview_date, rejection_stage, hired_feedback) VALUES ('Senior Product Designer', 'Asana', 'https://www.asana.com/jobs/1000013', 'linkedin', 'San Francisco, CA', '1', '$125k–$155k', '91.0', 'phone_screen', '2026-04-01', '2026-03-30', NULL, '2026-04-08', '2026-04-15', NULL, NULL, NULL, '2026-04-15T14:00:00', NULL, NULL);
INSERT INTO jobs (title, company, url, source, location, is_remote, salary, match_score, status, date_found, date_posted, cover_letter, applied_at, phone_screen_at, interviewing_at, offer_at, hired_at, interview_date, rejection_stage, hired_feedback) VALUES ('Product Designer', 'Notion', 'https://www.notion.so/jobs/1000014', 'indeed', 'Remote', '1', '$100k–$130k', '88.0', 'interviewing', '2026-03-25', '2026-03-23', NULL, '2026-04-01', '2026-04-05', '2026-04-12', NULL, NULL, '2026-04-22T10:00:00', NULL, NULL);
INSERT INTO jobs (title, company, url, source, location, is_remote, salary, match_score, status, date_found, date_posted, cover_letter, applied_at, phone_screen_at, interviewing_at, offer_at, hired_at, interview_date, rejection_stage, hired_feedback) VALUES ('Design Systems Designer', 'Figma', 'https://www.figma.com/jobs/1000015', 'linkedin', 'San Francisco, CA', '1', '$130k–$160k', '96.0', 'hired', '2026-03-01', '2026-02-27', NULL, '2026-03-08', '2026-03-14', '2026-03-21', '2026-04-01', '2026-04-08', NULL, NULL, '{"factors":["clear_scope","great_manager","mission_aligned"],"notes":"Excited about design systems work. Salary met expectations."}');
INSERT INTO jobs (title, company, url, source, location, is_remote, salary, match_score, status, date_found, date_posted, cover_letter, applied_at, phone_screen_at, interviewing_at, offer_at, hired_at, interview_date, rejection_stage, hired_feedback) VALUES ('UX Designer', 'Slack', 'https://slack.com/jobs/1000016', 'indeed', 'Remote', '1', '$115k–$140k', '79.0', 'applied', '2026-03-18', '2026-03-16', NULL, '2026-03-28', NULL, NULL, NULL, NULL, NULL, NULL, NULL);
-- job_contacts
INSERT INTO job_contacts (job_id, direction, subject, from_addr, to_addr, received_at, stage_signal) VALUES (1, 'inbound', 'Excited to connect — UX Designer role at Spotify', 'jamie.chen@spotify.com', 'you@example.com', '2026-04-12', 'positive_response');
INSERT INTO job_contacts (job_id, direction, subject, from_addr, to_addr, received_at, stage_signal) VALUES (1, 'outbound', 'Re: Excited to connect — UX Designer role at Spotify', 'you@example.com', 'jamie.chen@spotify.com', '2026-04-13', NULL);
INSERT INTO job_contacts (job_id, direction, subject, from_addr, to_addr, received_at, stage_signal) VALUES (13, 'inbound', 'Interview Confirmation — Senior Product Designer', 'recruiting@asana.com', 'you@example.com', '2026-04-13', 'interview_scheduled');
INSERT INTO job_contacts (job_id, direction, subject, from_addr, to_addr, received_at, stage_signal) VALUES (14, 'inbound', 'Your panel interview is confirmed for Apr 22', 'recruiting@notion.so', 'you@example.com', '2026-04-12', 'interview_scheduled');
INSERT INTO job_contacts (job_id, direction, subject, from_addr, to_addr, received_at, stage_signal) VALUES (14, 'inbound', 'Pre-interview prep resources', 'marcus.webb@notion.so', 'you@example.com', '2026-04-13', 'positive_response');
INSERT INTO job_contacts (job_id, direction, subject, from_addr, to_addr, received_at, stage_signal) VALUES (15, 'inbound', 'Figma Design Systems — Offer Letter', 'offers@figma.com', 'you@example.com', '2026-04-01', 'offer_received');
INSERT INTO job_contacts (job_id, direction, subject, from_addr, to_addr, received_at, stage_signal) VALUES (15, 'outbound', 'Re: Figma Design Systems — Offer Letter (acceptance)', 'you@example.com', 'offers@figma.com', '2026-04-05', NULL);
INSERT INTO job_contacts (job_id, direction, subject, from_addr, to_addr, received_at, stage_signal) VALUES (15, 'inbound', 'Welcome to Figma! Onboarding next steps', 'onboarding@figma.com', 'you@example.com', '2026-04-08', NULL);
INSERT INTO job_contacts (job_id, direction, subject, from_addr, to_addr, received_at, stage_signal) VALUES (16, 'inbound', 'Thanks for applying to Slack', 'noreply@slack.com', 'you@example.com', '2026-03-28', NULL);
-- references_
INSERT INTO references_ (name, email, role, company, relationship, notes, tags, prep_email) VALUES ('Dr. Priya Nair', 'priya.nair@example.com', 'Director of Design', 'Acme Corp', 'former_manager', 'Managed me for 3 years on the consumer app redesign. Enthusiastic reference.', '["manager","design"]', 'Hi Priya,
I hope you''re doing well! I''m currently interviewing for a few senior UX roles and would be so grateful if you''d be willing to serve as a reference.
Thank you!
[Your name]');
INSERT INTO references_ (name, email, role, company, relationship, notes, tags, prep_email) VALUES ('Sam Torres', 'sam.torres@example.com', 'Senior Product Designer', 'Acme Corp', 'former_colleague', 'Worked together on design systems. Great at speaking to collaborative process.', '["colleague","design_systems"]', NULL);
INSERT INTO references_ (name, email, role, company, relationship, notes, tags, prep_email) VALUES ('Jordan Kim', 'jordan.kim@example.com', 'VP of Product', 'Streamline Inc', 'former_manager', 'Led the product team I was embedded in. Can speak to business impact of design work.', '["manager","product"]', NULL);
-- resumes
INSERT INTO resumes (name, source, job_id, text, struct_json, word_count, is_default) VALUES (
'Base Resume',
'uploaded',
NULL,
'ALEX RIVERA
UX Designer · Product Design · Design Systems
alex.rivera@example.com · linkedin.com/in/alexrivera · Portfolio: alexrivera.design
SUMMARY
Senior UX Designer with 6 years of experience designing for music, education, and media platforms. Led 0-to-1 product work and redesigned high-traffic flows used by tens of millions of users. Deep background in user research, interaction design, and cross-platform design systems. Strong collaborator with engineering and product – comfortable in ambiguity, methodical about process.
EXPERIENCE
Senior UX Designer – StreamNote (2023–present)
- Led redesign of the core listening queue, reducing abandonment by 31% across mobile and web
- Built and maintained a component library (Figma tokens + React) used by 8 product squads
- Ran 60+ moderated user research sessions; findings shaped 3 major product bets
- Partnered with ML team to design recommendation transparency features for power users
UX Designer – EduPath (2021–2023)
- Designed the onboarding and early-habit loop for a K–12 learning app (2.4M DAU)
- Shipped streak redesign that improved D7 retention by 18%
- Drove accessibility audit and remediation (WCAG 2.1 AA); filed and closed 47 issues
- Mentored 2 junior designers; led weekly design critique
Product Designer – Signal Media (2019–2021)
- Designed editorial tools and reader-facing article experiences for a digital news publisher
- Prototyped and shipped a "read later" feature that became the #2 most-used feature within 90 days
- Collaborated with editorial and engineering to establish a shared component system (reduces new-story design time by 60%)
SKILLS
Figma · Prototyping · User Research · Usability Testing · Design Systems · Interaction Design
Accessibility (WCAG 2.1) · Cross-Platform (iOS/Android/Web) · React (collaboration-level) · SQL (basic)
Workshop Facilitation · Stakeholder Communication
EDUCATION
B.F.A. Graphic Design, Minor in Human-Computer Interaction – State University of the Arts, 2019
SELECTED PROJECTS
Playlist Flow Redesign (StreamNote) – reduced creation drop-off 31%, won internal design award
D7 Retention Streak (EduPath) – +18% weekly retention; featured in company all-hands
Accessibility Audit (EduPath) – full WCAG 2.1 AA remediation across iOS, Android, web',
'{"contact":{"name":"Alex Rivera","email":"alex.rivera@example.com","linkedin":"linkedin.com/in/alexrivera","portfolio":"alexrivera.design"},"summary":"Senior UX Designer with 6 years of experience designing for music, education, and media platforms.","experience":[{"company":"StreamNote","title":"Senior UX Designer","dates":"2023present","bullets":["Led redesign of core listening queue, reducing abandonment by 31%","Built component library used by 8 product squads","Ran 60+ moderated user research sessions"]},{"company":"EduPath","title":"UX Designer","dates":"20212023","bullets":["Designed onboarding and early-habit loop for K12 app (2.4M DAU)","Shipped streak redesign that improved D7 retention by 18%","Drove accessibility audit (WCAG 2.1 AA)"]},{"company":"Signal Media","title":"Product Designer","dates":"20192021","bullets":["Designed editorial tools and reader-facing article experiences","Prototyped and shipped read-later feature (top 2 used within 90 days)"]}],"education":[{"institution":"State University of the Arts","degree":"B.F.A. Graphic Design, Minor in HCI","year":"2019"}],"skills":["Figma","Prototyping","User Research","Usability Testing","Design Systems","Interaction Design","Accessibility (WCAG 2.1)","Cross-Platform","React","SQL","Workshop Facilitation"]}',
320,
1
);
-- ATS resume optimizer data for approved jobs (Spotify=1, Duolingo=2, NPR=3)
-- Spotify: gap report highlights audio/podcast tooling keywords; optimized resume tailored
UPDATE jobs SET
ats_gap_report = '[{"term":"audio UX","section":"experience","priority":3,"rationale":"Spotify''s JD emphasizes audio product experience; resume mentions music broadly but not audio-specific UX patterns"},{"term":"podcast design","section":"experience","priority":2,"rationale":"Spotify is investing heavily in podcast tooling; related experience at Signal Media could be framed around audio content"},{"term":"cross-platform mobile","section":"skills","priority":2,"rationale":"JD specifies iOS and Android explicitly; resume lists cross-platform but not mobile-first framing"},{"term":"A/B testing","section":"experience","priority":1,"rationale":"JD mentions data-driven iteration; resume does not reference experimentation framework"}]',
optimized_resume = 'ALEX RIVERA
UX Designer · Audio Product · Cross-Platform Design
alex.rivera@example.com · linkedin.com/in/alexrivera · Portfolio: alexrivera.design
SUMMARY
Senior UX Designer specializing in audio and media product design. 6 years of experience shipping cross-platform features used by millions with a focus on music discovery, content navigation, and habit-forming interactions. Comfortable moving from user research to pixel-perfect specs to cross-functional alignment.
EXPERIENCE
Senior UX Designer StreamNote (2023present)
- Led redesign of the core listening queue (audio UX) reduced abandonment 31% across iOS, Android, and web
- Designed podcast chapter navigation prototype; validated with 8 user sessions, handed off to eng in Q3
- Built Figma component library (tokens + variants) used by 8 product squads cut design-to-dev handoff time by 40%
- Drove A/B test framework with data team: 12 experiments shipped; 7 reached statistical significance
UX Designer EduPath (20212023)
- Designed cross-platform onboarding (iOS/Android/web) for K12 learning app, 2.4M DAU
- Shipped streak redesign with 3 A/B variants winning variant improved D7 retention by 18%
- Full WCAG 2.1 AA remediation across all platforms; filed and closed 47 issues
Product Designer Signal Media (20192021)
- Designed audio and editorial experiences for a digital media publisher
- Prototyped and shipped "listen later" feature for podcast content #2 most-used feature within 90 days
- Established shared design system that reduced new-story design time by 60%
SKILLS
Figma · Audio UX · Podcast Design · Cross-Platform (iOS/Android/Web) · Design Systems
A/B Testing · User Research · Usability Testing · Accessibility (WCAG 2.1) · Interaction Design
EDUCATION
B.F.A. Graphic Design, Minor in HCI State University of the Arts, 2019'
WHERE id = 1;
-- Duolingo: gap report highlights gamification, retention, and learning science keywords
UPDATE jobs SET
ats_gap_report = '[{"term":"gamification","section":"experience","priority":3,"rationale":"Duolingo''s entire product is built on gamification mechanics; streak work at EduPath is highly relevant but not explicitly framed"},{"term":"streak mechanics","section":"experience","priority":3,"rationale":"Duolingo invented the streak; EduPath streak redesign is directly applicable and should be foregrounded"},{"term":"learning science","section":"experience","priority":2,"rationale":"JD references behavioral psychology; resume does not mention research-backed habit design"},{"term":"localization","section":"skills","priority":1,"rationale":"Duolingo ships to 40+ languages; internationalization experience or awareness would strengthen application"}]',
optimized_resume = 'ALEX RIVERA
UX Designer · Gamification · Learning Products
alex.rivera@example.com · linkedin.com/in/alexrivera · Portfolio: alexrivera.design
SUMMARY
UX Designer with 6 years of experience in education and media products. Designed habit-forming experiences grounded in behavioral research streak systems, onboarding flows, and retention mechanics for apps with millions of daily active users. Passionate about learning products that feel like play.
EXPERIENCE
UX Designer EduPath (20212023)
- Redesigned streak and gamification mechanics for K12 learning app (2.4M DAU) D7 retention +18%
- Applied behavioral science principles (variable reward, loss aversion, social proof) to onboarding flow redesign
- Led 30+ user research sessions with students, parents, and teachers; findings shaped product roadmap for 2 quarters
- Drove WCAG 2.1 AA accessibility remediation 47 issues filed and closed across iOS, Android, web
Senior UX Designer StreamNote (2023present)
- Designed habit-reinforcing listening queue with personalized recommendations surface abandonment -31%
- Built and scaled Figma design system used by 8 squads; reduced design-to-dev cycle by 40%
- Ran A/B tests with data team; 12 experiments across retention and discovery features
Product Designer Signal Media (20192021)
- Designed reader engagement and content-return mechanics for digital news platform
- "Read later" feature reached #2 usage within 90 days of launch
SKILLS
Figma · Gamification Design · Habit & Retention Mechanics · User Research · Behavioral UX
Learning Products · Accessibility (WCAG 2.1) · Cross-Platform (iOS/Android/Web) · Design Systems
EDUCATION
B.F.A. Graphic Design, Minor in HCI State University of the Arts, 2019'
WHERE id = 2;
-- NPR: gap report highlights public media, accessibility, and editorial tool experience
UPDATE jobs SET
ats_gap_report = '[{"term":"public media","section":"experience","priority":3,"rationale":"NPR is a public media org; framing experience around mission-driven media rather than commercial products strengthens fit"},{"term":"editorial tools","section":"experience","priority":3,"rationale":"NPR''s UX Lead role includes internal tools for journalists; Signal Media editorial tools work is directly applicable"},{"term":"accessibility standards","section":"experience","priority":2,"rationale":"NPR serves a broad public audience including listeners with disabilities; WCAG work at EduPath should be prominent"},{"term":"content discovery","section":"experience","priority":2,"rationale":"NPR''s JD mentions listener discovery; StreamNote queue redesign is relevant framing"}]',
optimized_resume = 'ALEX RIVERA
UX Lead · Public Media · Accessible Design
alex.rivera@example.com · linkedin.com/in/alexrivera · Portfolio: alexrivera.design
SUMMARY
Senior UX Designer with 6 years of experience in media, education, and content platforms. Led design for editorial tools, content discovery surfaces, and accessible experiences for mission-driven organizations. Believes design has an obligation to reach all users – especially the ones the industry tends to forget.
EXPERIENCE
Senior UX Designer – StreamNote (2023–present)
- Led content discovery redesign (listening queue, personalized surfaces) abandonment -31%
- Designed and shipped podcast chapter navigation as a 0-to-1 feature
- Built scalable Figma component library used by 8 cross-functional squads
- Ran 60+ moderated research sessions; regularly presented findings to CPO and VP Product
Product Designer – Signal Media (2019–2021)
- Designed editorial authoring tools used daily by 120+ journalists reduced story publish time by 35%
- Shipped "read later" feature for a digital news publisher #2 most-used feature within 90 days
- Established shared design system that cut new-template design time by 60%
UX Designer – EduPath (2021–2023)
- Led full WCAG 2.1 AA accessibility audit and remediation across iOS, Android, and web
- Designed onboarding and retention flows for a public K–12 learning app (2.4M DAU)
- D7 retention +18% following streak redesign; results shared at company all-hands
SKILLS
Figma · Editorial & Publishing Tools · Content Discovery UX · Accessibility (WCAG 2.1 AA)
Public-Facing Product Design · User Research · Cross-Platform · Design Systems
EDUCATION
B.F.A. Graphic Design, Minor in HCI State University of the Arts, 2019'
WHERE id = 3;
-- company_research for interview-stage jobs
-- Job 13: Asana (phone_screen, interview 2026-04-15)
INSERT INTO company_research (job_id, generated_at, company_brief, ceo_brief, talking_points, tech_brief, funding_brief, competitors_brief, red_flags, accessibility_brief, scrape_used, raw_output) VALUES (
13,
'2026-04-14T09:00:00',
'Asana is a work management platform founded in 2008 by Dustin Moskovitz and Justin Rosenstein (both ex-Facebook). Headquartered in San Francisco, Asana went public on the NYSE in September 2020 via a direct listing. The product focuses on project and task management for teams, with a strong emphasis on clarity of ownership and cross-functional coordination. It serves over 130,000 paying customers across 190+ countries. Asana''s design philosophy centers on removing ambiguity from work — a principle that directly shapes product design decisions. The company has made significant investments in AI-assisted task management through its "AI Studio" features, launched in 2024.',
'Dustin Moskovitz, co-founder and CEO, is known for a thoughtful management style and genuine interest in org design and well-being at work. He is a co-founder of the effective altruism movement and the Open Philanthropy Project. Expect questions and conversation that reflect a values-driven culture — mission alignment matters here. Anne Raimondi is COO and a well-regarded operations leader.',
'["Asana''s design team works closely with the Core Product and Platform squads — ask how design embeds with engineering","Recent focus on AI features (AI Studio, smart task assignment) — familiarity with AI UX patterns will land well","Asana''s brand voice is unusually distinct — understand their design language before the call","Ask about the cross-functional collaboration model: how does design influence roadmap priority?","The role is hybrid SF — clarify expectations around in-office days upfront"]',
'Asana is built primarily on React (frontend), Python and PHP (backend), and uses a proprietary data model (the Asana object graph) that drives their real-time sync. Their design team uses Figma heavily. They have invested in their own design system ("Alchemy") which underpins the entire product.',
'Asana went public via direct listing (NYSE: ASAN) in September 2020. Revenue in FY2025 was approximately $726M, with consistent double-digit YoY growth. The company has been investing in profitability — operating losses have narrowed significantly. No recent acquisition activity.',
'Primary competitors: Monday.com, ClickUp, Notion (project management use cases), Jira (for engineering teams), and Microsoft Project. Asana differentiates on simplicity, clear ownership model, and enterprise reliability over raw feature count.',
NULL,
'Asana has published an accessibility statement and maintains WCAG 2.1 AA compliance across their core product. Their employee ERGs include groups for disability and neurodiversity. The company scores above average on Glassdoor for work-life balance. Their San Francisco HQ has dedicated quiet spaces and standing desks.',
0,
'Asana company research generated for phone screen 2026-04-15. Sources: public filings, company blog, Glassdoor.'
);
-- Job 14: Notion (interviewing, panel 2026-04-22)
INSERT INTO company_research (job_id, generated_at, company_brief, ceo_brief, talking_points, tech_brief, funding_brief, competitors_brief, red_flags, accessibility_brief, scrape_used, raw_output) VALUES (
14,
'2026-04-11T14:30:00',
'Notion is an all-in-one workspace tool combining notes, docs, wikis, and project management. Founded in 2013, relaunched in 2018 after a near-failure. Headquartered in San Francisco, with a significant remote-first culture. Notion reached a $10B valuation in its 2021 funding round and has since focused on consolidation and profitability. The product is unusually design-forward — Notion''s UI is considered a benchmark in the industry for flexibility without overwhelming complexity. Their 2023–2024 push into AI (Notion AI) added LLM-powered writing and summarization directly into the workspace. The product design team is small-but-influential and works closely with the founders.',
'Ivan Zhao is co-founder and CEO, known for being deeply product-focused and aesthetically driven. He has described Notion as an attempt to make software feel like a craftsman''s tool. Akshay Kothari is co-founder and COO. The culture reflects the founders'' values: deliberate, high-craft, opinionated. Expect the panel to include designers or PMs who will probe your design sensibility and taste.',
'["Notion''s design team is small and influential — expect ownership of end-to-end features, not component-level work","AI features (Notion AI) are a major current initiative — come with opinions on how AI should integrate into a workspace without disrupting user flow","Notion''s design language is a competitive moat — study it carefully before the panel","Panel likely includes a PM, a senior designer, and possibly a founder — tailor your portfolio walk to each audience","Ask about the product design team structure: how many designers, how do they embed with eng, what does the IC path look like?"]',
'Notion is built on a React frontend with a custom block-based data model. Their backend uses Postgres and Kafka for real-time sync. Notion AI uses third-party LLM providers (Anthropic, OpenAI) via API. The design team uses Figma and maintains a well-documented internal design system.',
'Notion raised $275M at a $10B valuation in October 2021 (led by Sequoia and Coatue). The company has not announced further funding rounds; public commentary suggests a path to profitability. ARR estimated at $300–500M as of 2024.',
'Competitors include Confluence (Atlassian), Coda, Linear (for engineering-focused workflows), Obsidian (local-first notes), and increasingly Asana and ClickUp for project management use cases. Notion''s differentiator is its flexible block model and strong brand identity with knowledge workers.',
'Some employee reviews mention that the small team size means high ownership but also that projects can pivot quickly. Design headcount has been stable post-2022 layoffs. Worth asking about team stability in the panel.',
'Notion has made public commitments to WCAG 2.1 AA compliance but has received community feedback that keyboard navigation in the block editor has gaps. Their 2024 accessibility roadmap addressed the most commonly reported issues. The company has a neurodiversity ERG and remote-first culture (async-friendly).',
0,
'Notion company research generated for panel interview 2026-04-22. Sources: public filings, company blog, community accessibility reports.'
);
-- Job 15: Figma (hired — research used during interview cycle)
INSERT INTO company_research (job_id, generated_at, company_brief, ceo_brief, talking_points, tech_brief, funding_brief, competitors_brief, red_flags, accessibility_brief, scrape_used, raw_output) VALUES (
15,
'2026-03-13T11:00:00',
'Figma is the leading browser-based design tool, founded in 2012 by Dylan Field and Evan Wallace. Headquartered in San Francisco. Figma disrupted the design tool market with its collaborative, multiplayer approach — Google Docs for design. The product includes Figma Design, FigJam (whiteboarding), and Dev Mode (engineering handoff). Adobe''s attempted $20B acquisition was blocked by UK and EU regulators in 2023; Figma received a $1B termination fee. Post-Adobe, Figma has accelerated independent investment in AI features and a new "Figma Make" prototyping tool. The Design Systems team (the role you accepted) is responsible for the core component and token infrastructure used across all Figma products.',
'Dylan Field, co-founder and CEO, is known for being deeply technical and product-obsessed. He joined the board of OpenAI. Post-Adobe-deal fallout, Field has been publicly focused on Figma''s independent growth trajectory. Expect a culture of high standards and genuine product craft. Noah Levin leads the design org.',
'["You are joining the Design Systems team — the infrastructure team for Figma''s own product design","Your work will directly impact every other designer at Figma — high visibility, high leverage","Figma uses its own product (dogfooding) — you will be designing in Figma for Figma","Key initiative: AI-assisted component generation in Figma Make — design systems input is critical","You are the first external hire in this role since the Adobe deal fell through — ask about team direction post-acquisition"]',
'Figma''s frontend is React with a custom WebGL rendering engine (written in Rust + WASM) for the canvas. This is some of the most sophisticated browser-based graphics code in production. Dev Mode connects to GitHub, Storybook, and VS Code. The design system team works in Figma and outputs tokens that connect to code via Figma''s token pipeline.',
'Figma received a $1B termination fee from Adobe when the acquisition was blocked in late 2023. The company raised $200M at a $10B valuation in 2021. With the termination fee and strong ARR, Figma is well-capitalized for independent growth. No IPO timeline announced publicly.',
'Primary competitor is Sketch (declining market share), with Adobe XD effectively sunset. Framer is a growing competitor for prototyping. Penpot (open-source) is gaining traction in privacy-conscious and European markets. Figma''s multiplayer and browser-based approach remains a strong moat.',
NULL,
'Figma has an active accessibility team and public blog posts on designing accessible components. Their design system (the one you will be contributing to) includes built-in accessibility annotations and ARIA guidance. The company has disability and neurodiversity ERGs. Remote-friendly with SF HQ.',
0,
'Figma company research generated for interviewing stage 2026-03-13. Sources: company blog, public filings, design community.'
);

3224
dev-api.py

File diff suppressed because it is too large Load diff

View file

@ -1,14 +0,0 @@
#!/bin/sh
# Start the cf-orch agent. Adds --advertise-host only when CF_ORCH_ADVERTISE_HOST is set.
#
# Environment variables read (all optional):
#   CF_ORCH_COORDINATOR_URL  value for --coordinator (default http://host.docker.internal:7700)
#   CF_ORCH_NODE_ID          value for --node-id     (default peregrine)
#   CF_ORCH_AGENT_PORT       value for --port        (default 7701)
#   CF_ORCH_ADVERTISE_HOST   when non-empty, appended as --advertise-host <value>

# Exit immediately if any command fails.
set -e
# Base argument string. ${VAR:-default} substitutes the default when VAR is unset or empty.
ARGS="--coordinator ${CF_ORCH_COORDINATOR_URL:-http://host.docker.internal:7700} \
--node-id ${CF_ORCH_NODE_ID:-peregrine} \
--host 0.0.0.0 \
--port ${CF_ORCH_AGENT_PORT:-7701}"
# Only pass --advertise-host when the operator explicitly provided one.
if [ -n "${CF_ORCH_ADVERTISE_HOST}" ]; then
ARGS="$ARGS --advertise-host ${CF_ORCH_ADVERTISE_HOST}"
fi
# $ARGS is intentionally unquoted so the shell splits it into separate arguments.
# exec replaces this shell with the agent process instead of running it as a child.
exec cf-orch agent $ARGS

View file

@ -1,15 +0,0 @@
# Two-stage image: build the web front end with Node, then serve the static output with nginx.

# Stage 1: build
FROM node:20-alpine AS build
WORKDIR /app
# Copy only the package manifests first so the dependency-install layer can be
# reused from cache when just the application sources change.
COPY web/package*.json ./
RUN npm ci --prefer-offline
COPY web/ ./
# Base path is supplied at build time (default "/") and exported into the build
# environment; presumably consumed by the front-end build config — confirm in web/.
ARG VITE_BASE_PATH=/
ENV VITE_BASE_PATH=${VITE_BASE_PATH}
RUN npm run build
# Stage 2: serve
FROM nginx:alpine
# Replace the stock default site with the project's nginx configuration.
COPY docker/web/nginx.conf /etc/nginx/conf.d/default.conf
# Only the compiled assets from the build stage end up in the final image.
COPY --from=build /app/dist /usr/share/nginx/html
EXPOSE 80

View file

@ -1,42 +0,0 @@
server {
    # Catch-all virtual host serving the built SPA and proxying API traffic.
    listen 80;
    server_name _;
    # Allow request bodies up to 20 MB.
    client_max_body_size 20m;
    root /usr/share/nginx/html;
    index index.html;

    # Proxy API calls to the FastAPI backend service.
    # ^~ makes this prefix match final: without it, an API URI that happens to
    # end in a static extension (e.g. /api/.../file.png) would be captured by
    # the regex asset location below and never reach the backend.
    location ^~ /api/ {
        proxy_pass http://api:8601;
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_read_timeout 120s;
    }

    # Cache static assets
    location ~* \.(js|css|png|jpg|jpeg|gif|ico|svg|woff2?)$ {
        expires 1y;
        add_header Cache-Control "public, immutable";
    }

    # Handle /peregrine/ base path used when accessed directly (no Caddy prefix stripping).
    # ^~ blocks regex location matches so assets at /peregrine/assets/... are served correctly.
    location ^~ /peregrine/assets/ {
        alias /usr/share/nginx/html/assets/;
        expires 1y;
        add_header Cache-Control "public, immutable";
    }

    # ^~ is required here as well: otherwise root-level static files under the
    # base path (e.g. /peregrine/favicon.ico) match the regex asset location,
    # are resolved against `root` as /usr/share/nginx/html/peregrine/..., and 404.
    location ^~ /peregrine/ {
        alias /usr/share/nginx/html/;
        try_files $uri $uri/ /index.html;
    }

    # SPA fallback — must come after API and assets
    location / {
        try_files $uri $uri/ /index.html;
    }
}

View file

@ -144,7 +144,7 @@ Shipped in v0.4.0. Ongoing maintenance and known decisions:
## Container Runtime
- ~~**Podman support**~~ — ✅ Done: `Makefile` auto-detects `docker compose` / `podman compose` / `podman-compose`; `compose.podman-gpu.yml` CDI override for GPU profiles; `install.sh` detects existing Podman and skips Docker install.
- ~~**Podman support**~~ — ✅ Done: `Makefile` auto-detects `docker compose` / `podman compose` / `podman-compose`; `compose.podman-gpu.yml` CDI override for GPU profiles; `setup.sh` detects existing Podman and skips Docker install.
- **FastAPI migration path** — When concurrent-user scale demands it: port Streamlit pages to FastAPI + React/HTMX, keep `scripts/` layer unchanged, replace daemon threads with Celery + Redis. The `scripts/` separation already makes this clean.
---

View file

@ -11,7 +11,7 @@ Thank you for your interest in contributing to Peregrine. This guide covers the
## Fork and Clone
```bash
git clone https://git.opensourcesolarpunk.com/Circuit-Forge/peregrine
git clone https://git.circuitforge.io/circuitforge/peregrine
cd peregrine
```
@ -102,23 +102,6 @@ Before opening a pull request:
---
## Database Migrations
Peregrine uses a numbered SQL migration system (Rails-style). Each migration is a `.sql` file in the `migrations/` directory at the repo root, named `NNN_description.sql` (e.g. `002_add_foo_column.sql`). Applied migrations are tracked in a `schema_migrations` table in each user database.
### Adding a migration
1. Create `migrations/NNN_description.sql` where `NNN` is the next sequential number (zero-padded to 3 digits).
2. Write standard SQL — `CREATE TABLE IF NOT EXISTS`, `ALTER TABLE ADD COLUMN`, etc. Keep each migration idempotent where possible.
3. Do **not** modify `scripts/db.py`'s legacy `_MIGRATIONS` lists — those are superseded and will be removed once all active databases have been bootstrapped by the migration runner.
4. The runner (`scripts/db_migrate.py`) applies pending migrations at startup automatically (both FastAPI and Streamlit paths call `migrate_db(db_path)`).
### Rollbacks
SQLite does not support transactional DDL for all statement types. Write forward-only migrations. If you need to undo a schema change, add a new migration that reverses it.
---
## What NOT to Do
- Do not commit `config/user.yaml`, `config/notion.yaml`, `config/email.yaml`, `config/adzuna.yaml`, or any `config/integrations/*.yaml` — all are gitignored

View file

@ -7,7 +7,7 @@ This page walks through a full Peregrine installation from scratch.
## Prerequisites
- **Git** — to clone the repository
- **Internet connection** — `install.sh` downloads Docker and other dependencies
- **Internet connection** — `setup.sh` downloads Docker and other dependencies
- **Operating system**: Ubuntu/Debian, Fedora/RHEL, Arch Linux, or macOS (with Docker Desktop)
!!! warning "Windows"
@ -18,19 +18,19 @@ This page walks through a full Peregrine installation from scratch.
## Step 1 — Clone the repository
```bash
git clone https://git.opensourcesolarpunk.com/Circuit-Forge/peregrine
git clone https://git.circuitforge.io/circuitforge/peregrine
cd peregrine
```
---
## Step 2 — Run install.sh
## Step 2 — Run setup.sh
```bash
bash install.sh
bash setup.sh
```
`install.sh` performs the following automatically:
`setup.sh` performs the following automatically:
1. **Detects your platform** (Ubuntu/Debian, Fedora/RHEL, Arch, macOS)
2. **Installs Git** if not already present
@ -40,10 +40,10 @@ bash install.sh
6. **Creates `.env` from `.env.example`** — edit `.env` to customise ports and model storage paths before starting
!!! note "macOS"
`install.sh` installs Docker Desktop via Homebrew (`brew install --cask docker`) then exits. Open Docker Desktop, start it, then re-run the script.
`setup.sh` installs Docker Desktop via Homebrew (`brew install --cask docker`) then exits. Open Docker Desktop, start it, then re-run the script.
!!! note "GPU requirement"
For GPU support, `nvidia-smi` must return output before you run `install.sh`. Install your NVIDIA driver first. The Container Toolkit installation will fail silently if the driver is not present.
For GPU support, `nvidia-smi` must return output before you run `setup.sh`. Install your NVIDIA driver first. The Container Toolkit installation will fail silently if the driver is not present.
---
@ -107,7 +107,7 @@ The first-run wizard launches automatically. See [First-Run Wizard](first-run-wi
Only NVIDIA GPUs are supported. AMD ROCm is not currently supported.
Requirements:
- NVIDIA driver installed and `nvidia-smi` working before running `install.sh`
- NVIDIA driver installed and `nvidia-smi` working before running `setup.sh`
- CUDA 12.x recommended (CUDA 11.x may work but is untested)
- Minimum 8 GB VRAM for `single-gpu` profile with default models
- For `dual-gpu`: GPU 0 is assigned to Ollama, GPU 1 to vLLM

View file

@ -4,17 +4,15 @@
Peregrine automates the full job search lifecycle: discovery, matching, cover letter generation, application tracking, and interview preparation. It is privacy-first and local-first — your data never leaves your machine unless you configure an external integration.
![Peregrine dashboard](screenshots/01-dashboard.png)
---
## Quick Start
```bash
# 1. Clone and install dependencies
git clone https://git.opensourcesolarpunk.com/Circuit-Forge/peregrine
git clone https://git.circuitforge.io/circuitforge/peregrine
cd peregrine
bash install.sh
bash setup.sh
# 2. Start Peregrine
make start # no GPU, API-only
@ -31,23 +29,20 @@ The first-run wizard guides you through hardware detection, tier selection, iden
## Feature Overview
| Feature | Free | Paid | Premium |
|---------|------|-------|---------|
| Feature | Free | Paid | Premium |
|---------|------|------|---------|
| Job discovery (JobSpy + custom boards) | Yes | Yes | Yes |
| Resume keyword matching | Yes | Yes | Yes |
| Cover letter generation | BYOK‡ | Yes | Yes |
| Company research briefs | BYOK‡ | Yes | Yes |
| Interview prep & practice Q&A | BYOK‡ | Yes | Yes |
| Cover letter generation | - | Yes | Yes |
| Company research briefs | - | Yes | Yes |
| Interview prep & practice Q&A | - | Yes | Yes |
| Email sync & auto-classification | - | Yes | Yes |
| Survey assistant (culture-fit Q&A) | BYOK‡ | Yes | Yes |
| Survey assistant (culture-fit Q&A) | - | Yes | Yes |
| Integration connectors (Notion, Airtable, etc.) | Partial | Yes | Yes |
| Calendar sync (Google, Apple) | - | Yes | Yes |
| Cover letter model fine-tuning | - | - | Yes |
| Multi-user support | - | - | Yes |
**Paid** gives access to CircuitForge's hosted inference — no API key required.
**BYOK** — configure any LLM backend in `config/llm.yaml` (local Ollama/vLLM or an API key) and these features unlock at no charge, regardless of tier.
See [Tier System](reference/tier-system.md) for the full feature gate table.
---
@ -63,8 +58,8 @@ See [Tier System](reference/tier-system.md) for the full feature gate table.
## License
Core discovery pipeline: [MIT](https://git.opensourcesolarpunk.com/Circuit-Forge/peregrine/src/branch/main/LICENSE-MIT)
Core discovery pipeline: [MIT](https://git.circuitforge.io/circuitforge/peregrine/src/branch/main/LICENSE-MIT)
AI features (cover letter generation, company research, interview prep, UI): [BSL 1.1](https://git.opensourcesolarpunk.com/Circuit-Forge/peregrine/src/branch/main/LICENSE-BSL)
AI features (cover letter generation, company research, interview prep, UI): [BSL 1.1](https://git.circuitforge.io/circuitforge/peregrine/src/branch/main/LICENSE-BSL)
© 2026 Circuit Forge LLC

View file

@ -1 +0,0 @@
// Injects the analytics <script> tag into <head>. The tag is deferred and
// carries the tracked domains and the event endpoint as data-* attributes.
(function () {
  var tag = document.createElement("script");
  tag.defer = true;
  tag.dataset.domain = "docs.circuitforge.tech,circuitforge.tech";
  tag.dataset.api = "https://analytics.circuitforge.tech/api/event";
  tag.src = "https://analytics.circuitforge.tech/js/script.js";
  document.head.appendChild(tag);
})();

View file

@ -337,7 +337,7 @@ webhook_url: "https://discord.com/api/webhooks/..."
## .env
Docker port and path overrides. Created from `.env.example` by `install.sh`. Gitignored.
Docker port and path overrides. Created from `.env.example` by `setup.sh`. Gitignored.
```bash
# Ports (change if defaults conflict with existing services)

View file

@ -1,157 +0,0 @@
# Forgejo Feedback API — Schema & Bug Bot Setup
## API Endpoints Used
| Operation | Method | Endpoint |
|-----------|--------|----------|
| List labels | GET | `/repos/{owner}/{repo}/labels` |
| Create label | POST | `/repos/{owner}/{repo}/labels` |
| Create issue | POST | `/repos/{owner}/{repo}/issues` |
| Upload attachment | POST | `/repos/{owner}/{repo}/issues/{index}/assets` |
| Post comment | POST | `/repos/{owner}/{repo}/issues/{index}/comments` |
Base URL: `https://git.opensourcesolarpunk.com/api/v1`
---
## Issue Creation Payload
```json
POST /repos/{owner}/{repo}/issues
{
"title": "string",
"body": "markdown string",
"labels": [1, 2, 3] // array of label IDs (not names)
}
```
Response (201):
```json
{
"number": 42,
"html_url": "https://git.opensourcesolarpunk.com/pyr0ball/peregrine/issues/42"
}
```
---
## Issue Body Structure
The `build_issue_body()` function produces this markdown layout:
```markdown
## 🐛 Bug | ✨ Feature Request | 💬 Other
<user description>
### Reproduction Steps ← bug type only, when repro provided
<repro steps>
### Context
- **page:** Home
- **version:** v0.2.5-61-ga6d787f ← from `git describe`; "dev" inside Docker
- **tier:** free | paid | premium
- **llm_backend:** ollama | vllm | claude_code | ...
- **os:** Linux-6.8.0-65-generic-x86_64-with-glibc2.39
- **timestamp:** 2026-03-06T15:58:29Z
<details>
<summary>App Logs (last 100 lines)</summary>
```
... log content (PII masked) ...
```
</details>
### Recent Listings ← only when include_diag = True
- [Title @ Company](url)
---
*Submitted by: Name <email>* ← only when attribution consent checked
```
---
## Screenshot Attachment
Screenshots are uploaded as issue assets, then embedded inline via a follow-up comment:
```markdown
### Screenshot
![screenshot](https://git.opensourcesolarpunk.com/attachments/<uuid>)
```
This keeps the issue body clean and puts the screenshot in a distinct comment.
---
## Labels
| Label | Color | Applied when |
|-------|-------|-------------|
| `beta-feedback` | `#0075ca` | Always |
| `needs-triage` | `#e4e669` | Always |
| `bug` | `#d73a4a` | Type = Bug |
| `feature-request` | `#a2eeef` | Type = Feature Request |
| `question` | `#d876e3` | Type = Other |
Labels are looked up by name on each submission; missing ones are auto-created via `_ensure_labels()`.
---
## Bug Bot Account Setup
The token currently bundled in `.env` is pyr0ball's personal token. For beta distribution,
create a dedicated bot account so the token has limited scope and can be rotated independently.
### Why a bot account?
- Token gets bundled in beta testers' `.env` — shouldn't be tied to the repo owner's account
- Bot can be limited to issue write only (cannot push code, see private repos, etc.)
- Token rotation doesn't affect the owner's other integrations
### Steps (requires Forgejo admin panel — API admin access not available on this token)
1. **Create bot account** at `https://git.opensourcesolarpunk.com/-/admin/users/new`
- Username: `peregrine-bot` (or `cf-bugbot`)
- Email: a real address you control (e.g. `bot+peregrine@circuitforge.tech`)
- Set a strong password (store in your password manager)
- Check "Prohibit login" if you want a pure API-only account
2. **Add as collaborator** on `pyr0ball/peregrine`:
- Settings → Collaborators → Add `peregrine-bot` with **Write** access
- Write access is required to create labels; issue creation alone would need only Read+Comment
3. **Generate API token** (log in as the bot, or use admin impersonation):
- User Settings → Applications → Generate New Token
- Name: `peregrine-feedback`
- Scopes: `issue` (write) — no repo code access needed
- Copy the token — it won't be shown again
4. **Update environment**:
```
FORGEJO_API_TOKEN=<new bot token>
FORGEJO_REPO=pyr0ball/peregrine
FORGEJO_API_URL=https://git.opensourcesolarpunk.com/api/v1
```
Update both `.env` (dev machine) and any beta tester `.env` files.
5. **Verify** the bot can create issues:
```bash
curl -s -X POST https://git.opensourcesolarpunk.com/api/v1/repos/pyr0ball/peregrine/issues \
-H "Authorization: token <bot-token>" \
-H "Content-Type: application/json" \
-d '{"title":"[TEST] bot token check","body":"safe to close","labels":[]}'
```
Expected: HTTP 201 with `number` and `html_url` in response.
### Future: Heimdall token management
Once Heimdall is live, the bot token should be served by the license server rather than
bundled in `.env`. The app fetches it at startup using the user's license key → token is
never stored on disk and can be rotated server-side. Track as a future Heimdall feature.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 94 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 220 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 72 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 99 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 98 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 35 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 73 KiB

View file

@ -1,7 +1,5 @@
# Apply Workspace
![Peregrine apply workspace with cover letter generator and ATS optimizer](../screenshots/03-apply.png)
The Apply Workspace is where you generate cover letters, export application documents, and record that you have applied to a job.
---

View file

@ -1,7 +1,5 @@
# Job Review
![Peregrine job review triage](../screenshots/02-review.png)
The Job Review page is where you approve or reject newly discovered jobs before they enter the application pipeline.
---

View file

@ -1,4 +1,4 @@
name: cf
name: job-seeker
# Recreate: conda env create -f environment.yml
# Update pinned snapshot: conda env export --no-builds > environment.yml
channels:

View file

@ -15,11 +15,6 @@ cd "$SCRIPT_DIR"
PROFILE="${PROFILE:-remote}"
# ── Compose engine detection ──────────────────────────────────────────────────
COMPOSE="$(command -v docker >/dev/null 2>&1 && docker compose version >/dev/null 2>&1 \
&& echo "docker compose" \
|| (command -v podman >/dev/null 2>&1 && echo "podman compose" || echo "podman-compose"))"
# ── Usage ────────────────────────────────────────────────────────────────────
usage() {
echo ""
@ -33,12 +28,10 @@ usage() {
echo -e " ${GREEN}start${NC} Start Peregrine (preflight → up)"
echo -e " ${GREEN}stop${NC} Stop all services"
echo -e " ${GREEN}restart${NC} Restart all services"
echo -e " ${GREEN}build [service]${NC} Rebuild image(s) without restarting (default: api web)"
echo -e " ${GREEN}status${NC} Show running containers"
echo -e " ${GREEN}logs [service]${NC} Tail logs (default: api)"
echo -e " ${GREEN}update${NC} Pull latest images + rebuild"
echo -e " ${GREEN}logs [service]${NC} Tail logs (default: app)"
echo -e " ${GREEN}update${NC} Pull latest images + rebuild app"
echo -e " ${GREEN}preflight${NC} Check ports + resources; write .env"
echo -e " ${GREEN}models${NC} Check ollama models in config; pull any missing"
echo -e " ${GREEN}test${NC} Run test suite"
echo -e " ${GREEN}e2e [mode]${NC} Run E2E tests (mode: demo|cloud|local, default: demo)"
echo -e " Set E2E_HEADLESS=false to run headed via Xvfb"
@ -47,12 +40,6 @@ usage() {
echo -e " ${GREEN}clean${NC} Remove containers, images, volumes (DESTRUCTIVE)"
echo -e " ${GREEN}open${NC} Open the web UI in your browser"
echo ""
echo -e " Cloud / demo commands:"
echo -e " ${GREEN}cloud-start${NC} Start the cloud stack (peregrine-cloud)"
echo -e " ${GREEN}cloud-restart${NC} Rebuild + restart the cloud stack"
echo -e " ${GREEN}demo-start${NC} Start the demo stack (peregrine-demo)"
echo -e " ${GREEN}demo-restart${NC} Rebuild + restart the demo stack"
echo ""
echo " Profiles (set via --profile or PROFILE env var):"
echo " remote API-only, no local inference (default)"
echo " cpu Local Ollama inference on CPU"
@ -82,7 +69,7 @@ while [[ $# -gt 0 ]]; do
esac
done
SERVICE="${1:-api}" # used by `logs` command
SERVICE="${1:-app}" # used by `logs` command
# ── Dependency guard ──────────────────────────────────────────────────────────
# Commands that delegate to make; others (status, logs, update, open, setup) run fine without it.
@ -96,7 +83,7 @@ case "$CMD" in
setup)
info "Running dependency installer..."
bash install.sh
bash setup.sh
;;
preflight)
@ -104,16 +91,10 @@ case "$CMD" in
make preflight PROFILE="$PROFILE"
;;
models)
info "Checking ollama models..."
conda run -n cf python scripts/preflight.py --models-only
success "Model check complete."
;;
start)
info "Starting Peregrine (PROFILE=${PROFILE})..."
make start PROFILE="$PROFILE"
PORT="$(grep -m1 '^VUE_PORT=' .env 2>/dev/null | cut -d= -f2 || echo 8506)"
PORT="$(grep -m1 '^STREAMLIT_PORT=' .env 2>/dev/null | cut -d= -f2 || echo 8501)"
success "Peregrine is up → http://localhost:${PORT}"
;;
@ -126,30 +107,33 @@ case "$CMD" in
restart)
info "Restarting (PROFILE=${PROFILE})..."
make restart PROFILE="$PROFILE"
PORT="$(grep -m1 '^VUE_PORT=' .env 2>/dev/null | cut -d= -f2 || echo 8506)"
PORT="$(grep -m1 '^STREAMLIT_PORT=' .env 2>/dev/null | cut -d= -f2 || echo 8501)"
success "Peregrine restarted → http://localhost:${PORT}"
;;
status)
# Auto-detect compose engine same way Makefile does
COMPOSE="$(command -v docker >/dev/null 2>&1 && docker compose version >/dev/null 2>&1 \
&& echo "docker compose" \
|| (command -v podman >/dev/null 2>&1 && echo "podman compose" || echo "podman-compose"))"
$COMPOSE ps
;;
logs)
COMPOSE="$(command -v docker >/dev/null 2>&1 && docker compose version >/dev/null 2>&1 \
&& echo "docker compose" \
|| (command -v podman >/dev/null 2>&1 && echo "podman compose" || echo "podman-compose"))"
info "Tailing logs for: ${SERVICE}"
$COMPOSE logs -f "$SERVICE"
;;
build)
BUILD_SVC="$([[ "${SERVICE}" == "api" ]] && echo "api web" || echo "${SERVICE}")"
info "Building ${BUILD_SVC}..."
$COMPOSE build $BUILD_SVC
success "Build complete. Run './manage.sh restart' to apply."
;;
update)
info "Pulling latest images and rebuilding..."
info "Pulling latest images and rebuilding app..."
COMPOSE="$(command -v docker >/dev/null 2>&1 && docker compose version >/dev/null 2>&1 \
&& echo "docker compose" \
|| (command -v podman >/dev/null 2>&1 && echo "podman compose" || echo "podman-compose"))"
$COMPOSE pull searxng ollama 2>/dev/null || true
$COMPOSE build api web
$COMPOSE build app
success "Update complete. Run './manage.sh restart' to apply."
;;
@ -176,7 +160,7 @@ case "$CMD" in
;;
open)
PORT="$(grep -m1 '^VUE_PORT=' .env 2>/dev/null | cut -d= -f2 || echo 8506)"
PORT="$(grep -m1 '^STREAMLIT_PORT=' .env 2>/dev/null | cut -d= -f2 || echo 8501)"
URL="http://localhost:${PORT}"
info "Opening ${URL}"
if command -v xdg-open &>/dev/null; then
@ -199,39 +183,13 @@ case "$CMD" in
RUNNER=""
fi
info "Running E2E tests (mode=${MODE}, headless=${HEADLESS})..."
$RUNNER conda run -n cf pytest tests/e2e/ \
$RUNNER conda run -n job-seeker pytest tests/e2e/ \
--mode="${MODE}" \
--json-report \
--json-report-file="${RESULTS_DIR}/report.json" \
-v "${@:3}"
;;
cloud-start)
info "Starting cloud stack (peregrine-cloud)..."
$COMPOSE -f compose.cloud.yml --project-name peregrine-cloud up -d
success "Cloud stack up → http://localhost:8508"
;;
cloud-restart)
info "Rebuilding + restarting cloud stack (peregrine-cloud)..."
$COMPOSE -f compose.cloud.yml --project-name peregrine-cloud build api web
$COMPOSE -f compose.cloud.yml --project-name peregrine-cloud up -d
success "Cloud stack restarted → http://localhost:8508"
;;
demo-start)
info "Starting demo stack (peregrine-demo)..."
$COMPOSE -f compose.demo.yml --project-name peregrine-demo up -d
success "Demo stack up → http://localhost:8504"
;;
demo-restart)
info "Rebuilding + restarting demo stack (peregrine-demo)..."
$COMPOSE -f compose.demo.yml --project-name peregrine-demo build api web
$COMPOSE -f compose.demo.yml --project-name peregrine-demo up -d
success "Demo stack restarted → http://localhost:8504"
;;
help|--help|-h)
usage
;;

View file

@ -1,97 +0,0 @@
-- Migration 001: Baseline schema
-- Captures the full schema as of v0.8.5 (all columns including those added via ALTER TABLE)
--
-- Every statement is CREATE TABLE IF NOT EXISTS: on a database that already has a
-- table of the same name the statement is a no-op, so it never adds columns that an
-- older copy of the table is missing. Later migrations cover those with ALTER TABLE.
-- Timestamp-like columns (*_at, date_found, interview_date) are declared TEXT.

-- jobs: url is the only UNIQUE column; status defaults to 'pending'.
-- optimized_resume and ats_gap_report are part of this baseline, so the
-- ADD COLUMN statements in migration 002 duplicate them on a fresh database.
CREATE TABLE IF NOT EXISTS jobs (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    title TEXT,
    company TEXT,
    url TEXT UNIQUE,
    source TEXT,
    location TEXT,
    is_remote INTEGER DEFAULT 0,
    salary TEXT,
    description TEXT,
    match_score REAL,
    keyword_gaps TEXT,
    date_found TEXT,
    status TEXT DEFAULT 'pending',
    notion_page_id TEXT,
    cover_letter TEXT,
    applied_at TEXT,
    interview_date TEXT,
    rejection_stage TEXT,
    phone_screen_at TEXT,
    interviewing_at TEXT,
    offer_at TEXT,
    hired_at TEXT,
    survey_at TEXT,
    calendar_event_id TEXT,
    optimized_resume TEXT,
    ats_gap_report TEXT
);
-- job_contacts: job_id is a plain INTEGER with no REFERENCES clause, so the link
-- to jobs(id) is not enforced by SQLite.
CREATE TABLE IF NOT EXISTS job_contacts (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    job_id INTEGER,
    direction TEXT,
    subject TEXT,
    from_addr TEXT,
    to_addr TEXT,
    body TEXT,
    received_at TEXT,
    is_response_needed INTEGER DEFAULT 0,
    responded_at TEXT,
    message_id TEXT,
    stage_signal TEXT,
    suggestion_dismissed INTEGER DEFAULT 0
);
-- company_research: job_id is UNIQUE, i.e. at most one research row per job_id.
CREATE TABLE IF NOT EXISTS company_research (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    job_id INTEGER UNIQUE,
    generated_at TEXT,
    company_brief TEXT,
    ceo_brief TEXT,
    talking_points TEXT,
    raw_output TEXT,
    tech_brief TEXT,
    funding_brief TEXT,
    competitors_brief TEXT,
    red_flags TEXT,
    scrape_used INTEGER DEFAULT 0,
    accessibility_brief TEXT
);
-- background_tasks: status defaults to 'pending'; job_id carries no REFERENCES clause.
CREATE TABLE IF NOT EXISTS background_tasks (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    task_type TEXT,
    job_id INTEGER,
    params TEXT,
    status TEXT DEFAULT 'pending',
    error TEXT,
    created_at TEXT,
    started_at TEXT,
    finished_at TEXT,
    stage TEXT,
    updated_at TEXT
);
-- survey_responses: job_id carries no REFERENCES clause; reported_score is REAL.
CREATE TABLE IF NOT EXISTS survey_responses (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    job_id INTEGER,
    survey_name TEXT,
    received_at TEXT,
    source TEXT,
    raw_input TEXT,
    image_path TEXT,
    mode TEXT,
    llm_output TEXT,
    reported_score REAL,
    created_at TEXT
);
-- digest_queue: job_contact_id is UNIQUE, so a given job_contact_id can be queued
-- at most once. No REFERENCES clause ties it to job_contacts(id).
CREATE TABLE IF NOT EXISTS digest_queue (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    job_contact_id INTEGER UNIQUE,
    created_at TEXT
);

View file

@ -1,7 +0,0 @@
-- Add ATS resume optimizer columns introduced in v0.8.x.
-- Existing DBs that were created before the baseline included these columns
-- need this migration to add them.
-- On a DB created from the current baseline the columns already exist, and
-- SQLite has no "ADD COLUMN IF NOT EXISTS", so both statements fail there with
-- a "duplicate column name" error. That failure is expected to be tolerated at
-- the application level: db_migrate.py wraps each migration in a transaction
-- and uses a try/ignore pattern.
-- NOTE(review): db_migrate.py is not visible here; confirm it really ignores
-- the duplicate-column error rather than aborting the migration run.
ALTER TABLE jobs ADD COLUMN optimized_resume TEXT;
ALTER TABLE jobs ADD COLUMN ats_gap_report TEXT;

View file

@ -1,3 +0,0 @@
-- Resume review draft and version archive columns (migration 003)
-- Both columns are nullable TEXT with no default, so existing rows read back NULL.
-- Presumably each holds a JSON document (per the _json suffix); confirm against the writer.
ALTER TABLE jobs ADD COLUMN resume_draft_json TEXT;
ALTER TABLE jobs ADD COLUMN resume_archive_json TEXT;

View file

@ -1,5 +0,0 @@
-- Migration 004: add resume_final_struct to jobs table
-- Stores the approved resume as a structured JSON dict alongside the plain text
-- (resume_optimized_text). Enables YAML export and future re-processing without
-- re-parsing the plain text.
-- NOTE(review): no column named resume_optimized_text is created by migrations
-- 001-003; the plain-text resume column defined there is jobs.optimized_resume.
-- Confirm which name is meant.
-- The new column is nullable TEXT with no default.
ALTER TABLE jobs ADD COLUMN resume_final_struct TEXT;

View file

@ -1,17 +0,0 @@
-- 005_resumes_table.sql
-- Resume library: named saved resumes per user (optimizer output, imports, manual)
--
-- name and text are required; source defaults to 'manual'; created_at and
-- updated_at default to datetime('now') at insert time. Nothing in this schema
-- refreshes updated_at on UPDATE, so writers have to set it themselves.
-- is_default is a plain INTEGER defaulting to 0; no constraint here limits it
-- to a single row.
-- The REFERENCES clauses are only enforced when the connection has
-- PRAGMA foreign_keys = ON.
CREATE TABLE IF NOT EXISTS resumes (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    name TEXT NOT NULL,
    source TEXT NOT NULL DEFAULT 'manual',
    job_id INTEGER REFERENCES jobs(id),
    text TEXT NOT NULL,
    struct_json TEXT,
    word_count INTEGER,
    is_default INTEGER NOT NULL DEFAULT 0,
    created_at TEXT NOT NULL DEFAULT (datetime('now')),
    updated_at TEXT NOT NULL DEFAULT (datetime('now'))
);
-- Back-reference from a job to a saved resume; NULL for all existing rows.
ALTER TABLE jobs ADD COLUMN resume_id INTEGER REFERENCES resumes(id);

View file

@ -1,6 +0,0 @@
-- 006_date_posted.sql
-- Add date_posted column for shadow listing detection (stale/shadow score feature).
-- New DBs already have this column from the CREATE TABLE statement in db.py;
-- this migration adds it to existing user DBs.
-- NOTE(review): the other migration numbered 006 ("Add columns and tables present
-- in the live DB but missing from migrations") also runs
-- ALTER TABLE jobs ADD COLUMN date_posted. Whichever of the two runs second fails
-- with "duplicate column name" unless the migration runner tolerates that error.
-- Confirm, or drop one of the two statements.
ALTER TABLE jobs ADD COLUMN date_posted TEXT;

View file

@ -1,22 +0,0 @@
-- Migration 006: Add columns and tables present in the live DB but missing from migrations
-- These were added via direct ALTER TABLE after the v0.8.5 baseline was written.
-- NOTE(review): 006_date_posted.sql shares the number 006 and also adds
-- jobs.date_posted. The second of the two to run hits "duplicate column name"
-- unless the migration runner ignores it. Confirm the intended ordering.
-- date_posted: used for ghost-post shadow-score detection
ALTER TABLE jobs ADD COLUMN date_posted TEXT;
-- hired_feedback: JSON blob saved when a job reaches the 'hired' outcome
ALTER TABLE jobs ADD COLUMN hired_feedback TEXT;
-- references_ table: contacts who can provide references for applications
-- (the trailing underscore keeps the name clear of the SQL keyword REFERENCES).
-- Only name is required; every other column is nullable TEXT.
CREATE TABLE IF NOT EXISTS references_ (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    name TEXT NOT NULL,
    relationship TEXT,
    company TEXT,
    email TEXT,
    phone TEXT,
    notes TEXT,
    tags TEXT,
    prep_email TEXT,
    role TEXT
);

View file

@ -1,3 +0,0 @@
-- 007_resume_sync.sql
-- Add synced_at to resumes: ISO datetime of last library↔profile sync, null = never synced.
-- The column has no default, so every existing resumes row starts as NULL (never synced).
ALTER TABLE resumes ADD COLUMN synced_at TEXT;

View file

@ -1,97 +0,0 @@
-- messages: manual log entries and LLM drafts
-- template_id points at message_templates, which is created further down.
-- SQLite accepts a REFERENCES clause to a table that does not exist yet; the
-- parent is only looked up when rows are modified.
-- The ON DELETE SET NULL actions (and the REFERENCES clauses generally) take
-- effect only on connections that have PRAGMA foreign_keys = ON.
CREATE TABLE IF NOT EXISTS messages (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    job_id INTEGER REFERENCES jobs(id) ON DELETE SET NULL,
    job_contact_id INTEGER REFERENCES job_contacts(id) ON DELETE SET NULL,
    type TEXT NOT NULL DEFAULT 'email',
    direction TEXT,
    subject TEXT,
    body TEXT,
    from_addr TEXT,
    to_addr TEXT,
    logged_at TEXT NOT NULL DEFAULT (datetime('now')),
    approved_at TEXT,
    template_id INTEGER REFERENCES message_templates(id) ON DELETE SET NULL,
    osprey_call_id TEXT
);
-- message_templates: built-in seeds and user-created templates
-- key is UNIQUE but nullable; the seed rows below all set it. title and
-- body_template are required; category defaults to 'custom'.
CREATE TABLE IF NOT EXISTS message_templates (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    key TEXT UNIQUE,
    title TEXT NOT NULL,
    category TEXT NOT NULL DEFAULT 'custom',
    subject_template TEXT,
    body_template TEXT NOT NULL,
    is_builtin INTEGER NOT NULL DEFAULT 0,
    is_community INTEGER NOT NULL DEFAULT 0,
    community_source TEXT,
    created_at TEXT NOT NULL DEFAULT (datetime('now')),
    updated_at TEXT NOT NULL DEFAULT (datetime('now'))
);
-- Seed the four built-in templates (is_builtin = 1). INSERT OR IGNORE together
-- with the UNIQUE key column makes the seed idempotent: a row whose key already
-- exists is skipped, so an edited built-in is not overwritten on re-run.
-- The {{...}} placeholders are stored verbatim; whatever substitutes them lives
-- outside this migration.
INSERT OR IGNORE INTO message_templates
(key, title, category, subject_template, body_template, is_builtin)
VALUES
(
'follow_up',
'Following up on my application',
'follow_up',
'Following up — {{role}} application',
'Hi {{recruiter_name}},
I wanted to follow up on my application for the {{role}} position at {{company}}. I remain very interested in the opportunity and would welcome the chance to discuss my background further.
Please let me know if there is anything else you need from me.
Best regards,
{{name}}',
1
),
(
'thank_you',
'Thank you for the interview',
'thank_you',
'Thank you — {{role}} interview',
'Hi {{recruiter_name}},
Thank you for taking the time to speak with me about the {{role}} role at {{company}}. I enjoyed learning more about the team and the work you are doing.
I am very excited about this opportunity and look forward to hearing about the next steps.
Best regards,
{{name}}',
1
),
(
'accommodation_request',
'Accommodation request',
'accommodation',
'Accommodation request — {{role}} interview',
'Hi {{recruiter_name}},
I am writing to request a reasonable accommodation for my upcoming interview for the {{role}} position. Specifically, I would appreciate:
{{accommodation_details}}
Please let me know if you need any additional information. I am happy to discuss this further.
Thank you,
{{name}}',
1
),
(
'withdrawal',
'Withdrawing my application',
'withdrawal',
'Application withdrawal — {{role}}',
'Hi {{recruiter_name}},
I am writing to let you know that I would like to withdraw my application for the {{role}} position at {{company}}.
Thank you for your time and consideration. I wish you and the team all the best.
Best regards,
{{name}}',
1
)

View file

@ -1 +0,0 @@
-- Add jobs.excluded_from_training (INTEGER, default 0 for existing and new rows).
-- Presumably a 0/1 flag; confirm against the code that reads it.
ALTER TABLE jobs ADD COLUMN excluded_from_training INTEGER DEFAULT 0;

View file

@ -70,6 +70,3 @@ nav:
- Tier System: reference/tier-system.md
- LLM Router: reference/llm-router.md
- Config Files: reference/config-files.md
extra_javascript:
- plausible.js

View file

@ -1,92 +0,0 @@
#!/usr/bin/env bash
# podman-standalone.sh — Peregrine rootful Podman setup (no Compose)
#
# For beta testers running system Podman (non-rootless) with systemd.
# Mirrors the manage.sh "remote" profile: app + SearXNG only.
# Ollama/vLLM/vision are expected as host services if needed.
#
# NOTE: this script only creates the `peregrine` container. The
# `peregrine-searxng` container mentioned below is not started here; create it
# separately or point Peregrine at an existing SearXNG instance.
#
# ── Prerequisites ────────────────────────────────────────────────────────────
#   1. Clone the repo:
#        sudo git clone <repo-url> /opt/peregrine
#
#   2. Build the app image:
#        cd /opt/peregrine && sudo podman build -t localhost/peregrine:latest .
#
#   3. Create the config/data directories and copy each example config to its
#      real name (e.g. llm.yaml.example -> llm.yaml) without clobbering files
#      you have already edited:
#        sudo mkdir -p /opt/peregrine/{config,data}
#        for f in /opt/peregrine/config/*.example; do
#          sudo cp -n "$f" "${f%.example}"
#        done
#        # Edit /opt/peregrine/config/llm.yaml, notion.yaml, etc. as needed
#
#   4. Run this script:
#        sudo bash /opt/peregrine/podman-standalone.sh
#
# ── After setup — generate systemd unit files ────────────────────────────────
#   sudo podman generate systemd --new --name peregrine-searxng \
#     | sudo tee /etc/systemd/system/peregrine-searxng.service
#   sudo podman generate systemd --new --name peregrine \
#     | sudo tee /etc/systemd/system/peregrine.service
#   sudo systemctl daemon-reload
#   sudo systemctl enable --now peregrine-searxng peregrine
#
# ── SearXNG ──────────────────────────────────────────────────────────────────
# Peregrine expects a SearXNG instance with JSON format enabled.
# If you already run one, skip the SearXNG container and set the URL in
# config/llm.yaml (searxng_url key). The default is http://localhost:8888.
#
# ── Ports ────────────────────────────────────────────────────────────────────
#   Peregrine UI → http://localhost:8501
#
# ── To use a different Streamlit port ────────────────────────────────────────
# Append the streamlit command shown below the `podman run` block after the
# image name and change --server.port (and the port in --health-cmd) to match.
# The Dockerfile default is 8501.
#
set -euo pipefail

REPO_DIR=/opt/peregrine
DATA_DIR=/opt/peregrine/data
DOCS_DIR=/Library/Documents/JobSearch # ← adjust to your docs path
TZ=America/Los_Angeles

# ── Peregrine App ─────────────────────────────────────────────────────────────
# Image is built locally — no registry auto-update label.
# To update: sudo podman build -t localhost/peregrine:latest /opt/peregrine
#            sudo podman restart peregrine
#
# Env vars: ANTHROPIC_API_KEY, OPENAI_COMPAT_URL, OPENAI_COMPAT_KEY are
# optional — only needed if you're using those backends in config/llm.yaml.
#
# Volume and env arguments are quoted so paths containing spaces (common for
# document folders) are passed to podman as a single argument.
sudo podman run -d \
  --name=peregrine \
  --restart=unless-stopped \
  --net=host \
  -v "${REPO_DIR}/config:/app/config:Z" \
  -v "${DATA_DIR}:/app/data:Z" \
  -v "${DOCS_DIR}:/docs:z" \
  -e STAGING_DB=/app/data/staging.db \
  -e DOCS_DIR=/docs \
  -e PYTHONUNBUFFERED=1 \
  -e PYTHONLOGGING=WARNING \
  -e "TZ=${TZ}" \
  --health-cmd="curl -f http://localhost:8501/_stcore/health || exit 1" \
  --health-interval=30s \
  --health-timeout=10s \
  --health-start-period=60s \
  --health-retries=3 \
  localhost/peregrine:latest

# To override the default port (8501), append the command below to the
# `podman run` invocation, right after the image name, and edit --server.port:
# streamlit run app/app.py --server.port=8501 --server.headless=true --server.fileWatcherType=none

echo ""
echo "Peregrine is starting up."
echo " App: http://localhost:8501"
echo ""
echo "Check container health with:"
echo " sudo podman ps"
echo " sudo podman logs peregrine"
echo ""
echo "To register as a systemd service:"
echo " sudo podman generate systemd --new --name peregrine \\"
echo " | sudo tee /etc/systemd/system/peregrine.service"
echo " sudo systemctl daemon-reload"
echo " sudo systemctl enable --now peregrine"

View file

@ -1,33 +0,0 @@
[tool.ruff]
# app/ is the deprecated Streamlit UI (replaced by Vue+FastAPI).
# No new work goes there; exclude from linting rather than accumulate suppressions.
exclude = ["app/"]
[tool.ruff.lint.per-file-ignores]
# dev-api.py / dev_api.py (symlink): E702 semicolons in compact Pydantic model
# definitions — intentional style for dense data models with many simple fields.
# E402: mid-file module-level imports are intentional in dev-api.py for test patchability.
"dev-api.py" = ["E702", "E402"]
"dev_api.py" = ["E702", "E402"]
# finetune_local.py: E402 ML libs (torch, datasets, trl) are imported after
# runtime CUDA / Unsloth availability checks — conditional import pattern.
"scripts/finetune_local.py" = ["E402", "E741"]
# scripts/: E402 mid-file imports used for lazy loading or post-env-setup imports.
"scripts/task_runner.py" = ["E402"]
"scripts/migrate.py" = ["E741"]
# scrapers/: third-party script; minimal changes policy.
"scrapers/companyScraper.py" = ["E722"]
# tools/: deprecated label tool copy (canonical in avocet); suppress style warnings.
"tools/label_tool.py" = ["E741"]
# tests/: F841 unused variables are the standard mock-patch capture pattern
# (e.g., `original_fn = obj.method` before monkeypatching).
# E741 ambiguous `l` names and E402 conditional imports are common in test fixtures.
# E702 compact `con.commit(); con.close()` is a common SQLite test helper idiom.
"tests/**" = ["F841", "E741", "E402", "E702"]
"tests/test_wizard_steps.py" = ["F841", "E741", "E402", "E702"]
"scripts/test_email_classify.py" = ["E402", "F841"]

View file

@ -2,15 +2,6 @@
# Extracted from environment.yml for Docker pip installs
# Keep in sync with environment.yml
# ── CircuitForge shared core ───────────────────────────────────────────────
# Requires circuitforge-core >= 0.8.0 (config.load_env, db, tasks; resources moved to circuitforge-orch).
# Local dev / Docker (parent-context build): path install works because
# circuitforge-core/ is a sibling directory.
# CI / fresh checkouts: falls back to the Forgejo VCS URL below.
# To use local editable install run: pip install -e ../circuitforge-core
# TODO: pin to a release tag (>= v0.8.0, matching the requirement stated above) once cf-core cuts one.
git+https://git.opensourcesolarpunk.com/Circuit-Forge/circuitforge-core.git@main
# ── Web UI ────────────────────────────────────────────────────────────────
streamlit>=1.35
watchdog
@ -87,10 +78,3 @@ lxml
# ── Documentation ────────────────────────────────────────────────────────
mkdocs>=1.5
mkdocs-material>=9.5
# ── Vue SPA API backend ──────────────────────────────────────────────────
fastapi>=0.100.0
uvicorn[standard]>=0.20.0
PyJWT>=2.8.0
cryptography>=40.0.0
python-multipart>=0.0.6

View file

@ -1,89 +0,0 @@
"""
Peregrine cloud session: a thin wrapper around circuitforge_core.cloud_session.

Sets request-scoped ContextVars with the authenticated user_id, tier, and
custom writing model so that _allocate_orch_async in llm.py can forward them
to cf-orch without any service function signature changes.

Usage (add to main.py once):

    from app.cloud_session import session_middleware_dep
    app = FastAPI(..., dependencies=[Depends(session_middleware_dep)])

From that point, any route (and every service/llm function it calls)
has access to the current user context via llm.get_request_*() helpers.

Writing model resolution order (first match wins):
  1. USER_WRITING_MODELS env var: a JSON dict mapping Directus user UUID -> model name,
     e.g. USER_WRITING_MODELS={"5b99ca9f-...": "meghan-letter-writer:latest"}
     Use this for Monday; no Heimdall changes required.
  2. session.meta["custom_writing_model"]: returned by the Heimdall resolve endpoint
     once Heimdall is updated to expose user_preferences fields.
"""
from __future__ import annotations
import json
import logging
import os
from fastapi import Depends, Request, Response
from circuitforge_core.cloud_session import CloudSessionFactory, CloudUser, detect_byok
log = logging.getLogger(__name__)
__all__ = ["CloudUser", "get_session", "require_tier", "session_middleware_dep"]
# JSON dict mapping Directus user UUID → custom writing model name.
# Used until Heimdall's resolve endpoint exposes user_preferences.
def _load_user_writing_models() -> dict[str, str]:
    """Parse the USER_WRITING_MODELS environment variable.

    The variable is expected to hold a JSON object mapping a user id to the
    name of that user's custom writing model.

    Returns:
        The mapping, restricted to entries whose value is a non-empty string.
        An empty dict when the variable is unset, blank, not valid JSON, or
        valid JSON that is not an object.
    """
    raw = os.environ.get("USER_WRITING_MODELS", "").strip()
    if not raw:
        return {}
    try:
        parsed = json.loads(raw)
    except json.JSONDecodeError:
        log.warning("USER_WRITING_MODELS is not valid JSON — ignoring")
        return {}
    # json.loads happily returns lists, strings or numbers; callers use .get(),
    # so anything but an object has to be rejected here rather than crash later.
    if not isinstance(parsed, dict):
        log.warning("USER_WRITING_MODELS must be a JSON object — ignoring")
        return {}
    models = {
        user_id: model
        for user_id, model in parsed.items()
        if isinstance(model, str) and model
    }
    if len(models) != len(parsed):
        log.warning(
            "USER_WRITING_MODELS contains %d entries without a model name — skipping them",
            len(parsed) - len(models),
        )
    return models
# Evaluated once, at import time: later changes to the USER_WRITING_MODELS
# environment variable are not reflected in this module-level mapping.
_USER_WRITING_MODELS: dict[str, str] = _load_user_writing_models()
# Session factory from circuitforge_core, configured for the "peregrine"
# product with circuitforge_core's detect_byok as the BYOK detector.
_factory = CloudSessionFactory(
    product="peregrine",
    byok_detector=detect_byok,
)
# Public names exported through __all__; both are produced by the factory above.
get_session = _factory.dependency()
require_tier = _factory.require_tier
async def session_middleware_dep(request: Request, response: Response) -> None:
    """Global FastAPI dependency: resolve the session and publish it via ContextVars.

    The values set here are read by llm._allocate_orch_async and llm.complete
    through the llm.get_request_*() helpers.

    Sets:
        - user_id: real cloud UUID, or None for local/anon sessions
        - tier: the resolved tier string (free/paid/premium/ultra/local)
        - writing_model: custom fine-tuned model name, or None

    This is a coroutine on purpose. FastAPI runs plain ``def`` dependencies in a
    worker thread, and that thread operates on a *copy* of the request's
    context, so ContextVar.set() calls made there never become visible to the
    route handler. Setting the variables from a coroutine keeps them in the
    request task's own context. The (synchronous) session lookup itself is
    still pushed to the thread pool so it cannot block the event loop.

    Add as a global dependency in main.py:
        app = FastAPI(..., dependencies=[Depends(session_middleware_dep)])
    """
    from fastapi.concurrency import run_in_threadpool

    # Imported lazily, as in the original implementation.
    from app.llm import set_request_tier, set_request_user_id, set_request_writing_model

    session = await run_in_threadpool(_factory.resolve, request, response)

    user_id = session.user_id
    # Only forward real cloud UUIDs — local/dev/anon sessions use the shared catalog
    if user_id in (None, "local", "local-dev") or (user_id or "").startswith("anon-"):
        user_id = None
    set_request_user_id(user_id)
    set_request_tier(session.tier)

    # Resolution order: env-var map (Monday path) → Heimdall meta (future path).
    # The env-var map is keyed by the raw session user id, not the filtered one.
    writing_model = (
        _USER_WRITING_MODELS.get(session.user_id)
        or session.meta.get("custom_writing_model")
    )
    set_request_writing_model(writing_model)

View file

@ -1,843 +0,0 @@
"""LiteLLM wrapper for multi-provider AI support."""
import json
import logging
import os
import re
from contextlib import asynccontextmanager
from contextvars import ContextVar
from dataclasses import dataclass
from typing import Any
import httpx
import litellm
from pydantic import BaseModel
from app.config import settings
# LLM timeout configuration (seconds) - base values
# Each value is handed to litellm.acompletion as its `timeout` argument.
LLM_TIMEOUT_HEALTH_CHECK = 30  # used by check_llm_health
LLM_TIMEOUT_COMPLETION = 120  # used by complete
LLM_TIMEOUT_JSON = 180 # JSON completions may take longer
# LLM-004: OpenRouter JSON-capable models (explicit allowlist)
# Entries are "vendor/model" ids without the "openrouter/" prefix that
# get_model_name adds for the openrouter provider.
OPENROUTER_JSON_CAPABLE_MODELS = {
    # Anthropic models
    "anthropic/claude-3-opus",
    "anthropic/claude-3-sonnet",
    "anthropic/claude-3-haiku",
    "anthropic/claude-3.5-sonnet",
    "anthropic/claude-3.5-haiku",
    "anthropic/claude-haiku-4-5-20251001",
    "anthropic/claude-sonnet-4-20250514",
    "anthropic/claude-opus-4-20250514",
    # OpenAI models
    "openai/gpt-4-turbo",
    "openai/gpt-4",
    "openai/gpt-4o",
    "openai/gpt-4o-mini",
    "openai/gpt-3.5-turbo",
    "openai/gpt-5-nano-2025-08-07",
    # Google models
    "google/gemini-pro",
    "google/gemini-1.5-pro",
    "google/gemini-1.5-flash",
    "google/gemini-2.0-flash",
    "google/gemini-3-flash-preview",
    # DeepSeek models
    "deepseek/deepseek-chat",
    "deepseek/deepseek-reasoner",
    # Mistral models
    "mistralai/mistral-large",
    "mistralai/mistral-medium",
}
# JSON-010: JSON extraction safety limits
# Upper bounds for the JSON-extraction helpers defined later in this module.
MAX_JSON_EXTRACTION_RECURSION = 10
MAX_JSON_CONTENT_SIZE = 1024 * 1024 # 1MB
# Request-scoped user_id — set once by session_middleware_dep, read inside _allocate_orch_async.
# ContextVar is safe for concurrent async requests: each request task gets its own copy.
_request_user_id: ContextVar[str | None] = ContextVar("request_user_id", default=None)
_request_tier: ContextVar[str | None] = ContextVar("request_tier", default=None)
# Custom writing model for premium/ultra users — populated from Heimdall license key meta.
# Set to None for all other tiers; complete() falls back to the shared base model.
_request_writing_model: ContextVar[str | None] = ContextVar("request_writing_model", default=None)
_PREMIUM_TIERS: frozenset[str] = frozenset({"premium", "ultra"})
def set_request_user_id(user_id: str | None) -> None:
_request_user_id.set(user_id)
def get_request_user_id() -> str | None:
return _request_user_id.get()
def set_request_tier(tier: str | None) -> None:
_request_tier.set(tier)
def get_request_tier() -> str | None:
return _request_tier.get()
def set_request_writing_model(model: str | None) -> None:
_request_writing_model.set(model)
def get_request_writing_model() -> str | None:
return _request_writing_model.get()
class LLMConfig(BaseModel):
    """Connection settings for one LLM backend.

    Instances are built by get_llm_config() from config.json and the
    application settings, or directly by complete() for a cf-orch allocation.
    """

    # Provider key; get_model_name knows "openai", "anthropic", "openrouter",
    # "gemini", "deepseek" and "ollama".
    provider: str
    # Model id, with or without a provider prefix (get_model_name adds one when needed).
    model: str
    # Passed straight to litellm. check_llm_health reports "api_key_missing"
    # when this is empty and the provider is not "ollama".
    api_key: str
    # Optional base URL override; cleaned up by _normalize_api_base before use.
    api_base: str | None = None
@dataclass
class _OrchAllocation:
    """One cf-orch service allocation, as yielded by _allocate_orch_async."""

    # `allocation_id` from the coordinator's allocate response; used to build
    # the release (DELETE) URL when the context manager exits.
    allocation_id: str
    # `url` from the allocate response; complete() appends "/v1" to it.
    url: str
    # Service name the allocation was requested for (the caller's argument).
    service: str
@asynccontextmanager
async def _allocate_orch_async(
    coordinator_url: str,
    service: str,
    model_candidates: list[str],
    ttl_s: float,
    caller: str,
):
    """Allocate a cf-orch service for the duration of an ``async with`` block.

    Posts to ``{coordinator_url}/api/services/{service}/allocate`` and yields an
    _OrchAllocation built from the response. On exit, whether the body succeeded
    or raised, the allocation is released with a DELETE request; a failed
    release is logged at debug level and otherwise ignored.

    Args:
        coordinator_url: Base URL of the cf-orch coordinator (trailing "/" allowed).
        service: Service name to allocate, e.g. "vllm".
        model_candidates: Model names sent to the coordinator as ``model_candidates``.
        ttl_s: Value sent as ``ttl_s``.
        caller: Value sent as ``caller``.

    Raises:
        RuntimeError: The coordinator answered the allocate call with a
            non-success status.
    """
    service_url = f"{coordinator_url.rstrip('/')}/api/services/{service}"
    async with httpx.AsyncClient(timeout=120.0) as client:
        payload: dict[str, Any] = {
            "model_candidates": model_candidates,
            "ttl_s": ttl_s,
            "caller": caller,
        }
        # The user id is only present for authenticated cloud requests.
        uid = get_request_user_id()
        if uid:
            payload["user_id"] = uid
        resp = await client.post(f"{service_url}/allocate", json=payload)
        if not resp.is_success:
            # Keep the status code and the (truncated) body visibly separated.
            raise RuntimeError(
                f"cf-orch allocation failed for {service!r}: "
                f"HTTP {resp.status_code}: {resp.text[:200]}"
            )
        data = resp.json()
        alloc = _OrchAllocation(
            allocation_id=data["allocation_id"],
            url=data["url"],
            service=service,
        )
        try:
            yield alloc
        finally:
            # Best-effort release: a failure here must not mask the body's outcome.
            try:
                await client.delete(
                    f"{service_url}/allocations/{alloc.allocation_id}",
                    timeout=10.0,
                )
            except Exception as exc:
                logging.debug("cf-orch release failed (non-fatal): %s", exc)
def _normalize_api_base(provider: str, api_base: str | None) -> str | None:
"""Normalize api_base for LiteLLM provider-specific expectations.
When using proxies/aggregators, users often paste a base URL that already
includes a version segment (e.g., `/v1`). Some LiteLLM provider handlers
append those segments internally, which can lead to duplicated paths like
`/v1/v1/...` and cause 404s.
"""
if not api_base:
return None
base = api_base.strip()
if not base:
return None
base = base.rstrip("/")
# Anthropic handler appends '/v1/messages'. If base already ends with '/v1',
# strip it to avoid '/v1/v1/messages'.
if provider == "anthropic" and base.endswith("/v1"):
base = base[: -len("/v1")].rstrip("/")
# Gemini handler appends '/v1/models/...'. If base already ends with '/v1',
# strip it to avoid '/v1/v1/models/...'.
if provider == "gemini" and base.endswith("/v1"):
base = base[: -len("/v1")].rstrip("/")
return base or None
def _extract_text_parts(value: Any, depth: int = 0, max_depth: int = 10) -> list[str]:
    """Collect the text segments found in a nested response structure.

    Strings are returned as-is, lists are flattened element by element, dicts
    are followed through the first of the keys ``text``/``content``/``value``
    that is present, and other objects through their ``text`` or ``content``
    attribute. Anything else contributes nothing.

    Args:
        value: The structure to search.
        depth: Current recursion depth.
        max_depth: Depth at which the search gives up and returns no content.

    Returns:
        The text segments, in encounter order.
    """
    if value is None or depth >= max_depth:
        return []
    if isinstance(value, str):
        return [value]

    deeper = depth + 1
    if isinstance(value, list):
        collected: list[str] = []
        for element in value:
            collected += _extract_text_parts(element, deeper, max_depth)
        return collected

    if isinstance(value, dict):
        # Only the first key that is present is followed, even if it yields nothing.
        for key in ("text", "content", "value"):
            if key in value:
                return _extract_text_parts(value.get(key), deeper, max_depth)
        return []

    for attribute in ("text", "content"):
        if hasattr(value, attribute):
            return _extract_text_parts(getattr(value, attribute), deeper, max_depth)
    return []
def _join_text_parts(parts: list[str]) -> str | None:
"""Join text parts with newlines, filtering empty strings.
Args:
parts: Candidate text segments.
Returns:
Joined string or None if the result is empty.
"""
joined = "\n".join(part for part in parts if part).strip()
return joined or None
def _extract_message_text(message: Any) -> str | None:
    """Return the plain text of a LiteLLM message, or None if it has none.

    The message may be an object exposing ``content`` or a dict with a
    ``content`` key; anything else is treated as having no content.
    """
    if hasattr(message, "content"):
        raw = message.content
    elif isinstance(message, dict):
        raw = message.get("content")
    else:
        raw = None
    return _join_text_parts(_extract_text_parts(raw))
def _extract_choice_text(choice: Any) -> str | None:
    """Return the plain text carried by a LiteLLM choice.

    Sources are tried in this order and the first non-empty one wins:
    the choice's message, then ``text``, then ``delta``. Each of ``text`` and
    ``delta`` is looked up as an attribute first and as a dict key second.

    Args:
        choice: LiteLLM choice object or dict.

    Returns:
        The extracted text, or None if no source yields content.
    """
    is_mapping = isinstance(choice, dict)

    if hasattr(choice, "message"):
        message = choice.message
    elif is_mapping:
        message = choice.get("message")
    else:
        message = None
    found = _extract_message_text(message)
    if found:
        return found

    for field in ("text", "delta"):
        if hasattr(choice, field):
            found = _join_text_parts(_extract_text_parts(getattr(choice, field)))
            if found:
                return found
        if is_mapping and field in choice:
            found = _join_text_parts(_extract_text_parts(choice.get(field)))
            if found:
                return found
    return None
def _to_code_block(content: str | None, language: str = "text") -> str:
"""Wrap content in a markdown code block for client display."""
text = (content or "").strip()
if not text:
text = "<empty>"
return f"```{language}\n{text}\n```"
def _load_stored_config() -> dict:
    """Load the stored LLM settings from the config.json file.

    The file is read as UTF-8 (the encoding JSON is defined in) instead of the
    platform default.

    Returns:
        The parsed JSON object. An empty dict when the file is missing or
        unreadable, is not valid UTF-8 or JSON, or holds something other than
        a JSON object. Callers use ``.get()`` on the result, so it is always
        a dict.
    """
    config_path = settings.config_path
    try:
        # Reading directly (rather than checking exists() first) avoids a race
        # with the file being removed in between; a missing file is an OSError.
        stored = json.loads(config_path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        return {}
    return stored if isinstance(stored, dict) else {}
def get_llm_config() -> LLMConfig:
    """Build the active LLM configuration.

    Each field comes from config.json when the file provides it and otherwise
    from the corresponding environment-backed value on ``settings``.
    """
    overrides = _load_stored_config()
    fallbacks = {
        "provider": settings.llm_provider,
        "model": settings.llm_model,
        "api_key": settings.llm_api_key,
        "api_base": settings.llm_api_base,
    }
    resolved = {
        field: overrides.get(field, fallback)
        for field, fallback in fallbacks.items()
    }
    return LLMConfig(**resolved)
def get_model_name(config: LLMConfig) -> str:
    """Translate a provider/model pair into the model id LiteLLM expects.

    OpenRouter ids are nested (``openrouter/anthropic/claude-3.5-sonnet``), so
    for that provider the ``openrouter/`` prefix is always ensured. For every
    other provider a model that already starts with a known provider prefix is
    returned untouched; otherwise the provider's own prefix is prepended.
    OpenAI and unknown providers have no prefix.
    """
    model = config.model
    provider = config.provider

    if provider == "openrouter":
        return model if model.startswith("openrouter/") else f"openrouter/{model}"

    # Already qualified with some provider prefix: leave it alone.
    if model.startswith(("openrouter/", "anthropic/", "gemini/", "deepseek/", "ollama/")):
        return model

    own_prefix = {
        "anthropic": "anthropic/",
        "gemini": "gemini/",
        "deepseek": "deepseek/",
        "ollama": "ollama/",
    }.get(provider, "")
    return f"{own_prefix}{model}"
def _supports_temperature(provider: str, model: str) -> bool:
    """Tell whether ``temperature`` may be passed for this provider/model pair.

    Models of the OpenAI gpt-5 family reject temperature values other than 1,
    and LiteLLM may raise when one is supplied, so any model whose name
    contains "gpt-5" (case-insensitively) is reported as unsupported.
    """
    del provider  # accepted for signature symmetry; the decision is model-based
    return "gpt-5" not in model.lower()
def _get_reasoning_effort(provider: str, model: str) -> str | None:
"""Return a default reasoning_effort for models that require it.
Some OpenAI gpt-5 models may return empty message.content unless a supported
`reasoning_effort` is explicitly set. This keeps downstream JSON parsing reliable.
"""
_ = provider
model_lower = model.lower()
if "gpt-5" in model_lower:
return "minimal"
return None
async def check_llm_health(
    config: LLMConfig | None = None,
    *,
    include_details: bool = False,
    test_prompt: str | None = None,
) -> dict[str, Any]:
    """Probe the LLM provider with a tiny completion and report the outcome.

    Args:
        config: Backend to probe; defaults to get_llm_config().
        include_details: When true, the report also carries the prompt, the
            model output and (on failure) the exception text, each rendered as
            a markdown code block.
        test_prompt: Prompt to send; "Hi" is used when omitted or empty.

    Returns:
        A dict that always has ``healthy``, ``provider`` and ``model``.
        Failures add ``error_code``; calls that reached the provider add
        ``response_model``. An empty model response counts as unhealthy.
        Exceptions from the provider call are logged and reported, not raised.
    """
    cfg = get_llm_config() if config is None else config

    def report(healthy: bool, **extra: Any) -> dict[str, Any]:
        return {"healthy": healthy, "provider": cfg.provider, "model": cfg.model, **extra}

    # Every provider except Ollama needs an API key before a call is worth making.
    if cfg.provider != "ollama" and not cfg.api_key:
        return report(False, error_code="api_key_missing")

    model_name = get_model_name(cfg)
    prompt = test_prompt or "Hi"

    def add_details(outcome: dict[str, Any], model_output: str | None) -> None:
        if include_details:
            outcome["test_prompt"] = _to_code_block(prompt)
            outcome["model_output"] = _to_code_block(model_output)

    try:
        # The API key travels with the call itself to avoid races on os.environ.
        request: dict[str, Any] = {
            "model": model_name,
            "messages": [{"role": "user", "content": prompt}],
            "max_tokens": 16,
            "api_key": cfg.api_key,
            "api_base": _normalize_api_base(cfg.provider, cfg.api_base),
            "timeout": LLM_TIMEOUT_HEALTH_CHECK,
        }
        effort = _get_reasoning_effort(cfg.provider, model_name)
        if effort:
            request["reasoning_effort"] = effort

        response = await litellm.acompletion(**request)
        content = _extract_choice_text(response.choices[0])

        if content:
            outcome = report(True, response_model=response.model if response else None)
            add_details(outcome, content)
            return outcome

        # LLM-003: a reachable provider that answers with nothing is unhealthy.
        logging.warning(
            "LLM health check returned empty content",
            extra={"provider": cfg.provider, "model": cfg.model},
        )
        outcome = report(
            False,
            response_model=response.model if response else None,
            error_code="empty_content",
            message="LLM returned empty response",
        )
        add_details(outcome, None)
        return outcome
    except Exception as e:
        # Full details go to the server log; clients get a coarse error code
        # (plus the exception text only when include_details was requested).
        logging.exception(
            "LLM health check failed",
            extra={"provider": cfg.provider, "model": cfg.model},
        )
        message = str(e)
        lowered = message.lower()
        if "404" in message:
            error_code = "duplicate_v1_path" if "/v1/v1/" in message else "not_found_404"
        elif "<!doctype html" in lowered or "<html" in lowered:
            error_code = "html_response"
        else:
            error_code = "health_check_failed"

        outcome = report(False, error_code=error_code)
        add_details(outcome, None)
        if include_details:
            outcome["error_detail"] = _to_code_block(message)
        return outcome
async def complete(
    prompt: str,
    system_prompt: str | None = None,
    config: LLMConfig | None = None,
    max_tokens: int = 4096,
    temperature: float = 0.7,
) -> str:
    """Send one prompt to the LLM and return the text of its reply.

    When ``config`` is omitted and the ``CF_ORCH_URL`` environment variable is
    non-empty, an endpoint is first requested from cf-orch and the call is
    re-issued against it. If anything in that path raises, a warning is logged
    and the default configuration from ``get_llm_config()`` is used instead.

    Args:
        prompt: User message content.
        system_prompt: Optional system message placed before the user message.
        config: Explicit LLM configuration; ``None`` selects cf-orch/default.
        max_tokens: Forwarded to the completion call as ``max_tokens``.
        temperature: Forwarded only when ``_supports_temperature`` allows it.

    Returns:
        The non-empty text extracted from the first choice.

    Raises:
        ValueError: On any failure while building or performing the request,
            including an empty reply. The underlying exception is chained.
    """
    if config is None:
        orch_url = os.environ.get("CF_ORCH_URL", "").strip()
        if orch_url:
            try:
                # Premium/ultra users get their personal fine-tuned writing
                # model as the first candidate; the base model stays in the
                # list as the fallback.
                tier = get_request_tier()
                writing_model = get_request_writing_model()
                if writing_model and tier in _PREMIUM_TIERS:
                    model_candidates: list[str] = [writing_model, "Qwen2.5-3B-Instruct"]
                else:
                    model_candidates = ["Qwen2.5-3B-Instruct"]
                async with _allocate_orch_async(
                    orch_url,
                    "vllm",
                    model_candidates=model_candidates,
                    ttl_s=300.0,
                    caller="peregrine-resume-matcher",
                ) as alloc:
                    allocated = LLMConfig(
                        provider="openai",
                        model="__auto__",
                        api_key="any",
                        api_base=alloc.url.rstrip("/") + "/v1",
                    )
                    return await complete(
                        prompt, system_prompt, allocated, max_tokens, temperature
                    )
            except Exception as exc:
                logging.warning(
                    "cf-orch allocation failed, falling back to default config: %s", exc
                )
        config = get_llm_config()

    model_name = get_model_name(config)

    messages = [{"role": "system", "content": system_prompt}] if system_prompt else []
    messages.append({"role": "user", "content": prompt})

    try:
        # The API key travels with the request rather than through os.environ,
        # which avoids races between concurrent calls.
        request: dict[str, Any] = {
            "model": model_name,
            "messages": messages,
            "max_tokens": max_tokens,
            "api_key": config.api_key,
            "api_base": _normalize_api_base(config.provider, config.api_base),
            "timeout": LLM_TIMEOUT_COMPLETION,
        }
        if _supports_temperature(config.provider, model_name):
            request["temperature"] = temperature
        effort = _get_reasoning_effort(config.provider, model_name)
        if effort:
            request["reasoning_effort"] = effort

        response = await litellm.acompletion(**request)
        text = _extract_choice_text(response.choices[0])
        if text:
            return text
        raise ValueError("Empty response from LLM")
    except Exception as err:
        # Full detail is logged server-side; callers get a generic message.
        logging.error(f"LLM completion failed: {err}", extra={"model": model_name})
        raise ValueError(
            "LLM completion failed. Please check your API configuration and try again."
        ) from err
def _supports_json_mode(provider: str, model: str) -> bool:
"""Check if the model supports JSON mode."""
# Models that support response_format={"type": "json_object"}
json_mode_providers = ["openai", "anthropic", "gemini", "deepseek"]
if provider in json_mode_providers:
return True
# LLM-004: OpenRouter models - use explicit allowlist instead of substring matching
if provider == "openrouter":
return model in OPENROUTER_JSON_CAPABLE_MODELS
return False
def _appears_truncated(data: dict) -> bool:
"""LLM-001: Check if JSON data appears to be truncated.
Detects suspicious patterns indicating incomplete responses.
"""
if not isinstance(data, dict):
return False
# Check for empty arrays that should typically have content
suspicious_empty_arrays = ["workExperience", "education", "skills"]
for key in suspicious_empty_arrays:
if key in data and data[key] == []:
# Log warning - these are rarely empty in real resumes
logging.warning(
"Possible truncation detected: '%s' is empty",
key,
)
return True
# Check for missing critical sections
required_top_level = ["personalInfo"]
for key in required_top_level:
if key not in data:
logging.warning(
"Possible truncation detected: missing required section '%s'",
key,
)
return True
return False
def _get_retry_temperature(attempt: int, base_temp: float = 0.1) -> float:
"""LLM-002: Get temperature for retry attempt - increases with each retry.
Higher temperature on retries gives the model more variation to produce
different (hopefully valid) output.
"""
temperatures = [base_temp, 0.3, 0.5, 0.7]
return temperatures[min(attempt, len(temperatures) - 1)]
def _calculate_timeout(
    operation: str,
    max_tokens: int = 4096,
    provider: str = "openai",
) -> int:
    """LLM-005: Derive a request timeout from operation, size and provider.

    The base timeout for ``operation`` (unknown operations use the completion
    timeout) is multiplied by a token factor, which is never below 1.0 and
    grows linearly past 4096 tokens, and by a per-provider latency factor
    (1.0 for providers not listed). The product is truncated to ``int``.
    """
    if operation == "health_check":
        base = LLM_TIMEOUT_HEALTH_CHECK
    elif operation == "json":
        base = LLM_TIMEOUT_JSON
    else:
        base = LLM_TIMEOUT_COMPLETION

    # Scale relative to the 4096-token baseline; never shrink the base.
    token_factor = max(1.0, max_tokens / 4096)

    latency_by_provider = {
        "openai": 1.0,
        "anthropic": 1.2,
        "openrouter": 1.5,  # More variable latency
        "ollama": 2.0,  # Local models can be slower
    }
    provider_factor = latency_by_provider.get(provider, 1.0)

    return int(base * token_factor * provider_factor)
def _extract_json(content: str, _depth: int = 0) -> str:
    """Pull the first JSON object out of an LLM reply.

    Handles replies wrapped in markdown code fences and replies with leading
    prose before the object. The returned string runs from the opening ``{``
    to its matching ``}``; it is not parsed here.

    LLM-001: unbalanced braces are logged as possible truncation.
    LLM-007: unrecognised formats are logged with a content preview.
    JSON-010: recursion depth and input size are bounded.

    Args:
        content: Raw text returned by the model.
        _depth: Internal recursion counter; callers leave it at 0.

    Raises:
        ValueError: If the limits are exceeded or no object can be located.
    """
    # JSON-010: Safety limits
    if _depth > MAX_JSON_EXTRACTION_RECURSION:
        raise ValueError(f"JSON extraction exceeded max recursion depth: {_depth}")
    if len(content) > MAX_JSON_CONTENT_SIZE:
        raise ValueError(f"Content too large for JSON extraction: {len(content)} bytes")

    original = content

    # Unwrap a markdown code fence, keeping only the text inside the first one.
    if "```json" in content:
        content = content.partition("```json")[2].partition("```")[0]
    elif "```" in content:
        content = content.partition("```")[2].partition("```")[0]
        # Drop a bare language tag left at the start (e.g. "json\n{...").
        if content.startswith(("json", "JSON")):
            content = content[4:]
    content = content.strip()

    if content.startswith("{"):
        depth = 0
        end_idx = -1
        in_string = False
        pos = 0
        length = len(content)
        while pos < length:
            char = content[pos]
            if char == "\\":
                # A backslash consumes the character that follows it.
                pos += 2
                continue
            if char == '"':
                in_string = not in_string
            elif not in_string:
                if char == "{":
                    depth += 1
                elif char == "}":
                    depth -= 1
                    if depth == 0:
                        end_idx = pos
                        break
            pos += 1

        if end_idx != -1:
            return content[: end_idx + 1]
        # LLM-001: the scan finished without closing the outermost brace.
        if depth != 0:
            logging.warning(
                "JSON extraction found unbalanced braces (depth=%d), possible truncation",
                depth,
            )

    # Leading prose: retry from the first "{". Recursing only when it sits
    # after position 0 prevents infinite recursion.
    start_idx = content.find("{")
    if start_idx > 0:
        return _extract_json(content[start_idx:], _depth + 1)

    # LLM-007: Log unrecognized format for debugging
    logging.error(
        "Could not extract JSON from response format. Content preview: %s",
        content[:200] if content else "<empty>",
    )
    raise ValueError(f"No JSON found in response: {original[:200]}")
async def complete_json(
    prompt: str,
    system_prompt: str | None = None,
    config: LLMConfig | None = None,
    max_tokens: int = 4096,
    retries: int = 2,
) -> dict[str, Any]:
    """Request a completion and return the reply parsed as JSON.

    JSON mode is requested when ``_supports_json_mode`` allows it. The call is
    attempted up to ``retries + 1`` times; the temperature (where supported)
    follows ``_get_retry_temperature``. After a JSON parse failure the user
    message gets an extra "output only JSON" hint for the following attempts.

    When ``config`` is omitted and ``CF_ORCH_URL`` is set, an endpoint is
    first requested from cf-orch; on any failure there a warning is logged and
    ``get_llm_config()`` is used instead.

    Raises:
        ValueError: When the final attempt yields unparseable JSON, or when
            no attempt was made at all.
        Exception: Any other error from the final attempt is re-raised as is.
    """
    if config is None:
        orch_url = os.environ.get("CF_ORCH_URL", "").strip()
        if orch_url:
            try:
                async with _allocate_orch_async(
                    orch_url,
                    "vllm",
                    model_candidates=["Qwen2.5-3B-Instruct"],
                    ttl_s=300.0,
                    caller="peregrine-resume-matcher",
                ) as alloc:
                    allocated = LLMConfig(
                        provider="openai",
                        model="__auto__",
                        api_key="any",
                        api_base=alloc.url.rstrip("/") + "/v1",
                    )
                    return await complete_json(
                        prompt, system_prompt, allocated, max_tokens, retries
                    )
            except Exception as exc:
                logging.warning(
                    "cf-orch allocation failed, falling back to default config: %s", exc
                )
        config = get_llm_config()

    model_name = get_model_name(config)

    json_system = (
        system_prompt or ""
    ) + "\n\nYou must respond with valid JSON only. No explanations, no markdown."
    messages = [
        {"role": "system", "content": json_system},
        {"role": "user", "content": prompt},
    ]

    use_json_mode = _supports_json_mode(config.provider, config.model)
    last_error = None

    for attempt in range(retries + 1):
        try:
            # The API key travels with the request rather than through
            # os.environ, which avoids races between concurrent calls.
            request: dict[str, Any] = {
                "model": model_name,
                "messages": messages,
                "max_tokens": max_tokens,
                "api_key": config.api_key,
                "api_base": _normalize_api_base(config.provider, config.api_base),
                "timeout": _calculate_timeout("json", max_tokens, config.provider),
            }
            if _supports_temperature(config.provider, model_name):
                # LLM-002: warmer on each retry to get a different output.
                request["temperature"] = _get_retry_temperature(attempt)
            effort = _get_reasoning_effort(config.provider, model_name)
            if effort:
                request["reasoning_effort"] = effort
            if use_json_mode:
                request["response_format"] = {"type": "json_object"}

            response = await litellm.acompletion(**request)
            content = _extract_choice_text(response.choices[0])
            if not content:
                raise ValueError("Empty response from LLM")
            logging.debug(f"LLM response (attempt {attempt + 1}): {content[:300]}")

            parsed = json.loads(_extract_json(content))

            # LLM-001: a truncated-looking result is reported but still returned.
            if isinstance(parsed, dict) and _appears_truncated(parsed):
                logging.warning(
                    "Parsed JSON appears truncated, but proceeding with result"
                )
            return parsed
        except json.JSONDecodeError as parse_err:
            last_error = parse_err
            logging.warning(f"JSON parse failed (attempt {attempt + 1}): {parse_err}")
            if attempt >= retries:
                raise ValueError(
                    f"Failed to parse JSON after {retries + 1} attempts: {parse_err}"
                )
            # Nudge the model towards bare JSON on the next attempt.
            messages[-1]["content"] = (
                prompt
                + "\n\nIMPORTANT: Output ONLY a valid JSON object. Start with { and end with }."
            )
        except Exception as call_err:
            last_error = call_err
            logging.warning(f"LLM call failed (attempt {attempt + 1}): {call_err}")
            if attempt >= retries:
                raise

    raise ValueError(f"Failed after {retries + 1} attempts: {last_error}")

View file

@ -1,88 +0,0 @@
"""FastAPI application entry point."""
import asyncio
import logging
import sys
from contextlib import asynccontextmanager
from fastapi import Depends, FastAPI
# Fix for Windows: Use ProactorEventLoop for subprocess support (Playwright)
if sys.platform == "win32":
asyncio.set_event_loop_policy(asyncio.WindowsProactorEventLoopPolicy())
logger = logging.getLogger(__name__)
from fastapi.middleware.cors import CORSMiddleware
from app import __version__
from app.cloud_session import session_middleware_dep
from app.config import settings
from app.database import db
from app.pdf import close_pdf_renderer, init_pdf_renderer
from app.routers import config_router, enrichment_router, health_router, jobs_router, resumes_router
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Run startup work before the app serves and cleanup after it stops.

    Startup creates ``settings.data_dir`` if it is missing. The PDF renderer
    is not started here; it initializes lazily on first use. Shutdown closes
    the PDF renderer and the database, each guarded separately so a failure
    in one does not prevent the other from being released.
    """
    settings.data_dir.mkdir(parents=True, exist_ok=True)

    yield

    try:
        await close_pdf_renderer()
    except Exception as pdf_err:
        logger.error(f"Error closing PDF renderer: {pdf_err}")

    try:
        db.close()
    except Exception as db_err:
        logger.error(f"Error closing database: {db_err}")
# Application instance. `lifespan` supplies the startup/shutdown hooks, and
# `session_middleware_dep` is declared as an app-level dependency.
app = FastAPI(
title="Resume Matcher API",
description="AI-powered resume tailoring for job descriptions",
version=__version__,
lifespan=lifespan,
dependencies=[Depends(session_middleware_dep)],
)
# CORS middleware - allowed origins are read from settings.cors_origins
# (configurable via the CORS_ORIGINS env var); credentials are allowed and
# methods/headers are unrestricted.
app.add_middleware(
CORSMiddleware,
allow_origins=settings.cors_origins,
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
# Include routers - every router is mounted under the shared /api/v1 prefix.
app.include_router(health_router, prefix="/api/v1")
app.include_router(config_router, prefix="/api/v1")
app.include_router(resumes_router, prefix="/api/v1")
app.include_router(jobs_router, prefix="/api/v1")
app.include_router(enrichment_router, prefix="/api/v1")
@app.get("/")
async def root():
    """Return the service name, its version and the docs path."""
    return dict(
        name="Resume Matcher API",
        version=__version__,
        docs="/docs",
    )
if __name__ == "__main__":
import uvicorn
uvicorn.run(
"app.main:app",
host=settings.host,
port=settings.port,
reload=True,
)

View file

@ -14,6 +14,7 @@ Enhanced features:
import argparse
import csv
import json
import os
import random
import re

View file

@ -31,6 +31,7 @@ sys.path.insert(0, str(Path(__file__).parent.parent))
from scripts.classifier_adapters import (
LABELS,
LABEL_DESCRIPTIONS,
ClassifierAdapter,
GLiClassAdapter,
RerankerAdapter,

View file

@ -5,6 +5,7 @@ push updates the existing event rather than creating a duplicate.
"""
from __future__ import annotations
import uuid
import yaml
from datetime import datetime, timedelta, timezone
from pathlib import Path

View file

@ -277,8 +277,7 @@ def _load_resume_and_keywords() -> tuple[dict, list[str]]:
return resume, keywords
def research_company(job: dict, use_scraper: bool = True, on_stage=None,
config_path: "Path | None" = None) -> dict:
def research_company(job: dict, use_scraper: bool = True, on_stage=None) -> dict:
"""
Generate a pre-interview research brief for a job.
@ -296,7 +295,7 @@ def research_company(job: dict, use_scraper: bool = True, on_stage=None,
"""
from scripts.llm_router import LLMRouter
router = LLMRouter(config_path=config_path) if config_path else LLMRouter()
router = LLMRouter()
research_order = router.config.get("research_fallback_order") or router.config["fallback_order"]
company = job.get("company") or "the company"
title = job.get("title") or "this role"

View file

@ -70,7 +70,7 @@ def scrape(profile: dict, location: str, results_wanted: int = 50) -> list[dict]
print(f" [adzuna] Skipped — {exc}")
return []
titles = profile.get("titles") or profile.get("job_titles", [])
titles = profile.get("titles", [])
hours_old = profile.get("hours_old", 240)
max_days_old = max(1, hours_old // 24)
is_remote_search = location.lower() == "remote"

View file

@ -121,7 +121,7 @@ def scrape(profile: dict, location: str, results_wanted: int = 50) -> list[dict]
return []
metros = [metro]
titles: list[str] = profile.get("titles") or profile.get("job_titles", [])
titles: list[str] = profile.get("titles", [])
hours_old: int = profile.get("hours_old", 240)
cutoff = datetime.now(tz=timezone.utc).timestamp() - (hours_old * 3600)

View file

@ -107,7 +107,7 @@ def scrape(profile: dict, location: str, results_wanted: int = 50) -> list[dict]
)
page = ctx.new_page()
for title in (profile.get("titles") or profile.get("job_titles", [])):
for title in profile.get("titles", []):
if len(results) >= results_wanted:
break

View file

@ -9,14 +9,30 @@ from datetime import datetime
from pathlib import Path
from typing import Optional
from circuitforge_core.db import get_connection as _cf_get_connection
DEFAULT_DB = Path(os.environ.get("STAGING_DB", Path(__file__).parent.parent / "staging.db"))
def get_connection(db_path: Path = DEFAULT_DB, key: str = "") -> "sqlite3.Connection":
"""Thin shim — delegates to circuitforge_core.db.get_connection."""
return _cf_get_connection(db_path, key)
"""
Open a database connection.
In cloud mode with a key: uses SQLCipher (AES-256 encrypted, API-identical to sqlite3).
Otherwise: vanilla sqlite3.
Args:
db_path: Path to the SQLite/SQLCipher database file.
key: SQLCipher encryption key (hex string). Empty = unencrypted.
"""
import os as _os
cloud_mode = _os.environ.get("CLOUD_MODE", "").lower() in ("1", "true", "yes")
if cloud_mode and key:
from pysqlcipher3 import dbapi2 as _sqlcipher
conn = _sqlcipher.connect(str(db_path))
conn.execute(f"PRAGMA key='{key}'")
return conn
else:
import sqlite3 as _sqlite3
return _sqlite3.connect(str(db_path))
CREATE_JOBS = """
@ -130,32 +146,6 @@ CREATE TABLE IF NOT EXISTS digest_queue (
)
"""
CREATE_REFERENCES = """
CREATE TABLE IF NOT EXISTS references_ (
id INTEGER PRIMARY KEY AUTOINCREMENT,
name TEXT NOT NULL,
relationship TEXT,
company TEXT,
email TEXT,
phone TEXT,
notes TEXT,
tags TEXT DEFAULT '[]',
created_at TEXT DEFAULT (datetime('now')),
updated_at TEXT DEFAULT (datetime('now'))
);
"""
CREATE_JOB_REFERENCES = """
CREATE TABLE IF NOT EXISTS job_references (
id INTEGER PRIMARY KEY AUTOINCREMENT,
job_id INTEGER NOT NULL REFERENCES jobs(id) ON DELETE CASCADE,
reference_id INTEGER NOT NULL REFERENCES references_(id) ON DELETE CASCADE,
prep_email TEXT,
rec_letter TEXT,
UNIQUE(job_id, reference_id)
);
"""
_MIGRATIONS = [
("cover_letter", "TEXT"),
("applied_at", "TEXT"),
@ -167,11 +157,6 @@ _MIGRATIONS = [
("hired_at", "TEXT"),
("survey_at", "TEXT"),
("calendar_event_id", "TEXT"),
("optimized_resume", "TEXT"), # ATS-rewritten resume text (paid tier)
("ats_gap_report", "TEXT"), # JSON gap report (free tier)
("date_posted", "TEXT"), # Original posting date from job board (shadow listing detection)
("hired_feedback", "TEXT"), # JSON: optional post-hire "what helped" response
("excluded_from_training", "INTEGER DEFAULT 0"), # opt-out of training export
]
@ -205,9 +190,6 @@ def _migrate_db(db_path: Path) -> None:
conn.execute("ALTER TABLE background_tasks ADD COLUMN params TEXT")
except sqlite3.OperationalError:
pass # column already exists
# Ensure references tables exist (CREATE IF NOT EXISTS is idempotent)
conn.execute(CREATE_REFERENCES)
conn.execute(CREATE_JOB_REFERENCES)
conn.commit()
conn.close()
@ -221,8 +203,6 @@ def init_db(db_path: Path = DEFAULT_DB) -> None:
conn.execute(CREATE_BACKGROUND_TASKS)
conn.execute(CREATE_SURVEY_RESPONSES)
conn.execute(CREATE_DIGEST_QUEUE)
conn.execute(CREATE_REFERENCES)
conn.execute(CREATE_JOB_REFERENCES)
conn.commit()
conn.close()
_migrate_db(db_path)
@ -234,11 +214,10 @@ def insert_job(db_path: Path = DEFAULT_DB, job: dict = None) -> Optional[int]:
return None
conn = sqlite3.connect(db_path)
try:
status = job.get("status", "pending")
cursor = conn.execute(
"""INSERT INTO jobs
(title, company, url, source, location, is_remote, salary, description, date_found, date_posted, status)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
(title, company, url, source, location, is_remote, salary, description, date_found)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)""",
(
job.get("title", ""),
job.get("company", ""),
@ -249,8 +228,6 @@ def insert_job(db_path: Path = DEFAULT_DB, job: dict = None) -> Optional[int]:
job.get("salary", ""),
job.get("description", ""),
job.get("date_found", ""),
job.get("date_posted", "") or "",
status,
),
)
conn.commit()
@ -350,128 +327,6 @@ def update_cover_letter(db_path: Path = DEFAULT_DB, job_id: int = None, text: st
conn.close()
def save_optimized_resume(db_path: Path = DEFAULT_DB, job_id: int | None = None,
                          text: str = "", gap_report: str = "") -> None:
    """Persist the ATS-optimized resume text and gap report for a job.

    Both columns are written on every call: an empty ``text`` or
    ``gap_report`` is stored as NULL and therefore clears any previous value.

    Args:
        db_path: SQLite database file.
        job_id: Target job id; the call is a no-op when ``None``.
        text: Optimized resume text.
        gap_report: Gap report (stored as given, expected to be JSON text).
    """
    if job_id is None:
        return
    conn = sqlite3.connect(db_path)
    try:
        conn.execute(
            "UPDATE jobs SET optimized_resume = ?, ats_gap_report = ? WHERE id = ?",
            (text or None, gap_report or None, job_id),
        )
        conn.commit()
    finally:
        # Close even if the UPDATE or commit raises.
        conn.close()
def get_optimized_resume(db_path: Path = DEFAULT_DB, job_id: int | None = None) -> dict:
    """Return ``optimized_resume`` and ``ats_gap_report`` for a job.

    Args:
        db_path: SQLite database file.
        job_id: Job to look up.

    Returns:
        A dict with both keys. Values are empty strings when ``job_id`` is
        ``None``, the job does not exist, or the column is NULL/empty.
    """
    if job_id is None:
        return {"optimized_resume": "", "ats_gap_report": ""}
    conn = sqlite3.connect(db_path)
    try:
        conn.row_factory = sqlite3.Row
        row = conn.execute(
            "SELECT optimized_resume, ats_gap_report FROM jobs WHERE id = ?", (job_id,)
        ).fetchone()
    finally:
        # Close even if the query raises.
        conn.close()
    if not row:
        return {"optimized_resume": "", "ats_gap_report": ""}
    return {
        "optimized_resume": row["optimized_resume"] or "",
        "ats_gap_report": row["ats_gap_report"] or "",
    }
def save_resume_draft(db_path: Path = DEFAULT_DB, job_id: int | None = None,
                      draft_json: str = "") -> None:
    """Persist a structured resume review draft (awaiting user approval).

    Args:
        db_path: SQLite database file.
        job_id: Target job id; the call is a no-op when ``None``.
        draft_json: Serialized draft. An empty string is stored as NULL,
            which clears any pending draft.
    """
    if job_id is None:
        return
    conn = sqlite3.connect(db_path)
    try:
        conn.execute(
            "UPDATE jobs SET resume_draft_json = ? WHERE id = ?",
            (draft_json or None, job_id),
        )
        conn.commit()
    finally:
        # Close even if the UPDATE or commit raises.
        conn.close()
def get_resume_draft(db_path: Path = DEFAULT_DB, job_id: int | None = None) -> dict | None:
    """Return the pending review draft for a job, or ``None`` if there is none.

    ``None`` is also returned when ``job_id`` is ``None``, the job does not
    exist, or the stored draft cannot be decoded as JSON.
    """
    if job_id is None:
        return None
    import json

    conn = sqlite3.connect(db_path)
    try:
        conn.row_factory = sqlite3.Row
        row = conn.execute(
            "SELECT resume_draft_json FROM jobs WHERE id = ?", (job_id,)
        ).fetchone()
    finally:
        # Close even if the query raises.
        conn.close()
    if not row or not row["resume_draft_json"]:
        return None
    try:
        return json.loads(row["resume_draft_json"])
    except Exception:
        # Best effort: an undecodable draft is treated as "no draft waiting".
        return None
def finalize_resume(db_path: Path = DEFAULT_DB, job_id: int | None = None,
                    final_text: str = "") -> None:
    """Save approved resume text, archive the previous version, clear the draft.

    The current ``optimized_resume`` (if any) is appended to the JSON archive
    in ``resume_archive_json`` with an ``archived_at`` timestamp (local time,
    minute precision). ``optimized_resume`` is then replaced by
    ``final_text`` and ``resume_draft_json`` is set to NULL.

    Args:
        db_path: SQLite database file.
        job_id: Target job id; the call is a no-op when ``None``.
        final_text: The approved resume text.
    """
    if job_id is None:
        return
    import json
    from datetime import datetime

    conn = sqlite3.connect(db_path)
    try:
        conn.row_factory = sqlite3.Row
        row = conn.execute(
            "SELECT optimized_resume, resume_archive_json FROM jobs WHERE id = ?", (job_id,)
        ).fetchone()

        archive: list = []
        if row:
            if row["resume_archive_json"]:
                try:
                    stored = json.loads(row["resume_archive_json"])
                except Exception:
                    stored = []
                # Valid JSON that is not a list cannot be appended to;
                # start a fresh archive instead of raising on .append().
                archive = stored if isinstance(stored, list) else []
            if row["optimized_resume"]:
                archive.append({
                    "archived_at": datetime.now().isoformat()[:16],
                    "text": row["optimized_resume"],
                })

        conn.execute(
            "UPDATE jobs SET optimized_resume = ?, resume_draft_json = NULL, "
            "resume_archive_json = ? WHERE id = ?",
            (final_text, json.dumps(archive), job_id),
        )
        conn.commit()
    finally:
        # Close even if a query, the UPDATE or the commit raises.
        conn.close()
def get_resume_archive(db_path: Path = DEFAULT_DB, job_id: int | None = None) -> list:
    """Return past finalized resume versions, most recently archived first.

    An empty list is returned when ``job_id`` is ``None``, the job does not
    exist, nothing has been archived, or the stored archive is not a JSON
    list.
    """
    if job_id is None:
        return []
    import json

    conn = sqlite3.connect(db_path)
    try:
        conn.row_factory = sqlite3.Row
        row = conn.execute(
            "SELECT resume_archive_json FROM jobs WHERE id = ?", (job_id,)
        ).fetchone()
    finally:
        # Close even if the query raises.
        conn.close()
    if not row or not row["resume_archive_json"]:
        return []
    try:
        entries = json.loads(row["resume_archive_json"])
    except Exception:
        return []
    # Only a list is a valid archive; reversing e.g. a JSON object would
    # silently return its keys instead of archive entries.
    if not isinstance(entries, list):
        return []
    return entries[::-1]  # newest first
_UPDATABLE_JOB_COLS = {
"title", "company", "url", "source", "location", "is_remote",
"salary", "description", "match_score", "keyword_gaps",
@ -510,19 +365,6 @@ def mark_applied(db_path: Path = DEFAULT_DB, ids: list[int] = None) -> None:
conn.close()
def cancel_task(db_path: Path = DEFAULT_DB, task_id: int = 0) -> bool:
    """Cancel a single queued/running background task by id.

    The task is marked ``failed`` with the error text "Cancelled by user" and
    ``finished_at`` set to the current time. Tasks in any other state are
    left untouched.

    Returns:
        ``True`` if a row was updated, ``False`` otherwise.
    """
    conn = sqlite3.connect(db_path)
    try:
        count = conn.execute(
            "UPDATE background_tasks SET status='failed', error='Cancelled by user',"
            " finished_at=datetime('now') WHERE id=? AND status IN ('queued','running')",
            (task_id,),
        ).rowcount
        conn.commit()
    finally:
        # Close even if the UPDATE or commit raises.
        conn.close()
    return count > 0
def kill_stuck_tasks(db_path: Path = DEFAULT_DB) -> int:
"""Mark all queued/running background tasks as failed. Returns count killed."""
conn = sqlite3.connect(db_path)
@ -958,286 +800,3 @@ def get_task_for_job(db_path: Path = DEFAULT_DB, task_type: str = "",
).fetchone()
conn.close()
return dict(row) if row else None
# ── Resume library helpers ────────────────────────────────────────────────────
def _resume_as_dict(row) -> dict:
"""Convert a sqlite3.Row from the resumes table to a plain dict."""
return {
"id": row["id"],
"name": row["name"],
"source": row["source"],
"job_id": row["job_id"],
"text": row["text"],
"struct_json": row["struct_json"],
"word_count": row["word_count"],
"is_default": row["is_default"],
"created_at": row["created_at"],
"updated_at": row["updated_at"],
"synced_at": row["synced_at"] if "synced_at" in row.keys() else None,
}
def create_resume(
    db_path: Path = DEFAULT_DB,
    name: str = "",
    text: str = "",
    source: str = "manual",
    job_id: int | None = None,
    struct_json: str | None = None,
) -> dict:
    """Add a resume to the library and return the stored row as a dict.

    ``word_count`` is the number of whitespace-separated tokens in ``text``
    (0 for empty text).
    """
    words = len(text.split()) if text else 0
    insert_sql = (
        "INSERT INTO resumes (name, source, job_id, text, struct_json, word_count)\n"
        "VALUES (?, ?, ?, ?, ?, ?)"
    )
    conn = sqlite3.connect(db_path)
    conn.row_factory = sqlite3.Row
    try:
        cursor = conn.execute(
            insert_sql, (name, source, job_id, text, struct_json, words)
        )
        conn.commit()
        # Re-read the row so database-side defaults are included.
        created = conn.execute(
            "SELECT * FROM resumes WHERE id=?", (cursor.lastrowid,)
        ).fetchone()
        return _resume_as_dict(created)
    finally:
        conn.close()
def list_resumes(db_path: Path = DEFAULT_DB) -> list[dict]:
    """List every resume, default first and then newest first."""
    query = "SELECT * FROM resumes ORDER BY is_default DESC, created_at DESC"
    conn = sqlite3.connect(db_path)
    conn.row_factory = sqlite3.Row
    try:
        return [_resume_as_dict(record) for record in conn.execute(query).fetchall()]
    finally:
        conn.close()
def get_resume(db_path: Path = DEFAULT_DB, resume_id: int = 0) -> dict | None:
    """Look up one resume by id; ``None`` when no such row exists."""
    conn = sqlite3.connect(db_path)
    conn.row_factory = sqlite3.Row
    try:
        found = conn.execute(
            "SELECT * FROM resumes WHERE id=?", (resume_id,)
        ).fetchone()
        if not found:
            return None
        return _resume_as_dict(found)
    finally:
        conn.close()
def update_resume(
    db_path: Path = DEFAULT_DB,
    resume_id: int = 0,
    name: str | None = None,
    text: str | None = None,
) -> dict | None:
    """Change a resume's name and/or text and return the resulting row.

    Arguments left as ``None`` are not modified. Changing ``text`` also
    recomputes ``word_count``. Returns ``None`` if the resume does not exist.
    """
    # Collect the statements first, then run them in order: name, then text.
    statements: list[tuple[str, tuple]] = []
    if name is not None:
        statements.append((
            "UPDATE resumes SET name=?, updated_at=datetime('now') WHERE id=?",
            (name, resume_id),
        ))
    if text is not None:
        statements.append((
            "UPDATE resumes SET text=?, word_count=?, updated_at=datetime('now') WHERE id=?",
            (text, len(text.split()), resume_id),
        ))

    conn = sqlite3.connect(db_path)
    conn.row_factory = sqlite3.Row
    try:
        for sql, params in statements:
            conn.execute(sql, params)
        conn.commit()
        current = conn.execute(
            "SELECT * FROM resumes WHERE id=?", (resume_id,)
        ).fetchone()
        return _resume_as_dict(current) if current else None
    finally:
        conn.close()
def delete_resume(db_path: Path = DEFAULT_DB, resume_id: int = 0) -> None:
    """Remove the resume with the given id from the library."""
    statement = "DELETE FROM resumes WHERE id=?"
    connection = sqlite3.connect(db_path)
    try:
        connection.execute(statement, (resume_id,))
        connection.commit()
    finally:
        connection.close()
def set_default_resume(db_path: Path = DEFAULT_DB, resume_id: int = 0) -> None:
    """Make ``resume_id`` the default resume and clear the flag elsewhere.

    Both updates are committed together.
    """
    steps = (
        ("UPDATE resumes SET is_default=0", ()),
        ("UPDATE resumes SET is_default=1 WHERE id=?", (resume_id,)),
    )
    connection = sqlite3.connect(db_path)
    try:
        for sql, params in steps:
            connection.execute(sql, params)
        connection.commit()
    finally:
        connection.close()
def update_resume_synced_at(db_path: Path = DEFAULT_DB, resume_id: int = 0) -> None:
    """Stamp a library entry as synced to the profile (library→profile).

    Sets ``synced_at`` to the database's current time.
    """
    statement = "UPDATE resumes SET synced_at=datetime('now') WHERE id=?"
    connection = sqlite3.connect(db_path)
    try:
        connection.execute(statement, (resume_id,))
        connection.commit()
    finally:
        connection.close()
def update_resume_content(
    db_path: Path = DEFAULT_DB,
    resume_id: int = 0,
    text: str = "",
    struct_json: str | None = None,
) -> None:
    """Overwrite the text and ``struct_json`` of a library entry.

    ``word_count`` is recomputed from ``text``, and ``synced_at`` and
    ``updated_at`` are both set to the database's current time.

    Called by the profile→library sync path (PUT /api/settings/resume).
    """
    words = len(text.split()) if text else 0
    statement = (
        "UPDATE resumes\n"
        "SET text=?, struct_json=?, word_count=?,\n"
        "synced_at=datetime('now'), updated_at=datetime('now')\n"
        "WHERE id=?"
    )
    connection = sqlite3.connect(db_path)
    try:
        connection.execute(statement, (text, struct_json, words, resume_id))
        connection.commit()
    finally:
        connection.close()
def get_job_resume(db_path: Path = DEFAULT_DB, job_id: int = 0) -> dict | None:
    """Resolve which resume applies to a job.

    The resume linked through ``jobs.resume_id`` wins; otherwise the resume
    flagged ``is_default=1`` is used; otherwise ``None`` is returned.
    """
    linked_sql = (
        "SELECT r.* FROM resumes r\n"
        "JOIN jobs j ON j.resume_id = r.id\n"
        "WHERE j.id=?"
    )
    default_sql = "SELECT * FROM resumes WHERE is_default=1 LIMIT 1"

    conn = sqlite3.connect(db_path)
    conn.row_factory = sqlite3.Row
    try:
        linked = conn.execute(linked_sql, (job_id,)).fetchone()
        if linked:
            return _resume_as_dict(linked)
        fallback = conn.execute(default_sql).fetchone()
        if fallback:
            return _resume_as_dict(fallback)
        return None
    finally:
        conn.close()
def set_job_resume(db_path: Path = DEFAULT_DB, job_id: int = 0, resume_id: int = 0) -> None:
    """Link a specific resume to a job, overriding the default for that job."""
    statement = "UPDATE jobs SET resume_id=? WHERE id=?"
    connection = sqlite3.connect(db_path)
    try:
        connection.execute(statement, (resume_id, job_id))
        connection.commit()
    finally:
        connection.close()
# ── Training export helpers ───────────────────────────────────────────────────
def _strip_greeting(text: str) -> str:
"""Remove 'Dear X,' greeting line from cover letter text."""
lines = text.splitlines()
for i, line in enumerate(lines):
stripped_line = line.strip()
if stripped_line.lower().startswith("dear ") and stripped_line.endswith((",", ":")):
rest = lines[i + 1:]
while rest and not rest[0].strip():
rest = rest[1:]
result = "\n".join(rest).strip()
return result if result else text.strip()
return text.strip()
def get_db_pairs(db_path: Path) -> list[dict]:
    """Return curation metadata for ALL qualifying jobs.

    A job qualifies when its status is one of applied / phone_screen /
    interviewing / offer / hired and it has a non-empty cover letter. Jobs
    excluded from training are included too (``excluded=True``) so the
    curation UI can restore them. Rows are ordered by ``applied_at``
    descending; ``input_preview`` is the first 200 characters of the
    description.
    """
    query = (
        "SELECT id, title, company, description, status, "
        " excluded_from_training "
        "FROM jobs "
        "WHERE status IN ('applied','phone_screen','interviewing','offer','hired') "
        " AND cover_letter IS NOT NULL AND cover_letter != '' "
        "ORDER BY applied_at DESC"
    )
    conn = sqlite3.connect(db_path)
    conn.row_factory = sqlite3.Row
    try:
        jobs = conn.execute(query).fetchall()
    finally:
        conn.close()

    pairs: list[dict] = []
    for job in jobs:
        title = job["title"] or "unknown"
        company = job["company"] or "unknown"
        pairs.append({
            "job_id": job["id"],
            "title": job["title"] or "",
            "company": job["company"] or "",
            "status": job["status"],
            "instruction": f"Write a cover letter for the {title} position at {company}.",
            "input_preview": (job["description"] or "")[:200],
            "excluded": bool(job["excluded_from_training"]),
        })
    return pairs
def get_training_pairs(db_path: Path) -> list[dict]:
    """Return Alpaca-format training pairs for the JSONL export endpoint.

    Only jobs with a qualifying status (applied / phone_screen /
    interviewing / offer / hired), a non-empty cover letter and
    ``excluded_from_training = 0`` are returned, ordered by ``applied_at``
    descending. The cover letter is passed through ``_strip_greeting`` to
    form ``output``.
    """
    query = (
        "SELECT id, title, company, description, cover_letter "
        "FROM jobs "
        "WHERE status IN ('applied','phone_screen','interviewing','offer','hired') "
        " AND cover_letter IS NOT NULL AND cover_letter != '' "
        " AND excluded_from_training = 0 "
        "ORDER BY applied_at DESC"
    )
    conn = sqlite3.connect(db_path)
    conn.row_factory = sqlite3.Row
    try:
        jobs = conn.execute(query).fetchall()
    finally:
        conn.close()

    pairs: list[dict] = []
    for job in jobs:
        title = job["title"] or "unknown"
        company = job["company"] or "unknown"
        pairs.append({
            "instruction": f"Write a cover letter for the {title} position at {company}.",
            "input": job["description"] or "",
            "output": _strip_greeting(job["cover_letter"]),
            "source": "db",
            "job_id": job["id"],
        })
    return pairs
def set_training_exclusion(db_path: Path, job_id: int, excluded: bool) -> None:
    """Store the ``excluded_from_training`` flag (1 or 0) for one job."""
    flag = int(bool(excluded))
    statement = "UPDATE jobs SET excluded_from_training = ? WHERE id = ?"
    connection = sqlite3.connect(db_path)
    try:
        connection.execute(statement, (flag, job_id))
        connection.commit()
    finally:
        connection.close()

View file

@ -1,122 +0,0 @@
"""
db_migrate.py — Rails-style numbered SQL migration runner for Peregrine user DBs.
Migration files live in migrations/ (sibling to this script's parent directory),
named NNN_description.sql (e.g. 001_baseline.sql). They are applied in sorted
order and tracked in the schema_migrations table so each runs exactly once.
Usage:
from scripts.db_migrate import migrate_db
migrate_db(Path("/path/to/user.db"))
"""
import logging
import sqlite3
from pathlib import Path
log = logging.getLogger(__name__)

# Resolved at import time: peregrine repo root / migrations/
_MIGRATIONS_DIR = Path(__file__).parent.parent / "migrations"

# Bookkeeping table: one row per migration file that has been applied.
_CREATE_MIGRATIONS_TABLE = """
CREATE TABLE IF NOT EXISTS schema_migrations (
version TEXT PRIMARY KEY,
applied_at TEXT NOT NULL DEFAULT (datetime('now'))
)
"""


def _split_statements(sql: str) -> list[str]:
    """Split a migration script into individual non-empty SQL statements.

    Full-line ``--`` comments are removed BEFORE splitting on ``;`` so that a
    semicolon inside a comment cannot cut the comment in half and leave its
    tail to be executed as SQL.

    NOTE: the split is still a plain ``;`` split, so semicolons inside string
    literals or trigger bodies are not supported.
    """
    code_lines = [
        ln for ln in sql.splitlines()
        if not ln.strip().startswith("--")
    ]
    return [s.strip() for s in "\n".join(code_lines).split(";") if s.strip()]


def _add_column_target(stmt: str) -> "tuple[str, str] | None":
    """Return (table, column) for an ``ALTER TABLE .. ADD COLUMN`` statement, else None."""
    import re  # local import: the module does not import re at file level

    stmt_upper = stmt.upper()
    if "ALTER TABLE" not in stmt_upper or "ADD COLUMN" not in stmt_upper:
        return None
    m = re.match(
        r"ALTER\s+TABLE\s+(\w+)\s+ADD\s+COLUMN\s+(\w+)",
        stmt, re.IGNORECASE,
    )
    return (m.group(1), m.group(2)) if m else None


def _apply_statement(con: sqlite3.Connection, stmt: str, version: str) -> None:
    """Execute one statement, treating "already applied" conditions as no-ops."""
    # Pre-check: if this is ADD COLUMN and the column already exists, skip.
    # This guards against the recorded migrations and the actual schema being
    # out of step (e.g. a DB created from a newer schema).
    target = _add_column_target(stmt)
    if target is not None:
        tbl, col = target
        # SQLite column names are case-insensitive, so compare lower-cased.
        existing = {
            row[1].lower()
            for row in con.execute(f"PRAGMA table_info({tbl})")
        }
        if col.lower() in existing:
            log.info(
                "Migration %s: column %s.%s already exists, skipping",
                version, tbl, col,
            )
            return

    try:
        con.execute(stmt)
    except sqlite3.OperationalError as stmt_exc:
        msg = str(stmt_exc).lower()
        if "duplicate column name" in msg or "already exists" in msg:
            log.info(
                "Migration %s: statement already applied, skipping: %s",
                version, stmt_exc,
            )
        else:
            raise


def migrate_db(db_path: Path) -> list[str]:
    """Apply any pending migrations to db_path. Returns list of applied versions.

    Migration files are the ``*.sql`` files in ``_MIGRATIONS_DIR``, applied in
    sorted filename order.  A file's stem (e.g. ``001_baseline``) is its
    version; versions already present in ``schema_migrations`` are skipped.

    Raises:
        RuntimeError: if a migration fails.  The open transaction is rolled
            back and the original exception is chained as the cause.
    """
    applied: list[str] = []
    con = sqlite3.connect(db_path)
    try:
        con.execute(_CREATE_MIGRATIONS_TABLE)
        con.commit()

        if not _MIGRATIONS_DIR.is_dir():
            log.warning("migrations/ directory not found at %s — skipping", _MIGRATIONS_DIR)
            return applied

        migration_files = sorted(_MIGRATIONS_DIR.glob("*.sql"))
        if not migration_files:
            return applied

        already_applied = {
            row[0] for row in con.execute("SELECT version FROM schema_migrations")
        }

        for path in migration_files:
            version = path.stem  # e.g. "001_baseline"
            if version in already_applied:
                continue

            sql = path.read_text(encoding="utf-8")
            # Path(...) so a plain string db_path is accepted as well.
            log.info("Applying migration %s to %s", version, Path(db_path).name)
            try:
                # Execute statements individually so that statements which
                # were effectively applied already are treated as no-ops
                # rather than fatal failures.
                for stmt in _split_statements(sql):
                    _apply_statement(con, stmt, version)
                con.execute(
                    "INSERT INTO schema_migrations (version) VALUES (?)", (version,)
                )
                con.commit()
                applied.append(version)
                log.info("Migration %s applied successfully", version)
            except Exception as exc:
                con.rollback()
                log.error("Migration %s failed: %s", version, exc)
                raise RuntimeError(f"Migration {version} failed: {exc}") from exc
    finally:
        con.close()
    return applied

View file

@ -34,48 +34,17 @@ CUSTOM_SCRAPERS: dict[str, object] = {
}
def _normalize_profiles(raw: dict) -> dict:
"""Normalize search_profiles.yaml to the canonical {profiles: [...]} format.
The onboarding wizard (pre-fix) wrote a flat `default: {...}` structure.
Canonical format is `profiles: [{name, titles/job_titles, boards, ...}]`.
This converts on load so both formats work without a migration.
"""
if "profiles" in raw:
return raw
# Wizard-written format: top-level keys are profile names (usually "default")
profiles = []
for name, body in raw.items():
if not isinstance(body, dict):
continue
# job_boards: [{name, enabled}] → boards: [name] (enabled only)
job_boards = body.pop("job_boards", None)
if job_boards and "boards" not in body:
body["boards"] = [b["name"] for b in job_boards if b.get("enabled", True)]
# blocklist_* keys live in load_blocklist, not per-profile — drop them
body.pop("blocklist_companies", None)
body.pop("blocklist_industries", None)
body.pop("blocklist_locations", None)
profiles.append({"name": name, **body})
return {"profiles": profiles}
def load_config(config_dir: Path | None = None) -> tuple[dict, dict]:
cfg = config_dir or CONFIG_DIR
profiles_path = cfg / "search_profiles.yaml"
notion_path = cfg / "notion.yaml"
raw = yaml.safe_load(profiles_path.read_text()) or {}
profiles = _normalize_profiles(raw)
notion_cfg = yaml.safe_load(notion_path.read_text()) if notion_path.exists() else {"field_map": {}, "token": None, "database_id": None}
def load_config() -> tuple[dict, dict]:
profiles = yaml.safe_load(PROFILES_CFG.read_text())
notion_cfg = yaml.safe_load(NOTION_CFG.read_text())
return profiles, notion_cfg
def load_blocklist(config_dir: Path | None = None) -> dict:
def load_blocklist() -> dict:
"""Load global blocklist config. Returns dict with companies, industries, locations lists."""
blocklist_path = (config_dir or CONFIG_DIR) / "blocklist.yaml"
if not blocklist_path.exists():
if not BLOCKLIST_CFG.exists():
return {"companies": [], "industries": [], "locations": []}
raw = yaml.safe_load(blocklist_path.read_text()) or {}
raw = yaml.safe_load(BLOCKLIST_CFG.read_text()) or {}
return {
"companies": [c.lower() for c in raw.get("companies", []) if c],
"industries": [i.lower() for i in raw.get("industries", []) if i],
@ -148,15 +117,10 @@ def push_to_notion(notion: Client, db_id: str, job: dict, fm: dict) -> None:
)
def run_discovery(db_path: Path = DEFAULT_DB, notion_push: bool = False, config_dir: Path | None = None) -> None:
# In cloud mode, config_dir is the per-user config directory derived from db_path.
# Falls back to the app-level /app/config for single-tenant deployments.
resolved_cfg = config_dir or Path(db_path).parent / "config"
if not resolved_cfg.exists():
resolved_cfg = CONFIG_DIR
profiles_cfg, notion_cfg = load_config(resolved_cfg)
fm = notion_cfg.get("field_map") or {}
blocklist = load_blocklist(resolved_cfg)
def run_discovery(db_path: Path = DEFAULT_DB, notion_push: bool = False) -> None:
profiles_cfg, notion_cfg = load_config()
fm = notion_cfg["field_map"]
blocklist = load_blocklist()
_bl_summary = {k: len(v) for k, v in blocklist.items() if v}
if _bl_summary:
@ -239,44 +203,15 @@ def run_discovery(db_path: Path = DEFAULT_DB, notion_push: bool = False, config_
_rp = profile.get("remote_preference", "both")
_is_remote: bool | None = True if _rp == "remote" else (False if _rp == "onsite" else None)
# When filtering for remote-only, also drop hybrid roles at the description level.
# Job boards (especially LinkedIn) tag hybrid listings as is_remote=True, so the
# board-side filter alone is not reliable. We match specific work-arrangement
# phrases to avoid false positives like "hybrid cloud" or "hybrid architecture".
_HYBRID_PHRASES = [
"hybrid role", "hybrid position", "hybrid work", "hybrid schedule",
"hybrid model", "hybrid arrangement", "hybrid opportunity",
"in-office/remote", "in office/remote", "remote/in-office",
"remote/office", "office/remote",
"days in office", "days per week in", "days onsite", "days on-site",
"required to be in office", "required in office",
]
if _rp == "remote":
exclude_kw = exclude_kw + _HYBRID_PHRASES
for location in profile["locations"]:
# ── JobSpy boards ──────────────────────────────────────────────────
if boards:
# Validate boards against the installed JobSpy Site enum.
# One unsupported name in the list aborts the entire scrape_jobs() call.
try:
from jobspy import Site as _Site
_valid = {s.value for s in _Site}
_filtered = [b for b in boards if b in _valid]
_dropped = [b for b in boards if b not in _valid]
if _dropped:
print(f" [jobspy] Skipping unsupported boards: {', '.join(_dropped)}")
except ImportError:
_filtered = boards # fallback: pass through unchanged
if not _filtered:
print(f" [jobspy] No valid boards for {location} — skipping")
continue
print(f" [jobspy] {location} — boards: {', '.join(_filtered)}")
print(f" [jobspy] {location} — boards: {', '.join(boards)}")
try:
jobspy_kwargs: dict = dict(
site_name=_filtered,
search_term=" OR ".join(f'"{t}"' for t in (profile.get("titles") or profile.get("job_titles", []))),
site_name=boards,
search_term=" OR ".join(f'"{t}"' for t in profile["titles"]),
location=location,
results_wanted=results_per_board,
hours_old=profile.get("hours_old", 72),
@ -307,10 +242,6 @@ def run_discovery(db_path: Path = DEFAULT_DB, notion_push: bool = False, config_
elif job_dict.get("salary_source") and str(job_dict["salary_source"]) not in ("nan", "None", ""):
salary_str = str(job_dict["salary_source"])
_dp = job_dict.get("date_posted")
date_posted_str = (
_dp.isoformat() if hasattr(_dp, "isoformat") else str(_dp)
) if _dp and str(_dp) not in ("nan", "None", "") else ""
row = {
"url": url,
"title": _s(job_dict.get("title")),
@ -320,7 +251,6 @@ def run_discovery(db_path: Path = DEFAULT_DB, notion_push: bool = False, config_
"is_remote": bool(job_dict.get("is_remote", False)),
"salary": salary_str,
"description": _s(job_dict.get("description")),
"date_posted": date_posted_str,
"_exclude_kw": exclude_kw,
}
if _insert_if_new(row, _s(job_dict.get("site"))):

View file

@ -323,6 +323,6 @@ if gguf_path and gguf_path.exists():
else:
print(f"\n{'='*60}")
print(" Adapter saved (no GGUF produced).")
print(" Re-run without --no-gguf to generate a GGUF for Ollama registration.")
print(f" Re-run without --no-gguf to generate a GGUF for Ollama registration.")
print(f" Adapter path: {adapter_path}")
print(f"{'='*60}\n")

View file

@ -16,8 +16,6 @@ import re
import sys
from pathlib import Path
import yaml
sys.path.insert(0, str(Path(__file__).parent.parent))
from scripts.user_profile import UserProfile
@ -28,89 +26,130 @@ LETTERS_DIR = _profile.docs_dir if _profile else Path.home() / "Documents" / "Jo
LETTER_GLOB = "*Cover Letter*.md"
# Background injected into every prompt so the model has the candidate's facts
def _build_system_context(profile=None) -> str:
p = profile or _profile
if not p:
def _build_system_context() -> str:
if not _profile:
return "You are a professional cover letter writer. Write in first person."
parts = [f"You are writing cover letters for {p.name}. {p.career_summary}"]
if p.candidate_voice:
parts = [f"You are writing cover letters for {_profile.name}. {_profile.career_summary}"]
if _profile.candidate_voice:
parts.append(
f"Voice and personality: {p.candidate_voice} "
f"Voice and personality: {_profile.candidate_voice} "
"Write in a way that reflects these authentic traits — not as a checklist, "
"but as a natural expression of who this person is."
)
return " ".join(parts)
SYSTEM_CONTEXT = _build_system_context()
_candidate = _profile.name if _profile else "the candidate"
# ── Mission-alignment detection ───────────────────────────────────────────────
# Domains and their keyword signals are loaded from config/mission_domains.yaml.
# When a company/JD signals one of these preferred industries, the cover letter
# prompt injects a hint so Para 3 can reflect genuine personal connection.
# This does NOT disclose any personal disability or family information.
_MISSION_DOMAINS_PATH = Path(__file__).parent.parent / "config" / "mission_domains.yaml"
def load_mission_domains(path: Path | None = None) -> dict[str, dict]:
"""Load mission domain config from YAML. Returns dict keyed by domain name."""
p = path or _MISSION_DOMAINS_PATH
if not p.exists():
return {}
with p.open(encoding="utf-8") as fh:
data = yaml.safe_load(fh)
return data.get("domains", {}) if data else {}
_MISSION_DOMAINS: dict[str, dict] = load_mission_domains()
_MISSION_SIGNALS: dict[str, list[str]] = {
domain: cfg.get("signals", []) for domain, cfg in _MISSION_DOMAINS.items()
"music": [
"music", "spotify", "tidal", "soundcloud", "bandcamp", "apple music",
"distrokid", "cd baby", "landr", "beatport", "reverb", "vinyl",
"streaming", "artist", "label", "live nation", "ticketmaster", "aeg",
"songkick", "concert", "venue", "festival", "audio", "podcast",
"studio", "record", "musician", "playlist",
],
"animal_welfare": [
"animal", "shelter", "rescue", "humane society", "spca", "aspca",
"veterinary", "vet ", "wildlife", "pet ", "adoption", "foster",
"dog", "cat", "feline", "canine", "sanctuary", "zoo",
],
"education": [
"education", "school", "learning", "student", "edtech", "classroom",
"curriculum", "tutoring", "academic", "university", "kids", "children",
"youth", "literacy", "khan academy", "duolingo", "chegg", "coursera",
"instructure", "canvas lms", "clever", "district", "teacher",
"k-12", "k12", "grade", "pedagogy",
],
"social_impact": [
"nonprofit", "non-profit", "501(c)", "social impact", "mission-driven",
"public benefit", "community", "underserved", "equity", "justice",
"humanitarian", "advocacy", "charity", "foundation", "ngo",
"social good", "civic", "public health", "mental health", "food security",
"housing", "homelessness", "poverty", "workforce development",
],
# Health is listed last — it's a genuine but lower-priority connection than
# music/animals/education/social_impact. detect_mission_alignment returns on first
# match, so dict order = preference order.
"health": [
"patient", "patients", "healthcare", "health tech", "healthtech",
"pharma", "pharmaceutical", "clinical", "medical",
"hospital", "clinic", "therapy", "therapist",
"rare disease", "life sciences", "life science",
"treatment", "prescription", "biotech", "biopharma", "medtech",
"behavioral health", "population health",
"care management", "care coordination", "oncology", "specialty pharmacy",
"provider network", "payer", "health plan", "benefits administration",
"ehr", "emr", "fhir", "hipaa",
],
}
_candidate = _profile.name if _profile else "the candidate"
_MISSION_DEFAULTS: dict[str, str] = {
"music": (
f"This company is in the music industry — an industry {_candidate} finds genuinely "
"compelling. Para 3 should warmly and specifically reflect this authentic alignment, "
"not as a generic fan statement, but as an honest statement of where they'd love to "
"apply their skills."
),
"animal_welfare": (
f"This organization works in animal welfare/rescue — a mission {_candidate} finds "
"genuinely meaningful. Para 3 should reflect this authentic connection warmly and "
"specifically, tying their skills to this mission."
),
"education": (
f"This company works in education or EdTech — a domain that resonates with "
f"{_candidate}'s values. Para 3 should reflect this authentic connection specifically "
"and warmly."
),
"social_impact": (
f"This organization is mission-driven / social impact focused — exactly the kind of "
f"cause {_candidate} cares deeply about. Para 3 should warmly reflect their genuine "
"desire to apply their skills to work that makes a real difference in people's lives."
),
"health": (
f"This company works in healthcare, life sciences, or patient care. "
f"Do NOT write about {_candidate}'s passion for pharmaceuticals or healthcare as an "
"industry. Instead, Para 3 should reflect genuine care for the PEOPLE these companies "
"exist to serve — those navigating complex, often invisible, or unusual health journeys; "
"patients facing rare or poorly understood conditions; individuals whose situations don't "
"fit a clean category. The connection is to the humans behind the data, not the industry. "
"If the user has provided a personal note, use that to anchor Para 3 specifically."
),
}
def _build_mission_notes(profile=None, candidate_name: str | None = None) -> dict[str, str]:
"""Merge user's custom mission notes with YAML defaults.
For domains defined in mission_domains.yaml the default_note is used when
the user has not provided a custom note in user.yaml mission_preferences.
For user-defined domains (keys in mission_preferences that are NOT in the
YAML config), the custom note is used as-is; no signal detection applies.
"""
p = profile or _profile
name = candidate_name or (p.name if p else "the candidate")
prefs = p.mission_preferences if p else {}
notes: dict[str, str] = {}
for domain, cfg in _MISSION_DOMAINS.items():
default_note = (cfg.get("default_note") or "").strip()
custom = (prefs.get(domain) or "").strip()
def _build_mission_notes() -> dict[str, str]:
"""Merge user's custom mission notes with generic defaults."""
prefs = _profile.mission_preferences if _profile else {}
notes = {}
for industry, default_note in _MISSION_DEFAULTS.items():
custom = (prefs.get(industry) or "").strip()
if custom:
notes[domain] = (
f"Mission alignment — {name} shared: \"{custom}\". "
notes[industry] = (
f"Mission alignment — {_candidate} shared: \"{custom}\". "
"Para 3 should warmly and specifically reflect this authentic connection."
)
else:
notes[domain] = default_note
notes[industry] = default_note
return notes
_MISSION_NOTES = _build_mission_notes()
def detect_mission_alignment(
company: str, description: str, mission_notes: dict | None = None
) -> str | None:
"""Return a mission hint string if company/JD matches a configured domain, else None.
Checks domains in YAML file order (dict order = match priority).
"""
notes = mission_notes if mission_notes is not None else _MISSION_NOTES
def detect_mission_alignment(company: str, description: str) -> str | None:
"""Return a mission hint string if company/JD matches a preferred industry, else None."""
text = f"{company} {description}".lower()
for domain, signals in _MISSION_SIGNALS.items():
for industry, signals in _MISSION_SIGNALS.items():
if any(sig in text for sig in signals):
return notes.get(domain)
return _MISSION_NOTES[industry]
return None
@ -151,14 +190,10 @@ def build_prompt(
examples: list[dict],
mission_hint: str | None = None,
is_jobgether: bool = False,
system_context: str | None = None,
candidate_name: str | None = None,
) -> str:
ctx = system_context if system_context is not None else SYSTEM_CONTEXT
name = candidate_name or _candidate
parts = [ctx.strip(), ""]
parts = [SYSTEM_CONTEXT.strip(), ""]
if examples:
parts.append(f"=== STYLE EXAMPLES ({name}'s past letters) ===\n")
parts.append(f"=== STYLE EXAMPLES ({_candidate}'s past letters) ===\n")
for i, ex in enumerate(examples, 1):
parts.append(f"--- Example {i} ({ex['company']}) ---")
parts.append(ex["text"])
@ -186,7 +221,7 @@ def build_prompt(
)
parts.append(f"{recruiter_note}\n")
parts.append("Now write a new cover letter for:")
parts.append(f"Now write a new cover letter for:")
parts.append(f" Role: {title}")
parts.append(f" Company: {company}")
if description:
@ -196,14 +231,13 @@ def build_prompt(
return "\n".join(parts)
def _trim_to_letter_end(text: str, profile=None) -> str:
def _trim_to_letter_end(text: str) -> str:
"""Remove repetitive hallucinated content after the first complete sign-off.
Fine-tuned models sometimes loop after completing the letter. This cuts at
the first closing + candidate name so only the intended letter is saved.
"""
p = profile or _profile
candidate_first = (p.name.split()[0] if p else "").strip()
candidate_first = (_profile.name.split()[0] if _profile else "").strip()
pattern = (
r'(?:Warm regards|Sincerely|Best regards|Kind regards|Thank you)[,.]?\s*\n+\s*'
+ (re.escape(candidate_first) if candidate_first else r'\w+(?:\s+\w+)?')
@ -223,8 +257,6 @@ def generate(
feedback: str = "",
is_jobgether: bool = False,
_router=None,
config_path: "Path | None" = None,
user_yaml_path: "Path | None" = None,
) -> str:
"""Generate a cover letter and return it as a string.
@ -232,29 +264,15 @@ def generate(
and requested changes are appended to the prompt so the LLM revises rather
than starting from scratch.
user_yaml_path overrides the module-level profile required in cloud mode
so each user's name/voice/mission prefs are used instead of the global default.
_router is an optional pre-built LLMRouter (used in tests to avoid real LLM calls).
"""
# Per-call profile override (cloud mode: each user has their own user.yaml)
if user_yaml_path and Path(user_yaml_path).exists():
_prof = UserProfile(Path(user_yaml_path))
else:
_prof = _profile
sys_ctx = _build_system_context(_prof)
mission_notes = _build_mission_notes(_prof, candidate_name=(_prof.name if _prof else None))
candidate_name = _prof.name if _prof else _candidate
corpus = load_corpus()
examples = find_similar_letters(description or f"{title} {company}", corpus)
mission_hint = detect_mission_alignment(company, description, mission_notes=mission_notes)
mission_hint = detect_mission_alignment(company, description)
if mission_hint:
print(f"[cover-letter] Mission alignment detected for {company}", file=sys.stderr)
prompt = build_prompt(title, company, description, examples,
mission_hint=mission_hint, is_jobgether=is_jobgether,
system_context=sys_ctx, candidate_name=candidate_name)
mission_hint=mission_hint, is_jobgether=is_jobgether)
if previous_result:
prompt += f"\n\n---\nPrevious draft:\n{previous_result}"
@ -263,9 +281,8 @@ def generate(
if _router is None:
sys.path.insert(0, str(Path(__file__).parent.parent))
from scripts.llm_router import LLMRouter, CONFIG_PATH
resolved = config_path if (config_path and Path(config_path).exists()) else CONFIG_PATH
_router = LLMRouter(resolved)
from scripts.llm_router import LLMRouter
_router = LLMRouter()
print(f"[cover-letter] Generating for: {title} @ {company}", file=sys.stderr)
print(f"[cover-letter] Style examples: {[e['company'] for e in examples]}", file=sys.stderr)
@ -275,7 +292,7 @@ def generate(
# max_tokens=1200 caps generation at ~900 words — enough for any cover letter
# and prevents fine-tuned models from looping into repetitive garbage output.
result = _router.complete(prompt, max_tokens=1200)
return _trim_to_letter_end(result, _prof)
return _trim_to_letter_end(result)
def main() -> None:

View file

@ -1,254 +0,0 @@
#!/usr/bin/env python3
"""
Generate demo/seed.sql — committed seed INSERT statements for the demo DB.
Run whenever seed data needs to change:
conda run -n cf python scripts/generate_demo_seed.py
Outputs pure INSERT SQL (no DDL). Schema migrations are handled by db_migrate.py
at container startup. The seed SQL is loaded after migrations complete.
"""
from __future__ import annotations
from datetime import date, timedelta
from pathlib import Path
OUT_PATH = Path(__file__).parent.parent / "demo" / "seed.sql"
TODAY = date.today()
def _dago(n: int) -> str:
return (TODAY - timedelta(days=n)).isoformat()
def _dfrom(n: int) -> str:
return (TODAY + timedelta(days=n)).isoformat()
COVER_LETTER_SPOTIFY = """\
Dear Hiring Manager,
I'm excited to apply for the UX Designer role at Spotify. With five years of
experience designing for music discovery and cross-platform experiences, I've
consistently shipped features that make complex audio content feel effortless to
navigate. At my last role I led a redesign of the playlist creation flow that
reduced drop-off by 31%.
Spotify's commitment to artist and listener discovery — and its recent push into
audiobooks and podcast tooling aligns directly with the kind of cross-format
design challenges I'm most energised by.
I'd love to bring that focus to your product design team.
Warm regards,
[Your name]
"""
SQL_PARTS: list[str] = []
# ── Jobs ──────────────────────────────────────────────────────────────────────
# Columns: title, company, url, source, location, is_remote, salary,
# match_score, status, date_found, date_posted, cover_letter,
# applied_at, phone_screen_at, interviewing_at, offer_at, hired_at,
# interview_date, rejection_stage, hired_feedback
JOBS: list[tuple] = [
# ---- Review queue (12 jobs — mix of pending + approved) ------------------
("UX Designer",
"Spotify", "https://www.linkedin.com/jobs/view/1000001",
"linkedin", "Remote", 1, "$110k$140k",
94.0, "approved", _dago(1), _dago(3), COVER_LETTER_SPOTIFY,
None, None, None, None, None, None, None, None),
("Product Designer",
"Duolingo", "https://www.linkedin.com/jobs/view/1000002",
"linkedin", "Pittsburgh, PA", 0, "$95k$120k",
87.0, "approved", _dago(2), _dago(5), "Draft in progress — cover letter generating…",
None, None, None, None, None, None, None, None),
("UX Lead",
"NPR", "https://www.indeed.com/viewjob?jk=1000003",
"indeed", "Washington, DC", 1, "$120k$150k",
81.0, "approved", _dago(3), _dago(7), None,
None, None, None, None, None, None, None, None),
# Ghost post — date_posted 34 days ago → shadow indicator
("Senior UX Designer",
"Mozilla", "https://www.linkedin.com/jobs/view/1000004",
"linkedin", "Remote", 1, "$105k$130k",
81.0, "pending", _dago(2), _dago(34), None,
None, None, None, None, None, None, None, None),
("Interaction Designer",
"Figma", "https://www.indeed.com/viewjob?jk=1000005",
"indeed", "San Francisco, CA", 1, "$115k$145k",
78.0, "pending", _dago(4), _dago(6), None,
None, None, None, None, None, None, None, None),
("Product Designer II",
"Notion", "https://www.linkedin.com/jobs/view/1000006",
"linkedin", "Remote", 1, "$100k$130k",
76.0, "pending", _dago(5), _dago(8), None,
None, None, None, None, None, None, None, None),
("UX Designer",
"Stripe", "https://www.linkedin.com/jobs/view/1000007",
"linkedin", "Remote", 1, "$120k$150k",
74.0, "pending", _dago(6), _dago(9), None,
None, None, None, None, None, None, None, None),
("UI/UX Designer",
"Canva", "https://www.indeed.com/viewjob?jk=1000008",
"indeed", "Remote", 1, "$90k$115k",
72.0, "pending", _dago(7), _dago(10), None,
None, None, None, None, None, None, None, None),
("Senior Product Designer",
"Asana", "https://www.linkedin.com/jobs/view/1000009",
"linkedin", "San Francisco, CA", 1, "$125k$155k",
69.0, "pending", _dago(8), _dago(11), None,
None, None, None, None, None, None, None, None),
("UX Researcher",
"Intercom", "https://www.indeed.com/viewjob?jk=1000010",
"indeed", "Remote", 1, "$95k$120k",
67.0, "pending", _dago(9), _dago(12), None,
None, None, None, None, None, None, None, None),
("Product Designer",
"Linear", "https://www.linkedin.com/jobs/view/1000011",
"linkedin", "Remote", 1, "$110k$135k",
65.0, "pending", _dago(10), _dago(13), None,
None, None, None, None, None, None, None, None),
("UX Designer",
"Loom", "https://www.indeed.com/viewjob?jk=1000012",
"indeed", "Remote", 1, "$90k$110k",
62.0, "pending", _dago(11), _dago(14), None,
None, None, None, None, None, None, None, None),
# ---- Pipeline jobs (applied → hired) ------------------------------------
("Senior Product Designer",
"Asana", "https://www.asana.com/jobs/1000013",
"linkedin", "San Francisco, CA", 1, "$125k$155k",
91.0, "phone_screen", _dago(14), _dago(16), None,
_dago(7), _dfrom(0), None, None, None,
f"{_dfrom(0)}T14:00:00", None, None),
("Product Designer",
"Notion", "https://www.notion.so/jobs/1000014",
"indeed", "Remote", 1, "$100k$130k",
88.0, "interviewing", _dago(21), _dago(23), None,
_dago(14), _dago(10), _dago(3), None, None,
f"{_dfrom(7)}T10:00:00", None, None),
("Design Systems Designer",
"Figma", "https://www.figma.com/jobs/1000015",
"linkedin", "San Francisco, CA", 1, "$130k$160k",
96.0, "hired", _dago(45), _dago(47), None,
_dago(38), _dago(32), _dago(25), _dago(14), _dago(7),
None, None,
'{"factors":["clear_scope","great_manager","mission_aligned"],"notes":"Excited about design systems work. Salary met expectations."}'),
("UX Designer",
"Slack", "https://slack.com/jobs/1000016",
"indeed", "Remote", 1, "$115k$140k",
79.0, "applied", _dago(28), _dago(30), None,
_dago(18), None, None, None, None, None, None, None),
]
def _q(v: object) -> str:
"""SQL-quote a Python value."""
if v is None:
return "NULL"
return "'" + str(v).replace("'", "''") + "'"
_JOB_COLS = (
"title, company, url, source, location, is_remote, salary, "
"match_score, status, date_found, date_posted, cover_letter, "
"applied_at, phone_screen_at, interviewing_at, offer_at, hired_at, "
"interview_date, rejection_stage, hired_feedback"
)
# One INSERT per JOBS tuple; every value goes through _q, in _JOB_COLS order.
SQL_PARTS.append("-- jobs")
SQL_PARTS.extend(
    f"INSERT INTO jobs ({_JOB_COLS}) VALUES ({', '.join(map(_q, job))});"
    for job in JOBS
)
# ── Contacts ──────────────────────────────────────────────────────────────────
# (job_id, direction, subject, from_addr, to_addr, received_at, stage_signal)
CONTACTS: list[tuple] = [
(1, "inbound", "Excited to connect — UX Designer role at Spotify",
"jamie.chen@spotify.com", "you@example.com", _dago(3), "positive_response"),
(1, "outbound", "Re: Excited to connect — UX Designer role at Spotify",
"you@example.com", "jamie.chen@spotify.com", _dago(2), None),
(13, "inbound", "Interview Confirmation — Senior Product Designer",
"recruiting@asana.com", "you@example.com", _dago(2), "interview_scheduled"),
(14, "inbound", "Your panel interview is confirmed for Apr 22",
"recruiting@notion.so", "you@example.com", _dago(3), "interview_scheduled"),
(14, "inbound", "Pre-interview prep resources",
"marcus.webb@notion.so", "you@example.com", _dago(2), "positive_response"),
(15, "inbound", "Figma Design Systems — Offer Letter",
"offers@figma.com", "you@example.com", _dago(14), "offer_received"),
(15, "outbound", "Re: Figma Design Systems — Offer Letter (acceptance)",
"you@example.com", "offers@figma.com", _dago(10), None),
(15, "inbound", "Welcome to Figma! Onboarding next steps",
"onboarding@figma.com", "you@example.com", _dago(7), None),
(16, "inbound", "Thanks for applying to Slack",
"noreply@slack.com", "you@example.com", _dago(18), None),
]
# One INSERT per CONTACTS tuple.  job_id is emitted as a bare number; the
# remaining six fields are SQL-quoted with _q.
SQL_PARTS.append("\n-- job_contacts")
for job_id, *contact_fields in CONTACTS:
    direction, subject, from_addr, to_addr, received_at, stage_signal = contact_fields
    quoted = ", ".join(
        _q(field)
        for field in (direction, subject, from_addr, to_addr, received_at, stage_signal)
    )
    SQL_PARTS.append(
        "INSERT INTO job_contacts "
        "(job_id, direction, subject, from_addr, to_addr, received_at, stage_signal) "
        f"VALUES ({job_id}, {quoted});"
    )
# ── References ────────────────────────────────────────────────────────────────
# (name, email, role, company, relationship, notes, tags, prep_email)
REFERENCES: list[tuple] = [
("Dr. Priya Nair", "priya.nair@example.com", "Director of Design", "Acme Corp",
"former_manager",
"Managed me for 3 years on the consumer app redesign. Enthusiastic reference.",
'["manager","design"]',
"Hi Priya,\n\nI hope you're doing well! I'm currently interviewing for a few senior UX roles "
"and would be so grateful if you'd be willing to serve as a reference.\n\nThank you!\n[Your name]"),
("Sam Torres", "sam.torres@example.com", "Senior Product Designer", "Acme Corp",
"former_colleague",
"Worked together on design systems. Great at speaking to collaborative process.",
'["colleague","design_systems"]', None),
("Jordan Kim", "jordan.kim@example.com", "VP of Product", "Streamline Inc",
"former_manager",
"Led the product team I was embedded in. Can speak to business impact of design work.",
'["manager","product"]', None),
]
# One INSERT per REFERENCES tuple; all eight fields are SQL-quoted with _q.
SQL_PARTS.append("\n-- references_")
for reference in REFERENCES:
    name, email, role, company, relationship, notes, tags, prep_email = reference
    quoted = ", ".join(
        _q(field)
        for field in (name, email, role, company, relationship, notes, tags, prep_email)
    )
    SQL_PARTS.append(
        "INSERT INTO references_ "
        "(name, email, role, company, relationship, notes, tags, prep_email) "
        f"VALUES ({quoted});"
    )
# ── Write output ──────────────────────────────────────────────────────────────
# Join the collected statements (newline-terminated) and write the seed file.
output = "\n".join(SQL_PARTS) + "\n"
OUT_PATH.write_text(output, encoding="utf-8")

# Report what was written.
counts = f"{len(JOBS)} jobs, {len(CONTACTS)} contacts, {len(REFERENCES)} references"
print(f"Wrote {OUT_PATH} ({counts})")

View file

@ -392,7 +392,6 @@ def _has_todo_keyword(subject: str) -> bool:
_LINKEDIN_ALERT_SENDER = "jobalerts-noreply@linkedin.com"
_INDEED_ALERT_SENDER = "jobalerts@indeed.com"
# Social-proof / nav lines to skip when parsing alert blocks
_ALERT_SKIP_PHRASES = {
@ -448,75 +447,6 @@ def parse_linkedin_alert(body: str) -> list[dict]:
return jobs
def parse_indeed_alert(body: str) -> list[dict]:
    """
    Parse the HTML body of an Indeed Job Alert email.

    Returns a list of dicts: {title, company, location, salary, url}.
    URL is canonicalised to https://www.indeed.com/viewjob?jk=<id>
    (tracking parameters stripped).

    Returns an empty list when BeautifulSoup (bs4) is not installed or the
    body is empty.
    """
    try:
        from bs4 import BeautifulSoup as _BS
    except ImportError:
        return []

    jobs: list[dict] = []
    soup = _BS(body or "", "html.parser")

    # Every <a> whose href carries a jk=<id> parameter is a candidate job
    # link.  The first such anchor with a usable title wins for each jk; the
    # company, location and salary are read from the text of the nearest
    # enclosing <td>/<tr>/<div>.
    seen_jks: set[str] = set()
    for anchor in soup.find_all("a", href=True):
        href: str = anchor["href"]
        jk_m = re.search(r"[?&]jk=([a-z0-9]+)", href, re.IGNORECASE)
        if not jk_m:
            continue
        jk = jk_m.group(1)
        if jk in seen_jks:
            continue

        title = anchor.get_text(separator=" ", strip=True)
        if not title or len(title) < 3:
            # Text-less anchor (e.g. a logo/image link).  Do NOT mark the jk
            # as seen yet, or the titled anchor for the same job that follows
            # would be skipped and the job lost.
            continue
        seen_jks.add(jk)

        # Walk up to find the container cell/row and extract company + location
        container = anchor.find_parent(["td", "tr", "div"])
        company = location = salary = ""
        if container:
            text_lines = [
                t.strip() for t in container.get_text(separator="\n").splitlines()
                if t.strip() and t.strip().lower() != title.lower()
            ]
            if text_lines:
                company = text_lines[0]
            if len(text_lines) > 1:
                location = text_lines[1]
            # salary line often contains "$" or "/yr"
            for line in text_lines[2:]:
                if "$" in line or "/yr" in line.lower() or "/hour" in line.lower():
                    salary = line
                    break

        jobs.append({
            "title": title,
            "company": company,
            "location": location,
            "salary": salary,
            "url": f"https://www.indeed.com/viewjob?jk={jk}",
        })
    return jobs
def _scan_todo_label(conn: imaplib.IMAP4, cfg: dict, db_path: Path,
active_jobs: list[dict],
known_message_ids: set) -> int:
@ -628,29 +558,20 @@ def _scan_unmatched_leads(conn: imaplib.IMAP4, cfg: dict,
if mid in known_message_ids:
continue
# ── Job alert digests — parse each card deterministically ───────
from_lower = parsed["from_addr"].lower()
alert_cards: list[dict] = []
alert_source = ""
if _LINKEDIN_ALERT_SENDER in from_lower:
alert_cards = parse_linkedin_alert(parsed["body"])
alert_source = "linkedin"
elif _INDEED_ALERT_SENDER in from_lower:
alert_cards = parse_indeed_alert(parsed["body"])
alert_source = "indeed"
if alert_cards:
for card in alert_cards:
# ── LinkedIn Job Alert digest — parse each card individually ──────
if _LINKEDIN_ALERT_SENDER in parsed["from_addr"].lower():
cards = parse_linkedin_alert(parsed["body"])
for card in cards:
if card["url"] in existing_urls:
continue
job_id = insert_job(db_path, {
"title": card["title"],
"company": card["company"],
"url": card["url"],
"source": alert_source,
"location": card.get("location", ""),
"source": "linkedin",
"location": card["location"],
"is_remote": 0,
"salary": card.get("salary", ""),
"salary": "",
"description": "",
"date_found": datetime.now().isoformat()[:10],
})
@ -659,7 +580,7 @@ def _scan_unmatched_leads(conn: imaplib.IMAP4, cfg: dict,
submit_task(db_path, "scrape_url", job_id)
existing_urls.add(card["url"])
new_leads += 1
print(f"[imap] {alert_source} alert → {card['company']}{card['title']}")
print(f"[imap] LinkedIn alert → {card['company']}{card['title']}")
known_message_ids.add(mid)
continue # skip normal LLM extraction path

View file

@ -1,5 +1,5 @@
from __future__ import annotations
from datetime import datetime
from datetime import datetime, timedelta, timezone
from scripts.integrations.base import IntegrationBase

View file

@ -1,367 +0,0 @@
"""Job ranking engine — two-stage discovery → review pipeline.
Stage 1 (discover.py) scrapes a wide corpus and stores everything as 'pending'.
Stage 2 (this module) scores the corpus; GET /api/jobs/stack returns top-N best
matches for the user's current review session.
All signal functions return a float in [0, 1]. The final stack_score is 0–100.
Usage:
from scripts.job_ranker import rank_jobs
ranked = rank_jobs(jobs, search_titles, salary_min, salary_max, user_level)
"""
from __future__ import annotations
import logging
import math
import re
from datetime import datetime, timezone
# Module-level logger, named after this module.
_log = logging.getLogger(__name__)
# Max jobs passed to the reranker (avoids excessive inference time on large stacks).
# rank_jobs() slices the first _RERANK_POOL scored jobs before reranking them.
_RERANK_POOL = 50
def _try_rerank(resume_text: str, jobs: list[dict]) -> list[dict]:
"""Rerank jobs by cross-encoder relevance to resume text.
Returns jobs sorted best-first by the reranker. Falls back silently to the
input order if the reranker package is unavailable or inference fails.
"""
if not jobs:
return jobs
try:
from circuitforge_core.reranker import rerank
except ImportError:
return jobs
try:
descriptions = [j.get("description") or j.get("title", "") for j in jobs]
results = rerank(resume_text, descriptions, top_n=len(jobs))
# Map ranked candidates back to job dicts, handling duplicate descriptions
idx_queue: dict[str, list[int]] = {}
for i, d in enumerate(descriptions):
idx_queue.setdefault(d, []).append(i)
reranked: list[dict] = []
used: set[int] = set()
for r in results:
for idx in idx_queue.get(r.candidate, []):
if idx not in used:
reranked.append(jobs[idx])
used.add(idx)
break
# Safety: append anything the reranker didn't return
for i, j in enumerate(jobs):
if i not in used:
reranked.append(j)
return reranked
except Exception:
_log.warning("Reranker pass failed; using stack_score order.", exc_info=True)
return jobs
# ── TUNING ─────────────────────────────────────────────────────────────────────
# Adjust these constants to change how jobs are ranked.
# All individual signal scores are normalised to [0, 1] before weighting.
# Weights should sum to ≤ 1.0; the remainder is unallocated slack.
# (The five weights below currently sum to exactly 1.0.)
W_RESUME_MATCH = 0.40 # TF-IDF cosine similarity stored as match_score (0–100 → 0–1)
W_TITLE_MATCH = 0.30 # seniority-aware title + domain keyword overlap
W_RECENCY = 0.15 # freshness — exponential decay from date_found
W_SALARY_FIT = 0.10 # salary range overlap vs user target (neutral when unknown)
W_DESC_QUALITY = 0.05 # posting completeness — penalises stub / ghost posts
# Keyword gap penalty: each missing keyword from the resume match costs points.
# Gaps are already partially captured by W_RESUME_MATCH (same TF-IDF source),
# so this is a soft nudge, not a hard filter.
GAP_PENALTY_PER_KEYWORD: float = 0.5 # points off per gap keyword (0–100 scale)
GAP_MAX_PENALTY: float = 5.0 # hard cap so a gap-heavy job can still rank
# Recency half-life: score halves every N days past date_found
RECENCY_HALF_LIFE: int = 7 # days
# Description word-count thresholds
DESC_MIN_WORDS: int = 50 # below this → scaled penalty
DESC_TARGET_WORDS: int = 200 # at or above → full quality score
# ── END TUNING ─────────────────────────────────────────────────────────────────
# ── Seniority level map ────────────────────────────────────────────────────────
# (level, [keyword substrings that identify that level])
# Matched against " <lower_title> " (space-padded) so keywords can anchor on
# spaces. Level 3 is the default (mid-level, no seniority modifier in title).
_SENIORITY_MAP: list[tuple[int, list[str]]] = [
    (1, ["intern", "internship", "trainee", "apprentice", "co-op", "coop"]),
    (2, ["entry level", "entry-level", "junior", "jr ", "jr.", "associate "]),
    (3, ["mid level", "mid-level", "intermediate"]),
    (4, ["senior ", "senior,", "sr ", "sr.", " lead ", "lead,", " ii ", " iii ",
         "specialist", "experienced"]),
    (5, ["staff ", "principal ", "architect ", "expert ", "distinguished"]),
    (6, ["director", "head of ", "manager ", "vice president", " vp "]),
    (7, ["chief", "cto", "cio", "cpo", "president", "founder"]),
]
# (job_level − user_level) → scoring multiplier
# Positive delta = job is more senior (stretch up = encouraged)
# Negative delta = job is below the user's level
_LEVEL_MULTIPLIER: dict[int, float] = {
    -4: 0.05, -3: 0.10, -2: 0.25, -1: 0.65,
    0: 1.00,
    1: 0.90, 2: 0.65, 3: 0.25, 4: 0.05,
}
# Used for any delta outside the table above.
_DEFAULT_LEVEL_MULTIPLIER = 0.05


# ── Seniority helpers ─────────────────────────────────────────────────────────
def infer_seniority(title: str) -> int:
    """Return seniority level 1–7 for a job or resume title. Defaults to 3.

    The highest level whose keyword occurs in the title wins, with two
    safeguards against false positives:

    * Short, acronym-like keywords (four characters or fewer once padding and
      punctuation are stripped, e.g. "cto", "cio", "sr") must sit on word
      boundaries. Without this, "cto" fires inside "director", "doctor",
      "contractor" and "instructor", promoting all of them to level 7.
    * A keyword occurrence that lies entirely inside a longer matched keyword
      is ignored, so "president" inside "vice president" does not outrank
      the level-6 match.

    Args:
        title: Free-text title; ``None`` or empty is treated as no signal.

    Returns:
        An int in 1..7; 3 when no keyword matches.
    """
    padded = f" {(title or '').lower()} "

    # Collect every accepted keyword occurrence as (level, start, end).
    hits: list[tuple[int, int, int]] = []
    for level, keywords in _SENIORITY_MAP:
        for kw in keywords:
            needs_boundary = len(kw.strip(" .,")) <= 4
            start = padded.find(kw)
            while start != -1:
                end = start + len(kw)
                accepted = True
                if needs_boundary:
                    # The padding guarantees start >= 1 when kw starts with an
                    # alphanumeric, and end < len(padded) when it ends with one.
                    glued_left = kw[0].isalnum() and padded[start - 1].isalnum()
                    glued_right = kw[-1].isalnum() and padded[end].isalnum()
                    accepted = not (glued_left or glued_right)
                if accepted:
                    hits.append((level, start, end))
                start = padded.find(kw, start + 1)

    best = 0
    for level, start, end in hits:
        shadowed = any(
            other_start <= start
            and end <= other_end
            and (other_end - other_start) > (end - start)
            for _, other_start, other_end in hits
        )
        if not shadowed and level > best:
            best = level
    return best or 3
def seniority_from_experience(titles: list[str]) -> int:
    """Estimate the user's current level from their experience titles.

    Takes the first three non-blank titles (the list is expected to be
    most-recent first), infers a level for each and returns the average
    rounded with the built-in ``round``. Returns 3 (mid-level) when there
    are no usable titles.
    """
    recent = [title for title in titles if title.strip()][:3] if titles else []
    if not recent:
        return 3

    total = 0
    for title in recent:
        total += infer_seniority(title)
    return round(total / len(recent))
def _strip_level_words(text: str) -> str:
"""Remove seniority/modifier words so domain keywords stand out."""
strip = {
"senior", "sr", "junior", "jr", "lead", "staff", "principal",
"associate", "entry", "mid", "intermediate", "experienced",
"director", "head", "manager", "architect", "chief", "intern",
"ii", "iii", "iv", "i",
}
return " ".join(w for w in text.lower().split() if w not in strip)
# ── Signal functions ──────────────────────────────────────────────────────────
def title_match_score(job_title: str, search_titles: list[str], user_level: int) -> float:
    """Seniority-aware title similarity in [0, 1].

    Two ingredients are combined:

    * Domain overlap: for each search title, the fraction of its core words
      (level modifiers stripped, words of three or more characters) that
      also appear in the job title; the best fraction across all search
      titles is used.
    * Seniority factor: looked up from the gap between the job's inferred
      level and *user_level*.

    Returns 0.5 (neutral) when no search titles are configured.
    """
    if not search_titles:
        return 0.5  # neutral: the user has not set title preferences yet

    delta = infer_seniority(job_title) - user_level
    factor = _LEVEL_MULTIPLIER.get(delta, _DEFAULT_LEVEL_MULTIPLIER)

    def _core_terms(title: str) -> set:
        return {word for word in _strip_level_words(title).split() if len(word) > 2}

    job_terms = _core_terms(job_title)
    best_overlap = 0.0
    for wanted in search_titles:
        wanted_terms = _core_terms(wanted)
        if wanted_terms:
            # Recall-biased: share of the wanted keywords found in the job title.
            recall = len(wanted_terms & job_terms) / len(wanted_terms)
            best_overlap = max(best_overlap, recall)

    # Domain match scaled by seniority fit, plus a small seniority-only bonus
    # (factor × 0.15) so a near-miss domain match still gains from alignment.
    return min(1.0, best_overlap * factor + factor * 0.15)
def recency_decay(date_found: str) -> float:
    """Exponential freshness decay measured from *date_found*.

    Only the date part is used ("YYYY-MM-DD", optionally followed by a time
    after "T" or a space) and it is interpreted as midnight UTC. The score
    is 1.0 at age zero and halves every RECENCY_HALF_LIFE days; dates in the
    future are clamped to age zero. Returns 0.5 (neutral) on any failure to
    parse or compute.
    """
    try:
        date_part = date_found.split("T")[0].split(" ")[0]
        found_at = datetime.fromisoformat(date_part).replace(tzinfo=timezone.utc)
        elapsed = datetime.now(tz=timezone.utc) - found_at
        age_days = max(0.0, elapsed.total_seconds() / 86400)
        return math.exp(-math.log(2) * age_days / RECENCY_HALF_LIFE)
    except Exception:
        return 0.5
def _parse_salary_range(text: str | None) -> tuple[int | None, int | None]:
"""Extract (low, high) salary integers from free-text. Returns (None, None) on failure.
Handles: "$80k - $120k", "USD 80,000 - 120,000 per year", "£45,000",
"80000", "80K/yr", "80-120k", etc.
"""
if not text:
return None, None
normalized = re.sub(r"[$,£€₹¥\s]", "", text.lower())
# Match numbers optionally followed by 'k'
raw_nums = re.findall(r"(\d+(?:\.\d+)?)k?", normalized)
values = []
for n, full in zip(raw_nums, re.finditer(r"(\d+(?:\.\d+)?)(k?)", normalized)):
val = float(full.group(1))
if full.group(2): # ends with 'k'
val *= 1000
elif val < 1000: # bare numbers < 1000 are likely thousands (e.g., "80" in "80-120k")
val *= 1000
if val >= 10_000: # sanity: ignore clearly wrong values
values.append(int(val))
values = sorted(set(values))
if not values:
return None, None
return values[0], values[-1]
def salary_fit(
salary_text: str | None,
target_min: int | None,
target_max: int | None,
) -> float:
"""Salary range overlap score in [0, 1].
Returns 0.5 (neutral) when either range is unknown a missing salary
line is not inherently negative.
"""
if not salary_text or (target_min is None and target_max is None):
return 0.5
job_low, job_high = _parse_salary_range(salary_text)
if job_low is None:
return 0.5
t_min = target_min or 0
t_max = target_max or (int(target_min * 1.5) if target_min else job_high or job_low)
job_high = job_high or job_low
overlap_low = max(job_low, t_min)
overlap_high = min(job_high, t_max)
overlap = max(0, overlap_high - overlap_low)
target_span = max(1, t_max - t_min)
return min(1.0, overlap / target_span)
def description_quality(description: str | None) -> float:
"""Posting completeness score in [0, 1].
Stubs and ghost posts score near 0; well-written descriptions score 1.0.
"""
if not description:
return 0.0
words = len(description.split())
if words < DESC_MIN_WORDS:
return (words / DESC_MIN_WORDS) * 0.4 # steep penalty for stubs
if words >= DESC_TARGET_WORDS:
return 1.0
return 0.4 + 0.6 * (words - DESC_MIN_WORDS) / (DESC_TARGET_WORDS - DESC_MIN_WORDS)
# ── Composite scorer ──────────────────────────────────────────────────────────
def score_job(
    job: dict,
    search_titles: list[str],
    target_salary_min: int | None,
    target_salary_max: int | None,
    user_level: int,
) -> float:
    """Compute the composite stack_score (0–100) for a single job dict.

    Args:
        job: Row dict from the jobs table; the keys read are title,
            match_score, date_found, salary, description and keyword_gaps.
        search_titles: User's desired job titles (from search prefs).
        target_salary_min: Lower bound of the user's salary target, or None.
        target_salary_max: Upper bound of the user's salary target, or None.
        user_level: Inferred seniority level 1–7.

    Returns:
        A float 0–100 rounded to one decimal. Higher means a better match.
    """
    # ── Individual signals, each in [0, 1] ────────────────────────────────────
    raw_match = job.get("match_score")
    if raw_match is None:
        resume_signal = 0.5  # neutral when the job has not been scored yet
    else:
        resume_signal = raw_match / 100.0
    title_signal = title_match_score(job.get("title", ""), search_titles, user_level)
    recency_signal = recency_decay(job.get("date_found", ""))
    salary_signal = salary_fit(job.get("salary"), target_salary_min, target_salary_max)
    desc_signal = description_quality(job.get("description"))

    # ── Weighted sum ──────────────────────────────────────────────────────────
    weighted = (
        W_RESUME_MATCH * resume_signal
        + W_TITLE_MATCH * title_signal
        + W_RECENCY * recency_signal
        + W_SALARY_FIT * salary_signal
        + W_DESC_QUALITY * desc_signal
    )

    # ── Keyword gap penalty (defined on the 0–100 scale, capped) ─────────────
    gaps_text = job.get("keyword_gaps") or ""
    gap_total = 0
    if gaps_text:
        gap_total = sum(1 for gap in gaps_text.split(",") if gap.strip())
    penalty = min(GAP_MAX_PENALTY, gap_total * GAP_PENALTY_PER_KEYWORD) / 100.0

    return round(max(0.0, weighted - penalty) * 100, 1)
# ── Public API ────────────────────────────────────────────────────────────────
def rank_jobs(
jobs: list[dict],
search_titles: list[str],
target_salary_min: int | None = None,
target_salary_max: int | None = None,
user_level: int = 3,
limit: int = 10,
min_score: float = 20.0,
resume_text: str = "",
) -> list[dict]:
"""Score and rank pending jobs; return top-N above min_score.
Args:
jobs: List of job dicts (from DB or any source).
search_titles: User's desired job titles from search prefs.
target_salary_*: User's salary target (from resume profile).
user_level: Seniority level 17 (use seniority_from_experience()).
limit: Stack size; pass 0 to return all qualifying jobs.
min_score: Minimum stack_score to include (0100).
resume_text: Plain-text resume for cross-encoder reranking pass.
When provided, the top-_RERANK_POOL candidates are
reranked by (resume, description) relevance before
the limit is applied. Graceful no-op when empty.
Returns:
Sorted list (best first) with 'stack_score' key added to each dict.
"""
scored = []
for job in jobs:
s = score_job(job, search_titles, target_salary_min, target_salary_max, user_level)
if s >= min_score:
scored.append({**job, "stack_score": s})
scored.sort(key=lambda j: j["stack_score"], reverse=True)
if resume_text and scored:
pool = scored[:_RERANK_POOL]
pool = _try_rerank(resume_text, pool)
scored = pool + scored[_RERANK_POOL:]
return scored[:limit] if limit > 0 else scored

View file

@ -1,42 +0,0 @@
# BSL 1.1 — see LICENSE-BSL
"""LLM-assisted reply draft generation for inbound job contacts (BSL 1.1)."""
from __future__ import annotations
from pathlib import Path
from typing import Optional
# System prompt passed as `system=` to the router by generate_draft_reply().
# It asks for the reply body only and for [TODO: fill in] placeholders
# instead of invented facts.
_SYSTEM = (
"You are drafting a professional email reply on behalf of a job seeker. "
"Be concise and professional. Do not fabricate facts. If you are uncertain "
"about a detail, leave a [TODO: fill in] placeholder. "
"Output the reply body only — no subject line, no salutation preamble."
)
def _build_prompt(subject: str, from_addr: str, body: str, user_name: str, target_role: str) -> str:
return (
f"ORIGINAL EMAIL:\n"
f"Subject: {subject}\n"
f"From: {from_addr}\n"
f"Body:\n{body}\n\n"
f"USER PROFILE CONTEXT:\n"
f"Name: {user_name}\n"
f"Target role: {target_role}\n\n"
"Write a concise, professional reply to this email."
)
def generate_draft_reply(
    subject: str,
    from_addr: str,
    body: str,
    user_name: str,
    target_role: str,
    config_path: Optional[Path] = None,
) -> str:
    """Return a draft reply body for an inbound email, stripped of outer whitespace.

    Args:
        subject: Subject line of the email being answered.
        from_addr: Sender address of that email.
        body: Body text of that email.
        user_name: Name of the job seeker the reply is written for.
        target_role: Role the job seeker is targeting.
        config_path: Optional LLM config path forwarded to the router.
    """
    # Imported lazily so importing this module does not load the router.
    from scripts.llm_router import LLMRouter

    user_prompt = _build_prompt(subject, from_addr, body, user_name, target_role)
    llm = LLMRouter(config_path=config_path)
    # NOTE(review): complete() is called with system=/user= keywords; confirm
    # the router's complete() signature actually accepts a `user` argument.
    draft = llm.complete(system=_SYSTEM, user=user_prompt)
    return draft.strip()

View file

@ -1,46 +1,169 @@
"""
LLM abstraction layer with priority fallback chain.
Config lookup order:
1. <repo>/config/llm.yaml per-install local config
2. ~/.config/circuitforge/llm.yaml user-level config (circuitforge-core default)
3. env-var auto-config (ANTHROPIC_API_KEY, OPENAI_API_KEY, OLLAMA_HOST, )
Reads config/llm.yaml. Tries backends in order; falls back on any error.
"""
import os
import yaml
import requests
from pathlib import Path
from openai import OpenAI
from circuitforge_core.llm import LLMRouter as _CoreLLMRouter
# Kept for backwards-compatibility — external callers that import CONFIG_PATH
# from this module continue to work.
CONFIG_PATH = Path(__file__).parent.parent / "config" / "llm.yaml"
class LLMRouter(_CoreLLMRouter):
"""Peregrine-specific LLMRouter — tri-level config path priority.
class LLMRouter:
def __init__(self, config_path: Path = CONFIG_PATH):
with open(config_path) as f:
self.config = yaml.safe_load(f)
When ``config_path`` is supplied (e.g. in tests) it is passed straight
through to the core. When omitted, the lookup order is:
1. <repo>/config/llm.yaml (per-install local config)
2. ~/.config/circuitforge/llm.yaml (user-level, circuitforge-core default)
3. env-var auto-config (ANTHROPIC_API_KEY, OPENAI_API_KEY, OLLAMA_HOST )
def _is_reachable(self, base_url: str) -> bool:
    """Ping the backend's /health endpoint; True unless it errors or returns 5xx.

    The health URL is *base_url* with any trailing "/" and a trailing "/v1"
    removed, plus "/health". Any exception (timeout, connection refused, …)
    counts as unreachable.
    """
    root = base_url.rstrip("/").removesuffix("/v1")
    try:
        status = requests.get(root + "/health", timeout=2).status_code
    except Exception:
        return False
    return status < 500
def _resolve_model(self, client: OpenAI, model: str) -> str:
    """Return *model*, or the first model the server lists when it is "__auto__"."""
    if model == "__auto__":
        available = client.models.list()
        return available.data[0].id
    return model
def complete(self, prompt: str, system: str | None = None,
model_override: str | None = None,
fallback_order: list[str] | None = None,
images: list[str] | None = None,
max_tokens: int | None = None) -> str:
"""
Generate a completion. Tries each backend in fallback_order.
def __init__(self, config_path: Path | None = None) -> None:
if config_path is not None:
# Explicit path supplied — use it directly (e.g. tests, CLI override).
super().__init__(config_path)
return
model_override: when set, replaces the configured model for
openai_compat backends (e.g. pass a research-specific ollama model).
fallback_order: when set, overrides config fallback_order for this
call (e.g. pass config["research_fallback_order"] for research tasks).
images: optional list of base64-encoded PNG/JPG strings. When provided,
backends without supports_images=true are skipped. vision_service backends
are only tried when images is provided.
Raises RuntimeError if all backends are exhausted.
"""
if os.environ.get("DEMO_MODE", "").lower() in ("1", "true", "yes"):
raise RuntimeError(
"AI inference is disabled in the public demo. "
"Run your own instance to use AI features."
)
order = fallback_order if fallback_order is not None else self.config["fallback_order"]
for name in order:
backend = self.config["backends"][name]
local = Path(__file__).parent.parent / "config" / "llm.yaml"
user_level = Path.home() / ".config" / "circuitforge" / "llm.yaml"
if local.exists():
super().__init__(local)
elif user_level.exists():
super().__init__(user_level)
if not backend.get("enabled", True):
print(f"[LLMRouter] {name}: disabled, skipping")
continue
supports_images = backend.get("supports_images", False)
is_vision_service = backend["type"] == "vision_service"
# vision_service only used when images provided
if is_vision_service and not images:
print(f"[LLMRouter] {name}: vision_service skipped (no images)")
continue
# non-vision backends skipped when images provided and they don't support it
if images and not supports_images and not is_vision_service:
print(f"[LLMRouter] {name}: no image support, skipping")
continue
if is_vision_service:
if not self._is_reachable(backend["base_url"]):
print(f"[LLMRouter] {name}: unreachable, skipping")
continue
try:
resp = requests.post(
backend["base_url"].rstrip("/") + "/analyze",
json={
"prompt": prompt,
"image_base64": images[0] if images else "",
},
timeout=60,
)
resp.raise_for_status()
print(f"[LLMRouter] Used backend: {name} (vision_service)")
return resp.json()["text"]
except Exception as e:
print(f"[LLMRouter] {name}: error — {e}, trying next")
continue
elif backend["type"] == "openai_compat":
if not self._is_reachable(backend["base_url"]):
print(f"[LLMRouter] {name}: unreachable, skipping")
continue
try:
client = OpenAI(
base_url=backend["base_url"],
api_key=backend.get("api_key") or "any",
)
raw_model = model_override or backend["model"]
model = self._resolve_model(client, raw_model)
messages = []
if system:
messages.append({"role": "system", "content": system})
if images and supports_images:
content = [{"type": "text", "text": prompt}]
for img in images:
content.append({
"type": "image_url",
"image_url": {"url": f"data:image/png;base64,{img}"},
})
messages.append({"role": "user", "content": content})
else:
# No yaml found — let circuitforge-core's env-var auto-config run.
# The core default CONFIG_PATH (~/.config/circuitforge/llm.yaml)
# won't exist either, so _auto_config_from_env() will be triggered.
super().__init__()
messages.append({"role": "user", "content": prompt})
create_kwargs: dict = {"model": model, "messages": messages}
if max_tokens is not None:
create_kwargs["max_tokens"] = max_tokens
resp = client.chat.completions.create(**create_kwargs)
print(f"[LLMRouter] Used backend: {name} ({model})")
return resp.choices[0].message.content
except Exception as e:
print(f"[LLMRouter] {name}: error — {e}, trying next")
continue
elif backend["type"] == "anthropic":
api_key = os.environ.get(backend["api_key_env"], "")
if not api_key:
print(f"[LLMRouter] {name}: {backend['api_key_env']} not set, skipping")
continue
try:
import anthropic as _anthropic
client = _anthropic.Anthropic(api_key=api_key)
if images and supports_images:
content = []
for img in images:
content.append({
"type": "image",
"source": {"type": "base64", "media_type": "image/png", "data": img},
})
content.append({"type": "text", "text": prompt})
else:
content = prompt
kwargs: dict = {
"model": backend["model"],
"max_tokens": 4096,
"messages": [{"role": "user", "content": content}],
}
if system:
kwargs["system"] = system
msg = client.messages.create(**kwargs)
print(f"[LLMRouter] Used backend: {name}")
return msg.content[0].text
except Exception as e:
print(f"[LLMRouter] {name}: error — {e}, trying next")
continue
raise RuntimeError("All LLM backends exhausted")
# Module-level singleton for convenience

View file

@ -1,285 +0,0 @@
"""
DB helpers for the messaging feature.
Messages table: manual log entries and LLM drafts (one row per message).
Message templates table: built-in seeds and user-created templates.
Conventions (match scripts/db.py):
- All functions take db_path: Path as first argument.
- sqlite3.connect(db_path), row_factory = sqlite3.Row
- Return plain dicts (dict(row))
- Always close connection in finally
"""
import sqlite3
from datetime import datetime, timezone
from pathlib import Path
from typing import Optional
# ---------------------------------------------------------------------------
# Internal helpers
# ---------------------------------------------------------------------------
def _connect(db_path: Path) -> sqlite3.Connection:
con = sqlite3.connect(db_path)
con.row_factory = sqlite3.Row
return con
def _now_utc() -> str:
"""Return current UTC time as ISO 8601 string."""
return datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S")
# ---------------------------------------------------------------------------
# Messages
# ---------------------------------------------------------------------------
def create_message(
    db_path: Path,
    *,
    job_id: Optional[int],
    job_contact_id: Optional[int],
    type: str,
    direction: str,
    subject: Optional[str],
    body: Optional[str],
    from_addr: Optional[str],
    to_addr: Optional[str],
    template_id: Optional[int],
    logged_at: Optional[str] = None,
) -> dict:
    """Insert a new row into ``messages`` and return the stored row as a dict.

    ``logged_at`` falls back to the current UTC timestamp when it is omitted
    or empty. The connection is always closed, even on failure.
    """
    conn = _connect(db_path)
    try:
        values = (
            job_id, job_contact_id, type, direction, subject, body,
            from_addr, to_addr, logged_at or _now_utc(), template_id,
        )
        cursor = conn.execute(
            """
INSERT INTO messages
(job_id, job_contact_id, type, direction, subject, body,
from_addr, to_addr, logged_at, template_id)
VALUES
(?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
""",
            values,
        )
        conn.commit()
        # Re-read the row so the returned dict reflects what was stored.
        inserted = conn.execute(
            "SELECT * FROM messages WHERE id = ?", (cursor.lastrowid,)
        ).fetchone()
        return dict(inserted)
    finally:
        conn.close()
def list_messages(
    db_path: Path,
    *,
    job_id: Optional[int] = None,
    type: Optional[str] = None,
    direction: Optional[str] = None,
    limit: int = 100,
) -> list[dict]:
    """Return up to *limit* messages, newest ``logged_at`` first.

    Each filter that is not ``None`` adds an equality condition; all
    conditions are combined with AND.
    """
    clauses: list[str] = []
    bindings: list = []
    for column, wanted in (("job_id", job_id), ("type", type), ("direction", direction)):
        if wanted is None:
            continue
        clauses.append(f"{column} = ?")
        bindings.append(wanted)

    where = "WHERE " + " AND ".join(clauses) if clauses else ""
    bindings.append(limit)

    conn = _connect(db_path)
    try:
        cursor = conn.execute(
            f"SELECT * FROM messages {where} ORDER BY logged_at DESC LIMIT ?",
            bindings,
        )
        return [dict(record) for record in cursor.fetchall()]
    finally:
        conn.close()
def delete_message(db_path: Path, message_id: int) -> None:
    """Delete the message with the given id.

    Raises:
        KeyError: If no message with *message_id* exists.
    """
    conn = _connect(db_path)
    try:
        existing = conn.execute(
            "SELECT id FROM messages WHERE id = ?", (message_id,)
        ).fetchone()
        if existing is None:
            raise KeyError(f"Message {message_id} not found")
        conn.execute("DELETE FROM messages WHERE id = ?", (message_id,))
        conn.commit()
    finally:
        conn.close()
def approve_message(db_path: Path, message_id: int) -> dict:
    """Stamp ``approved_at`` with the current UTC time and return the updated row.

    Raises:
        KeyError: If no message with *message_id* exists.
    """
    lookup = (message_id,)
    conn = _connect(db_path)
    try:
        existing = conn.execute(
            "SELECT id FROM messages WHERE id = ?", lookup
        ).fetchone()
        if existing is None:
            raise KeyError(f"Message {message_id} not found")

        conn.execute(
            "UPDATE messages SET approved_at = ? WHERE id = ?",
            (_now_utc(), message_id),
        )
        conn.commit()

        refreshed = conn.execute(
            "SELECT * FROM messages WHERE id = ?", lookup
        ).fetchone()
        return dict(refreshed)
    finally:
        conn.close()
# ---------------------------------------------------------------------------
# Templates
# ---------------------------------------------------------------------------
def list_templates(db_path: Path) -> list[dict]:
    """Return every template as a dict, built-ins first, then by title A→Z."""
    conn = _connect(db_path)
    try:
        cursor = conn.execute(
            "SELECT * FROM message_templates ORDER BY is_builtin DESC, title ASC"
        )
        return [dict(record) for record in cursor.fetchall()]
    finally:
        conn.close()
def create_template(
    db_path: Path,
    *,
    title: str,
    category: str = "custom",
    subject_template: Optional[str] = None,
    body_template: str,
) -> dict:
    """Insert a user-defined template (``is_builtin = 0``) and return it as a dict."""
    conn = _connect(db_path)
    try:
        cursor = conn.execute(
            """
INSERT INTO message_templates
(title, category, subject_template, body_template, is_builtin)
VALUES
(?, ?, ?, ?, 0)
""",
            (title, category, subject_template, body_template),
        )
        conn.commit()
        # Re-read the row so the returned dict reflects what was stored.
        inserted = conn.execute(
            "SELECT * FROM message_templates WHERE id = ?", (cursor.lastrowid,)
        ).fetchone()
        return dict(inserted)
    finally:
        conn.close()
def update_template(db_path: Path, template_id: int, **fields) -> dict:
    """Update editable fields on a user-defined template and return the row.

    Editable fields are title, category, subject_template and body_template.
    When no fields are given the current row is returned unchanged. Field
    names are validated before the database is touched, and ``updated_at``
    is set to the current UTC time on every real update.

    Raises:
        ValueError: If any keyword is not an editable field.
        KeyError: If the template does not exist.
        PermissionError: If the template is a built-in (is_builtin=1).
    """
    if not fields:
        # Nothing to change: just report the current state.
        conn = _connect(db_path)
        try:
            current = conn.execute(
                "SELECT * FROM message_templates WHERE id = ?", (template_id,)
            ).fetchone()
            if current is None:
                raise KeyError(f"Template {template_id} not found")
            return dict(current)
        finally:
            conn.close()

    editable = {
        "title", "category", "subject_template", "body_template",
    }
    invalid = set(fields) - editable
    if invalid:
        raise ValueError(f"Cannot update field(s): {invalid}")

    conn = _connect(db_path)
    try:
        existing = conn.execute(
            "SELECT id, is_builtin FROM message_templates WHERE id = ?",
            (template_id,),
        ).fetchone()
        if existing is None:
            raise KeyError(f"Template {template_id} not found")
        if existing["is_builtin"]:
            raise PermissionError(
                f"Template {template_id} is a built-in and cannot be modified"
            )

        # Column names come from the validated keyword names; values are bound.
        set_clause = ", ".join([f"{column} = ?" for column in fields])
        bindings = [*fields.values(), _now_utc(), template_id]
        conn.execute(
            f"UPDATE message_templates SET {set_clause}, updated_at = ? WHERE id = ?",
            bindings,
        )
        conn.commit()

        refreshed = conn.execute(
            "SELECT * FROM message_templates WHERE id = ?", (template_id,)
        ).fetchone()
        return dict(refreshed)
    finally:
        conn.close()
def delete_template(db_path: Path, template_id: int) -> None:
    """Delete a user-defined template.

    Raises:
        KeyError: If the template does not exist.
        PermissionError: If the template is a built-in (is_builtin=1).
    """
    conn = _connect(db_path)
    try:
        existing = conn.execute(
            "SELECT id, is_builtin FROM message_templates WHERE id = ?",
            (template_id,),
        ).fetchone()
        if existing is None:
            raise KeyError(f"Template {template_id} not found")
        if existing["is_builtin"]:
            raise PermissionError(
                f"Template {template_id} is a built-in and cannot be deleted"
            )
        conn.execute("DELETE FROM message_templates WHERE id = ?", (template_id,))
        conn.commit()
    finally:
        conn.close()
def update_message_body(db_path: Path, message_id: int, body: str) -> dict:
    """Replace the body text of a message and return the updated row.

    Intended for editing drafts; the approval state is not checked here.

    Raises:
        KeyError: If no message with *message_id* exists.
    """
    lookup = (message_id,)
    conn = _connect(db_path)
    try:
        existing = conn.execute("SELECT id FROM messages WHERE id=?", lookup).fetchone()
        if not existing:
            raise KeyError(f"message {message_id} not found")
        conn.execute("UPDATE messages SET body=? WHERE id=?", (body, message_id))
        conn.commit()
        refreshed = conn.execute("SELECT * FROM messages WHERE id=?", lookup).fetchone()
        return dict(refreshed)
    finally:
        conn.close()

View file

@ -25,6 +25,7 @@ import argparse
import shutil
import sys
from pathlib import Path
from textwrap import dedent
import yaml

Some files were not shown because too many files have changed in this diff Show more