Compare commits

..

No commits in common. "main" and "feature/vue-streamlit-parity" have entirely different histories.

228 changed files with 1595 additions and 23820 deletions

View file

@ -1,44 +0,0 @@
# git-cliff changelog configuration for Peregrine
# See: https://git-cliff.org/docs/configuration
[changelog]
header = """
# Changelog\n
"""
body = """
{% if version %}\
## [{{ version | trim_start_matches(pat="v") }}] - {{ timestamp | date(format="%Y-%m-%d") }}
{% else %}\
## [Unreleased]
{% endif %}\
{% for group, commits in commits | group_by(attribute="group") %}
### {{ group | upper_first }}
{% for commit in commits %}
- {% if commit.scope %}**{{ commit.scope }}:** {% endif %}{{ commit.message | upper_first }}\
{% endfor %}
{% endfor %}\n
"""
trim = true
[git]
conventional_commits = true
filter_unconventional = true
split_commits = false
commit_preprocessors = []
commit_parsers = [
{ message = "^feat", group = "Features" },
{ message = "^fix", group = "Bug Fixes" },
{ message = "^perf", group = "Performance" },
{ message = "^refactor", group = "Refactoring" },
{ message = "^docs", group = "Documentation" },
{ message = "^test", group = "Testing" },
{ message = "^chore", group = "Chores" },
{ message = "^ci", group = "CI/CD" },
{ message = "^revert", group = "Reverts" },
]
filter_commits = false
tag_pattern = "v[0-9].*"
skip_tags = ""
ignore_tags = ""
topo_order = false
sort_commits = "oldest"

View file

@ -2,10 +2,9 @@
# Auto-generated by the setup wizard, or fill in manually. # Auto-generated by the setup wizard, or fill in manually.
# NEVER commit .env to git. # NEVER commit .env to git.
STREAMLIT_PORT=8502 STREAMLIT_PORT=8501
OLLAMA_PORT=11434 OLLAMA_PORT=11434
VLLM_PORT=8000 VLLM_PORT=8000
CF_TEXT_PORT=8006
SEARXNG_PORT=8888 SEARXNG_PORT=8888
VISION_PORT=8002 VISION_PORT=8002
VISION_MODEL=vikhyatk/moondream2 VISION_MODEL=vikhyatk/moondream2
@ -16,19 +15,10 @@ OLLAMA_MODELS_DIR=~/models/ollama
VLLM_MODELS_DIR=~/models/vllm # override with full path to your model dir VLLM_MODELS_DIR=~/models/vllm # override with full path to your model dir
VLLM_MODEL=Ouro-1.4B # cover letters — fast 1.4B model VLLM_MODEL=Ouro-1.4B # cover letters — fast 1.4B model
VLLM_RESEARCH_MODEL=Ouro-2.6B-Thinking # research — reasoning 2.6B model; restart vllm to switch VLLM_RESEARCH_MODEL=Ouro-2.6B-Thinking # research — reasoning 2.6B model; restart vllm to switch
CF_TEXT_MODEL=/Library/Assets/LLM/qwen2.5-3b-instruct-q4_k_m.gguf # cf-text GGUF model; set to "mock" to disable
VLLM_MAX_MODEL_LEN=4096 # increase to 8192 for Thinking models with long CoT VLLM_MAX_MODEL_LEN=4096 # increase to 8192 for Thinking models with long CoT
VLLM_GPU_MEM_UTIL=0.75 # lower to 0.6 if sharing GPU with other services VLLM_GPU_MEM_UTIL=0.75 # lower to 0.6 if sharing GPU with other services
OLLAMA_DEFAULT_MODEL=llama3.2:3b OLLAMA_DEFAULT_MODEL=llama3.2:3b
# ── LLM env-var auto-config (alternative to config/llm.yaml) ─────────────────
# Set any of these to configure LLM backends without needing a config/llm.yaml.
# Priority: Anthropic > OpenAI-compat > Ollama (always tried as local fallback).
OLLAMA_HOST=http://localhost:11434 # Ollama host; override if on a different machine
OLLAMA_MODEL=llama3.2:3b # model to request from Ollama
OPENAI_MODEL=gpt-4o-mini # model override for OpenAI-compat backend
ANTHROPIC_MODEL=claude-haiku-4-5-20251001 # model override for Anthropic backend
# API keys (required for remote profile) # API keys (required for remote profile)
ANTHROPIC_API_KEY= ANTHROPIC_API_KEY=
OPENAI_COMPAT_URL= OPENAI_COMPAT_URL=
@ -41,26 +31,6 @@ FORGEJO_API_URL=https://git.opensourcesolarpunk.com/api/v1
# GITHUB_TOKEN= # future — enable when public mirror is active # GITHUB_TOKEN= # future — enable when public mirror is active
# GITHUB_REPO= # future # GITHUB_REPO= # future
# ── CF-hosted coordinator (Paid+ tier) ───────────────────────────────────────
# Set CF_LICENSE_KEY to authenticate with the hosted coordinator.
# Leave both blank for local self-hosted cf-orch or bare-metal inference.
CF_LICENSE_KEY=
GPU_SERVER_URL=https://orch.circuitforge.tech
# CF_ORCH_URL is also accepted as a backward-compat alias for GPU_SERVER_URL
# cf-orch agent — GPU profiles only (single-gpu, dual-gpu-*)
# The agent registers this node with the cf-orch coordinator and reports VRAM stats.
# CF_ORCH_COORDINATOR_URL: coordinator the agent registers with
# CF_ORCH_NODE_ID: name shown on the dashboard (default: peregrine)
# CF_ORCH_AGENT_PORT: host port for the agent HTTP server (default: 7701)
# CF_ORCH_ADVERTISE_HOST: IP the coordinator uses to reach back to this agent.
# Defaults to 127.0.0.1 (same-host coordinator).
# Set to your host LAN IP for a remote coordinator.
CF_ORCH_COORDINATOR_URL=http://localhost:7700
CF_ORCH_NODE_ID=peregrine
CF_ORCH_AGENT_PORT=7701
#CF_ORCH_ADVERTISE_HOST=10.1.10.71
# Cloud multi-tenancy (compose.cloud.yml only — do not set for local installs) # Cloud multi-tenancy (compose.cloud.yml only — do not set for local installs)
CLOUD_MODE=false CLOUD_MODE=false
CLOUD_DATA_ROOT=/devl/menagerie-data CLOUD_DATA_ROOT=/devl/menagerie-data

View file

@ -1,63 +0,0 @@
# Peregrine CI — lint, type-check, test on PR/push
# Full-stack: FastAPI (Python) + Vue 3 SPA (Node)
# Adapted from Circuit-Forge/cf-agents workflows/ci.yml (cf-agents#4 tracks the
# upstream ci-fullstack.yml variant; update this file when that lands).
name: CI
on:
push:
branches: [main, 'feature/**', 'fix/**', 'freeze/**']
pull_request:
branches: [main]
jobs:
backend:
name: Backend (Python)
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: '3.12'
cache: pip
- name: Install system dependencies
run: sudo apt-get update -q && sudo apt-get install -y libsqlcipher-dev
- name: Install dependencies
run: pip install -r requirements.txt
- name: Install lint tools
run: pip install ruff
- name: Lint
run: ruff check .
- name: Test
run: pytest tests/ -v --tb=short
frontend:
name: Frontend (Vue)
runs-on: ubuntu-latest
defaults:
run:
working-directory: web
steps:
- uses: actions/checkout@v4
- uses: actions/setup-node@v4
with:
node-version: '20'
cache: npm
cache-dependency-path: web/package-lock.json
- name: Install dependencies
run: npm ci
- name: Type check
run: npx vue-tsc --noEmit
- name: Test
run: npm run test

View file

@ -1,35 +0,0 @@
# Mirror push to GitHub and Codeberg on every push to main or tag.
# Copied from Circuit-Forge/cf-agents workflows/mirror.yml
# Required secrets: GH_MIRROR_TOKEN, CODEBERG_MIRROR_TOKEN
# Note: Forgejo reserves the GITHUB_* prefix for secret names — use GH_* instead.
name: Mirror
on:
push:
branches: [main]
tags: ['v*']
jobs:
mirror:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Mirror to GitHub
env:
GH_MIRROR_PAT: ${{ secrets.GH_MIRROR_TOKEN }}
REPO: ${{ github.event.repository.name }}
run: |
git remote add github "https://x-access-token:${GH_MIRROR_PAT}@github.com/CircuitForgeLLC/${REPO}.git"
git push github --mirror
- name: Mirror to Codeberg
env:
CODEBERG_TOKEN: ${{ secrets.CODEBERG_MIRROR_TOKEN }}
REPO: ${{ github.event.repository.name }}
run: |
git remote add codeberg "https://CircuitForge:${CODEBERG_TOKEN}@codeberg.org/CircuitForge/${REPO}.git"
git push codeberg --mirror

View file

@ -1,71 +0,0 @@
# Tag-triggered release workflow.
# Generates changelog and creates Forgejo release on v* tags.
# Copied from Circuit-Forge/cf-agents workflows/release.yml
#
# Docker push is intentionally disabled — BSL 1.1 registry policy not yet resolved.
# Tracked in Circuit-Forge/cf-agents#3. Re-enable the Docker steps when that lands.
#
# Required secrets: FORGEJO_RELEASE_TOKEN
# (GHCR_TOKEN not needed until Docker push is enabled)
name: Release
on:
push:
tags: ['v*']
jobs:
release:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
# ── Changelog ────────────────────────────────────────────────────────────
- name: Generate changelog
uses: orhun/git-cliff-action@v3
id: cliff
with:
config: .cliff.toml
args: --latest --strip header
env:
OUTPUT: CHANGES.md
# ── Docker (disabled — BSL registry policy pending cf-agents#3) ──────────
# - name: Set up QEMU
# uses: docker/setup-qemu-action@v3
# - name: Set up Buildx
# uses: docker/setup-buildx-action@v3
# - name: Log in to GHCR
# uses: docker/login-action@v3
# with:
# registry: ghcr.io
# username: ${{ github.actor }}
# password: ${{ secrets.GHCR_TOKEN }}
# - name: Build and push Docker image
# uses: docker/build-push-action@v6
# with:
# context: .
# push: true
# platforms: linux/amd64,linux/arm64
# tags: |
# ghcr.io/circuitforgellc/peregrine:${{ github.ref_name }}
# ghcr.io/circuitforgellc/peregrine:latest
# cache-from: type=gha
# cache-to: type=gha,mode=max
# ── Forgejo Release ───────────────────────────────────────────────────────
- name: Create Forgejo release
env:
FORGEJO_TOKEN: ${{ secrets.FORGEJO_RELEASE_TOKEN }}
REPO: ${{ github.event.repository.name }}
TAG: ${{ github.ref_name }}
NOTES: ${{ steps.cliff.outputs.content }}
run: |
curl -sS -X POST \
"https://git.opensourcesolarpunk.com/api/v1/repos/Circuit-Forge/${REPO}/releases" \
-H "Authorization: token ${FORGEJO_TOKEN}" \
-H "Content-Type: application/json" \
-d "$(jq -n --arg tag "$TAG" --arg body "$NOTES" \
'{tag_name: $tag, name: $tag, body: $body}')"

View file

@ -1,7 +1,3 @@
# Peregrine CI — runs on GitHub mirror for public credibility badge.
# Forgejo (.forgejo/workflows/ci.yml) is the canonical CI — keep these in sync.
# No Forgejo-specific secrets used here; circuitforge-core is public on Forgejo.
name: CI name: CI
on: on:
@ -11,46 +7,29 @@ on:
branches: [main] branches: [main]
jobs: jobs:
backend: test:
name: Backend (Python)
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
- uses: actions/setup-python@v5 - name: Install system dependencies
run: sudo apt-get update -q && sudo apt-get install -y libsqlcipher-dev
- name: Set up Python
uses: actions/setup-python@v5
with: with:
python-version: '3.12' python-version: "3.11"
cache: pip cache: pip
- name: Configure git credentials for Forgejo
env:
FORGEJO_TOKEN: ${{ secrets.FORGEJO_TOKEN }}
run: |
git config --global url."https://oauth2:${FORGEJO_TOKEN}@git.opensourcesolarpunk.com/".insteadOf "https://git.opensourcesolarpunk.com/"
- name: Install dependencies - name: Install dependencies
run: pip install -r requirements.txt run: pip install -r requirements.txt
- name: Lint - name: Run tests
run: ruff check .
- name: Test
run: pytest tests/ -v --tb=short run: pytest tests/ -v --tb=short
frontend:
name: Frontend (Vue)
runs-on: ubuntu-latest
defaults:
run:
working-directory: web
steps:
- uses: actions/checkout@v4
- uses: actions/setup-node@v4
with:
node-version: '20'
cache: npm
cache-dependency-path: web/package-lock.json
- name: Install dependencies
run: npm ci
- name: Type check
run: npx vue-tsc --noEmit
- name: Test
run: npm run test

3
.gitignore vendored
View file

@ -40,11 +40,8 @@ pytest-output.txt
docs/superpowers/ docs/superpowers/
data/email_score.jsonl data/email_score.jsonl
data/email_score.jsonl.bad-labels
data/email_label_queue.jsonl data/email_label_queue.jsonl
data/email_compare_sample.jsonl data/email_compare_sample.jsonl
data/.feedback_ratelimit.json
data/config/
config/label_tool.yaml config/label_tool.yaml
config/server.yaml config/server.yaml

View file

@ -9,263 +9,6 @@ Format follows [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
--- ---
## [0.9.5] — 2026-05-08
### Fixed
- **Theme: dark/explicit themes show correct page background** — `index.html` inline style
set `html, body { background: #eaeff8 }` hardcoded. `body` paints on top of `html`, so
even when `html { background: var(--color-surface) }` correctly resolved to `#16202e` in
dark mode, the body's hardcoded light background covered it — producing dark cards on a
light page. Fixed by: (1) removing body background from the inline style; (2) adding a
tiny blocking `<script>` that reads `cf-theme` / `cf-hacker-mode` from localStorage and
sets `data-theme` on `<html>` before first paint; (3) adding
`html[data-theme="dark"|"solarized-dark"|"hacker"]` rules so FOUT prevention fires the
right background immediately on load.
---
## [0.9.4] — 2026-05-08
### Added
- **Messages view — expandable email timeline** — click any email item to lazy-load
and read the full body inline (HTML stripped to plain text via `DOMParser`).
Bodies are fetched on-demand via the new `GET /api/contacts/{id}` endpoint to avoid
loading 50KB+ email bodies on every page view.
- **Messages view — compose bar** — action buttons (Log call, Log note, Use template,
Draft reply with LLM, Call via Osprey) moved from the always-visible header into a
sticky bottom compose bar triggered by a New toggle. Reduces visual clutter when
just reading the thread.
- **Home view — "Skip review" checkbox** — when adding jobs by URL, a checkbox (default
on) sends them directly to the Apply queue, bypassing Job Review.
- **ContactsView — sync status** — shows last completed sync time and a spinner when
an email sync is running.
- **imap_sync: Indeed alert parser** — `parse_indeed_alert()` extracts job title,
company, location, salary, and canonical URL from Indeed Job Alert digest emails.
- **scrape_url: Oracle HCM support** — Playwright-based scraper for Oracle HCM
CandidateExperience portals (React SPAs requiring JS execution).
- **manage.sh** — compose engine auto-detection (docker compose / podman compose /
podman-compose), `build` command, and cloud/demo stack shortcuts.
- **theme.css** — `--color-overlay` token for modal/dialog backdrops.
### Fixed
- **Messages view layout** — changed `height: 100%` to `height: 100dvh` with a mobile
override for the 56px tab bar. `height: 100%` was resolving to "shrink-wrap" because
`.app-main` has no explicit height; compose bar is now correctly pinned to the bottom.
- **Accessibility: danger button contrast** — `btn--danger` used `color: white` on
`--app-accent` (Talon Orange), yielding 2.8:1 contrast (fails WCAG AA 4.5:1 for
normal text). Fixed to `color: var(--app-accent-text)` (dark navy, 5.5:1).
- **Accessibility: warning badge contrast** — `tab-badge` in Job Review used `color: white`
on `--color-warning` (amber). Same fix applied.
- **Theme: Interviews signal banners** — hardcoded `rgba(245,158,11,…)` / `rgba(39,174,…)`
/ `rgba(192,57,…)` replaced with `color-mix()` against `--color-warning/success/error`.
- **Theme: Interviews signal count** — `color: #e67e22` hardcode replaced with
`var(--app-accent)`.
- **Theme: References academic tag chip** — `color: #7c3aed` hardcode replaced with
`var(--status-synced)`; background uses `color-mix()` with the same token.
- **Theme: Interviews signal-move button** — `color: #fff` on `--color-primary` fails
in dark mode (light green bg); fixed to `var(--color-text-inverse)`.
- **Modal backdrops** — `rgba(0,0,0,0.5)` replaced with `var(--color-overlay)` for
theme consistency.
---
## [0.9.3] — 2026-05-05
### Added
- **Editable resume review** — proposed summary and experience bullets in the review modal
are now editable text areas. Edits flow through `apply_review_decisions()` and override
the LLM output in the final resume struct. Preview textarea in Apply Workspace is also
editable, with manual changes preserved through the approve step via `preview_text_override`.
### Fixed
- **Double bullets in resume optimizer** — `_section_text_for_prompt` now strips existing
bullet characters before prefixing with `•`, and `_reparse_experience_bullets` uses a
greedy strip regex so `• •` patterns can no longer survive parsing.
- **Asterisk markup in summary** — added `_clean_summary_markup()` to strip LLM-generated
markdown bullet chars (`*`, `-`, etc.) from career summary output; injected no-markdown
rule into the LLM prompt's CRITICAL RULES list.
- **Light theme dark CSS bleed** — `peregrine.css` media dark override now scoped to
`:root:not([data-theme])` (auto mode only) instead of `:root:not([data-theme="hacker"])`.
Fixes dark navy `--app-primary-light`/`--app-accent-light` bleeding into light themes
(light, solarized-light, colorblind) on dark-OS machines.
---
## [0.9.2] — 2026-05-02
### Added
- **Cover letter training export** (#111) — opt-in consent gate (`training_export_opt_in`
in `user.yaml`, default off) lets users export applied-job cover letters as Alpaca-format
JSONL for local fine-tuning. Per-job exclude/restore curation in Settings → Fine-Tune.
Streaming JSONL download merges DB pairs with any previously uploaded file pairs.
Cloud fine-tune Phase 2 stub (501) reserved for cf-orch integration.
- **WizardTrainingStep** — new onboarding consent step inserted between Resume and Identity;
skippable, opt-in default off, cloud-aware privacy copy.
- **a11y:** confirmed-state toggle (no optimistic DOM divergence), visible Premium tier gate
with upgrade link, `aria-live` region on pairs list, cloud-aware consent copy.
---
## [0.9.0] — 2026-04-20
### Added
- **Messaging tab** (#74) — per-job communication timeline replacing `/contacts`.
Unified view of IMAP emails (`job_contacts`) and manually logged entries (`messages`).
Log calls and in-person notes with timestamp. Message template library with 4 built-in
templates (follow-up, thank-you, accommodation request, withdrawal) and user-created
templates with `{{token}}` substitution. LLM draft reply for inbound emails (BYOK-unlockable,
BSL 1.1). Draft approval flow with inline editing and one-click clipboard copy. Osprey
IVR stub button (Phase 2 placeholder with easter egg). `migrations/008_messaging.sql`.
- **Public demo experience** (#103) — full read-only demo mode at `demo.circuitforge.tech/peregrine`.
`IS_DEMO=true` write-blocks all mutating API endpoints with a toast notification.
Ephemeral seed data via tmpfs + `demo/seed.sql` (resets on container start). WelcomeModal
on first visit (localStorage-gated). Per-view HintChips guiding new users through the
job search flow (localStorage-dismissed). DemoBanner with accessible CTA buttons
(WCAG-compliant contrast in light and dark themes). `migrations/006_missing_columns.sql`.
- **References tracker and recommendation letter system** (#96) — track professional
references and generate LLM-drafted recommendation request letters.
- **Shadow listing detector** — flags duplicate or aggregator-reposted job listings.
- **Hired feedback widget** — capture post-hire notes and retrospective feedback on jobs.
- **Interview prep Q&A** — LLM-generated practice questions for the selected job.
- **Resume library ↔ profile sync** — `POST /api/resumes/{id}/apply-to-profile` pushes
a library resume into the active profile; `PUT /api/settings/resume` syncs edits back
to the default library entry. `ResumeSyncConfirmModal` shows a before/after diff.
`ResumeProfileView` extended with career summary, education, and achievements sections.
`migrations/007_resume_sync.sql` adds `synced_at` to `resumes`.
- **Plausible analytics** — lightweight privacy-preserving analytics in Vue SPA and docs.
- **cf_text / cf_voice LLM backends** — wire trunk service backends in `llm.yaml`.
- **Mission alignment domains** — load preferred company domains from
`config/mission_domains.yaml` rather than hardcoded values.
- **GitHub Actions CI** — workflow for public credibility badge (`ci.yml`).
- **`CF_APP_NAME` cloud annotation** — coordinator pipeline attribution for multi-product
cloud deployments.
### Changed
- `/contacts` route now redirects to `/messages`; nav item renamed to "Messages" ("Contacts"
label removed. `ContactsView.vue` preserved for reference, router points to `MessagingView`.
- Survey `/analyze` endpoint is now fully async via the task queue (no blocking LLM call
on the request thread).
- nginx config adds `/peregrine/` base-path routing for subdirectory deployments.
- `compose.demo.yml` updated for Vue/FastAPI architecture with tmpfs demo volume.
### Fixed
- Tier bypass and draft body persistence after page navigation.
- `canDraftLlm` cleanup and message list `limit` cap.
- DemoBanner button contrast — semantic surface token instead of hardcoded white.
- Period split in `profile_to_library` now handles ISO date strings containing hyphens.
- Cloud startup sweeps all user DBs for pending migrations on deploy.
- Resume import strips CID glyph references via `resume_parser` extractors.
- Survey and interview tests updated for `hired_feedback` column and async analyze flow.
---
## [0.8.6] — 2026-04-12
### Added
- **Resume Review Modal** — paged tabbed dialog replaces the inline resume review
section in the Apply workspace. Pages through Skills diff, Summary diff, one page
per experience entry, and a Confirm summary. Color-coded tab status: unvisited
(gray), in-progress (indigo), accepted (green), partial (amber), skipped (slate).
Full ARIA tabs pattern with focus trap and `Teleport to body`.
- **Resume Library** — new `/resumes` page for managing saved resumes. Two-column
layout: list sidebar + full-text preview pane. Supports import (.txt, .pdf, .docx,
.odt, .yaml), rename (Edit), set as default, download (txt/pdf/yaml), and delete
(guarded: disabled when only resume or is default). 5 MB upload limit.
- **ResumeLibraryCard** — compact widget shown above the ATS Resume Optimizer in the
Apply workspace. Displays the currently active resume for the job (job-specific or
global default), with Switch and Manage deep links.
- **Resume library API** — `GET/POST /api/resumes`, `GET/PATCH/DELETE /api/resumes/{id}`,
`POST /api/resumes/{id}/set-default`, `POST /api/resumes/import`,
`GET/PATCH /api/jobs/{job_id}/resume`. `approve_resume` extended with
`save_to_library` + `resume_name` params to save optimized resumes directly.
- **`resumes` DB migration** — `migrations/005_resumes_table.sql` adds `resumes` table
(10 columns) and `resume_id` FK on `jobs`.
- **Resumes nav link** — Document icon entry added after Apply in the main nav.
### Changed
- Resume optimizer "Awaiting review" state now triggers the Review Modal instead of
rendering an inline diff; save-to-library checkbox and name input surfaced on the
preview confirmation step.
---
## [0.8.5] — 2026-04-02
### Added
- **Vue onboarding wizard** — 7-step first-run setup replaces the Streamlit wizard
in the Vue SPA: Hardware detection → Tier → Resume upload/build → Identity →
Inference & API keys → Search preferences → Integrations. Progress saves to
`user.yaml` on every step; crash-recovery resumes from the last completed step.
- **Wizard API endpoints** — `GET /api/wizard/status`, `POST /api/wizard/step`,
`GET /api/wizard/hardware`, `POST /api/wizard/inference/test`,
`POST /api/wizard/complete`. Inference test always soft-fails so Ollama being
unreachable never blocks setup completion.
- **Cloud auto-skip** — cloud instances automatically complete steps 1 (hardware),
2 (tier), and 5 (inference) and drop the user directly on the Resume step.
- **`wizardGuard` router gate** — all Vue routes require wizard completion; completed
users are bounced away from `/setup` to `/`.
- **Chip-input search step** — job titles and locations entered as press-Enter/comma
chips; validates at least one title before advancing.
- **Integrations tile grid** — optional step 7 shows Notion, Calendar, Slack, Discord,
Drive with paid-tier badges; skippable on Finish.
### Fixed
- **User config isolation: dangerous fallback removed** — `_user_yaml_path()` fell
back to `/devl/job-seeker/config/user.yaml` (legacy profile) when `user.yaml`
didn't exist at the expected path; new users now get an empty dict instead of
another user's data. Affects profile, resume, search, and all wizard endpoints.
- **Resume path not user-isolated** — `RESUME_PATH = Path("config/plain_text_resume.yaml")`
was a relative CWD path shared across all users. Replaced with `_resume_path()`
derived from `_user_yaml_path()` / `STAGING_DB`.
- **Resume upload silently returned empty data** — `upload_resume` was passing a
file path string to `structure_resume()` which expects raw text; now reads bytes
and dispatches to the correct extractor (`extract_text_from_pdf` / `_docx` / `_odt`).
- **Wizard resume step read wrong envelope field** — `WizardResumeStep.vue` read
`data.experience` but the upload response wraps parsed data under `data.data`.
---
## [0.8.4] — 2026-04-02
### Fixed
- **Cloud: cover letter used wrong user's profile** — `generate_cover_letter.generate()`
loaded `_profile` from the global `config/user.yaml` at module import time, so all
cloud users got the default user's name, voice, and mission preferences in their
generated letters. `generate()` now accepts a `user_yaml_path` parameter; `task_runner`
derives it from the per-user config directory (`db_path/../config/user.yaml`) and
passes it through. `_build_system_context`, `_build_mission_notes`, `detect_mission_alignment`,
`build_prompt`, and `_trim_to_letter_end` all accept a `profile` override so the
per-call profile is used end-to-end without breaking CLI mode.
- **Apply Workspace: hardcoded config paths in cloud mode** — `4_Apply.py` was loading
`_USER_YAML` and `RESUME_YAML` from the repo-root `config/` before `resolve_session()`
ran, so cloud users saw the global (Meg's) resume in the Apply tab. Both paths now
derive from `get_config_dir()` after session resolution.
### Changed
- **Vue SPA open to all tiers** — Vue 3 frontend is no longer gated behind the beta
flag; all tier users can switch to the Vue UI from Settings.
- **LLM model candidates** — vllm backend now tries Qwen2.5-3B first, Phi-4-mini
as fallback (was reversed). cf_orch allocation block added to vllm config.
- **Preflight** — removed `vllm` from Docker adoption list; vllm is now managed
entirely by cf-orch and should not be stubbed by preflight.
---
## [0.8.3] — 2026-04-01 ## [0.8.3] — 2026-04-01
### Fixed ### Fixed

View file

@ -34,7 +34,7 @@ full instructions.
```bash ```bash
git clone https://git.opensourcesolarpunk.com/pyr0ball/peregrine.git git clone https://git.opensourcesolarpunk.com/pyr0ball/peregrine.git
cd peregrine cd peregrine
./install.sh # installs deps, activates git hooks ./setup.sh # installs deps, activates git hooks
./manage.sh start ./manage.sh start
``` ```

View file

@ -6,7 +6,7 @@ WORKDIR /app
# System deps for companyScraper (beautifulsoup4, fake-useragent, lxml) and PDF gen # System deps for companyScraper (beautifulsoup4, fake-useragent, lxml) and PDF gen
# libsqlcipher-dev: required to build pysqlcipher3 (SQLCipher AES-256 encryption for cloud mode) # libsqlcipher-dev: required to build pysqlcipher3 (SQLCipher AES-256 encryption for cloud mode)
RUN apt-get update && apt-get install -y --no-install-recommends \ RUN apt-get update && apt-get install -y --no-install-recommends \
gcc libffi-dev curl libsqlcipher-dev git \ gcc libffi-dev curl libsqlcipher-dev \
&& rm -rf /var/lib/apt/lists/* && rm -rf /var/lib/apt/lists/*
COPY requirements.txt . COPY requirements.txt .

View file

@ -26,12 +26,6 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
COPY circuitforge-core/ /circuitforge-core/ COPY circuitforge-core/ /circuitforge-core/
RUN pip install --no-cache-dir /circuitforge-core RUN pip install --no-cache-dir /circuitforge-core
# circuitforge-orch client — needed for LLMRouter cf_orch allocation.
# Optional: if the directory doesn't exist the COPY will fail at build time; keep
# cf-orch as a sibling of peregrine in the build context.
COPY circuitforge-orch/ /circuitforge-orch/
RUN pip install --no-cache-dir /circuitforge-orch
COPY peregrine/requirements.txt . COPY peregrine/requirements.txt .
# Skip the cfcore line — already installed above from the local copy # Skip the cfcore line — already installed above from the local copy
RUN grep -v 'circuitforge-core' requirements.txt | pip install --no-cache-dir -r /dev/stdin RUN grep -v 'circuitforge-core' requirements.txt | pip install --no-cache-dir -r /dev/stdin
@ -45,13 +39,6 @@ COPY peregrine/scrapers/ /app/scrapers/
COPY peregrine/ . COPY peregrine/ .
# Remove per-user config files that are gitignored but may exist locally.
# Defense-in-depth: the parent .dockerignore should already exclude these,
# but an explicit rm guarantees they never end up in the cloud image.
RUN rm -f config/user.yaml config/plain_text_resume.yaml config/notion.yaml \
config/email.yaml config/tokens.yaml config/craigslist.yaml \
config/adzuna.yaml .env
EXPOSE 8501 EXPOSE 8501
CMD ["streamlit", "run", "app/app.py", \ CMD ["streamlit", "run", "app/app.py", \

View file

@ -1,153 +0,0 @@
# Peregrine → xanderland.tv Setup Handoff
**Written from:** dev machine (CircuitForge dev env)
**Target:** xanderland.tv (beta tester, rootful Podman + systemd)
**Date:** 2026-02-27
---
## What we're doing
Getting Peregrine running on the beta tester's server as a Podman container managed by systemd. He already runs SearXNG and other services in the same style — rootful Podman with `--net=host`, `--restart=unless-stopped`, registered as systemd units.
The script `podman-standalone.sh` in the repo root handles the container setup.
---
## Step 1 — Get the repo onto xanderland.tv
From navi (or directly if you have a route):
```bash
ssh xanderland.tv "sudo git clone <repo-url> /opt/peregrine"
```
Or if it's already there, just pull:
```bash
ssh xanderland.tv "cd /opt/peregrine && sudo git pull"
```
---
## Step 2 — Verify /opt/peregrine looks right
```bash
ssh xanderland.tv "ls /opt/peregrine"
```
Expect to see: `Dockerfile`, `compose.yml`, `manage.sh`, `podman-standalone.sh`, `config/`, `app/`, `scripts/`, etc.
---
## Step 3 — Config
```bash
ssh xanderland.tv
cd /opt/peregrine
sudo mkdir -p data
sudo cp config/llm.yaml.example config/llm.yaml
sudo cp config/notion.yaml.example config/notion.yaml # only if he wants Notion sync
```
Then edit `config/llm.yaml` and set `searxng_url` to his existing SearXNG instance
(default is `http://localhost:8888` — confirm his actual port).
He won't need Anthropic/OpenAI keys to start — the setup wizard lets him pick local Ollama
or whatever he has running.
---
## Step 4 — Fix DOCS_DIR in the script
The script defaults `DOCS_DIR=/Library/Documents/JobSearch` which is the original user's path.
Update it to wherever his job search documents actually live, or a placeholder empty dir:
```bash
sudo mkdir -p /opt/peregrine/docs # placeholder if he has no docs yet
```
Then edit the script:
```bash
sudo sed -i 's|DOCS_DIR=.*|DOCS_DIR=/opt/peregrine/docs|' /opt/peregrine/podman-standalone.sh
```
---
## Step 5 — Build the image
```bash
ssh xanderland.tv "cd /opt/peregrine && sudo podman build -t localhost/peregrine:latest ."
```
Takes a few minutes on first run (downloads python:3.11-slim, installs deps).
---
## Step 6 — Run the script
```bash
ssh xanderland.tv "sudo bash /opt/peregrine/podman-standalone.sh"
```
This starts a single container (`peregrine`) with `--net=host` and `--restart=unless-stopped`.
SearXNG is NOT included — his existing instance is used.
Verify it came up:
```bash
ssh xanderland.tv "sudo podman ps | grep peregrine"
ssh xanderland.tv "sudo podman logs peregrine"
```
Health check endpoint: `http://xanderland.tv:8501/_stcore/health`
---
## Step 7 — Register as a systemd service
```bash
ssh xanderland.tv
sudo podman generate systemd --new --name peregrine \
| sudo tee /etc/systemd/system/peregrine.service
sudo systemctl daemon-reload
sudo systemctl enable --now peregrine
```
Confirm:
```bash
sudo systemctl status peregrine
```
---
## Step 8 — First-run wizard
Open `http://xanderland.tv:8501` in a browser.
The setup wizard (page 0) will gate the app until `config/user.yaml` is created.
He'll fill in his profile — name, resume, LLM backend preferences. This writes
`config/user.yaml` and unlocks the rest of the UI.
---
## Troubleshooting
| Symptom | Check |
|---------|-------|
| Container exits immediately | `sudo podman logs peregrine` — usually a missing config file |
| Port 8501 already in use | `sudo ss -tlnp \| grep 8501` — something else on that port |
| SearXNG not reachable | Confirm `searxng_url` in `config/llm.yaml` and that JSON format is enabled in SearXNG settings |
| Wizard loops / won't save | `config/` volume mount permissions — `sudo chown -R 1000:1000 /opt/peregrine/config` |
---
## To update Peregrine later
```bash
cd /opt/peregrine
sudo git pull
sudo podman build -t localhost/peregrine:latest .
sudo podman restart peregrine
```
No need to touch the systemd unit — it launches fresh via `--new` in the generate step.

View file

@ -45,7 +45,7 @@ endif
PROFILE_ARG := $(if $(filter remote,$(PROFILE)),,--profile $(PROFILE)) PROFILE_ARG := $(if $(filter remote,$(PROFILE)),,--profile $(PROFILE))
setup: ## Install dependencies (Docker or Podman + NVIDIA toolkit) setup: ## Install dependencies (Docker or Podman + NVIDIA toolkit)
@bash install.sh @bash setup.sh
preflight: ## Check ports + system resources; write .env preflight: ## Check ports + system resources; write .env
@$(PYTHON) scripts/preflight.py @$(PYTHON) scripts/preflight.py

259
README.md
View file

@ -1,143 +1,195 @@
<div align="center"> # Peregrine
<img src="web/public/peregrine.svg" alt="Peregrine" width="120" />
<h1>Peregrine</h1> > **Primary development** happens at [git.opensourcesolarpunk.com](https://git.opensourcesolarpunk.com/Circuit-Forge/peregrine) — GitHub and Codeberg are push mirrors. Issues and PRs are welcome on either platform.
<p><strong>Job search pipeline — by <a href="https://circuitforge.tech">Circuit Forge LLC</a></strong></p> [![License: BSL 1.1](https://img.shields.io/badge/License-BSL_1.1-blue.svg)](./LICENSE-BSL)
[![CI](https://github.com/CircuitForge/peregrine/actions/workflows/ci.yml/badge.svg)](https://github.com/CircuitForge/peregrine/actions/workflows/ci.yml)
<p><em>AI for the tasks the system made hard on purpose.</em></p> **Job search pipeline — by [Circuit Forge LLC](https://circuitforge.tech)**
[![License: MIT / BSL 1.1](https://img.shields.io/badge/License-MIT%20%2F%20BSL%201.1-blue.svg)](#license) > *"Tools for the jobs that the system made hard on purpose."*
[![CI](https://github.com/CircuitForgeLLC/peregrine/actions/workflows/ci.yml/badge.svg)](https://github.com/CircuitForgeLLC/peregrine/actions/workflows/ci.yml)
[![Docs](https://img.shields.io/badge/docs-docs.circuitforge.tech-orange)](https://docs.circuitforge.tech/peregrine/)
[![Version](https://img.shields.io/badge/version-0.9.0-green)](https://git.opensourcesolarpunk.com/Circuit-Forge/peregrine/releases)
<p>
<a href="https://demo.circuitforge.tech/peregrine"><strong>Live Demo</strong></a>
no account required, nothing saved &nbsp;|&nbsp;
<a href="https://docs.circuitforge.tech/peregrine/">Docs</a> &nbsp;|&nbsp;
<a href="https://git.opensourcesolarpunk.com/Circuit-Forge/peregrine/issues">Issues</a>
</p>
<blockquote>
<strong>Primary development</strong> happens at
<a href="https://git.opensourcesolarpunk.com/Circuit-Forge/peregrine">git.opensourcesolarpunk.com/Circuit-Forge/peregrine</a>.
GitHub and Codeberg are push mirrors. Issues and PRs are welcome on any platform.
</blockquote>
</div>
--- ---
<table> Job search is a second job nobody hired you for.
<tr>
<td><img src="docs/screenshots/01-dashboard.png" alt="Dashboard with pipeline stats and discovery controls"/></td>
<td><img src="docs/screenshots/02-review.png" alt="Job review — approve, skip, or reject with keyboard shortcuts"/></td>
</tr>
<tr>
<td><img src="docs/screenshots/03-apply.png" alt="Apply workspace with LLM-drafted cover letter"/></td>
<td><img src="docs/screenshots/04-interviews.png" alt="Interview kanban with company research and recruiter emails"/></td>
</tr>
</table>
--- ATS filters designed to reject. Job boards that show the same listing eight times. Cover letter number forty-seven for a role that might already be filled. Hours of prep for a phone screen that lasts twelve minutes.
## Why Peregrine? Peregrine handles the pipeline — discovery, matching, tracking, drafting, and prep — so you can spend your time doing the work you actually want to be doing.
Job search is a second job nobody hired you for. ATS (applicant tracking system) filters designed to reject. Boards that show the same listing eight times. Cover letter number forty-seven for a role that might already be filled. Hours of prep for a phone screen that lasts twelve minutes. **LLM support is optional.** The full discovery and tracking pipeline works without one. When you do configure a backend, the LLM drafts the parts that are genuinely miserable — cover letters, company research briefs, interview prep sheets — and waits for your approval before anything goes anywhere.
- **Handles the full pipeline.** Discover, filter, match, draft, track — one tool, one database, no duct tape. ### What Peregrine does not do
- **LLM is optional and local-first.** Discovery and tracking work with no LLM at all. When you do configure one, it runs on your hardware by default. Cloud inference is a fallback, not the default path.
- **Ghost-post detection baked in.** Listings that have been open too long or look like sourcing traps get flagged before you spend time on them. Peregrine does **not** submit job applications for you. You still have to go to each employer's site and click apply yourself.
- **Human approval at every step.** LLM drafts cover letters and research briefs; you approve before anything goes anywhere. Peregrine never submits an application on your behalf.
- **Privacy · Safety · Accessibility** are architectural constraints, not aspirational copy. No PII (personally identifiable information) logging, no behavioral profiling, no dark patterns. This is intentional. Automated mass-applying is a bad experience for everyone — it's also a trust violation with employers who took the time to post a real role. Peregrine is a preparation and organization tool, not a bot.
What it *does* cover is everything before and after that click: finding the jobs, matching them against your resume, generating cover letters and prep materials, and once you've applied — tracking where you stand, classifying the emails that come back, and surfacing company research when an interview lands on your calendar. The submit button is yours. The rest of the grind is ours.
> **Exception:** [AIHawk](https://github.com/nicolomantini/LinkedIn-Easy-Apply) is a separate, optional tool that handles LinkedIn Easy Apply automation. Peregrine integrates with it for AIHawk-compatible profiles, but it is not part of Peregrine's core pipeline.
--- ---
## Quick Start ## Quick Start
One-line install: **1. Clone and install dependencies** (Docker, NVIDIA toolkit if needed):
```bash
bash <(curl -fsSL https://git.opensourcesolarpunk.com/Circuit-Forge/peregrine/raw/branch/main/install.sh)
```
Or clone and run manually:
```bash ```bash
git clone https://git.opensourcesolarpunk.com/Circuit-Forge/peregrine git clone https://git.opensourcesolarpunk.com/Circuit-Forge/peregrine
cd peregrine cd peregrine
./manage.sh setup ./manage.sh setup
./manage.sh start
``` ```
Open **http://localhost:8502** — the setup wizard walks you through the rest. **2. Start Peregrine:**
> **macOS / Apple Silicon:** install Ollama natively via Homebrew before starting for Metal GPU-accelerated inference. `install.sh` handles this automatically.
> **Windows:** use WSL2 with Ubuntu.
### Inference profiles
```bash ```bash
./manage.sh start # remote — no GPU; LLM calls go to Anthropic / OpenAI ./manage.sh start # remote profile (API-only, no GPU)
./manage.sh start --profile cpu # local Ollama on CPU (or Metal via native Ollama on macOS) ./manage.sh start --profile cpu # local Ollama (CPU, or Metal GPU on Apple Silicon — see below)
./manage.sh start --profile single-gpu # Ollama + vision on GPU 0 (NVIDIA only) ./manage.sh start --profile single-gpu # Ollama + Vision on GPU 0 (NVIDIA only)
./manage.sh start --profile dual-gpu # Ollama + vLLM on two NVIDIA GPUs ./manage.sh start --profile dual-gpu # Ollama + Vision + vLLM (GPU 0 + 1) (NVIDIA only)
``` ```
Or use `make` directly:
```bash
make start # remote profile
make start PROFILE=single-gpu
```
**3.** Open http://localhost:8501 — the setup wizard guides you through the rest.
> **macOS / Apple Silicon:** Docker Desktop must be running. For Metal GPU-accelerated inference, install Ollama natively before starting — `setup.sh` will prompt you to do this. See [Apple Silicon GPU](#apple-silicon-gpu) below.
> **Windows:** Not supported — use WSL2 with Ubuntu.
### Installing to `/opt` or other system directories
If you clone into a root-owned directory (e.g. `sudo git clone ... /opt/peregrine`), two things need fixing:
**1. Git ownership warning** (`fatal: detected dubious ownership`) — `./manage.sh setup` fixes this automatically. If you need git to work *before* running setup:
```bash
git config --global --add safe.directory /opt/peregrine
```
**2. Preflight write access** — preflight writes `.env` and `compose.override.yml` into the repo directory. Fix ownership once:
```bash
sudo chown -R $USER:$USER /opt/peregrine
```
After that, run everything without `sudo`.
### Podman
Podman is rootless by default — **no `sudo` needed.** `./manage.sh setup` will configure `podman-compose` if it isn't already present.
### Docker
After `./manage.sh setup`, log out and back in for docker group membership to take effect. Until then, prefix commands with `sudo`. After re-login, `sudo` is no longer required.
---
## Inference Profiles
| Profile | Services started | Use case |
|---------|-----------------|----------|
| `remote` | app + searxng | No GPU; LLM calls go to Anthropic / OpenAI |
| `cpu` | app + ollama + searxng | No GPU; local models on CPU. On Apple Silicon, use with native Ollama for Metal acceleration — see below. |
| `single-gpu` | app + ollama + vision + searxng | One **NVIDIA** GPU: cover letters, research, vision |
| `dual-gpu` | app + ollama + vllm + vision + searxng | Two **NVIDIA** GPUs: GPU 0 = Ollama, GPU 1 = vLLM |
### Apple Silicon GPU
Docker Desktop on macOS runs in a Linux VM — it cannot access the Apple GPU. Metal-accelerated inference requires Ollama to run **natively** on the host.
`setup.sh` handles this automatically: it offers to install Ollama via Homebrew, starts it as a background service, and explains what happens next. If Ollama is running on port 11434 when you start Peregrine, preflight detects it, stubs out the Docker Ollama container, and routes inference through the native process — which uses Metal automatically.
To do it manually:
```bash
brew install ollama
brew services start ollama # starts at login, uses Metal GPU
./manage.sh start --profile cpu # preflight adopts native Ollama; Docker container is skipped
```
The `cpu` profile label is a slight misnomer in this context — Ollama will be running on the GPU. `single-gpu` and `dual-gpu` profiles are NVIDIA-specific and not applicable on Mac.
---
## First-Run Wizard
On first launch the setup wizard walks through seven steps:
1. **Hardware** — detects NVIDIA GPUs (Linux) or Apple Silicon GPU (macOS) and recommends a profile
2. **Tier** — choose free, paid, or premium (or use `dev_tier_override` for local testing)
3. **Identity** — name, email, phone, LinkedIn, career summary
4. **Resume** — upload a PDF/DOCX for LLM parsing, or use the guided form builder
5. **Inference** — configure LLM backends and API keys
6. **Search** — job titles, locations, boards, keywords, blocklist
7. **Integrations** — optional cloud storage, calendar, and notification services
Wizard state is saved after each step — a crash or browser close resumes where you left off.
Re-enter the wizard any time via **Settings → Developer → Reset wizard**.
--- ---
## Features ## Features
| Feature | Tier | | Feature | Tier |
|---------|------| |---------|------|
| Job discovery — LinkedIn, Indeed, Glassdoor, Adzuna, The Ladders | Free | | Job discovery (JobSpy + custom boards) | Free |
| Ghost-post detection | Free | | Resume keyword matching & gap analysis | Free |
| Resume keyword matching and gap analysis | Free | | Document storage sync (Google Drive, Dropbox, OneDrive, MEGA, Nextcloud) | Free |
| Document storage sync (Google Drive, Dropbox, OneDrive, Nextcloud) | Free |
| Webhook notifications (Discord, Home Assistant) | Free | | Webhook notifications (Discord, Home Assistant) | Free |
| Vue 3 SPA — full UI with onboarding wizard, job board, apply workspace, interview kanban | Free | | **Cover letter generation** | Free with LLM¹ |
| **Cover letter generation** | Free with LLM ¹ | | **Company research briefs** | Free with LLM¹ |
| **Company research briefs** | Free with LLM ¹ | | **Interview prep & practice Q&A** | Free with LLM¹ |
| **Interview prep and practice Q&A** | Free with LLM ¹ | | **Survey assistant** (culture-fit Q&A, screenshot analysis) | Free with LLM¹ |
| **Survey assistant** (culture-fit Q&A, screenshot analysis) | Free with LLM ¹ | | **Wizard helpers** (career summary, bullet expansion, skill suggestions, job title suggestions, mission notes) | Free with LLM¹ |
| Managed cloud LLM (no API key needed) | Paid | | Managed cloud LLM (no API key needed) | Paid |
| Email sync and auto-classification | Paid | | Email sync & auto-classification | Paid |
| LLM-powered keyword blocklist | Paid |
| Job tracking integrations (Notion, Airtable, Google Sheets) | Paid | | Job tracking integrations (Notion, Airtable, Google Sheets) | Paid |
| Calendar sync (Google, Apple) | Paid | | Calendar sync (Google, Apple) | Paid |
| Slack notifications | Paid | | Slack notifications | Paid |
| CircuitForge shared cover-letter model | Paid | | CircuitForge shared cover-letter model | Paid |
| **Voice guidelines** (custom writing style and tone) | Premium with LLM ¹ | | Vue 3 SPA beta UI | Paid |
| Cover letter model fine-tuning — your writing, your model | Premium | | **Voice guidelines** (custom writing style & tone) | Premium with LLM¹ ² |
| Cover letter model fine-tuning (your writing, your model) | Premium |
| Multi-user support | Premium | | Multi-user support | Premium |
| Human-in-the-loop operator (CAPTCHAs, phone calls, wet signatures) | Ultra |
¹ **BYOK (bring your own key) unlock:** configure any LLM backend — a local [Ollama](https://ollama.com) or vLLM instance, or your own API key (Anthropic, OpenAI-compatible) — and all "Free with LLM" and "Premium with LLM" features unlock at no charge. ¹ **BYOK (bring your own key/backend) unlock:** configure any LLM backend — a local [Ollama](https://ollama.com) or vLLM instance,
or your own API key (Anthropic, OpenAI-compatible) — and all features marked **Free with LLM** or **Premium with LLM**
unlock at no charge. The paid tier earns its price by providing managed cloud inference so you
don't need a key at all, plus integrations and email sync.
² **Voice guidelines** requires the Premium tier when no LLM backend is configured. With BYOK (your own configured backend or API key), it unlocks at any tier.
--- ---
## What Peregrine does not do ## Email Sync
Peregrine does **not** submit job applications for you. You still click apply on the employer's site. Monitors your inbox for job-related emails and automatically updates job stages (interview requests, rejections, survey links, offers).
This is intentional. Automated mass-applying is a bad experience for everyone and a trust violation with employers who posted a real role. The submit button is yours. The rest of the grind is ours. Configure in **Settings → Email**. Requires IMAP access and, for Gmail, an App Password.
--- ---
## Stack ## Integrations
| Layer | Technology | Connect external services in **Settings → Integrations**:
|-------|-----------|
| Frontend | Vue 3 SPA (Vite) | - **Job tracking:** Notion, Airtable, Google Sheets
| Backend | FastAPI + Python | - **Document storage:** Google Drive, Dropbox, OneDrive, MEGA, Nextcloud
| Database | SQLite (local, per-user) | - **Calendar:** Google Calendar, Apple Calendar (CalDAV)
| Job scraping | [JobSpy](https://github.com/Bunsly/JobSpy) + custom board scrapers | - **Notifications:** Slack, Discord (webhook), Home Assistant
| LLM inference | Ollama, vLLM, Anthropic, OpenAI-compatible — configurable fallback chain |
| Vision | moondream2 (survey screenshot analysis) |
| Container | Docker / Podman |
--- ---
## manage.sh reference ## CLI Reference (`manage.sh`)
`manage.sh` is the single entry point for all common operations — no need to remember Make targets or Docker commands.
``` ```
./manage.sh setup Install Docker/Podman + NVIDIA toolkit ./manage.sh setup Install Docker/Podman + NVIDIA toolkit
@ -146,38 +198,31 @@ This is intentional. Automated mass-applying is a bad experience for everyone an
./manage.sh restart Restart all services ./manage.sh restart Restart all services
./manage.sh status Show running containers ./manage.sh status Show running containers
./manage.sh logs [service] Tail logs (default: app) ./manage.sh logs [service] Tail logs (default: app)
./manage.sh update Pull latest images and rebuild app container ./manage.sh update Pull latest images + rebuild app container
./manage.sh preflight Check ports + resources; write .env
./manage.sh test Run test suite ./manage.sh test Run test suite
./manage.sh prepare-training Scan docs for cover letters — outputs training JSONL ./manage.sh prepare-training Scan docs for cover letters training JSONL
./manage.sh finetune Run LoRA fine-tune (requires single-gpu profile or higher) ./manage.sh finetune Run LoRA fine-tune (needs --profile single-gpu+)
./manage.sh open Open the web UI in your browser ./manage.sh open Open the web UI in your browser
./manage.sh clean Remove containers, images, volumes (asks to confirm)
``` ```
--- ---
## Documentation ## Developer Docs
Full docs at **[docs.circuitforge.tech/peregrine](https://docs.circuitforge.tech/peregrine)** Full documentation at: https://docs.circuitforge.tech/peregrine
Bug reports and feature requests: [Forgejo issues](https://git.opensourcesolarpunk.com/Circuit-Forge/peregrine/issues) - [Installation guide](https://docs.circuitforge.tech/peregrine/getting-started/installation/)
- [Adding a custom job board scraper](https://docs.circuitforge.tech/peregrine/developer-guide/adding-scrapers/)
--- - [Adding an integration](https://docs.circuitforge.tech/peregrine/developer-guide/adding-integrations/)
- [Contributing](https://docs.circuitforge.tech/peregrine/developer-guide/contributing/)
## Contributing
Contributions are welcome. The discovery pipeline — scrapers, board integrations, matching logic — is MIT-licensed. Fork it, extend it, send PRs. AI features are BSL 1.1. See the [contributing guide](https://docs.circuitforge.tech/peregrine/developer-guide/contributing/) for conventions.
--- ---
## License ## License
Peregrine uses a split license: Core discovery pipeline: [MIT](LICENSE-MIT)
LLM features (cover letter generation, company research, interview prep, UI): [BSL 1.1](LICENSE-BSL)
| Component | License |
|-----------|---------|
| Discovery pipeline — scrapers, matching, tracking | [MIT](LICENSE-MIT) |
| LLM features — cover letter generation, company research, interview prep, survey assistant, fine-tuning | [BSL 1.1](LICENSE-BSL) — free for personal non-commercial self-hosting; commercial use or SaaS re-hosting requires a paid license; converts to MIT after four years |
Fine-tuned model weights are proprietary and per-user — not redistributable.
© 2026 Circuit Forge LLC © 2026 Circuit Forge LLC

View file

@ -14,22 +14,24 @@ sys.path.insert(0, str(Path(__file__).parent.parent))
from scripts.user_profile import UserProfile from scripts.user_profile import UserProfile
_USER_YAML = Path(__file__).parent.parent / "config" / "user.yaml"
_profile = UserProfile(_USER_YAML) if UserProfile.exists(_USER_YAML) else None
_name = _profile.name if _profile else "Job Seeker"
from scripts.db import init_db, get_job_counts, purge_jobs, purge_email_data, \ from scripts.db import init_db, get_job_counts, purge_jobs, purge_email_data, \
purge_non_remote, archive_jobs, kill_stuck_tasks, cancel_task, \ purge_non_remote, archive_jobs, kill_stuck_tasks, get_task_for_job, get_active_tasks, \
get_task_for_job, get_active_tasks, insert_job, get_existing_urls insert_job, get_existing_urls
from scripts.task_runner import submit_task from scripts.task_runner import submit_task
from app.cloud_session import resolve_session, get_db_path, get_config_dir from app.cloud_session import resolve_session, get_db_path
_CONFIG_DIR = Path(__file__).parent.parent / "config"
_NOTION_CONNECTED = (_CONFIG_DIR / "integrations" / "notion.yaml").exists()
resolve_session("peregrine") resolve_session("peregrine")
init_db(get_db_path()) init_db(get_db_path())
_CONFIG_DIR = get_config_dir()
_USER_YAML = _CONFIG_DIR / "user.yaml"
_profile = UserProfile(_USER_YAML) if UserProfile.exists(_USER_YAML) else None
_name = _profile.name if _profile else "Job Seeker"
def _email_configured() -> bool: def _email_configured() -> bool:
_e = get_config_dir() / "email.yaml" _e = Path(__file__).parent.parent / "config" / "email.yaml"
if not _e.exists(): if not _e.exists():
return False return False
import yaml as _yaml import yaml as _yaml
@ -37,7 +39,7 @@ def _email_configured() -> bool:
return bool(_cfg.get("username") or _cfg.get("user") or _cfg.get("imap_host")) return bool(_cfg.get("username") or _cfg.get("user") or _cfg.get("imap_host"))
def _notion_configured() -> bool: def _notion_configured() -> bool:
_n = get_config_dir() / "notion.yaml" _n = Path(__file__).parent.parent / "config" / "notion.yaml"
if not _n.exists(): if not _n.exists():
return False return False
import yaml as _yaml import yaml as _yaml
@ -45,7 +47,7 @@ def _notion_configured() -> bool:
return bool(_cfg.get("token")) return bool(_cfg.get("token"))
def _keywords_configured() -> bool: def _keywords_configured() -> bool:
_k = get_config_dir() / "resume_keywords.yaml" _k = Path(__file__).parent.parent / "config" / "resume_keywords.yaml"
if not _k.exists(): if not _k.exists():
return False return False
import yaml as _yaml import yaml as _yaml
@ -132,7 +134,7 @@ def _queue_url_imports(db_path: Path, urls: list) -> int:
st.title(f"🔍 {_name}'s Job Search") st.title(f"🔍 {_name}'s Job Search")
st.caption("Discover → Review → Sync" + (" to Notion" if _notion_configured() else "")) st.caption("Discover → Review → Sync to Notion")
st.divider() st.divider()
@ -144,7 +146,7 @@ def _live_counts():
col1.metric("Pending Review", counts.get("pending", 0)) col1.metric("Pending Review", counts.get("pending", 0))
col2.metric("Approved", counts.get("approved", 0)) col2.metric("Approved", counts.get("approved", 0))
col3.metric("Applied", counts.get("applied", 0)) col3.metric("Applied", counts.get("applied", 0))
col4.metric("Synced" + (" to Notion" if _notion_configured() else ""), counts.get("synced", 0)) col4.metric("Synced to Notion", counts.get("synced", 0))
col5.metric("Rejected", counts.get("rejected", 0)) col5.metric("Rejected", counts.get("rejected", 0))
@ -235,7 +237,7 @@ with mid:
with right: with right:
approved_count = get_job_counts(get_db_path()).get("approved", 0) approved_count = get_job_counts(get_db_path()).get("approved", 0)
if _notion_configured(): if _NOTION_CONNECTED:
st.subheader("Send to Notion") st.subheader("Send to Notion")
st.caption("Push all approved jobs to your Notion tracking database.") st.caption("Push all approved jobs to your Notion tracking database.")
if approved_count == 0: if approved_count == 0:
@ -374,145 +376,178 @@ _scrape_status()
st.divider() st.divider()
# ── Danger zone ─────────────────────────────────────────────────────────────── # ── Danger zone: purge + re-scrape ────────────────────────────────────────────
with st.expander("⚠️ Danger Zone", expanded=False): with st.expander("⚠️ Danger Zone", expanded=False):
# ── Queue reset (the common case) ─────────────────────────────────────────
st.markdown("**Queue reset**")
st.caption( st.caption(
"Archive clears your review queue while keeping job URLs for dedup, " "**Purge** permanently deletes jobs from the local database. "
"so the same listings won't resurface on the next discovery run. " "Applied and synced jobs are never touched."
"Use hard purge only if you want a full clean slate including dedup history."
) )
_scope = st.radio( purge_col, rescrape_col, email_col, tasks_col = st.columns(4)
"Clear scope",
["Pending only", "Pending + approved (stale search)"],
horizontal=True,
label_visibility="collapsed",
)
_scope_statuses = (
["pending"] if _scope == "Pending only" else ["pending", "approved"]
)
_qc1, _qc2, _qc3 = st.columns([2, 2, 4]) with purge_col:
if _qc1.button("📦 Archive & reset", use_container_width=True, type="primary"): st.markdown("**Purge pending & rejected**")
st.session_state["confirm_dz"] = "archive" st.caption("Removes all _pending_ and _rejected_ listings so the next discovery starts fresh.")
if _qc2.button("🗑 Hard purge (delete)", use_container_width=True): if st.button("🗑 Purge Pending + Rejected", use_container_width=True):
st.session_state["confirm_dz"] = "purge" st.session_state["confirm_purge"] = "partial"
if st.session_state.get("confirm_dz") == "archive": if st.session_state.get("confirm_purge") == "partial":
st.info( st.warning("Are you sure? This cannot be undone.")
f"Archive **{', '.join(_scope_statuses)}** jobs? " c1, c2 = st.columns(2)
"URLs are kept for dedup — nothing is permanently deleted." if c1.button("Yes, purge", type="primary", use_container_width=True):
) deleted = purge_jobs(get_db_path(), statuses=["pending", "rejected"])
_dc1, _dc2 = st.columns(2) st.success(f"Purged {deleted} jobs.")
if _dc1.button("Yes, archive", type="primary", use_container_width=True, key="dz_archive_confirm"): st.session_state.pop("confirm_purge", None)
n = archive_jobs(get_db_path(), statuses=_scope_statuses) st.rerun()
st.success(f"Archived {n} jobs.") if c2.button("Cancel", use_container_width=True):
st.session_state.pop("confirm_dz", None) st.session_state.pop("confirm_purge", None)
st.rerun()
if _dc2.button("Cancel", use_container_width=True, key="dz_archive_cancel"):
st.session_state.pop("confirm_dz", None)
st.rerun()
if st.session_state.get("confirm_dz") == "purge":
st.warning(
f"Permanently delete **{', '.join(_scope_statuses)}** jobs? "
"This removes the URLs from dedup history too. Cannot be undone."
)
_dc1, _dc2 = st.columns(2)
if _dc1.button("Yes, delete", type="primary", use_container_width=True, key="dz_purge_confirm"):
n = purge_jobs(get_db_path(), statuses=_scope_statuses)
st.success(f"Deleted {n} jobs.")
st.session_state.pop("confirm_dz", None)
st.rerun()
if _dc2.button("Cancel", use_container_width=True, key="dz_purge_cancel"):
st.session_state.pop("confirm_dz", None)
st.rerun()
st.divider()
# ── Background tasks ──────────────────────────────────────────────────────
_active = get_active_tasks(get_db_path())
st.markdown(f"**Background tasks** — {len(_active)} active")
if _active:
_task_icons = {"cover_letter": "✉️", "research": "🔍", "discovery": "🌐", "enrich_descriptions": "📝"}
for _t in _active:
_tc1, _tc2, _tc3 = st.columns([3, 4, 2])
_icon = _task_icons.get(_t["task_type"], "⚙️")
_tc1.caption(f"{_icon} `{_t['task_type']}`")
_job_label = f"{_t['title']} @ {_t['company']}" if _t.get("title") else f"job #{_t['job_id']}"
_tc2.caption(_job_label)
_tc3.caption(f"_{_t['status']}_")
if st.button("✕ Cancel", key=f"dz_cancel_task_{_t['id']}", use_container_width=True):
cancel_task(get_db_path(), _t["id"])
st.rerun() st.rerun()
st.caption("")
_kill_col, _ = st.columns([2, 6]) with email_col:
if _kill_col.button("⏹ Kill all stuck", use_container_width=True, disabled=len(_active) == 0): st.markdown("**Purge email data**")
killed = kill_stuck_tasks(get_db_path()) st.caption("Clears all email thread logs and email-sourced pending jobs so the next sync starts fresh.")
st.success(f"Killed {killed} task(s).") if st.button("📧 Purge Email Data", use_container_width=True):
st.rerun() st.session_state["confirm_purge"] = "email"
if st.session_state.get("confirm_purge") == "email":
st.warning("This deletes all email contacts and email-sourced jobs. Cannot be undone.")
c1, c2 = st.columns(2)
if c1.button("Yes, purge emails", type="primary", use_container_width=True):
contacts, jobs = purge_email_data(get_db_path())
st.success(f"Purged {contacts} email contacts, {jobs} email jobs.")
st.session_state.pop("confirm_purge", None)
st.rerun()
if c2.button("Cancel ", use_container_width=True):
st.session_state.pop("confirm_purge", None)
st.rerun()
with tasks_col:
_active = get_active_tasks(get_db_path())
st.markdown("**Kill stuck tasks**")
st.caption(f"Force-fail all queued/running background tasks. Currently **{len(_active)}** active.")
if st.button("⏹ Kill All Tasks", use_container_width=True, disabled=len(_active) == 0):
killed = kill_stuck_tasks(get_db_path())
st.success(f"Killed {killed} task(s).")
st.rerun()
with rescrape_col:
st.markdown("**Purge all & re-scrape**")
st.caption("Wipes _all_ non-applied, non-synced jobs then immediately runs a fresh discovery.")
if st.button("🔄 Purge All + Re-scrape", use_container_width=True):
st.session_state["confirm_purge"] = "full"
if st.session_state.get("confirm_purge") == "full":
st.warning("This will delete ALL pending, approved, and rejected jobs, then re-scrape. Applied and synced records are kept.")
c1, c2 = st.columns(2)
if c1.button("Yes, wipe + scrape", type="primary", use_container_width=True):
purge_jobs(get_db_path(), statuses=["pending", "approved", "rejected"])
submit_task(get_db_path(), "discovery", 0)
st.session_state.pop("confirm_purge", None)
st.rerun()
if c2.button("Cancel ", use_container_width=True):
st.session_state.pop("confirm_purge", None)
st.rerun()
st.divider() st.divider()
# ── Rarely needed (collapsed) ───────────────────────────────────────────── pending_col, nonremote_col, approved_col, _ = st.columns(4)
with st.expander("More options", expanded=False):
_rare1, _rare2, _rare3 = st.columns(3)
with _rare1: with pending_col:
st.markdown("**Purge email data**") st.markdown("**Purge pending review**")
st.caption("Clears all email thread logs and email-sourced pending jobs.") st.caption("Removes only _pending_ listings, keeping your rejected history intact.")
if st.button("📧 Purge Email Data", use_container_width=True): if st.button("🗑 Purge Pending Only", use_container_width=True):
st.session_state["confirm_dz"] = "email" st.session_state["confirm_purge"] = "pending_only"
if st.session_state.get("confirm_dz") == "email":
st.warning("Deletes all email contacts and email-sourced jobs. Cannot be undone.")
_ec1, _ec2 = st.columns(2)
if _ec1.button("Yes, purge emails", type="primary", use_container_width=True, key="dz_email_confirm"):
contacts, jobs = purge_email_data(get_db_path())
st.success(f"Purged {contacts} email contacts, {jobs} email jobs.")
st.session_state.pop("confirm_dz", None)
st.rerun()
if _ec2.button("Cancel", use_container_width=True, key="dz_email_cancel"):
st.session_state.pop("confirm_dz", None)
st.rerun()
with _rare2: if st.session_state.get("confirm_purge") == "pending_only":
st.markdown("**Purge non-remote**") st.warning("Deletes all pending jobs. Rejected jobs are kept. Cannot be undone.")
st.caption("Removes pending/approved/rejected on-site listings from the DB.") c1, c2 = st.columns(2)
if st.button("🏢 Purge On-site Jobs", use_container_width=True): if c1.button("Yes, purge pending", type="primary", use_container_width=True):
st.session_state["confirm_dz"] = "non_remote" deleted = purge_jobs(get_db_path(), statuses=["pending"])
if st.session_state.get("confirm_dz") == "non_remote": st.success(f"Purged {deleted} pending jobs.")
st.warning("Deletes all non-remote jobs not yet applied to. Cannot be undone.") st.session_state.pop("confirm_purge", None)
_rc1, _rc2 = st.columns(2) st.rerun()
if _rc1.button("Yes, purge on-site", type="primary", use_container_width=True, key="dz_nonremote_confirm"): if c2.button("Cancel ", use_container_width=True):
deleted = purge_non_remote(get_db_path()) st.session_state.pop("confirm_purge", None)
st.success(f"Purged {deleted} non-remote jobs.") st.rerun()
st.session_state.pop("confirm_dz", None)
st.rerun()
if _rc2.button("Cancel", use_container_width=True, key="dz_nonremote_cancel"):
st.session_state.pop("confirm_dz", None)
st.rerun()
with _rare3: with nonremote_col:
st.markdown("**Wipe all + re-scrape**") st.markdown("**Purge non-remote**")
st.caption("Deletes all non-applied jobs then immediately runs a fresh discovery.") st.caption("Removes pending/approved/rejected jobs where remote is not set. Keeps anything already in the pipeline.")
if st.button("🔄 Wipe + Re-scrape", use_container_width=True): if st.button("🏢 Purge On-site Jobs", use_container_width=True):
st.session_state["confirm_dz"] = "rescrape" st.session_state["confirm_purge"] = "non_remote"
if st.session_state.get("confirm_dz") == "rescrape":
st.warning("Wipes ALL pending, approved, and rejected jobs, then re-scrapes. Applied and synced records are kept.") if st.session_state.get("confirm_purge") == "non_remote":
_wc1, _wc2 = st.columns(2) st.warning("Deletes all non-remote jobs not yet applied to. Cannot be undone.")
if _wc1.button("Yes, wipe + scrape", type="primary", use_container_width=True, key="dz_rescrape_confirm"): c1, c2 = st.columns(2)
purge_jobs(get_db_path(), statuses=["pending", "approved", "rejected"]) if c1.button("Yes, purge on-site", type="primary", use_container_width=True):
submit_task(get_db_path(), "discovery", 0) deleted = purge_non_remote(get_db_path())
st.session_state.pop("confirm_dz", None) st.success(f"Purged {deleted} non-remote jobs.")
st.rerun() st.session_state.pop("confirm_purge", None)
if _wc2.button("Cancel", use_container_width=True, key="dz_rescrape_cancel"): st.rerun()
st.session_state.pop("confirm_dz", None) if c2.button("Cancel ", use_container_width=True):
st.rerun() st.session_state.pop("confirm_purge", None)
st.rerun()
with approved_col:
st.markdown("**Purge approved (unapplied)**")
st.caption("Removes _approved_ jobs you haven't applied to yet — e.g. to reset after a review pass.")
if st.button("🗑 Purge Approved", use_container_width=True):
st.session_state["confirm_purge"] = "approved_only"
if st.session_state.get("confirm_purge") == "approved_only":
st.warning("Deletes all approved-but-not-applied jobs. Cannot be undone.")
c1, c2 = st.columns(2)
if c1.button("Yes, purge approved", type="primary", use_container_width=True):
deleted = purge_jobs(get_db_path(), statuses=["approved"])
st.success(f"Purged {deleted} approved jobs.")
st.session_state.pop("confirm_purge", None)
st.rerun()
if c2.button("Cancel ", use_container_width=True):
st.session_state.pop("confirm_purge", None)
st.rerun()
st.divider()
archive_col1, archive_col2, _, _ = st.columns(4)
with archive_col1:
st.markdown("**Archive remaining**")
st.caption(
"Move all _pending_ and _rejected_ jobs to archived status. "
"Archived jobs stay in the DB for dedup — they just won't appear in Job Review."
)
if st.button("📦 Archive Pending + Rejected", use_container_width=True):
st.session_state["confirm_purge"] = "archive_remaining"
if st.session_state.get("confirm_purge") == "archive_remaining":
st.info("Jobs will be archived (not deleted) — URLs are kept for dedup.")
c1, c2 = st.columns(2)
if c1.button("Yes, archive", type="primary", use_container_width=True):
archived = archive_jobs(get_db_path(), statuses=["pending", "rejected"])
st.success(f"Archived {archived} jobs.")
st.session_state.pop("confirm_purge", None)
st.rerun()
if c2.button("Cancel ", use_container_width=True):
st.session_state.pop("confirm_purge", None)
st.rerun()
with archive_col2:
st.markdown("**Archive approved (unapplied)**")
st.caption("Archive _approved_ listings you decided to skip — keeps history without cluttering the apply queue.")
if st.button("📦 Archive Approved", use_container_width=True):
st.session_state["confirm_purge"] = "archive_approved"
if st.session_state.get("confirm_purge") == "archive_approved":
st.info("Approved jobs will be archived (not deleted).")
c1, c2 = st.columns(2)
if c1.button("Yes, archive approved", type="primary", use_container_width=True):
archived = archive_jobs(get_db_path(), statuses=["approved"])
st.success(f"Archived {archived} approved jobs.")
st.session_state.pop("confirm_purge", None)
st.rerun()
if c2.button("Cancel ", use_container_width=True):
st.session_state.pop("confirm_purge", None)
st.rerun()
# ── Setup banners ───────────────────────────────────────────────────────────── # ── Setup banners ─────────────────────────────────────────────────────────────
if _profile and _profile.wizard_complete: if _profile and _profile.wizard_complete:

View file

@ -17,16 +17,10 @@ sys.path.insert(0, str(Path(__file__).parent.parent))
logging.basicConfig(level=logging.WARNING, format="%(name)s %(levelname)s: %(message)s") logging.basicConfig(level=logging.WARNING, format="%(name)s %(levelname)s: %(message)s")
# Load .env before any os.environ reads — safe to call inside Docker too
# (uses setdefault, so Docker-injected vars take precedence over .env values)
from circuitforge_core.config.settings import load_env as _load_env
_load_env(Path(__file__).parent.parent / ".env")
IS_DEMO = os.environ.get("DEMO_MODE", "").lower() in ("1", "true", "yes") IS_DEMO = os.environ.get("DEMO_MODE", "").lower() in ("1", "true", "yes")
import streamlit as st import streamlit as st
from scripts.db import DEFAULT_DB, init_db, get_active_tasks from scripts.db import DEFAULT_DB, init_db, get_active_tasks
from scripts.db_migrate import migrate_db
from app.feedback import inject_feedback_button from app.feedback import inject_feedback_button
from app.cloud_session import resolve_session, get_db_path, get_config_dir, get_cloud_tier from app.cloud_session import resolve_session, get_db_path, get_config_dir, get_cloud_tier
import sqlite3 import sqlite3
@ -42,7 +36,6 @@ st.set_page_config(
resolve_session("peregrine") resolve_session("peregrine")
init_db(get_db_path()) init_db(get_db_path())
migrate_db(Path(get_db_path()))
# Demo tier — initialize once per session (cookie persistence handled client-side) # Demo tier — initialize once per session (cookie persistence handled client-side)
if IS_DEMO and "simulated_tier" not in st.session_state: if IS_DEMO and "simulated_tier" not in st.session_state:

View file

@ -203,16 +203,8 @@ def get_config_dir() -> Path:
isolated and never shared across tenants. isolated and never shared across tenants.
Local: repo-level config/ directory. Local: repo-level config/ directory.
""" """
if CLOUD_MODE: if CLOUD_MODE and st.session_state.get("db_path"):
db_path = st.session_state.get("db_path") return Path(st.session_state["db_path"]).parent / "config"
if db_path:
return Path(db_path).parent / "config"
# Session not resolved yet (resolve_session() should have called st.stop() already).
# Return an isolated empty temp dir rather than the repo config, which may contain
# another user's data baked into the image.
_safe = Path("/tmp/peregrine-cloud-noconfig")
_safe.mkdir(exist_ok=True)
return _safe
return Path(__file__).parent.parent / "config" return Path(__file__).parent.parent / "config"

View file

@ -124,6 +124,12 @@ def sync_ui_cookie(yaml_path: Path, tier: str) -> None:
# UI components must not crash the app — silent fallback to default # UI components must not crash the app — silent fallback to default
pref = "streamlit" pref = "streamlit"
# Demo mode: Vue SPA has no demo data wiring — always serve Streamlit.
# (The tier downgrade check below is skipped in demo mode, but we must
# also block the Vue navigation itself so Caddy doesn't route to a blank SPA.)
if pref == "vue" and _DEMO_MODE:
pref = "streamlit"
# Tier downgrade protection (skip in demo — demo bypasses tier gate) # Tier downgrade protection (skip in demo — demo bypasses tier gate)
if pref == "vue" and not _DEMO_MODE and not can_use(tier, "vue_ui_beta"): if pref == "vue" and not _DEMO_MODE and not can_use(tier, "vue_ui_beta"):
if profile is not None: if profile is not None:

View file

@ -457,11 +457,6 @@ elif step == 5:
from app.wizard.step_inference import validate from app.wizard.step_inference import validate
st.subheader("Step 5 \u2014 Inference & API Keys") st.subheader("Step 5 \u2014 Inference & API Keys")
st.info(
"**Simplest setup:** set `OLLAMA_HOST` in your `.env` file — "
"Peregrine auto-detects it, no config file needed. "
"Or use the fields below to configure API keys and endpoints."
)
profile = saved_yaml.get("inference_profile", "remote") profile = saved_yaml.get("inference_profile", "remote")
if profile == "remote": if profile == "remote":
@ -471,18 +466,8 @@ elif step == 5:
placeholder="https://api.together.xyz/v1") placeholder="https://api.together.xyz/v1")
openai_key = st.text_input("Endpoint API Key (optional)", type="password", openai_key = st.text_input("Endpoint API Key (optional)", type="password",
key="oai_key") if openai_url else "" key="oai_key") if openai_url else ""
ollama_host = st.text_input("Ollama host (optional \u2014 local fallback)",
placeholder="http://localhost:11434",
key="ollama_host_input")
ollama_model = st.text_input("Ollama model (optional)",
value="llama3.2:3b",
key="ollama_model_input")
else: else:
st.info(f"Local mode ({profile}): Ollama provides inference.") st.info(f"Local mode ({profile}): Ollama provides inference.")
import os
_ollama_host_env = os.environ.get("OLLAMA_HOST", "")
if _ollama_host_env:
st.caption(f"OLLAMA_HOST from .env: `{_ollama_host_env}`")
anthropic_key = openai_url = openai_key = "" anthropic_key = openai_url = openai_key = ""
with st.expander("Advanced \u2014 Service Ports & Hosts"): with st.expander("Advanced \u2014 Service Ports & Hosts"):
@ -561,14 +546,6 @@ elif step == 5:
if anthropic_key or openai_url: if anthropic_key or openai_url:
env_path.write_text("\n".join(env_lines) + "\n") env_path.write_text("\n".join(env_lines) + "\n")
if profile == "remote":
if ollama_host:
env_lines = _set_env(env_lines, "OLLAMA_HOST", ollama_host)
if ollama_model:
env_lines = _set_env(env_lines, "OLLAMA_MODEL", ollama_model)
if ollama_host or ollama_model:
env_path.write_text("\n".join(env_lines) + "\n")
_save_yaml({"services": svc, "wizard_step": 5}) _save_yaml({"services": svc, "wizard_step": 5})
st.session_state.wizard_step = 6 st.session_state.wizard_step = 6
st.rerun() st.rerun()

Binary file not shown.

Before

Width:  |  Height:  |  Size: 298 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 276 KiB

View file

@ -49,7 +49,6 @@ FEATURES: dict[str, str] = {
"company_research": "paid", "company_research": "paid",
"interview_prep": "paid", "interview_prep": "paid",
"survey_assistant": "paid", "survey_assistant": "paid",
"llm_reply_draft": "paid",
# Orchestration / infrastructure — stays gated # Orchestration / infrastructure — stays gated
"email_classifier": "paid", "email_classifier": "paid",
@ -82,7 +81,6 @@ BYOK_UNLOCKABLE: frozenset[str] = frozenset({
"company_research", "company_research",
"interview_prep", "interview_prep",
"survey_assistant", "survey_assistant",
"llm_reply_draft",
}) })
# Demo mode flag — read from environment at module load time. # Demo mode flag — read from environment at module load time.

View file

@ -6,40 +6,41 @@
# Caddy injects the Directus session cookie as X-CF-Session header before forwarding. # Caddy injects the Directus session cookie as X-CF-Session header before forwarding.
# cloud_session.py resolves user_id → per-user db_path at session init. # cloud_session.py resolves user_id → per-user db_path at session init.
# #
# Services: api (FastAPI :8601), web (Vue :8508), searxng (internal)
# Streamlit app service removed — Vue+FastAPI is the only frontend (peregrine#104).
#
# Usage: # Usage:
# docker compose -f compose.cloud.yml --project-name peregrine-cloud up -d # docker compose -f compose.cloud.yml --project-name peregrine-cloud up -d
# docker compose -f compose.cloud.yml --project-name peregrine-cloud down # docker compose -f compose.cloud.yml --project-name peregrine-cloud down
# docker compose -f compose.cloud.yml --project-name peregrine-cloud logs api -f # docker compose -f compose.cloud.yml --project-name peregrine-cloud logs app -f
services: services:
api: app:
build: build:
context: .. context: ..
dockerfile: peregrine/Dockerfile.cfcore dockerfile: peregrine/Dockerfile.cfcore
command: > container_name: peregrine-cloud
bash -c "uvicorn dev_api:app --host 0.0.0.0 --port 8601"
ports: ports:
- "8601:8601" # LAN-accessible — Caddy gates the public route; Kuma monitors this port directly - "8505:8501"
volumes: volumes:
- /devl/menagerie-data:/devl/menagerie-data - /devl/menagerie-data:/devl/menagerie-data # per-user data trees
- ./config/llm.cloud.yaml:/app/config/llm.yaml:ro - ./config/llm.cloud.yaml:/app/config/llm.yaml:ro # cloud-safe backends only (no claude_code/copilot/anthropic)
environment: environment:
- CLOUD_MODE=true - CLOUD_MODE=true
- CLOUD_DATA_ROOT=/devl/menagerie-data - CLOUD_DATA_ROOT=/devl/menagerie-data
- STAGING_DB=/devl/menagerie-data/cloud-default.db
- DIRECTUS_JWT_SECRET=${DIRECTUS_JWT_SECRET} - DIRECTUS_JWT_SECRET=${DIRECTUS_JWT_SECRET}
- CF_SERVER_SECRET=${CF_SERVER_SECRET} - CF_SERVER_SECRET=${CF_SERVER_SECRET}
- PLATFORM_DB_URL=${PLATFORM_DB_URL} - PLATFORM_DB_URL=${PLATFORM_DB_URL}
- HEIMDALL_URL=${HEIMDALL_URL:-http://cf-license:8000} - HEIMDALL_URL=${HEIMDALL_URL:-http://cf-license:8000}
- HEIMDALL_ADMIN_TOKEN=${HEIMDALL_ADMIN_TOKEN} - HEIMDALL_ADMIN_TOKEN=${HEIMDALL_ADMIN_TOKEN}
- STAGING_DB=/devl/menagerie-data/cloud-default.db # fallback only — never used
- DOCS_DIR=/tmp/cloud-docs
- STREAMLIT_SERVER_BASE_URL_PATH=peregrine
- PYTHONUNBUFFERED=1 - PYTHONUNBUFFERED=1
- PEREGRINE_CADDY_PROXY=1
- CF_ORCH_URL=http://host.docker.internal:7700
- DEMO_MODE=false
- FORGEJO_API_TOKEN=${FORGEJO_API_TOKEN:-} - FORGEJO_API_TOKEN=${FORGEJO_API_TOKEN:-}
- GPU_SERVER_URL=${GPU_SERVER_URL:-http://host.docker.internal:7700} depends_on:
- CF_ORCH_URL=${CF_ORCH_URL:-${GPU_SERVER_URL:-http://host.docker.internal:7700}} searxng:
- CF_APP_NAME=peregrine condition: service_healthy
extra_hosts: extra_hosts:
- "host.docker.internal:host-gateway" - "host.docker.internal:host-gateway"
restart: unless-stopped restart: unless-stopped
@ -52,13 +53,8 @@ services:
VITE_BASE_PATH: /peregrine/ VITE_BASE_PATH: /peregrine/
ports: ports:
- "8508:80" - "8508:80"
depends_on:
- api
restart: unless-stopped restart: unless-stopped
# cf-orch-agent: not needed in cloud — a host-native agent already runs on :7701
# and is registered with the coordinator. app/api reach it via CF_ORCH_URL.
searxng: searxng:
image: searxng/searxng:latest image: searxng/searxng:latest
volumes: volumes:

View file

@ -15,21 +15,19 @@
services: services:
api: app:
build: . build: .
command: > ports:
bash -c "uvicorn dev_api:app --host 0.0.0.0 --port 8601" - "8504:8501"
volumes: volumes:
- ./demo/config:/app/config - ./demo/config:/app/config
- ./demo:/app/demo:ro # seed.sql lives here; read-only - ./demo/data:/app/data
# /app/data is tmpfs — ephemeral, resets on every container start # No /docs mount — demo has no personal documents
tmpfs:
- /app/data
environment: environment:
- DEMO_MODE=true - DEMO_MODE=true
- STAGING_DB=/app/data/staging.db - STAGING_DB=/app/data/staging.db
- DEMO_SEED_FILE=/app/demo/seed.sql
- DOCS_DIR=/tmp/demo-docs - DOCS_DIR=/tmp/demo-docs
- STREAMLIT_SERVER_BASE_URL_PATH=peregrine
- PYTHONUNBUFFERED=1 - PYTHONUNBUFFERED=1
- PYTHONLOGGING=WARNING - PYTHONLOGGING=WARNING
# No API keys — inference is blocked by DEMO_MODE before any key is needed # No API keys — inference is blocked by DEMO_MODE before any key is needed
@ -39,7 +37,6 @@ services:
extra_hosts: extra_hosts:
- "host.docker.internal:host-gateway" - "host.docker.internal:host-gateway"
restart: unless-stopped restart: unless-stopped
# No host port — nginx proxies /api/ → api:8601 internally
web: web:
build: build:
@ -48,9 +45,7 @@ services:
args: args:
VITE_BASE_PATH: /peregrine/ VITE_BASE_PATH: /peregrine/
ports: ports:
- "8504:80" # demo.circuitforge.tech/peregrine* → host:8504 - "8507:80"
depends_on:
- api
restart: unless-stopped restart: unless-stopped
searxng: searxng:

View file

@ -29,8 +29,7 @@ services:
- STAGING_DB=/devl/job-seeker/staging.db - STAGING_DB=/devl/job-seeker/staging.db
- PYTHONUNBUFFERED=1 - PYTHONUNBUFFERED=1
- STREAMLIT_SERVER_BASE_URL_PATH= - STREAMLIT_SERVER_BASE_URL_PATH=
- GPU_SERVER_URL=${GPU_SERVER_URL:-http://host.docker.internal:7700} - CF_ORCH_URL=http://host.docker.internal:7700
- CF_ORCH_URL=${CF_ORCH_URL:-${GPU_SERVER_URL:-http://host.docker.internal:7700}}
extra_hosts: extra_hosts:
- "host.docker.internal:host-gateway" - "host.docker.internal:host-gateway"
restart: "no" restart: "no"

View file

@ -1,7 +1,48 @@
# compose.yml — Peregrine by Circuit Forge LLC # compose.yml — Peregrine by Circuit Forge LLC
# Streamlit (app service) removed — Vue+FastAPI is the only frontend (#104) # Profiles: remote | cpu | single-gpu | dual-gpu-ollama
services: services:
app:
build:
context: ..
dockerfile: peregrine/Dockerfile.cfcore
command: >
bash -c "streamlit run app/app.py
--server.port=8501
--server.headless=true
--server.fileWatcherType=none
2>&1 | tee /app/data/.streamlit.log"
ports:
- "${STREAMLIT_PORT:-8501}:8501"
volumes:
- ./config:/app/config
- ./data:/app/data
- ${DOCS_DIR:-~/Documents/JobSearch}:/docs
- /var/run/docker.sock:/var/run/docker.sock
- /usr/bin/docker:/usr/bin/docker:ro
environment:
- STAGING_DB=/app/data/staging.db
- DOCS_DIR=/docs
- ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY:-}
- OPENAI_COMPAT_URL=${OPENAI_COMPAT_URL:-}
- OPENAI_COMPAT_KEY=${OPENAI_COMPAT_KEY:-}
- PEREGRINE_GPU_COUNT=${PEREGRINE_GPU_COUNT:-0}
- PEREGRINE_GPU_NAMES=${PEREGRINE_GPU_NAMES:-}
- RECOMMENDED_PROFILE=${RECOMMENDED_PROFILE:-remote}
- STREAMLIT_SERVER_BASE_URL_PATH=${STREAMLIT_BASE_URL_PATH:-}
- FORGEJO_API_TOKEN=${FORGEJO_API_TOKEN:-}
- FORGEJO_REPO=${FORGEJO_REPO:-}
- FORGEJO_API_URL=${FORGEJO_API_URL:-}
- PYTHONUNBUFFERED=1
- PYTHONLOGGING=WARNING
- PEREGRINE_CADDY_PROXY=1
depends_on:
searxng:
condition: service_healthy
extra_hosts:
- "host.docker.internal:host-gateway"
restart: unless-stopped
api: api:
build: build:
context: .. context: ..
@ -20,9 +61,6 @@ services:
- OPENAI_COMPAT_KEY=${OPENAI_COMPAT_KEY:-} - OPENAI_COMPAT_KEY=${OPENAI_COMPAT_KEY:-}
- PEREGRINE_GPU_COUNT=${PEREGRINE_GPU_COUNT:-0} - PEREGRINE_GPU_COUNT=${PEREGRINE_GPU_COUNT:-0}
- PEREGRINE_GPU_NAMES=${PEREGRINE_GPU_NAMES:-} - PEREGRINE_GPU_NAMES=${PEREGRINE_GPU_NAMES:-}
- GPU_SERVER_URL=${GPU_SERVER_URL:-${CF_ORCH_URL:-http://host.docker.internal:7700}}
- CF_ORCH_URL=${CF_ORCH_URL:-${GPU_SERVER_URL:-http://host.docker.internal:7700}}
- CF_APP_NAME=peregrine
- PYTHONUNBUFFERED=1 - PYTHONUNBUFFERED=1
extra_hosts: extra_hosts:
- "host.docker.internal:host-gateway" - "host.docker.internal:host-gateway"
@ -91,31 +129,6 @@ services:
profiles: [single-gpu, dual-gpu-ollama, dual-gpu-vllm, dual-gpu-mixed] profiles: [single-gpu, dual-gpu-ollama, dual-gpu-vllm, dual-gpu-mixed]
restart: unless-stopped restart: unless-stopped
cf-orch-agent:
build:
context: ..
dockerfile: peregrine/Dockerfile.cfcore
command: ["/bin/sh", "/app/docker/cf-orch-agent/start.sh"]
ports:
- "${CF_ORCH_AGENT_PORT:-7701}:7701"
environment:
- CF_ORCH_COORDINATOR_URL=${CF_ORCH_COORDINATOR_URL:-http://host.docker.internal:7700}
- CF_ORCH_NODE_ID=${CF_ORCH_NODE_ID:-peregrine}
- CF_ORCH_AGENT_PORT=${CF_ORCH_AGENT_PORT:-7701}
- CF_ORCH_ADVERTISE_HOST=${CF_ORCH_ADVERTISE_HOST:-}
- PYTHONUNBUFFERED=1
extra_hosts:
- "host.docker.internal:host-gateway"
deploy:
resources:
reservations:
devices:
- driver: nvidia
count: all
capabilities: [gpu]
profiles: [single-gpu, dual-gpu-ollama, dual-gpu-vllm, dual-gpu-mixed]
restart: unless-stopped
finetune: finetune:
build: build:
context: . context: .

View file

@ -1,23 +0,0 @@
# config/label_tool.yaml — Multi-account IMAP config for the email label tool
# Copy to config/label_tool.yaml and fill in your credentials.
# This file is gitignored.
accounts:
- name: "Gmail"
host: "imap.gmail.com"
port: 993
username: "you@gmail.com"
password: "your-app-password" # Use an App Password, not your login password
folder: "INBOX"
days_back: 90
- name: "Outlook"
host: "outlook.office365.com"
port: 993
username: "you@outlook.com"
password: "your-app-password"
folder: "INBOX"
days_back: 90
# Optional: limit emails fetched per account per run (0 = unlimited)
max_per_account: 500

View file

@ -45,11 +45,6 @@ backends:
model: __auto__ model: __auto__
supports_images: false supports_images: false
type: openai_compat type: openai_compat
cf_orch:
service: vllm
model_candidates:
- Qwen2.5-3B-Instruct
ttl_s: 300
vllm_research: vllm_research:
api_key: '' api_key: ''
base_url: http://host.docker.internal:8000/v1 base_url: http://host.docker.internal:8000/v1
@ -57,11 +52,6 @@ backends:
model: __auto__ model: __auto__
supports_images: false supports_images: false
type: openai_compat type: openai_compat
cf_orch:
service: vllm
model_candidates:
- Qwen2.5-3B-Instruct
ttl_s: 300
fallback_order: fallback_order:
- vllm - vllm
- ollama - ollama

View file

@ -1,11 +1,4 @@
backends: backends:
cf_text:
api_key: any
base_url: http://host.docker.internal:8006/v1
enabled: true
model: cf-text
supports_images: false
type: openai_compat
anthropic: anthropic:
api_key_env: ANTHROPIC_API_KEY api_key_env: ANTHROPIC_API_KEY
enabled: false enabled: false
@ -41,7 +34,7 @@ backends:
supports_images: false supports_images: false
type: openai_compat type: openai_compat
vision_service: vision_service:
base_url: http://vision:8002 base_url: http://host.docker.internal:8002
enabled: true enabled: true
supports_images: true supports_images: true
type: vision_service type: vision_service
@ -65,7 +58,6 @@ backends:
supports_images: false supports_images: false
type: openai_compat type: openai_compat
fallback_order: fallback_order:
- cf_text
- ollama - ollama
- claude_code - claude_code
- vllm - vllm
@ -75,7 +67,6 @@ research_fallback_order:
- claude_code - claude_code
- vllm_research - vllm_research
- ollama_research - ollama_research
- cf_text
- github_copilot - github_copilot
- anthropic - anthropic
vision_fallback_order: vision_fallback_order:

View file

@ -45,89 +45,6 @@ backends:
enabled: false enabled: false
type: vision_service type: vision_service
supports_images: true supports_images: true
# ── cf-orch task-routed backends (preferred for GPU inference) ────────────
# Use these when GPU_SERVER_URL is configured. The coordinator resolves
# product+task → model_id → node via assignments.yaml; no model IDs needed here.
# Set enabled: true once GPU_SERVER_URL is configured.
cf_cover_letter:
type: openai_compat
enabled: false
base_url: http://localhost:8008/v1 # fallback when cf-orch is unavailable
model: __auto__
api_key: any
supports_images: false
cf_orch:
product: peregrine
task: cover_letter
ttl_s: 3600
cf_ats_rewrite:
type: openai_compat
enabled: false
base_url: http://localhost:8008/v1
model: __auto__
api_key: any
supports_images: false
cf_orch:
product: peregrine
task: ats_rewrite
ttl_s: 3600
cf_job_research:
type: openai_compat
enabled: false
base_url: http://localhost:8008/v1
model: __auto__
api_key: any
supports_images: false
cf_orch:
product: peregrine
task: job_research
ttl_s: 3600
cf_interview_prep:
type: openai_compat
enabled: false
base_url: http://localhost:8008/v1
model: __auto__
api_key: any
supports_images: false
cf_orch:
product: peregrine
task: interview_prep
ttl_s: 3600
# ── cf-orch trunk services (service-based, legacy) ─────────────────────────
# Generic service allocation — use the task-routed backends above when possible.
# Set GPU_SERVER_URL (env) or url below; leave enabled: false if cf-orch is
# not deployed in your environment.
cf_text:
type: openai_compat
enabled: false
base_url: http://localhost:8008/v1 # fallback when cf-orch is not available
model: __auto__
api_key: any
supports_images: false
cf_orch:
service: cf-text
# model_candidates: leave empty to use the service's default_model,
# or specify an alias from the node's catalog (e.g. "qwen2.5-3b").
model_candidates: []
ttl_s: 3600
cf_voice:
type: openai_compat
enabled: false
base_url: http://localhost:8009/v1 # fallback when cf-orch is not available
model: __auto__
api_key: any
supports_images: false
cf_orch:
service: cf-voice
model_candidates: []
ttl_s: 3600
fallback_order: fallback_order:
- ollama - ollama
- claude_code - claude_code

View file

@ -1,258 +0,0 @@
# Mission domain signal configuration for cover letter generation.
#
# When a job description or company name matches signals in a domain,
# the cover letter prompt injects a Para 3 hint to reflect genuine personal
# alignment. Dict order = match priority (first match wins).
#
# Users can add custom domains under `mission_preferences` in user.yaml.
# Any key in mission_preferences that is NOT listed here is treated as a
# user-defined domain: no signal detection, custom note only (skipped if
# the job description doesn't contain the key as a literal word).
#
# Schema per domain:
# signals: list[str] — lowercase keywords to scan for in "company + JD"
# default_note: str — hint injected when user has no custom note for domain
domains:
music:
signals:
- music
- spotify
- tidal
- soundcloud
- bandcamp
- apple music
- distrokid
- cd baby
- landr
- beatport
- reverb
- vinyl
- streaming
- artist
- label
- live nation
- ticketmaster
- aeg
- songkick
- concert
- venue
- festival
- audio
- podcast
- studio
- record
- musician
- playlist
default_note: >
This company is in the music industry — an industry the candidate finds genuinely
compelling. Para 3 should warmly and specifically reflect this authentic alignment,
not as a generic fan statement, but as an honest statement of where they'd love to
apply their skills.
animal_welfare:
signals:
- animal
- shelter
- rescue
- humane society
- spca
- aspca
- veterinary
- "vet "
- wildlife
- "pet "
- adoption
- foster
- dog
- cat
- feline
- canine
- sanctuary
- zoo
default_note: >
This organization works in animal welfare/rescue — a mission the candidate finds
genuinely meaningful. Para 3 should reflect this authentic connection warmly and
specifically, tying their skills to this mission.
education:
signals:
- education
- school
- learning
- student
- edtech
- classroom
- curriculum
- tutoring
- academic
- university
- kids
- children
- youth
- literacy
- khan academy
- duolingo
- chegg
- coursera
- instructure
- canvas lms
- clever
- district
- teacher
- k-12
- k12
- grade
- pedagogy
default_note: >
This company works in education or EdTech — a domain that resonates with the
candidate's values. Para 3 should reflect this authentic connection specifically
and warmly.
social_impact:
signals:
- nonprofit
- non-profit
- "501(c)"
- social impact
- mission-driven
- public benefit
- community
- underserved
- equity
- justice
- humanitarian
- advocacy
- charity
- foundation
- ngo
- social good
- civic
- public health
- mental health
- food security
- housing
- homelessness
- poverty
- workforce development
default_note: >
This organization is mission-driven / social impact focused — exactly the kind of
cause the candidate cares deeply about. Para 3 should warmly reflect their genuine
desire to apply their skills to work that makes a real difference in people's lives.
# Health listed last — genuine but lower-priority connection.
health:
signals:
- patient
- patients
- healthcare
- health tech
- healthtech
- pharma
- pharmaceutical
- clinical
- medical
- hospital
- clinic
- therapy
- therapist
- rare disease
- life sciences
- life science
- treatment
- prescription
- biotech
- biopharma
- medtech
- behavioral health
- population health
- care management
- care coordination
- oncology
- specialty pharmacy
- provider network
- payer
- health plan
- benefits administration
- ehr
- emr
- fhir
- hipaa
default_note: >
This company works in healthcare, life sciences, or patient care.
Do NOT write about the candidate's passion for pharmaceuticals or healthcare as an
industry. Instead, Para 3 should reflect genuine care for the PEOPLE these companies
exist to serve: those navigating complex, often invisible, or unusual health journeys;
patients facing rare or poorly understood conditions; individuals whose situations don't
fit a clean category. The connection is to the humans behind the data, not the industry.
If the user has provided a personal note, use that to anchor Para 3 specifically.
# Extended domains — added 2026-04-12
privacy:
signals:
- privacy
- data rights
- surveillance
- gdpr
- ccpa
- anonymity
- end-to-end encryption
- open source
- decentralized
- self-hosted
- zero knowledge
- data sovereignty
- digital rights
- eff
- electronic frontier
default_note: >
This company operates in the privacy, data rights, or digital rights space —
a domain the candidate genuinely cares about. Para 3 should reflect their
authentic belief in user autonomy and data sovereignty, not as abstract principle
but as something that shapes how they approach their work.
accessibility:
signals:
- accessibility
- assistive technology
- a11y
- wcag
- screen reader
- adaptive technology
- disability
- neurodivergent
- neurodiversity
- adhd
- autism
- inclusive design
- universal design
- accommodations
- ada compliance
default_note: >
This company works in accessibility or assistive technology — a mission the
candidate feels genuine, personal alignment with. Para 3 should reflect authentic
investment in building tools and systems that work for everyone, especially those
whose needs are most often overlooked in mainstream product development.
open_source:
signals:
- open source
- open-source
- linux foundation
- apache foundation
- free software
- gnu
- contributor
- maintainer
- upstream
- community-driven
- innersource
- copyleft
- mozilla
- wikimedia
default_note: >
This organization is rooted in open source culture — a community the candidate
actively participates in and believes in. Para 3 should reflect genuine investment
in the collaborative, transparent, and community-driven approach to building
software that lasts.

View file

@ -1,11 +1,9 @@
candidate_accessibility_focus: false candidate_accessibility_focus: false
candidate_lgbtq_focus: false candidate_lgbtq_focus: false
candidate_voice: Clear, direct, and human. Focuses on impact over jargon. Avoids candidate_voice: Clear, direct, and human. Focuses on impact over jargon.
buzzwords and lets the work speak. career_summary: 'Experienced software engineer with a background in full-stack development,
career_summary: 'Senior UX Designer with 6 years of experience designing for music, cloud infrastructure, and data pipelines. Passionate about building tools that help
education, and media products. Strong background in cross-platform design systems, people navigate complex systems.
user research, and 0-to-1 feature development. Passionate about making complex
digital experiences feel effortless.
' '
dev_tier_override: null dev_tier_override: null
@ -18,9 +16,9 @@ inference_profile: remote
linkedin: '' linkedin: ''
mission_preferences: mission_preferences:
animal_welfare: '' animal_welfare: ''
education: Education technology is where design decisions have long-term impact on how people learn. education: ''
health: '' health: ''
music: Love designing for music and audio discovery — it combines craft with genuine emotional resonance. music: ''
social_impact: Want my work to reach people who need it most. social_impact: Want my work to reach people who need it most.
name: Demo User name: Demo User
nda_companies: [] nda_companies: []

View file

@ -1,259 +0,0 @@
-- jobs
-- Demo seed rows for the jobs table. Later statements in this file refer to
-- these rows by numeric id (1, 2, 3, 13, 14, 15, 16), which assumes the rows
-- receive ids 1..16 in insertion order, so keep the order stable.
-- Salary ranges are written "low–high" with an en dash between the bounds.
INSERT INTO jobs (title, company, url, source, location, is_remote, salary, match_score, status, date_found, date_posted, cover_letter, applied_at, phone_screen_at, interviewing_at, offer_at, hired_at, interview_date, rejection_stage, hired_feedback) VALUES ('UX Designer', 'Spotify', 'https://www.linkedin.com/jobs/view/1000001', 'linkedin', 'Remote', '1', '$110k–$140k', '94.0', 'approved', '2026-04-14', '2026-04-12', 'Dear Hiring Manager,
I''m excited to apply for the UX Designer role at Spotify. With five years of
experience designing for music discovery and cross-platform experiences, I''ve
consistently shipped features that make complex audio content feel effortless to
navigate. At my last role I led a redesign of the playlist creation flow that
reduced drop-off by 31%.
Spotify''s commitment to artist and listener discovery and its recent push into
audiobooks and podcast tooling aligns directly with the kind of cross-format
design challenges I''m most energised by.
I''d love to bring that focus to your product design team.
Warm regards,
[Your name]
', NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL);
INSERT INTO jobs (title, company, url, source, location, is_remote, salary, match_score, status, date_found, date_posted, cover_letter, applied_at, phone_screen_at, interviewing_at, offer_at, hired_at, interview_date, rejection_stage, hired_feedback) VALUES ('Product Designer', 'Duolingo', 'https://www.linkedin.com/jobs/view/1000002', 'linkedin', 'Pittsburgh, PA', '0', '$95k–$120k', '87.0', 'approved', '2026-04-13', '2026-04-10', 'Draft in progress — cover letter generating…', NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL);
INSERT INTO jobs (title, company, url, source, location, is_remote, salary, match_score, status, date_found, date_posted, cover_letter, applied_at, phone_screen_at, interviewing_at, offer_at, hired_at, interview_date, rejection_stage, hired_feedback) VALUES ('UX Lead', 'NPR', 'https://www.indeed.com/viewjob?jk=1000003', 'indeed', 'Washington, DC', '1', '$120k–$150k', '81.0', 'approved', '2026-04-12', '2026-04-08', NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL);
INSERT INTO jobs (title, company, url, source, location, is_remote, salary, match_score, status, date_found, date_posted, cover_letter, applied_at, phone_screen_at, interviewing_at, offer_at, hired_at, interview_date, rejection_stage, hired_feedback) VALUES ('Senior UX Designer', 'Mozilla', 'https://www.linkedin.com/jobs/view/1000004', 'linkedin', 'Remote', '1', '$105k–$130k', '81.0', 'pending', '2026-04-13', '2026-03-12', NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL);
INSERT INTO jobs (title, company, url, source, location, is_remote, salary, match_score, status, date_found, date_posted, cover_letter, applied_at, phone_screen_at, interviewing_at, offer_at, hired_at, interview_date, rejection_stage, hired_feedback) VALUES ('Interaction Designer', 'Figma', 'https://www.indeed.com/viewjob?jk=1000005', 'indeed', 'San Francisco, CA', '1', '$115k–$145k', '78.0', 'pending', '2026-04-11', '2026-04-09', NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL);
INSERT INTO jobs (title, company, url, source, location, is_remote, salary, match_score, status, date_found, date_posted, cover_letter, applied_at, phone_screen_at, interviewing_at, offer_at, hired_at, interview_date, rejection_stage, hired_feedback) VALUES ('Product Designer II', 'Notion', 'https://www.linkedin.com/jobs/view/1000006', 'linkedin', 'Remote', '1', '$100k–$130k', '76.0', 'pending', '2026-04-10', '2026-04-07', NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL);
INSERT INTO jobs (title, company, url, source, location, is_remote, salary, match_score, status, date_found, date_posted, cover_letter, applied_at, phone_screen_at, interviewing_at, offer_at, hired_at, interview_date, rejection_stage, hired_feedback) VALUES ('UX Designer', 'Stripe', 'https://www.linkedin.com/jobs/view/1000007', 'linkedin', 'Remote', '1', '$120k–$150k', '74.0', 'pending', '2026-04-09', '2026-04-06', NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL);
INSERT INTO jobs (title, company, url, source, location, is_remote, salary, match_score, status, date_found, date_posted, cover_letter, applied_at, phone_screen_at, interviewing_at, offer_at, hired_at, interview_date, rejection_stage, hired_feedback) VALUES ('UI/UX Designer', 'Canva', 'https://www.indeed.com/viewjob?jk=1000008', 'indeed', 'Remote', '1', '$90k–$115k', '72.0', 'pending', '2026-04-08', '2026-04-05', NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL);
INSERT INTO jobs (title, company, url, source, location, is_remote, salary, match_score, status, date_found, date_posted, cover_letter, applied_at, phone_screen_at, interviewing_at, offer_at, hired_at, interview_date, rejection_stage, hired_feedback) VALUES ('Senior Product Designer', 'Asana', 'https://www.linkedin.com/jobs/view/1000009', 'linkedin', 'San Francisco, CA', '1', '$125k–$155k', '69.0', 'pending', '2026-04-07', '2026-04-04', NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL);
INSERT INTO jobs (title, company, url, source, location, is_remote, salary, match_score, status, date_found, date_posted, cover_letter, applied_at, phone_screen_at, interviewing_at, offer_at, hired_at, interview_date, rejection_stage, hired_feedback) VALUES ('UX Researcher', 'Intercom', 'https://www.indeed.com/viewjob?jk=1000010', 'indeed', 'Remote', '1', '$95k–$120k', '67.0', 'pending', '2026-04-06', '2026-04-03', NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL);
INSERT INTO jobs (title, company, url, source, location, is_remote, salary, match_score, status, date_found, date_posted, cover_letter, applied_at, phone_screen_at, interviewing_at, offer_at, hired_at, interview_date, rejection_stage, hired_feedback) VALUES ('Product Designer', 'Linear', 'https://www.linkedin.com/jobs/view/1000011', 'linkedin', 'Remote', '1', '$110k–$135k', '65.0', 'pending', '2026-04-05', '2026-04-02', NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL);
INSERT INTO jobs (title, company, url, source, location, is_remote, salary, match_score, status, date_found, date_posted, cover_letter, applied_at, phone_screen_at, interviewing_at, offer_at, hired_at, interview_date, rejection_stage, hired_feedback) VALUES ('UX Designer', 'Loom', 'https://www.indeed.com/viewjob?jk=1000012', 'indeed', 'Remote', '1', '$90k–$110k', '62.0', 'pending', '2026-04-04', '2026-04-01', NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL);
INSERT INTO jobs (title, company, url, source, location, is_remote, salary, match_score, status, date_found, date_posted, cover_letter, applied_at, phone_screen_at, interviewing_at, offer_at, hired_at, interview_date, rejection_stage, hired_feedback) VALUES ('Senior Product Designer', 'Asana', 'https://www.asana.com/jobs/1000013', 'linkedin', 'San Francisco, CA', '1', '$125k–$155k', '91.0', 'phone_screen', '2026-04-01', '2026-03-30', NULL, '2026-04-08', '2026-04-15', NULL, NULL, NULL, '2026-04-15T14:00:00', NULL, NULL);
INSERT INTO jobs (title, company, url, source, location, is_remote, salary, match_score, status, date_found, date_posted, cover_letter, applied_at, phone_screen_at, interviewing_at, offer_at, hired_at, interview_date, rejection_stage, hired_feedback) VALUES ('Product Designer', 'Notion', 'https://www.notion.so/jobs/1000014', 'indeed', 'Remote', '1', '$100k–$130k', '88.0', 'interviewing', '2026-03-25', '2026-03-23', NULL, '2026-04-01', '2026-04-05', '2026-04-12', NULL, NULL, '2026-04-22T10:00:00', NULL, NULL);
INSERT INTO jobs (title, company, url, source, location, is_remote, salary, match_score, status, date_found, date_posted, cover_letter, applied_at, phone_screen_at, interviewing_at, offer_at, hired_at, interview_date, rejection_stage, hired_feedback) VALUES ('Design Systems Designer', 'Figma', 'https://www.figma.com/jobs/1000015', 'linkedin', 'San Francisco, CA', '1', '$130k–$160k', '96.0', 'hired', '2026-03-01', '2026-02-27', NULL, '2026-03-08', '2026-03-14', '2026-03-21', '2026-04-01', '2026-04-08', NULL, NULL, '{"factors":["clear_scope","great_manager","mission_aligned"],"notes":"Excited about design systems work. Salary met expectations."}');
INSERT INTO jobs (title, company, url, source, location, is_remote, salary, match_score, status, date_found, date_posted, cover_letter, applied_at, phone_screen_at, interviewing_at, offer_at, hired_at, interview_date, rejection_stage, hired_feedback) VALUES ('UX Designer', 'Slack', 'https://slack.com/jobs/1000016', 'indeed', 'Remote', '1', '$115k–$140k', '79.0', 'applied', '2026-03-18', '2026-03-16', NULL, '2026-03-28', NULL, NULL, NULL, NULL, NULL, NULL, NULL);
-- job_contacts
-- Demo email-thread rows, loaded with one multi-row INSERT.
-- job_id values refer to the jobs rows by their numeric ids.
INSERT INTO job_contacts (job_id, direction, subject, from_addr, to_addr, received_at, stage_signal) VALUES
(1, 'inbound', 'Excited to connect — UX Designer role at Spotify', 'jamie.chen@spotify.com', 'you@example.com', '2026-04-12', 'positive_response'),
(1, 'outbound', 'Re: Excited to connect — UX Designer role at Spotify', 'you@example.com', 'jamie.chen@spotify.com', '2026-04-13', NULL),
(13, 'inbound', 'Interview Confirmation — Senior Product Designer', 'recruiting@asana.com', 'you@example.com', '2026-04-13', 'interview_scheduled'),
(14, 'inbound', 'Your panel interview is confirmed for Apr 22', 'recruiting@notion.so', 'you@example.com', '2026-04-12', 'interview_scheduled'),
(14, 'inbound', 'Pre-interview prep resources', 'marcus.webb@notion.so', 'you@example.com', '2026-04-13', 'positive_response'),
(15, 'inbound', 'Figma Design Systems — Offer Letter', 'offers@figma.com', 'you@example.com', '2026-04-01', 'offer_received'),
(15, 'outbound', 'Re: Figma Design Systems — Offer Letter (acceptance)', 'you@example.com', 'offers@figma.com', '2026-04-05', NULL),
(15, 'inbound', 'Welcome to Figma! Onboarding next steps', 'onboarding@figma.com', 'you@example.com', '2026-04-08', NULL),
(16, 'inbound', 'Thanks for applying to Slack', 'noreply@slack.com', 'you@example.com', '2026-03-28', NULL);
-- references_
-- Demo professional references, loaded with one multi-row INSERT.
-- tags holds a JSON array string; prep_email is NULL when no draft exists.
INSERT INTO references_ (name, email, role, company, relationship, notes, tags, prep_email) VALUES
('Dr. Priya Nair', 'priya.nair@example.com', 'Director of Design', 'Acme Corp', 'former_manager', 'Managed me for 3 years on the consumer app redesign. Enthusiastic reference.', '["manager","design"]', 'Hi Priya,
I hope you''re doing well! I''m currently interviewing for a few senior UX roles and would be so grateful if you''d be willing to serve as a reference.
Thank you!
[Your name]'),
('Sam Torres', 'sam.torres@example.com', 'Senior Product Designer', 'Acme Corp', 'former_colleague', 'Worked together on design systems. Great at speaking to collaborative process.', '["colleague","design_systems"]', NULL),
('Jordan Kim', 'jordan.kim@example.com', 'VP of Product', 'Streamline Inc', 'former_manager', 'Led the product team I was embedded in. Can speak to business impact of design work.', '["manager","product"]', NULL);
-- resumes
-- Single default demo resume. Values follow the column list order:
-- name, source, job_id (NULL), text (plain-text resume), struct_json
-- (JSON rendering of the same resume), word_count, is_default.
-- Date ranges, "K–12" and the title/company separators use an en dash.
INSERT INTO resumes (name, source, job_id, text, struct_json, word_count, is_default) VALUES (
'Base Resume',
'uploaded',
NULL,
'ALEX RIVERA
UX Designer · Product Design · Design Systems
alex.rivera@example.com · linkedin.com/in/alexrivera · Portfolio: alexrivera.design
SUMMARY
Senior UX Designer with 6 years of experience designing for music, education, and media platforms. Led 0-to-1 product work and redesigned high-traffic flows used by tens of millions of users. Deep background in user research, interaction design, and cross-platform design systems. Strong collaborator with engineering and product – comfortable in ambiguity, methodical about process.
EXPERIENCE
Senior UX Designer – StreamNote (2023–present)
- Led redesign of the core listening queue, reducing abandonment by 31% across mobile and web
- Built and maintained a component library (Figma tokens + React) used by 8 product squads
- Ran 60+ moderated user research sessions; findings shaped 3 major product bets
- Partnered with ML team to design recommendation transparency features for power users
UX Designer – EduPath (2021–2023)
- Designed the onboarding and early-habit loop for a K–12 learning app (2.4M DAU)
- Shipped streak redesign that improved D7 retention by 18%
- Drove accessibility audit and remediation (WCAG 2.1 AA); filed and closed 47 issues
- Mentored 2 junior designers; led weekly design critique
Product Designer – Signal Media (2019–2021)
- Designed editorial tools and reader-facing article experiences for a digital news publisher
- Prototyped and shipped a "read later" feature that became the #2 most-used feature within 90 days
- Collaborated with editorial and engineering to establish a shared component system (reduces new-story design time by 60%)
SKILLS
Figma · Prototyping · User Research · Usability Testing · Design Systems · Interaction Design
Accessibility (WCAG 2.1) · Cross-Platform (iOS/Android/Web) · React (collaboration-level) · SQL (basic)
Workshop Facilitation · Stakeholder Communication
EDUCATION
B.F.A. Graphic Design, Minor in Human-Computer Interaction – State University of the Arts, 2019
SELECTED PROJECTS
Playlist Flow Redesign (StreamNote) – reduced creation drop-off 31%, won internal design award
D7 Retention Streak (EduPath) – +18% weekly retention; featured in company all-hands
Accessibility Audit (EduPath) – full WCAG 2.1 AA remediation across iOS, Android, web',
'{"contact":{"name":"Alex Rivera","email":"alex.rivera@example.com","linkedin":"linkedin.com/in/alexrivera","portfolio":"alexrivera.design"},"summary":"Senior UX Designer with 6 years of experience designing for music, education, and media platforms.","experience":[{"company":"StreamNote","title":"Senior UX Designer","dates":"2023–present","bullets":["Led redesign of core listening queue, reducing abandonment by 31%","Built component library used by 8 product squads","Ran 60+ moderated user research sessions"]},{"company":"EduPath","title":"UX Designer","dates":"2021–2023","bullets":["Designed onboarding and early-habit loop for K–12 app (2.4M DAU)","Shipped streak redesign that improved D7 retention by 18%","Drove accessibility audit (WCAG 2.1 AA)"]},{"company":"Signal Media","title":"Product Designer","dates":"2019–2021","bullets":["Designed editorial tools and reader-facing article experiences","Prototyped and shipped read-later feature (top 2 used within 90 days)"]}],"education":[{"institution":"State University of the Arts","degree":"B.F.A. Graphic Design, Minor in HCI","year":"2019"}],"skills":["Figma","Prototyping","User Research","Usability Testing","Design Systems","Interaction Design","Accessibility (WCAG 2.1)","Cross-Platform","React","SQL","Workshop Facilitation"]}',
320,
1
);
-- ATS resume optimizer data for approved jobs (Spotify=1, Duolingo=2, NPR=3)
-- Spotify: gap report highlights audio/podcast tooling keywords; optimized resume tailored
-- ats_gap_report is a JSON array string of {term, section, priority, rationale}
-- objects; optimized_resume is the tailored plain-text resume.
-- WHERE id = 1 relies on the Spotify row having id 1 in the jobs table.
UPDATE jobs SET
ats_gap_report = '[{"term":"audio UX","section":"experience","priority":3,"rationale":"Spotify''s JD emphasizes audio product experience; resume mentions music broadly but not audio-specific UX patterns"},{"term":"podcast design","section":"experience","priority":2,"rationale":"Spotify is investing heavily in podcast tooling; related experience at Signal Media could be framed around audio content"},{"term":"cross-platform mobile","section":"skills","priority":2,"rationale":"JD specifies iOS and Android explicitly; resume lists cross-platform but not mobile-first framing"},{"term":"A/B testing","section":"experience","priority":1,"rationale":"JD mentions data-driven iteration; resume does not reference experimentation framework"}]',
optimized_resume = 'ALEX RIVERA
UX Designer · Audio Product · Cross-Platform Design
alex.rivera@example.com · linkedin.com/in/alexrivera · Portfolio: alexrivera.design
SUMMARY
Senior UX Designer specializing in audio and media product design. 6 years of experience shipping cross-platform features used by millions with a focus on music discovery, content navigation, and habit-forming interactions. Comfortable moving from user research to pixel-perfect specs to cross-functional alignment.
EXPERIENCE
Senior UX Designer – StreamNote (2023–present)
- Led redesign of the core listening queue (audio UX) – reduced abandonment 31% across iOS, Android, and web
- Designed podcast chapter navigation prototype; validated with 8 user sessions, handed off to eng in Q3
- Built Figma component library (tokens + variants) used by 8 product squads – cut design-to-dev handoff time by 40%
- Drove A/B test framework with data team: 12 experiments shipped; 7 reached statistical significance
UX Designer – EduPath (2021–2023)
- Designed cross-platform onboarding (iOS/Android/web) for K–12 learning app, 2.4M DAU
- Shipped streak redesign with 3 A/B variants – winning variant improved D7 retention by 18%
- Full WCAG 2.1 AA remediation across all platforms; filed and closed 47 issues
Product Designer – Signal Media (2019–2021)
- Designed audio and editorial experiences for a digital media publisher
- Prototyped and shipped "listen later" feature for podcast content – #2 most-used feature within 90 days
- Established shared design system that reduced new-story design time by 60%
SKILLS
Figma · Audio UX · Podcast Design · Cross-Platform (iOS/Android/Web) · Design Systems
A/B Testing · User Research · Usability Testing · Accessibility (WCAG 2.1) · Interaction Design
EDUCATION
B.F.A. Graphic Design, Minor in HCI – State University of the Arts, 2019'
WHERE id = 1;
-- Duolingo: gap report highlights gamification, retention, and learning science keywords
-- ats_gap_report is a JSON array string of {term, section, priority, rationale}
-- objects; optimized_resume is the tailored plain-text resume.
-- WHERE id = 2 relies on the Duolingo row having id 2 in the jobs table.
UPDATE jobs SET
ats_gap_report = '[{"term":"gamification","section":"experience","priority":3,"rationale":"Duolingo''s entire product is built on gamification mechanics; streak work at EduPath is highly relevant but not explicitly framed"},{"term":"streak mechanics","section":"experience","priority":3,"rationale":"Duolingo invented the streak; EduPath streak redesign is directly applicable and should be foregrounded"},{"term":"learning science","section":"experience","priority":2,"rationale":"JD references behavioral psychology; resume does not mention research-backed habit design"},{"term":"localization","section":"skills","priority":1,"rationale":"Duolingo ships to 40+ languages; internationalization experience or awareness would strengthen application"}]',
optimized_resume = 'ALEX RIVERA
UX Designer · Gamification · Learning Products
alex.rivera@example.com · linkedin.com/in/alexrivera · Portfolio: alexrivera.design
SUMMARY
UX Designer with 6 years of experience in education and media products. Designed habit-forming experiences grounded in behavioral research – streak systems, onboarding flows, and retention mechanics for apps with millions of daily active users. Passionate about learning products that feel like play.
EXPERIENCE
UX Designer – EduPath (2021–2023)
- Redesigned streak and gamification mechanics for K–12 learning app (2.4M DAU) – D7 retention +18%
- Applied behavioral science principles (variable reward, loss aversion, social proof) to onboarding flow redesign
- Led 30+ user research sessions with students, parents, and teachers; findings shaped product roadmap for 2 quarters
- Drove WCAG 2.1 AA accessibility remediation – 47 issues filed and closed across iOS, Android, web
Senior UX Designer – StreamNote (2023–present)
- Designed habit-reinforcing listening queue with personalized recommendations surface – abandonment -31%
- Built and scaled Figma design system used by 8 squads; reduced design-to-dev cycle by 40%
- Ran A/B tests with data team; 12 experiments across retention and discovery features
Product Designer – Signal Media (2019–2021)
- Designed reader engagement and content-return mechanics for digital news platform
- "Read later" feature reached #2 usage within 90 days of launch
SKILLS
Figma · Gamification Design · Habit & Retention Mechanics · User Research · Behavioral UX
Learning Products · Accessibility (WCAG 2.1) · Cross-Platform (iOS/Android/Web) · Design Systems
EDUCATION
B.F.A. Graphic Design, Minor in HCI – State University of the Arts, 2019'
WHERE id = 2;
-- NPR: gap report highlights public media, accessibility, and editorial tool experience
-- ats_gap_report is a JSON array string of {term, section, priority, rationale}
-- objects; optimized_resume is the tailored plain-text resume.
-- WHERE id = 3 relies on the NPR row having id 3 in the jobs table.
UPDATE jobs SET
ats_gap_report = '[{"term":"public media","section":"experience","priority":3,"rationale":"NPR is a public media org; framing experience around mission-driven media rather than commercial products strengthens fit"},{"term":"editorial tools","section":"experience","priority":3,"rationale":"NPR''s UX Lead role includes internal tools for journalists; Signal Media editorial tools work is directly applicable"},{"term":"accessibility standards","section":"experience","priority":2,"rationale":"NPR serves a broad public audience including listeners with disabilities; WCAG work at EduPath should be prominent"},{"term":"content discovery","section":"experience","priority":2,"rationale":"NPR''s JD mentions listener discovery; StreamNote queue redesign is relevant framing"}]',
optimized_resume = 'ALEX RIVERA
UX Lead · Public Media · Accessible Design
alex.rivera@example.com · linkedin.com/in/alexrivera · Portfolio: alexrivera.design
SUMMARY
Senior UX Designer with 6 years of experience in media, education, and content platforms. Led design for editorial tools, content discovery surfaces, and accessible experiences for mission-driven organizations. Believes design has an obligation to reach all users – especially the ones the industry tends to forget.
EXPERIENCE
Senior UX Designer – StreamNote (2023–present)
- Led content discovery redesign (listening queue, personalized surfaces) – abandonment -31%
- Designed and shipped podcast chapter navigation as a 0-to-1 feature
- Built scalable Figma component library used by 8 cross-functional squads
- Ran 60+ moderated research sessions; regularly presented findings to CPO and VP Product
Product Designer – Signal Media (2019–2021)
- Designed editorial authoring tools used daily by 120+ journalists – reduced story publish time by 35%
- Shipped "read later" feature for a digital news publisher – #2 most-used feature within 90 days
- Established shared design system that cut new-template design time by 60%
UX Designer – EduPath (2021–2023)
- Led full WCAG 2.1 AA accessibility audit and remediation across iOS, Android, and web
- Designed onboarding and retention flows for a public K–12 learning app (2.4M DAU)
- D7 retention +18% following streak redesign; results shared at company all-hands
SKILLS
Figma · Editorial & Publishing Tools · Content Discovery UX · Accessibility (WCAG 2.1 AA)
Public-Facing Product Design · User Research · Cross-Platform · Design Systems
EDUCATION
B.F.A. Graphic Design, Minor in HCI – State University of the Arts, 2019'
WHERE id = 3;
-- company_research for interview-stage jobs
-- Job 13: Asana (phone_screen, interview 2026-04-15)
-- Values are positional and follow the column list order: job_id,
-- generated_at, company_brief, ceo_brief, talking_points (JSON array string),
-- tech_brief, funding_brief, competitors_brief, red_flags (the lone NULL),
-- accessibility_brief, scrape_used (0), raw_output.
INSERT INTO company_research (job_id, generated_at, company_brief, ceo_brief, talking_points, tech_brief, funding_brief, competitors_brief, red_flags, accessibility_brief, scrape_used, raw_output) VALUES (
13,
'2026-04-14T09:00:00',
'Asana is a work management platform founded in 2008 by Dustin Moskovitz and Justin Rosenstein (both ex-Facebook). Headquartered in San Francisco, Asana went public on the NYSE in September 2020 via a direct listing. The product focuses on project and task management for teams, with a strong emphasis on clarity of ownership and cross-functional coordination. It serves over 130,000 paying customers across 190+ countries. Asana''s design philosophy centers on removing ambiguity from work — a principle that directly shapes product design decisions. The company has made significant investments in AI-assisted task management through its "AI Studio" features, launched in 2024.',
'Dustin Moskovitz, co-founder and CEO, is known for a thoughtful management style and genuine interest in org design and well-being at work. He is a co-founder of the effective altruism movement and the Open Philanthropy Project. Expect questions and conversation that reflect a values-driven culture — mission alignment matters here. Anne Raimondi is COO and a well-regarded operations leader.',
'["Asana''s design team works closely with the Core Product and Platform squads — ask how design embeds with engineering","Recent focus on AI features (AI Studio, smart task assignment) — familiarity with AI UX patterns will land well","Asana''s brand voice is unusually distinct — understand their design language before the call","Ask about the cross-functional collaboration model: how does design influence roadmap priority?","The role is hybrid SF — clarify expectations around in-office days upfront"]',
'Asana is built primarily on React (frontend), Python and PHP (backend), and uses a proprietary data model (the Asana object graph) that drives their real-time sync. Their design team uses Figma heavily. They have invested in their own design system ("Alchemy") which underpins the entire product.',
'Asana went public via direct listing (NYSE: ASAN) in September 2020. Revenue in FY2025 was approximately $726M, with consistent double-digit YoY growth. The company has been investing in profitability — operating losses have narrowed significantly. No recent acquisition activity.',
'Primary competitors: Monday.com, ClickUp, Notion (project management use cases), Jira (for engineering teams), and Microsoft Project. Asana differentiates on simplicity, clear ownership model, and enterprise reliability over raw feature count.',
NULL,
'Asana has published an accessibility statement and maintains WCAG 2.1 AA compliance across their core product. Their employee ERGs include groups for disability and neurodiversity. The company scores above average on Glassdoor for work-life balance. Their San Francisco HQ has dedicated quiet spaces and standing desks.',
0,
'Asana company research generated for phone screen 2026-04-15. Sources: public filings, company blog, Glassdoor.'
);
-- Job 14: Notion (interviewing, panel 2026-04-22)
-- Values are positional and follow the column list order: job_id,
-- generated_at, company_brief, ceo_brief, talking_points (JSON array string),
-- tech_brief, funding_brief, competitors_brief, red_flags,
-- accessibility_brief, scrape_used (0), raw_output.
-- Numeric ranges in the briefs ("2023–2024", "$300–500M") use an en dash.
INSERT INTO company_research (job_id, generated_at, company_brief, ceo_brief, talking_points, tech_brief, funding_brief, competitors_brief, red_flags, accessibility_brief, scrape_used, raw_output) VALUES (
14,
'2026-04-11T14:30:00',
'Notion is an all-in-one workspace tool combining notes, docs, wikis, and project management. Founded in 2013, relaunched in 2018 after a near-failure. Headquartered in San Francisco, with a significant remote-first culture. Notion reached a $10B valuation in its 2021 funding round and has since focused on consolidation and profitability. The product is unusually design-forward — Notion''s UI is considered a benchmark in the industry for flexibility without overwhelming complexity. Their 2023–2024 push into AI (Notion AI) added LLM-powered writing and summarization directly into the workspace. The product design team is small-but-influential and works closely with the founders.',
'Ivan Zhao is co-founder and CEO, known for being deeply product-focused and aesthetically driven. He has described Notion as an attempt to make software feel like a craftsman''s tool. Akshay Kothari is co-founder and COO. The culture reflects the founders'' values: deliberate, high-craft, opinionated. Expect the panel to include designers or PMs who will probe your design sensibility and taste.',
'["Notion''s design team is small and influential — expect ownership of end-to-end features, not component-level work","AI features (Notion AI) are a major current initiative — come with opinions on how AI should integrate into a workspace without disrupting user flow","Notion''s design language is a competitive moat — study it carefully before the panel","Panel likely includes a PM, a senior designer, and possibly a founder — tailor your portfolio walk to each audience","Ask about the product design team structure: how many designers, how do they embed with eng, what does the IC path look like?"]',
'Notion is built on a React frontend with a custom block-based data model. Their backend uses Postgres and Kafka for real-time sync. Notion AI uses third-party LLM providers (Anthropic, OpenAI) via API. The design team uses Figma and maintains a well-documented internal design system.',
'Notion raised $275M at a $10B valuation in October 2021 (led by Sequoia and Coatue). The company has not announced further funding rounds; public commentary suggests a path to profitability. ARR estimated at $300–500M as of 2024.',
'Competitors include Confluence (Atlassian), Coda, Linear (for engineering-focused workflows), Obsidian (local-first notes), and increasingly Asana and ClickUp for project management use cases. Notion''s differentiator is its flexible block model and strong brand identity with knowledge workers.',
'Some employee reviews mention that the small team size means high ownership but also that projects can pivot quickly. Design headcount has been stable post-2022 layoffs. Worth asking about team stability in the panel.',
'Notion has made public commitments to WCAG 2.1 AA compliance but has received community feedback that keyboard navigation in the block editor has gaps. Their 2024 accessibility roadmap addressed the most commonly reported issues. The company has a neurodiversity ERG and remote-first culture (async-friendly).',
0,
'Notion company research generated for panel interview 2026-04-22. Sources: public filings, company blog, community accessibility reports.'
);
-- Job 15: Figma (hired — research used during interview cycle)
-- Values are positional and follow the column list order: job_id,
-- generated_at, company_brief, ceo_brief, talking_points (JSON array string),
-- tech_brief, funding_brief, competitors_brief, red_flags (the lone NULL),
-- accessibility_brief, scrape_used (0), raw_output.
-- NOTE(review): the briefs state facts about a real company and real people
-- (board memberships, implementation languages, funding figures); verify
-- them before this text is shown outside the demo.
INSERT INTO company_research (job_id, generated_at, company_brief, ceo_brief, talking_points, tech_brief, funding_brief, competitors_brief, red_flags, accessibility_brief, scrape_used, raw_output) VALUES (
15,
'2026-03-13T11:00:00',
'Figma is the leading browser-based design tool, founded in 2012 by Dylan Field and Evan Wallace. Headquartered in San Francisco. Figma disrupted the design tool market with its collaborative, multiplayer approach — Google Docs for design. The product includes Figma Design, FigJam (whiteboarding), and Dev Mode (engineering handoff). Adobe''s attempted $20B acquisition was blocked by UK and EU regulators in 2023; Figma received a $1B termination fee. Post-Adobe, Figma has accelerated independent investment in AI features and a new "Figma Make" prototyping tool. The Design Systems team (the role you accepted) is responsible for the core component and token infrastructure used across all Figma products.',
'Dylan Field, co-founder and CEO, is known for being deeply technical and product-obsessed. He joined the board of OpenAI. Post-Adobe-deal fallout, Field has been publicly focused on Figma''s independent growth trajectory. Expect a culture of high standards and genuine product craft. Noah Levin leads the design org.',
'["You are joining the Design Systems team — the infrastructure team for Figma''s own product design","Your work will directly impact every other designer at Figma — high visibility, high leverage","Figma uses its own product (dogfooding) — you will be designing in Figma for Figma","Key initiative: AI-assisted component generation in Figma Make — design systems input is critical","You are the first external hire in this role since the Adobe deal fell through — ask about team direction post-acquisition"]',
'Figma''s frontend is React with a custom WebGL rendering engine (written in Rust + WASM) for the canvas. This is some of the most sophisticated browser-based graphics code in production. Dev Mode connects to GitHub, Storybook, and VS Code. The design system team works in Figma and outputs tokens that connect to code via Figma''s token pipeline.',
'Figma received a $1B termination fee from Adobe when the acquisition was blocked in late 2023. The company raised $200M at a $10B valuation in 2021. With the termination fee and strong ARR, Figma is well-capitalized for independent growth. No IPO timeline announced publicly.',
'Primary competitor is Sketch (declining market share), with Adobe XD effectively sunset. Framer is a growing competitor for prototyping. Penpot (open-source) is gaining traction in privacy-conscious and European markets. Figma''s multiplayer and browser-based approach remains a strong moat.',
NULL,
'Figma has an active accessibility team and public blog posts on designing accessible components. Their design system (the one you will be contributing to) includes built-in accessibility annotations and ARIA guidance. The company has disability and neurodiversity ERGs. Remote-friendly with SF HQ.',
0,
'Figma company research generated for interviewing stage 2026-03-13. Sources: company blog, public filings, design community.'
);

3170
dev-api.py

File diff suppressed because it is too large Load diff

View file

@ -1,14 +0,0 @@
#!/bin/sh
# Start the cf-orch agent.
#
# Environment (all optional):
#   CF_ORCH_COORDINATOR_URL  coordinator URL   (default: http://host.docker.internal:7700)
#   CF_ORCH_NODE_ID          node identifier   (default: peregrine)
#   CF_ORCH_AGENT_PORT       agent listen port (default: 7701)
#   CF_ORCH_ADVERTISE_HOST   when non-empty, passed as --advertise-host
#
# The agent always binds to 0.0.0.0.
set -e

# Build the argument list in the positional parameters rather than in a flat
# string: each value stays a single argument even if it contains whitespace or
# glob characters, which an unquoted $ARGS expansion would split or expand.
set -- \
    --coordinator "${CF_ORCH_COORDINATOR_URL:-http://host.docker.internal:7700}" \
    --node-id "${CF_ORCH_NODE_ID:-peregrine}" \
    --host 0.0.0.0 \
    --port "${CF_ORCH_AGENT_PORT:-7701}"

# Adds --advertise-host only when CF_ORCH_ADVERTISE_HOST is set and non-empty.
if [ -n "${CF_ORCH_ADVERTISE_HOST:-}" ]; then
    set -- "$@" --advertise-host "${CF_ORCH_ADVERTISE_HOST}"
fi

# exec replaces the shell so the agent receives signals directly.
exec cf-orch agent "$@"

View file

@ -2,8 +2,6 @@ server {
listen 80; listen 80;
server_name _; server_name _;
client_max_body_size 20m;
root /usr/share/nginx/html; root /usr/share/nginx/html;
index index.html; index index.html;
@ -22,19 +20,6 @@ server {
add_header Cache-Control "public, immutable"; add_header Cache-Control "public, immutable";
} }
# Handle /peregrine/ base path used when accessed directly (no Caddy prefix stripping).
# ^~ blocks regex location matches so assets at /peregrine/assets/... are served correctly.
location ^~ /peregrine/assets/ {
alias /usr/share/nginx/html/assets/;
expires 1y;
add_header Cache-Control "public, immutable";
}
location /peregrine/ {
alias /usr/share/nginx/html/;
try_files $uri $uri/ /index.html;
}
# SPA fallback must come after API and assets # SPA fallback must come after API and assets
location / { location / {
try_files $uri $uri/ /index.html; try_files $uri $uri/ /index.html;

View file

@ -144,7 +144,7 @@ Shipped in v0.4.0. Ongoing maintenance and known decisions:
## Container Runtime ## Container Runtime
- ~~**Podman support**~~ — ✅ Done: `Makefile` auto-detects `docker compose` / `podman compose` / `podman-compose`; `compose.podman-gpu.yml` CDI override for GPU profiles; `install.sh` detects existing Podman and skips Docker install. - ~~**Podman support**~~ — ✅ Done: `Makefile` auto-detects `docker compose` / `podman compose` / `podman-compose`; `compose.podman-gpu.yml` CDI override for GPU profiles; `setup.sh` detects existing Podman and skips Docker install.
- **FastAPI migration path** — When concurrent-user scale demands it: port Streamlit pages to FastAPI + React/HTMX, keep `scripts/` layer unchanged, replace daemon threads with Celery + Redis. The `scripts/` separation already makes this clean. - **FastAPI migration path** — When concurrent-user scale demands it: port Streamlit pages to FastAPI + React/HTMX, keep `scripts/` layer unchanged, replace daemon threads with Celery + Redis. The `scripts/` separation already makes this clean.
--- ---

View file

@ -11,7 +11,7 @@ Thank you for your interest in contributing to Peregrine. This guide covers the
## Fork and Clone ## Fork and Clone
```bash ```bash
git clone https://git.opensourcesolarpunk.com/Circuit-Forge/peregrine git clone https://git.circuitforge.io/circuitforge/peregrine
cd peregrine cd peregrine
``` ```
@ -102,23 +102,6 @@ Before opening a pull request:
--- ---
## Database Migrations
Peregrine uses a numbered SQL migration system (Rails-style). Each migration is a `.sql` file in the `migrations/` directory at the repo root, named `NNN_description.sql` (e.g. `002_add_foo_column.sql`). Applied migrations are tracked in a `schema_migrations` table in each user database.
### Adding a migration
1. Create `migrations/NNN_description.sql` where `NNN` is the next sequential number (zero-padded to 3 digits).
2. Write standard SQL — `CREATE TABLE IF NOT EXISTS`, `ALTER TABLE ADD COLUMN`, etc. Keep each migration idempotent where possible.
3. Do **not** modify `scripts/db.py`'s legacy `_MIGRATIONS` lists — those are superseded and will be removed once all active databases have been bootstrapped by the migration runner.
4. The runner (`scripts/db_migrate.py`) applies pending migrations at startup automatically (both FastAPI and Streamlit paths call `migrate_db(db_path)`).
### Rollbacks
SQLite does not support transactional DDL for all statement types. Write forward-only migrations. If you need to undo a schema change, add a new migration that reverses it.
---
## What NOT to Do ## What NOT to Do
- Do not commit `config/user.yaml`, `config/notion.yaml`, `config/email.yaml`, `config/adzuna.yaml`, or any `config/integrations/*.yaml` — all are gitignored - Do not commit `config/user.yaml`, `config/notion.yaml`, `config/email.yaml`, `config/adzuna.yaml`, or any `config/integrations/*.yaml` — all are gitignored

View file

@ -7,7 +7,7 @@ This page walks through a full Peregrine installation from scratch.
## Prerequisites ## Prerequisites
- **Git** — to clone the repository - **Git** — to clone the repository
- **Internet connection**`install.sh` downloads Docker and other dependencies - **Internet connection**`setup.sh` downloads Docker and other dependencies
- **Operating system**: Ubuntu/Debian, Fedora/RHEL, Arch Linux, or macOS (with Docker Desktop) - **Operating system**: Ubuntu/Debian, Fedora/RHEL, Arch Linux, or macOS (with Docker Desktop)
!!! warning "Windows" !!! warning "Windows"
@ -18,19 +18,19 @@ This page walks through a full Peregrine installation from scratch.
## Step 1 — Clone the repository ## Step 1 — Clone the repository
```bash ```bash
git clone https://git.opensourcesolarpunk.com/Circuit-Forge/peregrine git clone https://git.circuitforge.io/circuitforge/peregrine
cd peregrine cd peregrine
``` ```
--- ---
## Step 2 — Run install.sh ## Step 2 — Run setup.sh
```bash ```bash
bash install.sh bash setup.sh
``` ```
`install.sh` performs the following automatically: `setup.sh` performs the following automatically:
1. **Detects your platform** (Ubuntu/Debian, Fedora/RHEL, Arch, macOS) 1. **Detects your platform** (Ubuntu/Debian, Fedora/RHEL, Arch, macOS)
2. **Installs Git** if not already present 2. **Installs Git** if not already present
@ -40,10 +40,10 @@ bash install.sh
6. **Creates `.env` from `.env.example`** — edit `.env` to customise ports and model storage paths before starting 6. **Creates `.env` from `.env.example`** — edit `.env` to customise ports and model storage paths before starting
!!! note "macOS" !!! note "macOS"
`install.sh` installs Docker Desktop via Homebrew (`brew install --cask docker`) then exits. Open Docker Desktop, start it, then re-run the script. `setup.sh` installs Docker Desktop via Homebrew (`brew install --cask docker`) then exits. Open Docker Desktop, start it, then re-run the script.
!!! note "GPU requirement" !!! note "GPU requirement"
For GPU support, `nvidia-smi` must return output before you run `install.sh`. Install your NVIDIA driver first. The Container Toolkit installation will fail silently if the driver is not present. For GPU support, `nvidia-smi` must return output before you run `setup.sh`. Install your NVIDIA driver first. The Container Toolkit installation will fail silently if the driver is not present.
--- ---
@ -107,7 +107,7 @@ The first-run wizard launches automatically. See [First-Run Wizard](first-run-wi
Only NVIDIA GPUs are supported. AMD ROCm is not currently supported. Only NVIDIA GPUs are supported. AMD ROCm is not currently supported.
Requirements: Requirements:
- NVIDIA driver installed and `nvidia-smi` working before running `install.sh` - NVIDIA driver installed and `nvidia-smi` working before running `setup.sh`
- CUDA 12.x recommended (CUDA 11.x may work but is untested) - CUDA 12.x recommended (CUDA 11.x may work but is untested)
- Minimum 8 GB VRAM for `single-gpu` profile with default models - Minimum 8 GB VRAM for `single-gpu` profile with default models
- For `dual-gpu`: GPU 0 is assigned to Ollama, GPU 1 to vLLM - For `dual-gpu`: GPU 0 is assigned to Ollama, GPU 1 to vLLM

View file

@ -4,17 +4,15 @@
Peregrine automates the full job search lifecycle: discovery, matching, cover letter generation, application tracking, and interview preparation. It is privacy-first and local-first — your data never leaves your machine unless you configure an external integration. Peregrine automates the full job search lifecycle: discovery, matching, cover letter generation, application tracking, and interview preparation. It is privacy-first and local-first — your data never leaves your machine unless you configure an external integration.
![Peregrine dashboard](screenshots/01-dashboard.png)
--- ---
## Quick Start ## Quick Start
```bash ```bash
# 1. Clone and install dependencies # 1. Clone and install dependencies
git clone https://git.opensourcesolarpunk.com/Circuit-Forge/peregrine git clone https://git.circuitforge.io/circuitforge/peregrine
cd peregrine cd peregrine
bash install.sh bash setup.sh
# 2. Start Peregrine # 2. Start Peregrine
make start # no GPU, API-only make start # no GPU, API-only
@ -31,23 +29,20 @@ The first-run wizard guides you through hardware detection, tier selection, iden
## Feature Overview ## Feature Overview
| Feature | Free | Paid | Premium | | Feature | Free | Paid | Premium |
|---------|------|-------|---------| |---------|------|------|---------|
| Job discovery (JobSpy + custom boards) | Yes | Yes | Yes | | Job discovery (JobSpy + custom boards) | Yes | Yes | Yes |
| Resume keyword matching | Yes | Yes | Yes | | Resume keyword matching | Yes | Yes | Yes |
| Cover letter generation | BYOK‡ | Yes | Yes | | Cover letter generation | - | Yes | Yes |
| Company research briefs | BYOK‡ | Yes | Yes | | Company research briefs | - | Yes | Yes |
| Interview prep & practice Q&A | BYOK‡ | Yes | Yes | | Interview prep & practice Q&A | - | Yes | Yes |
| Email sync & auto-classification | - | Yes | Yes | | Email sync & auto-classification | - | Yes | Yes |
| Survey assistant (culture-fit Q&A) | BYOK‡ | Yes | Yes | | Survey assistant (culture-fit Q&A) | - | Yes | Yes |
| Integration connectors (Notion, Airtable, etc.) | Partial | Yes | Yes | | Integration connectors (Notion, Airtable, etc.) | Partial | Yes | Yes |
| Calendar sync (Google, Apple) | - | Yes | Yes | | Calendar sync (Google, Apple) | - | Yes | Yes |
| Cover letter model fine-tuning | - | - | Yes | | Cover letter model fine-tuning | - | - | Yes |
| Multi-user support | - | - | Yes | | Multi-user support | - | - | Yes |
**Paid** gives access to CircuitForge's hosted inference — no API key required.
**BYOK** — configure any LLM backend in `config/llm.yaml` (local Ollama/vLLM or an API key) and these features unlock at no charge, regardless of tier.
See [Tier System](reference/tier-system.md) for the full feature gate table. See [Tier System](reference/tier-system.md) for the full feature gate table.
--- ---
@ -63,8 +58,8 @@ See [Tier System](reference/tier-system.md) for the full feature gate table.
## License ## License
Core discovery pipeline: [MIT](https://git.opensourcesolarpunk.com/Circuit-Forge/peregrine/src/branch/main/LICENSE-MIT) Core discovery pipeline: [MIT](https://git.circuitforge.io/circuitforge/peregrine/src/branch/main/LICENSE-MIT)
AI features (cover letter generation, company research, interview prep, UI): [BSL 1.1](https://git.opensourcesolarpunk.com/Circuit-Forge/peregrine/src/branch/main/LICENSE-BSL) AI features (cover letter generation, company research, interview prep, UI): [BSL 1.1](https://git.circuitforge.io/circuitforge/peregrine/src/branch/main/LICENSE-BSL)
© 2026 Circuit Forge LLC © 2026 Circuit Forge LLC

View file

@ -1 +0,0 @@
(function () {
  // Inject the analytics <script> tag into <head> at runtime.
  var tag = document.createElement("script");
  tag.defer = true;
  // data-domain: comma-separated list of sites the events are recorded for.
  tag.dataset.domain = "docs.circuitforge.tech,circuitforge.tech";
  // data-api: endpoint the loaded script posts events to.
  tag.dataset.api = "https://analytics.circuitforge.tech/api/event";
  tag.src = "https://analytics.circuitforge.tech/js/script.js";
  document.head.appendChild(tag);
})();

View file

@ -337,7 +337,7 @@ webhook_url: "https://discord.com/api/webhooks/..."
## .env ## .env
Docker port and path overrides. Created from `.env.example` by `install.sh`. Gitignored. Docker port and path overrides. Created from `.env.example` by `setup.sh`. Gitignored.
```bash ```bash
# Ports (change if defaults conflict with existing services) # Ports (change if defaults conflict with existing services)

View file

@ -1,157 +0,0 @@
# Forgejo Feedback API — Schema & Bug Bot Setup
## API Endpoints Used
| Operation | Method | Endpoint |
|-----------|--------|----------|
| List labels | GET | `/repos/{owner}/{repo}/labels` |
| Create label | POST | `/repos/{owner}/{repo}/labels` |
| Create issue | POST | `/repos/{owner}/{repo}/issues` |
| Upload attachment | POST | `/repos/{owner}/{repo}/issues/{index}/assets` |
| Post comment | POST | `/repos/{owner}/{repo}/issues/{index}/comments` |
Base URL: `https://git.opensourcesolarpunk.com/api/v1`
---
## Issue Creation Payload
```json
POST /repos/{owner}/{repo}/issues
{
"title": "string",
"body": "markdown string",
"labels": [1, 2, 3] // array of label IDs (not names)
}
```
Response (201):
```json
{
"number": 42,
"html_url": "https://git.opensourcesolarpunk.com/pyr0ball/peregrine/issues/42"
}
```
---
## Issue Body Structure
The `build_issue_body()` function produces this markdown layout:
```markdown
## 🐛 Bug | ✨ Feature Request | 💬 Other
<user description>
### Reproduction Steps ← bug type only, when repro provided
<repro steps>
### Context
- **page:** Home
- **version:** v0.2.5-61-ga6d787f ← from `git describe`; "dev" inside Docker
- **tier:** free | paid | premium
- **llm_backend:** ollama | vllm | claude_code | ...
- **os:** Linux-6.8.0-65-generic-x86_64-with-glibc2.39
- **timestamp:** 2026-03-06T15:58:29Z
<details>
<summary>App Logs (last 100 lines)</summary>
```
... log content (PII masked) ...
```
</details>
### Recent Listings ← only when include_diag = True
- [Title @ Company](url)
---
*Submitted by: Name <email>* ← only when attribution consent checked
```
---
## Screenshot Attachment
Screenshots are uploaded as issue assets, then embedded inline via a follow-up comment:
```markdown
### Screenshot
![screenshot](https://git.opensourcesolarpunk.com/attachments/<uuid>)
```
This keeps the issue body clean and puts the screenshot in a distinct comment.
---
## Labels
| Label | Color | Applied when |
|-------|-------|-------------|
| `beta-feedback` | `#0075ca` | Always |
| `needs-triage` | `#e4e669` | Always |
| `bug` | `#d73a4a` | Type = Bug |
| `feature-request` | `#a2eeef` | Type = Feature Request |
| `question` | `#d876e3` | Type = Other |
Labels are looked up by name on each submission; missing ones are auto-created via `_ensure_labels()`.
---
## Bug Bot Account Setup
The token currently bundled in `.env` is pyr0ball's personal token. For beta distribution,
create a dedicated bot account so the token has limited scope and can be rotated independently.
### Why a bot account?
- Token gets bundled in beta testers' `.env` — shouldn't be tied to the repo owner's account
- Bot can be limited to issue write only (cannot push code, see private repos, etc.)
- Token rotation doesn't affect the owner's other integrations
### Steps (requires Forgejo admin panel — API admin access not available on this token)
1. **Create bot account** at `https://git.opensourcesolarpunk.com/-/admin/users/new`
- Username: `peregrine-bot` (or `cf-bugbot`)
- Email: a real address you control (e.g. `bot+peregrine@circuitforge.tech`)
- Set a strong password (store in your password manager)
- Check "Prohibit login" if you want a pure API-only account
2. **Add as collaborator** on `pyr0ball/peregrine`:
- Settings → Collaborators → Add `peregrine-bot` with **Write** access
- Write access is required to create labels; issue creation alone would need only Read+Comment
3. **Generate API token** (log in as the bot, or use admin impersonation):
- User Settings → Applications → Generate New Token
- Name: `peregrine-feedback`
- Scopes: `issue` (write) — no repo code access needed
- Copy the token — it won't be shown again
4. **Update environment**:
```
FORGEJO_API_TOKEN=<new bot token>
FORGEJO_REPO=pyr0ball/peregrine
FORGEJO_API_URL=https://git.opensourcesolarpunk.com/api/v1
```
Update both `.env` (dev machine) and any beta tester `.env` files.
5. **Verify** the bot can create issues:
```bash
curl -s -X POST https://git.opensourcesolarpunk.com/api/v1/repos/pyr0ball/peregrine/issues \
-H "Authorization: token <bot-token>" \
-H "Content-Type: application/json" \
-d '{"title":"[TEST] bot token check","body":"safe to close","labels":[]}'
```
Expected: HTTP 201 with `number` and `html_url` in response.
### Future: Heimdall token management
Once Heimdall is live, the bot token should be served by the license server rather than
bundled in `.env`. The app fetches it at startup using the user's license key → token is
never stored on disk and can be rotated server-side. Track as a future Heimdall feature.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 94 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 220 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 72 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 99 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 98 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 35 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 73 KiB

View file

@ -1,7 +1,5 @@
# Apply Workspace # Apply Workspace
![Peregrine apply workspace with cover letter generator and ATS optimizer](../screenshots/03-apply.png)
The Apply Workspace is where you generate cover letters, export application documents, and record that you have applied to a job. The Apply Workspace is where you generate cover letters, export application documents, and record that you have applied to a job.
--- ---

View file

@ -1,7 +1,5 @@
# Job Review # Job Review
![Peregrine job review triage](../screenshots/02-review.png)
The Job Review page is where you approve or reject newly discovered jobs before they enter the application pipeline. The Job Review page is where you approve or reject newly discovered jobs before they enter the application pipeline.
--- ---

View file

@ -15,11 +15,6 @@ cd "$SCRIPT_DIR"
PROFILE="${PROFILE:-remote}" PROFILE="${PROFILE:-remote}"
# ── Compose engine detection ──────────────────────────────────────────────────
COMPOSE="$(command -v docker >/dev/null 2>&1 && docker compose version >/dev/null 2>&1 \
&& echo "docker compose" \
|| (command -v podman >/dev/null 2>&1 && echo "podman compose" || echo "podman-compose"))"
# ── Usage ──────────────────────────────────────────────────────────────────── # ── Usage ────────────────────────────────────────────────────────────────────
usage() { usage() {
echo "" echo ""
@ -33,10 +28,9 @@ usage() {
echo -e " ${GREEN}start${NC} Start Peregrine (preflight → up)" echo -e " ${GREEN}start${NC} Start Peregrine (preflight → up)"
echo -e " ${GREEN}stop${NC} Stop all services" echo -e " ${GREEN}stop${NC} Stop all services"
echo -e " ${GREEN}restart${NC} Restart all services" echo -e " ${GREEN}restart${NC} Restart all services"
echo -e " ${GREEN}build [service]${NC} Rebuild image(s) without restarting (default: api web)"
echo -e " ${GREEN}status${NC} Show running containers" echo -e " ${GREEN}status${NC} Show running containers"
echo -e " ${GREEN}logs [service]${NC} Tail logs (default: api)" echo -e " ${GREEN}logs [service]${NC} Tail logs (default: app)"
echo -e " ${GREEN}update${NC} Pull latest images + rebuild" echo -e " ${GREEN}update${NC} Pull latest images + rebuild app"
echo -e " ${GREEN}preflight${NC} Check ports + resources; write .env" echo -e " ${GREEN}preflight${NC} Check ports + resources; write .env"
echo -e " ${GREEN}models${NC} Check ollama models in config; pull any missing" echo -e " ${GREEN}models${NC} Check ollama models in config; pull any missing"
echo -e " ${GREEN}test${NC} Run test suite" echo -e " ${GREEN}test${NC} Run test suite"
@ -47,12 +41,6 @@ usage() {
echo -e " ${GREEN}clean${NC} Remove containers, images, volumes (DESTRUCTIVE)" echo -e " ${GREEN}clean${NC} Remove containers, images, volumes (DESTRUCTIVE)"
echo -e " ${GREEN}open${NC} Open the web UI in your browser" echo -e " ${GREEN}open${NC} Open the web UI in your browser"
echo "" echo ""
echo -e " Cloud / demo commands:"
echo -e " ${GREEN}cloud-start${NC} Start the cloud stack (peregrine-cloud)"
echo -e " ${GREEN}cloud-restart${NC} Rebuild + restart the cloud stack"
echo -e " ${GREEN}demo-start${NC} Start the demo stack (peregrine-demo)"
echo -e " ${GREEN}demo-restart${NC} Rebuild + restart the demo stack"
echo ""
echo " Profiles (set via --profile or PROFILE env var):" echo " Profiles (set via --profile or PROFILE env var):"
echo " remote API-only, no local inference (default)" echo " remote API-only, no local inference (default)"
echo " cpu Local Ollama inference on CPU" echo " cpu Local Ollama inference on CPU"
@ -82,7 +70,7 @@ while [[ $# -gt 0 ]]; do
esac esac
done done
SERVICE="${1:-api}" # used by `logs` command SERVICE="${1:-app}" # used by `logs` command
# ── Dependency guard ────────────────────────────────────────────────────────── # ── Dependency guard ──────────────────────────────────────────────────────────
# Commands that delegate to make; others (status, logs, update, open, setup) run fine without it. # Commands that delegate to make; others (status, logs, update, open, setup) run fine without it.
@ -96,7 +84,7 @@ case "$CMD" in
setup) setup)
info "Running dependency installer..." info "Running dependency installer..."
bash install.sh bash setup.sh
;; ;;
preflight) preflight)
@ -113,7 +101,7 @@ case "$CMD" in
start) start)
info "Starting Peregrine (PROFILE=${PROFILE})..." info "Starting Peregrine (PROFILE=${PROFILE})..."
make start PROFILE="$PROFILE" make start PROFILE="$PROFILE"
PORT="$(grep -m1 '^VUE_PORT=' .env 2>/dev/null | cut -d= -f2 || echo 8506)" PORT="$(grep -m1 '^STREAMLIT_PORT=' .env 2>/dev/null | cut -d= -f2 || echo 8501)"
success "Peregrine is up → http://localhost:${PORT}" success "Peregrine is up → http://localhost:${PORT}"
;; ;;
@ -126,30 +114,33 @@ case "$CMD" in
restart) restart)
info "Restarting (PROFILE=${PROFILE})..." info "Restarting (PROFILE=${PROFILE})..."
make restart PROFILE="$PROFILE" make restart PROFILE="$PROFILE"
PORT="$(grep -m1 '^VUE_PORT=' .env 2>/dev/null | cut -d= -f2 || echo 8506)" PORT="$(grep -m1 '^STREAMLIT_PORT=' .env 2>/dev/null | cut -d= -f2 || echo 8501)"
success "Peregrine restarted → http://localhost:${PORT}" success "Peregrine restarted → http://localhost:${PORT}"
;; ;;
status) status)
# Auto-detect compose engine same way Makefile does
COMPOSE="$(command -v docker >/dev/null 2>&1 && docker compose version >/dev/null 2>&1 \
&& echo "docker compose" \
|| (command -v podman >/dev/null 2>&1 && echo "podman compose" || echo "podman-compose"))"
$COMPOSE ps $COMPOSE ps
;; ;;
logs) logs)
COMPOSE="$(command -v docker >/dev/null 2>&1 && docker compose version >/dev/null 2>&1 \
&& echo "docker compose" \
|| (command -v podman >/dev/null 2>&1 && echo "podman compose" || echo "podman-compose"))"
info "Tailing logs for: ${SERVICE}" info "Tailing logs for: ${SERVICE}"
$COMPOSE logs -f "$SERVICE" $COMPOSE logs -f "$SERVICE"
;; ;;
build)
BUILD_SVC="$([[ "${SERVICE}" == "api" ]] && echo "api web" || echo "${SERVICE}")"
info "Building ${BUILD_SVC}..."
$COMPOSE build $BUILD_SVC
success "Build complete. Run './manage.sh restart' to apply."
;;
update) update)
info "Pulling latest images and rebuilding..." info "Pulling latest images and rebuilding app..."
COMPOSE="$(command -v docker >/dev/null 2>&1 && docker compose version >/dev/null 2>&1 \
&& echo "docker compose" \
|| (command -v podman >/dev/null 2>&1 && echo "podman compose" || echo "podman-compose"))"
$COMPOSE pull searxng ollama 2>/dev/null || true $COMPOSE pull searxng ollama 2>/dev/null || true
$COMPOSE build api web $COMPOSE build app web
success "Update complete. Run './manage.sh restart' to apply." success "Update complete. Run './manage.sh restart' to apply."
;; ;;
@ -176,7 +167,7 @@ case "$CMD" in
;; ;;
open) open)
PORT="$(grep -m1 '^VUE_PORT=' .env 2>/dev/null | cut -d= -f2 || echo 8506)" PORT="$(grep -m1 '^STREAMLIT_PORT=' .env 2>/dev/null | cut -d= -f2 || echo 8501)"
URL="http://localhost:${PORT}" URL="http://localhost:${PORT}"
info "Opening ${URL}" info "Opening ${URL}"
if command -v xdg-open &>/dev/null; then if command -v xdg-open &>/dev/null; then
@ -206,32 +197,6 @@ case "$CMD" in
-v "${@:3}" -v "${@:3}"
;; ;;
cloud-start)
info "Starting cloud stack (peregrine-cloud)..."
$COMPOSE -f compose.cloud.yml --project-name peregrine-cloud up -d
success "Cloud stack up → http://localhost:8508"
;;
cloud-restart)
info "Rebuilding + restarting cloud stack (peregrine-cloud)..."
$COMPOSE -f compose.cloud.yml --project-name peregrine-cloud build api web
$COMPOSE -f compose.cloud.yml --project-name peregrine-cloud up -d
success "Cloud stack restarted → http://localhost:8508"
;;
demo-start)
info "Starting demo stack (peregrine-demo)..."
$COMPOSE -f compose.demo.yml --project-name peregrine-demo up -d
success "Demo stack up → http://localhost:8504"
;;
demo-restart)
info "Rebuilding + restarting demo stack (peregrine-demo)..."
$COMPOSE -f compose.demo.yml --project-name peregrine-demo build api web
$COMPOSE -f compose.demo.yml --project-name peregrine-demo up -d
success "Demo stack restarted → http://localhost:8504"
;;
help|--help|-h) help|--help|-h)
usage usage
;; ;;

View file

@ -1,97 +0,0 @@
-- Migration 001: Baseline schema
-- Captures the full schema as of v0.8.5 (all columns including those added via ALTER TABLE)
-- Tables are created with IF NOT EXISTS, so a table that is already present is left untouched
-- (its existing column set is NOT altered by this file).
--
-- jobs: url is UNIQUE; status defaults to 'pending'; is_remote defaults to 0.
CREATE TABLE IF NOT EXISTS jobs (
id INTEGER PRIMARY KEY AUTOINCREMENT,
title TEXT,
company TEXT,
url TEXT UNIQUE,
source TEXT,
location TEXT,
is_remote INTEGER DEFAULT 0,
salary TEXT,
description TEXT,
match_score REAL,
keyword_gaps TEXT,
date_found TEXT,
status TEXT DEFAULT 'pending',
notion_page_id TEXT,
cover_letter TEXT,
applied_at TEXT,
interview_date TEXT,
rejection_stage TEXT,
phone_screen_at TEXT,
interviewing_at TEXT,
offer_at TEXT,
hired_at TEXT,
survey_at TEXT,
calendar_event_id TEXT,
-- NOTE(review): the ATS-optimizer migration also ADDs the next two columns; on a DB
-- created from this baseline that ALTER TABLE will see them as duplicates — confirm
-- the migration runner tolerates that.
optimized_resume TEXT,
ats_gap_report TEXT
);
-- job_contacts: job_id is a plain INTEGER (no REFERENCES constraint is declared here).
-- is_response_needed and suggestion_dismissed are INTEGER flags defaulting to 0.
CREATE TABLE IF NOT EXISTS job_contacts (
id INTEGER PRIMARY KEY AUTOINCREMENT,
job_id INTEGER,
direction TEXT,
subject TEXT,
from_addr TEXT,
to_addr TEXT,
body TEXT,
received_at TEXT,
is_response_needed INTEGER DEFAULT 0,
responded_at TEXT,
message_id TEXT,
stage_signal TEXT,
suggestion_dismissed INTEGER DEFAULT 0
);
-- company_research: job_id is UNIQUE, so at most one research row can exist per job_id.
-- scrape_used is an INTEGER flag defaulting to 0.
CREATE TABLE IF NOT EXISTS company_research (
id INTEGER PRIMARY KEY AUTOINCREMENT,
job_id INTEGER UNIQUE,
generated_at TEXT,
company_brief TEXT,
ceo_brief TEXT,
talking_points TEXT,
raw_output TEXT,
tech_brief TEXT,
funding_brief TEXT,
competitors_brief TEXT,
red_flags TEXT,
scrape_used INTEGER DEFAULT 0,
accessibility_brief TEXT
);
-- background_tasks: status defaults to 'pending'; job_id carries no REFERENCES constraint.
-- params is stored as TEXT (presumably serialized arguments — confirm against the task runner).
CREATE TABLE IF NOT EXISTS background_tasks (
id INTEGER PRIMARY KEY AUTOINCREMENT,
task_type TEXT,
job_id INTEGER,
params TEXT,
status TEXT DEFAULT 'pending',
error TEXT,
created_at TEXT,
started_at TEXT,
finished_at TEXT,
stage TEXT,
updated_at TEXT
);
-- survey_responses: no defaults or uniqueness constraints; job_id carries no REFERENCES
-- constraint, and reported_score is the only non-TEXT payload column (REAL).
CREATE TABLE IF NOT EXISTS survey_responses (
id INTEGER PRIMARY KEY AUTOINCREMENT,
job_id INTEGER,
survey_name TEXT,
received_at TEXT,
source TEXT,
raw_input TEXT,
image_path TEXT,
mode TEXT,
llm_output TEXT,
reported_score REAL,
created_at TEXT
);
-- digest_queue: job_contact_id is UNIQUE, so a given contact id can be queued at most once.
-- No REFERENCES constraint ties it to job_contacts.
CREATE TABLE IF NOT EXISTS digest_queue (
id INTEGER PRIMARY KEY AUTOINCREMENT,
job_contact_id INTEGER UNIQUE,
created_at TEXT
);

View file

@ -1,7 +0,0 @@
-- Add the ATS resume optimizer columns introduced in v0.8.x.
-- Databases created before the baseline schema included these columns need this
-- migration to add them. Databases that already have the columns will hit a
-- duplicate-column error here: SQLite has no IF NOT EXISTS guard for ADD COLUMN,
-- so that case is handled with a try/ignore pattern at the application level
-- (db_migrate.py wraps each migration in a transaction).
ALTER TABLE jobs ADD COLUMN optimized_resume TEXT;
ALTER TABLE jobs ADD COLUMN ats_gap_report TEXT;

View file

@ -1,3 +0,0 @@
-- Resume review draft and version archive columns (migration 003)
ALTER TABLE jobs ADD COLUMN resume_draft_json TEXT;
ALTER TABLE jobs ADD COLUMN resume_archive_json TEXT;

View file

@ -1,5 +0,0 @@
-- Migration 004: add resume_final_struct to jobs table
-- Stores the approved resume as a structured JSON dict alongside the plain text
-- (resume_optimized_text). Enables YAML export and future re-processing without
-- re-parsing the plain text.
-- NOTE(review): no migration shown here creates a `resume_optimized_text` column; the
-- plain-text column in the baseline schema is `optimized_resume` — confirm which name is meant.
ALTER TABLE jobs ADD COLUMN resume_final_struct TEXT;

View file

@ -1,17 +0,0 @@
-- 005_resumes_table.sql
-- Resume library: named saved resumes per user (optimizer output, imports, manual)
--
-- name and text are required; source defaults to 'manual'; is_default defaults to 0.
-- created_at / updated_at default to datetime('now') at insert time only — nothing in
-- this file refreshes updated_at on UPDATE, so writers must set it themselves.
CREATE TABLE IF NOT EXISTS resumes (
id INTEGER PRIMARY KEY AUTOINCREMENT,
name TEXT NOT NULL,
source TEXT NOT NULL DEFAULT 'manual',
job_id INTEGER REFERENCES jobs(id),
text TEXT NOT NULL,
struct_json TEXT,
word_count INTEGER,
is_default INTEGER NOT NULL DEFAULT 0,
created_at TEXT NOT NULL DEFAULT (datetime('now')),
updated_at TEXT NOT NULL DEFAULT (datetime('now'))
);
-- Link from a job to a saved resume. Unlike the CREATE TABLE above, this ALTER has no
-- IF NOT EXISTS guard and fails with a duplicate-column error if the column already exists.
ALTER TABLE jobs ADD COLUMN resume_id INTEGER REFERENCES resumes(id);

View file

@ -1,6 +0,0 @@
-- 006_date_posted.sql
-- Add date_posted column for shadow listing detection (stale/shadow score feature).
-- New DBs already have this column from the CREATE TABLE statement in db.py;
-- this migration adds it to existing user DBs.
ALTER TABLE jobs ADD COLUMN date_posted TEXT;

View file

@ -1,22 +0,0 @@
-- Migration 006: Add columns and tables present in the live DB but missing from migrations
-- These were added via direct ALTER TABLE after the v0.8.5 baseline was written.
-- NOTE(review): 006_date_posted.sql also identifies itself as migration 006 and also runs
-- `ALTER TABLE jobs ADD COLUMN date_posted TEXT`. Whichever of the two runs second will hit
-- a duplicate-column error — confirm the runner ignores it, or renumber/merge the files.
-- date_posted: used for ghost-post shadow-score detection
ALTER TABLE jobs ADD COLUMN date_posted TEXT;
-- hired_feedback: JSON blob saved when a job reaches the 'hired' outcome
ALTER TABLE jobs ADD COLUMN hired_feedback TEXT;
-- references_ table: contacts who can provide references for applications
-- Only name is required (NOT NULL); every other column is optional TEXT.
CREATE TABLE IF NOT EXISTS references_ (
id INTEGER PRIMARY KEY AUTOINCREMENT,
name TEXT NOT NULL,
relationship TEXT,
company TEXT,
email TEXT,
phone TEXT,
notes TEXT,
tags TEXT,
prep_email TEXT,
role TEXT
);

View file

@ -1,3 +0,0 @@
-- 007_resume_sync.sql
-- Add synced_at to resumes: ISO datetime of last library↔profile sync, null = never synced.
ALTER TABLE resumes ADD COLUMN synced_at TEXT;

View file

@ -1,97 +0,0 @@
-- messages: manual log entries and LLM drafts
-- type defaults to 'email'; logged_at defaults to datetime('now') at insert time.
-- All three foreign keys use ON DELETE SET NULL, so deleting the referenced job, contact
-- or template keeps the message row and nulls the link (this only takes effect when
-- foreign-key enforcement is enabled on the connection).
-- template_id references message_templates, which is created later in this file; SQLite
-- accepts a REFERENCES clause to a table that does not exist yet at CREATE TABLE time.
CREATE TABLE IF NOT EXISTS messages (
id INTEGER PRIMARY KEY AUTOINCREMENT,
job_id INTEGER REFERENCES jobs(id) ON DELETE SET NULL,
job_contact_id INTEGER REFERENCES job_contacts(id) ON DELETE SET NULL,
type TEXT NOT NULL DEFAULT 'email',
direction TEXT,
subject TEXT,
body TEXT,
from_addr TEXT,
to_addr TEXT,
logged_at TEXT NOT NULL DEFAULT (datetime('now')),
approved_at TEXT,
template_id INTEGER REFERENCES message_templates(id) ON DELETE SET NULL,
osprey_call_id TEXT
);
-- message_templates: built-in seeds and user-created templates
-- key is UNIQUE; the seed INSERT OR IGNORE below relies on that to skip rows whose key
-- already exists, which makes re-running the seed a no-op.
-- title and body_template are required; category defaults to 'custom';
-- is_builtin and is_community are INTEGER flags defaulting to 0.
CREATE TABLE IF NOT EXISTS message_templates (
id INTEGER PRIMARY KEY AUTOINCREMENT,
key TEXT UNIQUE,
title TEXT NOT NULL,
category TEXT NOT NULL DEFAULT 'custom',
subject_template TEXT,
body_template TEXT NOT NULL,
is_builtin INTEGER NOT NULL DEFAULT 0,
is_community INTEGER NOT NULL DEFAULT 0,
community_source TEXT,
created_at TEXT NOT NULL DEFAULT (datetime('now')),
updated_at TEXT NOT NULL DEFAULT (datetime('now'))
);
INSERT OR IGNORE INTO message_templates
(key, title, category, subject_template, body_template, is_builtin)
VALUES
(
'follow_up',
'Following up on my application',
'follow_up',
'Following up — {{role}} application',
'Hi {{recruiter_name}},
I wanted to follow up on my application for the {{role}} position at {{company}}. I remain very interested in the opportunity and would welcome the chance to discuss my background further.
Please let me know if there is anything else you need from me.
Best regards,
{{name}}',
1
),
(
'thank_you',
'Thank you for the interview',
'thank_you',
'Thank you — {{role}} interview',
'Hi {{recruiter_name}},
Thank you for taking the time to speak with me about the {{role}} role at {{company}}. I enjoyed learning more about the team and the work you are doing.
I am very excited about this opportunity and look forward to hearing about the next steps.
Best regards,
{{name}}',
1
),
(
'accommodation_request',
'Accommodation request',
'accommodation',
'Accommodation request — {{role}} interview',
'Hi {{recruiter_name}},
I am writing to request a reasonable accommodation for my upcoming interview for the {{role}} position. Specifically, I would appreciate:
{{accommodation_details}}
Please let me know if you need any additional information. I am happy to discuss this further.
Thank you,
{{name}}',
1
),
(
'withdrawal',
'Withdrawing my application',
'withdrawal',
'Application withdrawal — {{role}}',
'Hi {{recruiter_name}},
I am writing to let you know that I would like to withdraw my application for the {{role}} position at {{company}}.
Thank you for your time and consideration. I wish you and the team all the best.
Best regards,
{{name}}',
1
)

View file

@ -1 +0,0 @@
-- Add jobs.excluded_from_training: an INTEGER column defaulting to 0, so
-- existing rows read as 0 after the migration.
ALTER TABLE jobs ADD COLUMN excluded_from_training INTEGER DEFAULT 0;

View file

@ -70,6 +70,3 @@ nav:
- Tier System: reference/tier-system.md - Tier System: reference/tier-system.md
- LLM Router: reference/llm-router.md - LLM Router: reference/llm-router.md
- Config Files: reference/config-files.md - Config Files: reference/config-files.md
extra_javascript:
- plausible.js

View file

@ -1,92 +0,0 @@
#!/usr/bin/env bash
# podman-standalone.sh — Peregrine rootful Podman setup (no Compose)
#
# For beta testers running system Podman (non-rootless) with systemd.
# Mirrors the manage.sh "remote" profile: app + SearXNG only.
# Ollama/vLLM/vision are expected as host services if needed.
#
# ── Prerequisites ────────────────────────────────────────────────────────────
# 1. Clone the repo:
# sudo git clone <repo-url> /opt/peregrine
#
# 2. Build the app image:
# cd /opt/peregrine && sudo podman build -t localhost/peregrine:latest .
#
# 3. Create a config directory and copy the example configs:
# sudo mkdir -p /opt/peregrine/{config,data}
# sudo cp /opt/peregrine/config/*.example /opt/peregrine/config/
# # Edit /opt/peregrine/config/llm.yaml, notion.yaml, etc. as needed
#
# 4. Run this script:
# sudo bash /opt/peregrine/podman-standalone.sh
#
# ── After setup — generate systemd unit files ────────────────────────────────
# sudo podman generate systemd --new --name peregrine-searxng \
# | sudo tee /etc/systemd/system/peregrine-searxng.service
# sudo podman generate systemd --new --name peregrine \
# | sudo tee /etc/systemd/system/peregrine.service
# sudo systemctl daemon-reload
# sudo systemctl enable --now peregrine-searxng peregrine
#
# ── SearXNG ──────────────────────────────────────────────────────────────────
# Peregrine expects a SearXNG instance with JSON format enabled.
# If you already run one, skip the SearXNG container and set the URL in
# config/llm.yaml (searxng_url key). The default is http://localhost:8888.
#
# ── Ports ────────────────────────────────────────────────────────────────────
# Peregrine UI → http://localhost:8501
#
# ── To use a different Streamlit port ────────────────────────────────────────
# Uncomment the CMD override at the bottom of the peregrine run block and
# set PORT= to your desired port. The Dockerfile default is 8501.
#
set -euo pipefail

# Host-side locations. Every expansion below is quoted so that a path
# containing spaces (e.g. a docs folder named "Job Search") is passed to
# podman as a single argument instead of being word-split.
REPO_DIR=/opt/peregrine
DATA_DIR=/opt/peregrine/data
DOCS_DIR=/Library/Documents/JobSearch # ← adjust to your docs path
TZ=America/Los_Angeles

# ── Peregrine App ─────────────────────────────────────────────────────────────
# Image is built locally — no registry auto-update label.
# To update: sudo podman build -t localhost/peregrine:latest /opt/peregrine
#            sudo podman restart peregrine
#
# Env vars: ANTHROPIC_API_KEY, OPENAI_COMPAT_URL, OPENAI_COMPAT_KEY are
# optional — only needed if you're using those backends in config/llm.yaml.
#
sudo podman run -d \
  --name=peregrine \
  --restart=unless-stopped \
  --net=host \
  -v "${REPO_DIR}/config:/app/config:Z" \
  -v "${DATA_DIR}:/app/data:Z" \
  -v "${DOCS_DIR}:/docs:z" \
  -e STAGING_DB=/app/data/staging.db \
  -e DOCS_DIR=/docs \
  -e PYTHONUNBUFFERED=1 \
  -e PYTHONLOGGING=WARNING \
  -e "TZ=${TZ}" \
  --health-cmd="curl -f http://localhost:8501/_stcore/health || exit 1" \
  --health-interval=30s \
  --health-timeout=10s \
  --health-start-period=60s \
  --health-retries=3 \
  localhost/peregrine:latest

# To override the default port (8501), uncomment and edit the line below,
# then remove the image name above and place it at the end of the CMD:
# streamlit run app/app.py --server.port=8501 --server.headless=true --server.fileWatcherType=none

echo ""
echo "Peregrine is starting up."
echo " App: http://localhost:8501"
echo ""
echo "Check container health with:"
echo " sudo podman ps"
echo " sudo podman logs peregrine"
echo ""
echo "To register as a systemd service:"
echo " sudo podman generate systemd --new --name peregrine \\"
echo " | sudo tee /etc/systemd/system/peregrine.service"
echo " sudo systemctl daemon-reload"
echo " sudo systemctl enable --now peregrine"

View file

@ -1,33 +0,0 @@
[tool.ruff]
# app/ is the deprecated Streamlit UI (replaced by Vue+FastAPI).
# No new work goes there; exclude from linting rather than accumulate suppressions.
exclude = ["app/"]
[tool.ruff.lint.per-file-ignores]
# dev-api.py / dev_api.py (symlink): E702 semicolons in compact Pydantic model
# definitions — intentional style for dense data models with many simple fields.
# E402: mid-file module-level imports are intentional in dev-api.py for test patchability.
"dev-api.py" = ["E702", "E402"]
"dev_api.py" = ["E702", "E402"]
# finetune_local.py: E402 ML libs (torch, datasets, trl) are imported after
# runtime CUDA / Unsloth availability checks — conditional import pattern.
"scripts/finetune_local.py" = ["E402", "E741"]
# scripts/: E402 mid-file imports used for lazy loading or post-env-setup imports.
"scripts/task_runner.py" = ["E402"]
"scripts/migrate.py" = ["E741"]
# scrapers/: third-party script; minimal changes policy.
"scrapers/companyScraper.py" = ["E722"]
# tools/: deprecated label tool copy (canonical in avocet); suppress style warnings.
"tools/label_tool.py" = ["E741"]
# tests/: F841 unused variables are the standard mock-patch capture pattern
# (e.g., `original_fn = obj.method` before monkeypatching).
# E741 ambiguous `l` names and E402 conditional imports are common in test fixtures.
# E702 compact `con.commit(); con.close()` is a common SQLite test helper idiom.
"tests/**" = ["F841", "E741", "E402", "E702"]
"tests/test_wizard_steps.py" = ["F841", "E741", "E402", "E702"]
"scripts/test_email_classify.py" = ["E402", "F841"]

View file

@ -3,12 +3,10 @@
# Keep in sync with environment.yml # Keep in sync with environment.yml
# ── CircuitForge shared core ─────────────────────────────────────────────── # ── CircuitForge shared core ───────────────────────────────────────────────
# Requires circuitforge-core >= 0.8.0 (config.load_env, db, tasks; resources moved to circuitforge-orch).
# Local dev / Docker (parent-context build): path install works because # Local dev / Docker (parent-context build): path install works because
# circuitforge-core/ is a sibling directory. # circuitforge-core/ is a sibling directory.
# CI / fresh checkouts: falls back to the Forgejo VCS URL below. # CI / fresh checkouts: falls back to the Forgejo VCS URL below.
# To use local editable install run: pip install -e ../circuitforge-core # To use local editable install run: pip install -e ../circuitforge-core
# TODO: pin to @v0.7.0 tag once cf-core cuts a release tag.
git+https://git.opensourcesolarpunk.com/Circuit-Forge/circuitforge-core.git@main git+https://git.opensourcesolarpunk.com/Circuit-Forge/circuitforge-core.git@main
# ── Web UI ──────────────────────────────────────────────────────────────── # ── Web UI ────────────────────────────────────────────────────────────────

View file

@ -1,89 +0,0 @@
"""
Peregrine cloud session: thin wrapper around circuitforge_core.cloud_session.
Sets request-scoped ContextVars with the authenticated user_id, tier, and
custom writing model so that _allocate_orch_async in llm.py can forward them
to cf-orch without any service function signature changes.
Usage: add to main.py once:
from app.cloud_session import session_middleware_dep
app = FastAPI(..., dependencies=[Depends(session_middleware_dep)])
From that point, any route (and every service/llm function it calls)
has access to the current user context via llm.get_request_*() helpers.
Writing model resolution order (first match wins):
1. USER_WRITING_MODELS env var: JSON dict mapping Directus UUID → model name
e.g. USER_WRITING_MODELS={"5b99ca9f-...": "meghan-letter-writer:latest"}
Use this for Monday; no Heimdall changes required.
2. session.meta["custom_writing_model"]: returned by Heimdall's resolve endpoint
once Heimdall is updated to expose user_preferences fields.
"""
from __future__ import annotations
import json
import logging
import os
from fastapi import Depends, Request, Response
from circuitforge_core.cloud_session import CloudSessionFactory, CloudUser, detect_byok
log = logging.getLogger(__name__)
__all__ = ["CloudUser", "get_session", "require_tier", "session_middleware_dep"]
# JSON dict mapping Directus user UUID → custom writing model name.
# Used until Heimdall's resolve endpoint exposes user_preferences.
def _load_user_writing_models() -> dict[str, str]:
    """Parse the ``USER_WRITING_MODELS`` env var into a user-id → model-name map.

    The variable must hold a JSON object whose values are model-name strings.
    Anything else is ignored with a warning so that a bad value can never
    break session resolution.

    Returns:
        The parsed mapping, or an empty dict when the variable is unset,
        blank, not valid JSON, or not a JSON object. Entries whose value is
        not a non-empty string are dropped.
    """
    raw = os.environ.get("USER_WRITING_MODELS", "").strip()
    if not raw:
        return {}
    try:
        parsed = json.loads(raw)
    except json.JSONDecodeError:
        log.warning("USER_WRITING_MODELS is not valid JSON — ignoring")
        return {}
    # Valid JSON is not necessarily an object: a list or string here would
    # otherwise fail later on `.get()` for every request.
    if not isinstance(parsed, dict):
        log.warning("USER_WRITING_MODELS must be a JSON object — ignoring")
        return {}
    models = {uid: name for uid, name in parsed.items() if isinstance(name, str) and name}
    if len(models) != len(parsed):
        log.warning("USER_WRITING_MODELS contains non-string model names — skipping those entries")
    return models
# Parsed once at import time; a change to the env var is only picked up when
# this module is imported again.
_USER_WRITING_MODELS: dict[str, str] = _load_user_writing_models()
# Single session factory for this product; both exported helpers below and
# session_middleware_dep are derived from it.
_factory = CloudSessionFactory(
    product="peregrine",
    byok_detector=detect_byok,
)
# Public names listed in __all__.
get_session = _factory.dependency()
require_tier = _factory.require_tier
async def session_middleware_dep(request: Request, response: Response) -> None:
    """Global FastAPI dependency — resolves the session and sets request-scoped
    ContextVars so llm._allocate_orch_async can forward them to cf-orch.

    This is an ``async def`` on purpose. FastAPI runs plain ``def``
    dependencies in a worker thread under a *copy* of the current context, so
    any ``ContextVar.set()`` made there is discarded when the thread returns
    and the route never sees it. Running in the request task keeps the values
    visible to the route and everything it calls.

    Sets:
        - user_id: real cloud UUID, or None for local/anon sessions
        - tier: the resolved tier string (free/paid/premium/ultra/local)
        - writing_model: custom fine-tuned model from the env-var map or
          Heimdall meta, or None

    Add as a global dependency in main.py:
        app = FastAPI(..., dependencies=[Depends(session_middleware_dep)])
    """
    from fastapi.concurrency import run_in_threadpool

    from app.llm import set_request_tier, set_request_user_id, set_request_writing_model

    # resolve() is a synchronous call; keep it in the threadpool (where it ran
    # when this dependency was sync) so it cannot stall the event loop.
    session = await run_in_threadpool(_factory.resolve, request, response)

    raw_user_id = session.user_id
    # Only forward real cloud UUIDs — local/dev/anon sessions use the shared catalog
    is_placeholder = raw_user_id in (None, "local", "local-dev") or (
        raw_user_id or ""
    ).startswith("anon-")
    set_request_user_id(None if is_placeholder else raw_user_id)
    set_request_tier(session.tier)

    # Resolution order: env-var map (Monday path) → Heimdall meta (future path)
    writing_model = (
        _USER_WRITING_MODELS.get(session.user_id)
        or session.meta.get("custom_writing_model")
    )
    set_request_writing_model(writing_model)

View file

@ -1,843 +0,0 @@
"""LiteLLM wrapper for multi-provider AI support."""
import json
import logging
import os
import re
from contextlib import asynccontextmanager
from contextvars import ContextVar
from dataclasses import dataclass
from typing import Any
import httpx
import litellm
from pydantic import BaseModel
from app.config import settings
# LLM timeout configuration (seconds) - base values
LLM_TIMEOUT_HEALTH_CHECK = 30
LLM_TIMEOUT_COMPLETION = 120
LLM_TIMEOUT_JSON = 180 # JSON completions may take longer
# LLM-004: OpenRouter JSON-capable models (explicit allowlist)
OPENROUTER_JSON_CAPABLE_MODELS = {
# Anthropic models
"anthropic/claude-3-opus",
"anthropic/claude-3-sonnet",
"anthropic/claude-3-haiku",
"anthropic/claude-3.5-sonnet",
"anthropic/claude-3.5-haiku",
"anthropic/claude-haiku-4-5-20251001",
"anthropic/claude-sonnet-4-20250514",
"anthropic/claude-opus-4-20250514",
# OpenAI models
"openai/gpt-4-turbo",
"openai/gpt-4",
"openai/gpt-4o",
"openai/gpt-4o-mini",
"openai/gpt-3.5-turbo",
"openai/gpt-5-nano-2025-08-07",
# Google models
"google/gemini-pro",
"google/gemini-1.5-pro",
"google/gemini-1.5-flash",
"google/gemini-2.0-flash",
"google/gemini-3-flash-preview",
# DeepSeek models
"deepseek/deepseek-chat",
"deepseek/deepseek-reasoner",
# Mistral models
"mistralai/mistral-large",
"mistralai/mistral-medium",
}
# JSON-010: JSON extraction safety limits
MAX_JSON_EXTRACTION_RECURSION = 10
MAX_JSON_CONTENT_SIZE = 1024 * 1024 # 1MB
# Request-scoped user_id — set once by session_middleware_dep, read inside _allocate_orch_async.
# ContextVar is safe for concurrent async requests: each request task gets its own copy.
# NOTE(review): a value set from code running under a copied context (e.g. a
# worker thread) is not visible to the request task — confirm the setter runs
# in the request task itself.
_request_user_id: ContextVar[str | None] = ContextVar("request_user_id", default=None)
_request_tier: ContextVar[str | None] = ContextVar("request_tier", default=None)
# Custom writing model for premium/ultra users — populated from Heimdall license key meta.
# Set to None for all other tiers; complete() falls back to the shared base model.
_request_writing_model: ContextVar[str | None] = ContextVar("request_writing_model", default=None)
# Tiers for which complete() offers the personal writing model as first candidate.
_PREMIUM_TIERS: frozenset[str] = frozenset({"premium", "ultra"})


def set_request_user_id(user_id: str | None) -> None:
    """Store *user_id* in the current context (None clears it)."""
    _request_user_id.set(user_id)


def get_request_user_id() -> str | None:
    """Return the user id stored in the current context, or None if unset."""
    return _request_user_id.get()


def set_request_tier(tier: str | None) -> None:
    """Store the resolved *tier* in the current context."""
    _request_tier.set(tier)


def get_request_tier() -> str | None:
    """Return the tier stored in the current context, or None if unset."""
    return _request_tier.get()


def set_request_writing_model(model: str | None) -> None:
    """Store the custom writing *model* name in the current context."""
    _request_writing_model.set(model)


def get_request_writing_model() -> str | None:
    """Return the custom writing model stored in the current context, or None."""
    return _request_writing_model.get()
class LLMConfig(BaseModel):
    """LLM configuration model."""

    # Provider key; get_model_name() knows "openai", "anthropic", "openrouter",
    # "gemini", "deepseek" and "ollama".
    provider: str
    # Model identifier; get_model_name() adds the LiteLLM provider prefix when needed.
    model: str
    # Passed straight to litellm as api_key. check_llm_health() tolerates an
    # empty value only when provider is "ollama".
    api_key: str
    # Optional custom endpoint; run through _normalize_api_base() before use.
    api_base: str | None = None
@dataclass
class _OrchAllocation:
    """Handle yielded by _allocate_orch_async for one cf-orch allocation."""

    # "allocation_id" from the coordinator response; used in the release DELETE URL.
    allocation_id: str
    # "url" from the coordinator response; complete() appends "/v1" to it.
    url: str
    # Service name the allocation was requested for.
    service: str
@asynccontextmanager
async def _allocate_orch_async(
    coordinator_url: str,
    service: str,
    model_candidates: list[str],
    ttl_s: float,
    caller: str,
):
    """Async context manager that allocates a cf-orch service and releases on exit.

    Args:
        coordinator_url: Base URL of the cf-orch coordinator (trailing "/" ignored).
        service: Service name used in the ``/api/services/{service}`` path.
        model_candidates: Model names sent to the coordinator as ``model_candidates``.
        ttl_s: Value sent to the coordinator as ``ttl_s``.
        caller: Identifier sent to the coordinator as ``caller``.

    Yields:
        An ``_OrchAllocation`` built from the coordinator's response.

    Raises:
        RuntimeError: The coordinator answered with a non-success status, or
            its success response was not JSON containing ``allocation_id``
            and ``url``.
    """
    service_url = f"{coordinator_url.rstrip('/')}/api/services/{service}"
    async with httpx.AsyncClient(timeout=120.0) as client:
        payload: dict[str, Any] = {
            "model_candidates": model_candidates,
            "ttl_s": ttl_s,
            "caller": caller,
        }
        # The request-scoped user id is only sent when one is set.
        uid = get_request_user_id()
        if uid:
            payload["user_id"] = uid
        resp = await client.post(f"{service_url}/allocate", json=payload)
        if not resp.is_success:
            raise RuntimeError(
                f"cf-orch allocation failed for {service!r}: "
                f"HTTP {resp.status_code}: {resp.text[:200]}"
            )
        try:
            data = resp.json()
            alloc = _OrchAllocation(
                allocation_id=data["allocation_id"],
                url=data["url"],
                service=service,
            )
        except (ValueError, KeyError, TypeError) as exc:
            # Surface a malformed success response with context instead of a
            # bare KeyError / JSON decode error.
            raise RuntimeError(
                f"cf-orch allocation for {service!r} returned an unexpected response: "
                f"{resp.text[:200]}"
            ) from exc
        try:
            yield alloc
        finally:
            # Release is best-effort: a failure here must not mask the result
            # (or the exception) of the body that used the allocation.
            try:
                await client.delete(
                    f"{service_url}/allocations/{alloc.allocation_id}",
                    timeout=10.0,
                )
            except Exception as exc:
                logging.debug("cf-orch release failed (non-fatal): %s", exc)
def _normalize_api_base(provider: str, api_base: str | None) -> str | None:
    """Return *api_base* cleaned up for the given LiteLLM provider.

    Users often paste a base URL that already ends in ``/v1``. The Anthropic
    and Gemini handlers append their own ``/v1/...`` segment, which would
    produce ``/v1/v1/...`` and a 404, so the suffix is removed for those two
    providers. Surrounding whitespace and trailing slashes are always removed.

    Returns:
        The normalized URL, or None when nothing usable remains.
    """
    cleaned = (api_base or "").strip()
    if not cleaned:
        return None
    cleaned = cleaned.rstrip("/")
    # Both handlers add '/v1/...' themselves (messages / models respectively).
    if provider in ("anthropic", "gemini") and cleaned.endswith("/v1"):
        cleaned = cleaned[: -len("/v1")].rstrip("/")
    return cleaned if cleaned else None
def _extract_text_parts(value: Any, depth: int = 0, max_depth: int = 10) -> list[str]:
    """Collect text segments from an arbitrarily nested response value.

    Strings are returned as-is, lists are flattened in order, dicts are
    followed through the first of the keys ``text`` / ``content`` / ``value``
    that is present, and other objects through their ``text`` or ``content``
    attribute. Only the first matching key or attribute is followed.

    Args:
        value: Input value that may contain text in strings, lists, dicts, or objects.
        depth: Current recursion depth.
        max_depth: Depth at which the walk stops and yields nothing.

    Returns:
        A list of extracted text segments (possibly empty).
    """
    if depth >= max_depth or value is None:
        return []
    if isinstance(value, str):
        return [value]

    child_depth = depth + 1
    if isinstance(value, list):
        return [
            segment
            for item in value
            for segment in _extract_text_parts(item, child_depth, max_depth)
        ]
    if isinstance(value, dict):
        for key in ("text", "content", "value"):
            if key in value:
                return _extract_text_parts(value.get(key), child_depth, max_depth)
        return []
    for attr in ("text", "content"):
        if hasattr(value, attr):
            return _extract_text_parts(getattr(value, attr), child_depth, max_depth)
    return []
def _join_text_parts(parts: list[str]) -> str | None:
    """Combine text segments into one newline-separated string.

    Args:
        parts: Candidate text segments; empty ones are skipped.

    Returns:
        The joined, whitespace-trimmed string, or None if nothing is left.
    """
    non_empty = [segment for segment in parts if segment]
    combined = "\n".join(non_empty).strip()
    if not combined:
        return None
    return combined
def _extract_message_text(message: Any) -> str | None:
    """Return the plain text of a LiteLLM message, or None if it has none.

    The content is taken from the ``content`` attribute when the message has
    one, otherwise from the ``"content"`` key of a dict message.
    """
    if hasattr(message, "content"):
        raw = message.content
    elif isinstance(message, dict):
        raw = message.get("content")
    else:
        raw = None
    segments = _extract_text_parts(raw)
    return _join_text_parts(segments)
def _extract_choice_text(choice: Any) -> str | None:
    """Return the plain text carried by a LiteLLM choice.

    Sources are tried in this order and the first non-empty one wins:
    the message content, ``text``, then ``delta``. For ``text`` and ``delta``
    the attribute is tried before the dict key.

    Args:
        choice: LiteLLM choice object or dict.

    Returns:
        Extracted text or None if no content is found.
    """
    if hasattr(choice, "message"):
        message = choice.message
    elif isinstance(choice, dict):
        message = choice.get("message")
    else:
        message = None

    found = _extract_message_text(message)
    if found:
        return found

    choice_is_dict = isinstance(choice, dict)
    for field in ("text", "delta"):
        if hasattr(choice, field):
            found = _join_text_parts(_extract_text_parts(getattr(choice, field)))
            if found:
                return found
        if choice_is_dict and field in choice:
            found = _join_text_parts(_extract_text_parts(choice.get(field)))
            if found:
                return found
    return None
def _to_code_block(content: str | None, language: str = "text") -> str:
    """Return *content* as a fenced markdown code block tagged with *language*.

    Missing or whitespace-only content is rendered as the placeholder ``<empty>``.
    """
    body = (content or "").strip() or "<empty>"
    return f"```{language}\n{body}\n```"
def _load_stored_config() -> dict:
    """Load the stored LLM settings from the file at ``settings.config_path``.

    Returns:
        The parsed JSON object, or an empty dict when the file is missing,
        unreadable, not valid UTF-8 JSON, or holds JSON that is not an
        object. Callers use ``.get()`` on the result, so it must be a dict.
    """
    config_path = settings.config_path
    try:
        # Read directly instead of checking exists() first: a missing file
        # raises FileNotFoundError (an OSError), so there is no window in
        # which the file can vanish between the check and the read.
        stored = json.loads(config_path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        return {}
    return stored if isinstance(stored, dict) else {}
def get_llm_config() -> LLMConfig:
    """Build the effective LLM configuration.

    Each field is taken from the stored config file when the key is present
    there, otherwise from the corresponding ``settings.llm_*`` value.
    """
    stored = _load_stored_config()
    fallbacks = {
        "provider": settings.llm_provider,
        "model": settings.llm_model,
        "api_key": settings.llm_api_key,
        "api_base": settings.llm_api_base,
    }
    resolved = {field: stored.get(field, default) for field, default in fallbacks.items()}
    return LLMConfig(**resolved)
def get_model_name(config: LLMConfig) -> str:
    """Return the model identifier in the form LiteLLM expects.

    OpenRouter models are nested (``openrouter/anthropic/claude-3.5-sonnet``),
    so for that provider the ``openrouter/`` prefix is added unless the model
    already starts with it. For every other provider a model that already
    carries a known provider prefix is returned unchanged; otherwise the
    provider's own prefix is prepended (OpenAI and unknown providers have none).
    """
    provider, model = config.provider, config.model

    if provider == "openrouter":
        return model if model.startswith("openrouter/") else f"openrouter/{model}"

    # str.startswith accepts a tuple, so one call covers every known prefix.
    recognised = ("openrouter/", "anthropic/", "gemini/", "deepseek/", "ollama/")
    if model.startswith(recognised):
        return model

    prefix_for = {
        "openai": "",  # OpenAI models don't need prefix
        "anthropic": "anthropic/",
        "openrouter": "openrouter/",
        "gemini": "gemini/",
        "deepseek": "deepseek/",
        "ollama": "ollama/",
    }
    return prefix_for.get(provider, "") + model
def _supports_temperature(provider: str, model: str) -> bool:
    """Tell whether ``temperature`` may be sent for this model.

    Models whose name contains ``gpt-5`` (case-insensitive) are treated as
    not accepting it, since they reject values other than 1 and LiteLLM may
    error. *provider* is accepted for signature symmetry but not consulted.
    """
    del provider  # unused on purpose
    return "gpt-5" not in model.lower()
def _get_reasoning_effort(provider: str, model: str) -> str | None:
    """Return the ``reasoning_effort`` to send by default, if any.

    Models whose name contains ``gpt-5`` (case-insensitive) get ``"minimal"``
    because they may otherwise return empty message content; all others get
    None. *provider* is accepted for signature symmetry but not consulted.
    """
    del provider  # unused on purpose
    return "minimal" if "gpt-5" in model.lower() else None
async def check_llm_health(
    config: LLMConfig | None = None,
    *,
    include_details: bool = False,
    test_prompt: str | None = None,
) -> dict[str, Any]:
    """Send a tiny completion to the LLM and report whether it answered.

    Args:
        config: Configuration to probe; defaults to ``get_llm_config()``.
        include_details: When true, the report also carries the prompt, the
            model output and (on failure) the error text as code blocks.
        test_prompt: Prompt to send; defaults to ``"Hi"``.

    Returns:
        A dict with ``healthy``, ``provider`` and ``model``, plus
        ``response_model`` when a response was received and ``error_code``
        (and for empty replies ``message``) when the check failed. This
        function does not raise for provider errors; they are logged and
        reported through ``error_code``.
    """
    if config is None:
        config = get_llm_config()

    def _report(healthy: bool, **fields: Any) -> dict[str, Any]:
        # Every report starts with the same three keys, in this order.
        return {
            "healthy": healthy,
            "provider": config.provider,
            "model": config.model,
            **fields,
        }

    # Ollama runs without a key; every other provider needs one.
    if config.provider != "ollama" and not config.api_key:
        return _report(False, error_code="api_key_missing")

    model_name = get_model_name(config)
    prompt = test_prompt or "Hi"

    def _with_details(
        report: dict[str, Any],
        model_output: str | None,
        error_detail: str | None = None,
    ) -> dict[str, Any]:
        # Optional diagnostics, rendered as markdown code blocks for the client.
        if include_details:
            report["test_prompt"] = _to_code_block(prompt)
            report["model_output"] = _to_code_block(model_output)
            if error_detail is not None:
                report["error_detail"] = _to_code_block(error_detail)
        return report

    try:
        # Minimal test call with a timeout. The API key is passed directly
        # to avoid race conditions with global os.environ.
        request: dict[str, Any] = {
            "model": model_name,
            "messages": [{"role": "user", "content": prompt}],
            "max_tokens": 16,
            "api_key": config.api_key,
            "api_base": _normalize_api_base(config.provider, config.api_base),
            "timeout": LLM_TIMEOUT_HEALTH_CHECK,
        }
        effort = _get_reasoning_effort(config.provider, model_name)
        if effort:
            request["reasoning_effort"] = effort

        response = await litellm.acompletion(**request)
        content = _extract_choice_text(response.choices[0])

        if content:
            healthy_report = _report(
                True,
                response_model=response.model if response else None,
            )
            return _with_details(healthy_report, content)

        # LLM-003: an empty reply counts as unhealthy.
        logging.warning(
            "LLM health check returned empty content",
            extra={"provider": config.provider, "model": config.model},
        )
        empty_report = _report(
            False,
            response_model=response.model if response else None,
            error_code="empty_content",
            message="LLM returned empty response",
        )
        return _with_details(empty_report, None)
    except Exception as e:
        # Full details go to the server log; the client only gets a coarse code
        # unless include_details was requested.
        logging.exception(
            "LLM health check failed",
            extra={"provider": config.provider, "model": config.model},
        )
        message = str(e)
        lowered = message.lower()
        if "404" in message:
            error_code = "duplicate_v1_path" if "/v1/v1/" in message else "not_found_404"
        elif "<!doctype html" in lowered or "<html" in lowered:
            error_code = "html_response"
        else:
            error_code = "health_check_failed"
        return _with_details(_report(False, error_code=error_code), None, message)
async def complete(
    prompt: str,
    system_prompt: str | None = None,
    config: LLMConfig | None = None,
    max_tokens: int = 4096,
    temperature: float = 0.7,
) -> str:
    """Run one completion and return the generated text.

    When no *config* is given and ``CF_ORCH_URL`` is set, a vLLM service is
    first allocated through cf-orch and the request is sent there. If that
    path fails for any reason, a warning is logged and the request falls
    back to the configuration from ``get_llm_config()``.

    Args:
        prompt: User message.
        system_prompt: Optional system message placed before the prompt.
        config: Explicit configuration; skips the cf-orch path when given.
        max_tokens: Completion token limit.
        temperature: Sampling temperature, sent only to models that accept it.

    Returns:
        The text of the first choice.

    Raises:
        ValueError: The request failed or the reply was empty. The original
            exception is chained as the cause.
    """
    if config is None:
        orch_url = os.environ.get("CF_ORCH_URL", "").strip()
        if orch_url:
            try:
                # Premium/ultra users get their personal fine-tuned writing
                # model as the first candidate; the base model stays in the
                # list so cf-orch can degrade gracefully if the personal
                # model isn't loaded yet.
                tier = get_request_tier()
                personal_model = get_request_writing_model()
                candidates: list[str] = ["Qwen2.5-3B-Instruct"]
                if personal_model and tier in _PREMIUM_TIERS:
                    candidates.insert(0, personal_model)

                async with _allocate_orch_async(
                    orch_url,
                    "vllm",
                    model_candidates=candidates,
                    ttl_s=300.0,
                    caller="peregrine-resume-matcher",
                ) as alloc:
                    allocated = LLMConfig(
                        provider="openai",
                        model="__auto__",
                        api_key="any",
                        api_base=alloc.url.rstrip("/") + "/v1",
                    )
                    return await complete(
                        prompt, system_prompt, allocated, max_tokens, temperature
                    )
            except Exception as exc:
                logging.warning(
                    "cf-orch allocation failed, falling back to default config: %s", exc
                )
        config = get_llm_config()

    model_name = get_model_name(config)

    messages = [{"role": "user", "content": prompt}]
    if system_prompt:
        messages.insert(0, {"role": "system", "content": system_prompt})

    try:
        # The API key is passed directly to avoid race conditions with
        # global os.environ.
        request: dict[str, Any] = {
            "model": model_name,
            "messages": messages,
            "max_tokens": max_tokens,
            "api_key": config.api_key,
            "api_base": _normalize_api_base(config.provider, config.api_base),
            "timeout": LLM_TIMEOUT_COMPLETION,
        }
        if _supports_temperature(config.provider, model_name):
            request["temperature"] = temperature
        effort = _get_reasoning_effort(config.provider, model_name)
        if effort:
            request["reasoning_effort"] = effort

        response = await litellm.acompletion(**request)
        text = _extract_choice_text(response.choices[0])
        if not text:
            raise ValueError("Empty response from LLM")
        return text
    except Exception as e:
        # The real error stays in the server log; callers get a generic message.
        logging.error(f"LLM completion failed: {e}", extra={"model": model_name})
        raise ValueError(
            "LLM completion failed. Please check your API configuration and try again."
        ) from e
def _supports_json_mode(provider: str, model: str) -> bool:
    """Tell whether ``response_format={"type": "json_object"}`` can be used.

    OpenAI, Anthropic, Gemini and DeepSeek are always treated as capable.
    LLM-004: OpenRouter is decided per model through the explicit allowlist
    ``OPENROUTER_JSON_CAPABLE_MODELS``. Every other provider is not capable.
    """
    if provider == "openrouter":
        return model in OPENROUTER_JSON_CAPABLE_MODELS
    return provider in ("openai", "anthropic", "gemini", "deepseek")
def _appears_truncated(data: dict) -> bool:
    """LLM-001: Heuristically decide whether parsed JSON looks cut off.

    A dict is flagged when ``workExperience``, ``education`` or ``skills`` is
    present as an empty list, or when ``personalInfo`` is missing. A warning
    naming the first offending key is logged. Non-dict input is never flagged.
    """
    if not isinstance(data, dict):
        return False

    # These sections are rarely empty in a real resume.
    emptied = next(
        (
            key
            for key in ("workExperience", "education", "skills")
            if key in data and data[key] == []
        ),
        None,
    )
    if emptied is not None:
        logging.warning(
            "Possible truncation detected: '%s' is empty",
            emptied,
        )
        return True

    # Sections every complete response must contain.
    for section in ("personalInfo",):
        if section not in data:
            logging.warning(
                "Possible truncation detected: missing required section '%s'",
                section,
            )
            return True
    return False
def _get_retry_temperature(attempt: int, base_temp: float = 0.1) -> float:
    """LLM-002: Pick the sampling temperature for a given attempt number.

    Attempt 0 uses *base_temp*; later attempts step through 0.3, 0.5 and 0.7
    so a retry has more room to produce different output. Attempts past the
    end of the schedule keep using the last value.
    """
    schedule = (base_temp, 0.3, 0.5, 0.7)
    last = len(schedule) - 1
    return schedule[attempt if attempt < last else last]
def _calculate_timeout(
    operation: str,
    max_tokens: int = 4096,
    provider: str = "openai",
) -> int:
    """LLM-005: Derive a request timeout in whole seconds.

    The base timeout for *operation* (``health_check``, ``completion`` or
    ``json``; anything else uses the completion value) is scaled up for
    token limits above 4096 and by a per-provider latency factor. Unknown
    providers use a factor of 1.0. The product is truncated to an int.
    """
    per_operation = {
        "health_check": LLM_TIMEOUT_HEALTH_CHECK,
        "completion": LLM_TIMEOUT_COMPLETION,
        "json": LLM_TIMEOUT_JSON,
    }
    latency_factor = {
        "openai": 1.0,
        "anthropic": 1.2,
        "openrouter": 1.5,  # More variable latency
        "ollama": 2.0,  # Local models can be slower
    }

    base = per_operation.get(operation, LLM_TIMEOUT_COMPLETION)
    # Never scale below the 4096-token baseline.
    token_factor = max(1.0, max_tokens / 4096)
    provider_factor = latency_factor.get(provider, 1.0)
    return int(base * token_factor * provider_factor)
def _extract_json(content: str, _depth: int = 0) -> str:
    """Pull the first complete JSON object out of an LLM reply.

    The reply may wrap the object in a markdown code fence or surround it
    with prose. Fences are removed first; then, starting at the first ``{``,
    braces are matched (ignoring braces inside string literals) and the text
    up to the matching ``}`` is returned.

    LLM-001: unbalanced braces are logged as possible truncation.
    LLM-007: unrecognised replies are logged with a short preview.
    JSON-010: recursion depth and input size are capped.

    Args:
        content: Raw reply text.
        _depth: Internal recursion counter; callers leave it at 0.

    Returns:
        The JSON object as a string (not parsed).

    Raises:
        ValueError: The input is too large, recursion went too deep, or no
            complete JSON object was found.
    """
    # JSON-010: safety limits
    if _depth > MAX_JSON_EXTRACTION_RECURSION:
        raise ValueError(f"JSON extraction exceeded max recursion depth: {_depth}")
    if len(content) > MAX_JSON_CONTENT_SIZE:
        raise ValueError(f"Content too large for JSON extraction: {len(content)} bytes")

    raw_reply = content
    fence = "```"

    # Unwrap a markdown code fence, if there is one.
    if "```json" in content:
        content = content.split("```json")[1].split(fence)[0]
    elif fence in content:
        # A fence is present, so the split always has a second element.
        content = content.split(fence)[1]
        # Drop a bare language tag such as "json\n{...".
        if content.startswith(("json", "JSON")):
            content = content[4:]
    content = content.strip()

    if content.startswith("{"):
        nesting = 0
        close_at = -1
        inside_string = False
        skip_next = False
        for pos, ch in enumerate(content):
            if skip_next:
                # Character following a backslash: consumed without inspection.
                skip_next = False
            elif ch == "\\":
                skip_next = True
            elif ch == '"':
                inside_string = not inside_string
            elif inside_string:
                pass  # braces inside string literals do not count
            elif ch == "{":
                nesting += 1
            elif ch == "}":
                nesting -= 1
                if nesting == 0:
                    close_at = pos
                    break

        if close_at != -1:
            return content[: close_at + 1]
        # LLM-001: the scan ended without closing the outermost brace.
        if nesting != 0:
            logging.warning(
                "JSON extraction found unbalanced braces (depth=%d), possible truncation",
                nesting,
            )

    # Retry from the first '{' — but only when it is not already at index 0,
    # otherwise the recursion would never make progress.
    brace_at = content.find("{")
    if brace_at > 0:
        return _extract_json(content[brace_at:], _depth + 1)

    # LLM-007: nothing recognisable; log a preview for debugging.
    logging.error(
        "Could not extract JSON from response format. Content preview: %s",
        content[:200] if content else "<empty>",
    )
    raise ValueError(f"No JSON found in response: {raw_reply[:200]}")
async def complete_json(
    prompt: str,
    system_prompt: str | None = None,
    config: LLMConfig | None = None,
    max_tokens: int = 4096,
    retries: int = 2,
) -> dict[str, Any]:
    """Make a completion request expecting a JSON response.

    Uses JSON mode when available, with retry logic for reliability.

    Args:
        prompt: User message sent to the model.
        system_prompt: Optional system message. A "respond with valid JSON
            only" instruction is always appended to it.
        config: LLM configuration. When ``None`` and the ``CF_ORCH_URL``
            environment variable is set, a cf-orch allocation is tried first;
            otherwise (or if that path raises) ``get_llm_config()`` is used.
        max_tokens: Forwarded as ``max_tokens`` on every request.
        retries: Number of additional attempts after the first one.

    Returns:
        Whatever ``json.loads`` produces for the JSON text extracted from the
        model response.

    Raises:
        ValueError: If JSON decoding still fails on the last attempt, or if
            the retry loop ends without returning.
        Exception: Any other error raised on the last attempt is re-raised
            unchanged.
    """
    if config is None:
        cf_orch_url = os.environ.get("CF_ORCH_URL", "").strip()
        if cf_orch_url:
            try:
                async with _allocate_orch_async(
                    cf_orch_url,
                    "vllm",
                    model_candidates=["Qwen2.5-3B-Instruct"],
                    ttl_s=300.0,
                    caller="peregrine-resume-matcher",
                ) as alloc:
                    orch_config = LLMConfig(
                        provider="openai",
                        model="__auto__",
                        api_key="any",
                        api_base=alloc.url.rstrip("/") + "/v1",
                    )
                    # Recurse with an explicit config so this branch is not
                    # entered again; the call runs while the allocation
                    # context is still open.
                    return await complete_json(prompt, system_prompt, orch_config, max_tokens, retries)
            except Exception as exc:
                # NOTE(review): this handler also catches failures of the
                # nested completion call above (not only allocation errors),
                # in which case the request is repeated with the default
                # config below - confirm this is intended.
                logging.warning("cf-orch allocation failed, falling back to default config: %s", exc)
        config = get_llm_config()
    model_name = get_model_name(config)
    # Build messages
    json_system = (
        system_prompt or ""
    ) + "\n\nYou must respond with valid JSON only. No explanations, no markdown."
    messages = [
        {"role": "system", "content": json_system},
        {"role": "user", "content": prompt},
    ]
    # Check if we can use JSON mode
    use_json_mode = _supports_json_mode(config.provider, config.model)
    last_error = None
    # One initial attempt plus ``retries`` further attempts.
    for attempt in range(retries + 1):
        try:
            # Build request kwargs
            # Pass API key directly to avoid race conditions with global os.environ
            kwargs: dict[str, Any] = {
                "model": model_name,
                "messages": messages,
                "max_tokens": max_tokens,
                "api_key": config.api_key,
                "api_base": _normalize_api_base(config.provider, config.api_base),
                "timeout": _calculate_timeout("json", max_tokens, config.provider),
            }
            if _supports_temperature(config.provider, model_name):
                # LLM-002: Increase temperature on retry for variation
                kwargs["temperature"] = _get_retry_temperature(attempt)
            reasoning_effort = _get_reasoning_effort(config.provider, model_name)
            if reasoning_effort:
                kwargs["reasoning_effort"] = reasoning_effort
            # Add JSON mode if supported
            if use_json_mode:
                kwargs["response_format"] = {"type": "json_object"}
            response = await litellm.acompletion(**kwargs)
            content = _extract_choice_text(response.choices[0])
            if not content:
                # Handled by the generic ``except Exception`` branch below,
                # so an empty response is retried like any other failure.
                raise ValueError("Empty response from LLM")
            logging.debug(f"LLM response (attempt {attempt + 1}): {content[:300]}")
            # Extract and parse JSON
            json_str = _extract_json(content)
            result = json.loads(json_str)
            # LLM-001: Check if parsed result appears truncated
            if isinstance(result, dict) and _appears_truncated(result):
                logging.warning(
                    "Parsed JSON appears truncated, but proceeding with result"
                )
            return result
        except json.JSONDecodeError as e:
            last_error = e
            logging.warning(f"JSON parse failed (attempt {attempt + 1}): {e}")
            if attempt < retries:
                # Add hint to prompt for retry
                # The hint is appended to the original ``prompt`` each time,
                # so it does not accumulate across retries.
                messages[-1]["content"] = (
                    prompt
                    + "\n\nIMPORTANT: Output ONLY a valid JSON object. Start with { and end with }."
                )
                continue
            raise ValueError(f"Failed to parse JSON after {retries + 1} attempts: {e}")
        except Exception as e:
            last_error = e
            logging.warning(f"LLM call failed (attempt {attempt + 1}): {e}")
            if attempt < retries:
                continue
            raise
    # Only reachable when the loop body never ran (e.g. ``retries`` < 0).
    raise ValueError(f"Failed after {retries + 1} attempts: {last_error}")

View file

@ -1,88 +0,0 @@
"""FastAPI application entry point."""
import asyncio
import logging
import sys
from contextlib import asynccontextmanager
from fastapi import Depends, FastAPI
# Fix for Windows: Use ProactorEventLoop for subprocess support (Playwright)
if sys.platform == "win32":
asyncio.set_event_loop_policy(asyncio.WindowsProactorEventLoopPolicy())
logger = logging.getLogger(__name__)
from fastapi.middleware.cors import CORSMiddleware
from app import __version__
from app.cloud_session import session_middleware_dep
from app.config import settings
from app.database import db
from app.pdf import close_pdf_renderer, init_pdf_renderer
from app.routers import config_router, enrichment_router, health_router, jobs_router, resumes_router
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Manage application startup and shutdown.

    Startup ensures ``settings.data_dir`` exists. Shutdown closes the PDF
    renderer and the database; each cleanup step is isolated so that a
    failure in one does not prevent the other from running.

    Args:
        app: The FastAPI application (unused; required by the lifespan
            protocol).

    Yields:
        None. Control returns to the server while the app is running.
    """
    # Startup
    settings.data_dir.mkdir(parents=True, exist_ok=True)
    # PDF renderer uses lazy initialization - will initialize on first use
    try:
        yield
    finally:
        # Shutdown runs in ``finally`` so resources are released even when an
        # exception is thrown into the generator at the ``yield`` point.
        try:
            await close_pdf_renderer()
        except Exception as e:
            # exc_info keeps the traceback; the message text is unchanged.
            logger.error("Error closing PDF renderer: %s", e, exc_info=True)
        try:
            db.close()
        except Exception as e:
            logger.error("Error closing database: %s", e, exc_info=True)
# Application instance. ``session_middleware_dep`` is passed via
# ``dependencies`` so it is declared once at application level rather than on
# individual routes.
app = FastAPI(
    title="Resume Matcher API",
    description="AI-powered resume tailoring for job descriptions",
    version=__version__,
    lifespan=lifespan,
    dependencies=[Depends(session_middleware_dep)],
)
# CORS middleware - origins configurable via CORS_ORIGINS env var
# NOTE(review): credentials are allowed together with wildcard methods and
# headers - confirm ``settings.cors_origins`` never contains "*".
app.add_middleware(
    CORSMiddleware,
    allow_origins=settings.cors_origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
# Include routers
# Every router is mounted under the same "/api/v1" prefix.
app.include_router(health_router, prefix="/api/v1")
app.include_router(config_router, prefix="/api/v1")
app.include_router(resumes_router, prefix="/api/v1")
app.include_router(jobs_router, prefix="/api/v1")
app.include_router(enrichment_router, prefix="/api/v1")
@app.get("/")
async def root():
    """Return basic service metadata: name, version and docs path."""
    info = {"name": "Resume Matcher API"}
    info["version"] = __version__
    info["docs"] = "/docs"
    return info
# Script entry point: serve the app with uvicorn using host/port from settings.
if __name__ == "__main__":
    import uvicorn
    # NOTE(review): ``reload=True`` is hard-coded; this looks like a
    # development convenience - confirm this entry point is not used in
    # production.
    uvicorn.run(
        "app.main:app",
        host=settings.host,
        port=settings.port,
        reload=True,
    )

View file

@ -14,6 +14,7 @@ Enhanced features:
import argparse import argparse
import csv import csv
import json
import os import os
import random import random
import re import re

View file

@ -31,6 +31,7 @@ sys.path.insert(0, str(Path(__file__).parent.parent))
from scripts.classifier_adapters import ( from scripts.classifier_adapters import (
LABELS, LABELS,
LABEL_DESCRIPTIONS,
ClassifierAdapter, ClassifierAdapter,
GLiClassAdapter, GLiClassAdapter,
RerankerAdapter, RerankerAdapter,

View file

@ -5,6 +5,7 @@ push updates the existing event rather than creating a duplicate.
""" """
from __future__ import annotations from __future__ import annotations
import uuid
import yaml import yaml
from datetime import datetime, timedelta, timezone from datetime import datetime, timedelta, timezone
from pathlib import Path from pathlib import Path

View file

@ -277,8 +277,7 @@ def _load_resume_and_keywords() -> tuple[dict, list[str]]:
return resume, keywords return resume, keywords
def research_company(job: dict, use_scraper: bool = True, on_stage=None, def research_company(job: dict, use_scraper: bool = True, on_stage=None) -> dict:
config_path: "Path | None" = None) -> dict:
""" """
Generate a pre-interview research brief for a job. Generate a pre-interview research brief for a job.
@ -296,7 +295,7 @@ def research_company(job: dict, use_scraper: bool = True, on_stage=None,
""" """
from scripts.llm_router import LLMRouter from scripts.llm_router import LLMRouter
router = LLMRouter(config_path=config_path) if config_path else LLMRouter() router = LLMRouter()
research_order = router.config.get("research_fallback_order") or router.config["fallback_order"] research_order = router.config.get("research_fallback_order") or router.config["fallback_order"]
company = job.get("company") or "the company" company = job.get("company") or "the company"
title = job.get("title") or "this role" title = job.get("title") or "this role"

View file

@ -130,32 +130,6 @@ CREATE TABLE IF NOT EXISTS digest_queue (
) )
""" """
# DDL for the table of reference contacts. ``tags`` defaults to the text '[]'
# (presumably a JSON-encoded list - confirm against the code that reads it).
CREATE_REFERENCES = """
CREATE TABLE IF NOT EXISTS references_ (
id INTEGER PRIMARY KEY AUTOINCREMENT,
name TEXT NOT NULL,
relationship TEXT,
company TEXT,
email TEXT,
phone TEXT,
notes TEXT,
tags TEXT DEFAULT '[]',
created_at TEXT DEFAULT (datetime('now')),
updated_at TEXT DEFAULT (datetime('now'))
);
"""
# DDL for the job <-> reference link table. Each (job_id, reference_id) pair
# is unique, and rows are removed when either the job or the reference is
# deleted (ON DELETE CASCADE on both foreign keys).
CREATE_JOB_REFERENCES = """
CREATE TABLE IF NOT EXISTS job_references (
id INTEGER PRIMARY KEY AUTOINCREMENT,
job_id INTEGER NOT NULL REFERENCES jobs(id) ON DELETE CASCADE,
reference_id INTEGER NOT NULL REFERENCES references_(id) ON DELETE CASCADE,
prep_email TEXT,
rec_letter TEXT,
UNIQUE(job_id, reference_id)
);
"""
_MIGRATIONS = [ _MIGRATIONS = [
("cover_letter", "TEXT"), ("cover_letter", "TEXT"),
("applied_at", "TEXT"), ("applied_at", "TEXT"),
@ -169,9 +143,6 @@ _MIGRATIONS = [
("calendar_event_id", "TEXT"), ("calendar_event_id", "TEXT"),
("optimized_resume", "TEXT"), # ATS-rewritten resume text (paid tier) ("optimized_resume", "TEXT"), # ATS-rewritten resume text (paid tier)
("ats_gap_report", "TEXT"), # JSON gap report (free tier) ("ats_gap_report", "TEXT"), # JSON gap report (free tier)
("date_posted", "TEXT"), # Original posting date from job board (shadow listing detection)
("hired_feedback", "TEXT"), # JSON: optional post-hire "what helped" response
("excluded_from_training", "INTEGER DEFAULT 0"), # opt-out of training export
] ]
@ -205,9 +176,6 @@ def _migrate_db(db_path: Path) -> None:
conn.execute("ALTER TABLE background_tasks ADD COLUMN params TEXT") conn.execute("ALTER TABLE background_tasks ADD COLUMN params TEXT")
except sqlite3.OperationalError: except sqlite3.OperationalError:
pass # column already exists pass # column already exists
# Ensure references tables exist (CREATE IF NOT EXISTS is idempotent)
conn.execute(CREATE_REFERENCES)
conn.execute(CREATE_JOB_REFERENCES)
conn.commit() conn.commit()
conn.close() conn.close()
@ -221,8 +189,6 @@ def init_db(db_path: Path = DEFAULT_DB) -> None:
conn.execute(CREATE_BACKGROUND_TASKS) conn.execute(CREATE_BACKGROUND_TASKS)
conn.execute(CREATE_SURVEY_RESPONSES) conn.execute(CREATE_SURVEY_RESPONSES)
conn.execute(CREATE_DIGEST_QUEUE) conn.execute(CREATE_DIGEST_QUEUE)
conn.execute(CREATE_REFERENCES)
conn.execute(CREATE_JOB_REFERENCES)
conn.commit() conn.commit()
conn.close() conn.close()
_migrate_db(db_path) _migrate_db(db_path)
@ -234,11 +200,10 @@ def insert_job(db_path: Path = DEFAULT_DB, job: dict = None) -> Optional[int]:
return None return None
conn = sqlite3.connect(db_path) conn = sqlite3.connect(db_path)
try: try:
status = job.get("status", "pending")
cursor = conn.execute( cursor = conn.execute(
"""INSERT INTO jobs """INSERT INTO jobs
(title, company, url, source, location, is_remote, salary, description, date_found, date_posted, status) (title, company, url, source, location, is_remote, salary, description, date_found)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""", VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)""",
( (
job.get("title", ""), job.get("title", ""),
job.get("company", ""), job.get("company", ""),
@ -249,8 +214,6 @@ def insert_job(db_path: Path = DEFAULT_DB, job: dict = None) -> Optional[int]:
job.get("salary", ""), job.get("salary", ""),
job.get("description", ""), job.get("description", ""),
job.get("date_found", ""), job.get("date_found", ""),
job.get("date_posted", "") or "",
status,
), ),
) )
conn.commit() conn.commit()
@ -382,96 +345,6 @@ def get_optimized_resume(db_path: Path = DEFAULT_DB, job_id: int = None) -> dict
} }
def save_resume_draft(db_path: Path = DEFAULT_DB, job_id: int = None,
                      draft_json: str = "") -> None:
    """Persist a structured resume review draft (awaiting user approval).

    Args:
        db_path: SQLite database file.
        job_id: Job whose draft is stored; the call is a no-op when ``None``.
        draft_json: Serialized draft. An empty value is stored as NULL.
    """
    if job_id is None:
        return
    conn = sqlite3.connect(db_path)
    try:
        conn.execute(
            "UPDATE jobs SET resume_draft_json = ? WHERE id = ?",
            (draft_json or None, job_id),
        )
        conn.commit()
    finally:
        # Always release the connection, even if the UPDATE fails.
        conn.close()
def get_resume_draft(db_path: Path = DEFAULT_DB, job_id: int = None) -> dict | None:
    """Return the pending review draft, or None if no draft is waiting.

    Args:
        db_path: SQLite database file.
        job_id: Job to look up; ``None`` yields ``None``.

    Returns:
        The decoded ``resume_draft_json`` value, or ``None`` when the job is
        missing, the column is empty, or the stored text is not valid JSON.
    """
    if job_id is None:
        return None
    import json
    conn = sqlite3.connect(db_path)
    try:
        conn.row_factory = sqlite3.Row
        row = conn.execute(
            "SELECT resume_draft_json FROM jobs WHERE id = ?", (job_id,)
        ).fetchone()
    finally:
        # Always release the connection, even if the query fails.
        conn.close()
    if not row or not row["resume_draft_json"]:
        return None
    try:
        return json.loads(row["resume_draft_json"])
    except (TypeError, ValueError, RecursionError):
        # Best effort: a corrupt draft is treated as "no draft".
        return None
def finalize_resume(db_path: Path = DEFAULT_DB, job_id: int = None,
                    final_text: str = "") -> None:
    """Save approved resume text, archive the previous version, and clear draft.

    The current ``optimized_resume`` (if any) is appended to the JSON list in
    ``resume_archive_json`` with a minute-precision timestamp before being
    overwritten with ``final_text``.

    Args:
        db_path: SQLite database file.
        job_id: Job to update; the call is a no-op when ``None``.
        final_text: Approved resume text to store.
    """
    if job_id is None:
        return
    import json
    from datetime import datetime
    conn = sqlite3.connect(db_path)
    try:
        conn.row_factory = sqlite3.Row
        row = conn.execute(
            "SELECT optimized_resume, resume_archive_json FROM jobs WHERE id = ?", (job_id,)
        ).fetchone()
        # Archive current finalized version if present
        archive: list = []
        if row:
            if row["resume_archive_json"]:
                try:
                    stored = json.loads(row["resume_archive_json"])
                except (TypeError, ValueError, RecursionError):
                    stored = []
                # Anything other than a list cannot be appended to; start over.
                archive = stored if isinstance(stored, list) else []
            if row["optimized_resume"]:
                archive.append({
                    "archived_at": datetime.now().isoformat()[:16],
                    "text": row["optimized_resume"],
                })
        conn.execute(
            "UPDATE jobs SET optimized_resume = ?, resume_draft_json = NULL, "
            "resume_archive_json = ? WHERE id = ?",
            (final_text, json.dumps(archive), job_id),
        )
        conn.commit()
    finally:
        # Always release the connection, even if a statement fails.
        conn.close()
def get_resume_archive(db_path: Path = DEFAULT_DB, job_id: int = None) -> list:
    """Return list of past finalized resume versions (newest archived first).

    Args:
        db_path: SQLite database file.
        job_id: Job to look up; ``None`` yields an empty list.

    Returns:
        The stored archive entries in reverse order, or ``[]`` when the job is
        missing, nothing is archived, or the stored value is not a JSON list.
    """
    if job_id is None:
        return []
    import json
    conn = sqlite3.connect(db_path)
    try:
        conn.row_factory = sqlite3.Row
        row = conn.execute(
            "SELECT resume_archive_json FROM jobs WHERE id = ?", (job_id,)
        ).fetchone()
    finally:
        # Always release the connection, even if the query fails.
        conn.close()
    if not row or not row["resume_archive_json"]:
        return []
    try:
        entries = json.loads(row["resume_archive_json"])
    except (TypeError, ValueError, RecursionError):
        return []
    if not isinstance(entries, list):
        # A dict or string would otherwise be "reversed" into keys/characters.
        return []
    return entries[::-1]  # newest first
_UPDATABLE_JOB_COLS = { _UPDATABLE_JOB_COLS = {
"title", "company", "url", "source", "location", "is_remote", "title", "company", "url", "source", "location", "is_remote",
"salary", "description", "match_score", "keyword_gaps", "salary", "description", "match_score", "keyword_gaps",
@ -510,19 +383,6 @@ def mark_applied(db_path: Path = DEFAULT_DB, ids: list[int] = None) -> None:
conn.close() conn.close()
def cancel_task(db_path: Path = DEFAULT_DB, task_id: int = 0) -> bool:
    """Cancel a single queued/running task by id.

    The task is marked ``failed`` with the error text 'Cancelled by user'.

    Args:
        db_path: SQLite database file.
        task_id: Primary key of the background task.

    Returns:
        True if a row was updated, i.e. the task existed and was still
        queued or running.
    """
    conn = sqlite3.connect(db_path)
    try:
        count = conn.execute(
            "UPDATE background_tasks SET status='failed', error='Cancelled by user',"
            " finished_at=datetime('now') WHERE id=? AND status IN ('queued','running')",
            (task_id,),
        ).rowcount
        conn.commit()
        return count > 0
    finally:
        # Always release the connection, even if the UPDATE fails.
        conn.close()
def kill_stuck_tasks(db_path: Path = DEFAULT_DB) -> int: def kill_stuck_tasks(db_path: Path = DEFAULT_DB) -> int:
"""Mark all queued/running background tasks as failed. Returns count killed.""" """Mark all queued/running background tasks as failed. Returns count killed."""
conn = sqlite3.connect(db_path) conn = sqlite3.connect(db_path)
@ -958,286 +818,3 @@ def get_task_for_job(db_path: Path = DEFAULT_DB, task_type: str = "",
).fetchone() ).fetchone()
conn.close() conn.close()
return dict(row) if row else None return dict(row) if row else None
# ── Resume library helpers ────────────────────────────────────────────────────
def _resume_as_dict(row) -> dict:
"""Convert a sqlite3.Row from the resumes table to a plain dict."""
return {
"id": row["id"],
"name": row["name"],
"source": row["source"],
"job_id": row["job_id"],
"text": row["text"],
"struct_json": row["struct_json"],
"word_count": row["word_count"],
"is_default": row["is_default"],
"created_at": row["created_at"],
"updated_at": row["updated_at"],
"synced_at": row["synced_at"] if "synced_at" in row.keys() else None,
}
def create_resume(
    db_path: Path = DEFAULT_DB,
    name: str = "",
    text: str = "",
    source: str = "manual",
    job_id: int | None = None,
    struct_json: str | None = None,
) -> dict:
    """Add a resume to the library and return the stored row as a dict.

    The word count is derived from whitespace-separated tokens in ``text``
    (0 for empty text).
    """
    n_words = 0 if not text else len(text.split())
    values = (name, source, job_id, text, struct_json, n_words)
    conn = sqlite3.connect(db_path)
    conn.row_factory = sqlite3.Row
    try:
        new_id = conn.execute(
            """INSERT INTO resumes (name, source, job_id, text, struct_json, word_count)
            VALUES (?, ?, ?, ?, ?, ?)""",
            values,
        ).lastrowid
        conn.commit()
        # Re-read so DB-generated columns are included in the result.
        stored = conn.execute("SELECT * FROM resumes WHERE id=?", (new_id,)).fetchone()
        return _resume_as_dict(stored)
    finally:
        conn.close()
def list_resumes(db_path: Path = DEFAULT_DB) -> list[dict]:
    """List every resume, default entries first and then newest first."""
    conn = sqlite3.connect(db_path)
    conn.row_factory = sqlite3.Row
    try:
        cursor = conn.execute(
            "SELECT * FROM resumes ORDER BY is_default DESC, created_at DESC"
        )
        return list(map(_resume_as_dict, cursor.fetchall()))
    finally:
        conn.close()
def get_resume(db_path: Path = DEFAULT_DB, resume_id: int = 0) -> dict | None:
    """Look up one resume by id; returns None when no such row exists."""
    conn = sqlite3.connect(db_path)
    conn.row_factory = sqlite3.Row
    try:
        found = conn.execute("SELECT * FROM resumes WHERE id=?", (resume_id,)).fetchone()
        if not found:
            return None
        return _resume_as_dict(found)
    finally:
        conn.close()
def update_resume(
    db_path: Path = DEFAULT_DB,
    resume_id: int = 0,
    name: str | None = None,
    text: str | None = None,
) -> dict | None:
    """Change a resume's name and/or text.

    Arguments left as ``None`` are not touched. Updating ``text`` also
    refreshes ``word_count``. Returns the row after the update, or None when
    the id does not exist.
    """
    # Collect the statements first, then run them in the original order.
    pending = []
    if name is not None:
        pending.append((
            "UPDATE resumes SET name=?, updated_at=datetime('now') WHERE id=?",
            (name, resume_id),
        ))
    if text is not None:
        pending.append((
            "UPDATE resumes SET text=?, word_count=?, updated_at=datetime('now') WHERE id=?",
            (text, len(text.split()), resume_id),
        ))
    conn = sqlite3.connect(db_path)
    conn.row_factory = sqlite3.Row
    try:
        for statement, params in pending:
            conn.execute(statement, params)
        conn.commit()
        current = conn.execute("SELECT * FROM resumes WHERE id=?", (resume_id,)).fetchone()
        if not current:
            return None
        return _resume_as_dict(current)
    finally:
        conn.close()
def delete_resume(db_path: Path = DEFAULT_DB, resume_id: int = 0) -> None:
    """Remove the resume with the given id (no error if it does not exist)."""
    conn = sqlite3.connect(db_path)
    try:
        # The connection context manager commits when the block succeeds.
        with conn:
            conn.execute("DELETE FROM resumes WHERE id=?", (resume_id,))
    finally:
        conn.close()
def set_default_resume(db_path: Path = DEFAULT_DB, resume_id: int = 0) -> None:
    """Make ``resume_id`` the only resume flagged as default.

    The flag is first cleared on every row and then set on the requested one,
    both within a single commit.
    """
    steps = (
        ("UPDATE resumes SET is_default=0", ()),
        ("UPDATE resumes SET is_default=1 WHERE id=?", (resume_id,)),
    )
    conn = sqlite3.connect(db_path)
    try:
        with conn:  # commits once both statements have run
            for statement, params in steps:
                conn.execute(statement, params)
    finally:
        conn.close()
def update_resume_synced_at(db_path: Path = DEFAULT_DB, resume_id: int = 0) -> None:
    """Stamp a library entry's ``synced_at`` with the current DB time.

    Used for the library-to-profile sync direction.
    """
    statement = "UPDATE resumes SET synced_at=datetime('now') WHERE id=?"
    conn = sqlite3.connect(db_path)
    try:
        with conn:  # commits on success
            conn.execute(statement, (resume_id,))
    finally:
        conn.close()
def update_resume_content(
    db_path: Path = DEFAULT_DB,
    resume_id: int = 0,
    text: str = "",
    struct_json: str | None = None,
) -> None:
    """Overwrite a library entry's text and struct_json and mark it synced.

    ``word_count``, ``synced_at`` and ``updated_at`` are refreshed in the same
    statement. Called by the profile-to-library sync path
    (PUT /api/settings/resume).
    """
    n_words = 0 if not text else len(text.split())
    values = (text, struct_json, n_words, resume_id)
    conn = sqlite3.connect(db_path)
    try:
        with conn:  # commits on success
            conn.execute(
                """UPDATE resumes
                SET text=?, struct_json=?, word_count=?,
                synced_at=datetime('now'), updated_at=datetime('now')
                WHERE id=?""",
                values,
            )
    finally:
        conn.close()
def get_job_resume(db_path: Path = DEFAULT_DB, job_id: int = 0) -> dict | None:
    """Resolve the resume to use for a job.

    Preference order: the resume attached to the job via ``jobs.resume_id``,
    then the library default, then None.
    """
    conn = sqlite3.connect(db_path)
    conn.row_factory = sqlite3.Row
    try:
        chosen = conn.execute(
            """SELECT r.* FROM resumes r
            JOIN jobs j ON j.resume_id = r.id
            WHERE j.id=?""",
            (job_id,),
        ).fetchone()
        if not chosen:
            # No job-specific resume: fall back to the default entry.
            chosen = conn.execute(
                "SELECT * FROM resumes WHERE is_default=1 LIMIT 1"
            ).fetchone()
        if not chosen:
            return None
        return _resume_as_dict(chosen)
    finally:
        conn.close()
def set_job_resume(db_path: Path = DEFAULT_DB, job_id: int = 0, resume_id: int = 0) -> None:
    """Link ``resume_id`` to ``job_id`` so it takes precedence over the default."""
    statement = "UPDATE jobs SET resume_id=? WHERE id=?"
    conn = sqlite3.connect(db_path)
    try:
        with conn:  # commits on success
            conn.execute(statement, (resume_id, job_id))
    finally:
        conn.close()
# ── Training export helpers ───────────────────────────────────────────────────
def _strip_greeting(text: str) -> str:
"""Remove 'Dear X,' greeting line from cover letter text."""
lines = text.splitlines()
for i, line in enumerate(lines):
stripped_line = line.strip()
if stripped_line.lower().startswith("dear ") and stripped_line.endswith((",", ":")):
rest = lines[i + 1:]
while rest and not rest[0].strip():
rest = rest[1:]
result = "\n".join(rest).strip()
return result if result else text.strip()
return text.strip()
def get_db_pairs(db_path: Path) -> list[dict]:
    """List curation metadata for every qualifying job, excluded ones included.

    A job qualifies when its status is applied / phone_screen / interviewing /
    offer / hired and it has a non-empty cover letter. Rows flagged as
    excluded are still returned (with ``excluded=True``) so the curation UI
    can restore them.
    """
    query = (
        "SELECT id, title, company, description, status, "
        " excluded_from_training "
        "FROM jobs "
        "WHERE status IN ('applied','phone_screen','interviewing','offer','hired') "
        " AND cover_letter IS NOT NULL AND cover_letter != '' "
        "ORDER BY applied_at DESC"
    )
    conn = sqlite3.connect(db_path)
    conn.row_factory = sqlite3.Row
    try:
        job_rows = conn.execute(query).fetchall()
    finally:
        conn.close()

    pairs = []
    for job in job_rows:
        title = job["title"]
        company = job["company"]
        description = job["description"] or ""
        pairs.append({
            "job_id": job["id"],
            "title": title or "",
            "company": company or "",
            "status": job["status"],
            "instruction": (
                f"Write a cover letter for the {title or 'unknown'} "
                f"position at {company or 'unknown'}."
            ),
            "input_preview": description[:200],
            "excluded": bool(job["excluded_from_training"]),
        })
    return pairs
def get_training_pairs(db_path: Path) -> list[dict]:
    """Build Alpaca-format training pairs from qualifying, non-excluded jobs.

    Each pair carries an instruction naming the title and company, the job
    description as input, and the cover letter (greeting removed) as output.
    Used by the JSONL export endpoint.
    """
    query = (
        "SELECT id, title, company, description, cover_letter "
        "FROM jobs "
        "WHERE status IN ('applied','phone_screen','interviewing','offer','hired') "
        " AND cover_letter IS NOT NULL AND cover_letter != '' "
        " AND excluded_from_training = 0 "
        "ORDER BY applied_at DESC"
    )
    conn = sqlite3.connect(db_path)
    conn.row_factory = sqlite3.Row
    try:
        job_rows = conn.execute(query).fetchall()
    finally:
        conn.close()

    pairs = []
    for job in job_rows:
        title = job["title"] or "unknown"
        company = job["company"] or "unknown"
        pairs.append({
            "instruction": (
                f"Write a cover letter for the {title} "
                f"position at {company}."
            ),
            "input": job["description"] or "",
            "output": _strip_greeting(job["cover_letter"]),
            "source": "db",
            "job_id": job["id"],
        })
    return pairs
def set_training_exclusion(db_path: Path, job_id: int, excluded: bool) -> None:
    """Store the training opt-out flag for one job (1 = excluded, 0 = included)."""
    flag = 1 if excluded else 0
    conn = sqlite3.connect(db_path)
    try:
        with conn:  # commits on success
            conn.execute(
                "UPDATE jobs SET excluded_from_training = ? WHERE id = ?",
                (flag, job_id),
            )
    finally:
        conn.close()

View file

@ -1,122 +0,0 @@
"""
db_migrate.py — Rails-style numbered SQL migration runner for Peregrine user DBs.
Migration files live in migrations/ (sibling to this script's parent directory),
named NNN_description.sql (e.g. 001_baseline.sql). They are applied in sorted
order and tracked in the schema_migrations table so each runs exactly once.
Usage:
from scripts.db_migrate import migrate_db
migrate_db(Path("/path/to/user.db"))
"""
import logging
import sqlite3
from pathlib import Path
log = logging.getLogger(__name__)
# Resolved at import time: peregrine repo root / migrations/
_MIGRATIONS_DIR = Path(__file__).parent.parent / "migrations"
_CREATE_MIGRATIONS_TABLE = """
CREATE TABLE IF NOT EXISTS schema_migrations (
version TEXT PRIMARY KEY,
applied_at TEXT NOT NULL DEFAULT (datetime('now'))
)
"""
def migrate_db(db_path: Path) -> list[str]:
    """Apply any pending migrations to db_path. Returns list of applied versions.

    Every ``*.sql`` file in ``_MIGRATIONS_DIR`` is applied in sorted filename
    order unless its stem is already recorded in ``schema_migrations``.

    Args:
        db_path: SQLite database file to migrate.

    Returns:
        Versions (file stems) applied by this call, in application order.
        Empty when the migrations directory is missing or has no SQL files.

    Raises:
        RuntimeError: If a migration fails; the original error is chained as
            the cause.
    """
    applied: list[str] = []
    con = sqlite3.connect(db_path)
    try:
        # The bookkeeping table is created (and committed) before anything else.
        con.execute(_CREATE_MIGRATIONS_TABLE)
        con.commit()
        if not _MIGRATIONS_DIR.is_dir():
            log.warning("migrations/ directory not found at %s — skipping", _MIGRATIONS_DIR)
            return applied
        migration_files = sorted(_MIGRATIONS_DIR.glob("*.sql"))
        if not migration_files:
            return applied
        already_applied = {
            row[0] for row in con.execute("SELECT version FROM schema_migrations")
        }
        for path in migration_files:
            version = path.stem  # e.g. "001_baseline"
            if version in already_applied:
                continue
            sql = path.read_text(encoding="utf-8")
            log.info("Applying migration %s to %s", version, db_path.name)
            try:
                # Execute statements individually so that ALTER TABLE ADD COLUMN
                # errors caused by already-existing columns (pre-migration DBs
                # created from a newer schema) are treated as no-ops rather than
                # fatal failures.
                # NOTE(review): splitting on ";" is purely textual, so a ";"
                # inside a string literal or trigger body would cut a
                # statement in two - confirm migrations never contain one.
                statements = [s.strip() for s in sql.split(";") if s.strip()]
                for stmt in statements:
                    # Strip leading SQL comment lines (-- ...) before processing.
                    # Checking startswith("--") on the raw chunk would skip entire
                    # multi-line statements whose first line is a comment.
                    stripped_lines = [
                        ln for ln in stmt.splitlines()
                        if not ln.strip().startswith("--")
                    ]
                    stmt = "\n".join(stripped_lines).strip()
                    if not stmt:
                        continue
                    # Pre-check: if this is ADD COLUMN and the column already exists, skip.
                    # This guards against schema_migrations being ahead of the actual schema
                    # (e.g. DB reset after migrations were recorded).
                    stmt_upper = stmt.upper()
                    if "ALTER TABLE" in stmt_upper and "ADD COLUMN" in stmt_upper:
                        # Extract table name and column name from the statement
                        import re as _re
                        m = _re.match(
                            r"ALTER\s+TABLE\s+(\w+)\s+ADD\s+COLUMN\s+(\w+)",
                            stmt, _re.IGNORECASE
                        )
                        if m:
                            tbl, col = m.group(1), m.group(2)
                            # ``tbl`` matched ``\w+`` above, so interpolating
                            # it into the PRAGMA cannot inject other SQL.
                            # Index 1 of each table_info row is the column name.
                            existing = {
                                row[1]
                                for row in con.execute(f"PRAGMA table_info({tbl})")
                            }
                            if col in existing:
                                log.info(
                                    "Migration %s: column %s.%s already exists, skipping",
                                    version, tbl, col,
                                )
                                continue
                    try:
                        con.execute(stmt)
                    except sqlite3.OperationalError as stmt_exc:
                        # Second line of defence: tolerate "already applied"
                        # errors; anything else aborts the migration.
                        msg = str(stmt_exc).lower()
                        if "duplicate column name" in msg or "already exists" in msg:
                            log.info(
                                "Migration %s: statement already applied, skipping: %s",
                                version, stmt_exc,
                            )
                        else:
                            raise
                # Record the version only after every statement succeeded.
                con.execute(
                    "INSERT INTO schema_migrations (version) VALUES (?)", (version,)
                )
                con.commit()
                applied.append(version)
                log.info("Migration %s applied successfully", version)
            except Exception as exc:
                # NOTE(review): whether DDL already executed for this
                # migration is undone by rollback() depends on the sqlite3
                # transaction mode in use - confirm.
                con.rollback()
                log.error("Migration %s failed: %s", version, exc)
                raise RuntimeError(f"Migration {version} failed: {exc}") from exc
    finally:
        con.close()
    return applied

View file

@ -34,38 +34,11 @@ CUSTOM_SCRAPERS: dict[str, object] = {
} }
def _normalize_profiles(raw: dict) -> dict:
"""Normalize search_profiles.yaml to the canonical {profiles: [...]} format.
The onboarding wizard (pre-fix) wrote a flat `default: {...}` structure.
Canonical format is `profiles: [{name, titles/job_titles, boards, ...}]`.
This converts on load so both formats work without a migration.
"""
if "profiles" in raw:
return raw
# Wizard-written format: top-level keys are profile names (usually "default")
profiles = []
for name, body in raw.items():
if not isinstance(body, dict):
continue
# job_boards: [{name, enabled}] → boards: [name] (enabled only)
job_boards = body.pop("job_boards", None)
if job_boards and "boards" not in body:
body["boards"] = [b["name"] for b in job_boards if b.get("enabled", True)]
# blocklist_* keys live in load_blocklist, not per-profile — drop them
body.pop("blocklist_companies", None)
body.pop("blocklist_industries", None)
body.pop("blocklist_locations", None)
profiles.append({"name": name, **body})
return {"profiles": profiles}
def load_config(config_dir: Path | None = None) -> tuple[dict, dict]: def load_config(config_dir: Path | None = None) -> tuple[dict, dict]:
cfg = config_dir or CONFIG_DIR cfg = config_dir or CONFIG_DIR
profiles_path = cfg / "search_profiles.yaml" profiles_path = cfg / "search_profiles.yaml"
notion_path = cfg / "notion.yaml" notion_path = cfg / "notion.yaml"
raw = yaml.safe_load(profiles_path.read_text()) or {} profiles = yaml.safe_load(profiles_path.read_text())
profiles = _normalize_profiles(raw)
notion_cfg = yaml.safe_load(notion_path.read_text()) if notion_path.exists() else {"field_map": {}, "token": None, "database_id": None} notion_cfg = yaml.safe_load(notion_path.read_text()) if notion_path.exists() else {"field_map": {}, "token": None, "database_id": None}
return profiles, notion_cfg return profiles, notion_cfg
@ -239,43 +212,14 @@ def run_discovery(db_path: Path = DEFAULT_DB, notion_push: bool = False, config_
_rp = profile.get("remote_preference", "both") _rp = profile.get("remote_preference", "both")
_is_remote: bool | None = True if _rp == "remote" else (False if _rp == "onsite" else None) _is_remote: bool | None = True if _rp == "remote" else (False if _rp == "onsite" else None)
# When filtering for remote-only, also drop hybrid roles at the description level.
# Job boards (especially LinkedIn) tag hybrid listings as is_remote=True, so the
# board-side filter alone is not reliable. We match specific work-arrangement
# phrases to avoid false positives like "hybrid cloud" or "hybrid architecture".
_HYBRID_PHRASES = [
"hybrid role", "hybrid position", "hybrid work", "hybrid schedule",
"hybrid model", "hybrid arrangement", "hybrid opportunity",
"in-office/remote", "in office/remote", "remote/in-office",
"remote/office", "office/remote",
"days in office", "days per week in", "days onsite", "days on-site",
"required to be in office", "required in office",
]
if _rp == "remote":
exclude_kw = exclude_kw + _HYBRID_PHRASES
for location in profile["locations"]: for location in profile["locations"]:
# ── JobSpy boards ────────────────────────────────────────────────── # ── JobSpy boards ──────────────────────────────────────────────────
if boards: if boards:
# Validate boards against the installed JobSpy Site enum. print(f" [jobspy] {location} — boards: {', '.join(boards)}")
# One unsupported name in the list aborts the entire scrape_jobs() call.
try:
from jobspy import Site as _Site
_valid = {s.value for s in _Site}
_filtered = [b for b in boards if b in _valid]
_dropped = [b for b in boards if b not in _valid]
if _dropped:
print(f" [jobspy] Skipping unsupported boards: {', '.join(_dropped)}")
except ImportError:
_filtered = boards # fallback: pass through unchanged
if not _filtered:
print(f" [jobspy] No valid boards for {location} — skipping")
continue
print(f" [jobspy] {location} — boards: {', '.join(_filtered)}")
try: try:
jobspy_kwargs: dict = dict( jobspy_kwargs: dict = dict(
site_name=_filtered, site_name=boards,
search_term=" OR ".join(f'"{t}"' for t in (profile.get("titles") or profile.get("job_titles", []))), search_term=" OR ".join(f'"{t}"' for t in (profile.get("titles") or profile.get("job_titles", []))),
location=location, location=location,
results_wanted=results_per_board, results_wanted=results_per_board,
@ -307,10 +251,6 @@ def run_discovery(db_path: Path = DEFAULT_DB, notion_push: bool = False, config_
elif job_dict.get("salary_source") and str(job_dict["salary_source"]) not in ("nan", "None", ""): elif job_dict.get("salary_source") and str(job_dict["salary_source"]) not in ("nan", "None", ""):
salary_str = str(job_dict["salary_source"]) salary_str = str(job_dict["salary_source"])
_dp = job_dict.get("date_posted")
date_posted_str = (
_dp.isoformat() if hasattr(_dp, "isoformat") else str(_dp)
) if _dp and str(_dp) not in ("nan", "None", "") else ""
row = { row = {
"url": url, "url": url,
"title": _s(job_dict.get("title")), "title": _s(job_dict.get("title")),
@ -320,7 +260,6 @@ def run_discovery(db_path: Path = DEFAULT_DB, notion_push: bool = False, config_
"is_remote": bool(job_dict.get("is_remote", False)), "is_remote": bool(job_dict.get("is_remote", False)),
"salary": salary_str, "salary": salary_str,
"description": _s(job_dict.get("description")), "description": _s(job_dict.get("description")),
"date_posted": date_posted_str,
"_exclude_kw": exclude_kw, "_exclude_kw": exclude_kw,
} }
if _insert_if_new(row, _s(job_dict.get("site"))): if _insert_if_new(row, _s(job_dict.get("site"))):

View file

@ -323,6 +323,6 @@ if gguf_path and gguf_path.exists():
else: else:
print(f"\n{'='*60}") print(f"\n{'='*60}")
print(" Adapter saved (no GGUF produced).") print(" Adapter saved (no GGUF produced).")
print(" Re-run without --no-gguf to generate a GGUF for Ollama registration.") print(f" Re-run without --no-gguf to generate a GGUF for Ollama registration.")
print(f" Adapter path: {adapter_path}") print(f" Adapter path: {adapter_path}")
print(f"{'='*60}\n") print(f"{'='*60}\n")

View file

@ -16,8 +16,6 @@ import re
import sys import sys
from pathlib import Path from pathlib import Path
import yaml
sys.path.insert(0, str(Path(__file__).parent.parent)) sys.path.insert(0, str(Path(__file__).parent.parent))
from scripts.user_profile import UserProfile from scripts.user_profile import UserProfile
@ -42,57 +40,107 @@ def _build_system_context(profile=None) -> str:
return " ".join(parts) return " ".join(parts)
SYSTEM_CONTEXT = _build_system_context() SYSTEM_CONTEXT = _build_system_context()
_candidate = _profile.name if _profile else "the candidate"
# ── Mission-alignment detection ─────────────────────────────────────────────── # ── Mission-alignment detection ───────────────────────────────────────────────
# Domains and their keyword signals are loaded from config/mission_domains.yaml. # When a company/JD signals one of these preferred industries, the cover letter
# prompt injects a hint so Para 3 can reflect genuine personal connection.
# This does NOT disclose any personal disability or family information. # This does NOT disclose any personal disability or family information.
_MISSION_DOMAINS_PATH = Path(__file__).parent.parent / "config" / "mission_domains.yaml"
def load_mission_domains(path: Path | None = None) -> dict[str, dict]:
"""Load mission domain config from YAML. Returns dict keyed by domain name."""
p = path or _MISSION_DOMAINS_PATH
if not p.exists():
return {}
with p.open(encoding="utf-8") as fh:
data = yaml.safe_load(fh)
return data.get("domains", {}) if data else {}
_MISSION_DOMAINS: dict[str, dict] = load_mission_domains()
_MISSION_SIGNALS: dict[str, list[str]] = { _MISSION_SIGNALS: dict[str, list[str]] = {
domain: cfg.get("signals", []) for domain, cfg in _MISSION_DOMAINS.items() "music": [
"music", "spotify", "tidal", "soundcloud", "bandcamp", "apple music",
"distrokid", "cd baby", "landr", "beatport", "reverb", "vinyl",
"streaming", "artist", "label", "live nation", "ticketmaster", "aeg",
"songkick", "concert", "venue", "festival", "audio", "podcast",
"studio", "record", "musician", "playlist",
],
"animal_welfare": [
"animal", "shelter", "rescue", "humane society", "spca", "aspca",
"veterinary", "vet ", "wildlife", "pet ", "adoption", "foster",
"dog", "cat", "feline", "canine", "sanctuary", "zoo",
],
"education": [
"education", "school", "learning", "student", "edtech", "classroom",
"curriculum", "tutoring", "academic", "university", "kids", "children",
"youth", "literacy", "khan academy", "duolingo", "chegg", "coursera",
"instructure", "canvas lms", "clever", "district", "teacher",
"k-12", "k12", "grade", "pedagogy",
],
"social_impact": [
"nonprofit", "non-profit", "501(c)", "social impact", "mission-driven",
"public benefit", "community", "underserved", "equity", "justice",
"humanitarian", "advocacy", "charity", "foundation", "ngo",
"social good", "civic", "public health", "mental health", "food security",
"housing", "homelessness", "poverty", "workforce development",
],
# Health is listed last — it's a genuine but lower-priority connection than
# music/animals/education/social_impact. detect_mission_alignment returns on first
# match, so dict order = preference order.
"health": [
"patient", "patients", "healthcare", "health tech", "healthtech",
"pharma", "pharmaceutical", "clinical", "medical",
"hospital", "clinic", "therapy", "therapist",
"rare disease", "life sciences", "life science",
"treatment", "prescription", "biotech", "biopharma", "medtech",
"behavioral health", "population health",
"care management", "care coordination", "oncology", "specialty pharmacy",
"provider network", "payer", "health plan", "benefits administration",
"ehr", "emr", "fhir", "hipaa",
],
}
_candidate = _profile.name if _profile else "the candidate"
_MISSION_DEFAULTS: dict[str, str] = {
"music": (
f"This company is in the music industry — an industry {_candidate} finds genuinely "
"compelling. Para 3 should warmly and specifically reflect this authentic alignment, "
"not as a generic fan statement, but as an honest statement of where they'd love to "
"apply their skills."
),
"animal_welfare": (
f"This organization works in animal welfare/rescue — a mission {_candidate} finds "
"genuinely meaningful. Para 3 should reflect this authentic connection warmly and "
"specifically, tying their skills to this mission."
),
"education": (
f"This company works in education or EdTech — a domain that resonates with "
f"{_candidate}'s values. Para 3 should reflect this authentic connection specifically "
"and warmly."
),
"social_impact": (
f"This organization is mission-driven / social impact focused — exactly the kind of "
f"cause {_candidate} cares deeply about. Para 3 should warmly reflect their genuine "
"desire to apply their skills to work that makes a real difference in people's lives."
),
"health": (
f"This company works in healthcare, life sciences, or patient care. "
f"Do NOT write about {_candidate}'s passion for pharmaceuticals or healthcare as an "
"industry. Instead, Para 3 should reflect genuine care for the PEOPLE these companies "
"exist to serve — those navigating complex, often invisible, or unusual health journeys; "
"patients facing rare or poorly understood conditions; individuals whose situations don't "
"fit a clean category. The connection is to the humans behind the data, not the industry. "
"If the user has provided a personal note, use that to anchor Para 3 specifically."
),
} }
def _build_mission_notes(profile=None, candidate_name: str | None = None) -> dict[str, str]: def _build_mission_notes(profile=None, candidate_name: str | None = None) -> dict[str, str]:
"""Merge user's custom mission notes with YAML defaults. """Merge user's custom mission notes with generic defaults."""
For domains defined in mission_domains.yaml the default_note is used when
the user has not provided a custom note in user.yaml mission_preferences.
For user-defined domains (keys in mission_preferences that are NOT in the
YAML config), the custom note is used as-is; no signal detection applies.
"""
p = profile or _profile p = profile or _profile
name = candidate_name or (p.name if p else "the candidate") name = candidate_name or _candidate
prefs = p.mission_preferences if p else {} prefs = p.mission_preferences if p else {}
notes: dict[str, str] = {} notes = {}
for industry, default_note in _MISSION_DEFAULTS.items():
for domain, cfg in _MISSION_DOMAINS.items(): custom = (prefs.get(industry) or "").strip()
default_note = (cfg.get("default_note") or "").strip()
custom = (prefs.get(domain) or "").strip()
if custom: if custom:
notes[domain] = ( notes[industry] = (
f"Mission alignment — {name} shared: \"{custom}\". " f"Mission alignment — {name} shared: \"{custom}\". "
"Para 3 should warmly and specifically reflect this authentic connection." "Para 3 should warmly and specifically reflect this authentic connection."
) )
else: else:
notes[domain] = default_note notes[industry] = default_note
return notes return notes
@ -102,15 +150,12 @@ _MISSION_NOTES = _build_mission_notes()
def detect_mission_alignment( def detect_mission_alignment(
company: str, description: str, mission_notes: dict | None = None company: str, description: str, mission_notes: dict | None = None
) -> str | None: ) -> str | None:
"""Return a mission hint string if company/JD matches a configured domain, else None. """Return a mission hint string if company/JD matches a preferred industry, else None."""
Checks domains in YAML file order (dict order = match priority).
"""
notes = mission_notes if mission_notes is not None else _MISSION_NOTES notes = mission_notes if mission_notes is not None else _MISSION_NOTES
text = f"{company} {description}".lower() text = f"{company} {description}".lower()
for domain, signals in _MISSION_SIGNALS.items(): for industry, signals in _MISSION_SIGNALS.items():
if any(sig in text for sig in signals): if any(sig in text for sig in signals):
return notes.get(domain) return notes[industry]
return None return None
@ -186,7 +231,7 @@ def build_prompt(
) )
parts.append(f"{recruiter_note}\n") parts.append(f"{recruiter_note}\n")
parts.append("Now write a new cover letter for:") parts.append(f"Now write a new cover letter for:")
parts.append(f" Role: {title}") parts.append(f" Role: {title}")
parts.append(f" Company: {company}") parts.append(f" Company: {company}")
if description: if description:

View file

@ -1,254 +0,0 @@
#!/usr/bin/env python3
"""
Generate demo/seed.sql — the committed seed INSERT statements for the demo DB.
Run whenever seed data needs to change:
conda run -n cf python scripts/generate_demo_seed.py
Outputs pure INSERT SQL (no DDL). Schema migrations are handled by db_migrate.py
at container startup. The seed SQL is loaded after migrations complete.
"""
from __future__ import annotations
from datetime import date, timedelta
from pathlib import Path
OUT_PATH = Path(__file__).parent.parent / "demo" / "seed.sql"
TODAY = date.today()
def _dago(n: int) -> str:
    """Return the ISO-format date that lies *n* days before ``TODAY``."""
    offset = timedelta(days=n)
    return (TODAY - offset).isoformat()
def _dfrom(n: int) -> str:
    """Return the ISO-format date that lies *n* days after ``TODAY``."""
    offset = timedelta(days=n)
    return (TODAY + offset).isoformat()
COVER_LETTER_SPOTIFY = """\
Dear Hiring Manager,
I'm excited to apply for the UX Designer role at Spotify. With five years of
experience designing for music discovery and cross-platform experiences, I've
consistently shipped features that make complex audio content feel effortless to
navigate. At my last role I led a redesign of the playlist creation flow that
reduced drop-off by 31%.
Spotify's commitment to artist and listener discovery — and its recent push into
audiobooks and podcast tooling aligns directly with the kind of cross-format
design challenges I'm most energised by.
I'd love to bring that focus to your product design team.
Warm regards,
[Your name]
"""
SQL_PARTS: list[str] = []
# ── Jobs ──────────────────────────────────────────────────────────────────────
# Columns: title, company, url, source, location, is_remote, salary,
# match_score, status, date_found, date_posted, cover_letter,
# applied_at, phone_screen_at, interviewing_at, offer_at, hired_at,
# interview_date, rejection_stage, hired_feedback
JOBS: list[tuple] = [
# ---- Review queue (12 jobs — mix of pending + approved) ------------------
("UX Designer",
"Spotify", "https://www.linkedin.com/jobs/view/1000001",
"linkedin", "Remote", 1, "$110k$140k",
94.0, "approved", _dago(1), _dago(3), COVER_LETTER_SPOTIFY,
None, None, None, None, None, None, None, None),
("Product Designer",
"Duolingo", "https://www.linkedin.com/jobs/view/1000002",
"linkedin", "Pittsburgh, PA", 0, "$95k$120k",
87.0, "approved", _dago(2), _dago(5), "Draft in progress — cover letter generating…",
None, None, None, None, None, None, None, None),
("UX Lead",
"NPR", "https://www.indeed.com/viewjob?jk=1000003",
"indeed", "Washington, DC", 1, "$120k$150k",
81.0, "approved", _dago(3), _dago(7), None,
None, None, None, None, None, None, None, None),
# Ghost post — date_posted 34 days ago → shadow indicator
("Senior UX Designer",
"Mozilla", "https://www.linkedin.com/jobs/view/1000004",
"linkedin", "Remote", 1, "$105k$130k",
81.0, "pending", _dago(2), _dago(34), None,
None, None, None, None, None, None, None, None),
("Interaction Designer",
"Figma", "https://www.indeed.com/viewjob?jk=1000005",
"indeed", "San Francisco, CA", 1, "$115k$145k",
78.0, "pending", _dago(4), _dago(6), None,
None, None, None, None, None, None, None, None),
("Product Designer II",
"Notion", "https://www.linkedin.com/jobs/view/1000006",
"linkedin", "Remote", 1, "$100k$130k",
76.0, "pending", _dago(5), _dago(8), None,
None, None, None, None, None, None, None, None),
("UX Designer",
"Stripe", "https://www.linkedin.com/jobs/view/1000007",
"linkedin", "Remote", 1, "$120k$150k",
74.0, "pending", _dago(6), _dago(9), None,
None, None, None, None, None, None, None, None),
("UI/UX Designer",
"Canva", "https://www.indeed.com/viewjob?jk=1000008",
"indeed", "Remote", 1, "$90k$115k",
72.0, "pending", _dago(7), _dago(10), None,
None, None, None, None, None, None, None, None),
("Senior Product Designer",
"Asana", "https://www.linkedin.com/jobs/view/1000009",
"linkedin", "San Francisco, CA", 1, "$125k$155k",
69.0, "pending", _dago(8), _dago(11), None,
None, None, None, None, None, None, None, None),
("UX Researcher",
"Intercom", "https://www.indeed.com/viewjob?jk=1000010",
"indeed", "Remote", 1, "$95k$120k",
67.0, "pending", _dago(9), _dago(12), None,
None, None, None, None, None, None, None, None),
("Product Designer",
"Linear", "https://www.linkedin.com/jobs/view/1000011",
"linkedin", "Remote", 1, "$110k$135k",
65.0, "pending", _dago(10), _dago(13), None,
None, None, None, None, None, None, None, None),
("UX Designer",
"Loom", "https://www.indeed.com/viewjob?jk=1000012",
"indeed", "Remote", 1, "$90k$110k",
62.0, "pending", _dago(11), _dago(14), None,
None, None, None, None, None, None, None, None),
# ---- Pipeline jobs (applied → hired) ------------------------------------
("Senior Product Designer",
"Asana", "https://www.asana.com/jobs/1000013",
"linkedin", "San Francisco, CA", 1, "$125k$155k",
91.0, "phone_screen", _dago(14), _dago(16), None,
_dago(7), _dfrom(0), None, None, None,
f"{_dfrom(0)}T14:00:00", None, None),
("Product Designer",
"Notion", "https://www.notion.so/jobs/1000014",
"indeed", "Remote", 1, "$100k$130k",
88.0, "interviewing", _dago(21), _dago(23), None,
_dago(14), _dago(10), _dago(3), None, None,
f"{_dfrom(7)}T10:00:00", None, None),
("Design Systems Designer",
"Figma", "https://www.figma.com/jobs/1000015",
"linkedin", "San Francisco, CA", 1, "$130k$160k",
96.0, "hired", _dago(45), _dago(47), None,
_dago(38), _dago(32), _dago(25), _dago(14), _dago(7),
None, None,
'{"factors":["clear_scope","great_manager","mission_aligned"],"notes":"Excited about design systems work. Salary met expectations."}'),
("UX Designer",
"Slack", "https://slack.com/jobs/1000016",
"indeed", "Remote", 1, "$115k$140k",
79.0, "applied", _dago(28), _dago(30), None,
_dago(18), None, None, None, None, None, None, None),
]
def _q(v: object) -> str:
    """Render *v* as a SQL literal.

    ``None`` becomes ``NULL``; every other value is stringified and wrapped in
    single quotes, with embedded single quotes doubled.
    """
    if v is None:
        return "NULL"
    escaped = str(v).replace("'", "''")
    return f"'{escaped}'"
_JOB_COLS = (
"title, company, url, source, location, is_remote, salary, "
"match_score, status, date_found, date_posted, cover_letter, "
"applied_at, phone_screen_at, interviewing_at, offer_at, hired_at, "
"interview_date, rejection_stage, hired_feedback"
)
SQL_PARTS.append("-- jobs")
for job in JOBS:
vals = ", ".join(_q(v) for v in job)
SQL_PARTS.append(f"INSERT INTO jobs ({_JOB_COLS}) VALUES ({vals});")
# ── Contacts ──────────────────────────────────────────────────────────────────
# (job_id, direction, subject, from_addr, to_addr, received_at, stage_signal)
CONTACTS: list[tuple] = [
(1, "inbound", "Excited to connect — UX Designer role at Spotify",
"jamie.chen@spotify.com", "you@example.com", _dago(3), "positive_response"),
(1, "outbound", "Re: Excited to connect — UX Designer role at Spotify",
"you@example.com", "jamie.chen@spotify.com", _dago(2), None),
(13, "inbound", "Interview Confirmation — Senior Product Designer",
"recruiting@asana.com", "you@example.com", _dago(2), "interview_scheduled"),
(14, "inbound", "Your panel interview is confirmed for Apr 22",
"recruiting@notion.so", "you@example.com", _dago(3), "interview_scheduled"),
(14, "inbound", "Pre-interview prep resources",
"marcus.webb@notion.so", "you@example.com", _dago(2), "positive_response"),
(15, "inbound", "Figma Design Systems — Offer Letter",
"offers@figma.com", "you@example.com", _dago(14), "offer_received"),
(15, "outbound", "Re: Figma Design Systems — Offer Letter (acceptance)",
"you@example.com", "offers@figma.com", _dago(10), None),
(15, "inbound", "Welcome to Figma! Onboarding next steps",
"onboarding@figma.com", "you@example.com", _dago(7), None),
(16, "inbound", "Thanks for applying to Slack",
"noreply@slack.com", "you@example.com", _dago(18), None),
]
SQL_PARTS.append("\n-- job_contacts")
for c in CONTACTS:
job_id, direction, subject, from_addr, to_addr, received_at, stage_signal = c
SQL_PARTS.append(
f"INSERT INTO job_contacts "
f"(job_id, direction, subject, from_addr, to_addr, received_at, stage_signal) "
f"VALUES ({job_id}, {_q(direction)}, {_q(subject)}, {_q(from_addr)}, "
f"{_q(to_addr)}, {_q(received_at)}, {_q(stage_signal)});"
)
# ── References ────────────────────────────────────────────────────────────────
# (name, email, role, company, relationship, notes, tags, prep_email)
REFERENCES: list[tuple] = [
("Dr. Priya Nair", "priya.nair@example.com", "Director of Design", "Acme Corp",
"former_manager",
"Managed me for 3 years on the consumer app redesign. Enthusiastic reference.",
'["manager","design"]',
"Hi Priya,\n\nI hope you're doing well! I'm currently interviewing for a few senior UX roles "
"and would be so grateful if you'd be willing to serve as a reference.\n\nThank you!\n[Your name]"),
("Sam Torres", "sam.torres@example.com", "Senior Product Designer", "Acme Corp",
"former_colleague",
"Worked together on design systems. Great at speaking to collaborative process.",
'["colleague","design_systems"]', None),
("Jordan Kim", "jordan.kim@example.com", "VP of Product", "Streamline Inc",
"former_manager",
"Led the product team I was embedded in. Can speak to business impact of design work.",
'["manager","product"]', None),
]
SQL_PARTS.append("\n-- references_")
for ref in REFERENCES:
name, email, role, company, relationship, notes, tags, prep_email = ref
SQL_PARTS.append(
f"INSERT INTO references_ "
f"(name, email, role, company, relationship, notes, tags, prep_email) "
f"VALUES ({_q(name)}, {_q(email)}, {_q(role)}, {_q(company)}, "
f"{_q(relationship)}, {_q(notes)}, {_q(tags)}, {_q(prep_email)});"
)
# ── Write output ──────────────────────────────────────────────────────────────
output = "\n".join(SQL_PARTS) + "\n"
OUT_PATH.write_text(output, encoding="utf-8")
print(
f"Wrote {OUT_PATH} "
f"({len(JOBS)} jobs, {len(CONTACTS)} contacts, {len(REFERENCES)} references)"
)

View file

@ -392,7 +392,6 @@ def _has_todo_keyword(subject: str) -> bool:
_LINKEDIN_ALERT_SENDER = "jobalerts-noreply@linkedin.com" _LINKEDIN_ALERT_SENDER = "jobalerts-noreply@linkedin.com"
_INDEED_ALERT_SENDER = "jobalerts@indeed.com"
# Social-proof / nav lines to skip when parsing alert blocks # Social-proof / nav lines to skip when parsing alert blocks
_ALERT_SKIP_PHRASES = { _ALERT_SKIP_PHRASES = {
@ -448,75 +447,6 @@ def parse_linkedin_alert(body: str) -> list[dict]:
return jobs return jobs
def parse_indeed_alert(body: str) -> list[dict]:
    """
    Parse the HTML body of an Indeed Job Alert email.

    Returns a list of dicts: {title, company, location, salary, url}, one per
    distinct ``jk`` job key, in document order.
    URL is canonicalised to https://www.indeed.com/viewjob?jk=<id>
    (tracking parameters stripped).

    Returns an empty list when BeautifulSoup (bs4) is not installed.
    """
    try:
        from bs4 import BeautifulSoup as _BS
    except ImportError:
        return []

    jobs: list[dict] = []
    soup = _BS(body, "html.parser")

    # Indeed uses several layouts across their email templates. Rather than
    # matching a specific layout, scan every <a> whose href carries a jk=
    # key and treat the first such anchor with usable text as the job title.
    seen_jks: set[str] = set()
    for anchor in soup.find_all("a", href=True):
        href: str = anchor["href"]
        jk_m = re.search(r"[?&]jk=([a-z0-9]+)", href, re.IGNORECASE)
        if not jk_m:
            continue
        jk = jk_m.group(1)
        if jk in seen_jks:
            continue

        title = anchor.get_text(separator=" ", strip=True)
        if not title or len(title) < 3:
            # Text-less anchors (e.g. a link wrapping an image) can share the
            # jk with the title link; do not mark the jk as seen here, or the
            # real title anchor that follows would be discarded.
            continue
        seen_jks.add(jk)

        # Walk up to find the container cell/row and extract company + location
        container = anchor.find_parent(["td", "tr", "div"])
        company = location = salary = ""
        if container:
            text_lines = [
                t.strip() for t in container.get_text(separator="\n").splitlines()
                if t.strip() and t.strip().lower() != title.lower()
            ]
            if text_lines:
                company = text_lines[0]
            if len(text_lines) > 1:
                location = text_lines[1]
            # salary line often contains "$" or "/yr"
            for line in text_lines[2:]:
                if "$" in line or "/yr" in line.lower() or "/hour" in line.lower():
                    salary = line
                    break

        jobs.append({
            "title": title,
            "company": company,
            "location": location,
            "salary": salary,
            "url": f"https://www.indeed.com/viewjob?jk={jk}",
        })
    return jobs
def _scan_todo_label(conn: imaplib.IMAP4, cfg: dict, db_path: Path, def _scan_todo_label(conn: imaplib.IMAP4, cfg: dict, db_path: Path,
active_jobs: list[dict], active_jobs: list[dict],
known_message_ids: set) -> int: known_message_ids: set) -> int:
@ -628,29 +558,20 @@ def _scan_unmatched_leads(conn: imaplib.IMAP4, cfg: dict,
if mid in known_message_ids: if mid in known_message_ids:
continue continue
# ── Job alert digests — parse each card deterministically ─────── # ── LinkedIn Job Alert digest — parse each card individually ──────
from_lower = parsed["from_addr"].lower() if _LINKEDIN_ALERT_SENDER in parsed["from_addr"].lower():
alert_cards: list[dict] = [] cards = parse_linkedin_alert(parsed["body"])
alert_source = "" for card in cards:
if _LINKEDIN_ALERT_SENDER in from_lower:
alert_cards = parse_linkedin_alert(parsed["body"])
alert_source = "linkedin"
elif _INDEED_ALERT_SENDER in from_lower:
alert_cards = parse_indeed_alert(parsed["body"])
alert_source = "indeed"
if alert_cards:
for card in alert_cards:
if card["url"] in existing_urls: if card["url"] in existing_urls:
continue continue
job_id = insert_job(db_path, { job_id = insert_job(db_path, {
"title": card["title"], "title": card["title"],
"company": card["company"], "company": card["company"],
"url": card["url"], "url": card["url"],
"source": alert_source, "source": "linkedin",
"location": card.get("location", ""), "location": card["location"],
"is_remote": 0, "is_remote": 0,
"salary": card.get("salary", ""), "salary": "",
"description": "", "description": "",
"date_found": datetime.now().isoformat()[:10], "date_found": datetime.now().isoformat()[:10],
}) })
@ -659,7 +580,7 @@ def _scan_unmatched_leads(conn: imaplib.IMAP4, cfg: dict,
submit_task(db_path, "scrape_url", job_id) submit_task(db_path, "scrape_url", job_id)
existing_urls.add(card["url"]) existing_urls.add(card["url"])
new_leads += 1 new_leads += 1
print(f"[imap] {alert_source} alert → {card['company']}{card['title']}") print(f"[imap] LinkedIn alert → {card['company']}{card['title']}")
known_message_ids.add(mid) known_message_ids.add(mid)
continue # skip normal LLM extraction path continue # skip normal LLM extraction path

View file

@ -1,5 +1,5 @@
from __future__ import annotations from __future__ import annotations
from datetime import datetime from datetime import datetime, timedelta, timezone
from scripts.integrations.base import IntegrationBase from scripts.integrations.base import IntegrationBase

View file

@ -12,53 +12,10 @@ Usage:
""" """
from __future__ import annotations from __future__ import annotations
import logging
import math import math
import re import re
from datetime import datetime, timezone from datetime import datetime, timezone
_log = logging.getLogger(__name__)
# Max jobs passed to the reranker (avoids excessive inference time on large stacks)
_RERANK_POOL = 50
def _try_rerank(resume_text: str, jobs: list[dict]) -> list[dict]:
    """Reorder *jobs* by reranker relevance against *resume_text*.

    Each job is represented to the reranker by its description, falling back
    to its title. The result contains every input job exactly once: reranked
    jobs first (in reranker order), then any job the reranker did not return.

    The input list is returned unchanged when it is empty, when the reranker
    package cannot be imported, or when the reranking pass raises.
    """
    if not jobs:
        return jobs
    try:
        from circuitforge_core.reranker import rerank
    except ImportError:
        return jobs

    try:
        texts = [job.get("description") or job.get("title", "") for job in jobs]
        ranked = rerank(resume_text, texts, top_n=len(jobs))

        # Several jobs may share identical text, so keep every position per text
        # and hand them out first-come, first-served.
        positions: dict[str, list[int]] = {}
        for pos, text in enumerate(texts):
            positions.setdefault(text, []).append(pos)

        ordered: list[dict] = []
        taken: set[int] = set()
        for hit in ranked:
            free = next(
                (p for p in positions.get(hit.candidate, []) if p not in taken),
                None,
            )
            if free is not None:
                ordered.append(jobs[free])
                taken.add(free)

        # Safety net: keep anything the reranker left out, in original order.
        ordered.extend(job for pos, job in enumerate(jobs) if pos not in taken)
        return ordered
    except Exception:
        _log.warning("Reranker pass failed; using stack_score order.", exc_info=True)
        return jobs
# ── TUNING ───────────────────────────────────────────────────────────────────── # ── TUNING ─────────────────────────────────────────────────────────────────────
# Adjust these constants to change how jobs are ranked. # Adjust these constants to change how jobs are ranked.
@ -332,7 +289,6 @@ def rank_jobs(
user_level: int = 3, user_level: int = 3,
limit: int = 10, limit: int = 10,
min_score: float = 20.0, min_score: float = 20.0,
resume_text: str = "",
) -> list[dict]: ) -> list[dict]:
"""Score and rank pending jobs; return top-N above min_score. """Score and rank pending jobs; return top-N above min_score.
@ -343,10 +299,6 @@ def rank_jobs(
user_level: Seniority level 17 (use seniority_from_experience()). user_level: Seniority level 17 (use seniority_from_experience()).
limit: Stack size; pass 0 to return all qualifying jobs. limit: Stack size; pass 0 to return all qualifying jobs.
min_score: Minimum stack_score to include (0100). min_score: Minimum stack_score to include (0100).
resume_text: Plain-text resume for cross-encoder reranking pass.
When provided, the top-_RERANK_POOL candidates are
reranked by (resume, description) relevance before
the limit is applied. Graceful no-op when empty.
Returns: Returns:
Sorted list (best first) with 'stack_score' key added to each dict. Sorted list (best first) with 'stack_score' key added to each dict.
@ -358,10 +310,4 @@ def rank_jobs(
scored.append({**job, "stack_score": s}) scored.append({**job, "stack_score": s})
scored.sort(key=lambda j: j["stack_score"], reverse=True) scored.sort(key=lambda j: j["stack_score"], reverse=True)
if resume_text and scored:
pool = scored[:_RERANK_POOL]
pool = _try_rerank(resume_text, pool)
scored = pool + scored[_RERANK_POOL:]
return scored[:limit] if limit > 0 else scored return scored[:limit] if limit > 0 else scored

View file

@ -1,42 +0,0 @@
# BSL 1.1 — see LICENSE-BSL
"""LLM-assisted reply draft generation for inbound job contacts (BSL 1.1)."""
from __future__ import annotations
from pathlib import Path
from typing import Optional
_SYSTEM = (
"You are drafting a professional email reply on behalf of a job seeker. "
"Be concise and professional. Do not fabricate facts. If you are uncertain "
"about a detail, leave a [TODO: fill in] placeholder. "
"Output the reply body only — no subject line, no salutation preamble."
)
def _build_prompt(subject: str, from_addr: str, body: str, user_name: str, target_role: str) -> str:
    """Assemble the user prompt: the original email, then profile context, then the instruction."""
    sections = [
        "ORIGINAL EMAIL:",
        f"Subject: {subject}",
        f"From: {from_addr}",
        "Body:",
        f"{body}",
        "",
        "USER PROFILE CONTEXT:",
        f"Name: {user_name}",
        f"Target role: {target_role}",
        "",
        "Write a concise, professional reply to this email.",
    ]
    return "\n".join(sections)
def generate_draft_reply(
    subject: str,
    from_addr: str,
    body: str,
    user_name: str,
    target_role: str,
    config_path: Optional[Path] = None,
) -> str:
    """Draft a reply to an inbound email and return the stripped reply body.

    The email fields and profile context are rendered into a prompt and sent,
    together with the module's system instruction, to an ``LLMRouter`` built
    from *config_path*.
    """
    # The router is imported at call time, not at module import.
    from scripts.llm_router import LLMRouter

    llm = LLMRouter(config_path=config_path)
    user_prompt = _build_prompt(subject, from_addr, body, user_name, target_role)
    reply = llm.complete(system=_SYSTEM, user=user_prompt)
    return reply.strip()

View file

@ -1,46 +1,19 @@
""" """
LLM abstraction layer with priority fallback chain. LLM abstraction layer with priority fallback chain.
Config lookup order: Reads config/llm.yaml. Tries backends in order; falls back on any error.
1. <repo>/config/llm.yaml per-install local config
2. ~/.config/circuitforge/llm.yaml user-level config (circuitforge-core default)
3. env-var auto-config (ANTHROPIC_API_KEY, OPENAI_API_KEY, OLLAMA_HOST, )
""" """
from pathlib import Path from pathlib import Path
from circuitforge_core.llm import LLMRouter as _CoreLLMRouter from circuitforge_core.llm import LLMRouter as _CoreLLMRouter
# Kept for backwards-compatibility — external callers that import CONFIG_PATH
# from this module continue to work.
CONFIG_PATH = Path(__file__).parent.parent / "config" / "llm.yaml" CONFIG_PATH = Path(__file__).parent.parent / "config" / "llm.yaml"
class LLMRouter(_CoreLLMRouter): class LLMRouter(_CoreLLMRouter):
"""Peregrine-specific LLMRouter — tri-level config path priority. """Peregrine-specific LLMRouter — defaults to Peregrine's config/llm.yaml."""
When ``config_path`` is supplied (e.g. in tests) it is passed straight def __init__(self, config_path: Path = CONFIG_PATH):
through to the core. When omitted, the lookup order is: super().__init__(config_path)
1. <repo>/config/llm.yaml (per-install local config)
2. ~/.config/circuitforge/llm.yaml (user-level, circuitforge-core default)
3. env-var auto-config (ANTHROPIC_API_KEY, OPENAI_API_KEY, OLLAMA_HOST )
"""
def __init__(self, config_path: Path | None = None) -> None:
if config_path is not None:
# Explicit path supplied — use it directly (e.g. tests, CLI override).
super().__init__(config_path)
return
local = Path(__file__).parent.parent / "config" / "llm.yaml"
user_level = Path.home() / ".config" / "circuitforge" / "llm.yaml"
if local.exists():
super().__init__(local)
elif user_level.exists():
super().__init__(user_level)
else:
# No yaml found — let circuitforge-core's env-var auto-config run.
# The core default CONFIG_PATH (~/.config/circuitforge/llm.yaml)
# won't exist either, so _auto_config_from_env() will be triggered.
super().__init__()
# Module-level singleton for convenience # Module-level singleton for convenience

View file

@ -1,285 +0,0 @@
"""
DB helpers for the messaging feature.
Messages table: manual log entries and LLM drafts (one row per message).
Message templates table: built-in seeds and user-created templates.
Conventions (match scripts/db.py):
- All functions take db_path: Path as first argument.
- sqlite3.connect(db_path), row_factory = sqlite3.Row
- Return plain dicts (dict(row))
- Always close connection in finally
"""
import sqlite3
from datetime import datetime, timezone
from pathlib import Path
from typing import Optional
# ---------------------------------------------------------------------------
# Internal helpers
# ---------------------------------------------------------------------------
def _connect(db_path: Path) -> sqlite3.Connection:
    """Open a SQLite connection to *db_path* whose rows are ``sqlite3.Row`` objects."""
    connection = sqlite3.connect(db_path)
    connection.row_factory = sqlite3.Row
    return connection
def _now_utc() -> str:
    """Return the current UTC time formatted as ``YYYY-MM-DD HH:MM:SS``."""
    moment = datetime.now(tz=timezone.utc)
    return moment.strftime("%Y-%m-%d %H:%M:%S")
# ---------------------------------------------------------------------------
# Messages
# ---------------------------------------------------------------------------
def create_message(
    db_path: Path,
    *,
    job_id: Optional[int],
    job_contact_id: Optional[int],
    type: str,
    direction: str,
    subject: Optional[str],
    body: Optional[str],
    from_addr: Optional[str],
    to_addr: Optional[str],
    template_id: Optional[int],
    logged_at: Optional[str] = None,
) -> dict:
    """Insert one row into ``messages`` and return the stored row as a dict.

    When *logged_at* is omitted (or empty) the current UTC timestamp from
    ``_now_utc()`` is stored instead. The row is re-read after the commit, so
    the returned dict reflects what the table actually holds for the new id.
    """
    values = (
        job_id,
        job_contact_id,
        type,
        direction,
        subject,
        body,
        from_addr,
        to_addr,
        logged_at or _now_utc(),
        template_id,
    )
    con = _connect(db_path)
    try:
        inserted = con.execute(
            """
            INSERT INTO messages
            (job_id, job_contact_id, type, direction, subject, body,
            from_addr, to_addr, logged_at, template_id)
            VALUES
            (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            """,
            values,
        )
        con.commit()
        stored = con.execute(
            "SELECT * FROM messages WHERE id = ?", (inserted.lastrowid,)
        ).fetchone()
        return dict(stored)
    finally:
        con.close()
def list_messages(
    db_path: Path,
    *,
    job_id: Optional[int] = None,
    type: Optional[str] = None,
    direction: Optional[str] = None,
    limit: int = 100,
) -> list[dict]:
    """Return up to *limit* messages as dicts, newest ``logged_at`` first.

    Each of *job_id*, *type* and *direction* adds an equality filter when it
    is not ``None``; filters are combined with AND. All values, including the
    limit, are bound as SQL parameters.
    """
    requested = (("job_id", job_id), ("type", type), ("direction", direction))
    active = [(column, value) for column, value in requested if value is not None]

    clauses = [f"{column} = ?" for column, _ in active]
    where = ("WHERE " + " AND ".join(clauses)) if clauses else ""
    params: list = [value for _, value in active]
    params.append(limit)

    con = _connect(db_path)
    try:
        found = con.execute(
            f"SELECT * FROM messages {where} ORDER BY logged_at DESC LIMIT ?",
            params,
        ).fetchall()
        return [dict(record) for record in found]
    finally:
        con.close()
def delete_message(db_path: Path, message_id: int) -> None:
    """Delete the message with id *message_id*.

    Raises:
        KeyError: if no message with that id exists.
    """
    con = _connect(db_path)
    try:
        # A single DELETE plus a rowcount check replaces the previous
        # SELECT-then-DELETE pair: one statement instead of two, and no window
        # in which the row can disappear between the check and the delete.
        cursor = con.execute("DELETE FROM messages WHERE id = ?", (message_id,))
        if cursor.rowcount == 0:
            raise KeyError(f"Message {message_id} not found")
        con.commit()
    finally:
        con.close()
def approve_message(db_path: Path, message_id: int) -> dict:
    """Set ``approved_at`` to the current UTC time and return the updated row.

    Raises:
        KeyError: if no message with that id exists.
    """
    con = _connect(db_path)
    try:
        # The UPDATE itself tells us whether the row exists (rowcount), so no
        # separate existence query is needed and there is no check-then-act gap.
        cursor = con.execute(
            "UPDATE messages SET approved_at = ? WHERE id = ?",
            (_now_utc(), message_id),
        )
        if cursor.rowcount == 0:
            raise KeyError(f"Message {message_id} not found")
        con.commit()
        updated = con.execute(
            "SELECT * FROM messages WHERE id = ?", (message_id,)
        ).fetchone()
        return dict(updated)
    finally:
        con.close()
# ---------------------------------------------------------------------------
# Templates
# ---------------------------------------------------------------------------
def list_templates(db_path: Path) -> list[dict]:
    """Return every message template as a dict, built-ins first, then by title."""
    query = "SELECT * FROM message_templates ORDER BY is_builtin DESC, title ASC"
    con = _connect(db_path)
    try:
        return list(map(dict, con.execute(query).fetchall()))
    finally:
        con.close()
def create_template(
    db_path: Path,
    *,
    title: str,
    category: str = "custom",
    subject_template: Optional[str] = None,
    body_template: str,
) -> dict:
    """Store a new user-defined template (``is_builtin = 0``) and return it as a dict."""
    insert_sql = """
        INSERT INTO message_templates
            (title, category, subject_template, body_template, is_builtin)
        VALUES
            (?, ?, ?, ?, 0)
    """
    bindings = (title, category, subject_template, body_template)
    con = _connect(db_path)
    try:
        inserted = con.execute(insert_sql, bindings)
        con.commit()
        # Re-read the row so the result includes database-assigned columns.
        stored = con.execute(
            "SELECT * FROM message_templates WHERE id = ?", (inserted.lastrowid,)
        ).fetchone()
        return dict(stored)
    finally:
        con.close()
def update_template(db_path: Path, template_id: int, **fields) -> dict:
    """Update selected columns of a user-defined template and return the row.

    Only ``title``, ``category``, ``subject_template`` and ``body_template``
    may be passed in *fields*; ``updated_at`` is refreshed on every update.
    With no *fields* the current row is returned unchanged.

    Raises:
        ValueError: if *fields* names a column that may not be updated.
        KeyError: if the template does not exist.
        PermissionError: if an update is requested on a built-in template
            (``is_builtin = 1``).
    """
    if fields:
        # Field names are interpolated into the SQL below, so they are checked
        # against a fixed allow-list before any database work happens.
        updatable = {"title", "category", "subject_template", "body_template"}
        invalid = set(fields) - updatable
        if invalid:
            raise ValueError(f"Cannot update field(s): {invalid}")

    con = _connect(db_path)
    try:
        if not fields:
            # Nothing to change: report the current state.
            current = con.execute(
                "SELECT * FROM message_templates WHERE id = ?", (template_id,)
            ).fetchone()
            if current is None:
                raise KeyError(f"Template {template_id} not found")
            return dict(current)

        guard = con.execute(
            "SELECT id, is_builtin FROM message_templates WHERE id = ?",
            (template_id,),
        ).fetchone()
        if guard is None:
            raise KeyError(f"Template {template_id} not found")
        if guard["is_builtin"]:
            raise PermissionError(
                f"Template {template_id} is a built-in and cannot be modified"
            )

        assignments = [f"{column} = ?" for column in fields]
        assignments.append("updated_at = ?")
        bindings = [*fields.values(), _now_utc(), template_id]
        con.execute(
            f"UPDATE message_templates SET {', '.join(assignments)} WHERE id = ?",
            bindings,
        )
        con.commit()

        refreshed = con.execute(
            "SELECT * FROM message_templates WHERE id = ?", (template_id,)
        ).fetchone()
        return dict(refreshed)
    finally:
        con.close()
def delete_template(db_path: Path, template_id: int) -> None:
    """Remove a user-defined template.

    Raises:
        KeyError: if the template does not exist.
        PermissionError: if the template is a built-in (``is_builtin = 1``).
    """
    con = _connect(db_path)
    try:
        found = con.execute(
            "SELECT id, is_builtin FROM message_templates WHERE id = ?",
            (template_id,),
        ).fetchone()
        if found is None:
            raise KeyError(f"Template {template_id} not found")
        # Built-in templates are protected from deletion.
        if found["is_builtin"]:
            raise PermissionError(
                f"Template {template_id} is a built-in and cannot be deleted"
            )
        con.execute("DELETE FROM message_templates WHERE id = ?", (template_id,))
        con.commit()
    finally:
        con.close()
def update_message_body(db_path: Path, message_id: int, body: str) -> dict:
    """Replace the body text of a message and return the updated row.

    Raises:
        KeyError: if no message with that id exists.
    """
    # NOTE(review): intended for drafts that are not yet approved, but this
    # function does not inspect approved_at -- confirm callers enforce that.
    con = _connect(db_path)
    try:
        # The UPDATE's rowcount tells us whether the row exists, so no separate
        # existence query is needed and there is no check-then-act gap.
        cursor = con.execute(
            "UPDATE messages SET body=? WHERE id=?", (body, message_id)
        )
        if cursor.rowcount == 0:
            raise KeyError(f"message {message_id} not found")
        con.commit()
        updated = con.execute(
            "SELECT * FROM messages WHERE id=?", (message_id,)
        ).fetchone()
        return dict(updated)
    finally:
        con.close()

View file

@ -25,6 +25,7 @@ import argparse
import shutil import shutil
import sys import sys
from pathlib import Path from pathlib import Path
from textwrap import dedent
import yaml import yaml

View file

@ -348,14 +348,14 @@ def write_compose_override(ports: dict[str, dict]) -> None:
for name, info in to_disable.items(): for name, info in to_disable.items():
lines += [ lines += [
f" {name}: # adopted — host service on :{info['resolved']}", f" {name}: # adopted — host service on :{info['resolved']}",
" entrypoint: [\"/bin/sh\", \"-c\", \"sleep infinity\"]", f" entrypoint: [\"/bin/sh\", \"-c\", \"sleep infinity\"]",
" ports: []", f" ports: []",
" healthcheck:", f" healthcheck:",
" test: [\"CMD\", \"true\"]", f" test: [\"CMD\", \"true\"]",
" interval: 1s", f" interval: 1s",
" timeout: 1s", f" timeout: 1s",
" start_period: 0s", f" start_period: 0s",
" retries: 1", f" retries: 1",
] ]
OVERRIDE_YML.write_text("\n".join(lines) + "\n") OVERRIDE_YML.write_text("\n".join(lines) + "\n")
@ -492,12 +492,6 @@ def main() -> None:
# binds a harmless free port instead of conflicting with the external service. # binds a harmless free port instead of conflicting with the external service.
env_updates: dict[str, str] = {i["env_var"]: str(i["stub_port"]) for i in ports.values()} env_updates: dict[str, str] = {i["env_var"]: str(i["stub_port"]) for i in ports.values()}
env_updates["RECOMMENDED_PROFILE"] = profile env_updates["RECOMMENDED_PROFILE"] = profile
# When Ollama is adopted from the host process, write OLLAMA_HOST so
# LLMRouter's env-var auto-config finds it without needing config/llm.yaml.
ollama_info = ports.get("ollama")
if ollama_info and ollama_info.get("external"):
env_updates["OLLAMA_HOST"] = f"http://host.docker.internal:{ollama_info['resolved']}"
if offload_gb > 0: if offload_gb > 0:
env_updates["CPU_OFFLOAD_GB"] = str(offload_gb) env_updates["CPU_OFFLOAD_GB"] = str(offload_gb)
# GPU info for the app container (which lacks nvidia-smi access) # GPU info for the app container (which lacks nvidia-smi access)

View file

@ -19,6 +19,7 @@ from __future__ import annotations
import json import json
import logging import logging
import re import re
from pathlib import Path
from typing import Any from typing import Any
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
@ -69,12 +70,7 @@ def extract_jd_signals(description: str, resume_text: str = "") -> list[str]:
# Extract JSON array from response (LLM may wrap it in markdown) # Extract JSON array from response (LLM may wrap it in markdown)
match = re.search(r"\[.*\]", raw, re.DOTALL) match = re.search(r"\[.*\]", raw, re.DOTALL)
if match: if match:
json_str = match.group(0) llm_signals = json.loads(match.group(0))
# LLMs occasionally emit invalid JSON escape sequences (e.g. \s, \d, \p)
# that are valid regex but not valid JSON. Replace bare backslashes that
# aren't followed by a recognised JSON escape character.
json_str = re.sub(r'\\([^"\\/bfnrtu])', r'\1', json_str)
llm_signals = json.loads(json_str)
llm_signals = [s.strip() for s in llm_signals if isinstance(s, str) and s.strip()] llm_signals = [s.strip() for s in llm_signals if isinstance(s, str) and s.strip()]
except Exception: except Exception:
log.warning("[resume_optimizer] LLM signal extraction failed", exc_info=True) log.warning("[resume_optimizer] LLM signal extraction failed", exc_info=True)
@ -232,22 +228,6 @@ def rewrite_for_ats(
from scripts.llm_router import LLMRouter from scripts.llm_router import LLMRouter
router = LLMRouter() router = LLMRouter()
# Rerank gaps by JD relevance so the most impactful terms are injected first.
# Falls back silently to the incoming priority ordering on any error.
jd_text = job.get("description", "")
if jd_text and prioritized_gaps:
try:
from circuitforge_core.reranker import rerank as _rerank
terms = [g["term"] for g in prioritized_gaps]
results = _rerank(jd_text, terms, top_n=len(terms))
term_rank = {r.candidate: r.rank for r in results}
prioritized_gaps = sorted(
prioritized_gaps,
key=lambda g: term_rank.get(g["term"], len(prioritized_gaps)),
)
except Exception:
pass # keep original priority ordering
# Group gaps by target section # Group gaps by target section
by_section: dict[str, list[str]] = {} by_section: dict[str, list[str]] = {}
for gap in prioritized_gaps: for gap in prioritized_gaps:
@ -277,8 +257,7 @@ def rewrite_for_ats(
f"3. Only rephrase existing content — replace vague verbs/nouns with the " f"3. Only rephrase existing content — replace vague verbs/nouns with the "
f" ATS-preferred equivalents listed above.\n" f" ATS-preferred equivalents listed above.\n"
f"4. Keep the same number of bullet points in experience entries.\n" f"4. Keep the same number of bullet points in experience entries.\n"
f"5. Do NOT use markdown formatting — no **, __, or * for bullets.\n" f"5. Return ONLY the rewritten section content, no labels or explanation."
f"6. Return ONLY the rewritten section content, no labels or explanation."
f"{voice_note}\n\n" f"{voice_note}\n\n"
f"Original {section} section:\n{original_content}" f"Original {section} section:\n{original_content}"
) )
@ -305,8 +284,7 @@ def _section_text_for_prompt(resume: dict[str, Any], section: str) -> str:
for exp in resume.get("experience", []): for exp in resume.get("experience", []):
lines.append(f"{exp['title']} at {exp['company']} ({exp['start_date']}{exp['end_date']})") lines.append(f"{exp['title']} at {exp['company']} ({exp['start_date']}{exp['end_date']})")
for b in exp.get("bullets", []): for b in exp.get("bullets", []):
clean_b = re.sub(r"^[•\-–—*◦▪▸►\s]+", "", b).strip() lines.append(f"{b}")
lines.append(f"{clean_b}")
return "\n".join(lines) if lines else "(empty)" return "\n".join(lines) if lines else "(empty)"
return "(unsupported section)" return "(unsupported section)"
@ -315,7 +293,7 @@ def _apply_section_rewrite(resume: dict[str, Any], section: str, rewritten: str)
"""Return a new resume dict with the given section replaced by rewritten text.""" """Return a new resume dict with the given section replaced by rewritten text."""
updated = dict(resume) updated = dict(resume)
if section == "summary": if section == "summary":
updated["career_summary"] = _clean_summary_markup(rewritten) updated["career_summary"] = rewritten
elif section == "skills": elif section == "skills":
# LLM returns comma-separated or newline-separated skills # LLM returns comma-separated or newline-separated skills
skills = [s.strip() for s in re.split(r"[,\n•·]+", rewritten) if s.strip()] skills = [s.strip() for s in re.split(r"[,\n•·]+", rewritten) if s.strip()]
@ -323,23 +301,10 @@ def _apply_section_rewrite(resume: dict[str, Any], section: str, rewritten: str)
elif section == "experience": elif section == "experience":
# For experience, we keep the structured entries but replace the bullets. # For experience, we keep the structured entries but replace the bullets.
# The LLM rewrites the whole section as plain text; we re-parse the bullets. # The LLM rewrites the whole section as plain text; we re-parse the bullets.
updated["experience"] = _reparse_experience_bullets(resume.get("experience", []), rewritten) updated["experience"] = _reparse_experience_bullets(resume["experience"], rewritten)
return updated return updated
def _clean_summary_markup(text: str) -> str:
"""Strip markdown/plain-text bullet markers from career summary lines.
LLMs sometimes format summary content with '* item' or '• item' markdown.
This converts those lines to unmarked text so the summary renders cleanly.
"""
lines = []
for line in text.splitlines():
cleaned = re.sub(r"^[•*\-–—◦▪▸►]\s+", "", line.lstrip())
lines.append(cleaned)
return "\n".join(lines).strip()
def _reparse_experience_bullets( def _reparse_experience_bullets(
original_entries: list[dict], original_entries: list[dict],
rewritten_text: str, rewritten_text: str,
@ -369,9 +334,9 @@ def _reparse_experience_bullets(
chunk = remaining chunk = remaining
bullets = [ bullets = [
re.sub(r"^([•\-–—*◦▪▸►]\s*)+", "", line.strip()).strip() re.sub(r"^[•\-–—*◦▪▸►]\s*", "", line).strip()
for line in chunk.splitlines() for line in chunk.splitlines()
if re.match(r"^\s*[•\-–—*◦▪▸►]", line) if re.match(r"^[•\-–—*◦▪▸►]\s*", line.strip())
] ]
new_entry = dict(entry) new_entry = dict(entry)
new_entry["bullets"] = bullets if bullets else entry["bullets"] new_entry["bullets"] = bullets if bullets else entry["bullets"]
@ -380,208 +345,6 @@ def _reparse_experience_bullets(
return result return result
# ── Gap framing ───────────────────────────────────────────────────────────────
def frame_skill_gaps(
    struct: dict[str, Any],
    gap_framings: list[dict],
    job: dict[str, Any],
    candidate_voice: str = "",
) -> dict[str, Any]:
    """Add honest framing language for skills the candidate lacks directly.

    Each framing in *gap_framings* is handled according to its ``mode``:

    - ``"adjacent"``: the LLM is asked for one bridging bullet, which is
      appended to the experience entry picked by ``_find_most_relevant_entry``.
    - ``"learning"``: a ``"Developing: <skill> (<context>)"`` note is appended
      to the skills list if it is not already present.
    - anything else (e.g. ``"skip"``), or a framing without ``context``:
      no change.

    The user-supplied context is the source of truth; the prompt instructs the
    LLM only to phrase it, not to invent new claims.

    Args:
        struct: Resume dict to start from.
        gap_framings: Dicts with keys ``skill``, ``mode`` and ``context``.
        job: Job dict; ``title`` and ``company`` are used in the prompt.
        candidate_voice: Optional free-text style note added to the prompt.

    Returns:
        A new resume dict; *struct* and its experience entries are not mutated.
    """
    from scripts.llm_router import LLMRouter

    router = LLMRouter()

    result = dict(struct)
    result["experience"] = [dict(item) for item in (struct.get("experience") or [])]

    def _usable(mode: str) -> list[dict]:
        # Framings of the given mode that also carry a non-empty context.
        return [f for f in gap_framings if f.get("mode") == mode and f.get("context")]

    adjacent_framings = _usable("adjacent")
    learning_framings = _usable("learning")

    # The voice note is the same for every prompt, so build it once.
    if candidate_voice:
        voice_note = f'\n\nCandidate voice/style: "{candidate_voice}". Match this tone.'
    else:
        voice_note = ""

    # Adjacent experience: one bridging bullet on the most relevant entry.
    for framing in adjacent_framings:
        skill = framing["skill"]
        context = framing["context"]

        target_idx = _find_most_relevant_entry(result["experience"], skill)
        if target_idx is None:
            continue

        target = result["experience"][target_idx]
        bullets = list(target.get("bullets") or [])
        existing_sample = "\n".join(f"{b}" for b in bullets[:3])

        prompt = (
            f"You are adding one honest framing sentence to a resume bullet list.\n\n"
            f"The candidate does not have direct experience with '{skill}', "
            f"but they have relevant background they described as:\n"
            f' "{context}"\n\n'
            f"Job context: {job.get('title', '')} at {job.get('company', '')}.\n\n"
            f"RULES:\n"
            f"1. Add exactly ONE new bullet point that bridges their background to '{skill}'.\n"
            f"2. Do NOT fabricate anything beyond what their context description says.\n"
            f"3. Use honest language: 'adjacent experience in', 'strong foundation applicable to', "
            f" 'directly transferable background in', etc.\n"
            f"4. Return ONLY the single new bullet text — no prefix, no explanation."
            f"{voice_note}\n\n"
            f"Existing bullets for context:\n"
        ) + existing_sample

        try:
            reply = router.complete(prompt).strip()
            # Drop a leading bullet marker the LLM may have added.
            reply = re.sub(r"^[•\-–—*◦▪▸►]\s*", "", reply).strip()
            if reply:
                bullets.append(reply)
                result["experience"][target_idx] = {**target, "bullets": bullets}
        except Exception:
            # Best effort: a failed framing is logged and the entry left as-is.
            log.warning(
                "[resume_optimizer] frame_skill_gaps adjacent failed for skill %r", skill,
                exc_info=True,
            )

    # Learning framing: structured note appended to the skills list.
    if learning_framings:
        skills = list(result.get("skills") or [])
        for framing in learning_framings:
            skill = framing["skill"]
            context = framing["context"].strip()
            if context:
                note = f"Developing: {skill} ({context})"
            else:
                note = f"Developing: {skill}"
            if note not in skills:
                skills.append(note)
        result["skills"] = skills

    return result
def _find_most_relevant_entry(
experience: list[dict],
skill: str,
) -> int | None:
"""Return the index of the experience entry most relevant to a skill term.
Uses simple keyword overlap between the skill and entry title/bullets.
Falls back to the most recent (first) entry if no match found.
"""
if not experience:
return None
skill_words = set(skill.lower().split())
best_idx = 0
best_score = -1
for i, entry in enumerate(experience):
entry_text = (
(entry.get("title") or "") + " " +
" ".join(entry.get("bullets") or [])
).lower()
entry_words = set(entry_text.split())
score = len(skill_words & entry_words)
if score > best_score:
best_score = score
best_idx = i
return best_idx
def apply_review_decisions(
    draft: dict[str, Any],
    decisions: dict[str, Any],
) -> dict[str, Any]:
    """Apply the user's section-level review decisions to the rewritten resume.

    Handles approved skills, summary accept/reject (with optional edited text)
    and per-entry experience accept/reject (with optional edited bullets).
    Does not call the LLM and does not mutate *draft*.

    Args:
        draft: Review draft with ``"sections"`` (list of section diffs) and
            ``"rewritten_struct"`` (the rewritten resume dict).
        decisions: Per-section decisions:
            skills: ``{"approved_additions": [...]}``
            summary: ``{"accepted": bool, "edited_text": str | None}``
            experience: ``{"accepted_entries": [{"title", "company",
            "accepted", "edited_bullets"}]}``
            A missing or ``None`` section decision means "accept as proposed".

    Returns:
        A new resume struct with the decisions applied.
    """
    struct = dict(draft.get("rewritten_struct") or {})
    # dict() above is a shallow copy; copy the experience entries too so that
    # overwriting their bullets below cannot modify the caller's draft.
    if struct.get("experience"):
        struct["experience"] = [dict(entry) for entry in struct["experience"]]
    sections = draft.get("sections") or []

    def _first_section(name: str) -> dict | None:
        # First section diff with the given name, or None if there is none.
        return next((sec for sec in sections if sec.get("section") == name), None)

    # Skills: originally kept skills plus only the approved additions.
    skills_decision = decisions.get("skills") or {}
    approved_additions = set(skills_decision.get("approved_additions") or [])
    skills_section = _first_section("skills")
    if skills_section is not None:
        kept = set(skills_section.get("kept") or [])
        struct["skills"] = sorted(kept | approved_additions)

    # Summary: a rejection restores the original; otherwise use edited text.
    summary_decision = decisions.get("summary") or {}
    if not summary_decision.get("accepted", True):
        summary_section = _first_section("summary")
        if summary_section is not None:
            struct["career_summary"] = summary_section.get(
                "original", struct.get("career_summary", "")
            )
    else:
        edited_text = summary_decision.get("edited_text")
        if edited_text is not None:
            struct["career_summary"] = edited_text.strip()

    # Experience: per-entry accept/reject plus optional user-edited bullets.
    experience_decision = decisions.get("experience") or {}
    decision_by_key: dict[str, dict] = {
        f"{item.get('title', '')}|{item.get('company', '')}": item
        for item in (experience_decision.get("accepted_entries") or [])
    }
    for sec in sections:
        if sec.get("section") != "experience":
            continue
        for entry_diff in sec.get("entries") or []:
            key = f"{entry_diff['title']}|{entry_diff['company']}"
            entry_decision = decision_by_key.get(key, {})
            accepted = entry_decision.get("accepted", True)
            edited_bullets = entry_decision.get("edited_bullets")
            for exp_entry in struct.get("experience") or []:
                if (exp_entry.get("title") == entry_diff["title"]
                        and exp_entry.get("company") == entry_diff["company"]):
                    if not accepted:
                        # Copy so the result does not alias the draft's list.
                        exp_entry["bullets"] = list(entry_diff["original_bullets"] or [])
                    elif edited_bullets is not None:
                        exp_entry["bullets"] = [b for b in edited_bullets if b.strip()]
                    break  # only the first matching entry is updated

    return struct
# ── Hallucination guard ─────────────────────────────────────────────────────── # ── Hallucination guard ───────────────────────────────────────────────────────
def hallucination_check(original: dict[str, Any], rewritten: dict[str, Any]) -> bool: def hallucination_check(original: dict[str, Any], rewritten: dict[str, Any]) -> bool:
@ -674,207 +437,3 @@ def render_resume_text(resume: dict[str, Any]) -> str:
lines.append("") lines.append("")
return "\n".join(lines) return "\n".join(lines)
# ── Review diff builder ────────────────────────────────────────────────────────
def build_review_diff(
    original: dict[str, Any],
    rewritten: dict[str, Any],
) -> dict[str, Any]:
    """Describe what changed between *original* and *rewritten* for the review UI.

    Returns a dict with two keys:

    - ``sections``: one diff per changed section, in the order skills,
      summary, experience. Unchanged sections are omitted.
    - ``rewritten_struct``: the *rewritten* dict itself.

    Section diff shapes:

    - skills (``type="skills_diff"``): sorted ``added``, ``removed`` and
      ``kept`` lists of stripped skill strings.
    - summary (``type="text_diff"``): stripped ``original`` and ``proposed``
      text; emitted only when the proposed summary is non-empty and differs.
    - experience (``type="bullets_diff"``): ``entries`` holding ``title``,
      ``company``, ``original_bullets`` and ``proposed_bullets`` for every
      entry whose bullets differ. Entries are paired by position.
    """
    sections: list[dict[str, Any]] = []

    # Skills
    before_skills = {s.strip() for s in (original.get("skills") or [])}
    after_skills = {s.strip() for s in (rewritten.get("skills") or [])}
    added = sorted(after_skills - before_skills)
    removed = sorted(before_skills - after_skills)
    if added or removed:
        sections.append({
            "section": "skills",
            "type": "skills_diff",
            "added": added,
            "removed": removed,
            "kept": sorted(before_skills & after_skills),
        })

    # Summary
    before_summary = (original.get("career_summary") or "").strip()
    after_summary = (rewritten.get("career_summary") or "").strip()
    if after_summary and after_summary != before_summary:
        sections.append({
            "section": "summary",
            "type": "text_diff",
            "original": before_summary,
            "proposed": after_summary,
        })

    # Experience: zip pairs entries by position and stops at the shorter list.
    paired = zip(original.get("experience") or [], rewritten.get("experience") or [])
    entry_diffs = []
    for before_entry, after_entry in paired:
        before_bullets = before_entry.get("bullets") or []
        after_bullets = after_entry.get("bullets") or []
        if before_bullets == after_bullets:
            continue
        entry_diffs.append({
            "title": before_entry.get("title", ""),
            "company": before_entry.get("company", ""),
            "original_bullets": before_bullets,
            "proposed_bullets": after_bullets,
        })
    if entry_diffs:
        sections.append({
            "section": "experience",
            "type": "bullets_diff",
            "entries": entry_diffs,
        })

    return {"sections": sections, "rewritten_struct": rewritten}
# ── PDF export ─────────────────────────────────────────────────────────────────
def export_pdf(resume: dict[str, Any], output_path: str) -> None:
    """Render a structured resume dict to a single-column PDF using reportlab.

    Sections are written in the order: name/contact header, summary,
    experience, education, skills, achievements. Empty sections are skipped.

    Args:
        resume: Structured resume dict (keys read here: ``name``, ``email``,
            ``phone``, ``location``, ``linkedin``, ``career_summary``,
            ``experience``, ``education``, ``skills``, ``achievements``).
        output_path: Path of the .pdf file to write.
    """
    from xml.sax.saxutils import escape

    from reportlab.lib import colors
    from reportlab.lib.enums import TA_CENTER, TA_LEFT
    from reportlab.lib.pagesizes import LETTER
    from reportlab.lib.styles import ParagraphStyle
    from reportlab.lib.units import inch
    from reportlab.platypus import HRFlowable, Paragraph, SimpleDocTemplate, Spacer

    MARGIN = 0.75 * inch

    name_style = ParagraphStyle(
        "name", fontName="Helvetica-Bold", fontSize=16, leading=20,
        alignment=TA_CENTER, spaceAfter=2,
    )
    contact_style = ParagraphStyle(
        "contact", fontName="Helvetica", fontSize=9, leading=12,
        alignment=TA_CENTER, spaceAfter=6,
        textColor=colors.HexColor("#555555"),
    )
    section_style = ParagraphStyle(
        "section", fontName="Helvetica-Bold", fontSize=10, leading=14,
        spaceBefore=10, spaceAfter=2,
        textColor=colors.HexColor("#1a1a2e"),
    )
    body_style = ParagraphStyle(
        "body", fontName="Helvetica", fontSize=9, leading=13, alignment=TA_LEFT,
    )
    role_style = ParagraphStyle(
        "role", fontName="Helvetica-Bold", fontSize=9, leading=13,
    )
    meta_style = ParagraphStyle(
        "meta", fontName="Helvetica-Oblique", fontSize=8, leading=12,
        textColor=colors.HexColor("#555555"), spaceAfter=2,
    )
    bullet_style = ParagraphStyle(
        "bullet", fontName="Helvetica", fontSize=9, leading=13, leftIndent=12,
    )

    def hr():
        # Thin grey rule used under the header and every section title.
        return HRFlowable(width="100%", thickness=0.5,
                          color=colors.HexColor("#cccccc"),
                          spaceAfter=4, spaceBefore=2)

    def para(text, style):
        # Paragraph interprets its text as XML-like markup, so resume content
        # containing "&", "<" or ">" (e.g. "R&D") must be escaped first.
        return Paragraph(escape(str(text)), style)

    def section(title):
        # Section heading followed by its rule.
        return [para(title, section_style), hr()]

    story = []

    if resume.get("name"):
        story.append(para(resume["name"], name_style))

    contact_parts = [p for p in (
        resume.get("email", ""), resume.get("phone", ""),
        resume.get("location", ""), resume.get("linkedin", ""),
    ) if p]
    if contact_parts:
        story.append(para(" | ".join(contact_parts), contact_style))

    story.append(hr())

    summary = (resume.get("career_summary") or "").strip()
    if summary:
        story.extend(section("SUMMARY"))
        story.append(para(summary, body_style))
        story.append(Spacer(1, 4))

    if resume.get("experience"):
        story.extend(section("EXPERIENCE"))
        for exp in resume["experience"]:
            # Join whichever dates are present with an en dash so start and
            # end never run together as one unreadable string.
            dates = " \u2013 ".join(
                str(d) for d in (exp.get("start_date", ""), exp.get("end_date", "")) if d
            )
            story.append(para(
                f"{exp.get('title', '')} | {exp.get('company', '')}", role_style
            ))
            story.append(para(dates, meta_style))
            for bullet in (exp.get("bullets") or []):
                story.append(para(f"{bullet}", bullet_style))
            story.append(Spacer(1, 4))

    if resume.get("education"):
        story.extend(section("EDUCATION"))
        for edu in resume["education"]:
            degree = f"{edu.get('degree', '')} {edu.get('field', '')}".strip()
            story.append(para(
                f"{degree} | {edu.get('institution', '')} {edu.get('graduation_year', '')}".strip(),
                body_style,
            ))
        story.append(Spacer(1, 4))

    if resume.get("skills"):
        story.extend(section("SKILLS"))
        story.append(para(", ".join(resume["skills"]), body_style))
        story.append(Spacer(1, 4))

    if resume.get("achievements"):
        story.extend(section("ACHIEVEMENTS"))
        for achievement in resume["achievements"]:
            story.append(para(f"{achievement}", bullet_style))

    doc = SimpleDocTemplate(
        output_path, pagesize=LETTER,
        leftMargin=MARGIN, rightMargin=MARGIN,
        topMargin=MARGIN, bottomMargin=MARGIN,
    )
    doc.build(story)

View file

@ -9,9 +9,11 @@ Falls back to empty dict on unrecoverable errors — caller shows the form build
from __future__ import annotations from __future__ import annotations
import io import io
import json
import logging import logging
import re import re
import zipfile import zipfile
from pathlib import Path
from xml.etree import ElementTree as ET from xml.etree import ElementTree as ET
import pdfplumber import pdfplumber

View file

@ -1,217 +0,0 @@
"""
Resume format transform: library <-> profile.
Converts between:
- Library format: struct_json produced by resume_parser.parse_resume()
{name, email, phone, career_summary, experience[{title,company,start_date,end_date,location,bullets[]}],
education[{institution,degree,field,start_date,end_date}], skills[], achievements[]}
- Profile content format: ResumePayload content fields (plain_text_resume.yaml)
{name, surname, email, phone, career_summary,
experience[{title,company,period,location,industry,responsibilities,skills[]}],
education[{institution,degree,field,start_date,end_date}],
skills[], achievements[]}
Profile metadata fields (salary, work prefs, self-ID, PII) are never touched here.
License: MIT
"""
from __future__ import annotations
from datetime import date
from typing import Any
# Names of the resume *content* fields of the profile format, as opposed to
# profile metadata (salary, work preferences, self-ID, PII).
# NOTE(review): not referenced by the functions visible in this module --
# presumably used by callers when merging content with metadata; confirm.
_CONTENT_FIELDS = frozenset({
    "name", "surname", "email", "phone", "career_summary",
    "experience", "skills", "education", "achievements",
})
def library_to_profile_content(struct_json: dict[str, Any]) -> dict[str, Any]:
    """Convert a library ``struct_json`` into profile content fields.

    Only content fields are returned; the caller is responsible for merging
    them with existing metadata fields so those are not overwritten.

    Conversion notes:
    - ``name`` is split at the first space into ``name`` / ``surname``.
    - ``start_date`` / ``end_date`` are combined into ``period`` (joined with
      an en dash when both are present).
    - ``bullets`` become newline-joined ``responsibilities``.
    - ``experience[].industry`` and ``experience[].skills`` are always empty
      because the library format has no equivalent.
    """
    first_name, _, surname = (struct_json.get("name") or "").partition(" ")

    def _text(record: dict, key: str) -> str:
        # Missing and None values both become an empty string.
        return record.get(key) or ""

    experience = []
    for job in struct_json.get("experience") or []:
        start = _text(job, "start_date").strip()
        end = _text(job, "end_date").strip()
        experience.append({
            "title": _text(job, "title"),
            "company": _text(job, "company"),
            "period": " \u2013 ".join(part for part in (start, end) if part),
            "location": _text(job, "location"),
            "industry": "",  # not captured by parser
            "responsibilities": "\n".join(b for b in (job.get("bullets") or []) if b),
            "skills": [],
        })

    education = [
        {
            key: _text(school, key)
            for key in ("institution", "degree", "field", "start_date", "end_date")
        }
        for school in struct_json.get("education") or []
    ]

    return {
        "name": first_name,
        "surname": surname,
        "email": _text(struct_json, "email"),
        "phone": _text(struct_json, "phone"),
        "career_summary": _text(struct_json, "career_summary"),
        "experience": experience,
        "skills": list(struct_json.get("skills") or []),
        "education": education,
        "achievements": list(struct_json.get("achievements") or []),
    }
def profile_to_library(payload: dict[str, Any]) -> tuple[str, dict[str, Any]]:
    """Convert profile content fields into ``(plain_text, struct_json)``.

    Inverse of ``library_to_profile_content``. ``plain_text`` is a best-effort
    textual reconstruction; ``struct_json`` is the structured representation
    in library format.
    """
    def _clean(record: dict, key: str) -> str:
        # Missing and None values become "", everything else is stripped.
        return (record.get(key) or "").strip()

    given_and_family = (payload.get("name") or "", payload.get("surname") or "")
    full_name = " ".join(part for part in given_and_family if part).strip()
    career_summary = _clean(payload, "career_summary")
    email = payload.get("email") or ""
    phone = payload.get("phone") or ""

    # Header lines of the plain-text rendering; empty values are skipped.
    lines: list[str] = [value for value in (full_name, email, phone) if value]
    if career_summary:
        lines += ["", "SUMMARY", career_summary]

    experience_structs = []
    for job in payload.get("experience") or []:
        title = _clean(job, "title")
        company = _clean(job, "company")
        period = _clean(job, "period")
        location = _clean(job, "location")

        # Split the period back into start/end on an en dash (preferred) or an
        # em dash only. Plain hyphens are never used as the separator, so ISO
        # dates such as "2023-01" survive the round trip intact.
        for dash in ("\u2013", "\u2014"):
            if dash in period:
                date_parts = [piece.strip() for piece in period.split(dash, 1)]
                break
        else:
            date_parts = [period] if period else []
        start_date = date_parts[0] if date_parts else ""
        end_date = date_parts[1] if len(date_parts) > 1 else ""

        raw_lines = _clean(job, "responsibilities").split("\n")
        bullets = [text.strip() for text in raw_lines if text.strip()]

        if title or company:
            header = " | ".join(part for part in [title, company, period] if part)
            lines += ["", header]
            if location:
                lines.append(location)
            lines.extend(f"\u2022 {b}" for b in bullets)

        experience_structs.append({
            "title": title,
            "company": company,
            "start_date": start_date,
            "end_date": end_date,
            "location": location,
            "bullets": bullets,
        })

    skills: list[str] = list(payload.get("skills") or [])
    if skills:
        lines += ["", "SKILLS", ", ".join(skills)]

    education_structs = []
    for school in payload.get("education") or []:
        record = {
            key: _clean(school, key)
            for key in ("institution", "degree", "field", "start_date", "end_date")
        }
        institution = record["institution"]
        if institution or record["degree"]:
            label = " ".join(part for part in [record["degree"], record["field"]] if part)
            lines.append(f"{label} \u2014 {institution}" if institution else label)
        education_structs.append(record)

    achievements: list[str] = list(payload.get("achievements") or [])

    struct_json: dict[str, Any] = {
        "name": full_name,
        "email": email,
        "phone": phone,
        "career_summary": career_summary,
        "experience": experience_structs,
        "skills": skills,
        "education": education_structs,
        "achievements": achievements,
    }
    return "\n".join(lines).strip(), struct_json
def make_auto_backup_name(source_name: str) -> str:
    """Build the label used for an automatic backup.

    The label combines *source_name* with today's date in ISO format, e.g.
    "Auto-backup before Senior Engineer Resume — 2026-04-16".
    """
    stamp = date.today().isoformat()
    return "Auto-backup before {} \u2014 {}".format(source_name, stamp)
def blank_fields_on_import(struct_json: dict[str, Any]) -> list[str]:
    """List the content fields that end up empty after a library→profile import.

    The result feeds the confirmation modal so the user knows what still
    needs filling in. With no experience entries nothing is reported.
    """
    entries = struct_json.get("experience")
    if not entries:
        return []

    # industry is reported whenever there is any experience at all
    # (the parser never captures it).
    missing: list[str] = ["experience[].industry"]

    # location is reported once if at least one entry has none.
    for entry in entries:
        if not (entry.get("location") or "").strip():
            missing.append("experience[].location")
            break
    return missing

View file

@ -57,7 +57,7 @@ _TIMEOUT = 12
def _detect_board(url: str) -> str: def _detect_board(url: str) -> str:
"""Return 'linkedin', 'indeed', 'glassdoor', 'jobgether', 'oracle_hcm', or 'generic'.""" """Return 'linkedin', 'indeed', 'glassdoor', or 'generic'."""
url_lower = url.lower() url_lower = url.lower()
if "linkedin.com" in url_lower: if "linkedin.com" in url_lower:
return "linkedin" return "linkedin"
@ -67,8 +67,6 @@ def _detect_board(url: str) -> str:
return "glassdoor" return "glassdoor"
if "jobgether.com" in url_lower: if "jobgether.com" in url_lower:
return "jobgether" return "jobgether"
if "oraclecloud.com" in url_lower and "hcmui" in url_lower:
return "oracle_hcm"
return "generic" return "generic"
@ -203,70 +201,6 @@ def _scrape_jobgether(url: str) -> dict:
return {"company": company, "source": "jobgether"} if company else {} return {"company": company, "source": "jobgether"} if company else {}
def _scrape_oracle_hcm(url: str) -> dict:
    """Scrape an Oracle HCM CandidateExperience job page with Playwright.

    The page is rendered in headless Chromium because the fields are read
    from the live DOM after the network goes idle. If Playwright is not
    importable the generic scraper is used instead.

    Returns a dict of the non-empty fields among title/company/location/
    description plus ``"source": "oracle_hcm"``; returns ``{}`` when the
    browser session raises.
    """
    try:
        from playwright.sync_api import sync_playwright
    except ImportError:
        print("[scrape_url] Oracle HCM: Playwright not installed, falling back to generic")
        return _scrape_generic(url)

    # JavaScript evaluated in the page; each field tries several selectors in turn.
    extract_js = """() => {
const sel = (s) => document.querySelector(s)?.textContent?.trim() || '';
const selInner = (s) => document.querySelector(s)?.innerText?.trim() || '';
// Title: try known HCM selectors then fall back to first h1
const title = sel('[class*="requisition-title"]')
|| sel('[class*="JobTitle"]')
|| sel('.job-title')
|| sel('h1');
// Company: page header logo alt text, meta, or site-name span
const companyMeta = document.querySelector('meta[property="og:site_name"]')
?.getAttribute('content') || '';
const company = sel('[class*="company-name"]')
|| sel('[class*="siteName"]')
|| sel('[class*="site-name"]')
|| companyMeta;
// Location: job detail list items
const location = sel('[class*="job-location"]')
|| sel('[data-testid*="location"]')
|| sel('[class*="location"]');
// Description: main content div
const description = selInner('[class*="job-description"]')
|| selInner('[class*="requisition-description"]')
|| selInner('[class*="JobDescription"]')
|| selInner('main article')
|| selInner('main');
return { title, company, location, description };
}"""

    try:
        with sync_playwright() as pw:
            chromium = pw.chromium.launch(headless=True)
            try:
                context = chromium.new_context(user_agent=_HEADERS["User-Agent"])
                tab = context.new_page()
                tab.goto(url, timeout=30_000)
                tab.wait_for_load_state("networkidle", timeout=20_000)
                scraped = tab.evaluate(extract_js)
            finally:
                # Always release the browser, even when navigation fails.
                chromium.close()
        scraped["source"] = "oracle_hcm"
        # Drop fields the page did not provide.
        return {key: value for key, value in scraped.items() if value}
    except Exception as exc:
        print(f"[scrape_url] Oracle HCM Playwright error for {url}: {exc}")
        return {}
def _parse_json_ld_or_og(html: str) -> dict: def _parse_json_ld_or_og(html: str) -> dict:
"""Extract job fields from JSON-LD structured data, then og: meta tags.""" """Extract job fields from JSON-LD structured data, then og: meta tags."""
soup = BeautifulSoup(html, "html.parser") soup = BeautifulSoup(html, "html.parser")
@ -344,8 +278,6 @@ def scrape_job_url(db_path: Path = DEFAULT_DB, job_id: int = None) -> dict:
fields = _scrape_glassdoor(url) fields = _scrape_glassdoor(url)
elif board == "jobgether": elif board == "jobgether":
fields = _scrape_jobgether(url) fields = _scrape_jobgether(url)
elif board == "oracle_hcm":
fields = _scrape_oracle_hcm(url)
else: else:
fields = _scrape_generic(url) fields = _scrape_generic(url)
except requests.RequestException as exc: except requests.RequestException as exc:

View file

@ -1,85 +0,0 @@
# MIT License — see LICENSE
"""Survey assistant: prompt builders and LLM inference for culture-fit survey analysis.
Extracted from dev-api.py so task_runner can import this without importing the
FastAPI application. Callable directly or via the survey_analyze background task.
"""
from __future__ import annotations
import logging
from pathlib import Path
from typing import Optional
log = logging.getLogger(__name__)
SURVEY_SYSTEM = (
"You are a job application advisor helping a candidate answer a culture-fit survey. "
"The candidate values collaborative teamwork, clear communication, growth, and impact. "
"Choose answers that present them in the best professional light."
)
def build_text_prompt(text: str, mode: str) -> str:
    """Build the LLM prompt for a survey pasted as plain text.

    ``mode == "quick"`` asks for one letter plus a one-sentence reason per
    question; any other mode asks for a per-option analysis with a
    recommendation. The survey text is appended after the instructions.
    """
    if mode == "quick":
        instructions = (
            "Answer each survey question below. For each, give ONLY the letter of the best "
            "option and a single-sentence reason. Format exactly as:\n"
            "1. B — reason here\n2. A — reason here\n\n"
        )
    else:
        instructions = (
            "Analyze each survey question below. For each question:\n"
            "- Briefly evaluate each option (1 sentence each)\n"
            "- State your recommendation with reasoning\n\n"
        )
    survey_block = f"Survey:\n{text}"
    return instructions + survey_block
def build_image_prompt(mode: str) -> str:
    """Build the LLM prompt for a survey supplied as a screenshot.

    ``mode == "quick"`` requests a terse letter-plus-reason answer per
    question; any other mode requests a per-option breakdown with a clear
    recommendation.
    """
    quick_prompt = (
        "This is a screenshot of a culture-fit survey. Read all questions and answer each "
        "with the letter of the best option for a collaborative, growth-oriented candidate. "
        "Format: '1. B — brief reason' on separate lines."
    )
    detailed_prompt = (
        "This is a screenshot of a culture-fit survey. For each question, evaluate each option "
        "and recommend the best choice for a collaborative, growth-oriented candidate. "
        "Include a brief breakdown per option and a clear recommendation."
    )
    return quick_prompt if mode == "quick" else detailed_prompt
def run_survey_analyze(
    text: Optional[str],
    image_b64: Optional[str],
    mode: str,
    config_path: Optional[Path] = None,
) -> dict:
    """Run LLM inference for a culture-fit survey.

    A truthy *image_b64* takes the screenshot path (image prompt, fallback
    order read from the router config key ``vision_fallback_order``);
    otherwise *text* is analysed with the survey system prompt and the
    ``research_fallback_order`` key.

    Returns {"output": str, "source": "text_paste" | "screenshot"}.
    Exceptions from the router are not caught here; the caller is
    responsible for error handling.
    """
    # Imported lazily so importing this module does not pull in the router.
    from scripts.llm_router import LLMRouter

    if config_path:
        router = LLMRouter(config_path=config_path)
    else:
        router = LLMRouter()

    if image_b64:
        image_prompt = build_image_prompt(mode)
        vision_order = router.config.get("vision_fallback_order")
        answer = router.complete(
            image_prompt,
            images=[image_b64],
            fallback_order=vision_order,
        )
        return {"output": answer, "source": "screenshot"}

    text_prompt = build_text_prompt(text or "", mode)
    research_order = router.config.get("research_fallback_order")
    answer = router.complete(
        text_prompt,
        system=SURVEY_SYSTEM,
        fallback_order=research_order,
    )
    return {"output": answer, "source": "text_paste"}

View file

@ -16,61 +16,6 @@ from pathlib import Path
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
def _normalize_aihawk_resume(raw: dict) -> dict:
"""Convert a plain_text_resume.yaml (AIHawk format) into the optimizer struct.
Handles two AIHawk variants:
- Newer Peregrine wizard output: already uses bullets/start_date/end_date/career_summary
- Older raw AIHawk format: uses responsibilities (str), period ("YYYY Present")
"""
import re as _re
def _split_responsibilities(text: str) -> list[str]:
lines = [ln.strip() for ln in text.strip().splitlines() if ln.strip()]
return lines if lines else [text.strip()]
def _parse_period(period: str) -> tuple[str, str]:
parts = _re.split(r"\s*[–—-]\s*", period, maxsplit=1)
start = parts[0].strip() if parts else ""
end = parts[1].strip() if len(parts) > 1 else "Present"
return start, end
experience = []
for entry in raw.get("experience", []):
if "responsibilities" in entry:
bullets = _split_responsibilities(entry["responsibilities"])
else:
bullets = entry.get("bullets", [])
if "period" in entry:
start_date, end_date = _parse_period(entry["period"])
else:
start_date = entry.get("start_date", "")
end_date = entry.get("end_date", "Present")
experience.append({
"title": entry.get("title", ""),
"company": entry.get("company", ""),
"start_date": start_date,
"end_date": end_date,
"bullets": bullets,
})
# career_summary may be a string or absent; assessment field is a legacy bool in some profiles
career_summary = raw.get("career_summary", "")
if not isinstance(career_summary, str):
career_summary = ""
return {
"career_summary": career_summary,
"experience": experience,
"education": raw.get("education", []),
"skills": raw.get("skills", []),
"achievements": raw.get("achievements", []),
}
from scripts.db import ( from scripts.db import (
DEFAULT_DB, DEFAULT_DB,
insert_task, insert_task,
@ -251,12 +196,9 @@ def _run_task(db_path: Path, task_id: int, task_type: str, job_id: int,
elif task_type == "company_research": elif task_type == "company_research":
from scripts.company_research import research_company from scripts.company_research import research_company
_cfg_dir = Path(db_path).parent / "config"
_user_llm_cfg = _cfg_dir / "llm.yaml"
result = research_company( result = research_company(
job, job,
on_stage=lambda s: update_task_stage(db_path, task_id, s), on_stage=lambda s: update_task_stage(db_path, task_id, s),
config_path=_user_llm_cfg if _user_llm_cfg.exists() else None,
) )
save_research(db_path, job_id=job_id, **result) save_research(db_path, job_id=job_id, **result)
@ -341,28 +283,17 @@ def _run_task(db_path: Path, task_id: int, task_type: str, job_id: int,
prioritize_gaps, prioritize_gaps,
rewrite_for_ats, rewrite_for_ats,
hallucination_check, hallucination_check,
render_resume_text,
) )
from scripts.user_profile import load_user_profile from scripts.user_profile import load_user_profile
_user_yaml = Path(db_path).parent / "config" / "user.yaml"
description = job.get("description", "") description = job.get("description", "")
resume_path = load_user_profile(str(_user_yaml)).get("resume_path", "") resume_path = load_user_profile().get("resume_path", "")
# Parse the candidate's resume # Parse the candidate's resume
update_task_stage(db_path, task_id, "parsing resume") update_task_stage(db_path, task_id, "parsing resume")
_plain_yaml = Path(db_path).parent / "config" / "plain_text_resume.yaml" resume_text = Path(resume_path).read_text(errors="replace") if resume_path else ""
if resume_path and Path(resume_path).exists(): resume_struct, parse_err = structure_resume(resume_text)
resume_text = Path(resume_path).read_text(errors="replace")
resume_struct, parse_err = structure_resume(resume_text)
elif _plain_yaml.exists():
import yaml as _yaml
_raw = _yaml.safe_load(_plain_yaml.read_text(encoding="utf-8")) or {}
resume_struct = _normalize_aihawk_resume(_raw)
resume_text = resume_struct.get("career_summary", "")
parse_err = ""
else:
resume_text = ""
resume_struct, parse_err = structure_resume("")
# Extract keyword gaps and build gap report (free tier) # Extract keyword gaps and build gap report (free tier)
update_task_stage(db_path, task_id, "extracting keyword gaps") update_task_stage(db_path, task_id, "extracting keyword gaps")
@ -370,56 +301,21 @@ def _run_task(db_path: Path, task_id: int, task_type: str, job_id: int,
prioritized = prioritize_gaps(gaps, resume_struct) prioritized = prioritize_gaps(gaps, resume_struct)
gap_report = _json.dumps(prioritized, indent=2) gap_report = _json.dumps(prioritized, indent=2)
# Full rewrite (paid tier only) → enters awaiting_review, not completed # Full rewrite (paid tier only)
rewritten_text = ""
p = _json.loads(params or "{}") p = _json.loads(params or "{}")
selected_gaps = p.get("selected_gaps", None)
if selected_gaps is not None:
selected_set = set(selected_gaps)
prioritized = [g for g in prioritized if g.get("term") in selected_set]
if p.get("full_rewrite", False): if p.get("full_rewrite", False):
update_task_stage(db_path, task_id, "rewriting resume sections") update_task_stage(db_path, task_id, "rewriting resume sections")
candidate_voice = load_user_profile(str(_user_yaml)).get("candidate_voice", "") candidate_voice = load_user_profile().get("candidate_voice", "")
rewritten = rewrite_for_ats(resume_struct, prioritized, job, candidate_voice) rewritten = rewrite_for_ats(resume_struct, prioritized, job, candidate_voice)
if hallucination_check(resume_struct, rewritten): if hallucination_check(resume_struct, rewritten):
from scripts.resume_optimizer import build_review_diff rewritten_text = render_resume_text(rewritten)
from scripts.db import save_resume_draft
draft = build_review_diff(resume_struct, rewritten)
# Attach gap report to draft for reference in the review UI
draft["gap_report"] = prioritized
save_resume_draft(db_path, job_id=job_id,
draft_json=_json.dumps(draft))
# Save gap report now; final text written after user review
save_optimized_resume(db_path, job_id=job_id,
text="", gap_report=gap_report)
# Park task in awaiting_review — finalize endpoint resolves it
update_task_status(db_path, task_id, "awaiting_review")
return
else: else:
log.warning("[task_runner] resume_optimize hallucination check failed for job %d", job_id) log.warning("[task_runner] resume_optimize hallucination check failed for job %d", job_id)
save_optimized_resume(db_path, job_id=job_id,
text="", gap_report=gap_report)
else:
# Gap-only run (free tier): save report, no draft
save_optimized_resume(db_path, job_id=job_id,
text="", gap_report=gap_report)
elif task_type == "survey_analyze": save_optimized_resume(db_path, job_id=job_id,
import json as _json text=rewritten_text,
from scripts.survey_assistant import run_survey_analyze gap_report=gap_report)
p = _json.loads(params or "{}")
_cfg_path = Path(db_path).parent / "config" / "llm.yaml"
update_task_stage(db_path, task_id, "analyzing survey")
result = run_survey_analyze(
text=p.get("text"),
image_b64=p.get("image_b64"),
mode=p.get("mode", "quick"),
config_path=_cfg_path if _cfg_path.exists() else None,
)
update_task_status(
db_path, task_id, "completed",
error=_json.dumps(result),
)
return
elif task_type == "prepare_training": elif task_type == "prepare_training":
from scripts.prepare_training_data import build_records, write_jsonl, DEFAULT_OUTPUT from scripts.prepare_training_data import build_records, write_jsonl, DEFAULT_OUTPUT

View file

@ -15,13 +15,14 @@ Public API (unchanged — callers do not need to change):
from __future__ import annotations from __future__ import annotations
import logging import logging
import os
import threading import threading
from pathlib import Path from pathlib import Path
from typing import Callable, Optional from typing import Callable, Optional
from circuitforge_core.tasks.scheduler import ( from circuitforge_core.tasks.scheduler import (
LocalScheduler as _CoreTaskScheduler, TaskSpec, # re-export unchanged
TaskSpec, # noqa: F401 — re-exported as part of public API; tests import from here TaskScheduler as _CoreTaskScheduler,
) )
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -33,7 +34,6 @@ LLM_TASK_TYPES: frozenset[str] = frozenset({
"company_research", "company_research",
"wizard_generate", "wizard_generate",
"resume_optimize", "resume_optimize",
"survey_analyze",
}) })
# Conservative peak VRAM estimates (GB) per task type. # Conservative peak VRAM estimates (GB) per task type.
@ -43,7 +43,6 @@ DEFAULT_VRAM_BUDGETS: dict[str, float] = {
"company_research": 5.0, # llama3.1:8b or vllm model "company_research": 5.0, # llama3.1:8b or vllm model
"wizard_generate": 2.5, # same model family as cover_letter "wizard_generate": 2.5, # same model family as cover_letter
"resume_optimize": 5.0, # section-by-section rewrite; same budget as research "resume_optimize": 5.0, # section-by-section rewrite; same budget as research
"survey_analyze": 2.5, # text: phi3:mini; visual: vision service (own VRAM pool)
} }
_DEFAULT_MAX_QUEUE_DEPTH = 500 _DEFAULT_MAX_QUEUE_DEPTH = 500
@ -95,6 +94,15 @@ class TaskScheduler(_CoreTaskScheduler):
def __init__(self, db_path: Path, run_task_fn: Callable) -> None: def __init__(self, db_path: Path, run_task_fn: Callable) -> None:
budgets, max_depth = _load_config_overrides(db_path) budgets, max_depth = _load_config_overrides(db_path)
# Resolve VRAM using module-level _get_gpus so tests can monkeypatch it
try:
gpus = _get_gpus()
available_vram: float = (
sum(g["vram_total_gb"] for g in gpus) if gpus else 999.0
)
except Exception:
available_vram = 999.0
# Warn under this module's logger for any task types with no VRAM budget # Warn under this module's logger for any task types with no VRAM budget
# (mirrors the core warning but captures under scripts.task_scheduler # (mirrors the core warning but captures under scripts.task_scheduler
# so existing tests using caplog.at_level(logger="scripts.task_scheduler") pass) # so existing tests using caplog.at_level(logger="scripts.task_scheduler") pass)
@ -105,12 +113,19 @@ class TaskScheduler(_CoreTaskScheduler):
"defaulting to 0.0 GB (unlimited concurrency for this type)", t "defaulting to 0.0 GB (unlimited concurrency for this type)", t
) )
coordinator_url = os.environ.get(
"CF_ORCH_URL", "http://localhost:7700"
).rstrip("/")
super().__init__( super().__init__(
db_path=db_path, db_path=db_path,
run_task_fn=run_task_fn, run_task_fn=run_task_fn,
task_types=LLM_TASK_TYPES, task_types=LLM_TASK_TYPES,
vram_budgets=budgets, vram_budgets=budgets,
available_vram_gb=available_vram,
max_queue_depth=max_depth, max_queue_depth=max_depth,
coordinator_url=coordinator_url,
service_name="peregrine",
) )
def enqueue( def enqueue(

View file

@ -29,7 +29,6 @@ _DEFAULTS = {
"tier": "free", "tier": "free",
"dev_tier_override": None, "dev_tier_override": None,
"wizard_complete": False, "wizard_complete": False,
"training_export_opt_in": False,
"wizard_step": 0, "wizard_step": 0,
"dismissed_banners": [], "dismissed_banners": [],
"ui_preference": "streamlit", "ui_preference": "streamlit",
@ -78,7 +77,6 @@ class UserProfile:
self.tier: str = data.get("tier", "free") self.tier: str = data.get("tier", "free")
self.dev_tier_override: str | None = data.get("dev_tier_override") or None self.dev_tier_override: str | None = data.get("dev_tier_override") or None
self.wizard_complete: bool = bool(data.get("wizard_complete", False)) self.wizard_complete: bool = bool(data.get("wizard_complete", False))
self.training_export_opt_in: bool = bool(data.get("training_export_opt_in", False))
self.wizard_step: int = int(data.get("wizard_step", 0)) self.wizard_step: int = int(data.get("wizard_step", 0))
self.dismissed_banners: list[str] = list(data.get("dismissed_banners", [])) self.dismissed_banners: list[str] = list(data.get("dismissed_banners", []))
raw_pref = data.get("ui_preference", "streamlit") raw_pref = data.get("ui_preference", "streamlit")
@ -106,7 +104,6 @@ class UserProfile:
"tier": self.tier, "tier": self.tier,
"dev_tier_override": self.dev_tier_override, "dev_tier_override": self.dev_tier_override,
"wizard_complete": self.wizard_complete, "wizard_complete": self.wizard_complete,
"training_export_opt_in": self.training_export_opt_in,
"wizard_step": self.wizard_step, "wizard_step": self.wizard_step,
"dismissed_banners": self.dismissed_banners, "dismissed_banners": self.dismissed_banners,
"ui_preference": self.ui_preference, "ui_preference": self.ui_preference,

View file

@ -1,5 +1,5 @@
#!/usr/bin/env bash #!/usr/bin/env bash
# install.sh — Peregrine dependency installer # setup.sh — Peregrine dependency installer
# Installs Docker, Docker Compose v2, and (optionally) NVIDIA Container Toolkit. # Installs Docker, Docker Compose v2, and (optionally) NVIDIA Container Toolkit.
# Supports: Ubuntu/Debian, Fedora/RHEL/CentOS, Arch Linux, macOS (Homebrew). # Supports: Ubuntu/Debian, Fedora/RHEL/CentOS, Arch Linux, macOS (Homebrew).
# Windows: not supported — use WSL2 with Ubuntu. # Windows: not supported — use WSL2 with Ubuntu.
@ -90,11 +90,15 @@ configure_git_safe_dir() {
} }
activate_git_hooks() { activate_git_hooks() {
local repo_dir local repo_dir hooks_installer
repo_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" repo_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
if [[ -d "$repo_dir/.githooks" ]]; then hooks_installer="/Library/Development/CircuitForge/circuitforge-hooks/install.sh"
if [[ -f "$hooks_installer" ]]; then
bash "$hooks_installer" --quiet
success "CircuitForge hooks activated (circuitforge-hooks)."
elif [[ -d "$repo_dir/.githooks" ]]; then
git -C "$repo_dir" config core.hooksPath .githooks git -C "$repo_dir" config core.hooksPath .githooks
success "Git hooks activated (.githooks/)." success "Git hooks activated (.githooks/) — circuitforge-hooks not found, using local fallback."
fi fi
} }
@ -337,31 +341,6 @@ setup_env() {
fi fi
} }
# ── License key (optional) ────────────────────────────────────────────────────
capture_license_key() {
[[ ! -t 0 ]] && return # skip in non-interactive installs (curl | bash)
local env_file
env_file="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/.env"
[[ ! -f "$env_file" ]] && return # setup_env() creates it; nothing to write into yet
echo ""
info "License key (optional)"
echo -e " Peregrine works without a key for personal self-hosted use."
echo -e " Paid-tier users: enter your ${YELLOW}CFG-XXXX-…${NC} key to unlock cloud LLM and integrations."
echo ""
read -rp " CircuitForge license key [press Enter to skip]: " _key || true
if [[ -n "$_key" ]]; then
if echo "$_key" | grep -qE '^CFG-[A-Z0-9]{4}-[A-Z0-9]{4}-[A-Z0-9]{4}-[A-Z0-9]{4}$'; then
_update_env_key "$env_file" "CF_LICENSE_KEY" "$_key"
_update_env_key "$env_file" "HEIMDALL_URL" "https://license.circuitforge.tech"
success "License key saved — paid-tier features enabled."
else
warn "Key format looks wrong (expected CFG-XXXX-AAAA-BBBB-CCCC) — skipping."
info "Add it manually to .env as CF_LICENSE_KEY= later."
fi
fi
}
# ── Model weights storage ─────────────────────────────────────────────────────── # ── Model weights storage ───────────────────────────────────────────────────────
_update_env_key() { _update_env_key() {
# Portable in-place key=value update for .env files (Linux + macOS). # Portable in-place key=value update for .env files (Linux + macOS).
@ -437,15 +416,8 @@ main() {
fi fi
install_ollama_macos install_ollama_macos
setup_env setup_env
capture_license_key
configure_model_paths configure_model_paths
# Read the actual port from .env so next-steps reflects any customisation
local _script_dir _port
_script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
_port="$(grep -E '^STREAMLIT_PORT=' "$_script_dir/.env" 2>/dev/null | cut -d= -f2-)"
_port="${_port:-8502}"
echo "" echo ""
success "All dependencies installed." success "All dependencies installed."
echo "" echo ""
@ -457,7 +429,7 @@ main() {
else else
echo -e " ${YELLOW}./manage.sh start --profile cpu${NC} # local Ollama inference (CPU)" echo -e " ${YELLOW}./manage.sh start --profile cpu${NC} # local Ollama inference (CPU)"
fi fi
echo -e " 2. Open ${YELLOW}http://localhost:${_port}${NC} — the setup wizard will guide you" echo -e " 2. Open ${YELLOW}http://localhost:8501${NC} — the setup wizard will guide you"
echo -e " (Tip: edit ${YELLOW}.env${NC} any time to adjust ports or model paths)" echo -e " (Tip: edit ${YELLOW}.env${NC} any time to adjust ports or model paths)"
echo "" echo ""
if groups "$USER" 2>/dev/null | grep -q docker; then if groups "$USER" 2>/dev/null | grep -q docker; then

View file

@ -12,7 +12,7 @@ import pytest
from dotenv import load_dotenv from dotenv import load_dotenv
from playwright.sync_api import Page, BrowserContext from playwright.sync_api import Page, BrowserContext
from tests.e2e.models import ErrorRecord, ModeConfig from tests.e2e.models import ErrorRecord, ModeConfig, diff_errors
from tests.e2e.modes.demo import DEMO from tests.e2e.modes.demo import DEMO
from tests.e2e.modes.cloud import CLOUD from tests.e2e.modes.cloud import CLOUD
from tests.e2e.modes.local import LOCAL from tests.e2e.modes.local import LOCAL

View file

@ -9,9 +9,9 @@ from __future__ import annotations
import pytest import pytest
from tests.e2e.conftest import ( from tests.e2e.conftest import (
wait_for_streamlit, screenshot_on_fail, wait_for_streamlit, get_page_errors, screenshot_on_fail,
) )
from tests.e2e.models import diff_errors from tests.e2e.models import ModeConfig, diff_errors
from tests.e2e.pages.home_page import HomePage from tests.e2e.pages.home_page import HomePage
from tests.e2e.pages.job_review_page import JobReviewPage from tests.e2e.pages.job_review_page import JobReviewPage
from tests.e2e.pages.apply_page import ApplyPage from tests.e2e.pages.apply_page import ApplyPage

View file

@ -7,7 +7,8 @@ Run: pytest tests/e2e/test_smoke.py --mode=demo
from __future__ import annotations from __future__ import annotations
import pytest import pytest
from tests.e2e.conftest import wait_for_streamlit, screenshot_on_fail from tests.e2e.conftest import wait_for_streamlit, get_page_errors, get_console_errors, screenshot_on_fail
from tests.e2e.models import ModeConfig
from tests.e2e.pages.home_page import HomePage from tests.e2e.pages.home_page import HomePage
from tests.e2e.pages.job_review_page import JobReviewPage from tests.e2e.pages.job_review_page import JobReviewPage
from tests.e2e.pages.apply_page import ApplyPage from tests.e2e.pages.apply_page import ApplyPage

Some files were not shown because too many files have changed in this diff Show more