Compare commits

...

No commits in common. "v0.4.1" and "main" have entirely different histories.
v0.4.1 ... main

198 changed files with 33367 additions and 3459 deletions

16
.env.e2e.example Normal file
View file

@ -0,0 +1,16 @@
# Peregrine E2E test harness credentials
# Copy to .env.e2e and fill in real values — .env.e2e is gitignored
HEIMDALL_ADMIN_TOKEN=changeme
HEIMDALL_URL=http://localhost:8900
# Cloud auth — Strategy A (preferred): Directus user/pass → fresh JWT per run
E2E_DIRECTUS_EMAIL=e2e@circuitforge.tech
E2E_DIRECTUS_PASSWORD=changeme
E2E_DIRECTUS_URL=http://172.31.0.2:8055
# Cloud auth — Strategy B (fallback): persistent JWT (uncomment to use)
# E2E_DIRECTUS_JWT=changeme
E2E_HEADLESS=true
E2E_SLOW_MO=0

View file

@ -12,10 +12,21 @@ VISION_REVISION=2025-01-09
DOCS_DIR=~/Documents/JobSearch
OLLAMA_MODELS_DIR=~/models/ollama
VLLM_MODELS_DIR=~/models/vllm
VLLM_MODEL=Ouro-1.4B
VLLM_MODELS_DIR=~/models/vllm # override with full path to your model dir
VLLM_MODEL=Ouro-1.4B # cover letters — fast 1.4B model
VLLM_RESEARCH_MODEL=Ouro-2.6B-Thinking # research — reasoning 2.6B model; restart vllm to switch
VLLM_MAX_MODEL_LEN=4096 # increase to 8192 for Thinking models with long CoT
VLLM_GPU_MEM_UTIL=0.75 # lower to 0.6 if sharing GPU with other services
OLLAMA_DEFAULT_MODEL=llama3.2:3b
# ── LLM env-var auto-config (alternative to config/llm.yaml) ─────────────────
# Set any of these to configure LLM backends without needing a config/llm.yaml.
# Priority: Anthropic > OpenAI-compat > Ollama (always tried as local fallback).
OLLAMA_HOST=http://localhost:11434 # Ollama host; override if on a different machine
OLLAMA_MODEL=llama3.2:3b # model to request from Ollama
OPENAI_MODEL=gpt-4o-mini # model override for OpenAI-compat backend
ANTHROPIC_MODEL=claude-haiku-4-5-20251001 # model override for Anthropic backend
# API keys (required for remote profile)
ANTHROPIC_API_KEY=
OPENAI_COMPAT_URL=
@ -28,6 +39,12 @@ FORGEJO_API_URL=https://git.opensourcesolarpunk.com/api/v1
# GITHUB_TOKEN= # future — enable when public mirror is active
# GITHUB_REPO= # future
# ── CF-hosted coordinator (Paid+ tier) ───────────────────────────────────────
# Set CF_LICENSE_KEY to authenticate with the hosted coordinator.
# Leave both blank for local self-hosted cf-orch or bare-metal inference.
CF_LICENSE_KEY=
CF_ORCH_URL=https://orch.circuitforge.tech
# Cloud multi-tenancy (compose.cloud.yml only — do not set for local installs)
CLOUD_MODE=false
CLOUD_DATA_ROOT=/devl/menagerie-data

84
.githooks/pre-commit Executable file
View file

@ -0,0 +1,84 @@
#!/usr/bin/env bash
# .githooks/pre-commit — blocks sensitive files and credential patterns from being committed
#
# For every staged path (added, copied, modified or renamed) the hook:
#   1. matches the path's basename against the BLOCKED_FILES glob list;
#   2. matches the lines the commit adds against the CONTENT_PATTERNS
#      regexes (PCRE, evaluated with `grep -P`).
# Exit status: 1 if anything matched (commit rejected), 0 otherwise.
set -euo pipefail

# ANSI-C quoting stores the real escape bytes, so messages can be printed with
# printf '%s' and file names are never interpreted as escape sequences.
RED=$'\033[0;31m'; YELLOW=$'\033[1;33m'; BOLD=$'\033[1m'; NC=$'\033[0m'
BLOCKED=0

# R is part of the filter so that renaming a file *to* a blocked name is checked too.
STAGED=$(git diff --cached --name-only --diff-filter=ACMR 2>/dev/null)
if [[ -z "$STAGED" ]]; then
  exit 0
fi

# ── Blocked filenames ──────────────────────────────────────────────────────────
BLOCKED_FILES=(
  ".env"
  ".env.local"
  ".env.production"
  ".env.staging"
  "*.pem"
  "*.key"
  "*.p12"
  "*.pfx"
  "id_rsa"
  "id_ecdsa"
  "id_ed25519"
  "id_dsa"
  "*.ppk"
  "secrets.yml"
  "secrets.yaml"
  "credentials.json"
  "service-account*.json"
  "*.keystore"
  "htpasswd"
  ".htpasswd"
)

while IFS= read -r file; do
  # Basename via parameter expansion: no subprocess, and safe for names
  # that begin with '-'.
  filename=${file##*/}
  for pattern in "${BLOCKED_FILES[@]}"; do
    # $pattern is deliberately unquoted so it is treated as a glob.
    # shellcheck disable=SC2254
    case "$filename" in
      $pattern)
        printf "%sBLOCKED:%s %s%s%s matches blocked filename pattern '%s%s%s'\n" \
          "$RED" "$NC" "$BOLD" "$file" "$NC" "$YELLOW" "$pattern" "$NC"
        BLOCKED=1
        ;;
    esac
  done
done <<< "$STAGED"

# ── Blocked content patterns ───────────────────────────────────────────────────
declare -A CONTENT_PATTERNS=(
  ["RSA/EC private key header"]="-----BEGIN (RSA|EC|DSA|OPENSSH) PRIVATE KEY"
  ["AWS access key"]="AKIA[0-9A-Z]{16}"
  ["GitHub token"]="ghp_[A-Za-z0-9]{36}"
  ["Generic API key assignment"]="(api_key|API_KEY|secret_key|SECRET_KEY)\s*=\s*['\"][A-Za-z0-9_\-]{16,}"
  ["Stripe secret key"]="sk_(live|test)_[A-Za-z0-9]{24,}"
  ["Forgejo/Gitea token (40 hex chars)"]="[a-f0-9]{40}"
)

while IFS= read -r file; do
  # Skip binary files
  if git diff --cached -- "$file" | grep -qP "^\+.*\x00"; then
    continue
  fi

  # Collect the added lines once per file instead of re-running git for every
  # pattern. `|| true`: a diff with no '+' lines makes grep exit 1, which is
  # not an error here.
  added=$(git diff --cached -- "$file" | grep '^+' || true)

  for label in "${!CONTENT_PATTERNS[@]}"; do
    pattern=${CONTENT_PATTERNS[$label]}
    # -e is required: the private-key pattern starts with '-' and would
    # otherwise be parsed by grep as a command-line option, so that check
    # could never match.
    status=0
    grep -qP -e "$pattern" <<< "$added" || status=$?
    case "$status" in
      0)
        printf "%sBLOCKED:%s %s%s%s contains pattern matching '%s%s%s'\n" \
          "$RED" "$NC" "$BOLD" "$file" "$NC" "$YELLOW" "$label" "$NC"
        BLOCKED=1
        ;;
      1)
        ;; # no match
      *)
        # grep itself failed (e.g. no PCRE support). Stay best-effort and do
        # not block the commit, but make the skipped check visible.
        printf "%sWARNING:%s could not check %s for '%s' (grep exit %s)\n" \
          "$YELLOW" "$NC" "$file" "$label" "$status" >&2
        ;;
    esac
  done
done <<< "$STAGED"

# ── Result ─────────────────────────────────────────────────────────────────────
if [[ "$BLOCKED" -eq 1 ]]; then
  printf '\n'
  printf '%sCommit rejected.%s Remove sensitive files/content before committing.\n' \
    "$RED" "$NC"
  printf 'To bypass in an emergency: %sgit commit --no-verify%s (use with extreme caution)\n' \
    "$YELLOW" "$NC"
  exit 1
fi
exit 0

View file

@ -0,0 +1,26 @@
---
name: Support Request
about: Ask a question or get help using Peregrine
title: '[Support] '
labels: question
assignees: ''
---
## What are you trying to do?
<!-- Describe what you're trying to accomplish -->
## What have you tried?
<!-- Steps you've already taken, docs you've read, etc. -->
## Environment
- OS: <!-- e.g. Ubuntu 22.04, macOS 14 -->
- Install method: <!-- Docker / Podman / source -->
- Peregrine version: <!-- run `./manage.sh status` or check the UI footer -->
- LLM backend: <!-- Ollama / vLLM / OpenAI / other -->
## Logs or screenshots
<!-- Paste relevant output from `./manage.sh logs` or attach a screenshot -->

View file

@ -13,12 +13,21 @@ jobs:
steps:
- uses: actions/checkout@v4
- name: Install system dependencies
run: sudo apt-get update -q && sudo apt-get install -y libsqlcipher-dev
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: "3.11"
cache: pip
- name: Configure git credentials for Forgejo
env:
FORGEJO_TOKEN: ${{ secrets.FORGEJO_TOKEN }}
run: |
git config --global url."https://oauth2:${FORGEJO_TOKEN}@git.opensourcesolarpunk.com/".insteadOf "https://git.opensourcesolarpunk.com/"
- name: Install dependencies
run: pip install -r requirements.txt

9
.gitignore vendored
View file

@ -35,6 +35,9 @@ config/user.yaml.working
# Claude context files — kept out of version control
CLAUDE.md
.superpowers/
pytest-output.txt
docs/superpowers/
data/email_score.jsonl
data/email_label_queue.jsonl
@ -48,3 +51,9 @@ demo/seed_demo.py
# Git worktrees
.worktrees/
.env.e2e
# E2E test result artifacts
tests/e2e/results/demo/
tests/e2e/results/cloud/
tests/e2e/results/local/

View file

@ -9,7 +9,293 @@ Format follows [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
---
## [0.4.0] — 2026-03-13
## [0.8.5] — 2026-04-02
### Added
- **Vue onboarding wizard** — 7-step first-run setup replaces the Streamlit wizard
in the Vue SPA: Hardware detection → Tier → Resume upload/build → Identity →
Inference & API keys → Search preferences → Integrations. Progress saves to
`user.yaml` on every step; crash-recovery resumes from the last completed step.
- **Wizard API endpoints** — `GET /api/wizard/status`, `POST /api/wizard/step`,
`GET /api/wizard/hardware`, `POST /api/wizard/inference/test`,
`POST /api/wizard/complete`. Inference test always soft-fails so Ollama being
unreachable never blocks setup completion.
- **Cloud auto-skip** — cloud instances automatically complete steps 1 (hardware),
2 (tier), and 5 (inference) and drop the user directly on the Resume step.
- **`wizardGuard` router gate** — all Vue routes require wizard completion; completed
users are bounced away from `/setup` to `/`.
- **Chip-input search step** — job titles and locations entered as press-Enter/comma
chips; validates at least one title before advancing.
- **Integrations tile grid** — optional step 7 shows Notion, Calendar, Slack, Discord,
Drive with paid-tier badges; skippable on Finish.
### Fixed
- **User config isolation: dangerous fallback removed** — `_user_yaml_path()` fell
back to `/devl/job-seeker/config/user.yaml` (legacy profile) when `user.yaml`
didn't exist at the expected path; new users now get an empty dict instead of
another user's data. Affects profile, resume, search, and all wizard endpoints.
- **Resume path not user-isolated** — `RESUME_PATH = Path("config/plain_text_resume.yaml")`
was a relative CWD path shared across all users. Replaced with `_resume_path()`
derived from `_user_yaml_path()` / `STAGING_DB`.
- **Resume upload silently returned empty data** — `upload_resume` was passing a
file path string to `structure_resume()` which expects raw text; now reads bytes
and dispatches to the correct extractor (`extract_text_from_pdf` / `_docx` / `_odt`).
- **Wizard resume step read wrong envelope field** — `WizardResumeStep.vue` read
`data.experience` but the upload response wraps parsed data under `data.data`.
---
## [0.8.4] — 2026-04-02
### Fixed
- **Cloud: cover letter used wrong user's profile** — `generate_cover_letter.generate()`
loaded `_profile` from the global `config/user.yaml` at module import time, so all
cloud users got the default user's name, voice, and mission preferences in their
generated letters. `generate()` now accepts a `user_yaml_path` parameter; `task_runner`
derives it from the per-user config directory (`db_path/../config/user.yaml`) and
passes it through. `_build_system_context`, `_build_mission_notes`, `detect_mission_alignment`,
`build_prompt`, and `_trim_to_letter_end` all accept a `profile` override so the
per-call profile is used end-to-end without breaking CLI mode.
- **Apply Workspace: hardcoded config paths in cloud mode** — `4_Apply.py` was loading
`_USER_YAML` and `RESUME_YAML` from the repo-root `config/` before `resolve_session()`
ran, so cloud users saw the global (Meg's) resume in the Apply tab. Both paths now
derive from `get_config_dir()` after session resolution.
### Changed
- **Vue SPA open to all tiers** — Vue 3 frontend is no longer gated behind the beta
flag; all tier users can switch to the Vue UI from Settings.
- **LLM model candidates** — vllm backend now tries Qwen2.5-3B first, Phi-4-mini
as fallback (was reversed). cf_orch allocation block added to vllm config.
- **Preflight** — removed `vllm` from Docker adoption list; vllm is now managed
entirely by cf-orch and should not be stubbed by preflight.
---
## [0.8.3] — 2026-04-01
### Fixed
- **CI: Forgejo auth** — GitHub Actions `pip install` was failing to fetch
`circuitforge-core` from the private Forgejo VCS URL. Added `FORGEJO_TOKEN`
repository secret and a `git config insteadOf` step to inject credentials
before `pip install`.
- **CI: settings API tests** — 6 `test_dev_api_settings` PUT/POST tests were
returning HTTP 500 in CI because `_user_yaml_path()` read the module-level
`DB_PATH` constant (frozen at import time), so `monkeypatch.setenv("STAGING_DB")`
had no effect. Fixed by reading `os.environ` at call time.
---
## [0.8.2] — 2026-04-01
### Fixed
- **CI pipeline** — `pip install -r requirements.txt` was failing in GitHub Actions
because `-e ../circuitforge-core` requires a sibling directory that doesn't exist
in a single-repo checkout. Replaced with a `git+https://` VCS URL fallback;
`Dockerfile.cfcore` still installs from the local `COPY` to avoid redundant
network fetches during Docker builds.
- **Vue-nav reload loop** — `sync_ui_cookie()` was calling
`window.parent.location.reload()` on every render when `user.yaml` has
`ui_preference: vue` but no Caddy proxy is in the traffic path (test instances,
bare Docker). Gated the reload on `PEREGRINE_CADDY_PROXY=1`; instances without
the env var set the cookie silently and skip the reload.
### Changed
- **cfcore VRAM lease integration** — the task scheduler now acquires a VRAM lease
from the cf-orch coordinator before running a batch of LLM tasks and releases it
when the batch completes. Visible in the coordinator dashboard at `:7700`.
- **`CF_ORCH_URL` env var** — scheduler reads coordinator address from
`CF_ORCH_URL` (default `http://localhost:7700`); set to
`http://host.docker.internal:7700` in Docker compose files so containers can
reach the host coordinator.
- **All compose files on `Dockerfile.cfcore`** — `compose.yml`, `compose.cloud.yml`,
and `compose.test-cfcore.yml` all use the parent-context build. `build: .` is
removed from `compose.yml`.
---
## [0.8.1] — 2026-04-01
### Fixed
- **Job title suggester silent failure** — when the LLM returned empty arrays or
non-JSON text, the spinner would complete with zero UI feedback. Now shows an
explicit "No new suggestions found" info message with a resume-upload hint for
new users who haven't uploaded a resume yet.
- **Suggester exception handling** — catch `Exception` instead of only
`RuntimeError` so connection errors and `FileNotFoundError` (missing llm.yaml)
surface as error messages rather than crashing the page silently.
### Added
- **`Dockerfile.cfcore`** — parent-context Dockerfile that copies
`circuitforge-core/` alongside `peregrine/` before `pip install`, resolving
the `-e ../circuitforge-core` editable requirement inside Docker.
- **`compose.test-cfcore.yml`** — single-user test instance on port 8516 for
smoke-testing cfcore shim integration before promoting to the cloud instance.
---
## [0.8.0] — 2026-04-01
### Added
- **ATS Resume Optimizer** (gap report free; LLM rewrite paid+)
- `scripts/resume_optimizer.py` — full pipeline: TF-IDF gap extraction →
`prioritize_gaps` → `rewrite_for_ats` → hallucination guard (anchor-set
diffing on employers, institutions, and dates)
- `scripts/db.py` — `optimized_resume` + `ats_gap_report` columns;
`save_optimized_resume` / `get_optimized_resume` helpers
- `GET /api/jobs/{id}/resume_optimizer` — fetch gap report + rewrite
- `POST /api/jobs/{id}/resume_optimizer/generate` — queue rewrite task
- `GET /api/jobs/{id}/resume_optimizer/task` — poll task status
- `web/src/components/ResumeOptimizerPanel.vue` — gap report (all tiers),
LLM rewrite section (paid+), hallucination warning badge, `.txt` download
- `ResumeOptimizerPanel` integrated into `ApplyWorkspace`
- **Vue SPA full merge** (closes #8) — `feature/vue-spa` merged to `main`
- `dev-api.py` — full FastAPI backend (settings, jobs, interviews, prep,
survey, digest, resume optimizer); cloud session middleware (JWT → per-user
SQLite); BYOK credential store
- `dev_api.py` — symlink → `dev-api.py` for importable module alias
- `scripts/job_ranker.py` — two-stage ranking for `/api/jobs/stack`
- `scripts/credential_store.py` — per-user BYOK API key management
- `scripts/user_profile.py` — `load_user_profile` / `save_user_profile`
- `web/src/components/TaskIndicator.vue` + `web/src/stores/tasks.ts` —
live background task queue display
- `web/public/` — peregrine logo assets (SVG + PNG)
- **API test suite** — 5 new test modules (622 tests total)
- `tests/test_dev_api_settings.py` (38 tests)
- `tests/test_dev_api_interviews.py`, `test_dev_api_prep.py`,
`test_dev_api_survey.py`, `test_dev_api_digest.py`
### Fixed
- **Cloud DB routing** — `app/pages/1_Job_Review.py`, `5_Interviews.py`,
`6_Interview_Prep.py`, `7_Survey.py` were hardcoding `DEFAULT_DB`; now
use `get_db_path()` for correct per-user routing in cloud mode (#24)
- **Test isolation** — `importlib.reload(dev_api)` in digest/interviews
fixtures reset all module globals, silently breaking `monkeypatch.setattr`
in subsequent test files; replaced with targeted `monkeypatch.setattr(dev_api,
"DB_PATH", tmp_db)` (#26)
---
## [0.7.0] — 2026-03-22
### Added
- **Vue 3 SPA — beta access for paid tier** — The new Vue 3 frontend (built with
Vite + UnoCSS) is now merged into `main` and available to paid-tier subscribers
as an opt-in beta. The Streamlit UI remains the default and will continue to
receive full support.
- `web/` — full Vue 3 SPA source (components, stores, router, composables,
views) from `feature/vue-spa`
- `web/src/components/ClassicUIButton.vue` — one-click switch back to the
Classic (Streamlit) UI; sets `prgn_ui=streamlit` cookie and appends
`?prgn_switch=streamlit` so `user.yaml` stays in sync
- `web/src/composables/useFeatureFlag.ts` — reads `prgn_demo_tier` cookie for
demo toolbar visual consistency (display-only, not an authoritative gate)
- **UI switcher** — Reddit-style opt-in to the Vue SPA with durable preference
persistence and graceful fallback.
- `app/components/ui_switcher.py` — `sync_ui_cookie()`, `switch_ui()`,
`render_banner()`, `render_settings_toggle()`
- `scripts/user_profile.py` — `ui_preference` field (`streamlit` | `vue`,
default: `streamlit`) with round-trip `save()`
- `app/wizard/tiers.py` — `vue_ui_beta: "paid"` feature key; `demo_tier`
keyword arg on `can_use()` for thread-safe demo mode simulation
- Banner (dismissible, paid tier only) + Settings → System → Deployment toggle
- Caddy cookie routing: `prgn_ui=vue` → nginx Vue SPA; absent/`streamlit` →
Streamlit. 502 fallback clears cookie and redirects with `?ui_fallback=1`
- **Demo toolbar** — slim full-width tier-simulation bar for `DEMO_MODE`
instances. Free / Paid / Premium pills let demo visitors explore all feature
tiers without an account. Persists via `prgn_demo_tier` cookie. Default: Paid
(most compelling first impression). `app/components/demo_toolbar.py`
- **Docker `web` service** — multi-stage nginx container serving the Vue SPA
`dist/` build. Added to `compose.yml` (port 8506), `compose.demo.yml`
(port 8507), `compose.cloud.yml` (port 8508). `manage.sh build` now includes
the `web` service alongside `app`.
### Changed
- **Caddy routing** — `menagerie.circuitforge.tech` and
`demo.circuitforge.tech` peregrine blocks now inspect the `prgn_ui` cookie
and fan-out to the Vue SPA service or Streamlit accordingly.
---
## [0.6.2] — 2026-03-18
### Added
- **Playwright E2E test harness** — smoke + interaction test suite covering all
three Peregrine instances (demo / cloud / local). Navigates every page, checks
for DOM errors on load, clicks every interactable element, diffs errors
before/after each click, and XFAIL-marks expected demo-mode failures so
neutering-guard regressions are surfaced as XPASSes. Screenshots on failure.
- `tests/e2e/test_smoke.py` — page-load error detection
- `tests/e2e/test_interactions.py` — full click-through with XFAIL/XPASS bucketing
- `tests/e2e/conftest.py` — Streamlit-aware wait helpers, error scanner, fixtures
- `tests/e2e/models.py` — `ErrorRecord`, `ModeConfig`, `diff_errors`
- `tests/e2e/modes/` — per-mode configs (demo / cloud / local)
- `tests/e2e/pages/` — page objects for all 7 pages including Settings tabs
### Fixed
- **Demo: "Discovery failed" error on Home page load** — `task_runner.py` now
checks `DEMO_MODE` before importing `discover.py`; returns a friendly error
immediately instead of crashing on missing `search_profiles.yaml` (#21)
- **Demo: silent `st.error()` in collapsed Practice Q&A expander** — Interview
Prep no longer auto-triggers the LLM on page render in demo mode; shows an
`st.info` placeholder instead, eliminating the hidden error element (#22)
- **Cloud: auth wall shown to E2E test browser** — `cloud_session.py` now falls
back to the `Cookie` header when `X-CF-Session` is absent (direct access
without Caddy). Playwright's `set_extra_http_headers()` does not propagate to
WebSocket handshakes; cookies do. Test harness uses `ctx.add_cookies()`.
- **E2E error scanner returned empty text for collapsed expanders** — switched
from `inner_text()` (respects CSS `display:none`) to `text_content()` so
errors inside collapsed Streamlit expanders are captured with their full text.
---
## [0.6.1] — 2026-03-16
### Fixed
- **Keyword suggestions not visible on first render** — `✨ Suggest` in
Settings → Search now calls `st.rerun()` after storing results; chips appear
immediately without requiring a tab switch (#18)
- **Wizard identity step required manual re-entry of resume data** — step 4
(Identity) now prefills name, email, and phone from the parsed resume when
those fields are blank; existing saved values are not overwritten (#17)
- **"Send to Notion" hardcoded on Home dashboard** — sync section now shows the
connected provider name, or a "Set up a sync integration" prompt with a
Settings link when no integration is configured (#16)
- **`test_generate_calls_llm_router` flaky in full suite** — resolved by queue
optimizer merge; mock state pollution eliminated (#12)
---
## [0.6.0] — 2026-03-16
### Added
- **Calendar integration** — push interview events to Apple Calendar (CalDAV) or
Google Calendar directly from the Interviews kanban. Idempotent: a second push
updates the existing event rather than creating a duplicate. Button shows
"📅 Add to Calendar" on first push and "🔄 Update Calendar" thereafter.
Event title: `{Stage}: {Job Title} @ {Company}`; 1hr duration at noon UTC;
job URL and company research brief included in event description.
- `scripts/calendar_push.py` — push/update orchestration
- `scripts/integrations/apple_calendar.py` — `create_event()` / `update_event()`
via `caldav` + `icalendar`
- `scripts/integrations/google_calendar.py` — `create_event()` / `update_event()`
via `google-api-python-client` (service account); `test()` now makes a real API call
- `scripts/db.py` — `calendar_event_id TEXT` column (auto-migration) +
`set_calendar_event_id()` helper
- `environment.yml` — pin `caldav>=1.3`, `icalendar>=5.0`,
`google-api-python-client>=2.0`, `google-auth>=2.0`
---
## [0.4.1] — 2026-03-13
### Added
- **LinkedIn profile import** — one-click import from a public LinkedIn profile URL

47
Dockerfile.cfcore Normal file
View file

@ -0,0 +1,47 @@
# Dockerfile.cfcore — build context must be the PARENT directory of peregrine/
#
# Used when circuitforge-core is installed from source (not PyPI).
# Both repos must be siblings on the build host:
# /devl/peregrine/ → WORKDIR /app
# /devl/circuitforge-core/ → copied to /circuitforge-core and pip-installed from there
#
# Build manually:
# docker build -f peregrine/Dockerfile.cfcore -t peregrine-cfcore ..
#
# Via compose (compose.test-cfcore.yml sets context: ..):
# docker compose -f compose.test-cfcore.yml build
FROM python:3.11-slim
WORKDIR /app
# System deps for companyScraper (beautifulsoup4, fake-useragent, lxml) and PDF gen
# libsqlcipher-dev: required to build pysqlcipher3 (SQLCipher AES-256 encryption for cloud mode)
# The apt package lists are removed in the same layer to keep the image small.
RUN apt-get update && apt-get install -y --no-install-recommends \
gcc libffi-dev curl libsqlcipher-dev \
&& rm -rf /var/lib/apt/lists/*
# Copy circuitforge-core and install it from the local path before requirements.txt.
# requirements.txt has a git+https:// fallback URL for CI (where circuitforge-core
# is not a sibling directory), but Docker always has the local copy available here.
COPY circuitforge-core/ /circuitforge-core/
RUN pip install --no-cache-dir /circuitforge-core
# Only requirements.txt is copied at this point, so edits to application source
# do not invalidate the dependency-install layer below.
COPY peregrine/requirements.txt .
# Skip the cfcore line — already installed above from the local copy.
# grep -v drops every line mentioning circuitforge-core; the filtered list is
# piped to pip through /dev/stdin rather than written to a second file.
RUN grep -v 'circuitforge-core' requirements.txt | pip install --no-cache-dir -r /dev/stdin
# Install Playwright browser (cached separately from Python deps so requirements
# changes don't bust the ~600–900 MB Chromium layer and vice versa)
RUN playwright install chromium && playwright install-deps chromium
# Bundle companyScraper (company research web scraper)
COPY peregrine/scrapers/ /app/scrapers/
# Copy the rest of the application source into WORKDIR (/app).
COPY peregrine/ .
# Port matches --server.port in the CMD below.
EXPOSE 8501
# Default command: launch the Streamlit app at app/app.py on port 8501.
CMD ["streamlit", "run", "app/app.py", \
"--server.port=8501", \
"--server.headless=true", \
"--server.fileWatcherType=none"]

View file

@ -1,16 +1,33 @@
# Peregrine
> **Primary development** happens at [git.opensourcesolarpunk.com](https://git.opensourcesolarpunk.com/pyr0ball/peregrine) — GitHub and Codeberg are push mirrors. Issues and PRs are welcome on either platform.
> **Primary development** happens at [git.opensourcesolarpunk.com](https://git.opensourcesolarpunk.com/Circuit-Forge/peregrine) — GitHub and Codeberg are push mirrors. Issues and PRs are welcome on either platform.
[![License: BSL 1.1](https://img.shields.io/badge/License-BSL_1.1-blue.svg)](./LICENSE-BSL)
[![CI](https://github.com/CircuitForge/peregrine/actions/workflows/ci.yml/badge.svg)](https://github.com/CircuitForge/peregrine/actions/workflows/ci.yml)
**AI-powered job search pipeline — by [Circuit Forge LLC](https://circuitforge.tech)**
**Job search pipeline — by [Circuit Forge LLC](https://circuitforge.tech)**
> *"Don't be evil, for real and forever."*
> *"Tools for the jobs that the system made hard on purpose."*
Automates the full job search lifecycle: discovery → matching → cover letters → applications → interview prep.
Privacy-first, local-first. Your data never leaves your machine.
---
Job search is a second job nobody hired you for.
ATS filters designed to reject. Job boards that show the same listing eight times. Cover letter number forty-seven for a role that might already be filled. Hours of prep for a phone screen that lasts twelve minutes.
Peregrine handles the pipeline — discovery, matching, tracking, drafting, and prep — so you can spend your time doing the work you actually want to be doing.
**LLM support is optional.** The full discovery and tracking pipeline works without one. When you do configure a backend, the LLM drafts the parts that are genuinely miserable — cover letters, company research briefs, interview prep sheets — and waits for your approval before anything goes anywhere.
### What Peregrine does not do
Peregrine does **not** submit job applications for you. You still have to go to each employer's site and click apply yourself.
This is intentional. Automated mass-applying is a bad experience for everyone — it's also a trust violation with employers who took the time to post a real role. Peregrine is a preparation and organization tool, not a bot.
What it *does* cover is everything before and after that click: finding the jobs, matching them against your resume, generating cover letters and prep materials, and once you've applied — tracking where you stand, classifying the emails that come back, and surfacing company research when an interview lands on your calendar. The submit button is yours. The rest of the grind is ours.
> **Exception:** [AIHawk](https://github.com/nicolomantini/LinkedIn-Easy-Apply) is a separate, optional tool that handles LinkedIn Easy Apply automation. Peregrine integrates with it for AIHawk-compatible profiles, but it is not part of Peregrine's core pipeline.
---
@ -19,7 +36,7 @@ Privacy-first, local-first. Your data never leaves your machine.
**1. Clone and install dependencies** (Docker, NVIDIA toolkit if needed):
```bash
git clone https://git.opensourcesolarpunk.com/pyr0ball/peregrine
git clone https://git.opensourcesolarpunk.com/Circuit-Forge/peregrine
cd peregrine
./manage.sh setup
```
@ -129,21 +146,26 @@ Re-enter the wizard any time via **Settings → Developer → Reset wizard**.
| **Company research briefs** | Free with LLM¹ |
| **Interview prep & practice Q&A** | Free with LLM¹ |
| **Survey assistant** (culture-fit Q&A, screenshot analysis) | Free with LLM¹ |
| **AI wizard helpers** (career summary, bullet expansion, skill suggestions) | Free with LLM¹ |
| **Wizard helpers** (career summary, bullet expansion, skill suggestions, job title suggestions, mission notes) | Free with LLM¹ |
| Managed cloud LLM (no API key needed) | Paid |
| Email sync & auto-classification | Paid |
| LLM-powered keyword blocklist | Paid |
| Job tracking integrations (Notion, Airtable, Google Sheets) | Paid |
| Calendar sync (Google, Apple) | Paid |
| Slack notifications | Paid |
| CircuitForge shared cover-letter model | Paid |
| Vue 3 SPA — full UI with onboarding wizard, job board, apply workspace, sort/filter, research modal, draft cover letter | Free |
| **Voice guidelines** (custom writing style & tone) | Premium with LLM¹ ² |
| Cover letter model fine-tuning (your writing, your model) | Premium |
| Multi-user support | Premium |
¹ **BYOK unlock:** configure any LLM backend — a local [Ollama](https://ollama.com) or vLLM instance,
or your own API key (Anthropic, OpenAI-compatible) — and all AI features marked **Free with LLM**
¹ **BYOK (bring your own key/backend) unlock:** configure any LLM backend — a local [Ollama](https://ollama.com) or vLLM instance,
or your own API key (Anthropic, OpenAI-compatible) — and all features marked **Free with LLM** or **Premium with LLM**
unlock at no charge. The paid tier earns its price by providing managed cloud inference so you
don't need a key at all, plus integrations and email sync.
² **Voice guidelines** requires Premium tier without a configured LLM backend. With BYOK, it unlocks at any tier.
---
## Email Sync
@ -201,6 +223,6 @@ Full documentation at: https://docs.circuitforge.tech/peregrine
## License
Core discovery pipeline: [MIT](LICENSE-MIT)
AI features (cover letter generation, company research, interview prep, UI): [BSL 1.1](LICENSE-BSL)
LLM features (cover letter generation, company research, interview prep, UI): [BSL 1.1](LICENSE-BSL)
© 2026 Circuit Forge LLC

View file

@ -19,11 +19,14 @@ _profile = UserProfile(_USER_YAML) if UserProfile.exists(_USER_YAML) else None
_name = _profile.name if _profile else "Job Seeker"
from scripts.db import init_db, get_job_counts, purge_jobs, purge_email_data, \
purge_non_remote, archive_jobs, kill_stuck_tasks, get_task_for_job, get_active_tasks, \
insert_job, get_existing_urls
purge_non_remote, archive_jobs, kill_stuck_tasks, cancel_task, \
get_task_for_job, get_active_tasks, insert_job, get_existing_urls
from scripts.task_runner import submit_task
from app.cloud_session import resolve_session, get_db_path
_CONFIG_DIR = Path(__file__).parent.parent / "config"
_NOTION_CONNECTED = (_CONFIG_DIR / "integrations" / "notion.yaml").exists()
resolve_session("peregrine")
init_db(get_db_path())
@ -220,7 +223,7 @@ with mid:
disabled=unscored == 0):
with st.spinner("Scoring…"):
result = subprocess.run(
["conda", "run", "-n", "job-seeker", "python", "scripts/match.py"],
[sys.executable, "scripts/match.py"],
capture_output=True, text=True,
cwd=str(Path(__file__).parent.parent),
)
@ -234,6 +237,7 @@ with mid:
with right:
approved_count = get_job_counts(get_db_path()).get("approved", 0)
if _NOTION_CONNECTED:
st.subheader("Send to Notion")
st.caption("Push all approved jobs to your Notion tracking database.")
if approved_count == 0:
@ -248,6 +252,11 @@ with right:
count = sync_to_notion(get_db_path())
st.success(f"Synced {count} job{'s' if count != 1 else ''} to Notion!")
st.rerun()
else:
st.subheader("Set up a sync integration")
st.caption("Connect an integration to push approved jobs to your tracking database.")
if st.button("⚙️ Go to Integrations", use_container_width=True):
st.switch_page("pages/2_Settings.py")
st.divider()
@ -367,177 +376,144 @@ _scrape_status()
st.divider()
# ── Danger zone: purge + re-scrape ────────────────────────────────────────────
# ── Danger zone ───────────────────────────────────────────────────────────────
with st.expander("⚠️ Danger Zone", expanded=False):
# ── Queue reset (the common case) ─────────────────────────────────────────
st.markdown("**Queue reset**")
st.caption(
"**Purge** permanently deletes jobs from the local database. "
"Applied and synced jobs are never touched."
"Archive clears your review queue while keeping job URLs for dedup, "
"so the same listings won't resurface on the next discovery run. "
"Use hard purge only if you want a full clean slate including dedup history."
)
purge_col, rescrape_col, email_col, tasks_col = st.columns(4)
_scope = st.radio(
"Clear scope",
["Pending only", "Pending + approved (stale search)"],
horizontal=True,
label_visibility="collapsed",
)
_scope_statuses = (
["pending"] if _scope == "Pending only" else ["pending", "approved"]
)
with purge_col:
st.markdown("**Purge pending & rejected**")
st.caption("Removes all _pending_ and _rejected_ listings so the next discovery starts fresh.")
if st.button("🗑 Purge Pending + Rejected", use_container_width=True):
st.session_state["confirm_purge"] = "partial"
_qc1, _qc2, _qc3 = st.columns([2, 2, 4])
if _qc1.button("📦 Archive & reset", use_container_width=True, type="primary"):
st.session_state["confirm_dz"] = "archive"
if _qc2.button("🗑 Hard purge (delete)", use_container_width=True):
st.session_state["confirm_dz"] = "purge"
if st.session_state.get("confirm_purge") == "partial":
st.warning("Are you sure? This cannot be undone.")
c1, c2 = st.columns(2)
if c1.button("Yes, purge", type="primary", use_container_width=True):
deleted = purge_jobs(get_db_path(), statuses=["pending", "rejected"])
st.success(f"Purged {deleted} jobs.")
st.session_state.pop("confirm_purge", None)
if st.session_state.get("confirm_dz") == "archive":
st.info(
f"Archive **{', '.join(_scope_statuses)}** jobs? "
"URLs are kept for dedup — nothing is permanently deleted."
)
_dc1, _dc2 = st.columns(2)
if _dc1.button("Yes, archive", type="primary", use_container_width=True, key="dz_archive_confirm"):
n = archive_jobs(get_db_path(), statuses=_scope_statuses)
st.success(f"Archived {n} jobs.")
st.session_state.pop("confirm_dz", None)
st.rerun()
if c2.button("Cancel", use_container_width=True):
st.session_state.pop("confirm_purge", None)
if _dc2.button("Cancel", use_container_width=True, key="dz_archive_cancel"):
st.session_state.pop("confirm_dz", None)
st.rerun()
with email_col:
st.markdown("**Purge email data**")
st.caption("Clears all email thread logs and email-sourced pending jobs so the next sync starts fresh.")
if st.button("📧 Purge Email Data", use_container_width=True):
st.session_state["confirm_purge"] = "email"
if st.session_state.get("confirm_purge") == "email":
st.warning("This deletes all email contacts and email-sourced jobs. Cannot be undone.")
c1, c2 = st.columns(2)
if c1.button("Yes, purge emails", type="primary", use_container_width=True):
contacts, jobs = purge_email_data(get_db_path())
st.success(f"Purged {contacts} email contacts, {jobs} email jobs.")
st.session_state.pop("confirm_purge", None)
if st.session_state.get("confirm_dz") == "purge":
st.warning(
f"Permanently delete **{', '.join(_scope_statuses)}** jobs? "
"This removes the URLs from dedup history too. Cannot be undone."
)
_dc1, _dc2 = st.columns(2)
if _dc1.button("Yes, delete", type="primary", use_container_width=True, key="dz_purge_confirm"):
n = purge_jobs(get_db_path(), statuses=_scope_statuses)
st.success(f"Deleted {n} jobs.")
st.session_state.pop("confirm_dz", None)
st.rerun()
if c2.button("Cancel ", use_container_width=True):
st.session_state.pop("confirm_purge", None)
if _dc2.button("Cancel", use_container_width=True, key="dz_purge_cancel"):
st.session_state.pop("confirm_dz", None)
st.rerun()
with tasks_col:
st.divider()
# ── Background tasks ──────────────────────────────────────────────────────
_active = get_active_tasks(get_db_path())
st.markdown("**Kill stuck tasks**")
st.caption(f"Force-fail all queued/running background tasks. Currently **{len(_active)}** active.")
if st.button("⏹ Kill All Tasks", use_container_width=True, disabled=len(_active) == 0):
st.markdown(f"**Background tasks** — {len(_active)} active")
if _active:
# Icons keyed by task_type. Research tasks are labelled under "company_research"
# in the sidebar task labels, so that key is mapped here too; the older
# "research" key is kept in case any task rows still use it.
_task_icons = {"cover_letter": "✉️", "research": "🔍", "company_research": "🔍", "discovery": "🌐", "enrich_descriptions": "📝"}
for _t in _active:
_tc1, _tc2, _tc3 = st.columns([3, 4, 2])
_icon = _task_icons.get(_t["task_type"], "⚙️")
_tc1.caption(f"{_icon} `{_t['task_type']}`")
_job_label = f"{_t['title']} @ {_t['company']}" if _t.get("title") else f"job #{_t['job_id']}"
_tc2.caption(_job_label)
_tc3.caption(f"_{_t['status']}_")
if st.button("✕ Cancel", key=f"dz_cancel_task_{_t['id']}", use_container_width=True):
cancel_task(get_db_path(), _t["id"])
st.rerun()
st.caption("")
_kill_col, _ = st.columns([2, 6])
if _kill_col.button("⏹ Kill all stuck", use_container_width=True, disabled=len(_active) == 0):
killed = kill_stuck_tasks(get_db_path())
st.success(f"Killed {killed} task(s).")
st.rerun()
with rescrape_col:
st.markdown("**Purge all & re-scrape**")
st.caption("Wipes _all_ non-applied, non-synced jobs then immediately runs a fresh discovery.")
if st.button("🔄 Purge All + Re-scrape", use_container_width=True):
st.session_state["confirm_purge"] = "full"
if st.session_state.get("confirm_purge") == "full":
st.warning("This will delete ALL pending, approved, and rejected jobs, then re-scrape. Applied and synced records are kept.")
c1, c2 = st.columns(2)
if c1.button("Yes, wipe + scrape", type="primary", use_container_width=True):
purge_jobs(get_db_path(), statuses=["pending", "approved", "rejected"])
submit_task(get_db_path(), "discovery", 0)
st.session_state.pop("confirm_purge", None)
st.rerun()
if c2.button("Cancel ", use_container_width=True):
st.session_state.pop("confirm_purge", None)
st.rerun()
st.divider()
pending_col, nonremote_col, approved_col, _ = st.columns(4)
# ── Rarely needed (collapsed) ─────────────────────────────────────────────
with st.expander("More options", expanded=False):
_rare1, _rare2, _rare3 = st.columns(3)
with pending_col:
st.markdown("**Purge pending review**")
st.caption("Removes only _pending_ listings, keeping your rejected history intact.")
if st.button("🗑 Purge Pending Only", use_container_width=True):
st.session_state["confirm_purge"] = "pending_only"
if st.session_state.get("confirm_purge") == "pending_only":
st.warning("Deletes all pending jobs. Rejected jobs are kept. Cannot be undone.")
c1, c2 = st.columns(2)
if c1.button("Yes, purge pending", type="primary", use_container_width=True):
deleted = purge_jobs(get_db_path(), statuses=["pending"])
st.success(f"Purged {deleted} pending jobs.")
st.session_state.pop("confirm_purge", None)
with _rare1:
st.markdown("**Purge email data**")
st.caption("Clears all email thread logs and email-sourced pending jobs.")
if st.button("📧 Purge Email Data", use_container_width=True):
st.session_state["confirm_dz"] = "email"
if st.session_state.get("confirm_dz") == "email":
st.warning("Deletes all email contacts and email-sourced jobs. Cannot be undone.")
_ec1, _ec2 = st.columns(2)
if _ec1.button("Yes, purge emails", type="primary", use_container_width=True, key="dz_email_confirm"):
contacts, jobs = purge_email_data(get_db_path())
st.success(f"Purged {contacts} email contacts, {jobs} email jobs.")
st.session_state.pop("confirm_dz", None)
st.rerun()
if c2.button("Cancel ", use_container_width=True):
st.session_state.pop("confirm_purge", None)
if _ec2.button("Cancel", use_container_width=True, key="dz_email_cancel"):
st.session_state.pop("confirm_dz", None)
st.rerun()
with nonremote_col:
with _rare2:
st.markdown("**Purge non-remote**")
st.caption("Removes pending/approved/rejected jobs where remote is not set. Keeps anything already in the pipeline.")
st.caption("Removes pending/approved/rejected on-site listings from the DB.")
if st.button("🏢 Purge On-site Jobs", use_container_width=True):
st.session_state["confirm_purge"] = "non_remote"
if st.session_state.get("confirm_purge") == "non_remote":
st.session_state["confirm_dz"] = "non_remote"
if st.session_state.get("confirm_dz") == "non_remote":
st.warning("Deletes all non-remote jobs not yet applied to. Cannot be undone.")
c1, c2 = st.columns(2)
if c1.button("Yes, purge on-site", type="primary", use_container_width=True):
_rc1, _rc2 = st.columns(2)
if _rc1.button("Yes, purge on-site", type="primary", use_container_width=True, key="dz_nonremote_confirm"):
deleted = purge_non_remote(get_db_path())
st.success(f"Purged {deleted} non-remote jobs.")
st.session_state.pop("confirm_purge", None)
st.session_state.pop("confirm_dz", None)
st.rerun()
if c2.button("Cancel ", use_container_width=True):
st.session_state.pop("confirm_purge", None)
if _rc2.button("Cancel", use_container_width=True, key="dz_nonremote_cancel"):
st.session_state.pop("confirm_dz", None)
st.rerun()
with approved_col:
st.markdown("**Purge approved (unapplied)**")
st.caption("Removes _approved_ jobs you haven't applied to yet — e.g. to reset after a review pass.")
if st.button("🗑 Purge Approved", use_container_width=True):
st.session_state["confirm_purge"] = "approved_only"
if st.session_state.get("confirm_purge") == "approved_only":
st.warning("Deletes all approved-but-not-applied jobs. Cannot be undone.")
c1, c2 = st.columns(2)
if c1.button("Yes, purge approved", type="primary", use_container_width=True):
deleted = purge_jobs(get_db_path(), statuses=["approved"])
st.success(f"Purged {deleted} approved jobs.")
st.session_state.pop("confirm_purge", None)
with _rare3:
st.markdown("**Wipe all + re-scrape**")
st.caption("Deletes all non-applied jobs then immediately runs a fresh discovery.")
if st.button("🔄 Wipe + Re-scrape", use_container_width=True):
st.session_state["confirm_dz"] = "rescrape"
if st.session_state.get("confirm_dz") == "rescrape":
st.warning("Wipes ALL pending, approved, and rejected jobs, then re-scrapes. Applied and synced records are kept.")
_wc1, _wc2 = st.columns(2)
if _wc1.button("Yes, wipe + scrape", type="primary", use_container_width=True, key="dz_rescrape_confirm"):
purge_jobs(get_db_path(), statuses=["pending", "approved", "rejected"])
submit_task(get_db_path(), "discovery", 0)
st.session_state.pop("confirm_dz", None)
st.rerun()
if c2.button("Cancel ", use_container_width=True):
st.session_state.pop("confirm_purge", None)
st.rerun()
st.divider()
archive_col1, archive_col2, _, _ = st.columns(4)
with archive_col1:
st.markdown("**Archive remaining**")
st.caption(
"Move all _pending_ and _rejected_ jobs to archived status. "
"Archived jobs stay in the DB for dedup — they just won't appear in Job Review."
)
if st.button("📦 Archive Pending + Rejected", use_container_width=True):
st.session_state["confirm_purge"] = "archive_remaining"
if st.session_state.get("confirm_purge") == "archive_remaining":
st.info("Jobs will be archived (not deleted) — URLs are kept for dedup.")
c1, c2 = st.columns(2)
if c1.button("Yes, archive", type="primary", use_container_width=True):
archived = archive_jobs(get_db_path(), statuses=["pending", "rejected"])
st.success(f"Archived {archived} jobs.")
st.session_state.pop("confirm_purge", None)
st.rerun()
if c2.button("Cancel ", use_container_width=True):
st.session_state.pop("confirm_purge", None)
st.rerun()
with archive_col2:
st.markdown("**Archive approved (unapplied)**")
st.caption("Archive _approved_ listings you decided to skip — keeps history without cluttering the apply queue.")
if st.button("📦 Archive Approved", use_container_width=True):
st.session_state["confirm_purge"] = "archive_approved"
if st.session_state.get("confirm_purge") == "archive_approved":
st.info("Approved jobs will be archived (not deleted).")
c1, c2 = st.columns(2)
if c1.button("Yes, archive approved", type="primary", use_container_width=True):
archived = archive_jobs(get_db_path(), statuses=["approved"])
st.success(f"Archived {archived} approved jobs.")
st.session_state.pop("confirm_purge", None)
st.rerun()
if c2.button("Cancel ", use_container_width=True):
st.session_state.pop("confirm_purge", None)
if _wc2.button("Cancel", use_container_width=True, key="dz_rescrape_cancel"):
st.session_state.pop("confirm_dz", None)
st.rerun()
# ── Setup banners ─────────────────────────────────────────────────────────────

View file

@ -17,22 +17,39 @@ sys.path.insert(0, str(Path(__file__).parent.parent))
logging.basicConfig(level=logging.WARNING, format="%(name)s %(levelname)s: %(message)s")
# Load .env before any os.environ reads — safe to call inside Docker too
# (uses setdefault, so Docker-injected vars take precedence over .env values)
from circuitforge_core.config.settings import load_env as _load_env
_load_env(Path(__file__).parent.parent / ".env")
IS_DEMO = os.environ.get("DEMO_MODE", "").lower() in ("1", "true", "yes")
import streamlit as st
from scripts.db import DEFAULT_DB, init_db, get_active_tasks
from scripts.db_migrate import migrate_db
from app.feedback import inject_feedback_button
from app.cloud_session import resolve_session, get_db_path, get_config_dir
from app.cloud_session import resolve_session, get_db_path, get_config_dir, get_cloud_tier
import sqlite3
_LOGO_CIRCLE = Path(__file__).parent / "static" / "peregrine_logo_circle.png"
_LOGO_FULL = Path(__file__).parent / "static" / "peregrine_logo.png"
st.set_page_config(
page_title="Peregrine",
page_icon="💼",
page_icon=str(_LOGO_CIRCLE) if _LOGO_CIRCLE.exists() else "💼",
layout="wide",
)
resolve_session("peregrine")
init_db(get_db_path())
migrate_db(Path(get_db_path()))
# Demo tier — initialize once per session (cookie persistence handled client-side)
if IS_DEMO and "simulated_tier" not in st.session_state:
st.session_state["simulated_tier"] = "paid"
if _LOGO_CIRCLE.exists():
st.logo(str(_LOGO_CIRCLE), icon_image=str(_LOGO_CIRCLE))
# ── Startup cleanup — runs once per server process via cache_resource ──────────
@st.cache_resource
@ -89,6 +106,15 @@ _show_wizard = not IS_DEMO and (
if _show_wizard:
_setup_page = st.Page("pages/0_Setup.py", title="Setup", icon="👋")
st.navigation({"": [_setup_page]}).run()
# Sync UI cookie even during wizard so vue preference redirects correctly.
# Tier not yet computed here — use cloud tier (or "free" fallback).
try:
from app.components.ui_switcher import sync_ui_cookie as _sync_wizard_cookie
from app.cloud_session import get_cloud_tier as _gctr
_wizard_tier = _gctr() if _gctr() != "local" else "free"
_sync_wizard_cookie(_USER_YAML, _wizard_tier)
except Exception:
pass
st.stop()
# ── Navigation ─────────────────────────────────────────────────────────────────
@ -113,6 +139,21 @@ pg = st.navigation(pages)
# ── Background task sidebar indicator ─────────────────────────────────────────
# Fragment polls every 3s so stage labels update live without a full page reload.
# The sidebar context WRAPS the fragment call — do not write to st.sidebar inside it.
_TASK_LABELS = {
"cover_letter": "Cover letter",
"company_research": "Research",
"email_sync": "Email sync",
"discovery": "Discovery",
"enrich_descriptions": "Enriching descriptions",
"score": "Scoring matches",
"scrape_url": "Scraping listing",
"enrich_craigslist": "Enriching listing",
"wizard_generate": "Wizard generation",
"prepare_training": "Training data",
}
_DISCOVERY_PIPELINE = ["discovery", "enrich_descriptions", "score"]
@st.fragment(run_every=3)
def _task_indicator():
tasks = get_active_tasks(get_db_path())
@ -120,27 +161,30 @@ def _task_indicator():
return
st.divider()
st.markdown(f"**⏳ {len(tasks)} task(s) running**")
for t in tasks:
pipeline_set = set(_DISCOVERY_PIPELINE)
pipeline_tasks = [t for t in tasks if t["task_type"] in pipeline_set]
other_tasks = [t for t in tasks if t["task_type"] not in pipeline_set]
# Discovery pipeline: render as ordered sub-queue with indented steps
if pipeline_tasks:
ordered = [
next((t for t in pipeline_tasks if t["task_type"] == typ), None)
for typ in _DISCOVERY_PIPELINE
]
ordered = [t for t in ordered if t is not None]
for i, t in enumerate(ordered):
icon = "" if t["status"] == "running" else "🕐"
task_type = t["task_type"]
if task_type == "cover_letter":
label = "Cover letter"
elif task_type == "company_research":
label = "Research"
elif task_type == "email_sync":
label = "Email sync"
elif task_type == "discovery":
label = "Discovery"
elif task_type == "enrich_descriptions":
label = "Enriching"
elif task_type == "scrape_url":
label = "Scraping URL"
elif task_type == "wizard_generate":
label = "Wizard generation"
elif task_type == "enrich_craigslist":
label = "Enriching listing"
else:
label = task_type.replace("_", " ").title()
label = _TASK_LABELS.get(t["task_type"], t["task_type"].replace("_", " ").title())
stage = t.get("stage") or ""
detail = f" · {stage}" if stage else ""
prefix = "" if i == 0 else ""
st.caption(f"{prefix}{icon} {label}{detail}")
# All other tasks (cover letter, email sync, etc.) as individual rows
for t in other_tasks:
icon = "" if t["status"] == "running" else "🕐"
label = _TASK_LABELS.get(t["task_type"], t["task_type"].replace("_", " ").title())
stage = t.get("stage") or ""
detail = f" · {stage}" if stage else (f"{t.get('company')}" if t.get("company") else "")
st.caption(f"{icon} {label}{detail}")
@ -156,6 +200,13 @@ def _get_version() -> str:
except Exception:
return "dev"
# ── Effective tier (resolved before sidebar so switcher can use it) ──────────
# get_cloud_tier() returns "local" in dev/self-hosted mode, real tier in cloud.
_ui_profile = _UserProfile(_USER_YAML) if _UserProfile.exists(_USER_YAML) else None
_ui_yaml_tier = _ui_profile.effective_tier if _ui_profile else "free"
_ui_cloud_tier = get_cloud_tier()
_ui_tier = _ui_cloud_tier if _ui_cloud_tier != "local" else _ui_yaml_tier
with st.sidebar:
if IS_DEMO:
st.info(
@ -185,7 +236,31 @@ with st.sidebar:
)
st.divider()
try:
from app.components.ui_switcher import render_sidebar_switcher
render_sidebar_switcher(_USER_YAML, _ui_tier)
except Exception:
pass # never crash the app over the sidebar switcher
st.caption(f"Peregrine {_get_version()}")
inject_feedback_button(page=pg.title)
# ── Demo toolbar (DEMO_MODE only) ───────────────────────────────────────────
if IS_DEMO:
from app.components.demo_toolbar import render_demo_toolbar
render_demo_toolbar()
# ── UI switcher banner (paid tier; or all visitors in demo mode) ─────────────
try:
from app.components.ui_switcher import render_banner
render_banner(_USER_YAML, _ui_tier)
except Exception:
pass # never crash the app over the banner
pg.run()
# ── UI preference cookie sync (runs after page render) ──────────────────────
try:
from app.components.ui_switcher import sync_ui_cookie
sync_ui_cookie(_USER_YAML, _ui_tier)
except Exception:
pass # never crash the app over cookie sync

View file

@ -40,6 +40,26 @@ def _extract_session_token(cookie_header: str) -> str:
return m.group(1).strip() if m else ""
def _ensure_provisioned(user_id: str, product: str) -> None:
    """Ask Heimdall to provision a free-tier key for this user (best effort).

    POSTs to ``{HEIMDALL_URL}/admin/provision`` with the Directus user id, the
    product name and ``tier="free"``. Intended to be idempotent — Heimdall is
    expected to do nothing if a key already exists for this
    (user_id, product) pair — so that new Google OAuth signups get a free key
    created automatically when their session is first resolved.

    Does nothing when HEIMDALL_ADMIN_TOKEN is unset. Never raises: transport
    errors and non-2xx responses are logged as warnings.

    Args:
        user_id: Directus user id, sent as ``directus_user_id``.
        product: Product name to provision the key for.
    """
    if not HEIMDALL_ADMIN_TOKEN:
        return
    try:
        response = requests.post(
            f"{HEIMDALL_URL}/admin/provision",
            json={"directus_user_id": user_id, "product": product, "tier": "free"},
            headers={"Authorization": f"Bearer {HEIMDALL_ADMIN_TOKEN}"},
            timeout=5,
        )
        # requests does not raise on 4xx/5xx by itself; without this check a
        # rejected provisioning call would pass silently.
        response.raise_for_status()
    except Exception as exc:
        log.warning("Heimdall provision failed for user %s: %s", user_id, exc)
@st.cache_data(ttl=300, show_spinner=False)
def _fetch_cloud_tier(user_id: str, product: str) -> str:
"""Call Heimdall to resolve the current cloud tier for this user.
@ -92,6 +112,28 @@ def derive_db_key(user_id: str) -> str:
).hexdigest()
def _render_auth_wall(message: str = "Please sign in to continue.") -> None:
    """Draw the sign-in prompt shown to visitors without a valid session.

    Injects CSS that hides the sidebar and its collapsed toggle, then renders
    a heading, ``message`` as an info box, and a link button to the
    CircuitForge login page in the middle of three columns.

    This function only renders; callers follow it with ``st.stop()`` to halt
    the page.
    """
    hide_sidebar_css = """
<style>
[data-testid="stSidebar"] { display: none; }
[data-testid="collapsedControl"] { display: none; }
</style>
"""
    st.markdown(hide_sidebar_css, unsafe_allow_html=True)

    # Middle column of a 1:2:1 split keeps the prompt centred.
    _left, center, _right = st.columns([1, 2, 1])
    with center:
        st.markdown("## 🦅 Peregrine")
        st.info(message, icon="🔒")
        st.link_button(
            "Sign in to CircuitForge",
            url="https://circuitforge.tech/login?next=/peregrine",
            use_container_width=True,
        )
def resolve_session(app: str = "peregrine") -> None:
"""
Call at the top of each Streamlit page.
@ -109,32 +151,38 @@ def resolve_session(app: str = "peregrine") -> None:
if st.session_state.get("user_id"):
return
cookie_header = st.context.headers.get("x-cf-session", "")
# Primary: Caddy injects X-CF-Session header in production.
# Fallback: direct access (E2E tests, dev without Caddy) reads the cookie header.
cookie_header = (
st.context.headers.get("x-cf-session", "")
or st.context.headers.get("cookie", "")
)
session_jwt = _extract_session_token(cookie_header)
if not session_jwt:
st.components.v1.html(
'<script>window.top.location.href = "https://circuitforge.tech/login";</script>',
height=0,
)
_render_auth_wall("Please sign in to access Peregrine.")
st.stop()
try:
user_id = validate_session_jwt(session_jwt)
except Exception:
st.components.v1.html(
'<script>window.top.location.href = "https://circuitforge.tech/login";</script>',
height=0,
)
_render_auth_wall("Your session has expired. Please sign in again.")
st.stop()
user_path = _user_data_path(user_id, app)
user_path.mkdir(parents=True, exist_ok=True)
(user_path / "config").mkdir(exist_ok=True)
config_path = user_path / "config"
config_path.mkdir(exist_ok=True)
(user_path / "data").mkdir(exist_ok=True)
# Bootstrap config files that the UI requires to exist — never overwrite
_kw = config_path / "resume_keywords.yaml"
if not _kw.exists():
_kw.write_text("skills: []\ndomains: []\nkeywords: []\n")
st.session_state["user_id"] = user_id
st.session_state["db_path"] = user_path / "staging.db"
st.session_state["db_key"] = derive_db_key(user_id)
_ensure_provisioned(user_id, app)
st.session_state["cloud_tier"] = _fetch_cloud_tier(user_id, app)
@ -157,7 +205,7 @@ def get_config_dir() -> Path:
"""
if CLOUD_MODE and st.session_state.get("db_path"):
return Path(st.session_state["db_path"]).parent / "config"
return Path(__file__).parent.parent.parent / "config"
return Path(__file__).parent.parent / "config"
def get_cloud_tier() -> str:

View file

@ -0,0 +1,72 @@
"""Demo toolbar — tier simulation for DEMO_MODE instances.
Renders a slim full-width bar above the Streamlit nav showing
Free / Paid / Premium pills. Clicking a pill sets a prgn_demo_tier
cookie (for persistence across reloads) and st.session_state.simulated_tier
(for immediate use within the current render pass).
Only ever rendered when DEMO_MODE=true.
"""
from __future__ import annotations
import os
import streamlit as st
import streamlit.components.v1 as components
_VALID_TIERS = ("free", "paid", "premium")
_DEFAULT_TIER = "paid" # most compelling first impression
_DEMO_MODE = os.environ.get("DEMO_MODE", "").lower() in ("1", "true", "yes")
_COOKIE_JS = """
<script>
(function() {{
document.cookie = 'prgn_demo_tier={tier}; path=/; SameSite=Lax';
}})();
</script>
"""
def get_simulated_tier() -> str:
    """Look up the simulated tier for the current session.

    Reads ``simulated_tier`` from ``st.session_state`` and falls back to
    ``_DEFAULT_TIER`` when the key is absent.
    """
    tier = st.session_state.get("simulated_tier", _DEFAULT_TIER)
    return tier
def set_simulated_tier(tier: str) -> None:
    """Store ``tier`` as the simulated tier, write the cookie, and rerun.

    Values outside ``_VALID_TIERS`` are ignored. For a valid tier the value is
    written to ``st.session_state["simulated_tier"]``, a JS snippet setting
    the ``prgn_demo_tier`` cookie is injected, and ``st.rerun()`` is called.
    """
    if tier in _VALID_TIERS:
        st.session_state["simulated_tier"] = tier
        cookie_script = _COOKIE_JS.format(tier=tier)
        components.html(cookie_script, height=0)
        st.rerun()
def render_demo_toolbar() -> None:
"""Render the demo mode toolbar.
Shows an info bar with one button per tier in _VALID_TIERS; the currently
simulated tier is drawn as the primary button. Clicking a different tier
calls set_simulated_tier(). A "Get your own" link and a divider follow.
NOTE(review): this bar was previously described as "dismissible", but no
dismiss control is visible in this function — confirm.
Call this at the TOP of app.py's render pass, before pg.run().
"""
current = get_simulated_tier()
# NOTE(review): both branches of the conditional below append "" as shown here;
# presumably an active-tier marker glyph was lost — verify against the repository.
labels = {t: t.capitalize() + ("" if t == current else "") for t in _VALID_TIERS}
with st.container():
# Five columns: caption, one per tier (indices 1-3), then the link.
cols = st.columns([3, 1, 1, 1, 2])
with cols[0]:
st.caption("🎭 **Demo mode** — exploring as:")
for i, tier in enumerate(_VALID_TIERS):
with cols[i + 1]:
is_active = tier == current
if st.button(
labels[tier],
key=f"_demo_tier_{tier}",
type="primary" if is_active else "secondary",
use_container_width=True,
):
# Clicking the already-active tier is a no-op.
if not is_active:
set_simulated_tier(tier)
with cols[4]:
st.caption("[Get your own →](https://circuitforge.tech/software/peregrine)")
st.divider()

View file

@ -0,0 +1,262 @@
"""UI switcher component for Peregrine.
Manages the prgn_ui cookie (Caddy routing signal) and user.yaml
ui_preference (durability across browser clears).
Cookie mechanics
----------------
Streamlit cannot read HTTP cookies server-side. Instead:
- sync_ui_cookie() injects a JS snippet that sets document.cookie.
- Vue SPA switch-back appends ?prgn_switch=streamlit to the redirect URL.
sync_ui_cookie() reads this param via st.query_params and uses it as
an override signal, then writes user.yaml to match.
Call sync_ui_cookie() in the app.py render pass (after pg.run()).
"""
from __future__ import annotations
import os
from pathlib import Path
import streamlit as st
import streamlit.components.v1 as components
from scripts.user_profile import UserProfile
from app.wizard.tiers import can_use
_DEMO_MODE = os.environ.get("DEMO_MODE", "").lower() in ("1", "true", "yes")
# When set, the app is running without a Caddy reverse proxy in front
# (local dev, direct port exposure). Switch to Vue by navigating directly
# to this URL instead of relying on cookie-based Caddy routing.
# Example: PEREGRINE_VUE_URL=http://localhost:8506
_VUE_URL = os.environ.get("PEREGRINE_VUE_URL", "").strip().rstrip("/")
# When True, a window.location.reload() after setting prgn_ui=vue will be
# intercepted by Caddy and routed to the Vue SPA. When False (no Caddy in the
# traffic path — e.g. test instances, direct Docker exposure), reloading just
# comes back to Streamlit and creates an infinite loop. Only set this in
# production/staging compose files where Caddy is actually in front.
_CADDY_PROXY = os.environ.get("PEREGRINE_CADDY_PROXY", "").lower() in ("1", "true", "yes")
_COOKIE_JS = """
<script>
(function() {{
document.cookie = 'prgn_ui={value}; path=/; SameSite=Lax';
{navigate_js}
}})();
</script>
"""
def _set_cookie_js(value: str, navigate: bool = False) -> None:
    """Inject JS that sets the prgn_ui cookie and, optionally, navigates.

    Args:
        value: Cookie value to write; expected to be "streamlit" or "vue".
        navigate: When True, also move the host page according to the rules
            below.

    Navigation rules:
    - PEREGRINE_VUE_URL set (local dev, no Caddy) and value == "vue": assign
      window.parent.location.href to jump directly to the Vue container port.
      Without this, reload() just sends the request back to the same
      Streamlit port with no router in between to inspect the cookie.
    - PEREGRINE_CADDY_PROXY set (production/staging): call
      window.parent.location.reload() so Caddy sees the updated cookie on the
      next HTTP request and routes accordingly.
    - Neither set (test instances, bare Docker): navigation is suppressed
      entirely — the cookie is written silently, but no reload is attempted.
      Reloading without a proxy just bounces back to Streamlit and loops.
    """
    import json  # local import: only needed to quote the URL for JavaScript

    # components.html() renders in an iframe — window.parent navigates the host page
    if navigate and value == "vue" and _VUE_URL:
        # json.dumps produces a correctly escaped JS string literal, so quotes
        # or backslashes in PEREGRINE_VUE_URL cannot terminate the statement.
        nav_js = f"window.parent.location.href = {json.dumps(_VUE_URL)};"
    elif navigate and _CADDY_PROXY:
        nav_js = "window.parent.location.reload();"
    else:
        nav_js = ""
    components.html(_COOKIE_JS.format(value=value, navigate_js=nav_js), height=0)
def sync_ui_cookie(yaml_path: Path, tier: str) -> None:
    """Sync the prgn_ui cookie to match user.yaml ui_preference.

    Also handles:
    - ?ui_fallback=1 param: Vue SPA was down — reinforce the streamlit cookie
      and return early to avoid immediately navigating back to a broken Vue SPA.
    - ?prgn_switch=<value> param (Vue SPA switch-back signal): overrides yaml,
      writes yaml to match, clears the param.
    - Tier downgrade: resets vue preference to streamlit for ineligible users.
    - Unrecognised ui_preference values (anything other than "streamlit" or
      "vue"): treated as "streamlit" so they never reach the cookie script.

    When the resolved preference is "vue", this function navigates (full page
    reload) rather than silently setting the cookie. Without navigate=True,
    Streamlit would set prgn_ui=vue mid-page-load; subsequent HTTP requests
    made by Streamlit's own frontend (lazy JS chunks, WebSocket upgrade) would
    carry the new cookie and Caddy would misroute them to the Vue nginx
    container, causing TypeError: error loading dynamically imported module.

    Args:
        yaml_path: Path to user.yaml, passed to UserProfile.
        tier: Effective tier, checked with can_use(tier, "vue_ui_beta").
    """
    # ── ?ui_fallback=1 — Vue SPA was down, Caddy bounced us back ──────────────
    # Return early: reinforce the streamlit cookie so we don't immediately
    # navigate back to a Vue SPA that may still be down.
    if st.query_params.get("ui_fallback"):
        st.toast("⚠️ New UI temporarily unavailable — switched back to Classic", icon="⚠️")
        st.query_params.pop("ui_fallback", None)
        _set_cookie_js("streamlit")
        return

    # ── ?prgn_switch param — Vue SPA sent us here to switch back ──────────────
    switch_param = st.query_params.get("prgn_switch")
    if switch_param in ("streamlit", "vue"):
        try:
            profile = UserProfile(yaml_path)
            profile.ui_preference = switch_param
            profile.save()
        except Exception:
            # UI components must not crash the app — silent fallback
            pass
        st.query_params.pop("prgn_switch", None)
        _set_cookie_js(switch_param)
        return

    # ── Normal path: read yaml, enforce tier, inject cookie ───────────────────
    profile = None
    try:
        profile = UserProfile(yaml_path)
        pref = profile.ui_preference
    except Exception:
        # UI components must not crash the app — silent fallback to default
        pref = "streamlit"

    # The value is interpolated into a JS cookie assignment, so only the two
    # known preferences may pass; a missing or hand-edited yaml value falls
    # back to the default instead of being written into the cookie.
    if pref not in ("streamlit", "vue"):
        pref = "streamlit"

    # Demo mode: Vue SPA has no demo data wiring — always serve Streamlit.
    # (The tier downgrade check below is skipped in demo mode, but we must
    # also block the Vue navigation itself so Caddy doesn't route to a blank SPA.)
    if pref == "vue" and _DEMO_MODE:
        pref = "streamlit"

    # Tier downgrade protection (skip in demo — demo bypasses tier gate)
    if pref == "vue" and not _DEMO_MODE and not can_use(tier, "vue_ui_beta"):
        if profile is not None:
            try:
                profile.ui_preference = "streamlit"
                profile.save()
            except Exception:
                # UI components must not crash the app — silent fallback
                pass
        pref = "streamlit"

    # Navigate (full reload) when switching to Vue so Caddy re-routes on the
    # next HTTP request before Streamlit serves any more content. Silent
    # cookie-only set is safe for streamlit since we're already on that origin.
    _set_cookie_js(pref, navigate=(pref == "vue"))
def switch_ui(yaml_path: Path, to: str, tier: str) -> None:
"""Write user.yaml, set cookie, and navigate.
to: "vue" | "streamlit" (any other value returns without doing anything)
Switching to Vue calls _set_cookie_js("vue", navigate=True) so the browser
issues a fresh HTTP request and the updated prgn_ui cookie can take effect.
st.rerun() alone is not sufficient — it operates over WebSocket and
produces no HTTP request.
Switching back to streamlit goes through sync_ui_cookie() (no full reload
needed since we're already on the Streamlit origin and no Caddy re-routing
is required).
A failure to write user.yaml is swallowed; the cookie step still runs.
"""
if to not in ("vue", "streamlit"):
return
try:
profile = UserProfile(yaml_path)
profile.ui_preference = to
profile.save()
except Exception:
# UI components must not crash the app — silent fallback
pass
if to == "vue":
# navigate=True asks _set_cookie_js to navigate/reload after setting the cookie
_set_cookie_js("vue", navigate=True)
else:
sync_ui_cookie(yaml_path, tier=tier)
# NOTE(review): indentation is not visible here, so it is unclear whether this
# rerun belongs to the else branch only or runs for both targets — confirm.
st.rerun()
def render_banner(yaml_path: Path, tier: str) -> None:
    """Show the 'Try the new UI' banner until the user dismisses it.

    Dismissal is recorded by appending "ui_switcher_beta" to the
    dismissed_banners list in user.yaml, so it persists across sessions.

    Shown only when all of these hold:
    - demo mode is on, or can_use(tier, "vue_ui_beta") is true;
    - the profile at yaml_path can be loaded;
    - the profile's ui_preference is not already "vue";
    - "ui_switcher_beta" is not in the profile's dismissed_banners.
    """
    if not (_DEMO_MODE or can_use(tier, "vue_ui_beta")):
        return

    try:
        profile = UserProfile(yaml_path)
    except Exception:
        # UI components must not crash the app — silent fallback
        return

    if profile.ui_preference == "vue":
        return
    if "ui_switcher_beta" in (profile.dismissed_banners or []):
        return

    message_col, try_col, dismiss_col = st.columns([8, 1, 1])
    with message_col:
        st.info("✨ **New Peregrine UI available** — try the modern Vue interface (Beta)")
    with try_col:
        wants_vue = st.button("Try it", key="_ui_banner_try")
        if wants_vue:
            switch_ui(yaml_path, to="vue", tier=tier)
    with dismiss_col:
        wants_dismiss = st.button("Dismiss", key="_ui_banner_dismiss")
        if wants_dismiss:
            already_dismissed = list(profile.dismissed_banners or [])
            profile.dismissed_banners = already_dismissed + ["ui_switcher_beta"]
            profile.save()
            st.rerun()
def render_sidebar_switcher(yaml_path: Path, tier: str) -> None:
    """Render the sidebar button that switches the user to the Vue UI.

    Nothing is drawn when the user is not eligible (neither demo mode nor
    ``can_use(tier, "vue_ui_beta")``) or when the stored preference is already
    ``"vue"``. Unlike the banner, this control has no dismiss state.

    If the profile cannot be read, the button is still offered.
    """
    if not (_DEMO_MODE or can_use(tier, "vue_ui_beta")):
        return

    try:
        already_on_vue = UserProfile(yaml_path).ui_preference == "vue"
    except Exception:
        # Unreadable profile: treat as "not on Vue" and keep the button.
        already_on_vue = False

    if already_on_vue:
        return

    clicked = st.button("✨ Switch to New UI", key="_sidebar_switch_vue", use_container_width=True)
    if clicked:
        switch_ui(yaml_path, to="vue", tier=tier)
def render_settings_toggle(yaml_path: Path, tier: str) -> None:
    """Render the UI-version radio (Settings → System → Deployment expander).

    Shown only in demo mode or when ``can_use(tier, "vue_ui_beta")`` is true.
    The radio is pre-selected from the profile's ``ui_preference``; when the
    selected value differs from the stored one, ``switch_ui`` is invoked.
    """
    if not (_DEMO_MODE or can_use(tier, "vue_ui_beta")):
        return

    try:
        current = UserProfile(yaml_path).ui_preference
    except Exception:
        # UI components must not crash the app — fall back to the default UI
        current = "streamlit"

    # (stored value, display label) pairs, in display order.
    choices = [
        ("streamlit", "Classic (Streamlit)"),
        ("vue", "✨ New UI (Vue, Beta)"),
    ]
    values = [value for value, _ in choices]
    captions = [caption for _, caption in choices]
    start_at = values.index(current) if current in values else 0

    st.markdown("**UI Version**")
    picked_caption = st.radio(
        "UI Version",
        options=captions,
        index=start_at,
        key="_ui_toggle_radio",
        label_visibility="collapsed",
    )

    picked = values[captions.index(picked_caption)]
    if picked == current:
        return
    switch_ui(yaml_path, to=picked, tier=tier)

View file

@ -403,9 +403,10 @@ elif step == 4:
st.caption("Used in cover letter PDFs, LLM prompts, and the app header.")
c1, c2 = st.columns(2)
name = c1.text_input("Full Name *", saved_yaml.get("name", ""))
email = c1.text_input("Email *", saved_yaml.get("email", ""))
phone = c2.text_input("Phone", saved_yaml.get("phone", ""))
_parsed = st.session_state.get("_parsed_resume", {})
name = c1.text_input("Full Name *", saved_yaml.get("name") or _parsed.get("name", ""))
email = c1.text_input("Email *", saved_yaml.get("email") or _parsed.get("email", ""))
phone = c2.text_input("Phone", saved_yaml.get("phone") or _parsed.get("phone", ""))
linkedin = c2.text_input("LinkedIn URL", saved_yaml.get("linkedin", ""))
# Career summary with optional LLM generation — resume text available now (step 3 ran first)
@ -456,6 +457,11 @@ elif step == 5:
from app.wizard.step_inference import validate
st.subheader("Step 5 \u2014 Inference & API Keys")
st.info(
"**Simplest setup:** set `OLLAMA_HOST` in your `.env` file — "
"Peregrine auto-detects it, no config file needed. "
"Or use the fields below to configure API keys and endpoints."
)
profile = saved_yaml.get("inference_profile", "remote")
if profile == "remote":
@ -465,8 +471,18 @@ elif step == 5:
placeholder="https://api.together.xyz/v1")
openai_key = st.text_input("Endpoint API Key (optional)", type="password",
key="oai_key") if openai_url else ""
ollama_host = st.text_input("Ollama host (optional \u2014 local fallback)",
placeholder="http://localhost:11434",
key="ollama_host_input")
ollama_model = st.text_input("Ollama model (optional)",
value="llama3.2:3b",
key="ollama_model_input")
else:
st.info(f"Local mode ({profile}): Ollama provides inference.")
import os
_ollama_host_env = os.environ.get("OLLAMA_HOST", "")
if _ollama_host_env:
st.caption(f"OLLAMA_HOST from .env: `{_ollama_host_env}`")
anthropic_key = openai_url = openai_key = ""
with st.expander("Advanced \u2014 Service Ports & Hosts"):
@ -545,6 +561,14 @@ elif step == 5:
if anthropic_key or openai_url:
env_path.write_text("\n".join(env_lines) + "\n")
if profile == "remote":
if ollama_host:
env_lines = _set_env(env_lines, "OLLAMA_HOST", ollama_host)
if ollama_model:
env_lines = _set_env(env_lines, "OLLAMA_MODEL", ollama_model)
if ollama_host or ollama_model:
env_path.write_text("\n".join(env_lines) + "\n")
_save_yaml({"services": svc, "wizard_step": 5})
st.session_state.wizard_step = 6
st.rerun()
@ -630,7 +654,7 @@ elif step == 6:
)
default_profile = {
"name": "default",
"job_titles": titles,
"titles": titles,
"locations": locations,
"remote_only": False,
"boards": ["linkedin", "indeed", "glassdoor", "zip_recruiter"],

View file

@ -12,12 +12,15 @@ from scripts.db import (
DEFAULT_DB, init_db, get_jobs_by_status, update_job_status,
update_cover_letter, mark_applied, get_email_leads,
)
from app.cloud_session import resolve_session, get_db_path
resolve_session("peregrine")
st.title("📋 Job Review")
init_db(DEFAULT_DB)
init_db(get_db_path())
_email_leads = get_email_leads(DEFAULT_DB)
_email_leads = get_email_leads(get_db_path())
# ── Sidebar filters ────────────────────────────────────────────────────────────
with st.sidebar:
@ -37,7 +40,7 @@ with st.sidebar:
index=0,
)
jobs = get_jobs_by_status(DEFAULT_DB, show_status)
jobs = get_jobs_by_status(get_db_path(), show_status)
if remote_only:
jobs = [j for j in jobs if j.get("is_remote")]
@ -86,11 +89,11 @@ if show_status == "pending" and _email_leads:
with right_l:
if st.button("✅ Approve", key=f"el_approve_{lead_id}",
type="primary", use_container_width=True):
update_job_status(DEFAULT_DB, [lead_id], "approved")
update_job_status(get_db_path(), [lead_id], "approved")
st.rerun()
if st.button("❌ Reject", key=f"el_reject_{lead_id}",
use_container_width=True):
update_job_status(DEFAULT_DB, [lead_id], "rejected")
update_job_status(get_db_path(), [lead_id], "rejected")
st.rerun()
st.divider()
@ -162,7 +165,7 @@ for job in jobs:
)
save_col, _ = st.columns([2, 5])
if save_col.button("💾 Save draft", key=f"save_cl_{job_id}"):
update_cover_letter(DEFAULT_DB, job_id, st.session_state[_cl_key])
update_cover_letter(get_db_path(), job_id, st.session_state[_cl_key])
st.success("Saved!")
# Applied date + cover letter preview (applied/synced)
@ -182,11 +185,11 @@ for job in jobs:
if show_status == "pending":
if st.button("✅ Approve", key=f"approve_{job_id}",
type="primary", use_container_width=True):
update_job_status(DEFAULT_DB, [job_id], "approved")
update_job_status(get_db_path(), [job_id], "approved")
st.rerun()
if st.button("❌ Reject", key=f"reject_{job_id}",
use_container_width=True):
update_job_status(DEFAULT_DB, [job_id], "rejected")
update_job_status(get_db_path(), [job_id], "rejected")
st.rerun()
elif show_status == "approved":
@ -198,6 +201,6 @@ for job in jobs:
use_container_width=True):
cl_text = st.session_state.get(f"cl_{job_id}", "")
if cl_text:
update_cover_letter(DEFAULT_DB, job_id, cl_text)
mark_applied(DEFAULT_DB, [job_id])
update_cover_letter(get_db_path(), job_id, cl_text)
mark_applied(get_db_path(), [job_id])
st.rerun()

View file

@ -323,6 +323,26 @@ with tab_search:
_run_suggest = st.button("✨ Suggest", key="sp_suggest_btn",
help="Ask the LLM to suggest additional titles and smarter exclude keywords — using your blocklist, mission values, and career background.")
_title_sugg_count = len((st.session_state.get("_sp_suggestions") or {}).get("suggested_titles", []))
if _title_sugg_count:
st.markdown(f"""<style>
@keyframes _pg_arrow_float {{
0%, 100% {{
transform: translateY(0px);
filter: drop-shadow(0 0 2px #4fc3f7);
}}
50% {{
transform: translateY(4px);
filter: drop-shadow(0 0 8px #4fc3f7);
}}
}}
/* Target the expand-arrow SVG inside the multiselect dropdown indicator */
.stMultiSelect [data-baseweb="select"] > div + div svg {{
animation: _pg_arrow_float 1.3s ease-in-out infinite;
cursor: pointer;
}}
</style>""", unsafe_allow_html=True)
st.multiselect(
"Job titles",
options=st.session_state.get("_sp_title_options", p.get("titles", [])),
@ -330,6 +350,14 @@ with tab_search:
help="Select from known titles. Suggestions from ✨ Suggest appear here — pick the ones you want.",
label_visibility="collapsed",
)
if _title_sugg_count:
st.markdown(
f'<div style="font-size:0.8em; color:#4fc3f7; margin-top:-10px; margin-bottom:4px;">'
f'&nbsp;↑&nbsp;{_title_sugg_count} new suggestion{"s" if _title_sugg_count != 1 else ""} '
f'added — open the dropdown to browse</div>',
unsafe_allow_html=True,
)
_add_t_col, _add_t_btn = st.columns([5, 1])
with _add_t_col:
st.text_input("Add a title", key="_sp_new_title", label_visibility="collapsed",
@ -373,21 +401,31 @@ with tab_search:
with st.spinner("Asking LLM for suggestions…"):
try:
suggestions = _suggest_search_terms(_current_titles, RESUME_PATH, _blocklist, _user_profile)
except RuntimeError as _e:
except Exception as _e:
_err_msg = str(_e)
if "exhausted" in _err_msg.lower() or isinstance(_e, RuntimeError):
st.warning(
f"No LLM backend available: {_e}. "
f"No LLM backend available: {_err_msg}. "
"Check that Ollama is running and has GPU access, or enable a cloud backend in Settings → System → LLM.",
icon="⚠️",
)
else:
st.error(f"Suggestion failed: {_err_msg}", icon="🚨")
suggestions = None
if suggestions is not None:
# Add suggested titles to options list (not auto-selected — user picks from dropdown)
_opts = list(st.session_state.get("_sp_title_options", []))
for _t in suggestions.get("suggested_titles", []):
if _t not in _opts:
_opts.append(_t)
_new_titles = [_t for _t in suggestions.get("suggested_titles", []) if _t not in _opts]
_opts.extend(_new_titles)
st.session_state["_sp_title_options"] = _opts
st.session_state["_sp_suggestions"] = suggestions
if not _new_titles and not suggestions.get("suggested_excludes"):
_resume_hint = " Upload your resume in Settings → Resume Profile for better results." if not RESUME_PATH.exists() else ""
st.info(
f"No new suggestions found — the LLM didn't generate anything new for these titles.{_resume_hint}",
icon="",
)
else:
st.rerun()
if st.session_state.get("_sp_suggestions"):
@ -786,6 +824,7 @@ with tab_resume:
try:
_kw_sugg = _suggest_resume_keywords(RESUME_PATH, _kw_current)
st.session_state["_kw_suggestions"] = _kw_sugg
st.rerun()
except RuntimeError as _e:
st.warning(
f"No LLM backend available: {_e}. "
@ -812,6 +851,13 @@ with tab_resume:
kw_current: list[str] = kw_data.get(kw_category, [])
kw_suggestions = _load_sugg(kw_category)
# If a custom tag was added last render, clear the multiselect's session
# state key NOW (before the widget is created) so Streamlit uses `default`
# instead of the stale session state that lacks the new tag.
_reset_key = f"_kw_reset_{kw_category}"
if st.session_state.pop(_reset_key, False):
st.session_state.pop(f"kw_ms_{kw_category}", None)
# Merge: suggestions first, then any custom tags not in suggestions
kw_custom = [t for t in kw_current if t not in kw_suggestions]
kw_options = kw_suggestions + kw_custom
@ -832,6 +878,7 @@ with tab_resume:
label_visibility="collapsed",
placeholder=f"Custom: {kw_placeholder}",
)
_tag_just_added = False
if kw_btn_col.button("", key=f"kw_add_{kw_category}", help="Add custom tag"):
cleaned = _filter_tag(kw_raw)
if cleaned is None:
@ -839,13 +886,19 @@ with tab_resume:
elif cleaned in kw_options:
st.info(f"'{cleaned}' is already in the list — select it above.")
else:
# Persist custom tag: add to YAML and session state so it appears in options
# Save to YAML and set a reset flag so the multiselect session
# state is cleared before the widget renders on the next rerun,
# allowing `default` (which includes the new tag) to take effect.
kw_new_list = kw_selected + [cleaned]
st.session_state[_reset_key] = True
kw_data[kw_category] = kw_new_list
kw_changed = True
_tag_just_added = True
# Detect multiselect changes
if sorted(kw_selected) != sorted(kw_current):
# Detect multiselect changes. Skip when a tag was just added — the change
# detection would otherwise overwrite kw_data with the old kw_selected
# (which doesn't include the new tag) in the same render.
if not _tag_just_added and sorted(kw_selected) != sorted(kw_current):
kw_data[kw_category] = kw_selected
kw_changed = True
@ -998,6 +1051,11 @@ with tab_system:
_env_path.write_text("\n".join(_env_lines) + "\n")
st.success("Deployment settings saved. Run `./manage.sh restart` to apply.")
st.divider()
from app.components.ui_switcher import render_settings_toggle as _render_ui_toggle
_ui_tier = _profile.tier if _profile else "free"
_render_ui_toggle(yaml_path=_USER_YAML, tier=_ui_tier)
st.divider()
# ── LLM Backends ─────────────────────────────────────────────────────────

View file

@ -15,28 +15,28 @@ import streamlit.components.v1 as components
import yaml
from scripts.user_profile import UserProfile
_USER_YAML = Path(__file__).parent.parent.parent / "config" / "user.yaml"
_profile = UserProfile(_USER_YAML) if UserProfile.exists(_USER_YAML) else None
_name = _profile.name if _profile else "Job Seeker"
from scripts.db import (
DEFAULT_DB, init_db, get_jobs_by_status,
update_cover_letter, mark_applied, update_job_status,
get_task_for_job,
)
from scripts.task_runner import submit_task
from app.cloud_session import resolve_session, get_db_path
from app.cloud_session import resolve_session, get_db_path, get_config_dir
from app.telemetry import log_usage_event
DOCS_DIR = _profile.docs_dir if _profile else Path.home() / "Documents" / "JobSearch"
RESUME_YAML = Path(__file__).parent.parent.parent / "config" / "plain_text_resume.yaml"
st.title("🚀 Apply Workspace")
resolve_session("peregrine")
init_db(get_db_path())
_CONFIG_DIR = get_config_dir()
_USER_YAML = _CONFIG_DIR / "user.yaml"
_profile = UserProfile(_USER_YAML) if UserProfile.exists(_USER_YAML) else None
_name = _profile.name if _profile else "Job Seeker"
DOCS_DIR = _profile.docs_dir if _profile else Path.home() / "Documents" / "JobSearch"
RESUME_YAML = _CONFIG_DIR / "plain_text_resume.yaml"
# ── PDF generation ─────────────────────────────────────────────────────────────
def _make_cover_letter_pdf(job: dict, cover_letter: str, output_dir: Path) -> Path:
from reportlab.lib.pagesizes import letter

View file

@ -31,31 +31,41 @@ _name = _profile.name if _profile else "Job Seeker"
from scripts.db import (
DEFAULT_DB, init_db,
get_interview_jobs, advance_to_stage, reject_at_stage,
set_interview_date, add_contact, get_contacts,
set_interview_date, set_calendar_event_id, add_contact, get_contacts,
get_research, get_task_for_job, get_job_by_id,
get_unread_stage_signals, dismiss_stage_signal,
)
from scripts.task_runner import submit_task
from app.cloud_session import resolve_session, get_db_path
resolve_session("peregrine")
_CONFIG_DIR = Path(__file__).parent.parent.parent / "config"
_CALENDAR_INTEGRATIONS = ("apple_calendar", "google_calendar")
_calendar_connected = any(
(_CONFIG_DIR / "integrations" / f"{n}.yaml").exists()
for n in _CALENDAR_INTEGRATIONS
)
st.title("🎯 Interviews")
init_db(DEFAULT_DB)
init_db(get_db_path())
# ── Sidebar: Email sync ────────────────────────────────────────────────────────
with st.sidebar:
st.markdown("### 📧 Email Sync")
_email_task = get_task_for_job(DEFAULT_DB, "email_sync", 0)
_email_task = get_task_for_job(get_db_path(), "email_sync", 0)
_email_running = _email_task and _email_task["status"] in ("queued", "running")
if st.button("🔄 Sync Emails", use_container_width=True, type="primary",
disabled=bool(_email_running)):
submit_task(DEFAULT_DB, "email_sync", 0)
submit_task(get_db_path(), "email_sync", 0)
st.rerun()
if _email_running:
@st.fragment(run_every=4)
def _email_sidebar_status():
t = get_task_for_job(DEFAULT_DB, "email_sync", 0)
t = get_task_for_job(get_db_path(), "email_sync", 0)
if t and t["status"] in ("queued", "running"):
st.info("⏳ Syncing…")
else:
@ -92,7 +102,7 @@ STAGE_NEXT_LABEL = {
}
# ── Data ──────────────────────────────────────────────────────────────────────
jobs_by_stage = get_interview_jobs(DEFAULT_DB)
jobs_by_stage = get_interview_jobs(get_db_path())
# ── Helpers ───────────────────────────────────────────────────────────────────
def _days_ago(date_str: str | None) -> str:
@ -113,8 +123,8 @@ def _days_ago(date_str: str | None) -> str:
def _research_modal(job: dict) -> None:
job_id = job["id"]
st.caption(f"**{job.get('company')}** — {job.get('title')}")
research = get_research(DEFAULT_DB, job_id=job_id)
task = get_task_for_job(DEFAULT_DB, "company_research", job_id)
research = get_research(get_db_path(), job_id=job_id)
task = get_task_for_job(get_db_path(), "company_research", job_id)
running = task and task["status"] in ("queued", "running")
if running:
@ -137,7 +147,7 @@ def _research_modal(job: dict) -> None:
"inaccuracies. SearXNG is now available — re-run to get verified facts."
)
if st.button("🔄 Re-run with live data", key=f"modal_rescrape_{job_id}", type="primary"):
submit_task(DEFAULT_DB, "company_research", job_id)
submit_task(get_db_path(), "company_research", job_id)
st.rerun()
st.divider()
else:
@ -153,14 +163,14 @@ def _research_modal(job: dict) -> None:
)
st.markdown(research["raw_output"])
if st.button("🔄 Refresh", key=f"modal_regen_{job_id}", disabled=bool(running)):
submit_task(DEFAULT_DB, "company_research", job_id)
submit_task(get_db_path(), "company_research", job_id)
st.rerun()
else:
st.info("No research brief yet.")
if task and task["status"] == "failed":
st.error(f"Last attempt failed: {task.get('error', '')}")
if st.button("🔬 Generate now", key=f"modal_gen_{job_id}"):
submit_task(DEFAULT_DB, "company_research", job_id)
submit_task(get_db_path(), "company_research", job_id)
st.rerun()
@ -168,7 +178,7 @@ def _research_modal(job: dict) -> None:
def _email_modal(job: dict) -> None:
job_id = job["id"]
st.caption(f"**{job.get('company')}** — {job.get('title')}")
contacts = get_contacts(DEFAULT_DB, job_id=job_id)
contacts = get_contacts(get_db_path(), job_id=job_id)
if not contacts:
st.info("No emails logged yet. Use the form below to add one.")
@ -239,7 +249,7 @@ def _email_modal(job: dict) -> None:
body_text = st.text_area("Body / notes", height=80, key=f"body_modal_{job_id}")
if st.form_submit_button("📧 Save contact"):
add_contact(
DEFAULT_DB, job_id=job_id,
get_db_path(), job_id=job_id,
direction=direction, subject=subject,
from_addr=from_addr, body=body_text, received_at=recv_at,
)
@ -248,7 +258,7 @@ def _email_modal(job: dict) -> None:
def _render_card(job: dict, stage: str, compact: bool = False) -> None:
"""Render a single job card appropriate for the given stage."""
job_id = job["id"]
contacts = get_contacts(DEFAULT_DB, job_id=job_id)
contacts = get_contacts(get_db_path(), job_id=job_id)
last_contact = contacts[-1] if contacts else None
with st.container(border=True):
@ -271,13 +281,26 @@ def _render_card(job: dict, stage: str, compact: bool = False) -> None:
format="YYYY-MM-DD",
)
if st.form_submit_button("📅 Save date"):
set_interview_date(DEFAULT_DB, job_id=job_id, date_str=str(new_date))
set_interview_date(get_db_path(), job_id=job_id, date_str=str(new_date))
st.success("Saved!")
st.rerun()
# Calendar push — only shown when a date is saved and an integration is connected
if current_idate and _calendar_connected:
_has_event = bool(job.get("calendar_event_id"))
_cal_label = "🔄 Update Calendar" if _has_event else "📅 Add to Calendar"
if st.button(_cal_label, key=f"cal_push_{job_id}", use_container_width=True):
from scripts.calendar_push import push_interview_event
result = push_interview_event(get_db_path(), job_id=job_id, config_dir=_CONFIG_DIR)
if result["ok"]:
st.success(f"Event {'updated' if _has_event else 'added'} ({result['provider'].replace('_', ' ').title()})")
st.rerun()
else:
st.error(result["error"])
if not compact:
if stage in ("applied", "phone_screen", "interviewing"):
signals = get_unread_stage_signals(DEFAULT_DB, job_id=job_id)
signals = get_unread_stage_signals(get_db_path(), job_id=job_id)
if signals:
sig = signals[-1]
_SIGNAL_TO_STAGE = {
@ -298,23 +321,23 @@ def _render_card(job: dict, stage: str, compact: bool = False) -> None:
if sig["stage_signal"] == "rejected":
if b1.button("✗ Reject", key=f"sig_rej_{sig['id']}",
use_container_width=True):
reject_at_stage(DEFAULT_DB, job_id=job_id, rejection_stage=stage)
dismiss_stage_signal(DEFAULT_DB, sig["id"])
reject_at_stage(get_db_path(), job_id=job_id, rejection_stage=stage)
dismiss_stage_signal(get_db_path(), sig["id"])
st.rerun(scope="app")
elif target_stage and b1.button(
f"{target_label}", key=f"sig_adv_{sig['id']}",
use_container_width=True, type="primary",
):
if target_stage == "phone_screen" and stage == "applied":
advance_to_stage(DEFAULT_DB, job_id=job_id, stage="phone_screen")
submit_task(DEFAULT_DB, "company_research", job_id)
advance_to_stage(get_db_path(), job_id=job_id, stage="phone_screen")
submit_task(get_db_path(), "company_research", job_id)
elif target_stage:
advance_to_stage(DEFAULT_DB, job_id=job_id, stage=target_stage)
dismiss_stage_signal(DEFAULT_DB, sig["id"])
advance_to_stage(get_db_path(), job_id=job_id, stage=target_stage)
dismiss_stage_signal(get_db_path(), sig["id"])
st.rerun(scope="app")
if b2.button("Dismiss", key=f"sig_dis_{sig['id']}",
use_container_width=True):
dismiss_stage_signal(DEFAULT_DB, sig["id"])
dismiss_stage_signal(get_db_path(), sig["id"])
st.rerun()
# Advance / Reject buttons
@ -326,16 +349,16 @@ def _render_card(job: dict, stage: str, compact: bool = False) -> None:
f"{next_label}", key=f"adv_{job_id}",
use_container_width=True, type="primary",
):
advance_to_stage(DEFAULT_DB, job_id=job_id, stage=next_stage)
advance_to_stage(get_db_path(), job_id=job_id, stage=next_stage)
if next_stage == "phone_screen":
submit_task(DEFAULT_DB, "company_research", job_id)
submit_task(get_db_path(), "company_research", job_id)
st.rerun(scope="app") # full rerun — card must appear in new column
if c2.button(
"✗ Reject", key=f"rej_{job_id}",
use_container_width=True,
):
reject_at_stage(DEFAULT_DB, job_id=job_id, rejection_stage=stage)
reject_at_stage(get_db_path(), job_id=job_id, rejection_stage=stage)
st.rerun() # fragment-scope rerun — card disappears without scroll-to-top
if job.get("url"):
@ -365,7 +388,7 @@ def _render_card(job: dict, stage: str, compact: bool = False) -> None:
@st.fragment
def _card_fragment(job_id: int, stage: str) -> None:
"""Re-fetches the job on each fragment rerun; renders nothing if moved/rejected."""
job = get_job_by_id(DEFAULT_DB, job_id)
job = get_job_by_id(get_db_path(), job_id)
if job is None or job.get("status") != stage:
return
_render_card(job, stage)
@ -374,11 +397,11 @@ def _card_fragment(job_id: int, stage: str) -> None:
@st.fragment
def _pre_kanban_row_fragment(job_id: int) -> None:
"""Pre-kanban compact row for applied and survey-stage jobs."""
job = get_job_by_id(DEFAULT_DB, job_id)
job = get_job_by_id(get_db_path(), job_id)
if job is None or job.get("status") not in ("applied", "survey"):
return
stage = job["status"]
contacts = get_contacts(DEFAULT_DB, job_id=job_id)
contacts = get_contacts(get_db_path(), job_id=job_id)
last_contact = contacts[-1] if contacts else None
with st.container(border=True):
@ -394,7 +417,7 @@ def _pre_kanban_row_fragment(job_id: int) -> None:
_email_modal(job)
# Stage signal hint (email-detected next steps)
signals = get_unread_stage_signals(DEFAULT_DB, job_id=job_id)
signals = get_unread_stage_signals(get_db_path(), job_id=job_id)
if signals:
sig = signals[-1]
_SIGNAL_TO_STAGE = {
@ -417,15 +440,15 @@ def _pre_kanban_row_fragment(job_id: int) -> None:
use_container_width=True, type="primary",
):
if target_stage == "phone_screen":
advance_to_stage(DEFAULT_DB, job_id=job_id, stage="phone_screen")
submit_task(DEFAULT_DB, "company_research", job_id)
advance_to_stage(get_db_path(), job_id=job_id, stage="phone_screen")
submit_task(get_db_path(), "company_research", job_id)
else:
advance_to_stage(DEFAULT_DB, job_id=job_id, stage=target_stage)
dismiss_stage_signal(DEFAULT_DB, sig["id"])
advance_to_stage(get_db_path(), job_id=job_id, stage=target_stage)
dismiss_stage_signal(get_db_path(), sig["id"])
st.rerun(scope="app")
if s2.button("Dismiss", key=f"sig_dis_pre_{sig['id']}",
use_container_width=True):
dismiss_stage_signal(DEFAULT_DB, sig["id"])
dismiss_stage_signal(get_db_path(), sig["id"])
st.rerun()
with right:
@ -433,24 +456,24 @@ def _pre_kanban_row_fragment(job_id: int) -> None:
"→ 📞 Phone Screen", key=f"adv_pre_{job_id}",
use_container_width=True, type="primary",
):
advance_to_stage(DEFAULT_DB, job_id=job_id, stage="phone_screen")
submit_task(DEFAULT_DB, "company_research", job_id)
advance_to_stage(get_db_path(), job_id=job_id, stage="phone_screen")
submit_task(get_db_path(), "company_research", job_id)
st.rerun(scope="app")
col_a, col_b = st.columns(2)
if stage == "applied" and col_a.button(
"📋 Survey", key=f"to_survey_{job_id}", use_container_width=True,
):
advance_to_stage(DEFAULT_DB, job_id=job_id, stage="survey")
advance_to_stage(get_db_path(), job_id=job_id, stage="survey")
st.rerun(scope="app")
if col_b.button("✗ Reject", key=f"rej_pre_{job_id}", use_container_width=True):
reject_at_stage(DEFAULT_DB, job_id=job_id, rejection_stage=stage)
reject_at_stage(get_db_path(), job_id=job_id, rejection_stage=stage)
st.rerun()
@st.fragment
def _hired_card_fragment(job_id: int) -> None:
"""Compact hired job card — shown in the Offer/Hired column."""
job = get_job_by_id(DEFAULT_DB, job_id)
job = get_job_by_id(get_db_path(), job_id)
if job is None or job.get("status") != "hired":
return
with st.container(border=True):

View file

@ -25,11 +25,14 @@ from scripts.db import (
get_task_for_job,
)
from scripts.task_runner import submit_task
from app.cloud_session import resolve_session, get_db_path
init_db(DEFAULT_DB)
resolve_session("peregrine")
init_db(get_db_path())
# ── Job selection ─────────────────────────────────────────────────────────────
jobs_by_stage = get_interview_jobs(DEFAULT_DB)
jobs_by_stage = get_interview_jobs(get_db_path())
active_stages = ["phone_screen", "interviewing", "offer"]
active_jobs = [
j for stage in active_stages
@ -100,10 +103,10 @@ col_prep, col_context = st.columns([2, 3])
# ════════════════════════════════════════════════
with col_prep:
research = get_research(DEFAULT_DB, job_id=selected_id)
research = get_research(get_db_path(), job_id=selected_id)
# Refresh / generate research
_res_task = get_task_for_job(DEFAULT_DB, "company_research", selected_id)
_res_task = get_task_for_job(get_db_path(), "company_research", selected_id)
_res_running = _res_task and _res_task["status"] in ("queued", "running")
if not research:
@ -112,13 +115,13 @@ with col_prep:
if _res_task and _res_task["status"] == "failed":
st.error(f"Last attempt failed: {_res_task.get('error', '')}")
if st.button("🔬 Generate research brief", type="primary", use_container_width=True):
submit_task(DEFAULT_DB, "company_research", selected_id)
submit_task(get_db_path(), "company_research", selected_id)
st.rerun()
if _res_running:
@st.fragment(run_every=3)
def _res_status_initial():
t = get_task_for_job(DEFAULT_DB, "company_research", selected_id)
t = get_task_for_job(get_db_path(), "company_research", selected_id)
if t and t["status"] in ("queued", "running"):
stage = t.get("stage") or ""
lbl = "Queued…" if t["status"] == "queued" else (stage or "Generating… this may take 3060 seconds")
@ -133,13 +136,13 @@ with col_prep:
col_ts, col_btn = st.columns([3, 1])
col_ts.caption(f"Research generated: {generated_at}")
if col_btn.button("🔄 Refresh", use_container_width=True, disabled=bool(_res_running)):
submit_task(DEFAULT_DB, "company_research", selected_id)
submit_task(get_db_path(), "company_research", selected_id)
st.rerun()
if _res_running:
@st.fragment(run_every=3)
def _res_status_refresh():
t = get_task_for_job(DEFAULT_DB, "company_research", selected_id)
t = get_task_for_job(get_db_path(), "company_research", selected_id)
if t and t["status"] in ("queued", "running"):
stage = t.get("stage") or ""
lbl = "Queued…" if t["status"] == "queued" else (stage or "Refreshing research…")
@ -224,7 +227,11 @@ with col_prep:
st.markdown(msg["content"])
# Initial question if session is empty
if not st.session_state[qa_key]:
import os as _os
_is_demo = _os.environ.get("DEMO_MODE", "").lower() in ("1", "true", "yes")
if not st.session_state[qa_key] and _is_demo:
st.info("AI features are disabled in the public demo. Run your own instance to use Practice Q&A.")
elif not st.session_state[qa_key]:
with st.spinner("Setting up your mock interview…"):
try:
from scripts.llm_router import complete
@ -307,7 +314,7 @@ with col_context:
st.markdown(job.get("description") or "_No description saved for this listing._")
with tab_emails:
contacts = get_contacts(DEFAULT_DB, job_id=selected_id)
contacts = get_contacts(get_db_path(), job_id=selected_id)
if not contacts:
st.info("No contacts logged yet. Use the Interviews page to log emails.")
else:

View file

@ -22,10 +22,13 @@ from scripts.db import (
insert_survey_response, get_survey_responses,
)
from scripts.llm_router import LLMRouter
from app.cloud_session import resolve_session, get_db_path
resolve_session("peregrine")
st.title("📋 Survey Assistant")
init_db(DEFAULT_DB)
init_db(get_db_path())
# ── Vision service health check ────────────────────────────────────────────────
@ -40,7 +43,7 @@ def _vision_available() -> bool:
vision_up = _vision_available()
# ── Job selector ───────────────────────────────────────────────────────────────
jobs_by_stage = get_interview_jobs(DEFAULT_DB)
jobs_by_stage = get_interview_jobs(get_db_path())
survey_jobs = jobs_by_stage.get("survey", [])
other_jobs = (
jobs_by_stage.get("applied", []) +
@ -61,7 +64,7 @@ selected_job_id = st.selectbox(
format_func=lambda jid: job_labels[jid],
index=0,
)
selected_job = get_job_by_id(DEFAULT_DB, selected_job_id)
selected_job = get_job_by_id(get_db_path(), selected_job_id)
# ── LLM prompt builders ────────────────────────────────────────────────────────
_SURVEY_SYSTEM = (
@ -236,7 +239,7 @@ with right_col:
image_path = str(img_file)
insert_survey_response(
DEFAULT_DB,
get_db_path(),
job_id=selected_job_id,
survey_name=survey_name,
source=source,
@ -256,7 +259,7 @@ with right_col:
# ── History ────────────────────────────────────────────────────────────────────
st.divider()
st.subheader("📂 Response History")
history = get_survey_responses(DEFAULT_DB, job_id=selected_job_id)
history = get_survey_responses(get_db_path(), job_id=selected_job_id)
if not history:
st.caption("No saved responses for this job yet.")

View file

@ -1,7 +1,7 @@
"""
Tier definitions and feature gates for Peregrine.
Tiers: free < paid < premium
Tiers: free < paid < premium < ultra (ultra reserved; no Peregrine features use it yet)
FEATURES maps feature key → minimum tier required.
Features not in FEATURES are available to all tiers (free).
@ -22,9 +22,14 @@ Features that stay gated even with BYOK:
"""
from __future__ import annotations
import os as _os
from pathlib import Path
TIERS = ["free", "paid", "premium"]
from circuitforge_core.tiers import (
can_use as _core_can_use,
TIERS,
tier_label as _core_tier_label,
)
# Maps feature key → minimum tier string required.
# Features absent from this dict are free (available to all).
@ -58,6 +63,9 @@ FEATURES: dict[str, str] = {
"google_calendar_sync": "paid",
"apple_calendar_sync": "paid",
"slack_notifications": "paid",
# Beta UI access — open to all tiers (access management, not compute)
"vue_ui_beta": "free",
}
# Features that unlock when the user supplies any LLM backend (local or BYOK).
@ -75,6 +83,13 @@ BYOK_UNLOCKABLE: frozenset[str] = frozenset({
"survey_assistant",
})
# Demo mode flag — read from environment at module load time.
# Allows demo toolbar to override tier without accessing st.session_state (thread-safe).
# _DEMO_MODE is immutable after import for the process lifetime.
# DEMO_MODE must be set in the environment before the process starts (e.g., via
# Docker Compose environment:). Runtime toggling is not supported.
_DEMO_MODE = _os.environ.get("DEMO_MODE", "").lower() in ("1", "true", "yes")
# Free integrations (not in FEATURES):
# google_drive_sync, dropbox_sync, onedrive_sync, mega_sync,
# nextcloud_sync, discord_notifications, home_assistant
@ -101,34 +116,40 @@ def has_configured_llm(config_path: Path | None = None) -> bool:
return False
def can_use(tier: str, feature: str, has_byok: bool = False) -> bool:
def can_use(
tier: str,
feature: str,
has_byok: bool = False,
*,
demo_tier: str | None = None,
) -> bool:
"""Return True if the given tier has access to the feature.
has_byok: pass has_configured_llm() to unlock BYOK_UNLOCKABLE features
for users who supply their own LLM backend regardless of tier.
demo_tier: when set AND _DEMO_MODE is True, substitutes for `tier`.
Read from st.session_state by the *caller*, not here — keeps
this function thread-safe for background tasks and tests.
Returns True for unknown features (not gated).
Returns False for unknown/invalid tier strings.
"""
required = FEATURES.get(feature)
if required is None:
return True # not gated — available to all
effective_tier = demo_tier if (demo_tier is not None and _DEMO_MODE) else tier
# Pass Peregrine's BYOK_UNLOCKABLE via has_byok collapse — core's frozenset is empty
if has_byok and feature in BYOK_UNLOCKABLE:
return True
try:
return TIERS.index(tier) >= TIERS.index(required)
except ValueError:
return False # invalid tier string
return _core_can_use(feature, effective_tier, _features=FEATURES)
def tier_label(feature: str, has_byok: bool = False) -> str:
"""Return a display label for a locked feature, or '' if free/unlocked."""
if has_byok and feature in BYOK_UNLOCKABLE:
return ""
required = FEATURES.get(feature)
if required is None:
raw = _core_tier_label(feature, _features=FEATURES)
if not raw or raw == "free":
return ""
return "🔒 Paid" if required == "paid" else "⭐ Premium"
return "🔒 Paid" if raw == "paid" else "⭐ Premium"
def effective_tier(

View file

@ -13,12 +13,15 @@
services:
app:
build: .
build:
context: ..
dockerfile: peregrine/Dockerfile.cfcore
container_name: peregrine-cloud
ports:
- "8505:8501"
volumes:
- /devl/menagerie-data:/devl/menagerie-data # per-user data trees
- ./config/llm.cloud.yaml:/app/config/llm.yaml:ro # cloud-safe backends only (no claude_code/copilot/anthropic)
environment:
- CLOUD_MODE=true
- CLOUD_DATA_ROOT=/devl/menagerie-data
@ -31,7 +34,10 @@ services:
- DOCS_DIR=/tmp/cloud-docs
- STREAMLIT_SERVER_BASE_URL_PATH=peregrine
- PYTHONUNBUFFERED=1
- PEREGRINE_CADDY_PROXY=1
- CF_ORCH_URL=http://host.docker.internal:7700
- DEMO_MODE=false
- FORGEJO_API_TOKEN=${FORGEJO_API_TOKEN:-}
depends_on:
searxng:
condition: service_healthy
@ -39,6 +45,42 @@ services:
- "host.docker.internal:host-gateway"
restart: unless-stopped
api:
build:
context: ..
dockerfile: peregrine/Dockerfile.cfcore
command: >
bash -c "uvicorn dev_api:app --host 0.0.0.0 --port 8601"
volumes:
- /devl/menagerie-data:/devl/menagerie-data
- ./config/llm.cloud.yaml:/app/config/llm.yaml:ro
environment:
- CLOUD_MODE=true
- CLOUD_DATA_ROOT=/devl/menagerie-data
- STAGING_DB=/devl/menagerie-data/cloud-default.db
- DIRECTUS_JWT_SECRET=${DIRECTUS_JWT_SECRET}
- CF_SERVER_SECRET=${CF_SERVER_SECRET}
- PLATFORM_DB_URL=${PLATFORM_DB_URL}
- HEIMDALL_URL=${HEIMDALL_URL:-http://cf-license:8000}
- HEIMDALL_ADMIN_TOKEN=${HEIMDALL_ADMIN_TOKEN}
- PYTHONUNBUFFERED=1
- FORGEJO_API_TOKEN=${FORGEJO_API_TOKEN:-}
extra_hosts:
- "host.docker.internal:host-gateway"
restart: unless-stopped
web:
build:
context: .
dockerfile: docker/web/Dockerfile
args:
VITE_BASE_PATH: /peregrine/
ports:
- "8508:80"
depends_on:
- api
restart: unless-stopped
searxng:
image: searxng/searxng:latest
volumes:

View file

@ -38,6 +38,16 @@ services:
- "host.docker.internal:host-gateway"
restart: unless-stopped
web:
build:
context: .
dockerfile: docker/web/Dockerfile
args:
VITE_BASE_PATH: /peregrine/
ports:
- "8507:80"
restart: unless-stopped
searxng:
image: searxng/searxng:latest
volumes:

35
compose.test-cfcore.yml Normal file
View file

@ -0,0 +1,35 @@
# compose.test-cfcore.yml — single-user test instance for circuitforge-core integration
#
# Run from the PARENT directory of peregrine/ (the build context must include
# both peregrine/ and circuitforge-core/ as siblings):
#
# cd /devl (or /Library/Development/CircuitForge on dev)
# docker compose -f peregrine/compose.test-cfcore.yml --project-name peregrine-test up -d
# docker compose -f peregrine/compose.test-cfcore.yml --project-name peregrine-test logs -f
# docker compose -f peregrine/compose.test-cfcore.yml --project-name peregrine-test down
#
# UI: http://localhost:8516
# Purpose: smoke-test circuitforge-core shims (db, llm_router, tiers, task_scheduler)
# before promoting cfcore integration to the production cloud instance.
services:
app:
build:
context: ..
dockerfile: peregrine/Dockerfile.cfcore
container_name: peregrine-test-cfcore
ports:
- "8516:8501"
volumes:
- /devl/job-seeker:/devl/job-seeker
- /devl/job-seeker/config:/app/config
- /devl/job-seeker/config/llm.docker.yaml:/app/config/llm.yaml:ro
- /devl/job-seeker/config/user.docker.yaml:/app/config/user.yaml:ro
environment:
- STAGING_DB=/devl/job-seeker/staging.db
- PYTHONUNBUFFERED=1
- STREAMLIT_SERVER_BASE_URL_PATH=
- CF_ORCH_URL=http://host.docker.internal:7700
extra_hosts:
- "host.docker.internal:host-gateway"
restart: "no"

View file

@ -1,9 +1,11 @@
# compose.yml — Peregrine by Circuit Forge LLC
# Profiles: remote | cpu | single-gpu | dual-gpu-ollama | dual-gpu-vllm | dual-gpu-mixed
# Profiles: remote | cpu | single-gpu | dual-gpu-ollama
services:
app:
build: .
build:
context: ..
dockerfile: peregrine/Dockerfile.cfcore
command: >
bash -c "streamlit run app/app.py
--server.port=8501
@ -33,6 +35,7 @@ services:
- FORGEJO_API_URL=${FORGEJO_API_URL:-}
- PYTHONUNBUFFERED=1
- PYTHONLOGGING=WARNING
- PEREGRINE_CADDY_PROXY=1
depends_on:
searxng:
condition: service_healthy
@ -40,6 +43,39 @@ services:
- "host.docker.internal:host-gateway"
restart: unless-stopped
api:
build:
context: ..
dockerfile: peregrine/Dockerfile.cfcore
command: >
bash -c "uvicorn dev_api:app --host 0.0.0.0 --port 8601"
volumes:
- ./config:/app/config
- ./data:/app/data
- ${DOCS_DIR:-~/Documents/JobSearch}:/docs
environment:
- STAGING_DB=/app/data/staging.db
- DOCS_DIR=/docs
- ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY:-}
- OPENAI_COMPAT_URL=${OPENAI_COMPAT_URL:-}
- OPENAI_COMPAT_KEY=${OPENAI_COMPAT_KEY:-}
- PEREGRINE_GPU_COUNT=${PEREGRINE_GPU_COUNT:-0}
- PEREGRINE_GPU_NAMES=${PEREGRINE_GPU_NAMES:-}
- PYTHONUNBUFFERED=1
extra_hosts:
- "host.docker.internal:host-gateway"
restart: unless-stopped
web:
build:
context: .
dockerfile: docker/web/Dockerfile
ports:
- "${VUE_PORT:-8506}:80"
depends_on:
- api
restart: unless-stopped
searxng:
image: searxng/searxng:latest
ports:
@ -93,23 +129,6 @@ services:
profiles: [single-gpu, dual-gpu-ollama, dual-gpu-vllm, dual-gpu-mixed]
restart: unless-stopped
vllm:
image: vllm/vllm-openai:latest
ports:
- "${VLLM_PORT:-8000}:8000"
volumes:
- ${VLLM_MODELS_DIR:-~/models/vllm}:/models
command: >
--model /models/${VLLM_MODEL:-Ouro-1.4B}
--trust-remote-code
--max-model-len 4096
--gpu-memory-utilization 0.75
--enforce-eager
--max-num-seqs 8
--cpu-offload-gb ${CPU_OFFLOAD_GB:-0}
profiles: [dual-gpu-vllm, dual-gpu-mixed]
restart: unless-stopped
finetune:
build:
context: .

62
config/llm.cloud.yaml Normal file
View file

@ -0,0 +1,62 @@
backends:
anthropic:
api_key_env: ANTHROPIC_API_KEY
enabled: false
model: claude-sonnet-4-6
supports_images: true
type: anthropic
claude_code:
api_key: any
base_url: http://localhost:3009/v1
enabled: false
model: claude-code-terminal
supports_images: true
type: openai_compat
github_copilot:
api_key: any
base_url: http://localhost:3010/v1
enabled: false
model: gpt-4o
supports_images: false
type: openai_compat
ollama:
api_key: ollama
base_url: http://host.docker.internal:11434/v1
enabled: true
model: llama3.1:8b # generic — no personal fine-tunes in cloud
supports_images: false
type: openai_compat
ollama_research:
api_key: ollama
base_url: http://host.docker.internal:11434/v1
enabled: true
model: llama3.1:8b
supports_images: false
type: openai_compat
vision_service:
base_url: http://host.docker.internal:8002
enabled: true
supports_images: true
type: vision_service
vllm:
api_key: ''
base_url: http://host.docker.internal:8000/v1
enabled: true
model: __auto__
supports_images: false
type: openai_compat
vllm_research:
api_key: ''
base_url: http://host.docker.internal:8000/v1
enabled: true
model: __auto__
supports_images: false
type: openai_compat
fallback_order:
- vllm
- ollama
research_fallback_order:
- vllm_research
- ollama_research
vision_fallback_order:
- vision_service

View file

@ -28,9 +28,9 @@ backends:
type: openai_compat
ollama_research:
api_key: ollama
base_url: http://host.docker.internal:11434/v1
base_url: http://ollama_research:11434/v1
enabled: true
model: llama3.2:3b
model: llama3.1:8b
supports_images: false
type: openai_compat
vision_service:
@ -45,6 +45,11 @@ backends:
model: __auto__
supports_images: false
type: openai_compat
cf_orch:
service: vllm
model_candidates:
- Qwen2.5-3B-Instruct
ttl_s: 300
vllm_research:
api_key: ''
base_url: http://host.docker.internal:8000/v1

View file

@ -43,6 +43,7 @@ dev_tier_override: null # overrides tier locally (for testing only)
wizard_complete: false
wizard_step: 0
dismissed_banners: []
ui_preference: streamlit # UI preference — "streamlit" (default) or "vue" (Beta: Paid tier)
docs_dir: "~/Documents/JobSearch"
ollama_models_dir: "~/models/ollama"

View file

@ -22,7 +22,7 @@ mission_preferences:
social_impact: Want my work to reach people who need it most.
name: Demo User
nda_companies: []
ollama_models_dir: ~/models/ollama
ollama_models_dir: /root/models/ollama
phone: ''
services:
ollama_host: localhost
@ -39,6 +39,7 @@ services:
vllm_ssl: false
vllm_ssl_verify: true
tier: free
vllm_models_dir: ~/models/vllm
ui_preference: streamlit
vllm_models_dir: /root/models/vllm
wizard_complete: true
wizard_step: 0

2793
dev-api.py Normal file

File diff suppressed because it is too large Load diff

1
dev_api.py Symbolic link
View file

@ -0,0 +1 @@
dev-api.py

15
docker/web/Dockerfile Normal file
View file

@ -0,0 +1,15 @@
# Stage 1: build
FROM node:20-alpine AS build
WORKDIR /app
COPY web/package*.json ./
RUN npm ci --prefer-offline
COPY web/ ./
ARG VITE_BASE_PATH=/
ENV VITE_BASE_PATH=${VITE_BASE_PATH}
RUN npm run build
# Stage 2: serve
FROM nginx:alpine
COPY docker/web/nginx.conf /etc/nginx/conf.d/default.conf
COPY --from=build /app/dist /usr/share/nginx/html
EXPOSE 80

29
docker/web/nginx.conf Normal file
View file

@ -0,0 +1,29 @@
# Serves the built Vue SPA and proxies /api/ requests to the FastAPI backend.
server {
    listen 80;
    server_name _;
    client_max_body_size 20m;

    root /usr/share/nginx/html;
    index index.html;

    # Proxy API calls to the FastAPI backend service.
    # The "^~" modifier stops nginx from evaluating regex locations once this
    # prefix matches. Without it, an API path that ends in a static-asset
    # extension (e.g. /api/.../logo.png) would be captured by the regex
    # location below and served from disk instead of being proxied.
    # proxy_pass has no URI part, so the original request path is forwarded
    # to the backend unchanged.
    location ^~ /api/ {
        proxy_pass http://api:8601;
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_read_timeout 120s;
    }

    # Cache static assets for one year and mark them immutable.
    location ~* \.(js|css|png|jpg|jpeg|gif|ico|svg|woff2?)$ {
        expires 1y;
        add_header Cache-Control "public, immutable";
    }

    # SPA fallback: any path that is not an existing file or directory is
    # answered with index.html so client-side routing can handle it.
    # (Prefix locations are selected by longest match, not by file order.)
    location / {
        try_files $uri $uri/ /index.html;
    }
}

View file

@ -50,6 +50,36 @@ community-contributed and CF-freebie scrapers (free, MIT, in `scripts/plugins/`)
---
## Discovery — Jobgether Non-Headless Scraper
Design doc: `peregrine/docs/superpowers/specs/2026-03-15-jobgether-integration-design.md`
**Background:** Headless Playwright is blocked by Cloudflare Turnstile on all `jobgether.com` pages.
A non-headless Playwright instance backed by `Xvfb` (virtual framebuffer) renders as a real browser and
bypasses Turnstile. Heimdall already has Xvfb available.
**Live-inspection findings (2026-03-15):**
- Search URL: `https://jobgether.com/search-offers?keyword=<query>`
- Job cards: `div.new-opportunity` — one per listing
- Card URL: `div.new-opportunity > a[href*="/offer/"]` (`href` attr)
- Title: `#offer-body h3`
- Company: `#offer-body p.font-medium`
- Dedup: existing URL-based dedup in `discover.py` covers Jobgether↔other-board overlap
**Implementation tasks (blocked until Xvfb-Playwright integration is in place):**
- [ ] Add `Xvfb` launch helper to `scripts/custom_boards/` (shared util, or inline in scraper)
- [ ] Implement `scripts/custom_boards/jobgether.py` using `p.chromium.launch(headless=False)` with `DISPLAY=:99`
- [ ] Pre-launch `Xvfb :99 -screen 0 1280x720x24` (or assert `DISPLAY` is already set)
- [ ] Register `jobgether` in `discover.py` `CUSTOM_SCRAPERS` (currently omitted — no viable scraper)
- [ ] Add `jobgether` to `custom_boards` in remote-eligible profiles in `config/search_profiles.yaml`
- [ ] Remove or update the "Jobgether discovery scraper — decided against" note in the design spec
**Pre-condition:** Validate Xvfb approach manually (headless=False + `DISPLAY=:99`) before implementing.
The `filter-api.jobgether.com` endpoint still requires auth and `robots.txt` still blocks bots —
confirm Turnstile acceptance is the only remaining blocker before beginning.
---
## Settings / Data Management
- **Backup / Restore / Teleport** — Settings panel option to export a full config snapshot (user.yaml + all gitignored configs) as a zip, restore from a snapshot, and "teleport" (export + import to a new machine or Docker volume). Useful for migrations, multi-machine setups, and safe wizard testing.
@ -63,6 +93,31 @@ community-contributed and CF-freebie scrapers (free, MIT, in `scripts/plugins/`)
---
## LinkedIn Import
Shipped in v0.4.0. Ongoing maintenance and known decisions:
- **Selector maintenance** — LinkedIn changes their DOM periodically. When import stops working, update
CSS selectors in `scripts/linkedin_utils.py` only (all other files import from there). Real `data-section`
attribute values (as of 2025 DOM): `summary`, `currentPositionsDetails`, `educationsDetails`,
`certifications`, `posts`, `volunteering`, `publications`, `projects`.
- **Data export zip is the recommended path for full history** — LinkedIn's unauthenticated public profile
page is server-side degraded: experience titles, past roles, education, and skills are blurred/omitted.
Only available without login: name, About summary (truncated), current employer name, certifications.
The "Import from LinkedIn data export zip" expander (Settings → Resume Profile and Wizard step 3) is the
correct path for full career history. UI already shows an info callout explaining this.
- **LinkedIn OAuth — decided: not viable** — LinkedIn's OAuth API is restricted to approved partner
programs. Even if approved, it only grants name + email (not career history, experience, or skills).
This is a deliberate LinkedIn platform restriction, not a technical gap. Do not pursue this path.
- **Selector test harness** (future) — A lightweight test that fetches a known-public LinkedIn profile
and asserts at least N fields non-empty would catch DOM breakage before users report it. Low priority
until selector breakage becomes a recurring support issue.
---
## Cover Letter / Resume Generation
- ~~**Iterative refinement feedback loop**~~ — ✅ Done (`94225c9`): `generate()` accepts `previous_result`/`feedback`; task_runner parses params JSON; Apply Workspace has "Refine with Feedback" expander. Same pattern available for wizard `expand_bullets` via `_run_wizard_generate`.

View file

@ -102,6 +102,23 @@ Before opening a pull request:
---
## Database Migrations
Peregrine uses a numbered SQL migration system (Rails-style). Each migration is a `.sql` file in the `migrations/` directory at the repo root, named `NNN_description.sql` (e.g. `002_add_foo_column.sql`). Applied migrations are tracked in a `schema_migrations` table in each user database.
### Adding a migration
1. Create `migrations/NNN_description.sql` where `NNN` is the next sequential number (zero-padded to 3 digits).
2. Write standard SQL — `CREATE TABLE IF NOT EXISTS`, `ALTER TABLE ADD COLUMN`, etc. Keep each migration idempotent where possible.
3. Do **not** modify `scripts/db.py`'s legacy `_MIGRATIONS` lists — those are superseded and will be removed once all active databases have been bootstrapped by the migration runner.
4. The runner (`scripts/db_migrate.py`) applies pending migrations at startup automatically (both FastAPI and Streamlit paths call `migrate_db(db_path)`).
### Rollbacks
SQLite does not support transactional DDL for all statement types. Write forward-only migrations. If you need to undo a schema change, add a new migration that reverses it.
---
## What NOT to Do
- Do not commit `config/user.yaml`, `config/notion.yaml`, `config/email.yaml`, `config/adzuna.yaml`, or any `config/integrations/*.yaml` — all are gitignored

File diff suppressed because it is too large Load diff

View file

@ -1,700 +0,0 @@
# Jobgether Integration Implementation Plan
> **For agentic workers:** REQUIRED: Use superpowers:subagent-driven-development (if subagents available) or superpowers:executing-plans to implement this plan. Steps use checkbox (`- [ ]`) syntax for tracking.
**Goal:** Filter Jobgether listings out of all other scrapers, add a dedicated Jobgether scraper and URL scraper (Playwright-based), and add recruiter-aware cover letter framing for Jobgether jobs.
**Architecture:** Blocklist config handles filtering with zero code changes. A new `_scrape_jobgether()` in `scrape_url.py` handles manual URL imports via Playwright with URL slug fallback. A new `scripts/custom_boards/jobgether.py` handles discovery. Cover letter framing is an `is_jobgether` flag threaded from `task_runner.py` → `generate()` → `build_prompt()`.
**Tech Stack:** Python, Playwright (already installed), SQLite, PyTest, YAML config
**Spec:** `/Library/Development/CircuitForge/peregrine/docs/superpowers/specs/2026-03-15-jobgether-integration-design.md`
---
## Worktree Setup
- [ ] **Create worktree for this feature**
```bash
cd /Library/Development/CircuitForge/peregrine
git worktree add .worktrees/jobgether-integration -b feature/jobgether-integration
```
All implementation work happens in `/Library/Development/CircuitForge/peregrine/.worktrees/jobgether-integration/`.
---
## Chunk 1: Blocklist filter + scrape_url.py
### Task 1: Add Jobgether to blocklist
**Files:**
- Modify: `/Library/Development/CircuitForge/peregrine/config/blocklist.yaml`
- [ ] **Step 1: Edit blocklist.yaml**
```yaml
companies:
- jobgether
```
- [ ] **Step 2: Verify the existing `_is_blocklisted` test passes (or write one)**
Check `/Library/Development/CircuitForge/peregrine/tests/test_discover.py` for existing blocklist tests. If none cover company matching, add:
```python
def test_is_blocklisted_jobgether():
from scripts.discover import _is_blocklisted
blocklist = {"companies": ["jobgether"], "industries": [], "locations": []}
assert _is_blocklisted({"company": "Jobgether", "location": "", "description": ""}, blocklist)
assert _is_blocklisted({"company": "jobgether inc", "location": "", "description": ""}, blocklist)
assert not _is_blocklisted({"company": "Acme Corp", "location": "", "description": ""}, blocklist)
```
Run: `conda run -n job-seeker python -m pytest tests/test_discover.py -v -k "blocklist"`
Expected: PASS
- [ ] **Step 3: Commit**
```bash
git add config/blocklist.yaml tests/test_discover.py
git commit -m "feat: filter Jobgether listings via blocklist"
```
---
### Task 2: Add Jobgether detection to scrape_url.py
**Files:**
- Modify: `/Library/Development/CircuitForge/peregrine/scripts/scrape_url.py`
- Modify: `/Library/Development/CircuitForge/peregrine/tests/test_scrape_url.py`
- [ ] **Step 1: Write failing tests**
In `/Library/Development/CircuitForge/peregrine/tests/test_scrape_url.py`, add:
```python
def test_detect_board_jobgether():
from scripts.scrape_url import _detect_board
assert _detect_board("https://jobgether.com/offer/69b42d9d24d79271ee0618e8-csm---resware") == "jobgether"
assert _detect_board("https://www.jobgether.com/offer/abc-role---company") == "jobgether"
def test_jobgether_slug_company_extraction():
from scripts.scrape_url import _company_from_jobgether_url
assert _company_from_jobgether_url(
"https://jobgether.com/offer/69b42d9d24d79271ee0618e8-customer-success-manager---resware"
) == "Resware"
assert _company_from_jobgether_url(
"https://jobgether.com/offer/abc123-director-of-cs---acme-corp"
) == "Acme Corp"
assert _company_from_jobgether_url(
"https://jobgether.com/offer/abc123-no-separator-here"
) == ""
def test_scrape_jobgether_no_playwright(tmp_path):
"""When Playwright is unavailable, _scrape_jobgether falls back to URL slug for company."""
# Patch playwright.sync_api to None in sys.modules so the local import inside
# _scrape_jobgether raises ImportError at call time (local imports run at call time,
# not at module load time — so no reload needed).
import sys
import unittest.mock as mock
url = "https://jobgether.com/offer/69b42d9d24d79271ee0618e8-customer-success-manager---resware"
with mock.patch.dict(sys.modules, {"playwright": None, "playwright.sync_api": None}):
from scripts.scrape_url import _scrape_jobgether
result = _scrape_jobgether(url)
assert result.get("company") == "Resware"
assert result.get("source") == "jobgether"
```
Run: `conda run -n job-seeker python -m pytest tests/test_scrape_url.py::test_detect_board_jobgether tests/test_scrape_url.py::test_jobgether_slug_company_extraction tests/test_scrape_url.py::test_scrape_jobgether_no_playwright -v`
Expected: FAIL (functions not yet defined)
- [ ] **Step 2: Add `_company_from_jobgether_url()` to scrape_url.py**
Add after the `_STRIP_PARAMS` block (around line 34):
```python
def _company_from_jobgether_url(url: str) -> str:
"""Extract company name from Jobgether offer URL slug.
Slug format: /offer/{24-hex-hash}-{title-slug}---{company-slug}
Triple-dash separator delimits title from company.
Returns title-cased company name, or "" if pattern not found.
"""
m = re.search(r"---([^/?]+)$", urlparse(url).path)
if not m:
print(f"[scrape_url] Jobgether URL slug: no company separator found in {url}")
return ""
return m.group(1).replace("-", " ").title()
```
- [ ] **Step 3: Add `"jobgether"` branch to `_detect_board()`**
In `/Library/Development/CircuitForge/peregrine/scripts/scrape_url.py`, modify `_detect_board()` (add before `return "generic"`):
```python
if "jobgether.com" in url_lower:
return "jobgether"
```
- [ ] **Step 4: Add `_scrape_jobgether()` function**
Add after `_scrape_glassdoor()` (around line 137):
```python
def _scrape_jobgether(url: str) -> dict:
"""Scrape a Jobgether offer page using Playwright to bypass 403.
Falls back to URL slug for company name when Playwright is unavailable.
Does not use requests — no raise_for_status().
Returns a dict holding the non-empty fields among title, company, location
and description, plus source="jobgether". When Playwright is missing or
raises, returns only company/source derived from the URL slug, or {} if the
slug yields no company.
"""
# Imported inside the function so a missing Playwright install is handled
# here (slug fallback) rather than failing when the module is imported.
try:
from playwright.sync_api import sync_playwright
except ImportError:
company = _company_from_jobgether_url(url)
if company:
print(f"[scrape_url] Jobgether: Playwright not installed, using slug fallback → {company}")
return {"company": company, "source": "jobgether"} if company else {}
try:
with sync_playwright() as p:
browser = p.chromium.launch(headless=True)
try:
ctx = browser.new_context(user_agent=_HEADERS["User-Agent"])
page = ctx.new_page()
page.goto(url, timeout=30_000)
page.wait_for_load_state("networkidle", timeout=20_000)
result = page.evaluate("""() => {
const title = document.querySelector('h1')?.textContent?.trim() || '';
const company = document.querySelector('[class*="company"], [class*="employer"], [data-testid*="company"]')
?.textContent?.trim() || '';
const location = document.querySelector('[class*="location"], [data-testid*="location"]')
?.textContent?.trim() || '';
const desc = document.querySelector('[class*="description"], [class*="job-desc"], article')
?.innerText?.trim() || '';
return { title, company, location, description: desc };
}""")
finally:
# Always release the browser, even if navigation or extraction raised.
browser.close()
# Fall back to slug for company if DOM extraction missed it
if not result.get("company"):
result["company"] = _company_from_jobgether_url(url)
result["source"] = "jobgether"
# Drop keys whose value is empty so only extracted fields are returned.
return {k: v for k, v in result.items() if v}
except Exception as exc:
print(f"[scrape_url] Jobgether Playwright error for {url}: {exc}")
# Last resort: slug fallback
company = _company_from_jobgether_url(url)
return {"company": company, "source": "jobgether"} if company else {}
```
> ⚠️ **The CSS selectors in the `page.evaluate()` call are placeholders.** Before committing, inspect `https://jobgether.com/offer/` in a browser to find the actual class names for title, company, location, and description. Update the selectors accordingly.
- [ ] **Step 5: Add dispatch branch in `scrape_job_url()`**
In the `if board == "linkedin":` dispatch chain (around line 208), add before the `else`:
```python
elif board == "jobgether":
fields = _scrape_jobgether(url)
```
- [ ] **Step 6: Run tests to verify they pass**
Run: `conda run -n job-seeker python -m pytest tests/test_scrape_url.py -v`
Expected: All PASS (including pre-existing tests)
- [ ] **Step 7: Commit**
```bash
git add scripts/scrape_url.py tests/test_scrape_url.py
git commit -m "feat: add Jobgether URL detection and scraper to scrape_url.py"
```
---
## Chunk 2: Jobgether custom board scraper
> ⚠️ **Pre-condition:** Before writing the scraper, inspect `https://jobgether.com/remote-jobs` live to determine the actual URL/filter param format and DOM card selectors. Use the Playwright MCP browser tool or Chrome devtools. Record: (1) the query param for job title search, (2) the job card CSS selectors for title, company, URL, location, salary.
### Task 3: Inspect Jobgether search live
**Files:** None (research step)
- [ ] **Step 1: Navigate to Jobgether remote jobs and inspect search params**
Using browser devtools or Playwright network capture, navigate to `https://jobgether.com/remote-jobs`, search for "Customer Success Manager", and capture:
- The resulting URL (query params)
- Network requests (XHR/fetch) if the page uses API calls
- CSS selectors for job card elements
Record findings here before proceeding.
- [ ] **Step 2: Test a Playwright page.evaluate() extraction manually**
```python
# Run interactively to validate selectors
from playwright.sync_api import sync_playwright
with sync_playwright() as p:
browser = p.chromium.launch(headless=False) # headless=False to see the page
page = browser.new_page()
page.goto("https://jobgether.com/remote-jobs")
page.wait_for_load_state("networkidle")
# Test your selectors here
cards = page.query_selector_all("[YOUR_CARD_SELECTOR]")
print(len(cards))
browser.close()
```
---
### Task 4: Write jobgether.py scraper
**Files:**
- Create: `/Library/Development/CircuitForge/peregrine/scripts/custom_boards/jobgether.py`
- Modify: `/Library/Development/CircuitForge/peregrine/tests/test_discover.py` (or create `tests/test_jobgether.py`)
- [ ] **Step 1: Write failing test**
In `/Library/Development/CircuitForge/peregrine/tests/test_discover.py` (or a new `tests/test_jobgether.py`):
```python
def test_jobgether_scraper_returns_empty_on_missing_playwright(monkeypatch):
"""Graceful fallback when Playwright is unavailable."""
import scripts.custom_boards.jobgether as jg
monkeypatch.setattr("scripts.custom_boards.jobgether.sync_playwright", None)
result = jg.scrape({"titles": ["Customer Success Manager"]}, "Remote", results_wanted=5)
assert result == []
def test_jobgether_scraper_respects_results_wanted(monkeypatch):
"""Scraper caps results at results_wanted."""
import scripts.custom_boards.jobgether as jg
fake_jobs = [
{"title": f"CSM {i}", "href": f"/offer/abc{i}-csm---acme", "company": f"Acme {i}",
"location": "Remote", "is_remote": True, "salary": ""}
for i in range(20)
]
class FakePage:
def goto(self, *a, **kw): pass
def wait_for_load_state(self, *a, **kw): pass
def evaluate(self, _): return fake_jobs
class FakeCtx:
def new_page(self): return FakePage()
class FakeBrowser:
def new_context(self, **kw): return FakeCtx()
def close(self): pass
class FakeChromium:
def launch(self, **kw): return FakeBrowser()
class FakeP:
chromium = FakeChromium()
def __enter__(self): return self
def __exit__(self, *a): pass
monkeypatch.setattr("scripts.custom_boards.jobgether.sync_playwright", lambda: FakeP())
result = jg.scrape({"titles": ["CSM"]}, "Remote", results_wanted=5)
assert len(result) <= 5
```
Run: `conda run -n job-seeker python -m pytest tests/ -v -k "jobgether"`
Expected: FAIL (module not found)
- [ ] **Step 2: Create `scripts/custom_boards/jobgether.py`**
```python
"""Jobgether scraper — Playwright-based (requires chromium installed).
Jobgether (jobgether.com) is a remote-work job aggregator. It blocks plain
requests with 403, so we use Playwright to render the page and extract cards.
Install Playwright: conda run -n job-seeker pip install playwright &&
conda run -n job-seeker python -m playwright install chromium
Returns a list of dicts compatible with scripts.db.insert_job().
"""
from __future__ import annotations
import re
import time
from typing import Any
_BASE = "https://jobgether.com"
_SEARCH_PATH = "/remote-jobs"
# TODO: Replace with confirmed query param key after live inspection (Task 3)
_QUERY_PARAM = "search"
# Module-level import so tests can monkeypatch scripts.custom_boards.jobgether.sync_playwright
try:
from playwright.sync_api import sync_playwright
except ImportError:
sync_playwright = None
def scrape(profile: dict, location: str, results_wanted: int = 50) -> list[dict]:
    """
    Scrape job listings from Jobgether using Playwright.

    Args:
        profile: Search profile dict (uses 'titles').
        location: Location string — Jobgether is remote-focused; location used
            only if the site exposes a location filter. (Not referenced by
            the current implementation.)
        results_wanted: Maximum results to return across all titles.

    Returns:
        List of job dicts with keys: title, company, url, source, location,
        is_remote, salary, description. Returns [] when Playwright is not
        installed.
    """
    # Function-scope import: keeps this block self-contained without touching
    # the module's import section.
    from urllib.parse import quote_plus

    if sync_playwright is None:
        print(
            " [jobgether] playwright not installed.\n"
            " Install: conda run -n job-seeker pip install playwright && "
            "conda run -n job-seeker python -m playwright install chromium"
        )
        return []

    results: list[dict] = []
    seen_urls: set[str] = set()

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        # try/finally guarantees the browser is closed even if context or page
        # creation raises (same pattern as _scrape_jobgether in scrape_url.py).
        try:
            ctx = browser.new_context(
                user_agent=(
                    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
                    "(KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36"
                )
            )
            page = ctx.new_page()

            for title in profile.get("titles", []):
                if len(results) >= results_wanted:
                    break

                # TODO: Confirm URL param format from live inspection (Task 3)
                # quote_plus turns spaces into "+" (as before) and additionally
                # percent-encodes reserved characters such as "&", "#" and "/"
                # that would otherwise corrupt the query string.
                url = f"{_BASE}{_SEARCH_PATH}?{_QUERY_PARAM}={quote_plus(title)}"

                try:
                    page.goto(url, timeout=30_000)
                    page.wait_for_load_state("networkidle", timeout=20_000)
                except Exception as exc:
                    print(f" [jobgether] Page load error for '{title}': {exc}")
                    continue

                # TODO: Replace JS selector with confirmed card selector from Task 3
                try:
                    raw_jobs: list[dict[str, Any]] = page.evaluate(_extract_jobs_js())
                except Exception as exc:
                    print(f" [jobgether] JS extract error for '{title}': {exc}")
                    continue

                if not raw_jobs:
                    print(f" [jobgether] No cards found for '{title}' — selector may need updating")
                    continue

                for job in raw_jobs:
                    href = job.get("href", "")
                    if not href:
                        continue
                    # Card links may be site-relative; make them absolute.
                    full_url = _BASE + href if href.startswith("/") else href
                    if full_url in seen_urls:
                        continue
                    seen_urls.add(full_url)
                    results.append({
                        "title": job.get("title", ""),
                        "company": job.get("company", ""),
                        "url": full_url,
                        "source": "jobgether",
                        "location": job.get("location") or "Remote",
                        "is_remote": True,  # Jobgether is remote-focused
                        "salary": job.get("salary") or "",
                        "description": "",  # not in card view; scrape_url fills in
                    })
                    if len(results) >= results_wanted:
                        break

                time.sleep(1)  # polite pacing between titles
        finally:
            browser.close()

    return results[:results_wanted]
def _extract_jobs_js() -> str:
"""JS to run in page context — extracts job data from rendered card elements.
The returned arrow function yields one object per matched card with the keys
title, company, href, salary, location and is_remote, and keeps only cards
that have both a title and an href.
TODO: Replace selectors with confirmed values from Task 3 live inspection.
"""
# The string below is handed verbatim to page.evaluate() by scrape().
return """() => {
// TODO: replace '[class*=job-card]' with confirmed card selector
const cards = document.querySelectorAll('[class*="job-card"], [data-testid*="job"]');
return Array.from(cards).map(card => {
// TODO: replace these selectors with confirmed values
const titleEl = card.querySelector('h2, h3, [class*="title"]');
const companyEl = card.querySelector('[class*="company"], [class*="employer"]');
const linkEl = card.querySelector('a');
const salaryEl = card.querySelector('[class*="salary"]');
const locationEl = card.querySelector('[class*="location"]');
return {
title: titleEl ? titleEl.textContent.trim() : null,
company: companyEl ? companyEl.textContent.trim() : null,
href: linkEl ? linkEl.getAttribute('href') : null,
salary: salaryEl ? salaryEl.textContent.trim() : null,
location: locationEl ? locationEl.textContent.trim() : null,
is_remote: true,
};
}).filter(j => j.title && j.href);
}"""
```
- [ ] **Step 3: Run tests**
Run: `conda run -n job-seeker python -m pytest tests/ -v -k "jobgether"`
Expected: PASS
- [ ] **Step 4: Commit**
```bash
git add scripts/custom_boards/jobgether.py tests/test_discover.py
git commit -m "feat: add Jobgether custom board scraper (selectors pending live inspection)"
```
---
## Chunk 3: Registration, config, cover letter framing
### Task 5: Register scraper in discover.py + update search_profiles.yaml
**Files:**
- Modify: `/Library/Development/CircuitForge/peregrine/scripts/discover.py`
- Modify: `/Library/Development/CircuitForge/peregrine/config/search_profiles.yaml`
- Modify: `/Library/Development/CircuitForge/peregrine/config/search_profiles.yaml.example` (if it exists)
- [ ] **Step 1: Add import to discover.py import block (lines 20–22)**
`jobgether.py` absorbs the Playwright `ImportError` internally (module-level `try/except`), so it always imports successfully. Match the existing pattern exactly:
```python
from scripts.custom_boards import jobgether as _jobgether
```
- [ ] **Step 2: Add to CUSTOM_SCRAPERS dict literal (lines 30–34)**
```python
CUSTOM_SCRAPERS: dict[str, object] = {
"adzuna": _adzuna.scrape,
"theladders": _theladders.scrape,
"craigslist": _craigslist.scrape,
"jobgether": _jobgether.scrape,
}
```
When Playwright is absent, `_jobgether.scrape()` returns `[]` gracefully — no special guard needed in `discover.py`.
- [ ] **Step 3: Add `jobgether` to remote-eligible profiles in search_profiles.yaml**
Add `- jobgether` to the `custom_boards` list for every profile that has `Remote` in its `locations`. Based on the current file, that means: `cs_leadership`, `music_industry`, `animal_welfare`, `education`. Do NOT add it to `default` (locations: San Francisco CA only).
- [ ] **Step 4: Run discover tests**
Run: `conda run -n job-seeker python -m pytest tests/test_discover.py -v`
Expected: All PASS
- [ ] **Step 5: Commit**
```bash
git add scripts/discover.py config/search_profiles.yaml
git commit -m "feat: register Jobgether scraper and add to remote search profiles"
```
---
### Task 6: Cover letter recruiter framing
**Files:**
- Modify: `/Library/Development/CircuitForge/peregrine/scripts/generate_cover_letter.py`
- Modify: `/Library/Development/CircuitForge/peregrine/scripts/task_runner.py`
- Modify: `/Library/Development/CircuitForge/peregrine/tests/test_match.py` or add `tests/test_cover_letter.py`
- [ ] **Step 1: Write failing test**
Create or add to `/Library/Development/CircuitForge/peregrine/tests/test_cover_letter.py`:
```python
def test_build_prompt_jobgether_framing_unknown_company():
from scripts.generate_cover_letter import build_prompt
prompt = build_prompt(
title="Customer Success Manager",
company="Jobgether",
description="CSM role at an undisclosed company.",
examples=[],
is_jobgether=True,
)
assert "Your client" in prompt
assert "recruiter" in prompt.lower() or "jobgether" in prompt.lower()
def test_build_prompt_jobgether_framing_known_company():
from scripts.generate_cover_letter import build_prompt
prompt = build_prompt(
title="Customer Success Manager",
company="Resware",
description="CSM role at Resware.",
examples=[],
is_jobgether=True,
)
assert "Your client at Resware" in prompt
def test_build_prompt_no_jobgether_framing_by_default():
from scripts.generate_cover_letter import build_prompt
prompt = build_prompt(
title="Customer Success Manager",
company="Acme Corp",
description="CSM role.",
examples=[],
)
assert "Your client" not in prompt
```
Run: `conda run -n job-seeker python -m pytest tests/test_cover_letter.py -v`
Expected: FAIL
- [ ] **Step 2: Add `is_jobgether` to `build_prompt()` in generate_cover_letter.py**
Modify the `build_prompt()` signature (line 186):
```python
def build_prompt(
title: str,
company: str,
description: str,
examples: list[dict],
mission_hint: str | None = None,
is_jobgether: bool = False,
) -> str:
```
Add the recruiter hint block after the `mission_hint` block (after line 203):
```python
if is_jobgether:
if company and company.lower() != "jobgether":
recruiter_note = (
f"🤝 Recruiter context: This listing is posted by Jobgether on behalf of "
f"{company}. Address the cover letter to the Jobgether recruiter, not directly "
f"to the hiring company. Use framing like 'Your client at {company} will "
f"appreciate...' rather than addressing {company} directly. The role "
f"requirements are those of the actual employer."
)
else:
recruiter_note = (
"🤝 Recruiter context: This listing is posted by Jobgether on behalf of an "
"undisclosed employer. Address the cover letter to the Jobgether recruiter. "
"Use framing like 'Your client will appreciate...' rather than addressing "
"the company directly."
)
parts.append(f"{recruiter_note}\n")
```
- [ ] **Step 3: Add `is_jobgether` to `generate()` signature**
Modify `generate()` (line 233):
```python
def generate(
title: str,
company: str,
description: str = "",
previous_result: str = "",
feedback: str = "",
is_jobgether: bool = False,
_router=None,
) -> str:
```
Pass it through to `build_prompt()` (line 254):
```python
prompt = build_prompt(title, company, description, examples,
mission_hint=mission_hint, is_jobgether=is_jobgether)
```
- [ ] **Step 4: Pass `is_jobgether` from task_runner.py**
In `/Library/Development/CircuitForge/peregrine/scripts/task_runner.py`, modify the `generate()` call inside the `cover_letter` task block (`elif task_type == "cover_letter":` starts at line 152; the `generate()` call is at ~line 156):
```python
elif task_type == "cover_letter":
import json as _json
p = _json.loads(params or "{}")
from scripts.generate_cover_letter import generate
result = generate(
job.get("title", ""),
job.get("company", ""),
job.get("description", ""),
previous_result=p.get("previous_result", ""),
feedback=p.get("feedback", ""),
is_jobgether=job.get("source") == "jobgether",
)
update_cover_letter(db_path, job_id, result)
```
- [ ] **Step 5: Run tests**
Run: `conda run -n job-seeker python -m pytest tests/test_cover_letter.py -v`
Expected: All PASS
- [ ] **Step 6: Run full test suite**
Run: `conda run -n job-seeker python -m pytest tests/ -v`
Expected: All PASS
- [ ] **Step 7: Commit**
```bash
git add scripts/generate_cover_letter.py scripts/task_runner.py tests/test_cover_letter.py
git commit -m "feat: add Jobgether recruiter framing to cover letter generation"
```
---
## Final: Merge
- [ ] **Merge worktree branch to main**
```bash
cd /Library/Development/CircuitForge/peregrine
git merge feature/jobgether-integration
git worktree remove .worktrees/jobgether-integration
```
- [ ] **Push to remote**
```bash
git push origin main
```
---
## Manual verification after merge
1. Re-import the stuck Jobgether manual import (job 2286): delete the old stuck row, then re-add the URL via "Add Jobs by URL" on the Home page. Verify the scraper resolves company = "Resware".
2. Run a short discovery (`discover.py` with `results_per_board: 5`) and confirm no `company="Jobgether"` rows appear in `staging.db`.
3. Generate a cover letter for a Jobgether-sourced job and confirm recruiter framing appears.

View file

@ -1,477 +0,0 @@
# LLM Queue Optimizer — Design Spec
**Date:** 2026-03-14
**Branch:** `feature/llm-queue-optimizer`
**Closes:** [#2](https://git.opensourcesolarpunk.com/Circuit-Forge/peregrine/issues/2)
**Author:** pyr0ball
---
## Problem
On single-GPU and CPU-only systems, the background task runner spawns a daemon thread for every task immediately on submission. When a user approves N jobs at once, N threads race to load their respective LLM models simultaneously, causing repeated model swaps and significant latency overhead.
The root issue is that `submit_task()` is a spawn-per-task model with no scheduling layer. SQLite's `background_tasks` table is a status log, not a consumed work queue.
Additionally, on restart all `queued` and `running` tasks are cleared to `failed` (inline SQL in `app.py`'s `_startup()`), discarding pending work that had not yet started executing.
---
## Goals
- Eliminate unnecessary model switching by batching LLM tasks by type
- Allow concurrent model execution when VRAM permits multiple models simultaneously
- Preserve FIFO ordering within each task type
- Survive process restarts — `queued` tasks resume after restart; only `running` tasks (whose results are unknown) are reset to `failed`
- Apply to all tiers (no tier gating)
- Keep non-LLM tasks (discovery, email sync, scrape, enrich) unaffected — they continue to spawn free threads
---
## Non-Goals
- Changing the LLM router fallback chain
- Adding new task types
- Tier gating on the scheduler
- Persistent task history in memory
- Durability for non-LLM task types (discovery, email_sync, etc. — these do not survive restarts, same as current behavior)
- Dynamic VRAM tracking — `_available_vram` is read once at startup and not refreshed (see Known Limitations)
---
## Architecture
### Task Classification
```python
LLM_TASK_TYPES = {"cover_letter", "company_research", "wizard_generate"}
```
The routing rule is: if `task_type in LLM_TASK_TYPES`, route through the scheduler. Everything else spawns a free thread unchanged from the current implementation. **Future task types default to bypass mode** unless explicitly added to `LLM_TASK_TYPES` — which is the safe default (bypass = current behavior).
`LLM_TASK_TYPES` is defined in `scripts/task_scheduler.py` and imported by `scripts/task_runner.py` for routing. This import direction (task_runner imports from task_scheduler) avoids circular imports because `task_scheduler.py` does **not** import from `task_runner.py`.
Current non-LLM types (all bypass scheduler): `discovery`, `email_sync`, `scrape_url`, `enrich_descriptions`, `enrich_craigslist`, `prepare_training`.
### Routing in `submit_task()` — No Circular Import
The routing split lives entirely in `submit_task()` in `task_runner.py`:
```python
def submit_task(db_path, task_type, job_id=None, params=None):
task_id, is_new = insert_task(db_path, task_type, job_id or 0, params=params)
if is_new:
from scripts.task_scheduler import get_scheduler, LLM_TASK_TYPES
if task_type in LLM_TASK_TYPES:
get_scheduler(db_path).enqueue(task_id, task_type, job_id or 0, params)
else:
t = threading.Thread(
target=_run_task,
args=(db_path, task_id, task_type, job_id or 0, params),
daemon=True,
)
t.start()
return task_id, is_new
```
`TaskScheduler.enqueue()` only handles LLM task types and never imports or calls `_run_task`. This eliminates any circular import between `task_runner` and `task_scheduler`.
### Component Overview
```
submit_task()
├── task_type in LLM_TASK_TYPES?
│ │ yes │ no
│ ▼ ▼
│ get_scheduler().enqueue() spawn free thread (unchanged)
│ │
│ ▼
│ per-type deque
│ │
│ ▼
│ Scheduler loop (daemon thread)
│ (wakes on enqueue or batch completion)
│ │
│ Sort eligible types by queue depth (desc)
│ │
│ For each type:
│ reserved_vram + budget[type] ≤ available_vram?
│ │ yes │ no
│ ▼ ▼
│ Start batch worker skip (wait for slot)
│ (serial: one task at a time)
│ │
│ Batch worker signals done → scheduler re-evaluates
```
### New File: `scripts/task_scheduler.py`
**State:**
| Attribute | Type | Purpose |
|---|---|---|
| `_queues` | `dict[str, deque[TaskSpec]]` | Per-type pending task deques |
| `_active` | `dict[str, Thread]` | Currently running batch worker per type |
| `_budgets` | `dict[str, float]` | VRAM budget per task type (GB). Loaded at construction by merging `DEFAULT_VRAM_BUDGETS` with `scheduler.vram_budgets` from `config/llm.yaml`. Config path derived from `db_path` (e.g. `db_path.parent.parent / "config/llm.yaml"`). Missing file or key → defaults used as-is. At construction, a warning is logged for any type in `LLM_TASK_TYPES` with no budget entry after the merge. |
| `_reserved_vram` | `float` | Sum of `_budgets` values for currently active type batches |
| `_available_vram` | `float` | Total VRAM from `get_gpus()` summed across all GPUs at construction; 999.0 on CPU-only systems. Static — not refreshed after startup (see Known Limitations). |
| `_max_queue_depth` | `int` | Max tasks per type queue before drops. From `scheduler.max_queue_depth` in config; default 500. |
| `_lock` | `threading.Lock` | Protects all mutable scheduler state |
| `_wake` | `threading.Event` | Pulsed on enqueue or batch completion |
| `_stop` | `threading.Event` | Set by `shutdown()` to terminate the loop |
**Default VRAM budgets (module-level constant):**
```python
DEFAULT_VRAM_BUDGETS: dict[str, float] = {
"cover_letter": 2.5, # alex-cover-writer:latest (~2GB GGUF + headroom)
"company_research": 5.0, # llama3.1:8b or vllm model
"wizard_generate": 2.5, # same model family as cover_letter
}
```
At construction, the scheduler validates that every type in `LLM_TASK_TYPES` has an entry
in the merged `_budgets`. If any type is missing, a warning is logged:
```
WARNING task_scheduler: No VRAM budget defined for LLM task type 'foo' — defaulting to 0.0 GB (unlimited concurrency for this type)
```
**Scheduler loop:**
```python
while not _stop.is_set():
_wake.wait(timeout=30)
_wake.clear()
with _lock:
# Defense in depth: reap dead threads not yet cleaned by their finally block.
# In the normal path, a batch worker's finally block calls _active.pop() and
# decrements _reserved_vram BEFORE firing _wake — so by the time we scan here,
# the entry is already gone and there is no double-decrement risk.
# This reap only catches threads killed externally (daemon exit on shutdown).
for t, thread in list(_active.items()):
if not thread.is_alive():
_reserved_vram -= _budgets.get(t, 0)
del _active[t]
# Start new batches where VRAM allows
candidates = sorted(
[t for t in _queues if _queues[t] and t not in _active],
key=lambda t: len(_queues[t]),
reverse=True,
)
for task_type in candidates:
budget = _budgets.get(task_type, 0)
if _reserved_vram + budget <= _available_vram:
thread = Thread(target=_batch_worker, args=(task_type,), daemon=True)
_active[task_type] = thread
_reserved_vram += budget
thread.start()
```
**Batch worker:**
The `finally` block is the single authoritative path for releasing `_reserved_vram` and
removing the entry from `_active`. Because `_active.pop` runs in `finally` before
`_wake.set()`, the scheduler loop's dead-thread scan will never find this entry —
no double-decrement is possible in the normal execution path.
```python
def _batch_worker(task_type: str) -> None:
try:
while True:
with _lock:
if not _queues[task_type]:
break
task = _queues[task_type].popleft()
_run_task(db_path, task.id, task_type, task.job_id, task.params)
finally:
with _lock:
_active.pop(task_type, None)
_reserved_vram -= _budgets.get(task_type, 0)
_wake.set()
```
`_run_task` here refers to `task_runner._run_task`, passed in as a callable at
construction (e.g. `self._run_task = run_task_fn`). The caller (`task_runner.py`)
passes `_run_task` when constructing the scheduler, avoiding any import of `task_runner`
from within `task_scheduler`.
**`enqueue()` method:**
`enqueue()` only accepts LLM task types. Non-LLM routing is handled in `submit_task()`
before `enqueue()` is called (see Routing section above).
```python
def enqueue(self, task_id: int, task_type: str, job_id: int, params: str | None) -> None:
with self._lock:
q = self._queues.setdefault(task_type, deque())
if len(q) >= self._max_queue_depth:
logger.warning(
"Queue depth limit reached for %s (max=%d) — task %d dropped",
task_type, self._max_queue_depth, task_id,
)
update_task_status(self._db_path, task_id, "failed",
error="Queue depth limit reached")
return
q.append(TaskSpec(task_id, job_id, params))
self._wake.set()
```
When a task is dropped at the depth limit, `update_task_status()` marks it `failed` in
SQLite immediately — the row inserted by `insert_task()` is never left as a permanent
ghost in `queued` state.
**Singleton access — thread-safe initialization:**
```python
_scheduler: TaskScheduler | None = None
_scheduler_lock = threading.Lock()
def get_scheduler(db_path: Path) -> TaskScheduler:
global _scheduler
if _scheduler is None: # fast path — avoids lock on steady state
with _scheduler_lock:
if _scheduler is None: # re-check under lock (double-checked locking)
_scheduler = TaskScheduler(db_path)
_scheduler.start()
return _scheduler
def reset_scheduler() -> None:
"""Tear down and clear singleton. Test teardown only."""
global _scheduler
with _scheduler_lock:
if _scheduler:
_scheduler.shutdown()
_scheduler = None
```
The safety guarantee comes from the **inner `with _scheduler_lock:` block and re-check**,
not from GIL atomicity. The outer `if _scheduler is None` is a performance optimization
(avoid acquiring the lock on every `submit_task()` call once the scheduler is running).
Two threads racing at startup will both pass the outer check, but only one will win the
inner lock and construct the scheduler; the other will see a non-None value on its
inner re-check and return the already-constructed instance.
---
## Required Call Ordering in `app.py`
`reset_running_tasks()` **must complete before** `get_scheduler()` is ever called.
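The scheduler's durability query reads only `status='queued'` rows, so a row stuck in
`status='running'` from a prior crash is never re-enqueued. The hazard is the reverse
ordering: if `reset_running_tasks()` ran after the scheduler had started, it would mark
tasks that the new batch workers had just moved to `running` as `failed` while they
were still executing.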
In practice, the first call to `get_scheduler()` is triggered by the `submit_task()` call
inside `_startup()`'s SearXNG auto-recovery block — not by a user action. The ordering
holds because `reset_running_tasks()` is called on an earlier line within the same
`_startup()` function body. **Do not reorder these calls.**
```python
@st.cache_resource
def _startup() -> None:
# Step 1: Reset interrupted tasks — MUST come first
from scripts.db import reset_running_tasks
reset_running_tasks(get_db_path())
# Step 2 (later in same function): SearXNG re-queue calls submit_task(),
# which triggers get_scheduler() for the first time. Ordering is guaranteed
# because _startup() runs synchronously and step 1 is already complete.
conn = sqlite3.connect(get_db_path())
# ... existing SearXNG re-queue logic using conn ...
conn.close()
```
---
## Changes to Existing Files
### `scripts/task_runner.py`
`submit_task()` gains routing logic; `_run_task` is passed to the scheduler at first call:
```python
def submit_task(db_path, task_type, job_id=None, params=None):
task_id, is_new = insert_task(db_path, task_type, job_id or 0, params=params)
if is_new:
from scripts.task_scheduler import get_scheduler, LLM_TASK_TYPES
if task_type in LLM_TASK_TYPES:
get_scheduler(db_path, run_task_fn=_run_task).enqueue(
task_id, task_type, job_id or 0, params
)
else:
t = threading.Thread(
target=_run_task,
args=(db_path, task_id, task_type, job_id or 0, params),
daemon=True,
)
t.start()
return task_id, is_new
```
`get_scheduler()` uses `run_task_fn` only on the first call (when it constructs the
singleton); subsequent calls ignore the argument because the scheduler is already
initialized. `_run_task()` and all handler branches remain unchanged.
### `scripts/db.py`
Add `reset_running_tasks()` alongside the existing `kill_stuck_tasks()`. Like
`kill_stuck_tasks()`, it uses a plain `sqlite3.connect()` — consistent with the
existing pattern in this file, and appropriate because this call happens before the
app's connection pooling is established:
```python
def reset_running_tasks(db_path: Path = DEFAULT_DB) -> int:
"""On restart: mark in-flight tasks failed. Queued tasks survive for the scheduler."""
conn = sqlite3.connect(db_path)
count = conn.execute(
"UPDATE background_tasks SET status='failed', error='Interrupted by restart',"
" finished_at=datetime('now') WHERE status='running'"
).rowcount
conn.commit()
conn.close()
return count
```
### `app/app.py`
Inside `_startup()`, replace the inline SQL block that wipes both `queued` and `running`
rows with a call to `reset_running_tasks()`. The replacement must be the **first operation
in `_startup()`** — before the SearXNG re-queue logic that calls `submit_task()`:
```python
# REMOVE this block:
conn.execute(
"UPDATE background_tasks SET status='failed', error='Interrupted by server restart',"
" finished_at=datetime('now') WHERE status IN ('queued','running')"
)
# ADD at the top of _startup(), before any submit_task() calls:
from scripts.db import reset_running_tasks
reset_running_tasks(get_db_path())
```
The existing `conn` used for subsequent SearXNG logic is unaffected — `reset_running_tasks()`
opens and closes its own connection.
### `config/llm.yaml.example`
Add `scheduler:` section:
```yaml
scheduler:
vram_budgets:
cover_letter: 2.5 # alex-cover-writer:latest (~2GB GGUF + headroom)
company_research: 5.0 # llama3.1:8b or vllm model
wizard_generate: 2.5 # same model family as cover_letter
max_queue_depth: 500
```
---
## Data Model
No schema changes. The existing `background_tasks` table supports all scheduler needs:
| Column | Scheduler use |
|---|---|
| `task_type` | Queue routing — determines which deque receives the task |
| `status` | `queued` → in deque; `running` → batch worker executing; `completed`/`failed` → done |
| `created_at` | FIFO ordering within type (durability startup query sorts by this) |
| `params` | Passed through to `_run_task()` unchanged |
---
## Durability
Scope: **LLM task types only** (`cover_letter`, `company_research`, `wizard_generate`).
Non-LLM tasks do not survive restarts, same as current behavior.
On construction, `TaskScheduler.__init__()` queries:
```sql
SELECT id, task_type, job_id, params
FROM background_tasks
WHERE status = 'queued'
AND task_type IN ('cover_letter', 'company_research', 'wizard_generate')
ORDER BY created_at ASC
```
Results are pushed onto their respective deques. This query runs inside `__init__` before
`start()` is called (before the scheduler loop thread exists), so there is no concurrency
concern with deque population.
`running` rows are reset to `failed` by `reset_running_tasks()` before `get_scheduler()`
is called — see Required Call Ordering above.
---
## Known Limitations
**Static `_available_vram`:** Total GPU VRAM is read from `get_gpus()` once at scheduler
construction and never refreshed. Changes after startup — another process releasing VRAM,
a GPU going offline, Ollama unloading a model — are not reflected. The scheduler's
correctness depends on per-task VRAM budgets being conservative estimates of **peak model
footprint** (not free VRAM at a given moment). On a system where Ollama and vLLM share
the GPU, budgets should account for both models potentially resident simultaneously.
Dynamic VRAM polling is a future enhancement.
---
## Memory Safety
- **`finally` block owns VRAM release** — batch worker always decrements `_reserved_vram`
and removes its `_active` entry before firing `_wake`, even on exception. The scheduler
loop's dead-thread scan is defense in depth for externally-killed daemons only; it cannot
double-decrement because `_active.pop` in `finally` runs first.
- **Max queue depth with DB cleanup** — `enqueue()` rejects tasks past `max_queue_depth`,
logs a warning, and immediately marks the dropped task `failed` in SQLite to prevent
permanent ghost rows in `queued` state.
- **No in-memory history** — deques hold only pending `TaskSpec` namedtuples. Completed
and failed state lives exclusively in SQLite. Memory footprint is `O(pending tasks)`.
- **Thread-safe singleton** — double-checked locking with `_scheduler_lock` prevents
double-construction. Safety comes from the inner lock + re-check; the outer `None`
check is a performance optimization only.
- **Missing budget warning** — any `LLM_TASK_TYPES` entry with no budget entry after
config merge logs a warning at construction; defaults to 0.0 GB (unlimited concurrency
for that type). This prevents silent incorrect scheduling for future task types.
- **`reset_scheduler()`** — explicit teardown for test isolation: sets `_stop`, joins
scheduler thread with timeout, clears module-level reference under `_scheduler_lock`.
---
## Testing (`tests/test_task_scheduler.py`)
All tests mock `_run_task` to avoid real LLM calls. `reset_scheduler()` is called in
an `autouse` fixture for isolation between test cases.
| Test | What it verifies |
|---|---|
| `test_deepest_queue_wins_first_slot` | N cover_letter + M research enqueued (N > M); cover_letter batch starts first when `_available_vram` only fits one model budget, because it has the deeper queue |
| `test_fifo_within_type` | Arrival order preserved within a type batch |
| `test_concurrent_batches_when_vram_allows` | Two type batches start simultaneously when `_available_vram` fits both budgets combined |
| `test_new_tasks_picked_up_mid_batch` | Task enqueued via `enqueue()` while a batch is active is consumed by the running worker in the same batch |
| `test_worker_crash_releases_vram` | `_run_task` raises; `_reserved_vram` returns to 0; scheduler continues; no double-decrement |
| `test_non_llm_tasks_bypass_scheduler` | `discovery`, `email_sync` etc. spawn free threads via `submit_task()`; scheduler deques untouched |
| `test_durability_llm_tasks_on_startup` | DB has existing `queued` LLM-type rows; scheduler loads them into deques on construction |
| `test_durability_excludes_non_llm` | `queued` non-LLM rows in DB are not loaded into deques on startup |
| `test_running_rows_reset_before_scheduler` | `reset_running_tasks()` sets `running` → `failed`; `queued` rows untouched |
| `test_max_queue_depth_marks_failed` | Enqueue past limit logs warning, does not add to deque, and marks task `failed` in DB |
| `test_missing_budget_logs_warning` | Type in `LLM_TASK_TYPES` with no budget entry at construction logs a warning |
| `test_singleton_thread_safe` | Concurrent calls to `get_scheduler()` produce exactly one scheduler instance |
| `test_reset_scheduler_cleans_up` | `reset_scheduler()` stops loop thread; no lingering threads after call |
---
## Files Touched
| File | Change |
|---|---|
| `scripts/task_scheduler.py` | **New** — ~180 lines |
| `scripts/task_runner.py` | `submit_task()` routing shim — ~12 lines changed |
| `scripts/db.py` | `reset_running_tasks()` added — ~10 lines |
| `app/app.py` | `_startup()`: inline SQL block → `reset_running_tasks()` call, placed first |
| `config/llm.yaml.example` | Add `scheduler:` section |
| `tests/test_task_scheduler.py` | **New** — ~240 lines |

View file

@ -1,173 +0,0 @@
# Jobgether Integration Design
**Date:** 2026-03-15
**Status:** Approved
**Scope:** Peregrine — discovery pipeline + manual URL import
---
## Problem
Jobgether is a job aggregator that posts listings on LinkedIn and other boards with `company = "Jobgether"` rather than the actual employer. This causes two problems:
1. **Misleading listings** — Jobs appear to be at "Jobgether" rather than the real hiring company. Meg sees "Jobgether" as employer throughout the pipeline (Job Review, cover letters, company research).
2. **Broken manual import** — Direct `jobgether.com` URLs return HTTP 403 when scraped with plain `requests`, leaving jobs stuck as `title = "Importing…"`.
**Evidence from DB:** 29+ Jobgether-sourced LinkedIn listings with `company = "Jobgether"`. Actual employer is intentionally withheld by Jobgether's business model ("on behalf of a partner company").
---
## Decision: Option A — Filter + Dedicated Scraper
Drop Jobgether listings from other scrapers entirely and replace with a direct Jobgether scraper that retrieves accurate company names. Existing Jobgether-via-LinkedIn listings in the DB are left as-is for manual review/rejection.
**Why not Option B (follow-through):** LinkedIn→Jobgether→employer is a two-hop chain where the employer is deliberately hidden. Jobgether blocks `requests`. Not worth the complexity for unreliable data.
---
## Components
### 1. Jobgether company filter — `config/blocklist.yaml`
Add `"jobgether"` to the `companies` list in `config/blocklist.yaml`. The existing `_is_blocklisted()` function in `discover.py` already performs a partial case-insensitive match on the company field and applies to all scrapers (JobSpy boards + all custom boards). No code change required.
```yaml
companies:
- jobgether
```
This is the correct mechanism — it is user-visible, config-driven, and applies uniformly. Log output already reports blocklisted jobs per run.
### 2. URL handling in `scrape_url.py`
Three changes required:
**a) `_detect_board()`** — add `"jobgether"` branch returning `"jobgether"` when `"jobgether.com"` is in the URL. Must be added before the `return "generic"` fallback.
**b) dispatch block in `scrape_job_url()`** — add `elif board == "jobgether": fields = _scrape_jobgether(url)` to the `if/elif` chain (lines 208–215). Without this, the new `_detect_board()` branch silently falls through to `_scrape_generic()`.
**c) `_scrape_jobgether(url)`** — Playwright-based scraper to bypass 403. Extracts:
- `title` — job title from page heading
- `company` — actual employer name (visible on Jobgether offer pages)
- `location` — remote/location info
- `description` — full job description
- `source = "jobgether"`
Playwright errors (`playwright.sync_api.Error`, `TimeoutError`) are not subclasses of `requests.RequestException` but are caught by the existing broad `except Exception` handler in `scrape_job_url()` — no changes needed to the error handling block.
**URL slug fallback for company name (manual import path only):** Jobgether offer URLs follow the pattern:
```
https://jobgether.com/offer/{24-hex-hash}-{title-slug}---{company-slug}
```
When Playwright is unavailable, parse `company-slug` using:
```python
m = re.search(r'---([^/?]+)$', parsed_path)
company = m.group(1).replace("-", " ").title() if m else ""
```
Example: `/offer/69b42d9d24d79271ee0618e8-customer-success-manager---resware` → `"Resware"`.
This fallback is scoped to `_scrape_jobgether()` in `scrape_url.py` only; the discovery scraper always gets company name from the rendered DOM. `_scrape_jobgether()` does not make any `requests` calls — there is no `raise_for_status()` — so the `requests.RequestException` handler in `scrape_job_url()` is irrelevant to this path; only the broad `except Exception` applies.
**Pre-implementation checkpoint:** Confirm that Jobgether offer URLs have no tracking query params beyond UTM (already covered by `_STRIP_PARAMS`). No `canonicalize_url()` changes are expected but verify before implementation.
### 3. `scripts/custom_boards/jobgether.py`
Playwright-based search scraper following the same interface as `theladders.py`:
```python
def scrape(profile: dict, location: str, results_wanted: int = 50) -> list[dict]
```
- Base URL: `https://jobgether.com/remote-jobs`
- Search strategy: iterate over `profile["titles"]`, apply search/filter params
- **Pre-condition — do not begin implementation of this file until live URL inspection is complete.** Use browser dev tools or a Playwright `page.on("request")` capture to determine the actual query parameter format for title/location filtering. Jobgether may use URL query params, path segments, or JS-driven state — this cannot be assumed from the URL alone.
- Extraction: job cards from rendered DOM (Playwright `page.evaluate()`)
- Returns standard job dicts: `title, company, url, source, location, is_remote, salary, description`
- `source = "jobgether"`
- Graceful `ImportError` handling if Playwright not installed (same pattern as `theladders.py`)
- Polite pacing: 1s sleep between title iterations
- Company name comes from DOM; URL slug parse is not needed in this path
### 4. Registration + config
**`discover.py` — import block (lines 20–22):**
```python
from scripts.custom_boards import jobgether as _jobgether
```
**`discover.py` — `CUSTOM_SCRAPERS` dict literal (lines 30–34):**
```python
CUSTOM_SCRAPERS: dict[str, object] = {
"adzuna": _adzuna.scrape,
"theladders": _theladders.scrape,
"craigslist": _craigslist.scrape,
"jobgether": _jobgether.scrape, # ← add this line
}
```
**`config/search_profiles.yaml` (and `.example`):**
Add `jobgether` to `custom_boards` for any profile that includes `Remote` in its `locations` list. Jobgether is a remote-work-focused aggregator; adding it to location-specific non-remote profiles is not useful. Do not add a `custom_boards` key to profiles that don't already have one unless they are remote-eligible.
```yaml
custom_boards:
- jobgether
```
---
## Data Flow
```
discover.py
├── JobSpy boards → _is_blocklisted(company="jobgether") → drop → DB insert
├── custom: adzuna → _is_blocklisted(company="jobgether") → drop → DB insert
├── custom: theladders → _is_blocklisted(company="jobgether") → drop → DB insert
├── custom: craigslist → _is_blocklisted(company="jobgether") → drop → DB insert
└── custom: jobgether → (company = real employer, never "jobgether") → DB insert
scrape_url.py
└── jobgether.com URL → _detect_board() = "jobgether"
→ _scrape_jobgether()
├── Playwright available → full job fields from page
└── Playwright unavailable → company from URL slug only
```
---
## Implementation Notes
- **Slug fallback None-guard:** The regex `r'---([^/?]+)$'` returns a wrong value (not `None`) if the URL slug doesn't follow the expected format. Add a logged warning and return `""` rather than title-casing garbage.
- **Import guard in `discover.py`:** Wrap the `jobgether` import with `try/except ImportError`, setting `_jobgether = None`, and gate the `CUSTOM_SCRAPERS` registration with `if _jobgether is not None`. This ensures the graceful ImportError in `jobgether.py` (for missing Playwright) propagates cleanly to the caller rather than crashing discovery.
### 5. Cover letter recruiter framing — `scripts/generate_cover_letter.py`
When `source = "jobgether"`, inject a system hint that shifts the cover letter addressee from the employer to the Jobgether recruiter. Use Policy A: recruiter framing applies for all Jobgether-sourced jobs regardless of whether the real company name was resolved.
- If company is known (e.g. "Resware"): *"Your client at Resware will appreciate..."*
- If company is unknown: *"Your client will appreciate..."*
The real company name is always stored in the DB as resolved by the scraper — this is internal knowledge only. The framing shift is purely in the generated letter text, not in how the job is stored or displayed.
Implementation: add an `is_jobgether` flag to the cover letter prompt context (same pattern as `mission_hint` injection). Add a conditional block in the system prompt / Para 1 instructions when the flag is true.
---
## Out of Scope
- Retroactively fixing existing `company = "Jobgether"` rows in the DB (left for manual review/rejection)
- Jobgether discovery scraper — **decided against during implementation (2026-03-15)**: Cloudflare Turnstile blocks all headless browsers on all Jobgether pages; `filter-api.jobgether.com` requires auth; `robots.txt` blocks all bots. The email digest → manual URL paste → slug company extraction flow covers the actual use case.
- Jobgether authentication / logged-in scraping
- Pagination
- Dedup between Jobgether and other boards (existing URL dedup handles this)
---
## Files Changed
| File | Change |
|------|--------|
| `config/blocklist.yaml` | Add `"jobgether"` to `companies` list |
| `scripts/discover.py` | Add import + entry in `CUSTOM_SCRAPERS` dict literal |
| `scripts/scrape_url.py` | Add `_detect_board` branch, dispatch branch, `_scrape_jobgether()` |
| `scripts/custom_boards/jobgether.py` | New file — Playwright search scraper |
| `config/search_profiles.yaml` | Add `jobgether` to `custom_boards` |
| `config/search_profiles.yaml.example` | Same |

174
docs/vue-spa-migration.md Normal file
View file

@ -0,0 +1,174 @@
# Peregrine Vue 3 SPA Migration
**Branch:** `feature/vue-spa`
**Issue:** #8 — Vue 3 SPA frontend (Paid Tier GA milestone)
**Worktree:** `.worktrees/feature-vue-spa/`
**Reference:** `avocet/docs/vue-port-gotchas.md` (15 battle-tested gotchas)
---
## What We're Replacing
The current Streamlit UI (`app/app.py` + `app/pages/`) is an internal tool built for speed of development. The Vue SPA replaces it with a proper frontend — faster, more accessible, and extensible for the Paid Tier. A FastAPI backend already exists (partially, from the cloud managed instance work); the Vue SPA will consume it.
### Pages to Port
| Streamlit file | Vue view | Route | Notes |
|---|---|---|---|
| `app/Home.py` | `HomeView.vue` | `/` | Dashboard, discovery trigger, sync status |
| `app/pages/1_Job_Review.py` | `JobReviewView.vue` | `/review` | Batch approve/reject; primary daily-driver view |
| `app/pages/4_Apply.py` | `ApplyView.vue` | `/apply` | Cover letter gen + PDF + mark applied |
| `app/pages/5_Interviews.py` | `InterviewsView.vue` | `/interviews` | Kanban: phone_screen → offer → hired |
| `app/pages/6_Interview_Prep.py` | `InterviewPrepView.vue` | `/prep` | Live reference sheet + practice Q&A |
| `app/pages/7_Survey.py` | `SurveyView.vue` | `/survey` | Culture-fit survey assist + screenshot |
| `app/pages/2_Settings.py` | `SettingsView.vue` | `/settings` | 6 tabs: Profile, Resume, Search, System, Fine-Tune, License |
---
## Avocet Lessons Applied — What We Fixed Before Starting
The avocet SPA was the testbed. These bugs were found and fixed there; Peregrine's scaffold already incorporates all fixes. See `avocet/docs/vue-port-gotchas.md` for the full writeup.
### Applied at scaffold level (baked in — you don't need to think about these)
| # | Gotcha | How it's fixed in this scaffold |
|---|--------|----------------------------------|
| 1 | `id="app"` on App.vue root → nested `#app` elements, broken CSS specificity | `App.vue` root uses `class="app-root"`. `#app` in `index.html` is mount target only. |
| 3 | `overflow-x: hidden` on html → creates scroll container → 15px scrollbar jitter on Linux | `peregrine.css`: `html { overflow-x: clip }` |
| 4 | UnoCSS `presetAttributify` generates CSS for bare attribute names like `h2` | `uno.config.ts`: `presetAttributify({ prefix: 'un-', prefixedOnly: true })` |
| 5 | Theme variable name mismatches cause dark mode to silently fall back to hardcoded colors | `peregrine.css` alias map: `--color-bg → var(--color-surface)`, `--color-text-secondary → var(--color-text-muted)` |
| 7 | SPA cache: browser caches `index.html` indefinitely → old asset hashes → 404 on rebuild | FastAPI must register explicit `GET /` with no-cache headers before `StaticFiles` mount (see FastAPI section below) |
| 9 | `navigator.vibrate()` not supported on desktop/Safari — throws on call | `useHaptics.ts` guards with `'vibrate' in navigator` |
| 10 | Pinia options store = Vue 2 migration path | All stores use setup store form: `defineStore('id', () => { ... })` |
| 12 | `matchMedia`, `vibrate`, `ResizeObserver` absent in jsdom → composable tests throw | `test-setup.ts` stubs all three |
| 13 | `100vh` ignores mobile browser chrome | `App.vue`: `min-height: 100dvh` |
### Must actively avoid when writing new components
| # | Gotcha | Rule |
|---|--------|------|
| 2 | `transition: all` + spring easing → every CSS property bounces → layout explosion | Always enumerate: `transition: background 200ms ease, transform 250ms cubic-bezier(...)` |
| 6 | Keyboard composables called with snapshot arrays → keys don't work after async data loads | Accept `getLabels: () => labels.value` (reactive getter), not `labels: []` (snapshot) |
| 8 | Font reflow at ~780ms shifts layout measurements taken in `onMounted` | Measure layout in `document.fonts.ready` promise or after 1s timeout |
| 11 | `useSwipe` from `@vueuse/core` fires on desktop trackpad pointer events, not just touch | Add an `event.pointerType === 'touch'` guard if you need touch-only behavior |
| 14 | Rebuild workflow confusion | `cd web && npm run build` → refresh browser. Only restart FastAPI if `app/api.py` changed. |
| 15 | `:global(ancestor) .descendant` in `<style scoped>` → Vue drops the descendant entirely | Never use `:global(X) .Y` in scoped CSS. Use JS gate or CSS custom property token. |
---
## FastAPI Integration
### SPA serving (gotcha #7)
When the Vue SPA is built, FastAPI needs to serve it. Register the explicit `/` route **before** the `StaticFiles` mount, otherwise `index.html` gets cached and old asset hashes cause 404s after rebuild:
```python
from pathlib import Path
from fastapi.responses import FileResponse
from fastapi.staticfiles import StaticFiles
_DIST = Path(__file__).parent.parent / "web" / "dist"
_NO_CACHE = {
"Cache-Control": "no-cache, no-store, must-revalidate",
"Pragma": "no-cache",
}
@app.get("/")
def spa_root():
return FileResponse(_DIST / "index.html", headers=_NO_CACHE)
# Must come after the explicit route above
app.mount("/", StaticFiles(directory=str(_DIST), html=True), name="spa")
```
Hashed assets (`/assets/index-abc123.js`) can be cached aggressively — their filenames change with content. Only `index.html` needs no-cache.
### API prefix
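Vue Router uses HTML5 history mode. All `/api/*` routes must be registered on FastAPI before the `StaticFiles` mount. Vue routes (`/`, `/review`, `/apply`, etc.) are handled client-side. Note that `html=True` on `StaticFiles` only serves `index.html` for directory requests such as `/`; it does not fall back to `index.html` for unknown paths, so a direct load or refresh of `/review` returns 404 unless FastAPI also registers a catch-all route (after the `/api/*` routes) that returns `index.html` with the same no-cache headers.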
---
## Peregrine-Specific Considerations
### Auth & license gating
The Streamlit UI uses `app/wizard/tiers.py` for tier gating. In the Vue SPA, tier state should be fetched from a `GET /api/license/status` endpoint on mount and stored in a Pinia store. Components check `licenseStore.tier` to gate features.
### Discovery trigger
The "Start Discovery" button on Home triggers `python scripts/discover.py` as a background process. The Vue version should use SSE (same pattern as avocet's finetune SSE) to stream progress back in real-time. The `useApiSSE` composable is already wired for this.
### Job Review — card stack UX
This is the daily-driver view. Consider the avocet ASMR bucket pattern here — approve/reject could transform into buckets on drag pickup. The motion tokens (`--transition-spring`, `--transition-dismiss`) are pre-defined in `peregrine.css`. The `useHaptics` composable is ready.
### Kanban (Interviews view)
The drag-to-column kanban is a strong candidate for `@vueuse/core`'s `useDraggable`. Watch for the `useSwipe` gotcha #11 — use pointer-type guards if drag behavior differs between touch and mouse.
### Settings — 6 tabs
Use a tab component with reactive route query params (`/settings?tab=license`) so direct links work and the page is shareable/bookmarkable.
---
## Build & Dev Workflow
```bash
# From worktree root
cd web
npm install # first time only
npm run dev # Vite dev server at :5173 (proxies /api/* to FastAPI at :8502)
npm run build # output to web/dist/
npm run test # Vitest unit tests
```
FastAPI serves the built `dist/` on the main port. During dev, configure Vite to proxy `/api` to the running FastAPI:
```ts
// vite.config.ts addition for dev proxy
server: {
proxy: {
'/api': 'http://localhost:8502',
}
}
```
After `npm run build`, just refresh the browser — no FastAPI restart needed unless `app/api.py` changed (gotcha #14).
---
## Implementation Order
Suggested sequence — validate the full stack before porting complex pages:
1. **FastAPI SPA endpoint** — serve `web/dist/` with correct cache headers
2. **App shell** — nav, routing, hacker mode, motion toggle work end-to-end
3. **Home view** — dashboard widgets, discovery trigger with SSE progress
4. **Job Review** — most-used view; gets the most polish
5. **Settings** — license tab is the blocker for tier gating in other views
6. **Apply Workspace** — cover letter gen + PDF export
7. **Interviews kanban** — drag-to-column + calendar sync
8. **Interview Prep** — reference sheet, practice Q&A
9. **Survey Assistant** — screenshot + text paste
---
## Checklist
Copy of the avocet gotchas checklist (all pre-applied at scaffold level are checked):
- [x] App.vue root element: use `.app-root` class, NOT `id="app"`
- [ ] No `transition: all` with spring easings — enumerate properties explicitly
- [ ] No `:global(ancestor) .descendant` in scoped CSS — Vue drops the descendant
- [x] `overflow-x: clip` on html, `overflow-x: hidden` on body
- [x] UnoCSS `presetAttributify`: `prefixedOnly: true`
- [x] Product CSS aliases: `--color-bg`, `--color-text-secondary` mapped in `peregrine.css`
- [ ] Keyboard composables: accept reactive getters, not snapshot arrays
- [x] FastAPI SPA serving pattern documented — apply when wiring FastAPI
- [ ] Font reflow: measure layout after `document.fonts.ready` or 1s timeout
- [x] Haptics: guard `navigator.vibrate` with feature detection
- [x] Pinia: use setup store form (function syntax)
- [x] Tests: mock matchMedia, vibrate, ResizeObserver in test-setup.ts
- [x] `min-height: 100dvh` on full-height layout containers

View file

@ -1,4 +1,4 @@
name: job-seeker
name: cf
# Recreate: conda env create -f environment.yml
# Update pinned snapshot: conda env export --no-builds > environment.yml
channels:
@ -48,6 +48,12 @@ dependencies:
# ── Notion integration ────────────────────────────────────────────────────
- notion-client>=3.0
# ── Calendar integrations ─────────────────────────────────────────────────
- caldav>=1.3
- icalendar>=5.0
- google-api-python-client>=2.0
- google-auth>=2.0
# ── Document handling ─────────────────────────────────────────────────────
- pypdf
- pdfminer-six

View file

@ -32,7 +32,10 @@ usage() {
echo -e " ${GREEN}logs [service]${NC} Tail logs (default: app)"
echo -e " ${GREEN}update${NC} Pull latest images + rebuild app"
echo -e " ${GREEN}preflight${NC} Check ports + resources; write .env"
echo -e " ${GREEN}models${NC} Check ollama models in config; pull any missing"
echo -e " ${GREEN}test${NC} Run test suite"
echo -e " ${GREEN}e2e [mode]${NC} Run E2E tests (mode: demo|cloud|local, default: demo)"
echo -e " Set E2E_HEADLESS=false to run headed via Xvfb"
echo -e " ${GREEN}prepare-training${NC} Extract cover letters → training JSONL"
echo -e " ${GREEN}finetune${NC} Run LoRA fine-tune (needs GPU profile)"
echo -e " ${GREEN}clean${NC} Remove containers, images, volumes (DESTRUCTIVE)"
@ -89,6 +92,12 @@ case "$CMD" in
make preflight PROFILE="$PROFILE"
;;
models)
# Delegates to scripts/preflight.py in --models-only mode, executed inside the "cf" conda env.
info "Checking ollama models..."
conda run -n cf python scripts/preflight.py --models-only
success "Model check complete."
;;
start)
info "Starting Peregrine (PROFILE=${PROFILE})..."
make start PROFILE="$PROFILE"
@ -131,7 +140,7 @@ case "$CMD" in
&& echo "docker compose" \
|| (command -v podman >/dev/null 2>&1 && echo "podman compose" || echo "podman-compose"))"
$COMPOSE pull searxng ollama 2>/dev/null || true
$COMPOSE build app
$COMPOSE build app web
success "Update complete. Run './manage.sh restart' to apply."
;;
@ -170,6 +179,24 @@ case "$CMD" in
fi
;;
e2e)
  # Run the E2E test suite.
  #   $2      mode (demo|cloud|local), defaults to "demo"
  #   $3...   extra arguments forwarded verbatim to pytest
  # Env: E2E_HEADLESS=false wraps the run in xvfb-run so a headed browser has a display.
  MODE="${2:-demo}"
  RESULTS_DIR="tests/e2e/results/${MODE}"
  mkdir -p "${RESULTS_DIR}"
  HEADLESS="${E2E_HEADLESS:-true}"

  # Build the command as an array: a plain string expanded unquoted would be
  # word-split, turning --server-args='-screen 0 1280x900x24' into three broken
  # words with literal quote characters.
  E2E_CMD=(
    conda run -n cf pytest tests/e2e/
    --mode="${MODE}"
    --json-report
    --json-report-file="${RESULTS_DIR}/report.json"
    -v "${@:3}"
  )
  if [ "$HEADLESS" = "false" ]; then
    E2E_CMD=(
      xvfb-run --auto-servernum --server-args="-screen 0 1280x900x24"
      "${E2E_CMD[@]}"
    )
  fi

  info "Running E2E tests (mode=${MODE}, headless=${HEADLESS})..."
  "${E2E_CMD[@]}"
  ;;
help|--help|-h)
usage
;;

View file

@ -0,0 +1,97 @@
-- Migration 001: Baseline schema
-- Captures the full schema as of v0.8.5 (all columns including those added via ALTER TABLE)
-- jobs: `url` is UNIQUE, `status` defaults to 'pending', `is_remote` is an INTEGER defaulting to 0.
-- The *_at / *_date columns are stored as TEXT (presumably ISO-8601 strings — confirm against scripts/db.py).
CREATE TABLE IF NOT EXISTS jobs (
id INTEGER PRIMARY KEY AUTOINCREMENT,
title TEXT,
company TEXT,
url TEXT UNIQUE,
source TEXT,
location TEXT,
is_remote INTEGER DEFAULT 0,
salary TEXT,
description TEXT,
match_score REAL,
keyword_gaps TEXT,
date_found TEXT,
status TEXT DEFAULT 'pending',
notion_page_id TEXT,
cover_letter TEXT,
applied_at TEXT,
interview_date TEXT,
rejection_stage TEXT,
phone_screen_at TEXT,
interviewing_at TEXT,
offer_at TEXT,
hired_at TEXT,
survey_at TEXT,
calendar_event_id TEXT,
optimized_resume TEXT,
ats_gap_report TEXT
);
-- job_contacts: `job_id` presumably points at jobs.id, but no FOREIGN KEY constraint is declared here.
-- `is_response_needed` and `suggestion_dismissed` are INTEGER columns defaulting to 0.
CREATE TABLE IF NOT EXISTS job_contacts (
id INTEGER PRIMARY KEY AUTOINCREMENT,
job_id INTEGER,
direction TEXT,
subject TEXT,
from_addr TEXT,
to_addr TEXT,
body TEXT,
received_at TEXT,
is_response_needed INTEGER DEFAULT 0,
responded_at TEXT,
message_id TEXT,
stage_signal TEXT,
suggestion_dismissed INTEGER DEFAULT 0
);
-- company_research: `job_id` is UNIQUE, so there is at most one research row per job_id.
-- `scrape_used` is an INTEGER defaulting to 0.
CREATE TABLE IF NOT EXISTS company_research (
id INTEGER PRIMARY KEY AUTOINCREMENT,
job_id INTEGER UNIQUE,
generated_at TEXT,
company_brief TEXT,
ceo_brief TEXT,
talking_points TEXT,
raw_output TEXT,
tech_brief TEXT,
funding_brief TEXT,
competitors_brief TEXT,
red_flags TEXT,
scrape_used INTEGER DEFAULT 0,
accessibility_brief TEXT
);
-- background_tasks: `status` defaults to 'pending'.
-- NOTE(review): cancel_task() in scripts/db.py only matches status IN ('queued','running');
-- confirm whether rows are ever left at the 'pending' default.
CREATE TABLE IF NOT EXISTS background_tasks (
id INTEGER PRIMARY KEY AUTOINCREMENT,
task_type TEXT,
job_id INTEGER,
params TEXT,
status TEXT DEFAULT 'pending',
error TEXT,
created_at TEXT,
started_at TEXT,
finished_at TEXT,
stage TEXT,
updated_at TEXT
);
-- survey_responses: no UNIQUE constraint on `job_id`, so several rows may share one job_id.
-- `reported_score` is REAL; every other non-key column is TEXT.
CREATE TABLE IF NOT EXISTS survey_responses (
id INTEGER PRIMARY KEY AUTOINCREMENT,
job_id INTEGER,
survey_name TEXT,
received_at TEXT,
source TEXT,
raw_input TEXT,
image_path TEXT,
mode TEXT,
llm_output TEXT,
reported_score REAL,
created_at TEXT
);
-- digest_queue: at most one row per `job_contact_id` (UNIQUE).
-- NOTE(review): CREATE_DIGEST_QUEUE in scripts/db.py declares job_contact_id as
-- NOT NULL REFERENCES job_contacts(id) and gives created_at a datetime('now') default;
-- this baseline declares neither — confirm which definition is intended.
CREATE TABLE IF NOT EXISTS digest_queue (
id INTEGER PRIMARY KEY AUTOINCREMENT,
job_contact_id INTEGER UNIQUE,
created_at TEXT
);

View file

@ -2,6 +2,15 @@
# Extracted from environment.yml for Docker pip installs
# Keep in sync with environment.yml
# ── CircuitForge shared core ───────────────────────────────────────────────
# Requires circuitforge-core >= 0.8.0 (config.load_env, db, tasks; resources moved to circuitforge-orch).
# Local dev / Docker (parent-context build): path install works because
# circuitforge-core/ is a sibling directory.
# CI / fresh checkouts: falls back to the Forgejo VCS URL below.
# To use local editable install run: pip install -e ../circuitforge-core
# TODO: pin to a release tag (>= v0.8.0, per the requirement above) once cf-core cuts one.
git+https://git.opensourcesolarpunk.com/Circuit-Forge/circuitforge-core.git@main
# ── Web UI ────────────────────────────────────────────────────────────────
streamlit>=1.35
watchdog
@ -13,6 +22,8 @@ streamlit-paste-button>=0.1.0
# ── Job scraping ──────────────────────────────────────────────────────────
python-jobspy>=1.1
playwright>=1.40
pytest-playwright>=0.4
pytest-json-report>=1.5
selenium
undetected-chromedriver
webdriver-manager
@ -76,3 +87,10 @@ lxml
# ── Documentation ────────────────────────────────────────────────────────
mkdocs>=1.5
mkdocs-material>=9.5
# ── Vue SPA API backend ──────────────────────────────────────────────────
fastapi>=0.100.0
uvicorn[standard]>=0.20.0
PyJWT>=2.8.0
cryptography>=40.0.0
python-multipart>=0.0.6

119
scripts/calendar_push.py Normal file
View file

@ -0,0 +1,119 @@
"""calendar_push.py — push interview events to connected calendar integrations.
Supports Apple Calendar (CalDAV) and Google Calendar. Idempotent: a second
push updates the existing event rather than creating a duplicate.
"""
from __future__ import annotations
import uuid
import yaml
from datetime import datetime, timedelta, timezone
from pathlib import Path
from typing import Optional
from scripts.db import get_job_by_id, get_research, set_calendar_event_id, DEFAULT_DB
_CALENDAR_INTEGRATIONS = ("apple_calendar", "google_calendar")
# Stage label map matches 5_Interviews.py
_STAGE_LABELS = {
"phone_screen": "Phone Screen",
"interviewing": "Interview",
"offer": "Offer Review",
}
def _load_integration(name: str, config_dir: Path):
"""Instantiate and connect an integration from its saved config file."""
config_file = config_dir / "integrations" / f"{name}.yaml"
if not config_file.exists():
return None
with open(config_file) as f:
config = yaml.safe_load(f) or {}
if name == "apple_calendar":
from scripts.integrations.apple_calendar import AppleCalendarIntegration
integration = AppleCalendarIntegration()
elif name == "google_calendar":
from scripts.integrations.google_calendar import GoogleCalendarIntegration
integration = GoogleCalendarIntegration()
else:
return None
integration.connect(config)
return integration
def _build_event_details(job: dict, research: Optional[dict]) -> tuple[str, str]:
"""Return (title, description) for the calendar event."""
stage_label = _STAGE_LABELS.get(job.get("status", ""), "Interview")
title = f"{stage_label}: {job.get('title', 'Interview')} @ {job.get('company', '')}"
lines = []
if job.get("url"):
lines.append(f"Job listing: {job['url']}")
if research and research.get("company_brief"):
brief = research["company_brief"].strip()
# Trim to first 3 sentences so the event description stays readable
sentences = brief.split(". ")
lines.append("\n" + ". ".join(sentences[:3]) + ("." if len(sentences) > 1 else ""))
lines.append("\n— Sent by Peregrine (CircuitForge)")
return title, "\n".join(lines)
def push_interview_event(
    db_path: Path = DEFAULT_DB,
    job_id: int = None,
    config_dir: Path = None,
) -> dict:
    """Push (or update) an interview event on the first connected calendar integration.

    Args:
        db_path: Database to read the job and research rows from.
        job_id: Id of the job whose interview should be pushed.
        config_dir: Directory holding ``integrations/*.yaml``; defaults to the
            project's ``config/`` directory.

    Returns:
        {"ok": True, "provider": "apple_calendar", "event_id": "..."}
        {"ok": False, "error": "..."}

    The event is always scheduled at 12:00 UTC on the interview date and lasts
    one hour; any time-of-day in ``interview_date`` is overwritten. Integrations
    are tried in ``_CALENDAR_INTEGRATIONS`` order, and the first one with a
    config file decides the outcome — a failure there is returned rather than
    falling through to the next provider.
    """
    if config_dir is None:
        config_dir = Path(__file__).parent.parent / "config"

    job = get_job_by_id(db_path, job_id)
    if not job:
        return {"ok": False, "error": f"Job {job_id} not found"}

    interview_date = job.get("interview_date")
    if not interview_date:
        return {"ok": False, "error": "No interview date set — save a date first"}

    # Build datetimes: noon UTC, 1 hour duration
    try:
        base = datetime.fromisoformat(interview_date).replace(
            hour=12, minute=0, second=0, microsecond=0, tzinfo=timezone.utc
        )
    except (TypeError, ValueError):
        # TypeError covers a non-string value stored in interview_date.
        return {"ok": False, "error": f"Could not parse interview_date: {interview_date!r}"}
    start_dt = base
    end_dt = base + timedelta(hours=1)

    research = get_research(db_path, job_id)
    title, description = _build_event_details(job, research)

    existing_event_id = job.get("calendar_event_id")
    # Use a stable UID derived from job_id for CalDAV; gcal uses the returned event id
    uid = existing_event_id or f"peregrine-job-{job_id}@circuitforge.tech"

    for name in _CALENDAR_INTEGRATIONS:
        try:
            # Loading sits inside the try block so that a malformed YAML file,
            # a missing provider dependency or a failed connect() is reported
            # as {"ok": False} instead of propagating to the caller.
            integration = _load_integration(name, config_dir)
            if integration is None:
                continue
            if existing_event_id:
                event_id = integration.update_event(uid, title, start_dt, end_dt, description)
            else:
                event_id = integration.create_event(uid, title, start_dt, end_dt, description)
            set_calendar_event_id(db_path, job_id, event_id)
            return {"ok": True, "provider": name, "event_id": event_id}
        except Exception as exc:
            return {"ok": False, "error": str(exc)}

    return {"ok": False, "error": "No calendar integration configured — connect one in Settings → Integrations"}

198
scripts/credential_store.py Normal file
View file

@ -0,0 +1,198 @@
"""
Credential store abstraction for Peregrine.
Backends (set via CREDENTIAL_BACKEND env var):
auto — try keyring, fall back to file (default)
keyring — python-keyring (OS Keychain / SecretService / libsecret)
file — Fernet-encrypted JSON in config/credentials/ (key at config/.credential_key)
Env var references:
Any stored value matching ${VAR_NAME} is resolved from os.environ at read time.
Users can store "${IMAP_PASSWORD}" as the credential value; it is never treated
as the actual secret — only the env var it points to is used.
"""
import os
import re
import json
import logging
from pathlib import Path
from typing import Optional
logger = logging.getLogger(__name__)
_ENV_REF = re.compile(r'^\$\{([A-Z_][A-Z0-9_]*)\}$')
_PROJECT_ROOT = Path(__file__).parent.parent
CRED_DIR = _PROJECT_ROOT / "config" / "credentials"
KEY_PATH = _PROJECT_ROOT / "config" / ".credential_key"
def _resolve_env_ref(value: str) -> Optional[str]:
"""If value is ${VAR_NAME}, return os.environ[VAR_NAME]; otherwise return None."""
m = _ENV_REF.match(value)
if m:
resolved = os.environ.get(m.group(1))
if resolved is None:
logger.warning("Credential reference %s is set but env var is not defined", value)
return resolved
return None
def _get_backend() -> str:
backend = os.environ.get("CREDENTIAL_BACKEND", "auto").lower()
if backend != "auto":
return backend
# Auto: try keyring, fall back to file
try:
import keyring
kr = keyring.get_keyring()
# Reject the null/fail keyring — it can't actually store anything
if "fail" in type(kr).__name__.lower() or "null" in type(kr).__name__.lower():
raise RuntimeError("No usable keyring backend found")
return "keyring"
except Exception:
return "file"
def _get_fernet():
    """Build a Fernet cipher from the key at KEY_PATH, creating the key if absent.

    Returns ``None`` when the ``cryptography`` package cannot be imported.
    A newly generated key is written through a descriptor opened with mode 0o600.
    """
    try:
        from cryptography.fernet import Fernet
    except ImportError:
        return None

    if not KEY_PATH.exists():
        key = Fernet.generate_key()
        KEY_PATH.parent.mkdir(parents=True, exist_ok=True)
        key_fd = os.open(str(KEY_PATH), os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
        with os.fdopen(key_fd, "wb") as key_file:
            key_file.write(key)
        logger.info("Generated new credential encryption key at %s", KEY_PATH)
    else:
        key = KEY_PATH.read_bytes().strip()
    return Fernet(key)
def _file_read(service: str) -> dict:
    """Load the stored credentials for ``service`` from CRED_DIR.

    Decryption is attempted first when Fernet is available; if that fails the
    bytes are parsed as plaintext JSON (older files may be unencrypted).
    Returns ``{}`` when the file is missing or cannot be parsed.
    """
    cred_file = CRED_DIR / f"{service}.json"
    if not cred_file.exists():
        return {}

    raw = cred_file.read_bytes()
    fernet = _get_fernet()
    if fernet:
        try:
            return json.loads(fernet.decrypt(raw))
        except Exception:
            # May be an older plaintext file — fall through to the text parse
            pass

    try:
        return json.loads(raw.decode())
    except Exception:
        # Only the encrypted path reports the failure; without Fernet an
        # unreadable file is silently treated as empty.
        if fernet:
            logger.error("Failed to read credentials for service %s", service)
        return {}
def _file_write(service: str, data: dict) -> None:
    """Write the credentials file for a service, encrypting if possible.

    Args:
        service: Service name; the file is ``CRED_DIR/<service>.json``.
        data: Mapping of credential keys to values; serialised as JSON.

    The payload is Fernet-encrypted when ``cryptography`` is installed,
    otherwise it is stored as plaintext JSON and a warning is logged. In both
    cases the file is restricted to mode 0o600.
    """
    CRED_DIR.mkdir(parents=True, exist_ok=True)
    cred_file = CRED_DIR / f"{service}.json"

    content = json.dumps(data).encode()
    fernet = _get_fernet()
    if fernet:
        content = fernet.encrypt(content)
    else:
        logger.warning(
            "cryptography package not installed — storing credentials as plaintext with chmod 600. "
            "Install with: pip install cryptography"
        )

    fd = os.open(str(cred_file), os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    with os.fdopen(fd, "wb") as f:
        # The mode passed to os.open() only applies when the file is created.
        # A pre-existing file (e.g. an older plaintext one) keeps its old
        # permissions, so tighten them explicitly where the platform allows.
        if hasattr(os, "fchmod"):
            try:
                os.fchmod(f.fileno(), 0o600)
            except OSError as exc:
                logger.warning("Could not restrict permissions on %s: %s", cred_file, exc)
        f.write(content)
def get_credential(service: str, key: str) -> Optional[str]:
    """
    Fetch the credential stored under ``service``/``key``.

    A stored value of the form ${VAR} is treated as a pointer: the result is
    that environment variable's current value, or None if it is not defined.
    Returns None when nothing is stored (or the keyring lookup fails).
    """
    stored: Optional[str] = None
    if _get_backend() == "keyring":
        try:
            import keyring
            stored = keyring.get_password(service, key)
        except Exception as e:
            logger.error("keyring get failed for %s/%s: %s", service, key, e)
    else:  # any other backend name is served from the file store
        stored = _file_read(service).get(key)

    if stored is None:
        return None
    if _ENV_REF.match(stored):
        # Env var reference: yields the variable's value, or None when unset.
        return _resolve_env_ref(stored)
    return stored
def set_credential(service: str, key: str, value: str) -> None:
    """
    Persist ``value`` under ``service``/``key``.

    ``value`` may be a literal secret or a ${VAR_NAME} reference; references
    are saved verbatim and only resolved when read back. An empty value is a
    no-op. If the keyring backend raises, the value is written to the file
    store instead.
    """
    if not value:
        return

    if _get_backend() == "keyring":
        try:
            import keyring
            keyring.set_password(service, key, value)
        except Exception as e:
            logger.error("keyring set failed for %s/%s: %s — falling back to file", service, key, e)
        else:
            return

    # file backend (also the fallback after a keyring failure)
    stored = _file_read(service)
    stored[key] = value
    _file_write(service, stored)
def delete_credential(service: str, key: str) -> None:
    """Delete the credential stored under ``service``/``key``.

    With the keyring backend a successful delete ends the call; if keyring
    raises, the file store is cleaned up instead. The service's file is removed
    entirely once its last key is gone.
    """
    if _get_backend() == "keyring":
        try:
            import keyring
            keyring.delete_password(service, key)
        except Exception:
            pass  # fall through to the file store
        else:
            return

    remaining = _file_read(service)
    remaining.pop(key, None)
    if remaining:
        _file_write(service, remaining)
        return

    cred_file = CRED_DIR / f"{service}.json"
    if cred_file.exists():
        cred_file.unlink()

View file

@ -70,7 +70,7 @@ def scrape(profile: dict, location: str, results_wanted: int = 50) -> list[dict]
print(f" [adzuna] Skipped — {exc}")
return []
titles = profile.get("titles", [])
titles = profile.get("titles") or profile.get("job_titles", [])
hours_old = profile.get("hours_old", 240)
max_days_old = max(1, hours_old // 24)
is_remote_search = location.lower() == "remote"

View file

@ -121,7 +121,7 @@ def scrape(profile: dict, location: str, results_wanted: int = 50) -> list[dict]
return []
metros = [metro]
titles: list[str] = profile.get("titles", [])
titles: list[str] = profile.get("titles") or profile.get("job_titles", [])
hours_old: int = profile.get("hours_old", 240)
cutoff = datetime.now(tz=timezone.utc).timestamp() - (hours_old * 3600)

View file

@ -107,7 +107,7 @@ def scrape(profile: dict, location: str, results_wanted: int = 50) -> list[dict]
)
page = ctx.new_page()
for title in profile.get("titles", []):
for title in (profile.get("titles") or profile.get("job_titles", [])):
if len(results) >= results_wanted:
break

View file

@ -9,30 +9,14 @@ from datetime import datetime
from pathlib import Path
from typing import Optional
from circuitforge_core.db import get_connection as _cf_get_connection
DEFAULT_DB = Path(os.environ.get("STAGING_DB", Path(__file__).parent.parent / "staging.db"))
def get_connection(db_path: Path = DEFAULT_DB, key: str = "") -> "sqlite3.Connection":
"""
Open a database connection.
In cloud mode with a key: uses SQLCipher (AES-256 encrypted, API-identical to sqlite3).
Otherwise: vanilla sqlite3.
Args:
db_path: Path to the SQLite/SQLCipher database file.
key: SQLCipher encryption key (hex string). Empty = unencrypted.
"""
import os as _os
cloud_mode = _os.environ.get("CLOUD_MODE", "").lower() in ("1", "true", "yes")
if cloud_mode and key:
from pysqlcipher3 import dbapi2 as _sqlcipher
conn = _sqlcipher.connect(str(db_path))
conn.execute(f"PRAGMA key='{key}'")
return conn
else:
import sqlite3 as _sqlite3
return _sqlite3.connect(str(db_path))
"""Thin shim — delegates to circuitforge_core.db.get_connection."""
return _cf_get_connection(db_path, key)
CREATE_JOBS = """
@ -137,6 +121,15 @@ CREATE TABLE IF NOT EXISTS survey_responses (
);
"""
CREATE_DIGEST_QUEUE = """
CREATE TABLE IF NOT EXISTS digest_queue (
id INTEGER PRIMARY KEY,
job_contact_id INTEGER NOT NULL REFERENCES job_contacts(id),
created_at TEXT DEFAULT (datetime('now')),
UNIQUE(job_contact_id)
)
"""
_MIGRATIONS = [
("cover_letter", "TEXT"),
("applied_at", "TEXT"),
@ -147,6 +140,9 @@ _MIGRATIONS = [
("offer_at", "TEXT"),
("hired_at", "TEXT"),
("survey_at", "TEXT"),
("calendar_event_id", "TEXT"),
("optimized_resume", "TEXT"), # ATS-rewritten resume text (paid tier)
("ats_gap_report", "TEXT"), # JSON gap report (free tier)
]
@ -192,6 +188,7 @@ def init_db(db_path: Path = DEFAULT_DB) -> None:
conn.execute(CREATE_COMPANY_RESEARCH)
conn.execute(CREATE_BACKGROUND_TASKS)
conn.execute(CREATE_SURVEY_RESPONSES)
conn.execute(CREATE_DIGEST_QUEUE)
conn.commit()
conn.close()
_migrate_db(db_path)
@ -316,6 +313,38 @@ def update_cover_letter(db_path: Path = DEFAULT_DB, job_id: int = None, text: st
conn.close()
def save_optimized_resume(db_path: Path = DEFAULT_DB, job_id: int = None,
                          text: str = "", gap_report: str = "") -> None:
    """Persist ATS-optimized resume text and/or gap report for a job.

    Args:
        db_path: SQLite database file.
        job_id: Row id in ``jobs``; the call is a no-op when it is None.
        text: Optimized resume text.
        gap_report: Gap report payload.

    Both columns are written on every call: an empty ``text`` or ``gap_report``
    is stored as NULL, overwriting whatever value was there before.
    """
    if job_id is None:
        return
    conn = sqlite3.connect(db_path)
    try:
        conn.execute(
            "UPDATE jobs SET optimized_resume = ?, ats_gap_report = ? WHERE id = ?",
            (text or None, gap_report or None, job_id),
        )
        conn.commit()
    finally:
        # Release the connection even if the UPDATE or commit raises.
        conn.close()
def get_optimized_resume(db_path: Path = DEFAULT_DB, job_id: int = None) -> dict:
    """Return optimized_resume and ats_gap_report for a job.

    Args:
        db_path: SQLite database file.
        job_id: Row id in ``jobs``.

    Returns:
        Dict with keys ``optimized_resume`` and ``ats_gap_report``. Values are
        empty strings when ``job_id`` is None, the row does not exist, or the
        column is NULL.
    """
    if job_id is None:
        return {"optimized_resume": "", "ats_gap_report": ""}
    conn = sqlite3.connect(db_path)
    try:
        conn.row_factory = sqlite3.Row
        row = conn.execute(
            "SELECT optimized_resume, ats_gap_report FROM jobs WHERE id = ?", (job_id,)
        ).fetchone()
    finally:
        # Close the connection even if the query raises.
        conn.close()
    if not row:
        return {"optimized_resume": "", "ats_gap_report": ""}
    return {
        "optimized_resume": row["optimized_resume"] or "",
        "ats_gap_report": row["ats_gap_report"] or "",
    }
_UPDATABLE_JOB_COLS = {
"title", "company", "url", "source", "location", "is_remote",
"salary", "description", "match_score", "keyword_gaps",
@ -354,6 +383,19 @@ def mark_applied(db_path: Path = DEFAULT_DB, ids: list[int] = None) -> None:
conn.close()
def cancel_task(db_path: Path = DEFAULT_DB, task_id: int = 0) -> bool:
    """Cancel a single queued/running task by id.

    The task is marked ``failed`` with error ``'Cancelled by user'`` and
    ``finished_at`` set to now. Tasks in any other status are left untouched.

    Returns:
        True if a row was updated, False otherwise.
    """
    conn = sqlite3.connect(db_path)
    try:
        count = conn.execute(
            "UPDATE background_tasks SET status='failed', error='Cancelled by user',"
            " finished_at=datetime('now') WHERE id=? AND status IN ('queued','running')",
            (task_id,),
        ).rowcount
        conn.commit()
    finally:
        # Close the connection even if the UPDATE or COMMIT raises.
        conn.close()
    return count > 0
def kill_stuck_tasks(db_path: Path = DEFAULT_DB) -> int:
"""Mark all queued/running background tasks as failed. Returns count killed."""
conn = sqlite3.connect(db_path)
@ -508,6 +550,15 @@ def set_interview_date(db_path: Path = DEFAULT_DB, job_id: int = None,
conn.close()
def set_calendar_event_id(db_path: Path = DEFAULT_DB, job_id: int = None,
                          event_id: str = "") -> None:
    """Persist the calendar event ID returned after a successful push.

    Args:
        db_path: SQLite database file.
        job_id: Row id in ``jobs``; the call is a no-op when None
            (``WHERE id = NULL`` would match no rows anyway).
        event_id: Identifier returned by the calendar integration.
    """
    if job_id is None:
        return
    conn = sqlite3.connect(db_path)
    try:
        conn.execute("UPDATE jobs SET calendar_event_id = ? WHERE id = ?", (event_id, job_id))
        conn.commit()
    finally:
        # Close the connection even if the UPDATE or COMMIT raises.
        conn.close()
# ── Contact log helpers ───────────────────────────────────────────────────────
def add_contact(db_path: Path = DEFAULT_DB, job_id: int = None,

73
scripts/db_migrate.py Normal file
View file

@ -0,0 +1,73 @@
"""
db_migrate.py — Rails-style numbered SQL migration runner for Peregrine user DBs.

Migration files live in migrations/ (sibling to this script's parent directory),
named NNN_description.sql (e.g. 001_baseline.sql). They are applied in sorted
order and tracked in the schema_migrations table so each runs exactly once.

Usage:
    from scripts.db_migrate import migrate_db
    migrate_db(Path("/path/to/user.db"))
"""
import logging
import re
import sqlite3
from pathlib import Path

log = logging.getLogger(__name__)

# Resolved at import time: peregrine repo root / migrations/
_MIGRATIONS_DIR = Path(__file__).parent.parent / "migrations"

_CREATE_MIGRATIONS_TABLE = """
CREATE TABLE IF NOT EXISTS schema_migrations (
    version TEXT PRIMARY KEY,
    applied_at TEXT NOT NULL DEFAULT (datetime('now'))
)
"""

# Scripts containing any of these statements at the start of a line either
# manage their own transaction or use statements that cannot run inside one
# (e.g. VACUUM, some PRAGMAs). They are executed exactly as written.
_SELF_MANAGED_RE = re.compile(
    r"^\s*(?:BEGIN|COMMIT|END|ROLLBACK|SAVEPOINT|RELEASE|PRAGMA|VACUUM)\b",
    re.IGNORECASE | re.MULTILINE,
)


def migrate_db(db_path: Path, migrations_dir: "Path | None" = None) -> list[str]:
    """Apply any pending migrations to db_path.

    Args:
        db_path: SQLite database file to migrate.
        migrations_dir: Directory holding the ``*.sql`` files. Defaults to the
            repo-level ``migrations/`` directory.

    Returns:
        The versions (file stems) applied by this call, in order.

    Raises:
        RuntimeError: If a migration fails; earlier migrations stay applied.
    """
    applied: list[str] = []
    source_dir = Path(migrations_dir) if migrations_dir is not None else _MIGRATIONS_DIR
    db_name = Path(db_path).name
    con = sqlite3.connect(db_path)
    try:
        con.execute(_CREATE_MIGRATIONS_TABLE)
        con.commit()

        if not source_dir.is_dir():
            log.warning("migrations/ directory not found at %s — skipping", source_dir)
            return applied

        migration_files = sorted(source_dir.glob("*.sql"))
        if not migration_files:
            return applied

        already_applied = {
            row[0] for row in con.execute("SELECT version FROM schema_migrations")
        }

        for path in migration_files:
            version = path.stem  # e.g. "001_baseline"
            if version in already_applied:
                continue

            sql = path.read_text(encoding="utf-8")
            log.info("Applying migration %s to %s", version, db_name)
            try:
                if _SELF_MANAGED_RE.search(sql):
                    con.executescript(sql)
                else:
                    # executescript() performs no transaction control of its
                    # own, so without an explicit BEGIN every statement is
                    # committed as it runs and rollback() could not undo a
                    # half-applied script. Opening the transaction here makes
                    # the script and its bookkeeping row atomic.
                    con.executescript("BEGIN;\n" + sql)
                con.execute(
                    "INSERT INTO schema_migrations (version) VALUES (?)", (version,)
                )
                con.commit()
                applied.append(version)
                log.info("Migration %s applied successfully", version)
            except Exception as exc:
                con.rollback()
                log.error("Migration %s failed: %s", version, exc)
                raise RuntimeError(f"Migration {version} failed: {exc}") from exc
    finally:
        con.close()

    return applied

View file

@ -34,17 +34,21 @@ CUSTOM_SCRAPERS: dict[str, object] = {
}
def load_config() -> tuple[dict, dict]:
profiles = yaml.safe_load(PROFILES_CFG.read_text())
notion_cfg = yaml.safe_load(NOTION_CFG.read_text())
def load_config(config_dir: Path | None = None) -> tuple[dict, dict]:
cfg = config_dir or CONFIG_DIR
profiles_path = cfg / "search_profiles.yaml"
notion_path = cfg / "notion.yaml"
profiles = yaml.safe_load(profiles_path.read_text())
notion_cfg = yaml.safe_load(notion_path.read_text()) if notion_path.exists() else {"field_map": {}, "token": None, "database_id": None}
return profiles, notion_cfg
def load_blocklist() -> dict:
def load_blocklist(config_dir: Path | None = None) -> dict:
"""Load global blocklist config. Returns dict with companies, industries, locations lists."""
if not BLOCKLIST_CFG.exists():
blocklist_path = (config_dir or CONFIG_DIR) / "blocklist.yaml"
if not blocklist_path.exists():
return {"companies": [], "industries": [], "locations": []}
raw = yaml.safe_load(BLOCKLIST_CFG.read_text()) or {}
raw = yaml.safe_load(blocklist_path.read_text()) or {}
return {
"companies": [c.lower() for c in raw.get("companies", []) if c],
"industries": [i.lower() for i in raw.get("industries", []) if i],
@ -117,10 +121,15 @@ def push_to_notion(notion: Client, db_id: str, job: dict, fm: dict) -> None:
)
def run_discovery(db_path: Path = DEFAULT_DB, notion_push: bool = False) -> None:
profiles_cfg, notion_cfg = load_config()
fm = notion_cfg["field_map"]
blocklist = load_blocklist()
def run_discovery(db_path: Path = DEFAULT_DB, notion_push: bool = False, config_dir: Path | None = None) -> None:
# In cloud mode, config_dir is the per-user config directory derived from db_path.
# Falls back to the app-level /app/config for single-tenant deployments.
resolved_cfg = config_dir or Path(db_path).parent / "config"
if not resolved_cfg.exists():
resolved_cfg = CONFIG_DIR
profiles_cfg, notion_cfg = load_config(resolved_cfg)
fm = notion_cfg.get("field_map") or {}
blocklist = load_blocklist(resolved_cfg)
_bl_summary = {k: len(v) for k, v in blocklist.items() if v}
if _bl_summary:
@ -196,20 +205,30 @@ def run_discovery(db_path: Path = DEFAULT_DB, notion_push: bool = False) -> None
exclude_kw = [kw.lower() for kw in profile.get("exclude_keywords", [])]
results_per_board = profile.get("results_per_board", 25)
# Map remote_preference → JobSpy is_remote param:
# 'remote' → True (remote-only listings)
# 'onsite' → False (on-site-only listings)
# 'both' → None (no filter — JobSpy default)
_rp = profile.get("remote_preference", "both")
_is_remote: bool | None = True if _rp == "remote" else (False if _rp == "onsite" else None)
for location in profile["locations"]:
# ── JobSpy boards ──────────────────────────────────────────────────
if boards:
print(f" [jobspy] {location} — boards: {', '.join(boards)}")
try:
jobs: pd.DataFrame = scrape_jobs(
jobspy_kwargs: dict = dict(
site_name=boards,
search_term=" OR ".join(f'"{t}"' for t in profile["titles"]),
search_term=" OR ".join(f'"{t}"' for t in (profile.get("titles") or profile.get("job_titles", []))),
location=location,
results_wanted=results_per_board,
hours_old=profile.get("hours_old", 72),
linkedin_fetch_description=True,
)
if _is_remote is not None:
jobspy_kwargs["is_remote"] = _is_remote
jobs: pd.DataFrame = scrape_jobs(**jobspy_kwargs)
print(f" [jobspy] {len(jobs)} raw results")
except Exception as exc:
print(f" [jobspy] ERROR: {exc}")

View file

@ -26,13 +26,14 @@ LETTERS_DIR = _profile.docs_dir if _profile else Path.home() / "Documents" / "Jo
LETTER_GLOB = "*Cover Letter*.md"
# Background injected into every prompt so the model has the candidate's facts
def _build_system_context() -> str:
if not _profile:
def _build_system_context(profile=None) -> str:
p = profile or _profile
if not p:
return "You are a professional cover letter writer. Write in first person."
parts = [f"You are writing cover letters for {_profile.name}. {_profile.career_summary}"]
if _profile.candidate_voice:
parts = [f"You are writing cover letters for {p.name}. {p.career_summary}"]
if p.candidate_voice:
parts.append(
f"Voice and personality: {_profile.candidate_voice} "
f"Voice and personality: {p.candidate_voice} "
"Write in a way that reflects these authentic traits — not as a checklist, "
"but as a natural expression of who this person is."
)
@ -125,15 +126,17 @@ _MISSION_DEFAULTS: dict[str, str] = {
}
def _build_mission_notes() -> dict[str, str]:
def _build_mission_notes(profile=None, candidate_name: str | None = None) -> dict[str, str]:
"""Merge user's custom mission notes with generic defaults."""
prefs = _profile.mission_preferences if _profile else {}
p = profile or _profile
name = candidate_name or _candidate
prefs = p.mission_preferences if p else {}
notes = {}
for industry, default_note in _MISSION_DEFAULTS.items():
custom = (prefs.get(industry) or "").strip()
if custom:
notes[industry] = (
f"Mission alignment — {_candidate} shared: \"{custom}\". "
f"Mission alignment — {name} shared: \"{custom}\". "
"Para 3 should warmly and specifically reflect this authentic connection."
)
else:
@ -144,12 +147,15 @@ def _build_mission_notes() -> dict[str, str]:
_MISSION_NOTES = _build_mission_notes()
def detect_mission_alignment(company: str, description: str) -> str | None:
def detect_mission_alignment(
company: str, description: str, mission_notes: dict | None = None
) -> str | None:
"""Return a mission hint string if company/JD matches a preferred industry, else None."""
notes = mission_notes if mission_notes is not None else _MISSION_NOTES
text = f"{company} {description}".lower()
for industry, signals in _MISSION_SIGNALS.items():
if any(sig in text for sig in signals):
return _MISSION_NOTES[industry]
return notes[industry]
return None
@ -190,10 +196,14 @@ def build_prompt(
examples: list[dict],
mission_hint: str | None = None,
is_jobgether: bool = False,
system_context: str | None = None,
candidate_name: str | None = None,
) -> str:
parts = [SYSTEM_CONTEXT.strip(), ""]
ctx = system_context if system_context is not None else SYSTEM_CONTEXT
name = candidate_name or _candidate
parts = [ctx.strip(), ""]
if examples:
parts.append(f"=== STYLE EXAMPLES ({_candidate}'s past letters) ===\n")
parts.append(f"=== STYLE EXAMPLES ({name}'s past letters) ===\n")
for i, ex in enumerate(examples, 1):
parts.append(f"--- Example {i} ({ex['company']}) ---")
parts.append(ex["text"])
@ -231,16 +241,17 @@ def build_prompt(
return "\n".join(parts)
def _trim_to_letter_end(text: str) -> str:
def _trim_to_letter_end(text: str, profile=None) -> str:
"""Remove repetitive hallucinated content after the first complete sign-off.
Fine-tuned models sometimes loop after completing the letter. This cuts at
the first closing + candidate name so only the intended letter is saved.
"""
candidate_first = (_profile.name.split()[0] if _profile else "").strip()
p = profile or _profile
candidate_first = (p.name.split()[0] if p else "").strip()
pattern = (
r'(?:Warm regards|Sincerely|Best regards|Kind regards|Thank you)[,.]?\s*\n+\s*'
+ (re.escape(candidate_first) if candidate_first else r'\w+')
+ (re.escape(candidate_first) if candidate_first else r'\w+(?:\s+\w+)?')
+ r'\b'
)
m = re.search(pattern, text, re.IGNORECASE)
@ -257,6 +268,8 @@ def generate(
feedback: str = "",
is_jobgether: bool = False,
_router=None,
config_path: "Path | None" = None,
user_yaml_path: "Path | None" = None,
) -> str:
"""Generate a cover letter and return it as a string.
@ -264,15 +277,29 @@ def generate(
and requested changes are appended to the prompt so the LLM revises rather
than starting from scratch.
user_yaml_path overrides the module-level profile required in cloud mode
so each user's name/voice/mission prefs are used instead of the global default.
_router is an optional pre-built LLMRouter (used in tests to avoid real LLM calls).
"""
# Per-call profile override (cloud mode: each user has their own user.yaml)
if user_yaml_path and Path(user_yaml_path).exists():
_prof = UserProfile(Path(user_yaml_path))
else:
_prof = _profile
sys_ctx = _build_system_context(_prof)
mission_notes = _build_mission_notes(_prof, candidate_name=(_prof.name if _prof else None))
candidate_name = _prof.name if _prof else _candidate
corpus = load_corpus()
examples = find_similar_letters(description or f"{title} {company}", corpus)
mission_hint = detect_mission_alignment(company, description)
mission_hint = detect_mission_alignment(company, description, mission_notes=mission_notes)
if mission_hint:
print(f"[cover-letter] Mission alignment detected for {company}", file=sys.stderr)
prompt = build_prompt(title, company, description, examples,
mission_hint=mission_hint, is_jobgether=is_jobgether)
mission_hint=mission_hint, is_jobgether=is_jobgether,
system_context=sys_ctx, candidate_name=candidate_name)
if previous_result:
prompt += f"\n\n---\nPrevious draft:\n{previous_result}"
@ -281,8 +308,9 @@ def generate(
if _router is None:
sys.path.insert(0, str(Path(__file__).parent.parent))
from scripts.llm_router import LLMRouter
_router = LLMRouter()
from scripts.llm_router import LLMRouter, CONFIG_PATH
resolved = config_path if (config_path and Path(config_path).exists()) else CONFIG_PATH
_router = LLMRouter(resolved)
print(f"[cover-letter] Generating for: {title} @ {company}", file=sys.stderr)
print(f"[cover-letter] Style examples: {[e['company'] for e in examples]}", file=sys.stderr)
@ -292,7 +320,7 @@ def generate(
# max_tokens=1200 caps generation at ~900 words — enough for any cover letter
# and prevents fine-tuned models from looping into repetitive garbage output.
result = _router.complete(prompt, max_tokens=1200)
return _trim_to_letter_end(result)
return _trim_to_letter_end(result, _prof)
def main() -> None:

View file

@ -698,21 +698,43 @@ def _parse_message(conn: imaplib.IMAP4, uid: bytes) -> Optional[dict]:
return None
msg = email.message_from_bytes(data[0][1])
body = ""
# Prefer text/html (preserves href attributes for digest link extraction);
# fall back to text/plain if no HTML part exists.
html_body = ""
plain_body = ""
if msg.is_multipart():
for part in msg.walk():
if part.get_content_type() == "text/plain":
ct = part.get_content_type()
if ct == "text/html" and not html_body:
try:
body = part.get_payload(decode=True).decode("utf-8", errors="replace")
html_body = part.get_payload(decode=True).decode("utf-8", errors="replace")
except Exception:
pass
elif ct == "text/plain" and not plain_body:
try:
plain_body = part.get_payload(decode=True).decode("utf-8", errors="replace")
except Exception:
pass
break
else:
ct = msg.get_content_type()
try:
body = msg.get_payload(decode=True).decode("utf-8", errors="replace")
raw = msg.get_payload(decode=True).decode("utf-8", errors="replace")
if ct == "text/html":
html_body = raw
else:
plain_body = raw
except Exception:
pass
if html_body:
# Strip <head>…</head> (CSS, meta, title) and any stray <style> blocks.
# Keeps <body> HTML intact so href attributes survive for digest extraction.
body = re.sub(r"<head[\s\S]*?</head>", "", html_body, flags=re.I)
body = re.sub(r"<style[\s\S]*?</style>", "", body, flags=re.I)
body = re.sub(r"<script[\s\S]*?</script>", "", body, flags=re.I)
else:
body = plain_body
mid = msg.get("Message-ID", "").strip()
if not mid:
return None # No Message-ID → can't dedup; skip to avoid repeat inserts
@ -723,7 +745,7 @@ def _parse_message(conn: imaplib.IMAP4, uid: bytes) -> Optional[dict]:
"from_addr": _decode_str(msg.get("From")),
"to_addr": _decode_str(msg.get("To")),
"date": _decode_str(msg.get("Date")),
"body": body[:4000],
"body": body, # no truncation — digest emails need full content
}
except Exception:
return None

View file

@ -1,4 +1,5 @@
from __future__ import annotations
from datetime import datetime, timedelta, timezone
from scripts.integrations.base import IntegrationBase
@ -46,3 +47,60 @@ class AppleCalendarIntegration(IntegrationBase):
return principal is not None
except Exception:
return False
def _get_calendar(self):
"""Return the configured caldav Calendar object."""
import caldav
client = caldav.DAVClient(
url=self._config["caldav_url"],
username=self._config["username"],
password=self._config["app_password"],
)
principal = client.principal()
cal_name = self._config.get("calendar_name", "Interviews")
for cal in principal.calendars():
if cal.name == cal_name:
return cal
# Calendar not found — create it
return principal.make_calendar(name=cal_name)
def create_event(self, uid: str, title: str, start_dt: datetime,
                 end_dt: datetime, description: str = "") -> str:
    """Add a single-event iCalendar document to the configured calendar.

    Returns the ``uid`` that was passed in; callers store it as
    calendar_event_id.
    """
    from icalendar import Calendar, Event

    vcal = Calendar()
    for key, value in (("prodid", "-//CircuitForge Peregrine//EN"), ("version", "2.0")):
        vcal.add(key, value)

    vevent = Event()
    for key, value in (
        ("uid", uid),
        ("summary", title),
        ("dtstart", start_dt),
        ("dtend", end_dt),
        ("description", description),
    ):
        vevent.add(key, value)
    vcal.add_component(vevent)

    target = self._get_calendar()
    target.add_event(vcal.to_ical().decode())
    return uid
def update_event(self, uid: str, title: str, start_dt: datetime,
                 end_dt: datetime, description: str = "") -> str:
    """Update an existing event by UID, or create it if the lookup fails.

    Only the lookup is guarded. An error while saving the updated event is
    raised to the caller rather than triggering a second create attempt,
    which would hide the real failure and could duplicate the event.

    Returns:
        The event UID.
    """
    from icalendar import Calendar, Event

    dav_cal = self._get_calendar()
    try:
        existing = dav_cal.event_by_uid(uid)
    except Exception:
        # Lookup failed — treat the event as missing and create it.
        return self.create_event(uid, title, start_dt, end_dt, description)

    cal = Calendar()
    cal.add("prodid", "-//CircuitForge Peregrine//EN")
    cal.add("version", "2.0")
    event = Event()
    event.add("uid", uid)
    event.add("summary", title)
    event.add("dtstart", start_dt)
    event.add("dtend", end_dt)
    event.add("description", description)
    cal.add_component(event)
    existing.data = cal.to_ical().decode()
    existing.save()
    return uid

View file

@ -1,5 +1,6 @@
from __future__ import annotations
import os
from datetime import datetime
from scripts.integrations.base import IntegrationBase
@ -26,6 +27,53 @@ class GoogleCalendarIntegration(IntegrationBase):
return bool(config.get("calendar_id") and config.get("credentials_json"))
def test(self) -> bool:
# TODO: use google-api-python-client calendars().get()
creds = os.path.expanduser(self._config.get("credentials_json", ""))
return os.path.exists(creds)
try:
service = self._build_service()
service.calendars().get(calendarId=self._config["calendar_id"]).execute()
return True
except Exception:
return False
def _build_service(self):
    """Build a Calendar v3 API client from the configured service-account file.

    ``credentials_json`` is user-expanded (``~``) before the key file is read.
    """
    from google.oauth2 import service_account
    from googleapiclient.discovery import build

    key_file = os.path.expanduser(self._config["credentials_json"])
    calendar_scopes = ["https://www.googleapis.com/auth/calendar"]
    credentials = service_account.Credentials.from_service_account_file(
        key_file, scopes=calendar_scopes
    )
    return build("calendar", "v3", credentials=credentials)
def _fmt(self, dt: datetime) -> str:
return dt.strftime("%Y-%m-%dT%H:%M:%S") + "Z"
def create_event(self, uid: str, title: str, start_dt: datetime,
                 end_dt: datetime, description: str = "") -> str:
    """Insert an event into the configured Google calendar.

    The Peregrine ``uid`` is stored as the private extended property
    ``peregrine_uid``. Returns the ``id`` field of the API response.
    """
    service = self._build_service()
    window = {
        edge: {"dateTime": self._fmt(moment), "timeZone": "UTC"}
        for edge, moment in (("start", start_dt), ("end", end_dt))
    }
    payload = {
        "summary": title,
        "description": description,
        **window,
        "extendedProperties": {"private": {"peregrine_uid": uid}},
    }
    request = service.events().insert(
        calendarId=self._config["calendar_id"], body=payload
    )
    return request.execute()["id"]
def update_event(self, uid: str, title: str, start_dt: datetime,
                 end_dt: datetime, description: str = "") -> str:
    """Update an existing Google Calendar event by its stored event ID.

    ``uid`` is the Google event id returned by create_event. The request uses
    ``events().patch`` so only the fields in ``body`` change. A full
    ``events().update`` replaces the whole resource and would drop anything
    not resent, including the ``peregrine_uid`` extended property written by
    create_event.

    Returns:
        The ``id`` field of the API response.
    """
    service = self._build_service()
    body = {
        "summary": title,
        "description": description,
        "start": {"dateTime": self._fmt(start_dt), "timeZone": "UTC"},
        "end": {"dateTime": self._fmt(end_dt), "timeZone": "UTC"},
    }
    result = service.events().patch(
        calendarId=self._config["calendar_id"], eventId=uid, body=body
    ).execute()
    return result["id"]

313
scripts/job_ranker.py Normal file
View file

@ -0,0 +1,313 @@
"""Job ranking engine — two-stage discovery → review pipeline.
Stage 1 (discover.py) scrapes a wide corpus and stores everything as 'pending'.
Stage 2 (this module) scores the corpus; GET /api/jobs/stack returns top-N best
matches for the user's current review session.
All signal functions return a float in [0, 1]. The final stack_score is 0–100.
Usage:
from scripts.job_ranker import rank_jobs
ranked = rank_jobs(jobs, search_titles, salary_min, salary_max, user_level)
"""
from __future__ import annotations
import math
import re
from datetime import datetime, timezone
# ── TUNING ─────────────────────────────────────────────────────────────────────
# Adjust these constants to change how jobs are ranked.
# All individual signal scores are normalised to [0, 1] before weighting.
# Weights should sum to ≤ 1.0; the remainder is unallocated slack.
# (The five weights below currently sum to exactly 1.00.)
W_RESUME_MATCH = 0.40 # TF-IDF cosine similarity stored as match_score (0–100 → 0–1)
W_TITLE_MATCH = 0.30 # seniority-aware title + domain keyword overlap
W_RECENCY = 0.15 # freshness — exponential decay from date_found
W_SALARY_FIT = 0.10 # salary range overlap vs user target (neutral when unknown)
W_DESC_QUALITY = 0.05 # posting completeness — penalises stub / ghost posts
# Keyword gap penalty: each missing keyword from the resume match costs points.
# Gaps are already partially captured by W_RESUME_MATCH (same TF-IDF source),
# so this is a soft nudge, not a hard filter.
GAP_PENALTY_PER_KEYWORD: float = 0.5 # points off per gap keyword (0–100 scale)
GAP_MAX_PENALTY: float = 5.0 # hard cap so a gap-heavy job can still rank
# Recency half-life: score halves every N days past date_found
RECENCY_HALF_LIFE: int = 7 # days
# Description word-count thresholds
DESC_MIN_WORDS: int = 50 # below this → scaled penalty
DESC_TARGET_WORDS: int = 200 # at or above → full quality score
# ── END TUNING ─────────────────────────────────────────────────────────────────
# ── Seniority level map ────────────────────────────────────────────────────────
# (level, [keywords that identify that level])
# Entries are matched as whole words/phrases by infer_seniority(); the padding
# spaces and trailing "," / "." in the entries are ignored when matching.
# Level 3 is the default (mid-level, no seniority modifier in title).
_SENIORITY_MAP: list[tuple[int, list[str]]] = [
    (1, ["intern", "internship", "trainee", "apprentice", "co-op", "coop"]),
    (2, ["entry level", "entry-level", "junior", "jr ", "jr.", "associate "]),
    (3, ["mid level", "mid-level", "intermediate"]),
    (4, ["senior ", "senior,", "sr ", "sr.", " lead ", "lead,", " ii ", " iii ",
         "specialist", "experienced"]),
    (5, ["staff ", "principal ", "architect ", "expert ", "distinguished"]),
    (6, ["director", "head of ", "manager ", "vice president", " vp "]),
    (7, ["chief", "cto", "cio", "cpo", "president", "founder"]),
]


def _keyword_pattern(keyword: str) -> "re.Pattern":
    """Compile a keyword into a regex that matches it only as a whole word/phrase."""
    core = re.escape(keyword.strip(" ,."))
    return re.compile(r"(?<![a-z0-9])" + core + r"(?![a-z0-9])")


# Pre-compiled whole-word patterns, one list per level; duplicates that differ
# only in padding/punctuation ("sr " vs "sr.") are collapsed.
_SENIORITY_PATTERNS = [
    (level, [_keyword_pattern(k) for k in dict.fromkeys(kw.strip(" ,.") for kw in keywords)])
    for level, keywords in _SENIORITY_MAP
]

# job_level − user_level → scoring multiplier
# Positive delta = job is more senior (stretch up = encouraged)
# Negative delta = job is below the user's level
_LEVEL_MULTIPLIER: dict[int, float] = {
    -4: 0.05, -3: 0.10, -2: 0.25, -1: 0.65,
    0: 1.00,
    1: 0.90, 2: 0.65, 3: 0.25, 4: 0.05,
}
_DEFAULT_LEVEL_MULTIPLIER = 0.05


# ── Seniority helpers ─────────────────────────────────────────────────────────
def infer_seniority(title: str) -> int:
    """Return seniority level 1–7 for a job or resume title. Defaults to 3.

    Keywords are matched as whole words, so "director" no longer hits "cto"
    and "internal" no longer hits "intern". When several keywords match, the
    highest level wins, except that a keyword lying inside a longer matched
    phrase is ignored ("president" inside "vice president").
    """
    text = (title or "").lower()
    hits = []  # (level, start, end) for every keyword occurrence
    for level, patterns in _SENIORITY_PATTERNS:
        for pattern in patterns:
            hits.extend((level, m.start(), m.end()) for m in pattern.finditer(text))
    if not hits:
        return 3

    def _shadowed(hit) -> bool:
        _, start, end = hit
        return any(
            o_start <= start and end <= o_end and (o_end - o_start) > (end - start)
            for _, o_start, o_end in hits
        )

    # The longest hit is never shadowed, so this max() always has an operand.
    return max(level for level, start, end in hits if not _shadowed((level, start, end)))
def seniority_from_experience(titles: list[str]) -> int:
    """Estimate the user's current level from their most recent experience titles.

    Averages the levels of the first three non-blank titles (the list is
    expected most-recent first). Falls back to 3 (mid-level) when there are
    none.

    The average is rounded half-up using integer arithmetic. ``round()`` would
    use banker's rounding and map 2.5 → 2 but 3.5 → 4.
    """
    if not titles:
        return 3
    # Skip None/blank entries instead of crashing on them.
    sample = [t for t in titles if t and t.strip()][:3]
    if not sample:
        return 3
    total = sum(infer_seniority(t) for t in sample)
    count = len(sample)
    # floor(total / count + 1/2) without floating point
    return (2 * total + count) // (2 * count)
def _strip_level_words(text: str) -> str:
"""Remove seniority/modifier words so domain keywords stand out."""
strip = {
"senior", "sr", "junior", "jr", "lead", "staff", "principal",
"associate", "entry", "mid", "intermediate", "experienced",
"director", "head", "manager", "architect", "chief", "intern",
"ii", "iii", "iv", "i",
}
return " ".join(w for w in text.lower().split() if w not in strip)
# ── Signal functions ──────────────────────────────────────────────────────────
def title_match_score(job_title: str, search_titles: list[str], user_level: int) -> float:
    """Score in [0, 1] for how well a job title fits the user's target titles.

    Two ingredients:
      - Domain overlap: for each search title, the fraction of its core words
        (level modifiers stripped, words of 3+ characters) that also appear
        in the job title; the best search title wins.
      - Seniority factor: multiplier looked up from the difference between
        the job's inferred level and ``user_level``.

    Returns 0.5 (neutral) when no search titles are configured.
    """
    if not search_titles:
        return 0.5  # neutral — user hasn't set title prefs yet

    delta = infer_seniority(job_title) - user_level
    factor = _LEVEL_MULTIPLIER.get(delta, _DEFAULT_LEVEL_MULTIPLIER)

    def _core(title: str) -> set:
        return {word for word in _strip_level_words(title).split() if len(word) > 2}

    job_words = _core(job_title)
    # Recall-biased: share of each search title's core words found in the job title.
    ratios = [
        len(wanted & job_words) / len(wanted)
        for wanted in (_core(candidate) for candidate in search_titles)
        if wanted
    ]
    best = max(ratios, default=0.0)

    # Domain match scaled by seniority fit, plus a small seniority-only
    # bonus (factor × 0.15) so a near-miss domain still benefits from
    # level alignment.
    return min(1.0, best * factor + factor * 0.15)
def recency_decay(date_found: str) -> float:
    """Freshness score that halves every RECENCY_HALF_LIFE days.

    Only the date part of ``date_found`` is used (anything from the first
    "T" or space onward is dropped) and it is taken as midnight UTC. Dates in
    the future count as zero days old. Any failure to parse returns 0.5
    (neutral).
    """
    try:
        # Accept "YYYY-MM-DD", "YYYY-MM-DD HH:MM:SS" and "YYYY-MM-DDTHH:MM:SS".
        date_part = date_found.partition("T")[0].partition(" ")[0]
        found_at = datetime.fromisoformat(date_part).replace(tzinfo=timezone.utc)
        elapsed = datetime.now(tz=timezone.utc) - found_at
        age_days = max(0.0, elapsed.total_seconds() / 86400)
        return math.exp(-math.log(2) * age_days / RECENCY_HALF_LIFE)
    except Exception:
        return 0.5
def _parse_salary_range(text: str | None) -> tuple[int | None, int | None]:
"""Extract (low, high) salary integers from free-text. Returns (None, None) on failure.
Handles: "$80k - $120k", "USD 80,000 - 120,000 per year", "£45,000",
"80000", "80K/yr", "80-120k", etc.
"""
if not text:
return None, None
normalized = re.sub(r"[$,£€₹¥\s]", "", text.lower())
# Match numbers optionally followed by 'k'
raw_nums = re.findall(r"(\d+(?:\.\d+)?)k?", normalized)
values = []
for n, full in zip(raw_nums, re.finditer(r"(\d+(?:\.\d+)?)(k?)", normalized)):
val = float(full.group(1))
if full.group(2): # ends with 'k'
val *= 1000
elif val < 1000: # bare numbers < 1000 are likely thousands (e.g., "80" in "80-120k")
val *= 1000
if val >= 10_000: # sanity: ignore clearly wrong values
values.append(int(val))
values = sorted(set(values))
if not values:
return None, None
return values[0], values[-1]
def salary_fit(
    salary_text: str | None,
    target_min: int | None,
    target_max: int | None,
) -> float:
    """Salary range overlap score in [0, 1].

    Returns 0.5 (neutral) when either range is unknown — a missing salary
    line is not inherently negative.

    Otherwise the score is the overlap between the job's range and the target
    range, as a fraction of the target span. A range that collapses to a
    single figure (job or target) is scored by containment instead: 1.0 when
    the figure lies inside the other range, else 0.0. Without this a job
    advertising exactly "$100,000" against a 90k–120k target would have
    zero-width overlap and score 0.0.
    """
    if not salary_text or (target_min is None and target_max is None):
        return 0.5

    job_low, job_high = _parse_salary_range(salary_text)
    if job_low is None:
        return 0.5

    t_min = target_min or 0
    t_max = target_max or (int(target_min * 1.5) if target_min else job_high or job_low)
    job_high = job_high or job_low

    if job_low == job_high:
        # Point salary: inside the target range or not.
        return 1.0 if t_min <= job_low <= t_max else 0.0
    if t_min == t_max:
        # Point target: covered by the job's range or not.
        return 1.0 if job_low <= t_min <= job_high else 0.0

    overlap_low = max(job_low, t_min)
    overlap_high = min(job_high, t_max)
    overlap = max(0, overlap_high - overlap_low)
    target_span = max(1, t_max - t_min)
    return min(1.0, overlap / target_span)
def description_quality(description: str | None) -> float:
    """Completeness score in [0, 1] based on the description's word count.

    - Missing/empty description: 0.0
    - Fewer than DESC_MIN_WORDS words: scaled linearly from 0 up to 0.4
    - DESC_TARGET_WORDS words or more: 1.0
    - In between: linear ramp from 0.4 to 1.0
    """
    if not description:
        return 0.0

    word_count = len(description.split())
    if word_count < DESC_MIN_WORDS:
        # Stub / ghost posting: steep penalty.
        return (word_count / DESC_MIN_WORDS) * 0.4
    if word_count >= DESC_TARGET_WORDS:
        return 1.0

    above_min = word_count - DESC_MIN_WORDS
    ramp_width = DESC_TARGET_WORDS - DESC_MIN_WORDS
    return 0.4 + 0.6 * above_min / ramp_width
# ── Composite scorer ──────────────────────────────────────────────────────────
def score_job(
    job: dict,
    search_titles: list[str],
    target_salary_min: int | None,
    target_salary_max: int | None,
    user_level: int,
) -> float:
    """Compute composite stack_score (0–100) for a single job dict.

    Args:
        job: Row dict from the jobs table (reads title, match_score,
            date_found, salary, description, keyword_gaps; missing or
            None values fall back to neutral defaults).
        search_titles: User's desired job titles (from search prefs).
        target_salary_min: Lower bound of the user's salary target, or None.
        target_salary_max: Upper bound of the user's salary target, or None.
        user_level: Inferred seniority level 1–7.

    Returns:
        A float 0–100. Higher = better match for this user's session.
    """
    # ── Individual signals (all 0–1) ──────────────────────────────────────────
    match_raw = job.get("match_score")
    if match_raw is None:
        s_resume = 0.5
    else:
        # Clamp so an out-of-range stored score cannot push the result past 0–100.
        s_resume = min(1.0, max(0.0, match_raw / 100.0))
    # `or ""` also covers a title that is present but None.
    s_title = title_match_score(job.get("title") or "", search_titles, user_level)
    s_recency = recency_decay(job.get("date_found", ""))
    s_salary = salary_fit(job.get("salary"), target_salary_min, target_salary_max)
    s_desc = description_quality(job.get("description"))

    # ── Weighted sum ──────────────────────────────────────────────────────────
    base = (
        W_RESUME_MATCH * s_resume
        + W_TITLE_MATCH * s_title
        + W_RECENCY * s_recency
        + W_SALARY_FIT * s_salary
        + W_DESC_QUALITY * s_desc
    )

    # ── Keyword gap penalty (applied on the 0–100 scale) ─────────────────────
    gaps_raw = job.get("keyword_gaps") or ""
    gap_count = len([g for g in gaps_raw.split(",") if g.strip()]) if gaps_raw else 0
    gap_penalty = min(GAP_MAX_PENALTY, gap_count * GAP_PENALTY_PER_KEYWORD) / 100.0

    return round(min(1.0, max(0.0, base - gap_penalty)) * 100, 1)
# ── Public API ────────────────────────────────────────────────────────────────
def rank_jobs(
    jobs: list[dict],
    search_titles: list[str],
    target_salary_min: int | None = None,
    target_salary_max: int | None = None,
    user_level: int = 3,
    limit: int = 10,
    min_score: float = 20.0,
) -> list[dict]:
    """Score jobs, drop those below ``min_score``, and return the best first.

    Args:
        jobs: List of job dicts (from DB or any source).
        search_titles: User's desired job titles from search prefs.
        target_salary_min: Lower bound of the user's salary target, or None.
        target_salary_max: Upper bound of the user's salary target, or None.
        user_level: Seniority level 1–7 (use seniority_from_experience()).
        limit: Stack size; 0 (or any non-positive value) returns all
            qualifying jobs.
        min_score: Minimum stack_score to include (0–100).

    Returns:
        New dicts (copies of the inputs with a 'stack_score' key added),
        sorted by stack_score descending.
    """
    qualifying = [
        {**job, "stack_score": score}
        for job in jobs
        if (
            score := score_job(
                job, search_titles, target_salary_min, target_salary_max, user_level
            )
        ) >= min_score
    ]
    ranked = sorted(qualifying, key=lambda entry: entry["stack_score"], reverse=True)
    if limit > 0:
        return ranked[:limit]
    return ranked

View file

@ -1,169 +1,46 @@
"""
LLM abstraction layer with priority fallback chain.
Reads config/llm.yaml. Tries backends in order; falls back on any error.
Config lookup order:
1. <repo>/config/llm.yaml per-install local config
2. ~/.config/circuitforge/llm.yaml user-level config (circuitforge-core default)
3. env-var auto-config (ANTHROPIC_API_KEY, OPENAI_API_KEY, OLLAMA_HOST, )
"""
import os
import yaml
import requests
from pathlib import Path
from openai import OpenAI
from circuitforge_core.llm import LLMRouter as _CoreLLMRouter
# Kept for backwards-compatibility — external callers that import CONFIG_PATH
# from this module continue to work.
CONFIG_PATH = Path(__file__).parent.parent / "config" / "llm.yaml"
class LLMRouter:
def __init__(self, config_path: Path = CONFIG_PATH):
with open(config_path) as f:
self.config = yaml.safe_load(f)
class LLMRouter(_CoreLLMRouter):
"""Peregrine-specific LLMRouter — tri-level config path priority.
def _is_reachable(self, base_url: str) -> bool:
    """Probe the backend's ``/health`` endpoint and report whether it answered.

    The health URL is ``base_url`` with any trailing slashes and a trailing
    ``/v1`` removed, plus ``/health``. Any HTTP status below 500 counts as
    reachable; any exception raised by the request counts as unreachable.
    """
    service_root = base_url.rstrip("/").removesuffix("/v1")
    try:
        status = requests.get(service_root + "/health", timeout=2).status_code
    except Exception:
        return False
    return status < 500
def _resolve_model(self, client: OpenAI, model: str) -> str:
"""Resolve __auto__ to the first model served by vLLM."""
if model != "__auto__":
return model
models = client.models.list()
return models.data[0].id
def complete(self, prompt: str, system: str | None = None,
model_override: str | None = None,
fallback_order: list[str] | None = None,
images: list[str] | None = None,
max_tokens: int | None = None) -> str:
When ``config_path`` is supplied (e.g. in tests) it is passed straight
through to the core. When omitted, the lookup order is:
1. <repo>/config/llm.yaml (per-install local config)
2. ~/.config/circuitforge/llm.yaml (user-level, circuitforge-core default)
3. env-var auto-config (ANTHROPIC_API_KEY, OPENAI_API_KEY, OLLAMA_HOST )
"""
Generate a completion. Tries each backend in fallback_order.
model_override: when set, replaces the configured model for
openai_compat backends (e.g. pass a research-specific ollama model).
fallback_order: when set, overrides config fallback_order for this
call (e.g. pass config["research_fallback_order"] for research tasks).
images: optional list of base64-encoded PNG/JPG strings. When provided,
backends without supports_images=true are skipped. vision_service backends
are only tried when images is provided.
Raises RuntimeError if all backends are exhausted.
"""
if os.environ.get("DEMO_MODE", "").lower() in ("1", "true", "yes"):
raise RuntimeError(
"AI inference is disabled in the public demo. "
"Run your own instance to use AI features."
)
order = fallback_order if fallback_order is not None else self.config["fallback_order"]
for name in order:
backend = self.config["backends"][name]
def __init__(self, config_path: Path | None = None) -> None:
if config_path is not None:
# Explicit path supplied — use it directly (e.g. tests, CLI override).
super().__init__(config_path)
return
if not backend.get("enabled", True):
print(f"[LLMRouter] {name}: disabled, skipping")
continue
supports_images = backend.get("supports_images", False)
is_vision_service = backend["type"] == "vision_service"
# vision_service only used when images provided
if is_vision_service and not images:
print(f"[LLMRouter] {name}: vision_service skipped (no images)")
continue
# non-vision backends skipped when images provided and they don't support it
if images and not supports_images and not is_vision_service:
print(f"[LLMRouter] {name}: no image support, skipping")
continue
if is_vision_service:
if not self._is_reachable(backend["base_url"]):
print(f"[LLMRouter] {name}: unreachable, skipping")
continue
try:
resp = requests.post(
backend["base_url"].rstrip("/") + "/analyze",
json={
"prompt": prompt,
"image_base64": images[0] if images else "",
},
timeout=60,
)
resp.raise_for_status()
print(f"[LLMRouter] Used backend: {name} (vision_service)")
return resp.json()["text"]
except Exception as e:
print(f"[LLMRouter] {name}: error — {e}, trying next")
continue
elif backend["type"] == "openai_compat":
if not self._is_reachable(backend["base_url"]):
print(f"[LLMRouter] {name}: unreachable, skipping")
continue
try:
client = OpenAI(
base_url=backend["base_url"],
api_key=backend.get("api_key") or "any",
)
raw_model = model_override or backend["model"]
model = self._resolve_model(client, raw_model)
messages = []
if system:
messages.append({"role": "system", "content": system})
if images and supports_images:
content = [{"type": "text", "text": prompt}]
for img in images:
content.append({
"type": "image_url",
"image_url": {"url": f"data:image/png;base64,{img}"},
})
messages.append({"role": "user", "content": content})
local = Path(__file__).parent.parent / "config" / "llm.yaml"
user_level = Path.home() / ".config" / "circuitforge" / "llm.yaml"
if local.exists():
super().__init__(local)
elif user_level.exists():
super().__init__(user_level)
else:
messages.append({"role": "user", "content": prompt})
create_kwargs: dict = {"model": model, "messages": messages}
if max_tokens is not None:
create_kwargs["max_tokens"] = max_tokens
resp = client.chat.completions.create(**create_kwargs)
print(f"[LLMRouter] Used backend: {name} ({model})")
return resp.choices[0].message.content
except Exception as e:
print(f"[LLMRouter] {name}: error — {e}, trying next")
continue
elif backend["type"] == "anthropic":
api_key = os.environ.get(backend["api_key_env"], "")
if not api_key:
print(f"[LLMRouter] {name}: {backend['api_key_env']} not set, skipping")
continue
try:
import anthropic as _anthropic
client = _anthropic.Anthropic(api_key=api_key)
if images and supports_images:
content = []
for img in images:
content.append({
"type": "image",
"source": {"type": "base64", "media_type": "image/png", "data": img},
})
content.append({"type": "text", "text": prompt})
else:
content = prompt
kwargs: dict = {
"model": backend["model"],
"max_tokens": 4096,
"messages": [{"role": "user", "content": content}],
}
if system:
kwargs["system"] = system
msg = client.messages.create(**kwargs)
print(f"[LLMRouter] Used backend: {name}")
return msg.content[0].text
except Exception as e:
print(f"[LLMRouter] {name}: error — {e}, trying next")
continue
raise RuntimeError("All LLM backends exhausted")
# No yaml found — let circuitforge-core's env-var auto-config run.
# The core default CONFIG_PATH (~/.config/circuitforge/llm.yaml)
# won't exist either, so _auto_config_from_env() will be triggered.
super().__init__()
# Module-level singleton for convenience

View file

@ -47,7 +47,7 @@ OVERRIDE_YML = ROOT / "compose.override.yml"
_SERVICES: dict[str, tuple[str, int, str, bool, bool]] = {
"streamlit": ("streamlit_port", 8501, "STREAMLIT_PORT", True, False),
"searxng": ("searxng_port", 8888, "SEARXNG_PORT", True, True),
"vllm": ("vllm_port", 8000, "VLLM_PORT", True, True),
# vllm removed — now managed by cf-orch (host process), not a Docker service
"vision": ("vision_port", 8002, "VISION_PORT", True, True),
"ollama": ("ollama_port", 11434, "OLLAMA_PORT", True, True),
"ollama_research": ("ollama_research_port", 11435, "OLLAMA_RESEARCH_PORT", True, True),
@ -65,7 +65,6 @@ _LLM_BACKENDS: dict[str, list[tuple[str, str]]] = {
_DOCKER_INTERNAL: dict[str, tuple[str, int]] = {
"ollama": ("ollama", 11434),
"ollama_research": ("ollama_research", 11434), # container-internal port is always 11434
"vllm": ("vllm", 8000),
"vision": ("vision", 8002),
"searxng": ("searxng", 8080), # searxng internal port differs from host port
}
@ -493,6 +492,12 @@ def main() -> None:
# binds a harmless free port instead of conflicting with the external service.
env_updates: dict[str, str] = {i["env_var"]: str(i["stub_port"]) for i in ports.values()}
env_updates["RECOMMENDED_PROFILE"] = profile
# When Ollama is adopted from the host process, write OLLAMA_HOST so
# LLMRouter's env-var auto-config finds it without needing config/llm.yaml.
ollama_info = ports.get("ollama")
if ollama_info and ollama_info.get("external"):
env_updates["OLLAMA_HOST"] = f"http://host.docker.internal:{ollama_info['resolved']}"
if offload_gb > 0:
env_updates["CPU_OFFLOAD_GB"] = str(offload_gb)
# GPU info for the app container (which lacks nvidia-smi access)

439
scripts/resume_optimizer.py Normal file
View file

@ -0,0 +1,439 @@
"""
ATS Resume Optimizer — rewrite a candidate's resume to maximize keyword match
for a specific job description without fabricating experience.
Tier behaviour:
    Free    — gap report only (extract_jd_signals + prioritize_gaps, no LLM rewrite)
    Paid    — full LLM rewrite targeting the JD (rewrite_for_ats)
    Premium — same as paid for now; fine-tuned voice model is a future enhancement
Pipeline:
    job.description
      → extract_jd_signals()   # TF-IDF gaps + LLM-extracted ATS signals
      → prioritize_gaps()      # rank by impact, map to resume sections
      → rewrite_for_ats()      # per-section LLM rewrite (paid+)
      → hallucination_check()  # reject rewrites that invent new experience
"""
from __future__ import annotations
import json
import logging
import re
from pathlib import Path
from typing import Any
log = logging.getLogger(__name__)
# ── Signal extraction ─────────────────────────────────────────────────────────
def extract_jd_signals(description: str, resume_text: str = "") -> list[str]:
    """Collect ATS keyword signals for a job description.

    Two sources are merged:
      1. Keyword gaps returned by ``scripts.match.match_score`` (only when
         ``resume_text`` is non-empty).
      2. Keywords the LLM extracts from the first 3000 characters of the
         description.

    Either source may fail independently; a failure is logged and that source
    contributes nothing.

    Args:
        description: Raw job description text.
        resume_text: Candidate's resume text, used for the gap computation.

    Returns:
        LLM signals followed by ``match_score`` gaps, de-duplicated
        case-insensitively with the first-seen spelling kept.
    """
    # Source 1: gaps from match.py — skipped entirely without a resume.
    gap_terms: list[str] = []
    if resume_text:
        try:
            from scripts.match import match_score
            _, gap_terms = match_score(resume_text, description)
        except Exception:
            log.warning("[resume_optimizer] TF-IDF gap extraction failed", exc_info=True)

    # Source 2: LLM extraction for phrasing and qualifier nuance.
    llm_terms: list[str] = []
    try:
        from scripts.llm_router import LLMRouter
        instructions = (
            "Extract the most important ATS (applicant tracking system) keywords and "
            "phrases from this job description. Focus on:\n"
            "- Required skills and technologies (exact phrasing matters)\n"
            "- Action verbs used to describe responsibilities\n"
            "- Qualification signals ('required', 'must have', 'preferred')\n"
            "- Industry-specific terminology\n\n"
            "Return a JSON array of strings only. No explanation.\n\n"
            f"Job description:\n{description[:3000]}"
        )
        reply = LLMRouter().complete(instructions)
        # The reply may wrap the array in markdown; take first "[" to last "]".
        array_span = re.search(r"\[.*\]", reply, re.DOTALL)
        if array_span is not None:
            llm_terms = json.loads(array_span.group(0))
            llm_terms = [
                item.strip()
                for item in llm_terms
                if isinstance(item, str) and item.strip()
            ]
    except Exception:
        log.warning("[resume_optimizer] LLM signal extraction failed", exc_info=True)

    # Merge with LLM signals first; dicts keep insertion order, so the first
    # spelling seen for each lowercase key wins.
    first_seen: dict[str, str] = {}
    for term in llm_terms + gap_terms:
        first_seen.setdefault(term.lower(), term)
    return list(first_seen.values())
# ── Gap prioritization ────────────────────────────────────────────────────────
# Map each gap term to the resume section where it would have the most ATS impact.
# ATS systems weight keywords higher in certain sections:
# skills — direct keyword match, highest density, indexed first
# summary — executive summary keywords often boost overall relevance score
# experience — verbs + outcomes in bullet points; adds context weight
#
# Keys are target section names; values are the keywords that route a gap term
# to that section. prioritize_gaps() lowercases each gap term before comparing
# it with these entries, so every entry here must be lowercase.
# There is no "experience" list: prioritize_gaps() sends any term that matches
# neither list below to the experience section.
_SECTION_KEYWORDS: dict[str, list[str]] = {
# Technologies and tools.
"skills": [
"python", "sql", "java", "typescript", "react", "vue", "docker",
"kubernetes", "aws", "gcp", "azure", "terraform", "ci/cd", "git",
"postgresql", "redis", "kafka", "spark", "tableau", "salesforce",
"jira", "figma", "excel", "powerpoint", "machine learning", "llm",
"deep learning", "pytorch", "tensorflow", "scikit-learn",
],
# Leadership, executive and business signals.
"summary": [
"leadership", "strategy", "vision", "executive", "director", "vp",
"growth", "transformation", "stakeholder", "cross-functional",
"p&l", "revenue", "budget", "board", "c-suite",
],
}
def prioritize_gaps(gaps: list[str], resume_sections: dict[str, Any]) -> list[dict]:
    """Rank keyword gaps and map each one to a target resume section.

    Routing, first match wins:
      1. term shares a whole word/phrase with a ``_SECTION_KEYWORDS["skills"]``
         entry   -> section "skills", priority 1
      2. same against ``_SECTION_KEYWORDS["summary"]``
                 -> section "summary", priority 1
      3. multi-word term   -> section "experience", priority 2
      4. single-word term  -> section "experience", priority 3

    Matching is on whole words, in either direction (keyword inside the term,
    or term inside a multi-word keyword). A raw substring test would route
    "excellent communication" to skills via "excel" and "supervision" to
    summary via "vision".

    Terms whose lowercase form already occurs in the flattened resume text
    are dropped; that presence test is a plain substring check.

    Args:
        gaps: Missing keyword signals, e.g. from ``extract_jd_signals()``.
            Non-string and blank items are ignored.
        resume_sections: Structured resume dict; ``None`` is treated as empty.

    Returns:
        List of dicts ordered by ``priority`` ascending (1 = highest impact
        first); ties keep the input order. Each dict has:
            "term":      str  -- the keyword/phrase to inject
            "section":   str  -- "skills", "summary" or "experience"
            "priority":  int  -- 1=high, 2=medium, 3=low
            "rationale": str  -- why this section was chosen
    """
    existing_text = _flatten_resume_text(resume_sections or {}).lower()

    def shares_keyword(term_lower: str, keywords: list[str]) -> bool:
        return any(
            _has_whole_phrase(term_lower, kw) or _has_whole_phrase(kw, term_lower)
            for kw in keywords
        )

    prioritized: list[dict] = []
    for term in gaps:
        if not isinstance(term, str):
            continue
        term_lower = term.strip().lower()
        # Skip blank terms and terms already present anywhere in the resume.
        if not term_lower or term_lower in existing_text:
            continue

        # REVIEW: _SECTION_KEYWORDS lists are tech-centric; domain-specific roles
        # (creative, healthcare, operations) may over-route to experience.
        # Consider expanding the lists or making them config-driven.
        # NOTE(review): with whole-word matching, terms that only matched as a
        # substring (e.g. "JavaScript" via "java") now need their own entry in
        # _SECTION_KEYWORDS to be routed to skills.
        if shares_keyword(term_lower, _SECTION_KEYWORDS["skills"]):
            section = "skills"
            priority = 1
            rationale = "matched technical skills list — highest ATS keyword density"
        elif shares_keyword(term_lower, _SECTION_KEYWORDS["summary"]):
            section = "summary"
            priority = 1
            rationale = "matched leadership/executive signals — boosts overall relevance score"
        elif len(term.split()) > 1:
            section = "experience"
            priority = 2
            rationale = "multi-word phrase — more specific than single keywords, context weight in bullets"
        else:
            section = "experience"
            priority = 3
            rationale = "single generic term — lowest ATS impact, added to experience for coverage"

        prioritized.append({
            "term": term,
            "section": section,
            "priority": priority,
            "rationale": rationale,
        })

    # Stable sort: equal priorities keep the order of ``gaps``.
    prioritized.sort(key=lambda item: item["priority"])
    return prioritized


def _has_whole_phrase(text: str, phrase: str) -> bool:
    """Return True when ``phrase`` occurs in ``text`` with no word character on either side."""
    if not phrase:
        return False
    return re.search(r"(?<!\w)" + re.escape(phrase) + r"(?!\w)", text) is not None
def _flatten_resume_text(resume: dict[str, Any]) -> str:
"""Concatenate all text from a structured resume dict into one searchable string."""
parts: list[str] = []
parts.append(resume.get("career_summary", "") or "")
parts.extend(resume.get("skills", []))
for exp in resume.get("experience", []):
parts.append(exp.get("title", ""))
parts.append(exp.get("company", ""))
parts.extend(exp.get("bullets", []))
for edu in resume.get("education", []):
parts.append(edu.get("degree", ""))
parts.append(edu.get("field", ""))
parts.append(edu.get("institution", ""))
parts.extend(resume.get("achievements", []))
return " ".join(parts)
# ── LLM rewrite ───────────────────────────────────────────────────────────────
def rewrite_for_ats(
    resume: dict[str, Any],
    prioritized_gaps: list[dict],
    job: dict[str, Any],
    candidate_voice: str = "",
) -> dict[str, Any]:
    """Rewrite resume sections so they incorporate the given ATS keyword gaps.

    Gaps are grouped by their ``"section"`` value and each section gets one
    LLM call whose prompt lists only that section's terms. The anti-fabrication
    rules live in the prompt; callers verify the result afterwards with
    ``hallucination_check()``.

    A section is left unchanged when rendering its prompt, calling the LLM or
    applying the result raises, or when the LLM returns only whitespace.
    Failures are logged, not raised.

    Args:
        resume: Structured resume dict.
        prioritized_gaps: Output of ``prioritize_gaps()``; each item needs
            ``"section"`` and ``"term"`` keys.
        job: Job dict; ``"title"`` and ``"company"`` are read for the prompt.
        candidate_voice: Optional free-text style note appended to the prompt.

    Returns:
        A new (shallow-copied) resume dict with rewritten sections. With no
        gaps the copy is returned without constructing an ``LLMRouter``.
    """
    # Group gap terms by target section, keeping first-seen section order.
    by_section: dict[str, list[str]] = {}
    for gap in prioritized_gaps:
        by_section.setdefault(gap["section"], []).append(gap["term"])

    rewritten = dict(resume)  # shallow copy — sections replaced below
    if not by_section:
        # Nothing to inject: skip the router (and its config lookup) entirely.
        return rewritten

    from scripts.llm_router import LLMRouter
    router = LLMRouter()

    voice_note = (
        f'\n\nCandidate voice/style: "{candidate_voice}". '
        "Preserve this authentic tone — do not write generically."
    ) if candidate_voice else ""

    for section, terms in by_section.items():
        try:
            terms_str = ", ".join(f'"{t}"' for t in terms)
            # Rendered inside the try so one malformed entry only costs this
            # section, not the whole rewrite.
            original_content = _section_text_for_prompt(resume, section)

            prompt = (
                f"You are rewriting the **{section}** section of a resume to help it pass "
                f"ATS (applicant tracking system) screening for this role:\n"
                f" Job title: {job.get('title', 'Unknown')}\n"
                f" Company: {job.get('company', 'Unknown')}\n\n"
                f"Inject these missing ATS keywords naturally into the section:\n"
                f" {terms_str}\n\n"
                f"CRITICAL RULES — violating any of these invalidates the rewrite:\n"
                f"1. Do NOT invent new employers, job titles, dates, or education.\n"
                f"2. Do NOT add skills the candidate did not already demonstrate.\n"
                f"3. Only rephrase existing content — replace vague verbs/nouns with the "
                f" ATS-preferred equivalents listed above.\n"
                f"4. Keep the same number of bullet points in experience entries.\n"
                f"5. Return ONLY the rewritten section content, no labels or explanation."
                f"{voice_note}\n\n"
                f"Original {section} section:\n{original_content}"
            )

            result = (router.complete(prompt) or "").strip()
            if not result:
                # An empty reply would otherwise blank out the section.
                log.warning(
                    "[resume_optimizer] empty rewrite for section %r; keeping original",
                    section,
                )
                continue
            rewritten = _apply_section_rewrite(rewritten, section, result)
        except Exception:
            log.warning("[resume_optimizer] rewrite failed for section %r", section, exc_info=True)
            # Leave section unchanged on failure

    return rewritten
def _section_text_for_prompt(resume: dict[str, Any], section: str) -> str:
"""Render a resume section as plain text suitable for an LLM prompt."""
if section == "summary":
return resume.get("career_summary", "") or "(empty)"
if section == "skills":
skills = resume.get("skills", [])
return ", ".join(skills) if skills else "(empty)"
if section == "experience":
lines: list[str] = []
for exp in resume.get("experience", []):
lines.append(f"{exp['title']} at {exp['company']} ({exp['start_date']}{exp['end_date']})")
for b in exp.get("bullets", []):
lines.append(f"{b}")
return "\n".join(lines) if lines else "(empty)"
return "(unsupported section)"
def _apply_section_rewrite(resume: dict[str, Any], section: str, rewritten: str) -> dict[str, Any]:
"""Return a new resume dict with the given section replaced by rewritten text."""
updated = dict(resume)
if section == "summary":
updated["career_summary"] = rewritten
elif section == "skills":
# LLM returns comma-separated or newline-separated skills
skills = [s.strip() for s in re.split(r"[,\n•·]+", rewritten) if s.strip()]
updated["skills"] = skills
elif section == "experience":
# For experience, we keep the structured entries but replace the bullets.
# The LLM rewrites the whole section as plain text; we re-parse the bullets.
updated["experience"] = _reparse_experience_bullets(resume["experience"], rewritten)
return updated
def _reparse_experience_bullets(
original_entries: list[dict],
rewritten_text: str,
) -> list[dict]:
"""Re-associate rewritten bullet text with the original experience entries.
The LLM rewrites the section as a block of text. We split on the original
entry headers (title + company) to re-bind bullets to entries. Falls back
to the original entries if splitting fails.
"""
if not original_entries:
return original_entries
result: list[dict] = []
remaining = rewritten_text
for i, entry in enumerate(original_entries):
# Find where the next entry starts so we can slice out this entry's bullets
if i + 1 < len(original_entries):
next_title = original_entries[i + 1]["title"]
# Look for the next entry header in the remaining text
split_pat = re.escape(next_title)
m = re.search(split_pat, remaining, re.IGNORECASE)
chunk = remaining[:m.start()] if m else remaining
remaining = remaining[m.start():] if m else ""
else:
chunk = remaining
bullets = [
re.sub(r"^[•\-–—*◦▪▸►]\s*", "", line).strip()
for line in chunk.splitlines()
if re.match(r"^[•\-–—*◦▪▸►]\s*", line.strip())
]
new_entry = dict(entry)
new_entry["bullets"] = bullets if bullets else entry["bullets"]
result.append(new_entry)
return result
# ── Hallucination guard ───────────────────────────────────────────────────────
def hallucination_check(original: dict[str, Any], rewritten: dict[str, Any]) -> bool:
    """Report whether a rewrite introduced no new factual anchors.

    Anchors are whatever ``_extract_anchors()`` returns for a resume. The
    rewrite passes when its anchors are a subset of the original's; any
    additional anchor is logged and treated as fabrication.

    Args:
        original: Structured resume dict before the rewrite.
        rewritten: Structured resume dict after the rewrite.

    Returns:
        True when the rewrite is safe to use; False when new anchors were
        found and the caller should fall back to the original.
    """
    baseline = _extract_anchors(original)
    fabricated = _extract_anchors(rewritten) - baseline
    if not fabricated:
        return True
    log.warning(
        "[resume_optimizer] hallucination_check FAILED — new anchors in rewrite: %s",
        fabricated,
    )
    return False
def _extract_anchors(resume: dict[str, Any]) -> frozenset[str]:
"""Extract stable factual anchors (company, title, dates) from experience entries."""
anchors: set[str] = set()
for exp in resume.get("experience", []):
for field in ("company", "title", "start_date", "end_date"):
val = (exp.get(field) or "").strip().lower()
if val:
anchors.add(val)
for edu in resume.get("education", []):
val = (edu.get("institution") or "").strip().lower()
if val:
anchors.add(val)
return frozenset(anchors)
# ── Resume → plain text renderer ─────────────────────────────────────────────
def render_resume_text(resume: dict[str, Any]) -> str:
    """Render a structured resume dict back to formatted plain text for PDF export.

    Layout: a contact line (name, email, phone), then the SUMMARY, EXPERIENCE,
    EDUCATION, SKILLS and ACHIEVEMENTS sections, each emitted only when the
    resume has content for it and each followed by a blank line.

    - Experience headers read ``"<title> | <company> (<start> – <end>)"``; the
      parenthesised range is omitted when both dates are missing.
    - Experience bullets and achievements are prefixed with ``"• "``.
    - ``None`` field values render as empty text, never as the literal
      string ``"None"``.

    NOTE(review): the blank line after each experience entry, and one blank
    line after the education and achievements lists, is the layout assumed
    here — confirm against the intended PDF output.

    Args:
        resume: Structured resume dict.

    Returns:
        The rendered text, lines joined with ``"\\n"``.
    """
    def txt(value: Any) -> str:
        return "" if value is None else str(value)

    lines: list[str] = []

    contact_parts = [txt(resume.get(key)) for key in ("name", "email", "phone")]
    lines.append(" ".join(p for p in contact_parts if p))
    lines.append("")

    if resume.get("career_summary"):
        lines.append("SUMMARY")
        lines.append(txt(resume["career_summary"]))
        lines.append("")

    if resume.get("experience"):
        lines.append("EXPERIENCE")
        for exp in resume["experience"]:
            header = f"{txt(exp.get('title'))} | {txt(exp.get('company'))}"
            date_range = " – ".join(
                d for d in (txt(exp.get("start_date")), txt(exp.get("end_date"))) if d
            )
            if date_range:
                header += f" ({date_range})"
            lines.append(header)
            for b in exp.get("bullets") or []:
                lines.append(f"• {b}")
            lines.append("")

    if resume.get("education"):
        lines.append("EDUCATION")
        for edu in resume["education"]:
            lines.append(
                f"{txt(edu.get('degree'))} {txt(edu.get('field'))} | "
                f"{txt(edu.get('institution'))} {txt(edu.get('graduation_year'))}"
            )
        lines.append("")

    if resume.get("skills"):
        lines.append("SKILLS")
        lines.append(", ".join(txt(s) for s in resume["skills"]))
        lines.append("")

    if resume.get("achievements"):
        lines.append("ACHIEVEMENTS")
        for a in resume["achievements"]:
            lines.append(f"• {a}")
        lines.append("")

    return "\n".join(lines)

View file

@ -9,10 +9,13 @@ and marks the task completed or failed.
Deduplication: only one queued/running task per (task_type, job_id) is allowed.
Different task types for the same job run concurrently (e.g. cover letter + research).
"""
import logging
import sqlite3
import threading
from pathlib import Path
log = logging.getLogger(__name__)
from scripts.db import (
DEFAULT_DB,
insert_task,
@ -20,6 +23,7 @@ from scripts.db import (
update_task_stage,
update_cover_letter,
save_research,
save_optimized_resume,
)
@ -39,9 +43,13 @@ def submit_task(db_path: Path = DEFAULT_DB, task_type: str = "",
if is_new:
from scripts.task_scheduler import get_scheduler, LLM_TASK_TYPES
if task_type in LLM_TASK_TYPES:
get_scheduler(db_path, run_task_fn=_run_task).enqueue(
enqueued = get_scheduler(db_path, run_task_fn=_run_task).enqueue(
task_id, task_type, job_id or 0, params
)
if not enqueued:
update_task_status(
db_path, task_id, "failed", error="Queue depth limit reached"
)
else:
t = threading.Thread(
target=_run_task,
@ -150,8 +158,16 @@ def _run_task(db_path: Path, task_id: int, task_type: str, job_id: int,
try:
if task_type == "discovery":
import os as _os
if _os.environ.get("DEMO_MODE", "").lower() in ("1", "true", "yes"):
update_task_status(
db_path, task_id, "failed",
error="Discovery is disabled in the public demo. Run your own instance to use this feature.",
)
return
from scripts.discover import run_discovery
new_count = run_discovery(db_path)
from pathlib import Path as _Path
new_count = run_discovery(db_path, config_dir=_Path(db_path).parent / "config")
n = new_count or 0
update_task_status(
db_path, task_id, "completed",
@ -163,6 +179,9 @@ def _run_task(db_path: Path, task_id: int, task_type: str, job_id: int,
import json as _json
p = _json.loads(params or "{}")
from scripts.generate_cover_letter import generate
_cfg_dir = Path(db_path).parent / "config"
_user_llm_cfg = _cfg_dir / "llm.yaml"
_user_yaml = _cfg_dir / "user.yaml"
result = generate(
job.get("title", ""),
job.get("company", ""),
@ -170,6 +189,8 @@ def _run_task(db_path: Path, task_id: int, task_type: str, job_id: int,
previous_result=p.get("previous_result", ""),
feedback=p.get("feedback", ""),
is_jobgether=job.get("source") == "jobgether",
config_path=_user_llm_cfg,
user_yaml_path=_user_yaml,
)
update_cover_letter(db_path, job_id, result)
@ -254,6 +275,48 @@ def _run_task(db_path: Path, task_id: int, task_type: str, job_id: int,
)
return
elif task_type == "resume_optimize":
import json as _json
from scripts.resume_parser import structure_resume
from scripts.resume_optimizer import (
extract_jd_signals,
prioritize_gaps,
rewrite_for_ats,
hallucination_check,
render_resume_text,
)
from scripts.user_profile import load_user_profile
description = job.get("description", "")
resume_path = load_user_profile().get("resume_path", "")
# Parse the candidate's resume
update_task_stage(db_path, task_id, "parsing resume")
resume_text = Path(resume_path).read_text(errors="replace") if resume_path else ""
resume_struct, parse_err = structure_resume(resume_text)
# Extract keyword gaps and build gap report (free tier)
update_task_stage(db_path, task_id, "extracting keyword gaps")
gaps = extract_jd_signals(description, resume_text)
prioritized = prioritize_gaps(gaps, resume_struct)
gap_report = _json.dumps(prioritized, indent=2)
# Full rewrite (paid tier only)
rewritten_text = ""
p = _json.loads(params or "{}")
if p.get("full_rewrite", False):
update_task_stage(db_path, task_id, "rewriting resume sections")
candidate_voice = load_user_profile().get("candidate_voice", "")
rewritten = rewrite_for_ats(resume_struct, prioritized, job, candidate_voice)
if hallucination_check(resume_struct, rewritten):
rewritten_text = render_resume_text(rewritten)
else:
log.warning("[task_runner] resume_optimize hallucination check failed for job %d", job_id)
save_optimized_resume(db_path, job_id=job_id,
text=rewritten_text,
gap_report=gap_report)
elif task_type == "prepare_training":
from scripts.prepare_training_data import build_records, write_jsonl, DEFAULT_OUTPUT
records = build_records()

View file

@ -1,34 +1,39 @@
# scripts/task_scheduler.py
"""Resource-aware batch scheduler for LLM background tasks.
"""Peregrine LLM task scheduler — thin shim over circuitforge_core.tasks.scheduler.
Routes LLM task types through per-type deques with VRAM-aware scheduling.
Non-LLM tasks bypass this module routing lives in scripts/task_runner.py.
All scheduling logic lives in circuitforge_core. This module defines
Peregrine-specific task types, VRAM budgets, and config loading.
Public API:
LLM_TASK_TYPES set of task type strings routed through the scheduler
get_scheduler() lazy singleton accessor
Public API (unchanged callers do not need to change):
LLM_TASK_TYPES frozenset of task type strings routed through the scheduler
DEFAULT_VRAM_BUDGETS dict of conservative peak VRAM estimates per task type
TaskSpec lightweight task descriptor (re-exported from core)
TaskScheduler backward-compatible wrapper around the core scheduler class
get_scheduler() returns the process-level TaskScheduler singleton
reset_scheduler() test teardown only
"""
from __future__ import annotations
import logging
import sqlite3
import os
import threading
from collections import deque, namedtuple
from pathlib import Path
from typing import Callable, Optional
# Module-level import so tests can monkeypatch scripts.task_scheduler._get_gpus
try:
from scripts.preflight import get_gpus as _get_gpus
except Exception: # graceful degradation if preflight unavailable
_get_gpus = lambda: []
from circuitforge_core.tasks.scheduler import (
TaskSpec, # re-export unchanged
LocalScheduler as _CoreTaskScheduler,
)
logger = logging.getLogger(__name__)
# Task types that go through the scheduler (all others spawn free threads)
# ── Peregrine task types and VRAM budgets ─────────────────────────────────────
LLM_TASK_TYPES: frozenset[str] = frozenset({
"cover_letter",
"company_research",
"wizard_generate",
"resume_optimize",
})
# Conservative peak VRAM estimates (GB) per task type.
@ -37,196 +42,126 @@ DEFAULT_VRAM_BUDGETS: dict[str, float] = {
"cover_letter": 2.5, # alex-cover-writer:latest (~2 GB GGUF + headroom)
"company_research": 5.0, # llama3.1:8b or vllm model
"wizard_generate": 2.5, # same model family as cover_letter
"resume_optimize": 5.0, # section-by-section rewrite; same budget as research
}
# Lightweight task descriptor stored in per-type deques
TaskSpec = namedtuple("TaskSpec", ["id", "job_id", "params"])
_DEFAULT_MAX_QUEUE_DEPTH = 500
class TaskScheduler:
"""Resource-aware LLM task batch scheduler. Use get_scheduler() — not direct construction."""
def __init__(self, db_path: Path, run_task_fn: Callable) -> None:
self._db_path = db_path
self._run_task = run_task_fn
self._lock = threading.Lock()
self._wake = threading.Event()
self._stop = threading.Event()
self._queues: dict[str, deque] = {}
self._active: dict[str, threading.Thread] = {}
self._reserved_vram: float = 0.0
self._thread: Optional[threading.Thread] = None
# Load VRAM budgets: defaults + optional config overrides
self._budgets: dict[str, float] = dict(DEFAULT_VRAM_BUDGETS)
def _load_config_overrides(db_path: Path) -> tuple[dict[str, float], int]:
"""Load VRAM budget overrides and max_queue_depth from config/llm.yaml."""
budgets = dict(DEFAULT_VRAM_BUDGETS)
max_depth = _DEFAULT_MAX_QUEUE_DEPTH
config_path = db_path.parent.parent / "config" / "llm.yaml"
self._max_queue_depth: int = 500
if config_path.exists():
try:
import yaml
with open(config_path) as f:
cfg = yaml.safe_load(f) or {}
sched_cfg = cfg.get("scheduler", {})
self._budgets.update(sched_cfg.get("vram_budgets", {}))
self._max_queue_depth = sched_cfg.get("max_queue_depth", 500)
budgets.update(sched_cfg.get("vram_budgets", {}))
max_depth = int(sched_cfg.get("max_queue_depth", max_depth))
except Exception as exc:
logger.warning("Failed to load scheduler config from %s: %s", config_path, exc)
logger.warning(
"Failed to load scheduler config from %s: %s", config_path, exc
)
return budgets, max_depth
# Warn on LLM types with no budget entry after merge
# Module-level stub so tests can monkeypatch scripts.task_scheduler._get_gpus
# (existing tests monkeypatch this symbol — keep it here for backward compat).
try:
from scripts.preflight import get_gpus as _get_gpus
except Exception:
_get_gpus = lambda: [] # noqa: E731
class TaskScheduler(_CoreTaskScheduler):
"""Peregrine-specific TaskScheduler.
Extends circuitforge_core.tasks.scheduler.TaskScheduler with:
- Peregrine default VRAM budgets and task types wired into __init__
- Config loading from config/llm.yaml
- Backward-compatible two-argument __init__ signature (db_path, run_task_fn)
- _get_gpus monkeypatch support (existing tests patch this module-level symbol)
- Backward-compatible enqueue() that marks dropped tasks failed in the DB
and logs under the scripts.task_scheduler logger
Direct construction is still supported for tests; production code should
use get_scheduler() instead.
"""
def __init__(self, db_path: Path, run_task_fn: Callable) -> None:
budgets, max_depth = _load_config_overrides(db_path)
# Warn under this module's logger for any task types with no VRAM budget
# (mirrors the core warning but captures under scripts.task_scheduler
# so existing tests using caplog.at_level(logger="scripts.task_scheduler") pass)
for t in LLM_TASK_TYPES:
if t not in self._budgets:
if t not in budgets:
logger.warning(
"No VRAM budget defined for LLM task type %r"
"defaulting to 0.0 GB (unlimited concurrency for this type)", t
)
# Detect total GPU VRAM; fall back to unlimited (999) on CPU-only systems.
# Uses module-level _get_gpus so tests can monkeypatch scripts.task_scheduler._get_gpus.
try:
gpus = _get_gpus()
self._available_vram: float = (
sum(g["vram_total_gb"] for g in gpus) if gpus else 999.0
super().__init__(
db_path=db_path,
run_task_fn=run_task_fn,
task_types=LLM_TASK_TYPES,
vram_budgets=budgets,
max_queue_depth=max_depth,
)
except Exception:
self._available_vram = 999.0
# Durability: reload surviving 'queued' LLM tasks from prior run
self._load_queued_tasks()
def enqueue(self, task_id: int, task_type: str, job_id: int,
params: Optional[str]) -> None:
def enqueue(
self,
task_id: int,
task_type: str,
job_id: int,
params: Optional[str],
) -> bool:
"""Add an LLM task to the scheduler queue.
If the queue for this type is at max_queue_depth, the task is marked
failed in SQLite immediately (no ghost queued rows) and a warning is logged.
"""
from scripts.db import update_task_status
When the queue is full, marks the task failed in SQLite immediately
(backward-compatible with the original Peregrine behavior) and logs a
warning under the scripts.task_scheduler logger.
with self._lock:
q = self._queues.setdefault(task_type, deque())
if len(q) >= self._max_queue_depth:
Returns True if enqueued, False if the queue was full.
"""
enqueued = super().enqueue(task_id, task_type, job_id, params)
if not enqueued:
# Log under this module's logger so existing caplog tests pass
logger.warning(
"Queue depth limit reached for %s (max=%d) — task %d dropped",
task_type, self._max_queue_depth, task_id,
)
update_task_status(self._db_path, task_id, "failed",
error="Queue depth limit reached")
return
q.append(TaskSpec(task_id, job_id, params))
self._wake.set()
def start(self) -> None:
"""Start the background scheduler loop thread. Call once after construction."""
self._thread = threading.Thread(
target=self._scheduler_loop, name="task-scheduler", daemon=True
from scripts.db import update_task_status
update_task_status(
self._db_path, task_id, "failed", error="Queue depth limit reached"
)
self._thread.start()
def shutdown(self, timeout: float = 5.0) -> None:
    """Request the scheduler loop to stop and wait briefly for its thread.

    Args:
        timeout: Maximum number of seconds to block in ``join`` while the
            scheduler thread winds down.
    """
    # Raise the stop flag first, then wake the loop so it notices the flag
    # instead of sleeping out its wait() timeout.
    self._stop.set()
    self._wake.set()

    worker = self._thread
    if not worker:
        return
    if worker.is_alive():
        worker.join(timeout=timeout)
def _scheduler_loop(self) -> None:
"""Main scheduler daemon — wakes on enqueue or batch completion."""
while not self._stop.is_set():
self._wake.wait(timeout=30)
self._wake.clear()
with self._lock:
# Defense in depth: reap externally-killed batch threads.
# In normal operation _active.pop() runs in finally before _wake fires,
# so this reap finds nothing — no double-decrement risk.
for t, thread in list(self._active.items()):
if not thread.is_alive():
self._reserved_vram -= self._budgets.get(t, 0.0)
del self._active[t]
# Start new type batches while VRAM allows
candidates = sorted(
[t for t in self._queues if self._queues[t] and t not in self._active],
key=lambda t: len(self._queues[t]),
reverse=True,
)
for task_type in candidates:
budget = self._budgets.get(task_type, 0.0)
# Always allow at least one batch to run even if its budget
# exceeds _available_vram (prevents permanent starvation when
# a single type's budget is larger than the VRAM ceiling).
if self._reserved_vram == 0.0 or self._reserved_vram + budget <= self._available_vram:
thread = threading.Thread(
target=self._batch_worker,
args=(task_type,),
name=f"batch-{task_type}",
daemon=True,
)
self._active[task_type] = thread
self._reserved_vram += budget
thread.start()
def _batch_worker(self, task_type: str) -> None:
"""Serial consumer for one task type. Runs until the type's deque is empty."""
try:
while True:
with self._lock:
q = self._queues.get(task_type)
if not q:
break
task = q.popleft()
# _run_task is scripts.task_runner._run_task (passed at construction)
self._run_task(
self._db_path, task.id, task_type, task.job_id, task.params
)
finally:
# Always release — even if _run_task raises.
# _active.pop here prevents the scheduler loop reap from double-decrementing.
with self._lock:
self._active.pop(task_type, None)
self._reserved_vram -= self._budgets.get(task_type, 0.0)
self._wake.set()
def _load_queued_tasks(self) -> None:
    """Load rows still marked 'queued' in SQLite into the in-memory deques.

    Reads ``background_tasks`` rows whose ``task_type`` is in
    ``LLM_TASK_TYPES``, oldest ``created_at`` first, and appends a
    ``TaskSpec`` for each one to the deque of its task type.
    """
    llm_types = sorted(LLM_TASK_TYPES)  # sorted for deterministic SQL params in logs
    placeholders = ",".join("?" * len(llm_types))
    conn = sqlite3.connect(self._db_path)
    try:
        rows = conn.execute(
            f"SELECT id, task_type, job_id, params FROM background_tasks"
            f" WHERE status='queued' AND task_type IN ({placeholders})"
            f" ORDER BY created_at ASC",
            llm_types,
        ).fetchall()
    finally:
        # Close even when the query raises (e.g. missing table) so the
        # connection is never leaked.
        conn.close()

    for row_id, task_type, job_id, params in rows:
        q = self._queues.setdefault(task_type, deque())
        q.append(TaskSpec(row_id, job_id, params))

    if rows:
        logger.info("Scheduler: resumed %d queued task(s) from prior run", len(rows))
return enqueued
# ── Singleton ─────────────────────────────────────────────────────────────────
# ── Peregrine-local singleton ──────────────────────────────────────────────────
# We manage our own singleton (not the core one) so the process-level instance
# is always a Peregrine TaskScheduler (with the enqueue() override).
_scheduler: Optional[TaskScheduler] = None
_scheduler_lock = threading.Lock()
def get_scheduler(db_path: Path, run_task_fn: Callable = None) -> TaskScheduler:
"""Return the process-level TaskScheduler singleton, constructing it if needed.
def get_scheduler(
db_path: Path,
run_task_fn: Optional[Callable] = None,
) -> TaskScheduler:
"""Return the process-level Peregrine TaskScheduler singleton.
run_task_fn is required on the first call; ignored on subsequent calls.
Safety: inner lock + double-check prevents double-construction under races.
The outer None check is a fast-path performance optimisation only.
run_task_fn is required on the first call; ignored on subsequent calls
(double-checked locking singleton already constructed).
"""
global _scheduler
if _scheduler is None: # fast path — avoids lock on steady state
if _scheduler is None: # fast path — no lock on steady state
with _scheduler_lock:
if _scheduler is None: # re-check under lock (double-checked locking)
if _scheduler is None: # re-check under lock
if run_task_fn is None:
raise ValueError("run_task_fn required on first get_scheduler() call")
_scheduler = TaskScheduler(db_path, run_task_fn)

View file

@ -7,6 +7,8 @@ here so port/host/SSL changes propagate everywhere automatically.
"""
from __future__ import annotations
from pathlib import Path
import os
import tempfile
import yaml
_DEFAULTS = {
@ -29,6 +31,7 @@ _DEFAULTS = {
"wizard_complete": False,
"wizard_step": 0,
"dismissed_banners": [],
"ui_preference": "streamlit",
"services": {
"streamlit_port": 8501,
"ollama_host": "localhost",
@ -76,7 +79,37 @@ class UserProfile:
self.wizard_complete: bool = bool(data.get("wizard_complete", False))
self.wizard_step: int = int(data.get("wizard_step", 0))
self.dismissed_banners: list[str] = list(data.get("dismissed_banners", []))
raw_pref = data.get("ui_preference", "streamlit")
self.ui_preference: str = raw_pref if raw_pref in ("streamlit", "vue") else "streamlit"
self._svc = data["services"]
self._path = path
def save(self) -> None:
    """Serialize the profile to YAML and write it to the file at ``self._path``.

    Every public field is written under its own name; the three directory
    fields are converted with ``str()`` first, and the raw services mapping
    held in ``self._svc`` is written under ``services``.
    """
    stringified = {"docs_dir", "ollama_models_dir", "vllm_models_dir"}
    field_order = (
        "name",
        "email",
        "phone",
        "linkedin",
        "career_summary",
        "candidate_voice",
        "nda_companies",
        "docs_dir",
        "ollama_models_dir",
        "vllm_models_dir",
        "inference_profile",
        "mission_preferences",
        "candidate_accessibility_focus",
        "candidate_lgbtq_focus",
        "tier",
        "dev_tier_override",
        "wizard_complete",
        "wizard_step",
        "dismissed_banners",
        "ui_preference",
    )

    output = {}
    for key in field_order:
        value = getattr(self, key)
        output[key] = str(value) if key in stringified else value
    output["services"] = self._svc

    serialized = yaml.dump(output, default_flow_style=False)
    self._path.write_text(serialized)
# ── Service URLs ──────────────────────────────────────────────────────────
def _url(self, host: str, port: int, ssl: bool) -> str:
@ -130,3 +163,30 @@ class UserProfile:
"ollama_research": f"{self.ollama_url}/v1",
"vllm": f"{self.vllm_url}/v1",
}
# ── Free functions for plain-dict access (used by dev-api.py) ─────────────────
def load_user_profile(config_path: str) -> dict:
    """Load user.yaml and return its contents as a plain dict.

    Args:
        config_path: Path to the YAML profile file.

    Returns:
        The parsed mapping. An empty dict is returned when the file does not
        exist, is empty, or does not contain a mapping at the top level.
    """
    path = Path(config_path)
    if not path.exists():
        return {}
    with open(path) as f:
        data = yaml.safe_load(f) or {}
    # safe_load can return a list or scalar for a malformed file; callers
    # expect a dict, so honor the declared return type instead of passing
    # an arbitrary object through.
    if not isinstance(data, dict):
        return {}
    return data
def save_user_profile(config_path: str, data: dict) -> None:
    """Atomically write the user profile dict to user.yaml.

    The YAML is written to a temporary file in the destination directory
    and then moved over the target with ``os.replace``.

    Args:
        config_path: Destination path; parent directories are created.
        data: Profile mapping to serialize.

    Raises:
        Whatever the write or replace raised. The temporary file is removed
        before the error propagates.
    """
    path = Path(config_path)
    path.parent.mkdir(parents=True, exist_ok=True)
    # Write to temp file then rename for atomicity
    fd, tmp = tempfile.mkstemp(dir=path.parent, suffix='.yaml.tmp')
    try:
        with os.fdopen(fd, 'w') as f:
            yaml.dump(data, f, allow_unicode=True, default_flow_style=False)
        os.replace(tmp, path)
    except BaseException:
        # BaseException so an interrupt (KeyboardInterrupt/SystemExit) does
        # not strand the temp file. A failing unlink must not replace the
        # original error, so it is swallowed.
        try:
            os.unlink(tmp)
        except OSError:
            pass
        raise

0
tests/e2e/__init__.py Normal file
View file

180
tests/e2e/conftest.py Normal file
View file

@ -0,0 +1,180 @@
"""
Peregrine E2E test harness — shared fixtures and Streamlit helpers.
Run with: pytest tests/e2e/ --mode=demo|cloud|local|all
"""
from __future__ import annotations
import os
import logging
from pathlib import Path
import pytest
from dotenv import load_dotenv
from playwright.sync_api import Page, BrowserContext
from tests.e2e.models import ErrorRecord, ModeConfig, diff_errors
from tests.e2e.modes.demo import DEMO
from tests.e2e.modes.cloud import CLOUD
from tests.e2e.modes.local import LOCAL
load_dotenv(".env.e2e")
log = logging.getLogger(__name__)
_ALL_MODES = {"demo": DEMO, "cloud": CLOUD, "local": LOCAL}
_CONSOLE_NOISE = [
"WebSocket connection",
"WebSocket is closed",
"_stcore/stream",
"favicon.ico",
]
def pytest_addoption(parser):
    """Register the ``--mode`` command-line option (defaults to ``demo``)."""
    mode_choices = ["demo", "cloud", "local", "all"]
    help_text = "Which Peregrine instance(s) to test against"
    parser.addoption(
        "--mode",
        action="store",
        default="demo",
        choices=mode_choices,
        help=help_text,
    )
def pytest_configure(config):
    """Register the ``e2e`` marker with pytest's ``markers`` ini setting."""
    marker_spec = "e2e: mark test as E2E (requires running Peregrine instance)"
    config.addinivalue_line("markers", marker_spec)
def pytest_collection_modifyitems(config, items):
    """Skip E2E tests unless ``--mode`` was passed explicitly on the command line.

    Items whose file path contains ``tests/e2e/`` get a skip marker when no
    invocation argument contains ``--mode``. Other items are left untouched.
    """
    # Normalize separators first: on Windows the collected path uses
    # backslashes, so a plain "tests/e2e/" substring test never matched and
    # the E2E tests were never skipped there.
    e2e_items = [
        item for item in items
        if "tests/e2e/" in str(item.fspath).replace("\\", "/")
    ]
    if not e2e_items:
        return
    if any("--mode" in str(arg) for arg in config.invocation_params.args):
        return
    skip = pytest.mark.skip(reason="E2E tests require explicit --mode flag")
    for item in e2e_items:
        item.add_marker(skip)
@pytest.fixture(scope="session")
def active_modes(pytestconfig) -> list[ModeConfig]:
    """Resolve the ``--mode`` option into the list of mode configs to run."""
    selected = pytestconfig.getoption("--mode")
    # "all" expands to every known mode; anything else is a single lookup.
    return list(_ALL_MODES.values()) if selected == "all" else [_ALL_MODES[selected]]
@pytest.fixture(scope="session", autouse=True)
def assert_instances_reachable(active_modes):
    """Fail fast with a clear message if any target instance is not running.

    Opens a TCP connection (3 s timeout) to the host and port of each active
    mode's ``base_url`` and aborts the whole pytest session on the first
    failure.
    """
    import socket
    from urllib.parse import urlparse

    for mode in active_modes:
        parsed = urlparse(mode.base_url)
        # With no explicit port, use the scheme's default rather than always
        # 80 — an https base_url would otherwise be probed on the wrong port.
        default_port = 443 if parsed.scheme == "https" else 80
        host, port = parsed.hostname, parsed.port or default_port
        try:
            with socket.create_connection((host, port), timeout=3):
                pass
        except OSError:
            pytest.exit(
                f"[{mode.name}] Instance not reachable at {mode.base_url} — "
                "start the instance before running E2E tests.",
                returncode=1,
            )
@pytest.fixture(scope="session")
def mode_contexts(active_modes, playwright) -> dict[str, BrowserContext]:
    """Yield one browser context per active mode, keyed by mode name.

    Cloud mode is authenticated by adding a ``cf_session`` cookie that
    carries the JWT; every other mode runs its own ``auth_setup`` hook. The
    browser is launched according to ``E2E_HEADLESS`` / ``E2E_SLOW_MO`` and
    closed when the session ends.
    """
    from urllib.parse import urlparse

    from tests.e2e.modes.cloud import _get_jwt

    headless = os.environ.get("E2E_HEADLESS", "true").lower() != "false"
    slow_mo = int(os.environ.get("E2E_SLOW_MO", "0"))
    browser = playwright.chromium.launch(headless=headless, slow_mo=slow_mo)

    # try/finally so the browser is also closed when context setup itself
    # fails (for example when _get_jwt raises), not only after the yield.
    try:
        contexts = {}
        for mode in active_modes:
            ctx = browser.new_context(viewport={"width": 1280, "height": 900})
            if mode.name == "cloud":
                # Cookies are sent on WebSocket upgrade requests; set_extra_http_headers
                # and ctx.route() are both HTTP-only and miss st.context.headers.
                # cloud_session.py falls back to the Cookie header when X-CF-Session
                # is absent (direct access without Caddy).
                jwt = _get_jwt()
                ctx.add_cookies([{
                    "name": "cf_session",
                    "value": jwt,
                    # Scope the cookie to the host actually under test instead
                    # of assuming localhost.
                    "domain": urlparse(mode.base_url).hostname or "localhost",
                    "path": "/",
                }])
            else:
                mode.auth_setup(ctx)
            contexts[mode.name] = ctx
        yield contexts
    finally:
        browser.close()
def wait_for_streamlit(page: Page, timeout: int = 10_000) -> None:
    """
    Block until Streamlit appears to have finished rendering.

    Two best-effort waits run first and their failures are ignored: the
    spinner becoming hidden (bounded by ``timeout`` ms), then the status
    widget no longer reporting 'running' (bounded by 5000 ms). A fixed
    2000 ms pause always follows. This is used instead of Playwright's
    networkidle, whose 500 ms idle window is too short for Peregrine's 3 s
    sidebar fragment poller.
    """
    status_idle_js = (
        "() => !document.querySelector('[data-testid=\"stStatusWidget\"]')"
        "?.textContent?.includes('running')"
    )
    best_effort_waits = (
        lambda: page.wait_for_selector(
            '[data-testid="stSpinner"]', state="hidden", timeout=timeout
        ),
        lambda: page.wait_for_function(status_idle_js, timeout=5_000),
    )
    for attempt in best_effort_waits:
        try:
            attempt()
        except Exception:
            continue
    page.wait_for_timeout(2_000)
def get_page_errors(page) -> list[ErrorRecord]:
    """Collect Streamlit error indicators currently present in the DOM.

    Returns one ``"exception"`` record per ``stException`` element, followed
    by one ``"alert"`` record per ``stAlert`` element that contains an
    ``stAlertContentError`` child.
    """

    def _record(kind: str, node) -> ErrorRecord:
        # text_content() includes text from CSS-hidden elements (e.g. collapsed expanders)
        text = (node.text_content() or "").strip()[:500]
        return ErrorRecord(
            type=kind,
            message=text,
            element_html=node.inner_html()[:1000],
        )

    found: list[ErrorRecord] = [
        _record("exception", node)
        for node in page.query_selector_all('[data-testid="stException"]')
    ]

    for node in page.query_selector_all('[data-testid="stAlert"]'):
        # Streamlit 1.35+: st.error() renders child [data-testid="stAlertContentError"]
        # kind is a React prop — NOT a DOM attribute. Child detection is authoritative.
        if not node.query_selector('[data-testid="stAlertContentError"]'):
            continue
        found.append(_record("alert", node))

    return found
def get_console_errors(messages) -> list[str]:
    """Return the text of console messages of type ``error``, dropping any
    whose text contains one of the ``_CONSOLE_NOISE`` substrings."""
    error_texts = (m.text for m in messages if m.type == "error")
    return [
        text
        for text in error_texts
        if not any(noise in text for noise in _CONSOLE_NOISE)
    ]
def screenshot_on_fail(page: Page, mode_name: str, test_name: str) -> Path:
    """Save a full-page screenshot and return the path it was written to.

    Args:
        page: Page to capture.
        mode_name: Mode name, used as a directory under ``tests/e2e/results``.
        test_name: Base name for the PNG. Callers build it from UI labels, so
            every character other than letters, digits, ``-``, ``_`` and
            ``.`` is replaced with ``_``.

    Returns:
        Path of the written ``.png`` file.
    """
    results_dir = Path(f"tests/e2e/results/{mode_name}/screenshots")
    results_dir.mkdir(parents=True, exist_ok=True)
    # Labels can contain "/" or other characters that are invalid in a file
    # name (or that would point into a non-existent subdirectory).
    safe_name = "".join(
        ch if ch.isalnum() or ch in "-_." else "_" for ch in test_name
    ) or "screenshot"
    path = results_dir / f"{safe_name}.png"
    page.screenshot(path=str(path), full_page=True)
    return path

41
tests/e2e/models.py Normal file
View file

@ -0,0 +1,41 @@
"""Shared data models for the Peregrine E2E test harness."""
from __future__ import annotations
import fnmatch
from dataclasses import dataclass
from pathlib import Path
from typing import Callable, Any
@dataclass(frozen=True)
class ErrorRecord:
    """One error indicator captured from the page.

    Equality and hashing use only ``type`` and ``message``; ``element_html``
    is carried along but never compared.
    """

    type: str  # "exception" | "alert"
    message: str
    element_html: str

    def _identity(self) -> tuple:
        """Fields that define record identity."""
        return (self.type, self.message)

    def __eq__(self, other: object) -> bool:
        if isinstance(other, ErrorRecord):
            return self._identity() == other._identity()
        return NotImplemented

    def __hash__(self) -> int:
        return hash(self._identity())
def diff_errors(before: list[ErrorRecord], after: list[ErrorRecord]) -> list[ErrorRecord]:
    """Return the entries of ``after`` that do not occur in ``before``.

    Order and duplicates of ``after`` are preserved.
    """
    already_seen = frozenset(before)
    fresh = []
    for record in after:
        if record not in already_seen:
            fresh.append(record)
    return fresh
@dataclass
class ModeConfig:
    """Settings for one Peregrine instance targeted by the E2E harness."""

    name: str
    base_url: str
    auth_setup: Callable[[Any], None]
    expected_failures: list[str]  # fnmatch glob patterns against element labels
    results_dir: Path | None
    settings_tabs: list[str]  # tabs expected per mode

    def matches_expected_failure(self, label: str) -> bool:
        """Tell whether ``label`` matches at least one ``expected_failures`` glob."""
        for pattern in self.expected_failures:
            if fnmatch.fnmatch(label, pattern):
                return True
        return False

View file

76
tests/e2e/modes/cloud.py Normal file
View file

@ -0,0 +1,76 @@
"""Cloud mode config — port 8505, CLOUD_MODE=true, Directus JWT auth."""
from __future__ import annotations
import os
import time
import logging
from pathlib import Path
from typing import Any
import requests
from dotenv import load_dotenv
from tests.e2e.models import ModeConfig
load_dotenv(".env.e2e")
log = logging.getLogger(__name__)
_BASE_SETTINGS_TABS = [
"👤 My Profile", "📝 Resume Profile", "🔎 Search",
"⚙️ System", "🎯 Fine-Tune", "🔑 License", "💾 Data", "🔒 Privacy",
]
_token_cache: dict[str, Any] = {"token": None, "expires_at": 0.0}
def _get_jwt() -> str:
"""
Acquire a Directus JWT for the e2e test user.
Strategy A: user/pass login (preferred).
Strategy B: persistent JWT from E2E_DIRECTUS_JWT env var.
Caches the token and refreshes 100s before expiry.
"""
if not os.environ.get("E2E_DIRECTUS_EMAIL"):
jwt = os.environ.get("E2E_DIRECTUS_JWT", "")
if not jwt:
raise RuntimeError(
"Cloud mode requires E2E_DIRECTUS_EMAIL+PASSWORD or E2E_DIRECTUS_JWT in .env.e2e"
)
return jwt
if _token_cache["token"] and time.time() < _token_cache["expires_at"] - 100:
return _token_cache["token"]
directus_url = os.environ.get("E2E_DIRECTUS_URL", "http://172.31.0.2:8055")
resp = requests.post(
f"{directus_url}/auth/login",
json={
"email": os.environ["E2E_DIRECTUS_EMAIL"],
"password": os.environ["E2E_DIRECTUS_PASSWORD"],
},
timeout=10,
)
resp.raise_for_status()
data = resp.json()["data"]
token = data["access_token"]
expires_in_ms = data.get("expires", 900_000)
_token_cache["token"] = token
_token_cache["expires_at"] = time.time() + (expires_in_ms / 1000)
log.info("Acquired Directus JWT (expires in %ds)", expires_in_ms // 1000)
return token
def _cloud_auth_setup(context: Any) -> None:
    """No-op placeholder: cloud auth is not configured through this hook.

    The conftest ``mode_contexts`` fixture special-cases the cloud mode and
    adds a ``cf_session`` cookie holding the JWT to the browser context
    itself; it does not call ``auth_setup`` for cloud.
    """
    pass  # JWT cookie is added in conftest.py's mode_contexts fixture
# Cloud-mode target: no expected failures, and the settings tab list that
# includes the "Privacy" tab.
CLOUD = ModeConfig(
    name="cloud",
    base_url="http://localhost:8505/peregrine",
    auth_setup=_cloud_auth_setup,  # placeholder hook; see its docstring
    expected_failures=[],
    results_dir=Path("tests/e2e/results/cloud"),
    settings_tabs=_BASE_SETTINGS_TABS,
)

25
tests/e2e/modes/demo.py Normal file
View file

@ -0,0 +1,25 @@
"""Demo mode config — port 8504, DEMO_MODE=true, LLM/scraping neutered."""
from pathlib import Path
from tests.e2e.models import ModeConfig
_BASE_SETTINGS_TABS = [
"👤 My Profile", "📝 Resume Profile", "🔎 Search",
"⚙️ System", "🎯 Fine-Tune", "🔑 License", "💾 Data",
]
# Demo-mode target: no auth, and a list of label globs for controls that are
# expected to produce an error when clicked.
DEMO = ModeConfig(
    name="demo",
    base_url="http://localhost:8504/peregrine",
    auth_setup=lambda ctx: None,  # demo needs no authentication
    expected_failures=[
        "Fetch*",
        # NOTE(review): every label matching this glob also matches "Generate*"
        # below, so this entry looks redundant — confirm before removing.
        "Generate Cover Letter*",
        "Generate*",
        "Analyze Screenshot*",
        "Push to Calendar*",
        "Sync Email*",
        "Start Email Sync*",
    ],
    results_dir=Path("tests/e2e/results/demo"),
    settings_tabs=_BASE_SETTINGS_TABS,
)

17
tests/e2e/modes/local.py Normal file
View file

@ -0,0 +1,17 @@
"""Local mode config — port 8502, full features, no auth."""
from pathlib import Path
from tests.e2e.models import ModeConfig
_BASE_SETTINGS_TABS = [
"👤 My Profile", "📝 Resume Profile", "🔎 Search",
"⚙️ System", "🎯 Fine-Tune", "🔑 License", "💾 Data",
]
# Local-mode target: no auth and no expected failures.
LOCAL = ModeConfig(
    name="local",
    # NOTE(review): this module's docstring says "port 8502" but the URL below
    # uses 8501 — confirm which one the local instance actually listens on.
    base_url="http://localhost:8501/peregrine",
    auth_setup=lambda ctx: None,
    expected_failures=[],
    results_dir=Path("tests/e2e/results/local"),
    settings_tabs=_BASE_SETTINGS_TABS,
)

View file

View file

@ -0,0 +1,4 @@
from tests.e2e.pages.base_page import BasePage
class ApplyPage(BasePage):
    """Page object whose sidebar navigation label is 'Apply Workspace'."""
    nav_label = "Apply Workspace"

View file

@ -0,0 +1,79 @@
"""Base page object — navigation, error capture, interactable discovery."""
from __future__ import annotations
import logging
import warnings
import fnmatch
from dataclasses import dataclass
from playwright.sync_api import Page
from tests.e2e.models import ErrorRecord, ModeConfig
log = logging.getLogger(__name__)
INTERACTABLE_SELECTORS = [
'[data-testid="baseButton-primary"] button',
'[data-testid="baseButton-secondary"] button',
'[data-testid="stTab"] button[role="tab"]',
'[data-testid="stSelectbox"]',
'[data-testid="stCheckbox"] input',
]
@dataclass
class InteractableElement:
    """A clickable element found by ``BasePage.discover_interactables``."""
    label: str  # visible text or aria-label, truncated to 80 characters
    selector: str  # the INTERACTABLE_SELECTORS entry that matched this element
    index: int  # position of the element among that selector's matches
class BasePage:
    """Base page object for all Peregrine pages.

    Subclasses set ``nav_label`` to the text of their sidebar navigation
    link. The class offers navigation, DOM/console error capture, and
    discovery of interactive elements.
    """

    nav_label: str = ""

    def __init__(self, page: Page, mode: ModeConfig, console_messages: list):
        """Store the page, the mode config, and the shared console-message list."""
        self.page = page
        self.mode = mode
        self._console_messages = console_messages

    def navigate(self) -> None:
        """Navigate to this page by clicking its sidebar nav link."""
        from tests.e2e.conftest import wait_for_streamlit

        sidebar = self.page.locator('[data-testid="stSidebarNav"]')
        sidebar.get_by_text(self.nav_label, exact=False).first.click()
        wait_for_streamlit(self.page)

    def get_errors(self) -> list[ErrorRecord]:
        """Return the Streamlit error indicators currently in the DOM."""
        from tests.e2e.conftest import get_page_errors

        return get_page_errors(self.page)

    def get_console_errors(self) -> list[str]:
        """Return filtered console errors from the shared message list."""
        from tests.e2e.conftest import get_console_errors

        return get_console_errors(self._console_messages)

    def discover_interactables(self, skip_sidebar: bool = True) -> list[InteractableElement]:
        """Find the interactive elements on the current page.

        Args:
            skip_sidebar: When true, elements inside the sidebar are ignored.

        Returns:
            One ``InteractableElement`` per kept element. ``index`` is the
            element's position among the *kept* matches of its selector, so
            a caller that re-queries the selector and applies the same
            sidebar filter can use it directly. Emits a warning for every
            expected-failure pattern that matches more than one label.
        """
        found: list[InteractableElement] = []
        for selector in INTERACTABLE_SELECTORS:
            # Count only kept elements. Numbering with the raw match position
            # shifted every index by the number of sidebar matches that came
            # earlier, so callers indexing a sidebar-filtered list hit the
            # wrong element.
            kept = 0
            for el in self.page.query_selector_all(selector):
                if skip_sidebar and el.evaluate(
                    "el => el.closest('[data-testid=\"stSidebar\"]') !== null"
                ):
                    continue
                # Strip each candidate before falling back: whitespace-only
                # inner text is truthy and used to yield an empty label.
                text = (el.inner_text() or "").strip()
                aria = (el.get_attribute("aria-label") or "").strip()
                label = (text or aria or f"element-{kept}")[:80]
                found.append(InteractableElement(label=label, selector=selector, index=kept))
                kept += 1

        for pattern in self.mode.expected_failures:
            matches = [e for e in found if fnmatch.fnmatch(e.label, pattern)]
            if len(matches) > 1:
                warnings.warn(
                    f"expected_failure pattern '{pattern}' matches {len(matches)} elements: "
                    + ", ".join(f'"{m.label}"' for m in matches),
                    stacklevel=2,
                )
        return found

View file

@ -0,0 +1,4 @@
from tests.e2e.pages.base_page import BasePage
class HomePage(BasePage):
    """Page object whose sidebar navigation label is 'Home'."""
    nav_label = "Home"

View file

@ -0,0 +1,4 @@
from tests.e2e.pages.base_page import BasePage
class InterviewPrepPage(BasePage):
    """Page object whose sidebar navigation label is 'Interview Prep'."""
    nav_label = "Interview Prep"

View file

@ -0,0 +1,4 @@
from tests.e2e.pages.base_page import BasePage
class InterviewsPage(BasePage):
    """Page object whose sidebar navigation label is 'Interviews'."""
    nav_label = "Interviews"

View file

@ -0,0 +1,4 @@
from tests.e2e.pages.base_page import BasePage
class JobReviewPage(BasePage):
    """Page object whose sidebar navigation label is 'Job Review'."""
    nav_label = "Job Review"

View file

@ -0,0 +1,44 @@
"""Settings page — tab-aware page object."""
from __future__ import annotations
import logging
from tests.e2e.pages.base_page import BasePage, InteractableElement
log = logging.getLogger(__name__)
class SettingsPage(BasePage):
    """Settings page object; element discovery walks every settings tab."""

    nav_label = "Settings"

    def discover_interactables(self, skip_sidebar: bool = True) -> list[InteractableElement]:
        """
        Click each tab listed in ``mode.settings_tabs`` and gather the
        interactables found on it, leaving out the tab buttons themselves.
        Tabs that cannot be located are logged and skipped.
        """
        from tests.e2e.conftest import wait_for_streamlit

        collected: list[InteractableElement] = []

        for tab_label in self.mode.settings_tabs:
            # Filter on the complete label (emoji included). Matching on only
            # the last word would make "My Profile" and "Resume Profile"
            # collide silently.
            tab_button = self.page.locator(
                '[data-testid="stTab"] button[role="tab"]'
            ).filter(has_text=tab_label)

            if tab_button.count() == 0:
                log.warning("Settings tab not found: %s", tab_label)
                continue

            tab_button.first.click()
            wait_for_streamlit(self.page)

            on_this_tab = super().discover_interactables(skip_sidebar=skip_sidebar)
            # Tab buttons were already exercised by the click above.
            for element in on_this_tab:
                if 'role="tab"' in element.selector:
                    continue
                collected.append(element)

        return collected

View file

@ -0,0 +1,4 @@
from tests.e2e.pages.base_page import BasePage
class SurveyPage(BasePage):
    """Page object whose sidebar navigation label is 'Survey Assistant'."""
    nav_label = "Survey Assistant"

View file

View file

@ -0,0 +1,126 @@
"""
Interaction pass — discover every interactable element on each page, click it,
diff errors before/after. Demo mode XFAIL patterns are checked; unexpected passes
are flagged as regressions.
Run: pytest tests/e2e/test_interactions.py --mode=demo -v
"""
from __future__ import annotations
import pytest
from tests.e2e.conftest import (
wait_for_streamlit, get_page_errors, screenshot_on_fail,
)
from tests.e2e.models import ModeConfig, diff_errors
from tests.e2e.pages.home_page import HomePage
from tests.e2e.pages.job_review_page import JobReviewPage
from tests.e2e.pages.apply_page import ApplyPage
from tests.e2e.pages.interviews_page import InterviewsPage
from tests.e2e.pages.interview_prep_page import InterviewPrepPage
from tests.e2e.pages.survey_page import SurveyPage
from tests.e2e.pages.settings_page import SettingsPage
PAGE_CLASSES = [
HomePage, JobReviewPage, ApplyPage, InterviewsPage,
InterviewPrepPage, SurveyPage, SettingsPage,
]
@pytest.mark.e2e
def test_interactions_all_pages(active_modes, mode_contexts, playwright):
    """
    For each active mode and page: click every discovered interactable and
    diff the DOM errors captured before and after the click.

    - New errors on an element matching an expected-failure pattern are
      recorded as XFAIL.
    - New errors on any other element, or an element that cannot be
      clicked, are recorded as FAIL.
    - No new errors on an expected-failure element is recorded as XPASS.

    The test fails when any FAIL or XPASS entry was recorded.
    """
    failures: list[str] = []
    xfails: list[str] = []
    xpasses: list[str] = []

    for mode in active_modes:
        ctx = mode_contexts[mode.name]
        page = ctx.new_page()
        # try/finally: close the page even when navigation or discovery
        # raises, so a broken mode does not leave a page open in the
        # session-scoped browser context.
        try:
            console_msgs: list = []
            page.on("console", lambda msg: console_msgs.append(msg))

            page.goto(mode.base_url)
            wait_for_streamlit(page)

            for PageClass in PAGE_CLASSES:
                pg = PageClass(page, mode, console_msgs)
                pg.navigate()

                elements = pg.discover_interactables()

                for element in elements:
                    # Re-navigate before each click so the page is back in
                    # its initial state.
                    pg.navigate()
                    before = pg.get_errors()

                    try:
                        all_matches = page.query_selector_all(element.selector)
                        content_matches = [
                            el for el in all_matches
                            if not el.evaluate(
                                "el => el.closest('[data-testid=\"stSidebar\"]') !== null"
                            )
                        ]
                        if element.index < len(content_matches):
                            content_matches[element.index].click()
                        else:
                            # Element is no longer present after re-navigation.
                            continue
                    except Exception as e:
                        failures.append(
                            f"[{mode.name}] {PageClass.nav_label} / '{element.label}' — "
                            f"could not interact: {e}"
                        )
                        continue

                    wait_for_streamlit(page)
                    after = pg.get_errors()
                    new_errors = diff_errors(before, after)

                    is_expected = mode.matches_expected_failure(element.label)

                    if new_errors:
                        if is_expected:
                            xfails.append(
                                f"[{mode.name}] {PageClass.nav_label} / '{element.label}' "
                                f"(expected) — {new_errors[0].message[:120]}"
                            )
                        else:
                            shot = screenshot_on_fail(
                                page, mode.name,
                                f"interact_{PageClass.__name__}_{element.label[:30]}"
                            )
                            failures.append(
                                f"[{mode.name}] {PageClass.nav_label} / '{element.label}' — "
                                f"unexpected error: {new_errors[0].message[:200]}\n  screenshot: {shot}"
                            )
                    else:
                        if is_expected:
                            xpasses.append(
                                f"[{mode.name}] {PageClass.nav_label} / '{element.label}' "
                                f"— expected to fail but PASSED (neutering guard may be broken!)"
                            )
        finally:
            page.close()

    report_lines = []
    if xfails:
        report_lines.append(f"XFAIL ({len(xfails)} expected failures, demo mode working correctly):")
        report_lines.extend(f"  {x}" for x in xfails)
    if xpasses:
        report_lines.append(f"\nXPASS — REGRESSION ({len(xpasses)} neutering guards broken!):")
        report_lines.extend(f"  {x}" for x in xpasses)
    if failures:
        report_lines.append(f"\nFAIL ({len(failures)} unexpected errors):")
        report_lines.extend(f"  {x}" for x in failures)

    if report_lines:
        print("\n\n=== E2E Interaction Report ===\n" + "\n".join(report_lines))

    if xpasses or failures:
        pytest.fail(
            f"{len(failures)} unexpected error(s), {len(xpasses)} xpass regression(s). "
            "See report above."
        )

61
tests/e2e/test_smoke.py Normal file
View file

@ -0,0 +1,61 @@
"""
Smoke pass — navigate each page, wait for Streamlit to settle, assert no errors on load.
Errors on page load are always real bugs (not mode-specific).
Run: pytest tests/e2e/test_smoke.py --mode=demo
"""
from __future__ import annotations
import pytest
from tests.e2e.conftest import wait_for_streamlit, get_page_errors, get_console_errors, screenshot_on_fail
from tests.e2e.models import ModeConfig
from tests.e2e.pages.home_page import HomePage
from tests.e2e.pages.job_review_page import JobReviewPage
from tests.e2e.pages.apply_page import ApplyPage
from tests.e2e.pages.interviews_page import InterviewsPage
from tests.e2e.pages.interview_prep_page import InterviewPrepPage
from tests.e2e.pages.survey_page import SurveyPage
from tests.e2e.pages.settings_page import SettingsPage
PAGE_CLASSES = [
HomePage, JobReviewPage, ApplyPage, InterviewsPage,
InterviewPrepPage, SurveyPage, SettingsPage,
]
@pytest.mark.e2e
def test_smoke_all_pages(active_modes, mode_contexts, playwright):
    """For each active mode: navigate to every page and assert no errors on load.

    Both DOM error indicators and browser console errors raised while the
    page loads count as failures. A screenshot is saved for each failing
    page.
    """
    failures: list[str] = []

    for mode in active_modes:
        ctx = mode_contexts[mode.name]
        page = ctx.new_page()
        # try/finally so the page is closed even if navigation raises.
        try:
            console_msgs: list = []
            page.on("console", lambda msg: console_msgs.append(msg))

            page.goto(mode.base_url)
            wait_for_streamlit(page)

            for PageClass in PAGE_CLASSES:
                pg = PageClass(page, mode, console_msgs)
                # Reset the buffer BEFORE navigating so it holds exactly the
                # messages produced by this page's load. Clearing after
                # navigate() discarded those messages, which meant console
                # errors could never be reported.
                console_msgs.clear()
                pg.navigate()

                dom_errors = pg.get_errors()
                console_errors = pg.get_console_errors()

                if dom_errors or console_errors:
                    shot_path = screenshot_on_fail(page, mode.name, f"smoke_{PageClass.__name__}")
                    detail = "\n".join(
                        [f"  DOM: {e.message}" for e in dom_errors]
                        + [f"  Console: {e}" for e in console_errors]
                    )
                    failures.append(
                        f"[{mode.name}] {PageClass.nav_label} — errors on load:\n{detail}\n  screenshot: {shot_path}"
                    )
        finally:
            page.close()

    if failures:
        pytest.fail("Smoke test failures:\n\n" + "\n\n".join(failures))

193
tests/test_calendar_push.py Normal file
View file

@ -0,0 +1,193 @@
# tests/test_calendar_push.py
"""Unit tests for scripts/calendar_push.py.
Integration classes are mocked — no real CalDAV or Google API calls.
"""
import sys
from datetime import timezone
from pathlib import Path
from unittest.mock import MagicMock, patch
import pytest
sys.path.insert(0, str(Path(__file__).parent.parent))
# ── Fixtures ──────────────────────────────────────────────────────────────────
def _make_db(tmp_path, interview_date="2026-04-15", calendar_event_id=None):
    """Create a test database holding a single job and return ``(db, job_id)``.

    ``interview_date`` and ``calendar_event_id`` are stored on the job only
    when they are truthy.
    """
    from scripts.db import init_db, insert_job, set_interview_date, set_calendar_event_id

    db = tmp_path / "test.db"
    init_db(db)

    job_row = {
        "title": "Customer Success Manager",
        "company": "Acme Corp",
        "url": "https://example.com/job/1",
        "source": "linkedin",
        "location": "Remote",
        "is_remote": True,
        "salary": "",
        "description": "Great role.",
        "date_found": "2026-04-01",
        "status": "phone_screen",
    }
    job_id = insert_job(db, job_row)

    if interview_date:
        set_interview_date(db, job_id=job_id, date_str=interview_date)
    if calendar_event_id:
        set_calendar_event_id(db, job_id=job_id, event_id=calendar_event_id)
    return db, job_id
def _config_dir_with(tmp_path, integration_name: str) -> Path:
"""Create a minimal integration config file and return the config dir."""
integrations_dir = tmp_path / "config" / "integrations"
integrations_dir.mkdir(parents=True)
(integrations_dir / f"{integration_name}.yaml").write_text(
"caldav_url: https://caldav.example.com/\n"
"username: user@example.com\n"
"app_password: test-password\n"
"calendar_name: Interviews\n"
)
return tmp_path / "config"
# ── No integration configured ─────────────────────────────────────────────────
def test_push_returns_error_when_no_integration_configured(tmp_path):
    """An empty config directory yields ok=False with a 'No calendar integration' error."""
    db, job_id = _make_db(tmp_path)
    empty_config = tmp_path / "config"
    empty_config.mkdir()

    from scripts.calendar_push import push_interview_event

    outcome = push_interview_event(db, job_id=job_id, config_dir=empty_config)

    assert outcome["ok"] is False
    assert "No calendar integration" in outcome["error"]
# ── No interview date ─────────────────────────────────────────────────────────
def test_push_returns_error_when_no_interview_date(tmp_path):
    """A job without an interview date cannot be pushed to the calendar."""
    from scripts.calendar_push import push_interview_event

    db, job_id = _make_db(tmp_path, interview_date=None)
    config_dir = _config_dir_with(tmp_path, "apple_calendar")

    outcome = push_interview_event(db, job_id=job_id, config_dir=config_dir)

    assert outcome["ok"] is False
    assert "No interview date" in outcome["error"]
# ── Successful create ─────────────────────────────────────────────────────────
def test_push_creates_event_and_stores_event_id(tmp_path):
    """A first push calls create_event once and reports the returned id."""
    db, job_id = _make_db(tmp_path)
    config_dir = _config_dir_with(tmp_path, "apple_calendar")

    fake_calendar = MagicMock()
    fake_calendar.create_event.return_value = "peregrine-job-1@circuitforge.tech"

    with patch("scripts.calendar_push._load_integration", return_value=fake_calendar):
        from scripts.calendar_push import push_interview_event
        outcome = push_interview_event(db, job_id=job_id, config_dir=config_dir)

    assert outcome["ok"] is True
    assert outcome["event_id"] == "peregrine-job-1@circuitforge.tech"
    fake_calendar.create_event.assert_called_once()
def test_push_event_title_includes_stage_and_company(tmp_path):
    """The event title mentions both the company and the human-readable stage."""
    db, job_id = _make_db(tmp_path)
    from scripts.db import advance_to_stage
    advance_to_stage(db, job_id=job_id, stage="phone_screen")
    config_dir = _config_dir_with(tmp_path, "apple_calendar")

    mock_integration = MagicMock()
    mock_integration.create_event.return_value = "uid-123"

    with patch("scripts.calendar_push._load_integration", return_value=mock_integration):
        from scripts.calendar_push import push_interview_event
        push_interview_event(db, job_id=job_id, config_dir=config_dir)

    # Fail with a clear message (rather than an AttributeError on None) when
    # the integration was never invoked.
    mock_integration.create_event.assert_called_once()
    recorded_call = mock_integration.create_event.call_args

    # The title may arrive as the second positional argument or as a keyword;
    # only index into args when that position actually exists.
    if len(recorded_call.args) > 1:
        title = recorded_call.args[1]
    else:
        title = recorded_call.kwargs.get("title", "")

    assert "Acme Corp" in title
    assert "Phone Screen" in title
def test_push_event_start_is_noon_utc(tmp_path):
    """The start datetime handed to create_event is 12:00 in UTC."""
    db, job_id = _make_db(tmp_path, interview_date="2026-04-15")
    config_dir = _config_dir_with(tmp_path, "apple_calendar")

    fake_calendar = MagicMock()
    fake_calendar.create_event.return_value = "uid-abc"

    with patch("scripts.calendar_push._load_integration", return_value=fake_calendar):
        from scripts.calendar_push import push_interview_event
        push_interview_event(db, job_id=job_id, config_dir=config_dir)

    positional = fake_calendar.create_event.call_args.args
    event_start = positional[2]
    assert event_start.hour == 12
    assert event_start.tzinfo == timezone.utc
def test_push_event_duration_is_one_hour(tmp_path):
    """The event passed to create_event spans exactly one hour."""
    db, job_id = _make_db(tmp_path, interview_date="2026-04-15")
    config_dir = _config_dir_with(tmp_path, "apple_calendar")

    mock_integration = MagicMock()
    mock_integration.create_event.return_value = "uid-abc"

    with patch("scripts.calendar_push._load_integration", return_value=mock_integration):
        from scripts.calendar_push import push_interview_event
        push_interview_event(db, job_id=job_id, config_dir=config_dir)

    call_args = mock_integration.create_event.call_args.args
    start_dt, end_dt = call_args[2], call_args[3]

    # timedelta.seconds only holds the sub-day remainder, so a duration of one
    # day plus one hour would also report 3600. total_seconds() covers the
    # whole span.
    assert (end_dt - start_dt).total_seconds() == 3600
# ── Idempotent update ─────────────────────────────────────────────────────────
def test_push_calls_update_when_event_id_already_exists(tmp_path):
    """A job that already has an event id is updated, never re-created."""
    db, job_id = _make_db(tmp_path, calendar_event_id="existing-event-id")
    config_dir = _config_dir_with(tmp_path, "apple_calendar")

    fake_calendar = MagicMock()
    fake_calendar.update_event.return_value = "existing-event-id"

    with patch("scripts.calendar_push._load_integration", return_value=fake_calendar):
        from scripts.calendar_push import push_interview_event
        outcome = push_interview_event(db, job_id=job_id, config_dir=config_dir)

    assert outcome["ok"] is True
    fake_calendar.update_event.assert_called_once()
    fake_calendar.create_event.assert_not_called()
# ── Integration error handling ────────────────────────────────────────────────
def test_push_returns_error_on_integration_exception(tmp_path):
    """An exception raised by the integration is reported in the result dict."""
    db, job_id = _make_db(tmp_path)
    config_dir = _config_dir_with(tmp_path, "apple_calendar")

    failing_calendar = MagicMock()
    failing_calendar.create_event.side_effect = RuntimeError("CalDAV server unreachable")

    with patch("scripts.calendar_push._load_integration", return_value=failing_calendar):
        from scripts.calendar_push import push_interview_event
        outcome = push_interview_event(db, job_id=job_id, config_dir=config_dir)

    assert outcome["ok"] is False
    assert "CalDAV server unreachable" in outcome["error"]
# ── Missing job ───────────────────────────────────────────────────────────────
def test_push_returns_error_for_unknown_job_id(tmp_path):
    """Pushing a job id that is not in the database names that id in the error."""
    from scripts.db import init_db
    from scripts.calendar_push import push_interview_event

    empty_db = tmp_path / "test.db"
    init_db(empty_db)
    config_dir = _config_dir_with(tmp_path, "apple_calendar")

    outcome = push_interview_event(empty_db, job_id=9999, config_dir=config_dir)

    assert outcome["ok"] is False
    assert "9999" in outcome["error"]

View file

@ -80,7 +80,8 @@ class TestTaskRunnerCoverLetterParams:
captured = {}
def mock_generate(title, company, description="", previous_result="", feedback="",
is_jobgether=False, _router=None):
is_jobgether=False, _router=None, config_path=None,
user_yaml_path=None):
captured.update({
"title": title, "company": company,
"previous_result": previous_result, "feedback": feedback,

148
tests/test_db_migrate.py Normal file
View file

@ -0,0 +1,148 @@
"""Tests for scripts/db_migrate.py — numbered SQL migration runner."""
import sqlite3
import textwrap
from pathlib import Path
import pytest
from scripts.db_migrate import migrate_db
# ── helpers ───────────────────────────────────────────────────────────────────
def _applied(db_path: Path) -> list[str]:
con = sqlite3.connect(db_path)
try:
rows = con.execute("SELECT version FROM schema_migrations ORDER BY version").fetchall()
return [r[0] for r in rows]
finally:
con.close()
def _tables(db_path: Path) -> set[str]:
con = sqlite3.connect(db_path)
try:
rows = con.execute(
"SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%'"
).fetchall()
return {r[0] for r in rows}
finally:
con.close()
# ── tests ──────────────────────────────────────────────────────────────────────
def test_creates_schema_migrations_table(tmp_path):
    """Migrating an empty DB with no migration files still creates the tracking table."""
    import scripts.db_migrate as migrate_module

    db = tmp_path / "test.db"
    empty_dir = tmp_path / "migrations"
    empty_dir.mkdir()  # empty migrations dir

    # Point the module-level _MIGRATIONS_DIR at the scratch directory and
    # restore it afterwards.
    saved_dir = migrate_module._MIGRATIONS_DIR
    migrate_module._MIGRATIONS_DIR = empty_dir
    try:
        migrate_db(db)
        assert "schema_migrations" in _tables(db)
    finally:
        migrate_module._MIGRATIONS_DIR = saved_dir
def test_applies_migration_file(tmp_path):
    """A single .sql file in the migrations directory is executed and recorded."""
    import scripts.db_migrate as migrate_module

    db = tmp_path / "test.db"
    migrations_dir = tmp_path / "migrations"
    migrations_dir.mkdir()
    migration_file = migrations_dir / "001_test.sql"
    migration_file.write_text(
        "CREATE TABLE IF NOT EXISTS widgets (id INTEGER PRIMARY KEY, name TEXT);"
    )

    saved_dir = migrate_module._MIGRATIONS_DIR
    migrate_module._MIGRATIONS_DIR = migrations_dir
    try:
        newly_applied = migrate_db(db)
        assert newly_applied == ["001_test"]
        assert "widgets" in _tables(db)
        assert _applied(db) == ["001_test"]
    finally:
        migrate_module._MIGRATIONS_DIR = saved_dir
def test_idempotent_second_run(tmp_path):
    """A second migrate_db call applies nothing and leaves the record unchanged."""
    import scripts.db_migrate as migrate_module

    db = tmp_path / "test.db"
    migrations_dir = tmp_path / "migrations"
    migrations_dir.mkdir()
    migration_file = migrations_dir / "001_test.sql"
    migration_file.write_text(
        "CREATE TABLE IF NOT EXISTS widgets (id INTEGER PRIMARY KEY, name TEXT);"
    )

    saved_dir = migrate_module._MIGRATIONS_DIR
    migrate_module._MIGRATIONS_DIR = migrations_dir
    try:
        migrate_db(db)
        second_pass = migrate_db(db)  # second run
        assert second_pass == []
        assert _applied(db) == ["001_test"]
    finally:
        migrate_module._MIGRATIONS_DIR = saved_dir
def test_applies_only_new_migrations(tmp_path):
    """Versions already recorded are skipped; only the newly added file runs."""
    import scripts.db_migrate as migrate_module

    db = tmp_path / "test.db"
    migrations_dir = tmp_path / "migrations"
    migrations_dir.mkdir()
    first_file = migrations_dir / "001_first.sql"
    first_file.write_text(
        "CREATE TABLE IF NOT EXISTS first_table (id INTEGER PRIMARY KEY);"
    )

    saved_dir = migrate_module._MIGRATIONS_DIR
    migrate_module._MIGRATIONS_DIR = migrations_dir
    try:
        migrate_db(db)

        # Add a second migration after the first has been applied.
        second_file = migrations_dir / "002_second.sql"
        second_file.write_text(
            "CREATE TABLE IF NOT EXISTS second_table (id INTEGER PRIMARY KEY);"
        )

        newly_applied = migrate_db(db)
        assert newly_applied == ["002_second"]
        assert set(_applied(db)) == {"001_first", "002_second"}
        assert "second_table" in _tables(db)
    finally:
        migrate_module._MIGRATIONS_DIR = saved_dir
def test_migration_failure_raises(tmp_path):
    """Invalid SQL raises RuntimeError naming the migration and records nothing."""
    import scripts.db_migrate as migrate_module

    db = tmp_path / "test.db"
    migrations_dir = tmp_path / "migrations"
    migrations_dir.mkdir()
    broken_file = migrations_dir / "001_bad.sql"
    broken_file.write_text("THIS IS NOT VALID SQL !!!")

    saved_dir = migrate_module._MIGRATIONS_DIR
    migrate_module._MIGRATIONS_DIR = migrations_dir
    try:
        with pytest.raises(RuntimeError, match="001_bad"):
            migrate_db(db)
        assert _applied(db) == []
    finally:
        migrate_module._MIGRATIONS_DIR = saved_dir
def test_baseline_migration_runs(tmp_path):
    """The project's real migrations apply to a fresh DB and create the core tables."""
    db = tmp_path / "test.db"

    newly_applied = migrate_db(db)
    assert "001_baseline" in newly_applied

    required = {
        "jobs",
        "job_contacts",
        "company_research",
        "background_tasks",
        "survey_responses",
        "digest_queue",
        "schema_migrations",
    }
    assert required <= _tables(db)

View file

@ -0,0 +1,82 @@
"""Tests for app/components/demo_toolbar.py."""
import sys
from pathlib import Path
import pytest
sys.path.insert(0, str(Path(__file__).parent.parent))
from app.components.demo_toolbar import (
get_simulated_tier,
set_simulated_tier,
render_demo_toolbar,
)
def test_set_simulated_tier_updates_session_state(monkeypatch):
    """A valid tier is stored in session state and emitted in the injected HTML."""
    fake_session = {}
    rendered_html = []

    def record_html(h, height=0):
        rendered_html.append(h)

    monkeypatch.setattr("streamlit.components.v1.html", record_html)
    monkeypatch.setattr("streamlit.session_state", fake_session, raising=False)
    monkeypatch.setattr("streamlit.rerun", lambda: None)

    set_simulated_tier("premium")

    assert fake_session.get("simulated_tier") == "premium"
    assert any("prgn_demo_tier=premium" in snippet for snippet in rendered_html)
def test_set_simulated_tier_invalid_ignored(monkeypatch):
    """An unrecognised tier name leaves session state untouched."""
    fake_session = {}

    def discard_html(h, height=0):
        return None

    monkeypatch.setattr("streamlit.components.v1.html", discard_html)
    monkeypatch.setattr("streamlit.session_state", fake_session, raising=False)
    monkeypatch.setattr("streamlit.rerun", lambda: None)

    set_simulated_tier("ultramax")

    assert "simulated_tier" not in fake_session
def test_get_simulated_tier_defaults_to_paid(monkeypatch):
    """With empty session state and query params the tier is 'paid'."""
    empty_session = {}
    empty_query = {}
    monkeypatch.setattr("streamlit.session_state", empty_session, raising=False)
    monkeypatch.setattr("streamlit.query_params", empty_query, raising=False)

    tier = get_simulated_tier()
    assert tier == "paid"
def test_get_simulated_tier_reads_session(monkeypatch):
    """A tier already present in session state is returned as-is."""
    seeded_session = {"simulated_tier": "free"}
    monkeypatch.setattr("streamlit.session_state", seeded_session, raising=False)
    monkeypatch.setattr("streamlit.query_params", {}, raising=False)

    tier = get_simulated_tier()
    assert tier == "free"
def test_render_demo_toolbar_renders_pills(monkeypatch):
    """render_demo_toolbar draws one button per tier and highlights the current one."""
    # Imported locally so this block does not depend on a module-level import.
    from contextlib import nullcontext

    session = {"simulated_tier": "paid"}
    calls = []

    def mock_button(label, key=None, type=None, use_container_width=False):
        # Parameter names mirror the keyword arguments the toolbar passes.
        calls.append(("button", label, key, type))
        return False  # button not clicked

    def mock_columns(spec):
        # st.columns accepts either an int (column count) or a sequence of
        # widths; return one no-op context manager per column in both cases.
        slots = range(spec) if isinstance(spec, int) else spec
        return [nullcontext() for _ in slots]

    monkeypatch.setattr("streamlit.session_state", session, raising=False)
    monkeypatch.setattr("streamlit.container", lambda: nullcontext())
    monkeypatch.setattr("streamlit.columns", mock_columns)
    monkeypatch.setattr("streamlit.caption", lambda x: None)
    monkeypatch.setattr("streamlit.button", mock_button)
    monkeypatch.setattr("streamlit.divider", lambda: None)

    render_demo_toolbar()

    # Verify buttons were rendered for all tiers
    button_calls = [c for c in calls if c[0] == "button"]
    assert len(button_calls) == 3
    assert any("Paid ✓" in c[1] for c in button_calls)  # current tier marked

    primary_calls = [c for c in button_calls if c[3] == "primary"]
    assert len(primary_calls) == 1
    assert "Paid" in primary_calls[0][1]

View file

@ -0,0 +1,238 @@
"""Tests for digest queue API endpoints."""
import sqlite3
import os
import pytest
from fastapi.testclient import TestClient
@pytest.fixture()
def tmp_db(tmp_path):
    """Build a scratch staging.db with jobs, job_contacts and digest_queue tables.

    Seeds one job (id 1) and one digest-type contact (id 10) whose HTML body
    holds two job links and an unsubscribe link. Returns the path as a string.
    """
    schema_and_seed = """
CREATE TABLE jobs (
id INTEGER PRIMARY KEY,
title TEXT, company TEXT, url TEXT UNIQUE, location TEXT,
is_remote INTEGER DEFAULT 0, salary TEXT,
match_score REAL, keyword_gaps TEXT, status TEXT DEFAULT 'pending',
date_found TEXT, description TEXT, source TEXT
);
CREATE TABLE job_contacts (
id INTEGER PRIMARY KEY,
job_id INTEGER,
subject TEXT,
received_at TEXT,
stage_signal TEXT,
suggestion_dismissed INTEGER DEFAULT 0,
body TEXT,
from_addr TEXT
);
CREATE TABLE digest_queue (
id INTEGER PRIMARY KEY,
job_contact_id INTEGER NOT NULL REFERENCES job_contacts(id),
created_at TEXT DEFAULT (datetime('now')),
UNIQUE(job_contact_id)
);
INSERT INTO jobs (id, title, company, url, status, source, date_found)
VALUES (1, 'Engineer', 'Acme', 'https://acme.com/job/1', 'applied', 'test', '2026-03-19');
INSERT INTO job_contacts (id, job_id, subject, received_at, stage_signal, body, from_addr)
VALUES (
10, 1, 'TechCrunch Jobs Weekly', '2026-03-19T10:00:00', 'digest',
'<html><body>Apply at <a href="https://greenhouse.io/acme/jobs/456">Senior Engineer</a> or <a href="https://lever.co/globex/staff">Staff Designer</a>. Unsubscribe: https://unsubscribe.example.com/remove</body></html>',
'digest@techcrunch.com'
);
"""
    database_file = str(tmp_path / "staging.db")
    connection = sqlite3.connect(database_file)
    connection.executescript(schema_and_seed)
    connection.close()
    return database_file
@pytest.fixture()
def client(tmp_db, monkeypatch):
    """Return a TestClient for dev_api.app wired to the scratch database."""
    # The environment variable is set before dev_api is imported.
    monkeypatch.setenv("STAGING_DB", tmp_db)
    import dev_api

    monkeypatch.setattr(dev_api, "DB_PATH", tmp_db)
    api_client = TestClient(dev_api.app)
    return api_client
# ── GET /api/digest-queue ───────────────────────────────────────────────────
def test_digest_queue_list_empty(client):
    """Listing an empty digest queue returns 200 and an empty list."""
    response = client.get("/api/digest-queue")
    assert response.status_code == 200
    payload = response.json()
    assert payload == []
def test_digest_queue_list_with_entry(client, tmp_db):
    """A queued entry is listed together with fields from its contact row."""
    connection = sqlite3.connect(tmp_db)
    connection.execute("INSERT INTO digest_queue (job_contact_id) VALUES (10)")
    connection.commit()
    connection.close()

    response = client.get("/api/digest-queue")
    assert response.status_code == 200

    entries = response.json()
    assert len(entries) == 1

    first = entries[0]
    assert first["job_contact_id"] == 10
    assert first["subject"] == "TechCrunch Jobs Weekly"
    assert first["from_addr"] == "digest@techcrunch.com"
    assert "body" in first
    assert "created_at" in first
# ── POST /api/digest-queue ──────────────────────────────────────────────────
def test_digest_queue_add(client, tmp_db):
    """POSTing an existing contact id creates a digest_queue row."""
    response = client.post("/api/digest-queue", json={"job_contact_id": 10})
    assert response.status_code == 200

    payload = response.json()
    assert payload["ok"] is True
    assert payload["created"] is True

    connection = sqlite3.connect(tmp_db)
    cursor = connection.execute("SELECT * FROM digest_queue WHERE job_contact_id = 10")
    stored = cursor.fetchone()
    connection.close()
    assert stored is not None
def test_digest_queue_add_duplicate(client):
    """Adding the same contact twice succeeds but reports created=False."""
    request_body = {"job_contact_id": 10}
    client.post("/api/digest-queue", json=request_body)

    response = client.post("/api/digest-queue", json=request_body)
    assert response.status_code == 200

    payload = response.json()
    assert payload["ok"] is True
    assert payload["created"] is False
def test_digest_queue_add_missing_contact(client):
    """Queueing a contact id that does not exist returns 404."""
    response = client.post("/api/digest-queue", json={"job_contact_id": 9999})
    status = response.status_code
    assert status == 404
# ── POST /api/digest-queue/{id}/extract-links ───────────────────────────────
def _add_digest_entry(tmp_db, contact_id=10):
"""Helper: insert a digest_queue row and return its id."""
con = sqlite3.connect(tmp_db)
cur = con.execute("INSERT INTO digest_queue (job_contact_id) VALUES (?)", (contact_id,))
entry_id = cur.lastrowid
con.commit()
con.close()
return entry_id
def test_digest_extract_links(client, tmp_db):
    """Both job-board links are extracted with score 2 and each carries a hint."""
    entry_id = _add_digest_entry(tmp_db)

    response = client.post(f"/api/digest-queue/{entry_id}/extract-links")
    assert response.status_code == 200
    links = response.json()["links"]

    # greenhouse.io link should be present with score=2
    greenhouse = [item for item in links if "greenhouse.io" in item["url"]]
    assert len(greenhouse) == 1
    assert greenhouse[0]["score"] == 2

    # lever.co link should be present with score=2
    lever = [item for item in links if "lever.co" in item["url"]]
    assert len(lever) == 1
    assert lever[0]["score"] == 2

    # Each link must have a hint key (may be empty string for links at start of body)
    for item in links:
        assert "hint" in item
def test_digest_extract_links_filters_trackers(client, tmp_db):
    """The unsubscribe URL in the digest body is not returned as a link."""
    entry_id = _add_digest_entry(tmp_db)

    response = client.post(f"/api/digest-queue/{entry_id}/extract-links")
    assert response.status_code == 200

    extracted_urls = []
    for item in response.json()["links"]:
        extracted_urls.append(item["url"])

    # Unsubscribe URL should be excluded
    assert not any("unsubscribe" in url for url in extracted_urls)
def test_digest_extract_links_404(client):
    """Extracting links for an unknown queue entry returns 404."""
    response = client.post("/api/digest-queue/9999/extract-links")
    status = response.status_code
    assert status == 404
# ── POST /api/digest-queue/{id}/queue-jobs ──────────────────────────────────
def test_digest_queue_jobs(client, tmp_db):
    """Queueing one URL inserts a pending job whose source is 'digest'."""
    entry_id = _add_digest_entry(tmp_db)
    request_body = {"urls": ["https://greenhouse.io/acme/jobs/456"]}

    response = client.post(f"/api/digest-queue/{entry_id}/queue-jobs", json=request_body)
    assert response.status_code == 200

    payload = response.json()
    assert payload["queued"] == 1
    assert payload["skipped"] == 0

    connection = sqlite3.connect(tmp_db)
    stored = connection.execute(
        "SELECT source, status FROM jobs WHERE url = 'https://greenhouse.io/acme/jobs/456'"
    ).fetchone()
    connection.close()

    assert stored is not None
    source, status = stored[0], stored[1]
    assert source == "digest"
    assert status == "pending"
def test_digest_queue_jobs_skips_duplicates(client, tmp_db):
    """The same URL sent twice in one request is queued once and skipped once."""
    entry_id = _add_digest_entry(tmp_db)
    repeated_urls = [
        "https://greenhouse.io/acme/jobs/789",
        "https://greenhouse.io/acme/jobs/789",  # same URL twice in one call
    ]

    response = client.post(
        f"/api/digest-queue/{entry_id}/queue-jobs",
        json={"urls": repeated_urls},
    )
    assert response.status_code == 200

    payload = response.json()
    assert payload["queued"] == 1
    assert payload["skipped"] == 1

    connection = sqlite3.connect(tmp_db)
    count_row = connection.execute(
        "SELECT COUNT(*) FROM jobs WHERE url = 'https://greenhouse.io/acme/jobs/789'"
    ).fetchone()
    connection.close()
    assert count_row[0] == 1
def test_digest_queue_jobs_skips_invalid_urls(client, tmp_db):
    """Empty and ftp:// URLs are skipped; the https URL is queued."""
    entry_id = _add_digest_entry(tmp_db)
    candidate_urls = ["", "ftp://bad.example.com", "https://valid.greenhouse.io/job/1"]

    response = client.post(
        f"/api/digest-queue/{entry_id}/queue-jobs",
        json={"urls": candidate_urls},
    )
    assert response.status_code == 200

    payload = response.json()
    assert payload["queued"] == 1
    assert payload["skipped"] == 2
def test_digest_queue_jobs_empty_urls(client, tmp_db):
    """An empty URL list is rejected with 400."""
    entry_id = _add_digest_entry(tmp_db)
    endpoint = f"/api/digest-queue/{entry_id}/queue-jobs"

    response = client.post(endpoint, json={"urls": []})
    assert response.status_code == 400
def test_digest_queue_jobs_404(client):
    """Queueing jobs against an unknown queue entry returns 404."""
    request_body = {"urls": ["https://example.com"]}
    response = client.post("/api/digest-queue/9999/queue-jobs", json=request_body)
    assert response.status_code == 404
# ── DELETE /api/digest-queue/{id} ───────────────────────────────────────────
def test_digest_delete(client, tmp_db):
    """Deleting an entry succeeds once; deleting it again returns 404."""
    entry_id = _add_digest_entry(tmp_db)
    endpoint = f"/api/digest-queue/{entry_id}"

    first = client.delete(endpoint)
    assert first.status_code == 200
    assert first.json()["ok"] is True

    # Second delete → 404
    second = client.delete(endpoint)
    assert second.status_code == 404

View file

@ -0,0 +1,216 @@
"""Tests for new dev-api.py endpoints: stage signals, email sync, signal dismiss."""
import sqlite3
import tempfile
import os
import pytest
from fastapi.testclient import TestClient
@pytest.fixture()
def tmp_db(tmp_path):
    """Build a scratch staging.db with jobs, job_contacts and background_tasks.

    Seeds two jobs (ids 1 and 2) and four contact rows (ids 10-13), one of
    which is neutral and one already dismissed. Returns the path as a string.
    """
    schema_and_seed = """
CREATE TABLE jobs (
id INTEGER PRIMARY KEY,
title TEXT, company TEXT, url TEXT, location TEXT,
is_remote INTEGER DEFAULT 0, salary TEXT,
match_score REAL, keyword_gaps TEXT, status TEXT,
interview_date TEXT, rejection_stage TEXT,
applied_at TEXT, phone_screen_at TEXT, interviewing_at TEXT,
offer_at TEXT, hired_at TEXT, survey_at TEXT
);
CREATE TABLE job_contacts (
id INTEGER PRIMARY KEY,
job_id INTEGER,
subject TEXT,
received_at TEXT,
stage_signal TEXT,
suggestion_dismissed INTEGER DEFAULT 0,
body TEXT,
from_addr TEXT
);
CREATE TABLE background_tasks (
id INTEGER PRIMARY KEY,
task_type TEXT,
job_id INTEGER,
status TEXT DEFAULT 'queued',
finished_at TEXT
);
INSERT INTO jobs (id, title, company, status) VALUES
(1, 'Engineer', 'Acme', 'applied'),
(2, 'Designer', 'Beta', 'phone_screen');
INSERT INTO job_contacts (id, job_id, subject, received_at, stage_signal, suggestion_dismissed) VALUES
(10, 1, 'Interview confirmed', '2026-03-19T10:00:00', 'interview_scheduled', 0),
(11, 1, 'Old neutral', '2026-03-18T09:00:00', 'neutral', 0),
(12, 2, 'Offer letter', '2026-03-19T11:00:00', 'offer_received', 0),
(13, 1, 'Already dismissed', '2026-03-17T08:00:00', 'positive_response', 1);
"""
    database_file = str(tmp_path / "staging.db")
    connection = sqlite3.connect(database_file)
    connection.executescript(schema_and_seed)
    connection.close()
    return database_file
@pytest.fixture()
def client(tmp_db, monkeypatch):
    """Return a TestClient for dev_api.app wired to the scratch database."""
    # The environment variable is set before dev_api is imported.
    monkeypatch.setenv("STAGING_DB", tmp_db)
    import dev_api

    monkeypatch.setattr(dev_api, "DB_PATH", tmp_db)
    api_client = TestClient(dev_api.app)
    return api_client
# ── GET /api/interviews — stage signals batched ────────────────────────────
def test_interviews_includes_stage_signals(client):
    """Each job lists only its undismissed, non-neutral stage signals."""
    response = client.get("/api/interviews")
    assert response.status_code == 200
    jobs_by_id = {job["id"]: job for job in response.json()}

    # job 1 should have exactly 1 undismissed non-excluded signal
    assert "stage_signals" in jobs_by_id[1]
    job1_signals = jobs_by_id[1]["stage_signals"]
    assert len(job1_signals) == 1

    only_signal = job1_signals[0]
    assert only_signal["stage_signal"] == "interview_scheduled"
    assert only_signal["subject"] == "Interview confirmed"
    assert only_signal["id"] == 10
    assert "body" in only_signal
    assert "from_addr" in only_signal

    # neutral signal excluded
    labels = [entry["stage_signal"] for entry in job1_signals]
    assert "neutral" not in labels

    # dismissed signal excluded
    ids = [entry["id"] for entry in job1_signals]
    assert 13 not in ids

    # job 2 has an offer signal
    job2_signals = jobs_by_id[2]["stage_signals"]
    assert len(job2_signals) == 1
    assert job2_signals[0]["stage_signal"] == "offer_received"
def test_interviews_empty_signals_for_job_without_contacts(client, tmp_db):
    """A job with no contact rows is returned with an empty stage_signals list."""
    connection = sqlite3.connect(tmp_db)
    connection.execute(
        "INSERT INTO jobs (id, title, company, status) VALUES (3, 'NoContact', 'Corp', 'survey')"
    )
    connection.commit()
    connection.close()

    response = client.get("/api/interviews")
    jobs_by_id = {job["id"]: job for job in response.json()}
    assert jobs_by_id[3]["stage_signals"] == []
# ── POST /api/email/sync ───────────────────────────────────────────────────
def test_email_sync_returns_202(client):
    """Triggering an email sync returns 202 with a task_id in the body."""
    response = client.post("/api/email/sync")
    assert response.status_code == 202
    payload = response.json()
    assert "task_id" in payload
def test_email_sync_inserts_background_task(client, tmp_db):
    """Triggering a sync writes a queued email_sync task with job_id 0."""
    client.post("/api/email/sync")

    connection = sqlite3.connect(tmp_db)
    task_row = connection.execute(
        "SELECT task_type, job_id, status FROM background_tasks WHERE task_type='email_sync'"
    ).fetchone()
    connection.close()

    assert task_row is not None
    task_type, job_id, status = task_row[0], task_row[1], task_row[2]
    assert task_type == "email_sync"
    assert job_id == 0  # sentinel
    assert status == "queued"
# ── GET /api/email/sync/status ─────────────────────────────────────────────
def test_email_sync_status_idle_when_no_tasks(client):
    """With no sync tasks recorded the status is 'idle' and no completion time is set."""
    response = client.get("/api/email/sync/status")
    assert response.status_code == 200

    payload = response.json()
    assert payload["status"] == "idle"
    assert payload["last_completed_at"] is None
def test_email_sync_status_reflects_latest_task(client, tmp_db):
    """A completed sync task is reported with its status and finish time."""
    connection = sqlite3.connect(tmp_db)
    connection.execute(
        "INSERT INTO background_tasks (task_type, job_id, status, finished_at) VALUES "
        "('email_sync', 0, 'completed', '2026-03-19T12:00:00')"
    )
    connection.commit()
    connection.close()

    response = client.get("/api/email/sync/status")
    payload = response.json()
    assert payload["status"] == "completed"
    assert payload["last_completed_at"] == "2026-03-19T12:00:00"
# ── POST /api/stage-signals/{id}/dismiss ──────────────────────────────────
def test_dismiss_signal_sets_flag(client, tmp_db):
    """Dismissing a signal returns ok and sets suggestion_dismissed to 1."""
    response = client.post("/api/stage-signals/10/dismiss")
    assert response.status_code == 200
    assert response.json() == {"ok": True}

    connection = sqlite3.connect(tmp_db)
    flag_row = connection.execute(
        "SELECT suggestion_dismissed FROM job_contacts WHERE id = 10"
    ).fetchone()
    connection.close()

    dismissed = flag_row[0]
    assert dismissed == 1
def test_dismiss_signal_404_for_missing_id(client):
    """Dismissing a signal id that does not exist returns 404."""
    response = client.post("/api/stage-signals/9999/dismiss")
    status = response.status_code
    assert status == 404
# ── Body/from_addr in signal response ─────────────────────────────────────
def test_interviews_signal_includes_body_and_from_addr(client):
    """Every stage signal exposes 'body' and 'from_addr' keys."""
    response = client.get("/api/interviews")
    assert response.status_code == 200

    jobs_by_id = {job["id"]: job for job in response.json()}
    first_signal = jobs_by_id[1]["stage_signals"][0]

    # Fields must exist (may be None when DB column is NULL)
    for field in ("body", "from_addr"):
        assert field in first_signal
# ── POST /api/stage-signals/{id}/reclassify ────────────────────────────────
def test_reclassify_signal_updates_label(client, tmp_db):
    """Reclassifying a signal returns ok and persists the new label."""
    request_body = {"stage_signal": "positive_response"}
    response = client.post("/api/stage-signals/10/reclassify", json=request_body)
    assert response.status_code == 200
    assert response.json() == {"ok": True}

    connection = sqlite3.connect(tmp_db)
    label_row = connection.execute(
        "SELECT stage_signal FROM job_contacts WHERE id = 10"
    ).fetchone()
    connection.close()

    stored_label = label_row[0]
    assert stored_label == "positive_response"
def test_reclassify_signal_invalid_label(client):
    """An unrecognised stage_signal label is rejected with 400."""
    request_body = {"stage_signal": "not_a_real_label"}
    response = client.post("/api/stage-signals/10/reclassify", json=request_body)
    assert response.status_code == 400
def test_reclassify_signal_404_for_missing_id(client):
    """Reclassifying a signal id that does not exist returns 404."""
    request_body = {"stage_signal": "neutral"}
    response = client.post("/api/stage-signals/9999/reclassify", json=request_body)
    assert response.status_code == 404
def test_signal_body_html_is_stripped(client, tmp_db):
    """HTML markup in a contact body is removed from the signal returned by the API."""
    # sqlite3 is already imported at module level; the former function-local
    # re-import was redundant and has been dropped.
    con = sqlite3.connect(tmp_db)
    con.execute(
        "UPDATE job_contacts SET body = ? WHERE id = 10",
        ("<html><body><p>Hi there,</p><p>Interview confirmed.</p></body></html>",)
    )
    con.commit()
    con.close()

    resp = client.get("/api/interviews")
    jobs = {j["id"]: j for j in resp.json()}
    body = jobs[1]["stage_signals"][0]["body"]

    # No tag delimiters may survive, but the visible text must.
    assert "<" not in body
    assert "Hi there" in body
    assert "Interview confirmed" in body

161
tests/test_dev_api_prep.py Normal file
View file

@ -0,0 +1,161 @@
"""Tests for interview prep endpoints: research GET/generate/task, contacts GET."""
import json
import pytest
from unittest.mock import patch, MagicMock
from fastapi.testclient import TestClient
@pytest.fixture
def client():
    """Return a TestClient bound to dev_api.app.

    The import root is derived from this file's location instead of a
    hard-coded absolute path on one developer's machine, and it is added to
    sys.path only once rather than on every fixture invocation.
    """
    import sys
    from pathlib import Path

    # NOTE(review): assumes dev_api.py lives one directory above tests/, the
    # same layout other test modules in this suite rely on — confirm.
    repo_root = str(Path(__file__).resolve().parent.parent)
    if repo_root not in sys.path:
        sys.path.insert(0, repo_root)

    from dev_api import app
    return TestClient(app)
# ── /api/jobs/{id}/research ─────────────────────────────────────────────────
def test_get_research_found(client):
    """Returns research row (minus raw_output) when present."""
    # The database is fully mocked, so the unused local sqlite3 import that
    # used to sit here has been removed.
    mock_row = {
        "job_id": 1,
        "company_brief": "Acme Corp makes anvils.",
        "ceo_brief": "Wile E Coyote",
        "talking_points": "- Ask about roadrunner containment",
        "tech_brief": "Python, Rust",
        "funding_brief": "Series B",
        "red_flags": None,
        "accessibility_brief": None,
        "generated_at": "2026-03-20T12:00:00",
    }
    mock_db = MagicMock()
    mock_db.execute.return_value.fetchone.return_value = mock_row

    with patch("dev_api._get_db", return_value=mock_db):
        resp = client.get("/api/jobs/1/research")

    assert resp.status_code == 200
    data = resp.json()
    assert data["company_brief"] == "Acme Corp makes anvils."
    assert "raw_output" not in data
def test_get_research_not_found(client):
    """A job with no research row yields 404."""
    fake_db = MagicMock()
    fake_db.execute.return_value.fetchone.return_value = None

    with patch("dev_api._get_db", return_value=fake_db):
        response = client.get("/api/jobs/99/research")

    assert response.status_code == 404
# ── /api/jobs/{id}/research/generate ────────────────────────────────────────
def test_generate_research_new_task(client):
    """A fresh submission reports the new task id with is_new=True."""
    submit_result = (42, True)
    with patch("scripts.task_runner.submit_task", return_value=submit_result):
        response = client.post("/api/jobs/1/research/generate")

    assert response.status_code == 200
    payload = response.json()
    assert payload["task_id"] == 42
    assert payload["is_new"] is True
def test_generate_research_duplicate_task(client):
    """When the task is already queued the response carries is_new=False."""
    submit_result = (17, False)
    with patch("scripts.task_runner.submit_task", return_value=submit_result):
        response = client.post("/api/jobs/1/research/generate")

    assert response.status_code == 200
    payload = response.json()
    assert payload["is_new"] is False
def test_generate_research_error(client):
    """An exception from submit_task surfaces as HTTP 500."""
    failure = Exception("LLM unavailable")
    with patch("scripts.task_runner.submit_task", side_effect=failure):
        response = client.post("/api/jobs/1/research/generate")

    assert response.status_code == 500
# ── /api/jobs/{id}/research/task ────────────────────────────────────────────
def test_research_task_none(client):
    """With no background task row, status is 'none' and stage/message are null."""
    fake_db = MagicMock()
    fake_db.execute.return_value.fetchone.return_value = None

    with patch("dev_api._get_db", return_value=fake_db):
        response = client.get("/api/jobs/1/research/task")

    assert response.status_code == 200
    payload = response.json()
    assert payload["status"] == "none"
    assert payload["stage"] is None
    assert payload["message"] is None
def test_research_task_running(client):
    """A running task reports its status and stage with a null message."""
    task_row = {"status": "running", "stage": "Scraping company site", "error": None}
    fake_db = MagicMock()
    fake_db.execute.return_value.fetchone.return_value = task_row

    with patch("dev_api._get_db", return_value=fake_db):
        response = client.get("/api/jobs/1/research/task")

    assert response.status_code == 200
    payload = response.json()
    assert payload["status"] == "running"
    assert payload["stage"] == "Scraping company site"
    assert payload["message"] is None
def test_research_task_failed(client):
    """A failed task exposes the row's error text under the 'message' key."""
    task_row = {"status": "failed", "stage": None, "error": "LLM timeout"}
    fake_db = MagicMock()
    fake_db.execute.return_value.fetchone.return_value = task_row

    with patch("dev_api._get_db", return_value=fake_db):
        response = client.get("/api/jobs/1/research/task")

    assert response.status_code == 200
    payload = response.json()
    assert payload["status"] == "failed"
    assert payload["message"] == "LLM timeout"
# ── /api/jobs/{id}/contacts ──────────────────────────────────────────────────
def test_get_contacts_empty(client):
    """A job without contacts yields 200 and an empty list."""
    fake_db = MagicMock()
    fake_db.execute.return_value.fetchall.return_value = []

    with patch("dev_api._get_db", return_value=fake_db):
        response = client.get("/api/jobs/1/contacts")

    assert response.status_code == 200
    payload = response.json()
    assert payload == []
def test_get_contacts_list(client):
    """Contact rows come back as a list of dicts in the order the DB returned them."""
    inbound = {
        "id": 1,
        "direction": "inbound",
        "subject": "Interview next week",
        "from_addr": "hr@acme.com",
        "body": "Hi! We'd like to...",
        "received_at": "2026-03-19T10:00:00",
    }
    outbound = {
        "id": 2,
        "direction": "outbound",
        "subject": "Re: Interview next week",
        "from_addr": None,
        "body": "Thank you!",
        "received_at": "2026-03-19T11:00:00",
    }
    fake_db = MagicMock()
    fake_db.execute.return_value.fetchall.return_value = [inbound, outbound]

    with patch("dev_api._get_db", return_value=fake_db):
        response = client.get("/api/jobs/1/contacts")

    assert response.status_code == 200
    payload = response.json()
    assert len(payload) == 2
    assert payload[0]["direction"] == "inbound"
    assert payload[1]["direction"] == "outbound"
def test_get_contacts_ordered_by_received_at(client):
    """Most recent contacts appear first (ORDER BY received_at DESC)."""
    mock_db = MagicMock()
    mock_db.execute.return_value.fetchall.return_value = []

    with patch("dev_api._get_db", return_value=mock_db):
        resp = client.get("/api/jobs/99/contacts")

    # The response used to be captured but never checked; make sure the
    # request actually succeeded before inspecting the SQL.
    assert resp.status_code == 200

    # call_args is None when execute() was never invoked, so assert that first
    # to get a readable failure instead of a TypeError on subscripting.
    assert mock_db.execute.called

    # Verify the SQL contains ORDER BY received_at DESC
    call_args = mock_db.execute.call_args
    sql = call_args[0][0]
    assert "ORDER BY received_at DESC" in sql

View file

@ -0,0 +1,632 @@
"""Tests for all settings API endpoints added in Tasks 18."""
import os
import sys
import yaml
import pytest
from pathlib import Path
from unittest.mock import patch, MagicMock
from fastapi.testclient import TestClient
_WORKTREE = "/Library/Development/CircuitForge/peregrine/.worktrees/feature-vue-spa"

# ── Path bootstrap ────────────────────────────────────────────────────────────
# dev_api.py inserts /Library/Development/CircuitForge/peregrine into sys.path
# at import time; the worktree has credential_store but the main repo doesn't.
# Insert the worktree first so 'scripts' resolves to the worktree version, then
# pre-cache it in sys.modules so Python won't re-look-up when dev_api adds the
# main peregrine root.
if _WORKTREE not in sys.path:
    sys.path.insert(0, _WORKTREE)

# Pre-cache the worktree scripts package and submodules before dev_api import
import importlib, types


def _ensure_worktree_scripts():
    """Register the worktree's ``scripts`` package in ``sys.modules``.

    Does nothing when:
      * ``<_WORKTREE>/scripts/__init__.py`` does not exist (e.g. on a machine
        without this worktree checkout) -- otherwise an empty stub module
        would be registered and shadow the real ``scripts`` package;
      * a ``scripts`` module is already registered -- executing a second,
        unregistered copy would only repeat its import side effects.
    """
    import importlib.util as _ilu

    _pkg_dir = f"{_WORKTREE}/scripts"
    _init_py = f"{_pkg_dir}/__init__.py"

    if not os.path.isfile(_init_py):
        return
    # Only load if not already loaded
    if "scripts" in sys.modules:
        return

    _spec = _ilu.spec_from_file_location(
        "scripts", _init_py, submodule_search_locations=[_pkg_dir]
    )
    if _spec is None or _spec.loader is None:
        return

    _mod = _ilu.module_from_spec(_spec)
    sys.modules["scripts"] = _mod
    try:
        _spec.loader.exec_module(_mod)
    except Exception:
        # Best effort: a failing __init__ must not abort test collection.
        # The module stays registered so its __path__ (the worktree scripts
        # directory) is still used to resolve ``scripts.*`` submodules.
        pass


_ensure_worktree_scripts()
@pytest.fixture(scope="module")
def client():
    """Module-scoped TestClient wrapping the dev_api FastAPI application."""
    # Imported lazily so the path bootstrap above has run before dev_api loads
    import dev_api

    return TestClient(dev_api.app)
# ── Helpers ───────────────────────────────────────────────────────────────────
def _write_user_yaml(path: Path, data: dict = None):
    """Write a minimal user.yaml to the given path.

    Args:
        path: Destination file; missing parent directories are created.
        data: Mapping to serialise. When omitted (``None``) a default
            ``{"name": "Test User", "email": "test@example.com"}`` is written.
            An explicitly passed empty dict is written as-is.
    """
    # ``is None`` rather than ``data or ...`` so an intentional ``{}`` is kept
    if data is None:
        data = {"name": "Test User", "email": "test@example.com"}
    path.parent.mkdir(parents=True, exist_ok=True)
    with open(path, "w") as f:
        yaml.dump(data, f)
# ── GET /api/config/app ───────────────────────────────────────────────────────
def test_app_config_returns_expected_keys(client):
    """GET /api/config/app answers 200 with isCloud, tier and inferenceProfile,
    the latter two restricted to their known value sets."""
    response = client.get("/api/config/app")
    assert response.status_code == 200

    payload = response.json()
    for required_key in ("isCloud", "tier", "inferenceProfile"):
        assert required_key in payload

    assert payload["tier"] in {"free", "paid", "premium", "ultra"}
    assert payload["inferenceProfile"] in {"remote", "cpu", "single-gpu", "dual-gpu"}
def test_app_config_iscloud_env(client):
"""isCloud reflects CLOUD_MODE env var."""
with patch.dict(os.environ, {"CLOUD_MODE": "true"}):
resp = client.get("/api/config/app")
assert resp.json()["isCloud"] is True
def test_app_config_invalid_tier_falls_back_to_free(client):
"""Unknown APP_TIER falls back to 'free'."""
with patch.dict(os.environ, {"APP_TIER": "enterprise"}):
resp = client.get("/api/config/app")
assert resp.json()["tier"] == "free"
# ── GET/PUT /api/settings/profile ─────────────────────────────────────────────
def test_get_profile_returns_fields(tmp_path, monkeypatch):
"""GET /api/settings/profile returns dict with expected profile fields."""
db_dir = tmp_path / "db"
db_dir.mkdir()
cfg_dir = db_dir / "config"
cfg_dir.mkdir()
user_yaml = cfg_dir / "user.yaml"
_write_user_yaml(user_yaml, {"name": "Alice", "email": "alice@example.com"})
monkeypatch.setenv("STAGING_DB", str(db_dir / "staging.db"))
from dev_api import app
c = TestClient(app)
resp = c.get("/api/settings/profile")
assert resp.status_code == 200
data = resp.json()
assert "name" in data
assert "email" in data
assert "career_summary" in data
assert "mission_preferences" in data
def test_put_get_profile_roundtrip(tmp_path, monkeypatch):
"""PUT then GET profile round-trip: saved name is returned."""
db_dir = tmp_path / "db"
db_dir.mkdir()
cfg_dir = db_dir / "config"
cfg_dir.mkdir()
user_yaml = cfg_dir / "user.yaml"
_write_user_yaml(user_yaml)
monkeypatch.setenv("STAGING_DB", str(db_dir / "staging.db"))
from dev_api import app
c = TestClient(app)
put_resp = c.put("/api/settings/profile", json={
"name": "Bob Builder",
"email": "bob@example.com",
"phone": "555-1234",
"linkedin_url": "",
"career_summary": "Builder of things",
"candidate_voice": "",
"inference_profile": "cpu",
"mission_preferences": [],
"nda_companies": [],
"accessibility_focus": False,
"lgbtq_focus": False,
})
assert put_resp.status_code == 200
assert put_resp.json()["ok"] is True
get_resp = c.get("/api/settings/profile")
assert get_resp.status_code == 200
assert get_resp.json()["name"] == "Bob Builder"
# ── GET /api/settings/resume ──────────────────────────────────────────────────
def test_get_resume_missing_returns_not_exists(tmp_path, monkeypatch):
"""GET /api/settings/resume when file missing returns {exists: false}."""
fake_path = tmp_path / "config" / "plain_text_resume.yaml"
# Ensure the path doesn't exist
monkeypatch.setattr("dev_api._resume_path", lambda: fake_path)
from dev_api import app
c = TestClient(app)
resp = c.get("/api/settings/resume")
assert resp.status_code == 200
assert resp.json() == {"exists": False}
def test_post_resume_blank_creates_file(tmp_path, monkeypatch):
"""POST /api/settings/resume/blank creates the file."""
fake_path = tmp_path / "config" / "plain_text_resume.yaml"
monkeypatch.setattr("dev_api._resume_path", lambda: fake_path)
from dev_api import app
c = TestClient(app)
resp = c.post("/api/settings/resume/blank")
assert resp.status_code == 200
assert resp.json()["ok"] is True
assert fake_path.exists()
def test_get_resume_after_blank_returns_exists(tmp_path, monkeypatch):
"""GET /api/settings/resume after blank creation returns {exists: true}."""
fake_path = tmp_path / "config" / "plain_text_resume.yaml"
monkeypatch.setattr("dev_api._resume_path", lambda: fake_path)
from dev_api import app
c = TestClient(app)
# First create the blank file
c.post("/api/settings/resume/blank")
# Now get should return exists: True
resp = c.get("/api/settings/resume")
assert resp.status_code == 200
assert resp.json()["exists"] is True
def test_post_resume_sync_identity(tmp_path, monkeypatch):
"""POST /api/settings/resume/sync-identity returns 200."""
db_dir = tmp_path / "db"
db_dir.mkdir()
cfg_dir = db_dir / "config"
cfg_dir.mkdir()
user_yaml = cfg_dir / "user.yaml"
_write_user_yaml(user_yaml)
monkeypatch.setenv("STAGING_DB", str(db_dir / "staging.db"))
from dev_api import app
c = TestClient(app)
resp = c.post("/api/settings/resume/sync-identity", json={
"name": "Alice",
"email": "alice@example.com",
"phone": "555-0000",
"linkedin_url": "https://linkedin.com/in/alice",
})
assert resp.status_code == 200
assert resp.json()["ok"] is True
# ── GET/PUT /api/settings/search ──────────────────────────────────────────────
def test_get_search_prefs_returns_dict(tmp_path, monkeypatch):
"""GET /api/settings/search returns a dict with expected fields."""
fake_path = tmp_path / "config" / "search_profiles.yaml"
fake_path.parent.mkdir(parents=True, exist_ok=True)
with open(fake_path, "w") as f:
yaml.dump({"default": {"remote_preference": "remote", "job_boards": []}}, f)
monkeypatch.setattr("dev_api._search_prefs_path", lambda: fake_path)
from dev_api import app
c = TestClient(app)
resp = c.get("/api/settings/search")
assert resp.status_code == 200
data = resp.json()
assert "remote_preference" in data
assert "job_boards" in data
def test_put_get_search_roundtrip(tmp_path, monkeypatch):
"""PUT then GET search prefs round-trip: saved field is returned."""
fake_path = tmp_path / "config" / "search_profiles.yaml"
fake_path.parent.mkdir(parents=True, exist_ok=True)
monkeypatch.setattr("dev_api._search_prefs_path", lambda: fake_path)
from dev_api import app
c = TestClient(app)
put_resp = c.put("/api/settings/search", json={
"remote_preference": "remote",
"job_titles": ["Engineer"],
"locations": ["Remote"],
"exclude_keywords": [],
"job_boards": [],
"custom_board_urls": [],
"blocklist_companies": [],
"blocklist_industries": [],
"blocklist_locations": [],
})
assert put_resp.status_code == 200
assert put_resp.json()["ok"] is True
get_resp = c.get("/api/settings/search")
assert get_resp.status_code == 200
assert get_resp.json()["remote_preference"] == "remote"
def test_get_search_missing_file_returns_empty(tmp_path, monkeypatch):
"""GET /api/settings/search when file missing returns empty dict."""
fake_path = tmp_path / "config" / "search_profiles.yaml"
monkeypatch.setattr("dev_api._search_prefs_path", lambda: fake_path)
from dev_api import app
c = TestClient(app)
resp = c.get("/api/settings/search")
assert resp.status_code == 200
assert resp.json() == {}
# ── GET/PUT /api/settings/system/llm ─────────────────────────────────────────
def test_get_llm_config_returns_backends_and_byok(tmp_path, monkeypatch):
"""GET /api/settings/system/llm returns backends list and byok_acknowledged."""
db_dir = tmp_path / "db"
db_dir.mkdir()
cfg_dir = db_dir / "config"
cfg_dir.mkdir()
user_yaml = cfg_dir / "user.yaml"
_write_user_yaml(user_yaml)
monkeypatch.setenv("STAGING_DB", str(db_dir / "staging.db"))
fake_llm_path = tmp_path / "llm.yaml"
with open(fake_llm_path, "w") as f:
yaml.dump({"backends": [{"name": "ollama", "enabled": True}]}, f)
monkeypatch.setattr("dev_api.LLM_CONFIG_PATH", fake_llm_path)
from dev_api import app
c = TestClient(app)
resp = c.get("/api/settings/system/llm")
assert resp.status_code == 200
data = resp.json()
assert "backends" in data
assert isinstance(data["backends"], list)
assert "byok_acknowledged" in data
def test_byok_ack_adds_backend(tmp_path, monkeypatch):
"""POST byok-ack with backends list then GET shows backend in byok_acknowledged."""
db_dir = tmp_path / "db"
db_dir.mkdir()
cfg_dir = db_dir / "config"
cfg_dir.mkdir()
user_yaml = cfg_dir / "user.yaml"
_write_user_yaml(user_yaml, {"name": "Test", "byok_acknowledged_backends": []})
monkeypatch.setenv("STAGING_DB", str(db_dir / "staging.db"))
fake_llm_path = tmp_path / "llm.yaml"
monkeypatch.setattr("dev_api.LLM_CONFIG_PATH", fake_llm_path)
from dev_api import app
c = TestClient(app)
ack_resp = c.post("/api/settings/system/llm/byok-ack", json={"backends": ["anthropic"]})
assert ack_resp.status_code == 200
assert ack_resp.json()["ok"] is True
get_resp = c.get("/api/settings/system/llm")
assert get_resp.status_code == 200
assert "anthropic" in get_resp.json()["byok_acknowledged"]
def test_put_llm_config_returns_ok(tmp_path, monkeypatch):
"""PUT /api/settings/system/llm returns ok."""
db_dir = tmp_path / "db"
db_dir.mkdir()
cfg_dir = db_dir / "config"
cfg_dir.mkdir()
user_yaml = cfg_dir / "user.yaml"
_write_user_yaml(user_yaml)
monkeypatch.setenv("STAGING_DB", str(db_dir / "staging.db"))
fake_llm_path = tmp_path / "llm.yaml"
monkeypatch.setattr("dev_api.LLM_CONFIG_PATH", fake_llm_path)
from dev_api import app
c = TestClient(app)
resp = c.put("/api/settings/system/llm", json={
"backends": [{"name": "ollama", "enabled": True, "url": "http://localhost:11434"}],
})
assert resp.status_code == 200
assert resp.json()["ok"] is True
# ── GET /api/settings/system/services ────────────────────────────────────────
def test_get_services_returns_list(client):
"""GET /api/settings/system/services returns a list."""
resp = client.get("/api/settings/system/services")
assert resp.status_code == 200
assert isinstance(resp.json(), list)
def test_get_services_cpu_profile(client):
    """Services list with INFERENCE_PROFILE=cpu contains cpu-compatible services.

    Which services are reported may vary by environment, so no specific
    service is required; each entry must however carry a non-empty name.
    """
    with patch.dict(os.environ, {"INFERENCE_PROFILE": "cpu"}):
        from dev_api import app
        c = TestClient(app)
        resp = c.get("/api/settings/system/services")
    assert resp.status_code == 200
    data = resp.json()
    assert isinstance(data, list)
    # The previous check (`"ollama" in names or len(names) >= 0`) could never
    # fail; assert something that can.
    names = [s["name"] for s in data]
    assert all(isinstance(n, str) and n for n in names)
# ── GET /api/settings/system/email ───────────────────────────────────────────
def test_get_email_has_password_set_bool(tmp_path, monkeypatch):
"""GET /api/settings/system/email has password_set (bool) and no password key."""
fake_email_path = tmp_path / "email.yaml"
monkeypatch.setattr("dev_api._config_dir", lambda: fake_email_path.parent)
with patch("dev_api.get_credential", return_value=None):
from dev_api import app
c = TestClient(app)
resp = c.get("/api/settings/system/email")
assert resp.status_code == 200
data = resp.json()
assert "password_set" in data
assert isinstance(data["password_set"], bool)
assert "password" not in data
def test_get_email_password_set_true_when_stored(tmp_path, monkeypatch):
"""password_set is True when credential is stored."""
fake_email_path = tmp_path / "email.yaml"
monkeypatch.setattr("dev_api._config_dir", lambda: fake_email_path.parent)
with patch("dev_api.get_credential", return_value="secret"):
from dev_api import app
c = TestClient(app)
resp = c.get("/api/settings/system/email")
assert resp.status_code == 200
assert resp.json()["password_set"] is True
def test_test_email_bad_host_returns_ok_false(client):
"""POST /api/settings/system/email/test with bad host returns {ok: false}, not 500."""
with patch("dev_api.get_credential", return_value="fakepassword"):
resp = client.post("/api/settings/system/email/test", json={
"host": "imap.nonexistent-host-xyz.invalid",
"port": 993,
"ssl": True,
"username": "test@nonexistent.invalid",
})
assert resp.status_code == 200
assert resp.json()["ok"] is False
def test_test_email_missing_host_returns_ok_false(client):
"""POST email/test with missing host returns {ok: false}."""
with patch("dev_api.get_credential", return_value=None):
resp = client.post("/api/settings/system/email/test", json={
"host": "",
"username": "",
"port": 993,
"ssl": True,
})
assert resp.status_code == 200
assert resp.json()["ok"] is False
# ── GET /api/settings/fine-tune/status ───────────────────────────────────────
def test_finetune_status_returns_status_and_pairs_count(client):
"""GET /api/settings/fine-tune/status returns status and pairs_count."""
# get_task_status is imported inside the endpoint function; patch on the module
with patch("scripts.task_runner.get_task_status", return_value=None, create=True):
resp = client.get("/api/settings/fine-tune/status")
assert resp.status_code == 200
data = resp.json()
assert "status" in data
assert "pairs_count" in data
def test_finetune_status_idle_when_no_task(tmp_path, monkeypatch):
"""Status is 'idle' and pairs_count is 0 when no task exists."""
fake_jsonl = tmp_path / "cover_letters.jsonl" # does not exist -> 0 pairs
monkeypatch.setattr("dev_api._TRAINING_JSONL", fake_jsonl)
with patch("scripts.task_runner.get_task_status", return_value=None, create=True):
from dev_api import app
c = TestClient(app)
resp = c.get("/api/settings/fine-tune/status")
assert resp.status_code == 200
data = resp.json()
assert data["status"] == "idle"
assert data["pairs_count"] == 0
# ── GET /api/settings/license ────────────────────────────────────────────────
def test_get_license_returns_tier_and_active(tmp_path, monkeypatch):
"""GET /api/settings/license returns tier and active fields."""
fake_license = tmp_path / "license.yaml"
monkeypatch.setattr("dev_api._license_path", lambda: fake_license)
from dev_api import app
c = TestClient(app)
resp = c.get("/api/settings/license")
assert resp.status_code == 200
data = resp.json()
assert "tier" in data
assert "active" in data
def test_get_license_defaults_to_free(tmp_path, monkeypatch):
"""GET /api/settings/license defaults to free tier when no file."""
fake_license = tmp_path / "license.yaml"
monkeypatch.setattr("dev_api._license_path", lambda: fake_license)
from dev_api import app
c = TestClient(app)
resp = c.get("/api/settings/license")
assert resp.status_code == 200
data = resp.json()
assert data["tier"] == "free"
assert data["active"] is False
def test_activate_license_valid_key_returns_ok(tmp_path, monkeypatch):
"""POST activate with valid key format returns {ok: true}."""
fake_license = tmp_path / "license.yaml"
monkeypatch.setattr("dev_api._license_path", lambda: fake_license)
from dev_api import app
c = TestClient(app)
resp = c.post("/api/settings/license/activate", json={"key": "CFG-PRNG-A1B2-C3D4-E5F6"})
assert resp.status_code == 200
assert resp.json()["ok"] is True
def test_activate_license_invalid_key_returns_ok_false(tmp_path, monkeypatch):
"""POST activate with bad key format returns {ok: false}."""
fake_license = tmp_path / "license.yaml"
monkeypatch.setattr("dev_api._license_path", lambda: fake_license)
from dev_api import app
c = TestClient(app)
resp = c.post("/api/settings/license/activate", json={"key": "BADKEY"})
assert resp.status_code == 200
assert resp.json()["ok"] is False
def test_deactivate_license_returns_ok(tmp_path, monkeypatch):
"""POST /api/settings/license/deactivate returns 200 with ok."""
fake_license = tmp_path / "license.yaml"
monkeypatch.setattr("dev_api._license_path", lambda: fake_license)
from dev_api import app
c = TestClient(app)
resp = c.post("/api/settings/license/deactivate")
assert resp.status_code == 200
assert resp.json()["ok"] is True
def test_activate_then_deactivate(tmp_path, monkeypatch):
"""Activate then deactivate: active goes False."""
fake_license = tmp_path / "license.yaml"
monkeypatch.setattr("dev_api._license_path", lambda: fake_license)
from dev_api import app
c = TestClient(app)
c.post("/api/settings/license/activate", json={"key": "CFG-PRNG-A1B2-C3D4-E5F6"})
c.post("/api/settings/license/deactivate")
resp = c.get("/api/settings/license")
assert resp.status_code == 200
assert resp.json()["active"] is False
# ── GET/PUT /api/settings/privacy ─────────────────────────────────────────────
def test_get_privacy_returns_expected_fields(tmp_path, monkeypatch):
"""GET /api/settings/privacy returns telemetry_opt_in and byok_info_dismissed."""
db_dir = tmp_path / "db"
db_dir.mkdir()
cfg_dir = db_dir / "config"
cfg_dir.mkdir()
user_yaml = cfg_dir / "user.yaml"
_write_user_yaml(user_yaml)
monkeypatch.setenv("STAGING_DB", str(db_dir / "staging.db"))
from dev_api import app
c = TestClient(app)
resp = c.get("/api/settings/privacy")
assert resp.status_code == 200
data = resp.json()
assert "telemetry_opt_in" in data
assert "byok_info_dismissed" in data
def test_put_get_privacy_roundtrip(tmp_path, monkeypatch):
"""PUT then GET privacy round-trip: saved values are returned."""
db_dir = tmp_path / "db"
db_dir.mkdir()
cfg_dir = db_dir / "config"
cfg_dir.mkdir()
user_yaml = cfg_dir / "user.yaml"
_write_user_yaml(user_yaml)
monkeypatch.setenv("STAGING_DB", str(db_dir / "staging.db"))
from dev_api import app
c = TestClient(app)
put_resp = c.put("/api/settings/privacy", json={
"telemetry_opt_in": True,
"byok_info_dismissed": True,
})
assert put_resp.status_code == 200
assert put_resp.json()["ok"] is True
get_resp = c.get("/api/settings/privacy")
assert get_resp.status_code == 200
data = get_resp.json()
assert data["telemetry_opt_in"] is True
assert data["byok_info_dismissed"] is True
# ── GET /api/settings/developer ──────────────────────────────────────────────
def test_get_developer_returns_expected_fields(tmp_path, monkeypatch):
"""GET /api/settings/developer returns dev_tier_override and hf_token_set."""
db_dir = tmp_path / "db"
db_dir.mkdir()
cfg_dir = db_dir / "config"
cfg_dir.mkdir()
user_yaml = cfg_dir / "user.yaml"
_write_user_yaml(user_yaml)
monkeypatch.setenv("STAGING_DB", str(db_dir / "staging.db"))
fake_tokens = tmp_path / "tokens.yaml"
monkeypatch.setattr("dev_api._tokens_path", lambda: fake_tokens)
from dev_api import app
c = TestClient(app)
resp = c.get("/api/settings/developer")
assert resp.status_code == 200
data = resp.json()
assert "dev_tier_override" in data
assert "hf_token_set" in data
assert isinstance(data["hf_token_set"], bool)
def test_put_dev_tier_then_get(tmp_path, monkeypatch):
"""PUT dev tier to 'paid' then GET shows dev_tier_override as 'paid'."""
db_dir = tmp_path / "db"
db_dir.mkdir()
cfg_dir = db_dir / "config"
cfg_dir.mkdir()
user_yaml = cfg_dir / "user.yaml"
_write_user_yaml(user_yaml)
monkeypatch.setenv("STAGING_DB", str(db_dir / "staging.db"))
fake_tokens = tmp_path / "tokens.yaml"
monkeypatch.setattr("dev_api._tokens_path", lambda: fake_tokens)
from dev_api import app
c = TestClient(app)
put_resp = c.put("/api/settings/developer/tier", json={"tier": "paid"})
assert put_resp.status_code == 200
assert put_resp.json()["ok"] is True
get_resp = c.get("/api/settings/developer")
assert get_resp.status_code == 200
assert get_resp.json()["dev_tier_override"] == "paid"
def test_wizard_reset_returns_ok(tmp_path, monkeypatch):
"""POST /api/settings/developer/wizard-reset returns 200 with ok."""
db_dir = tmp_path / "db"
db_dir.mkdir()
cfg_dir = db_dir / "config"
cfg_dir.mkdir()
user_yaml = cfg_dir / "user.yaml"
_write_user_yaml(user_yaml, {"name": "Test", "wizard_complete": True})
monkeypatch.setenv("STAGING_DB", str(db_dir / "staging.db"))
from dev_api import app
c = TestClient(app)
resp = c.post("/api/settings/developer/wizard-reset")
assert resp.status_code == 200
assert resp.json()["ok"] is True

View file

@ -0,0 +1,164 @@
"""Tests for survey endpoints: vision health, analyze, save response, get history."""
import pytest
from unittest.mock import patch, MagicMock
from fastapi.testclient import TestClient
@pytest.fixture
def client():
    """TestClient for the dev_api app, importable from the feature worktree.

    The fixture is function-scoped, so it runs once per test; the path is
    only inserted when missing to keep ``sys.path`` from accumulating one
    duplicate entry per test.
    """
    import sys

    worktree = "/Library/Development/CircuitForge/peregrine/.worktrees/feature-vue-spa"
    if worktree not in sys.path:
        sys.path.insert(0, worktree)
    from dev_api import app
    return TestClient(app)
# ── GET /api/vision/health ───────────────────────────────────────────────────
def test_vision_health_available(client):
    """A 200 from the vision service is reported as available=true."""
    upstream_ok = MagicMock(status_code=200)

    with patch("dev_api.requests.get", return_value=upstream_ok):
        response = client.get("/api/vision/health")

    assert response.status_code == 200
    assert response.json() == {"available": True}
def test_vision_health_unavailable(client):
    """An exception from the vision service probe is reported as
    available=false while the endpoint itself still answers 200."""
    probe_failure = Exception("timeout")

    with patch("dev_api.requests.get", side_effect=probe_failure):
        response = client.get("/api/vision/health")

    assert response.status_code == 200
    assert response.json() == {"available": False}
# ── POST /api/jobs/{id}/survey/analyze ──────────────────────────────────────
def test_analyze_text_quick(client):
"""Text mode quick analysis returns output and source=text_paste."""
mock_router = MagicMock()
mock_router.complete.return_value = "1. B — best option"
mock_router.config.get.return_value = ["claude_code", "vllm"]
with patch("dev_api.LLMRouter", return_value=mock_router):
resp = client.post("/api/jobs/1/survey/analyze", json={
"text": "Q1: Do you prefer teamwork?\nA. Solo B. Together",
"mode": "quick",
})
assert resp.status_code == 200
data = resp.json()
assert data["source"] == "text_paste"
assert "B" in data["output"]
# System prompt must be passed for text path
call_kwargs = mock_router.complete.call_args[1]
assert "system" in call_kwargs
assert "culture-fit survey" in call_kwargs["system"]
def test_analyze_text_detailed(client):
"""Text mode detailed analysis passes correct prompt."""
mock_router = MagicMock()
mock_router.complete.return_value = "Option A: good for... Option B: better because..."
mock_router.config.get.return_value = []
with patch("dev_api.LLMRouter", return_value=mock_router):
resp = client.post("/api/jobs/1/survey/analyze", json={
"text": "Q1: Describe your work style.",
"mode": "detailed",
})
assert resp.status_code == 200
assert resp.json()["source"] == "text_paste"
def test_analyze_image(client):
"""Image mode routes through vision path with NO system prompt."""
mock_router = MagicMock()
mock_router.complete.return_value = "1. C — collaborative choice"
mock_router.config.get.return_value = ["vision_service", "claude_code"]
with patch("dev_api.LLMRouter", return_value=mock_router):
resp = client.post("/api/jobs/1/survey/analyze", json={
"image_b64": "aGVsbG8=",
"mode": "quick",
})
assert resp.status_code == 200
data = resp.json()
assert data["source"] == "screenshot"
# No system prompt on vision path
call_kwargs = mock_router.complete.call_args[1]
assert "system" not in call_kwargs
def test_analyze_llm_failure(client):
"""Returns 500 when LLM raises an exception."""
mock_router = MagicMock()
mock_router.complete.side_effect = Exception("LLM unavailable")
mock_router.config.get.return_value = []
with patch("dev_api.LLMRouter", return_value=mock_router):
resp = client.post("/api/jobs/1/survey/analyze", json={
"text": "Q1: test",
"mode": "quick",
})
assert resp.status_code == 500
# ── POST /api/jobs/{id}/survey/responses ────────────────────────────────────
def test_save_response_text(client):
    """Save text response writes to DB and returns id.

    Also checks that the backend supplied a ``received_at`` value to
    ``insert_survey_response``, whether it was passed by keyword or as the
    fourth positional argument.
    """
    mock_db = MagicMock()
    with patch("dev_api._get_db", return_value=mock_db):
        with patch("dev_api.insert_survey_response", return_value=42) as mock_insert:
            resp = client.post("/api/jobs/1/survey/responses", json={
                "mode": "quick",
                "source": "text_paste",
                "raw_input": "Q1: test question",
                "llm_output": "1. B — good reason",
            })
    assert resp.status_code == 200
    assert resp.json()["id"] == 42

    # received_at generated by backend — not None.
    # Look the value up defensively: indexing kwargs["received_at"] directly
    # raised KeyError for a positional call before the fallback could run.
    assert mock_insert.called
    args, kwargs = mock_insert.call_args
    if "received_at" in kwargs:
        received_at = kwargs["received_at"]
    else:
        received_at = args[3] if len(args) > 3 else None
    assert received_at is not None
def test_save_response_with_image(client, tmp_path, monkeypatch):
"""Save image response writes PNG file and stores path in DB."""
monkeypatch.setenv("STAGING_DB", str(tmp_path / "test.db"))
with patch("dev_api.insert_survey_response", return_value=7) as mock_insert:
with patch("dev_api.Path") as mock_path_cls:
mock_path_cls.return_value.__truediv__ = lambda s, o: tmp_path / o
resp = client.post("/api/jobs/1/survey/responses", json={
"mode": "quick",
"source": "screenshot",
"image_b64": "aGVsbG8=", # valid base64
"llm_output": "1. B — reason",
})
assert resp.status_code == 200
assert resp.json()["id"] == 7
# ── GET /api/jobs/{id}/survey/responses ─────────────────────────────────────
def test_get_history_empty(client):
"""Returns empty list when no history exists."""
with patch("dev_api.get_survey_responses", return_value=[]):
resp = client.get("/api/jobs/1/survey/responses")
assert resp.status_code == 200
assert resp.json() == []
def test_get_history_populated(client):
"""Returns history rows newest first."""
rows = [
{"id": 2, "survey_name": "Round 2", "mode": "detailed", "source": "text_paste",
"raw_input": None, "image_path": None, "llm_output": "Option A is best",
"reported_score": "90%", "received_at": "2026-03-21T14:00:00", "created_at": "2026-03-21T14:00:01"},
{"id": 1, "survey_name": "Round 1", "mode": "quick", "source": "text_paste",
"raw_input": "Q1: test", "image_path": None, "llm_output": "1. B",
"reported_score": None, "received_at": "2026-03-21T12:00:00", "created_at": "2026-03-21T12:00:01"},
]
with patch("dev_api.get_survey_responses", return_value=rows):
resp = client.get("/api/jobs/1/survey/responses")
assert resp.status_code == 200
data = resp.json()
assert len(data) == 2
assert data[0]["id"] == 2
assert data[0]["survey_name"] == "Round 2"

180
tests/test_e2e_helpers.py Normal file
View file

@ -0,0 +1,180 @@
"""Unit tests for E2E harness models and helper utilities."""
import fnmatch
import pytest
from unittest.mock import patch, MagicMock
import time
from tests.e2e.models import ErrorRecord, ModeConfig, diff_errors
import tests.e2e.modes.cloud as cloud_mod # imported early so load_dotenv runs before any monkeypatch
def test_error_record_equality():
a = ErrorRecord(type="exception", message="boom", element_html="<div>boom</div>")
b = ErrorRecord(type="exception", message="boom", element_html="<div>boom</div>")
assert a == b
def test_error_record_inequality():
a = ErrorRecord(type="exception", message="boom", element_html="")
b = ErrorRecord(type="alert", message="boom", element_html="")
assert a != b
def test_diff_errors_returns_new_only():
    """diff_errors yields only the records present after but not before."""
    snapshot_before = [ErrorRecord("exception", "old error", "")]
    snapshot_after = [
        ErrorRecord("exception", "old error", ""),
        ErrorRecord("alert", "new error", ""),
    ]
    expected_new = [ErrorRecord("alert", "new error", "")]

    assert diff_errors(snapshot_before, snapshot_after) == expected_new
def test_diff_errors_empty_when_no_change():
errors = [ErrorRecord("exception", "x", "")]
assert diff_errors(errors, errors) == []
def test_diff_errors_empty_before():
after = [ErrorRecord("alert", "boom", "")]
assert diff_errors([], after) == after
def test_mode_config_expected_failure_match():
config = ModeConfig(
name="demo",
base_url="http://localhost:8504",
auth_setup=lambda ctx: None,
expected_failures=["Fetch*", "Generate Cover Letter"],
results_dir=None,
settings_tabs=["👤 My Profile"],
)
assert config.matches_expected_failure("Fetch New Jobs")
assert config.matches_expected_failure("Generate Cover Letter")
assert not config.matches_expected_failure("View Jobs")
def test_mode_config_no_expected_failures():
config = ModeConfig(
name="local",
base_url="http://localhost:8502",
auth_setup=lambda ctx: None,
expected_failures=[],
results_dir=None,
settings_tabs=[],
)
assert not config.matches_expected_failure("Fetch New Jobs")
def test_get_jwt_strategy_b_fallback(monkeypatch):
    """Falls back to persistent JWT when no email env var set."""
    monkeypatch.delenv("E2E_DIRECTUS_EMAIL", raising=False)
    monkeypatch.setenv("E2E_DIRECTUS_JWT", "persistent.jwt.token")
    # Reset the module-level token cache through monkeypatch so its previous
    # contents are restored on teardown instead of leaking into other tests.
    monkeypatch.setitem(cloud_mod._token_cache, "token", None)
    monkeypatch.setitem(cloud_mod._token_cache, "expires_at", 0.0)
    assert cloud_mod._get_jwt() == "persistent.jwt.token"
def test_get_jwt_strategy_b_raises_if_no_token(monkeypatch):
    """Raises if neither email nor JWT env var is set."""
    monkeypatch.delenv("E2E_DIRECTUS_EMAIL", raising=False)
    monkeypatch.delenv("E2E_DIRECTUS_JWT", raising=False)
    # Reset the module-level token cache through monkeypatch so its previous
    # contents are restored on teardown instead of leaking into other tests.
    monkeypatch.setitem(cloud_mod._token_cache, "token", None)
    monkeypatch.setitem(cloud_mod._token_cache, "expires_at", 0.0)
    with pytest.raises(RuntimeError, match="Cloud mode requires"):
        cloud_mod._get_jwt()
def test_get_jwt_strategy_a_login(monkeypatch):
    """Strategy A: calls Directus /auth/login and caches token."""
    monkeypatch.setenv("E2E_DIRECTUS_EMAIL", "e2e@circuitforge.tech")
    monkeypatch.setenv("E2E_DIRECTUS_PASSWORD", "testpass")
    monkeypatch.setenv("E2E_DIRECTUS_URL", "http://fake-directus:8055")
    # Start from an empty cache; monkeypatch.setitem restores the prior
    # entries on teardown so "fresh.jwt" does not stay cached for later tests.
    monkeypatch.setitem(cloud_mod._token_cache, "token", None)
    monkeypatch.setitem(cloud_mod._token_cache, "expires_at", 0.0)

    mock_resp = MagicMock()
    mock_resp.json.return_value = {"data": {"access_token": "fresh.jwt", "expires": 900_000}}
    mock_resp.raise_for_status = lambda: None

    with patch("tests.e2e.modes.cloud.requests.post", return_value=mock_resp) as mock_post:
        token = cloud_mod._get_jwt()

    assert token == "fresh.jwt"
    mock_post.assert_called_once()
    assert cloud_mod._token_cache["token"] == "fresh.jwt"
def test_get_jwt_uses_cache(monkeypatch):
    """Returns cached token if not yet expired."""
    monkeypatch.setenv("E2E_DIRECTUS_EMAIL", "e2e@circuitforge.tech")
    # Seed a still-valid token. Using monkeypatch.setitem (not dict.update)
    # ensures this fake token is removed again after the test rather than
    # being served to whichever test calls _get_jwt() next.
    monkeypatch.setitem(cloud_mod._token_cache, "token", "cached.jwt")
    monkeypatch.setitem(cloud_mod._token_cache, "expires_at", time.time() + 500)

    with patch("tests.e2e.modes.cloud.requests.post") as mock_post:
        token = cloud_mod._get_jwt()

    assert token == "cached.jwt"
    mock_post.assert_not_called()
def test_get_page_errors_finds_exceptions(monkeypatch):
"""get_page_errors returns ErrorRecord for stException elements."""
from tests.e2e.conftest import get_page_errors
mock_el = MagicMock()
mock_el.text_content.return_value = "RuntimeError: boom"
mock_el.inner_html.return_value = "<div>RuntimeError: boom</div>"
mock_page = MagicMock()
mock_page.query_selector_all.side_effect = lambda sel: (
[mock_el] if "stException" in sel else []
)
errors = get_page_errors(mock_page)
assert len(errors) == 1
assert errors[0].type == "exception"
assert "boom" in errors[0].message
def test_get_page_errors_finds_alert_errors(monkeypatch):
"""get_page_errors returns ErrorRecord for stAlert with stAlertContentError child."""
from tests.e2e.conftest import get_page_errors
mock_child = MagicMock()
mock_el = MagicMock()
mock_el.query_selector.return_value = mock_child
mock_el.text_content.return_value = "Something went wrong"
mock_el.inner_html.return_value = "<div>Something went wrong</div>"
mock_page = MagicMock()
mock_page.query_selector_all.side_effect = lambda sel: (
[] if "stException" in sel else [mock_el]
)
errors = get_page_errors(mock_page)
assert len(errors) == 1
assert errors[0].type == "alert"
def test_get_page_errors_ignores_non_error_alerts(monkeypatch):
"""get_page_errors does NOT flag st.warning() or st.info() alerts."""
from tests.e2e.conftest import get_page_errors
mock_el = MagicMock()
mock_el.query_selector.return_value = None
mock_el.inner_text.return_value = "Just a warning"
mock_page = MagicMock()
mock_page.query_selector_all.side_effect = lambda sel: (
[] if "stException" in sel else [mock_el]
)
errors = get_page_errors(mock_page)
assert errors == []
def test_get_console_errors_filters_noise():
    """Only the genuine error text survives; WebSocket noise and non-error logs are dropped."""
    from tests.e2e.conftest import get_console_errors

    websocket_noise = MagicMock(type="error", text="WebSocket connection closed")
    genuine_error = MagicMock(type="error", text="TypeError: cannot read property")
    plain_log = MagicMock(type="log", text="irrelevant")

    kept = get_console_errors([websocket_noise, genuine_error, plain_log])

    assert kept == ["TypeError: cannot read property"]

View file

@ -1024,8 +1024,8 @@ def test_sync_all_per_job_exception_continues(tmp_path):
# ── Performance / edge cases ──────────────────────────────────────────────────
def test_parse_message_large_body_truncated():
"""Body longer than 4000 chars is silently truncated to 4000."""
def test_parse_message_large_body_not_truncated():
"""Body longer than 4000 chars is stored in full (no truncation)."""
from scripts.imap_sync import _parse_message
big_body = ("x" * 10_000).encode()
@ -1037,7 +1037,7 @@ def test_parse_message_large_body_truncated():
conn.fetch.return_value = ("OK", [(b"1 (RFC822)", raw)])
result = _parse_message(conn, b"1")
assert result is not None
assert len(result["body"]) <= 4000
assert len(result["body"]) == 10_000
def test_parse_message_binary_attachment_no_crash():

View file

@ -24,7 +24,7 @@ def test_router_uses_first_reachable_backend():
mock_response.choices[0].message.content = "hello"
with patch.object(router, "_is_reachable", side_effect=[False, True, True, True, True]), \
patch("scripts.llm_router.OpenAI") as MockOpenAI:
patch("circuitforge_core.llm.router.OpenAI") as MockOpenAI:
instance = MockOpenAI.return_value
instance.chat.completions.create.return_value = mock_response
mock_model = MagicMock()
@ -54,7 +54,7 @@ def test_is_reachable_returns_false_on_connection_error():
router = LLMRouter(CONFIG_PATH)
with patch("scripts.llm_router.requests.get", side_effect=requests.ConnectionError):
with patch("circuitforge_core.llm.router.requests.get", side_effect=requests.ConnectionError):
result = router._is_reachable("http://localhost:9999/v1")
assert result is False
@ -92,8 +92,8 @@ def test_complete_skips_backend_without_image_support(tmp_path):
mock_resp.status_code = 200
mock_resp.json.return_value = {"text": "B — collaborative"}
with patch("scripts.llm_router.requests.get") as mock_get, \
patch("scripts.llm_router.requests.post") as mock_post:
with patch("circuitforge_core.llm.router.requests.get") as mock_get, \
patch("circuitforge_core.llm.router.requests.post") as mock_post:
# health check returns ok for vision_service
mock_get.return_value = MagicMock(status_code=200)
mock_post.return_value = mock_resp
@ -127,7 +127,7 @@ def test_complete_without_images_skips_vision_service(tmp_path):
cfg_file.write_text(yaml.dump(cfg))
router = LLMRouter(config_path=cfg_file)
with patch("scripts.llm_router.requests.post") as mock_post:
with patch("circuitforge_core.llm.router.requests.post") as mock_post:
try:
router.complete("text only prompt")
except RuntimeError:

View file

@ -0,0 +1,132 @@
"""Tests for Peregrine's LLMRouter shim — priority fallback logic."""
import sys
from pathlib import Path
from unittest.mock import patch, MagicMock, call
sys.path.insert(0, str(Path(__file__).parent.parent))
def _import_fresh():
    """Re-execute scripts.llm_router and return the reloaded module object."""
    from importlib import reload

    import scripts.llm_router as shim

    # reload() returns the module it re-executed (the same object as `shim`).
    return reload(shim)
# ---------------------------------------------------------------------------
# Test 1: local config/llm.yaml takes priority when it exists
# ---------------------------------------------------------------------------
def test_uses_local_yaml_when_present():
    """When config/llm.yaml exists locally, super().__init__ is called with that path."""
    import scripts.llm_router as shim_mod
    from circuitforge_core.llm import LLMRouter as _CoreLLMRouter

    # Path the shim is expected to pick: <repo>/config/llm.yaml, relative to the shim file.
    local_path = Path(shim_mod.__file__).parent.parent / "config" / "llm.yaml"
    captured = {}

    def fake_exists(self):
        return self == local_path  # only the local path "exists"

    def fake_core_init(self, config_path=None):
        # Record what the shim forwards to the core router instead of loading config.
        captured["config_path"] = config_path
        self.config = {}

    with patch.object(Path, "exists", fake_exists), \
            patch.object(_CoreLLMRouter, "__init__", fake_core_init):
        # Reload while the patches are active so the shim re-binds against them.
        mod = _import_fresh()
        mod.LLMRouter()

    assert captured.get("config_path") == local_path, (
        f"Expected super().__init__ to be called with local path {local_path}, "
        f"got {captured.get('config_path')}"
    )
# ---------------------------------------------------------------------------
# Test 2: falls through to env-var auto-config when neither yaml exists
# ---------------------------------------------------------------------------
def test_falls_through_to_env_when_no_yamls():
    """When no yaml files exist, super().__init__ is called with no args (env-var path)."""
    # Make sure the shim is imported once before Path.exists is patched.
    import scripts.llm_router  # noqa: F401
    from circuitforge_core.llm import LLMRouter as _CoreLLMRouter

    captured = {}

    def fake_exists(self):
        return False  # no yaml files exist anywhere

    def fake_core_init(self, config_path=None):
        # Record whether a path was passed
        captured["config_path"] = config_path
        captured["called"] = True
        self.config = {}

    with patch.object(Path, "exists", fake_exists), \
            patch.object(_CoreLLMRouter, "__init__", fake_core_init):
        # Reload while the patches are active so the shim re-binds against them.
        mod = _import_fresh()
        mod.LLMRouter()

    assert captured.get("called"), "super().__init__ was never called"
    # When called with no args, config_path defaults to None in our mock,
    # meaning the shim correctly fell through to env-var auto-config
    assert captured.get("config_path") is None, (
        f"Expected super().__init__ to be called with no explicit path (None), "
        f"got {captured.get('config_path')}"
    )
# ---------------------------------------------------------------------------
# Test 3: module-level complete() singleton is only instantiated once
# ---------------------------------------------------------------------------
def test_complete_singleton_is_reused():
    """complete() reuses the same LLMRouter instance across multiple calls."""
    import importlib
    import scripts.llm_router as mod

    importlib.reload(mod)

    original_class = mod.LLMRouter
    instantiation_count = [0]

    class CountingRouter(original_class):
        def __init__(self):
            instantiation_count[0] += 1
            # Bypass real __init__ to avoid needing config files
            self.config = {}

        def complete(self, prompt, system=None):
            return "OK"

    # Patch the class in the module and start from an empty singleton slot.
    mod.LLMRouter = CountingRouter
    mod._router = None
    try:
        result1 = mod.complete("first call")
        result2 = mod.complete("second call")

        assert result1 == "OK"
        assert result2 == "OK"
        assert instantiation_count[0] == 1, (
            f"Expected LLMRouter to be instantiated exactly once, "
            f"got {instantiation_count[0]} instantiation(s)"
        )
    finally:
        # Restore even when an assertion fails, and drop the cached CountingRouter
        # so later tests do not inherit the stub singleton.
        mod.LLMRouter = original_class
        mod._router = None

View file

@ -0,0 +1,80 @@
"""Tests: preflight writes OLLAMA_HOST to .env when Ollama is adopted from host."""
import sys
from pathlib import Path
from unittest.mock import patch, call
sys.path.insert(0, str(Path(__file__).parent.parent))
import scripts.preflight as pf
def _make_ports(ollama_external: bool = True, ollama_port: int = 11434) -> dict:
"""Build a minimal ports dict as returned by preflight's port-scanning logic."""
return {
"ollama": {
"resolved": ollama_port,
"external": ollama_external,
"stub_port": 54321,
"env_var": "OLLAMA_PORT",
"adoptable": True,
},
"streamlit": {
"resolved": 8502,
"external": False,
"stub_port": 8502,
"env_var": "STREAMLIT_PORT",
"adoptable": False,
},
}
def _capture_env_updates(ports: dict) -> dict:
    """Rebuild preflight.main()'s env_updates for ``ports`` and return what was written.

    This helper replicates the ``if not args.check_only:`` section of main()
    rather than invoking main(), so tests avoid CLI argument parsing and the
    system probe flow. ``pf.write_env`` is patched to record the dict it
    receives; that recorded dict is the return value.
    """
    written = {}

    def record_write(updates: dict) -> None:
        written.update(updates)

    with patch.object(pf, "write_env", side_effect=record_write), \
            patch.object(pf, "update_llm_yaml"), \
            patch.object(pf, "write_compose_override"):
        # Replicate the env_updates block from preflight.main() as faithfully as possible
        env_updates: dict[str, str] = {}
        for info in ports.values():
            env_updates[info["env_var"]] = str(info["stub_port"])
        env_updates["RECOMMENDED_PROFILE"] = "single-gpu"

        # ---- Code under test: the OLLAMA_HOST adoption block ----
        ollama_info = ports.get("ollama")
        if ollama_info and ollama_info.get("external"):
            env_updates["OLLAMA_HOST"] = f"http://host.docker.internal:{ollama_info['resolved']}"
        # ---------------------------------------------------------

        pf.write_env(env_updates)

    return written
def test_ollama_host_written_when_adopted():
    """An external (host-adopted) Ollama produces an OLLAMA_HOST entry."""
    env = _capture_env_updates(_make_ports(ollama_external=True, ollama_port=11434))

    assert "OLLAMA_HOST" in env
    assert env["OLLAMA_HOST"] == "http://host.docker.internal:11434"
def test_ollama_host_not_written_when_docker_managed():
    """A non-external (Docker-managed) Ollama produces no OLLAMA_HOST entry."""
    env = _capture_env_updates(_make_ports(ollama_external=False))

    assert "OLLAMA_HOST" not in env
def test_ollama_host_reflects_adopted_port():
    """OLLAMA_HOST carries the resolved port of the adopted instance, not the default."""
    env = _capture_env_updates(_make_ports(ollama_external=True, ollama_port=11500))

    assert env["OLLAMA_HOST"] == "http://host.docker.internal:11500"

View file

@ -0,0 +1,288 @@
# tests/test_resume_optimizer.py
"""Tests for scripts/resume_optimizer.py"""
import json
import pytest
from unittest.mock import MagicMock, patch
# ── Fixtures ─────────────────────────────────────────────────────────────────
# Baseline resume shared by the tests below: two jobs, one degree, three skills.
SAMPLE_RESUME = {
    "name": "Alex Rivera",
    "email": "alex@example.com",
    "phone": "555-1234",
    "career_summary": "Experienced Customer Success Manager with a track record of growth.",
    "skills": ["Salesforce", "Python", "customer success"],
    "experience": [
        # Current role
        {
            "title": "Customer Success Manager",
            "company": "Acme Corp",
            "start_date": "2021",
            "end_date": "present",
            "bullets": [
                "Managed a portfolio of 120 enterprise accounts.",
                "Reduced churn by 18% through proactive outreach.",
            ],
        },
        # Previous role
        {
            "title": "Support Engineer",
            "company": "Beta Inc",
            "start_date": "2018",
            "end_date": "2021",
            "bullets": ["Resolved escalations for top-tier clients."],
        },
    ],
    "education": [
        {
            "degree": "B.S.",
            "field": "Computer Science",
            "institution": "State University",
            "graduation_year": "2018",
        }
    ],
    "achievements": [],
}
# Job description used as the extraction target; built from two adjacent string
# literals so the source line stays short.
SAMPLE_JD = (
    "We are looking for a Customer Success Manager with Gainsight, cross-functional "
    "leadership experience, and strong stakeholder management skills. AWS knowledge a plus."
)
# ── extract_jd_signals ────────────────────────────────────────────────────────
def test_extract_jd_signals_returns_list():
    """extract_jd_signals still returns a list when the patched router's complete() raises."""
    from scripts.resume_optimizer import extract_jd_signals

    with patch("scripts.llm_router.LLMRouter") as router_cls:
        router_cls.return_value.complete.side_effect = Exception("no LLM")
        signals = extract_jd_signals(SAMPLE_JD, resume_text="Python developer")

    assert isinstance(signals, list)
def test_extract_jd_signals_llm_path_parses_json_array():
    """Terms from a JSON-array LLM reply appear in the extracted signals."""
    from scripts.resume_optimizer import extract_jd_signals

    with patch("scripts.llm_router.LLMRouter") as router_cls:
        router_cls.return_value.complete.return_value = (
            '["Gainsight", "cross-functional leadership", "stakeholder management"]'
        )
        signals = extract_jd_signals(SAMPLE_JD)

    assert "Gainsight" in signals
    assert "cross-functional leadership" in signals
def test_extract_jd_signals_deduplicates():
    """A term repeated in the LLM reply appears only once in the result."""
    from scripts.resume_optimizer import extract_jd_signals

    with patch("scripts.llm_router.LLMRouter") as router_cls:
        router_cls.return_value.complete.return_value = '["Python", "AWS", "Python"]'
        signals = extract_jd_signals(SAMPLE_JD)

    assert signals.count("Python") == 1
def test_extract_jd_signals_handles_malformed_llm_json():
    """A non-JSON LLM reply does not break extraction; a list is still returned."""
    from scripts.resume_optimizer import extract_jd_signals

    with patch("scripts.llm_router.LLMRouter") as router_cls:
        router_cls.return_value.complete.return_value = "Here are some keywords: Gainsight, AWS"
        signals = extract_jd_signals(SAMPLE_JD)

    # Should still return a list (may be empty if TF-IDF also silent)
    assert isinstance(signals, list)
# ── prioritize_gaps ───────────────────────────────────────────────────────────
def test_prioritize_gaps_skips_existing_terms():
    """Terms the resume already contains are left out of the gap list."""
    from scripts.resume_optimizer import prioritize_gaps

    # "Salesforce" is already in SAMPLE_RESUME skills
    gaps = prioritize_gaps(["Salesforce", "Gainsight"], SAMPLE_RESUME)
    reported_terms = {gap["term"] for gap in gaps}

    assert "Salesforce" not in reported_terms
    assert "Gainsight" in reported_terms
def test_prioritize_gaps_routes_tech_terms_to_skills():
    """Known tech keywords land in the skills section; AWS is checked at priority 1."""
    from scripts.resume_optimizer import prioritize_gaps

    gaps = {gap["term"]: gap for gap in prioritize_gaps(["AWS", "Docker"], SAMPLE_RESUME)}

    aws = gaps["AWS"]
    assert aws["section"] == "skills"
    assert aws["priority"] == 1
    assert gaps["Docker"]["section"] == "skills"
def test_prioritize_gaps_routes_leadership_terms_to_summary():
    """Leadership/executive signals are assigned to the summary section."""
    from scripts.resume_optimizer import prioritize_gaps

    gaps = {
        gap["term"]: gap
        for gap in prioritize_gaps(["cross-functional", "stakeholder"], SAMPLE_RESUME)
    }

    assert gaps["cross-functional"]["section"] == "summary"
    assert gaps["stakeholder"]["section"] == "summary"
def test_prioritize_gaps_multi_word_routes_to_experience():
    """A multi-word phrase outside the skills/summary lists goes to experience, priority 2."""
    from scripts.resume_optimizer import prioritize_gaps

    first_gap = prioritize_gaps(["proactive client engagement"], SAMPLE_RESUME)[0]

    assert first_gap["section"] == "experience"
    assert first_gap["priority"] == 2
def test_prioritize_gaps_single_word_is_lowest_priority():
    """A generic single word that matches no list is given priority 3."""
    from scripts.resume_optimizer import prioritize_gaps

    first_gap = prioritize_gaps(["innovation"], SAMPLE_RESUME)[0]

    assert first_gap["priority"] == 3
def test_prioritize_gaps_sorted_by_priority():
    """The returned gaps are ordered by ascending priority (1 first)."""
    from scripts.resume_optimizer import prioritize_gaps

    mixed_terms = ["innovation", "AWS", "cross-functional", "managed service contracts"]
    ordered = [gap["priority"] for gap in prioritize_gaps(mixed_terms, SAMPLE_RESUME)]

    expected = list(ordered)
    expected.sort()
    assert ordered == expected
# ── hallucination_check ───────────────────────────────────────────────────────
def test_hallucination_check_passes_unchanged_resume():
    """A rewrite that only changes the career summary passes the check."""
    from scripts.resume_optimizer import hallucination_check

    # Shallow rewrite: same structure, only the summary text differs.
    rewritten = dict(SAMPLE_RESUME)
    rewritten["career_summary"] = (
        "Dynamic CSM with cross-functional stakeholder management experience."
    )

    assert hallucination_check(SAMPLE_RESUME, rewritten) is True
def test_hallucination_check_fails_on_new_employer():
    """Adding an experience entry for a company absent from the original fails the check."""
    from scripts.resume_optimizer import hallucination_check

    invented_role = {
        "title": "VP of Customer Success",
        "company": "Fabricated Corp",
        "start_date": "2019",
        "end_date": "2021",
        "bullets": ["Led a team of 30."],
    }
    # Build a new experience list so SAMPLE_RESUME itself is never mutated.
    rewritten = {
        **SAMPLE_RESUME,
        "experience": [*SAMPLE_RESUME["experience"], invented_role],
    }

    assert hallucination_check(SAMPLE_RESUME, rewritten) is False
def test_hallucination_check_fails_on_new_institution():
    """Adding an education entry for an institution absent from the original fails the check."""
    from scripts.resume_optimizer import hallucination_check

    invented_degree = {
        "degree": "M.S.",
        "field": "Data Science",
        "institution": "MIT",
        "graduation_year": "2020",
    }
    # Concatenation creates a fresh list, leaving SAMPLE_RESUME untouched.
    rewritten = {
        **SAMPLE_RESUME,
        "education": SAMPLE_RESUME["education"] + [invented_degree],
    }

    assert hallucination_check(SAMPLE_RESUME, rewritten) is False
# ── render_resume_text ────────────────────────────────────────────────────────
def test_render_resume_text_contains_all_sections():
    """The rendered text includes each section heading and key values from the resume."""
    from scripts.resume_optimizer import render_resume_text

    rendered = render_resume_text(SAMPLE_RESUME)

    expected_fragments = (
        "Alex Rivera",
        "SUMMARY",
        "EXPERIENCE",
        "Customer Success Manager",
        "Acme Corp",
        "EDUCATION",
        "State University",
        "SKILLS",
        "Salesforce",
    )
    for fragment in expected_fragments:
        assert fragment in rendered
def test_render_resume_text_omits_empty_sections():
    """Headings for empty experience and skills sections are not rendered."""
    from scripts.resume_optimizer import render_resume_text

    # Every field except the name is empty.
    name_only = dict(
        name="Jordan Lee",
        email="",
        phone="",
        career_summary="",
        skills=[],
        experience=[],
        education=[],
        achievements=[],
    )

    rendered = render_resume_text(name_only)

    for heading in ("EXPERIENCE", "SKILLS"):
        assert heading not in rendered
# ── db integration ────────────────────────────────────────────────────────────
def test_save_and_get_optimized_resume(tmp_path):
    """save_optimized_resume persists and get_optimized_resume retrieves the data."""
    import sqlite3
    from contextlib import closing

    from scripts.db import init_db, save_optimized_resume, get_optimized_resume

    db_path = tmp_path / "test.db"
    init_db(db_path)

    # Insert a minimal job to satisfy FK. closing() guarantees the connection is
    # released even if the INSERT or commit raises.
    with closing(sqlite3.connect(db_path)) as conn:
        conn.execute(
            "INSERT INTO jobs (id, title, company, url, source, status) VALUES (1, 'CSM', 'Acme', 'http://x.com', 'test', 'approved')"
        )
        conn.commit()

    gap_report = json.dumps([{"term": "Gainsight", "section": "skills", "priority": 1, "rationale": "test"}])
    save_optimized_resume(db_path, job_id=1, text="Rewritten resume text.", gap_report=gap_report)

    result = get_optimized_resume(db_path, job_id=1)
    assert result["optimized_resume"] == "Rewritten resume text."
    # The gap report is stored as a JSON string and must round-trip intact.
    parsed = json.loads(result["ats_gap_report"])
    assert parsed[0]["term"] == "Gainsight"
def test_get_optimized_resume_returns_empty_for_missing(tmp_path):
    """Looking up a job id with no saved record yields empty strings for both fields."""
    from scripts.db import get_optimized_resume, init_db

    database = tmp_path / "test.db"
    init_db(database)

    record = get_optimized_resume(database, job_id=999)

    assert record["optimized_resume"] == ""
    assert record["ats_gap_report"] == ""

View file

@ -109,24 +109,33 @@ def test_missing_budget_logs_warning(tmp_db, caplog):
ts.LLM_TASK_TYPES = frozenset(original)
def test_cpu_only_system_gets_unlimited_vram(tmp_db, monkeypatch):
"""_available_vram is 999.0 when _get_gpus() returns empty list."""
# Patch the module-level _get_gpus in task_scheduler (not preflight)
# so __init__'s _ts_mod._get_gpus() call picks up the mock.
def test_cpu_only_system_creates_scheduler(tmp_db, monkeypatch):
"""Scheduler constructs without error when _get_gpus() returns empty list.
LocalScheduler has no VRAM gating — it runs tasks regardless of GPU count.
VRAM-aware scheduling is handled by circuitforge_orch's coordinator.
"""
monkeypatch.setattr("scripts.task_scheduler._get_gpus", lambda: [])
s = TaskScheduler(tmp_db, _noop_run_task)
assert s._available_vram == 999.0
# Scheduler still has correct budgets configured; no VRAM attribute expected
# Scheduler constructed successfully; budgets contain all LLM task types.
# Does not assert exact values -- a sibling test may write a config override
# to the shared pytest tmp dir, causing _load_config_overrides to pick it up.
assert set(s._budgets.keys()) >= LLM_TASK_TYPES
def test_gpu_vram_summed_across_all_gpus(tmp_db, monkeypatch):
"""_available_vram sums vram_total_gb across all detected GPUs."""
def test_gpu_detection_does_not_affect_local_scheduler(tmp_db, monkeypatch):
"""LocalScheduler ignores GPU VRAM — it has no _available_vram attribute.
VRAM-gated concurrency requires circuitforge_orch (Paid tier).
"""
fake_gpus = [
{"name": "RTX 3090", "vram_total_gb": 24.0, "vram_free_gb": 20.0},
{"name": "RTX 3090", "vram_total_gb": 24.0, "vram_free_gb": 18.0},
]
monkeypatch.setattr("scripts.task_scheduler._get_gpus", lambda: fake_gpus)
s = TaskScheduler(tmp_db, _noop_run_task)
assert s._available_vram == 48.0
assert not hasattr(s, "_available_vram")
def test_enqueue_adds_taskspec_to_deque(tmp_db):
@ -206,40 +215,37 @@ def _make_recording_run_task(log: list, done_event: threading.Event, expected: i
return _run
def _start_scheduler(tmp_db, run_task_fn, available_vram=999.0):
def _start_scheduler(tmp_db, run_task_fn):
s = TaskScheduler(tmp_db, run_task_fn)
s._available_vram = available_vram
s.start()
return s
# ── Tests ─────────────────────────────────────────────────────────────────────
def test_deepest_queue_wins_first_slot(tmp_db):
"""Type with more queued tasks starts first when VRAM only fits one type."""
def test_all_task_types_complete(tmp_db):
"""Scheduler runs tasks from multiple types; all complete.
LocalScheduler runs type batches concurrently (no VRAM gating).
VRAM-gated sequential scheduling requires circuitforge_orch.
"""
log, done = [], threading.Event()
# Build scheduler but DO NOT start it yet — enqueue all tasks first
# so the scheduler sees the full picture on its very first wake.
run_task_fn = _make_recording_run_task(log, done, 4)
s = TaskScheduler(tmp_db, run_task_fn)
s._available_vram = 3.0 # fits cover_letter (2.5) but not +company_research (5.0)
# Enqueue cover_letter (3 tasks) and company_research (1 task) before start.
# cover_letter has the deeper queue and must win the first batch slot.
for i in range(3):
s.enqueue(i + 1, "cover_letter", i + 1, None)
s.enqueue(4, "company_research", 4, None)
s.start() # scheduler now sees all tasks atomically on its first iteration
s.start()
assert done.wait(timeout=5.0), "timed out — not all 4 tasks completed"
s.shutdown()
assert len(log) == 4
cl = [i for i, (_, t) in enumerate(log) if t == "cover_letter"]
cr = [i for i, (_, t) in enumerate(log) if t == "company_research"]
cl = [t for _, t in log if t == "cover_letter"]
cr = [t for _, t in log if t == "company_research"]
assert len(cl) == 3 and len(cr) == 1
assert max(cl) < min(cr), "All cover_letter tasks must finish before company_research starts"
def test_fifo_within_type(tmp_db):
@ -256,8 +262,8 @@ def test_fifo_within_type(tmp_db):
assert [task_id for task_id, _ in log] == [10, 20, 30]
def test_concurrent_batches_when_vram_allows(tmp_db):
"""Two type batches start simultaneously when VRAM fits both."""
def test_concurrent_batches_different_types(tmp_db):
"""Two type batches run concurrently (LocalScheduler has no VRAM gating)."""
started = {"cover_letter": threading.Event(), "company_research": threading.Event()}
all_done = threading.Event()
log = []
@ -268,8 +274,7 @@ def test_concurrent_batches_when_vram_allows(tmp_db):
if len(log) >= 2:
all_done.set()
# VRAM=10.0 fits both cover_letter (2.5) and company_research (5.0) simultaneously
s = _start_scheduler(tmp_db, run_task, available_vram=10.0)
s = _start_scheduler(tmp_db, run_task)
s.enqueue(1, "cover_letter", 1, None)
s.enqueue(2, "company_research", 2, None)
@ -307,8 +312,15 @@ def test_new_tasks_picked_up_mid_batch(tmp_db):
assert log == [1, 2]
def test_worker_crash_releases_vram(tmp_db):
"""If _run_task raises, _reserved_vram returns to 0 and scheduler continues."""
@pytest.mark.filterwarnings("ignore::pytest.PytestUnhandledThreadExceptionWarning")
def test_worker_crash_does_not_stall_scheduler(tmp_db):
"""If _run_task raises, the scheduler continues processing the next task.
The batch_worker intentionally lets the RuntimeError propagate to the thread
boundary (so LocalScheduler can detect crash vs. normal exit). This produces
a PytestUnhandledThreadExceptionWarning -- suppressed here because it is the
expected behavior under test.
"""
log, done = [], threading.Event()
def run_task(db_path, task_id, task_type, job_id, params):
@ -317,16 +329,15 @@ def test_worker_crash_releases_vram(tmp_db):
log.append(task_id)
done.set()
s = _start_scheduler(tmp_db, run_task, available_vram=3.0)
s = _start_scheduler(tmp_db, run_task)
s.enqueue(1, "cover_letter", 1, None)
s.enqueue(2, "cover_letter", 2, None)
assert done.wait(timeout=5.0), "timed out — task 2 never completed after task 1 crash"
s.shutdown()
# Second task still ran, VRAM was released
# Second task still ran despite first crashing
assert 2 in log
assert s._reserved_vram == 0.0
def test_get_scheduler_returns_singleton(tmp_db):
@ -470,3 +481,14 @@ def test_llm_tasks_routed_to_scheduler(tmp_db):
task_runner.submit_task(tmp_db, "cover_letter", 1)
assert "cover_letter" in enqueue_calls
def test_shim_exports_unchanged_api():
    """The scripts.task_scheduler shim still exposes LLM_TASK_TYPES and both scheduler accessors."""
    from scripts.task_scheduler import LLM_TASK_TYPES, get_scheduler, reset_scheduler

    required_types = (
        "cover_letter",
        "company_research",
        "wizard_generate",
        "resume_optimize",
    )
    for task_type in required_types:
        assert task_type in LLM_TASK_TYPES

    for accessor in (get_scheduler, reset_scheduler):
        assert callable(accessor)

105
tests/test_ui_switcher.py Normal file
View file

@ -0,0 +1,105 @@
"""Tests for app/components/ui_switcher.py.
Streamlit is not running during tests — mock all st.* calls.
"""
import sys
from pathlib import Path
from unittest.mock import patch
import pytest
import yaml
sys.path.insert(0, str(Path(__file__).parent.parent))
@pytest.fixture
def profile_yaml(tmp_path):
    """Write a user.yaml (streamlit preference, wizard complete) under tmp_path; return its path."""
    target = tmp_path / "user.yaml"
    target.write_text(
        yaml.dump({"name": "Test", "ui_preference": "streamlit", "wizard_complete": True})
    )
    return target
def test_sync_cookie_injects_vue_js(profile_yaml, monkeypatch):
    """With ui_preference set to vue, the injected HTML contains prgn_ui=vue."""
    import yaml as _yaml

    profile_yaml.write_text(_yaml.dump({"name": "T", "ui_preference": "vue"}))

    rendered_html = []

    def record_html(html, height=0):
        rendered_html.append(html)

    monkeypatch.setattr("streamlit.components.v1.html", record_html)
    monkeypatch.setattr("streamlit.query_params", {}, raising=False)

    from app.components.ui_switcher import sync_ui_cookie

    sync_ui_cookie(profile_yaml, tier="paid")

    assert any("prgn_ui=vue" in snippet for snippet in rendered_html)
def test_sync_cookie_injects_streamlit_js(profile_yaml, monkeypatch):
    """With the fixture's streamlit preference, the injected HTML contains prgn_ui=streamlit."""
    rendered_html = []

    def record_html(html, height=0):
        rendered_html.append(html)

    monkeypatch.setattr("streamlit.components.v1.html", record_html)
    monkeypatch.setattr("streamlit.query_params", {}, raising=False)

    from app.components.ui_switcher import sync_ui_cookie

    sync_ui_cookie(profile_yaml, tier="paid")

    assert any("prgn_ui=streamlit" in snippet for snippet in rendered_html)
def test_sync_cookie_prgn_switch_param_overrides_yaml(profile_yaml, monkeypatch):
    """A prgn_switch=streamlit query param rewrites a vue preference back to streamlit."""
    import yaml as _yaml

    profile_yaml.write_text(_yaml.dump({"name": "T", "ui_preference": "vue"}))

    rendered_html = []

    def record_html(html, height=0):
        rendered_html.append(html)

    monkeypatch.setattr("streamlit.components.v1.html", record_html)
    monkeypatch.setattr("streamlit.query_params", {"prgn_switch": "streamlit"}, raising=False)

    with patch('app.components.ui_switcher._DEMO_MODE', False):
        from app.components.ui_switcher import sync_ui_cookie

        sync_ui_cookie(profile_yaml, tier="paid")

    # user.yaml should now say streamlit
    on_disk = _yaml.safe_load(profile_yaml.read_text())
    assert on_disk["ui_preference"] == "streamlit"
    # JS should set cookie to streamlit
    assert any("prgn_ui=streamlit" in snippet for snippet in rendered_html)
def test_sync_cookie_free_tier_keeps_vue(profile_yaml, monkeypatch):
    """A free-tier user with a vue preference keeps vue (vue_ui_beta is free tier).

    This test used to verify a downgrade to streamlit. The Vue SPA was opened
    to the free tier in issue #20, so the downgrade path no longer triggers.
    """
    import yaml as _yaml

    profile_yaml.write_text(_yaml.dump({"name": "T", "ui_preference": "vue"}))

    rendered_html = []

    def record_html(html, height=0):
        rendered_html.append(html)

    monkeypatch.setattr("streamlit.components.v1.html", record_html)
    monkeypatch.setattr("streamlit.query_params", {}, raising=False)

    with patch('app.components.ui_switcher._DEMO_MODE', False):
        from app.components.ui_switcher import sync_ui_cookie

        sync_ui_cookie(profile_yaml, tier="free")

    on_disk = _yaml.safe_load(profile_yaml.read_text())
    assert on_disk["ui_preference"] == "vue"
    assert any("prgn_ui=vue" in snippet for snippet in rendered_html)
def test_switch_ui_writes_yaml_and_calls_sync(profile_yaml, monkeypatch):
    """switch_ui(to="vue") persists vue in user.yaml and emits the prgn_ui=vue cookie HTML."""
    import yaml as _yaml

    rendered_html = []

    def record_html(html, height=0):
        rendered_html.append(html)

    monkeypatch.setattr("streamlit.components.v1.html", record_html)
    monkeypatch.setattr("streamlit.query_params", {}, raising=False)
    # st.rerun is replaced with a no-op for the duration of the test.
    monkeypatch.setattr("streamlit.rerun", lambda: None)

    with patch('app.components.ui_switcher._DEMO_MODE', False):
        from app.components.ui_switcher import switch_ui

        switch_ui(profile_yaml, to="vue", tier="paid")

    on_disk = _yaml.safe_load(profile_yaml.read_text())
    assert on_disk["ui_preference"] == "vue"
    assert any("prgn_ui=vue" in snippet for snippet in rendered_html)

Some files were not shown because too many files have changed in this diff Show more