Compare commits
No commits in common. "v0.7.1" and "main" have entirely different histories.
177 changed files with 22751 additions and 7128 deletions
44
.cliff.toml
Normal file
44
.cliff.toml
Normal file
|
|
@ -0,0 +1,44 @@
|
||||||
|
# git-cliff changelog configuration for Peregrine
|
||||||
|
# See: https://git-cliff.org/docs/configuration
|
||||||
|
|
||||||
|
[changelog]
|
||||||
|
header = """
|
||||||
|
# Changelog\n
|
||||||
|
"""
|
||||||
|
body = """
|
||||||
|
{% if version %}\
|
||||||
|
## [{{ version | trim_start_matches(pat="v") }}] - {{ timestamp | date(format="%Y-%m-%d") }}
|
||||||
|
{% else %}\
|
||||||
|
## [Unreleased]
|
||||||
|
{% endif %}\
|
||||||
|
{% for group, commits in commits | group_by(attribute="group") %}
|
||||||
|
### {{ group | upper_first }}
|
||||||
|
{% for commit in commits %}
|
||||||
|
- {% if commit.scope %}**{{ commit.scope }}:** {% endif %}{{ commit.message | upper_first }}\
|
||||||
|
{% endfor %}
|
||||||
|
{% endfor %}\n
|
||||||
|
"""
|
||||||
|
trim = true
|
||||||
|
|
||||||
|
[git]
|
||||||
|
conventional_commits = true
|
||||||
|
filter_unconventional = true
|
||||||
|
split_commits = false
|
||||||
|
commit_preprocessors = []
|
||||||
|
commit_parsers = [
|
||||||
|
{ message = "^feat", group = "Features" },
|
||||||
|
{ message = "^fix", group = "Bug Fixes" },
|
||||||
|
{ message = "^perf", group = "Performance" },
|
||||||
|
{ message = "^refactor", group = "Refactoring" },
|
||||||
|
{ message = "^docs", group = "Documentation" },
|
||||||
|
{ message = "^test", group = "Testing" },
|
||||||
|
{ message = "^chore", group = "Chores" },
|
||||||
|
{ message = "^ci", group = "CI/CD" },
|
||||||
|
{ message = "^revert", group = "Reverts" },
|
||||||
|
]
|
||||||
|
filter_commits = false
|
||||||
|
tag_pattern = "v[0-9].*"
|
||||||
|
skip_tags = ""
|
||||||
|
ignore_tags = ""
|
||||||
|
topo_order = false
|
||||||
|
sort_commits = "oldest"
|
||||||
38
.env.example
38
.env.example
|
|
@ -2,9 +2,10 @@
|
||||||
# Auto-generated by the setup wizard, or fill in manually.
|
# Auto-generated by the setup wizard, or fill in manually.
|
||||||
# NEVER commit .env to git.
|
# NEVER commit .env to git.
|
||||||
|
|
||||||
STREAMLIT_PORT=8501
|
STREAMLIT_PORT=8502
|
||||||
OLLAMA_PORT=11434
|
OLLAMA_PORT=11434
|
||||||
VLLM_PORT=8000
|
VLLM_PORT=8000
|
||||||
|
CF_TEXT_PORT=8006
|
||||||
SEARXNG_PORT=8888
|
SEARXNG_PORT=8888
|
||||||
VISION_PORT=8002
|
VISION_PORT=8002
|
||||||
VISION_MODEL=vikhyatk/moondream2
|
VISION_MODEL=vikhyatk/moondream2
|
||||||
|
|
@ -12,10 +13,22 @@ VISION_REVISION=2025-01-09
|
||||||
|
|
||||||
DOCS_DIR=~/Documents/JobSearch
|
DOCS_DIR=~/Documents/JobSearch
|
||||||
OLLAMA_MODELS_DIR=~/models/ollama
|
OLLAMA_MODELS_DIR=~/models/ollama
|
||||||
VLLM_MODELS_DIR=~/models/vllm
|
VLLM_MODELS_DIR=~/models/vllm # override with full path to your model dir
|
||||||
VLLM_MODEL=Ouro-1.4B
|
VLLM_MODEL=Ouro-1.4B # cover letters — fast 1.4B model
|
||||||
|
VLLM_RESEARCH_MODEL=Ouro-2.6B-Thinking # research — reasoning 2.6B model; restart vllm to switch
|
||||||
|
CF_TEXT_MODEL=/Library/Assets/LLM/qwen2.5-3b-instruct-q4_k_m.gguf # cf-text GGUF model; set to "mock" to disable
|
||||||
|
VLLM_MAX_MODEL_LEN=4096 # increase to 8192 for Thinking models with long CoT
|
||||||
|
VLLM_GPU_MEM_UTIL=0.75 # lower to 0.6 if sharing GPU with other services
|
||||||
OLLAMA_DEFAULT_MODEL=llama3.2:3b
|
OLLAMA_DEFAULT_MODEL=llama3.2:3b
|
||||||
|
|
||||||
|
# ── LLM env-var auto-config (alternative to config/llm.yaml) ─────────────────
|
||||||
|
# Set any of these to configure LLM backends without needing a config/llm.yaml.
|
||||||
|
# Priority: Anthropic > OpenAI-compat > Ollama (always tried as local fallback).
|
||||||
|
OLLAMA_HOST=http://localhost:11434 # Ollama host; override if on a different machine
|
||||||
|
OLLAMA_MODEL=llama3.2:3b # model to request from Ollama
|
||||||
|
OPENAI_MODEL=gpt-4o-mini # model override for OpenAI-compat backend
|
||||||
|
ANTHROPIC_MODEL=claude-haiku-4-5-20251001 # model override for Anthropic backend
|
||||||
|
|
||||||
# API keys (required for remote profile)
|
# API keys (required for remote profile)
|
||||||
ANTHROPIC_API_KEY=
|
ANTHROPIC_API_KEY=
|
||||||
OPENAI_COMPAT_URL=
|
OPENAI_COMPAT_URL=
|
||||||
|
|
@ -28,6 +41,25 @@ FORGEJO_API_URL=https://git.opensourcesolarpunk.com/api/v1
|
||||||
# GITHUB_TOKEN= # future — enable when public mirror is active
|
# GITHUB_TOKEN= # future — enable when public mirror is active
|
||||||
# GITHUB_REPO= # future
|
# GITHUB_REPO= # future
|
||||||
|
|
||||||
|
# ── CF-hosted coordinator (Paid+ tier) ───────────────────────────────────────
|
||||||
|
# Set CF_LICENSE_KEY to authenticate with the hosted coordinator.
|
||||||
|
# Leave both blank for local self-hosted cf-orch or bare-metal inference.
|
||||||
|
CF_LICENSE_KEY=
|
||||||
|
CF_ORCH_URL=https://orch.circuitforge.tech
|
||||||
|
|
||||||
|
# cf-orch agent — GPU profiles only (single-gpu, dual-gpu-*)
|
||||||
|
# The agent registers this node with the cf-orch coordinator and reports VRAM stats.
|
||||||
|
# CF_ORCH_COORDINATOR_URL: coordinator the agent registers with
|
||||||
|
# CF_ORCH_NODE_ID: name shown on the dashboard (default: peregrine)
|
||||||
|
# CF_ORCH_AGENT_PORT: host port for the agent HTTP server (default: 7701)
|
||||||
|
# CF_ORCH_ADVERTISE_HOST: IP the coordinator uses to reach back to this agent.
|
||||||
|
# Defaults to 127.0.0.1 (same-host coordinator).
|
||||||
|
# Set to your host LAN IP for a remote coordinator.
|
||||||
|
CF_ORCH_COORDINATOR_URL=http://localhost:7700
|
||||||
|
CF_ORCH_NODE_ID=peregrine
|
||||||
|
CF_ORCH_AGENT_PORT=7701
|
||||||
|
#CF_ORCH_ADVERTISE_HOST=10.1.10.71
|
||||||
|
|
||||||
# Cloud multi-tenancy (compose.cloud.yml only — do not set for local installs)
|
# Cloud multi-tenancy (compose.cloud.yml only — do not set for local installs)
|
||||||
CLOUD_MODE=false
|
CLOUD_MODE=false
|
||||||
CLOUD_DATA_ROOT=/devl/menagerie-data
|
CLOUD_DATA_ROOT=/devl/menagerie-data
|
||||||
|
|
|
||||||
57
.forgejo/workflows/ci.yml
Normal file
57
.forgejo/workflows/ci.yml
Normal file
|
|
@ -0,0 +1,57 @@
|
||||||
|
# Peregrine CI — lint, type-check, test on PR/push
|
||||||
|
# Full-stack: FastAPI (Python) + Vue 3 SPA (Node)
|
||||||
|
# Adapted from Circuit-Forge/cf-agents workflows/ci.yml (cf-agents#4 tracks the
|
||||||
|
# upstream ci-fullstack.yml variant; update this file when that lands).
|
||||||
|
|
||||||
|
name: CI
|
||||||
|
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
branches: [main, 'feature/**', 'fix/**']
|
||||||
|
pull_request:
|
||||||
|
branches: [main]
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
backend:
|
||||||
|
name: Backend (Python)
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v4
|
||||||
|
|
||||||
|
- uses: actions/setup-python@v5
|
||||||
|
with:
|
||||||
|
python-version: '3.12'
|
||||||
|
cache: pip
|
||||||
|
|
||||||
|
- name: Install dependencies
|
||||||
|
run: pip install -r requirements.txt
|
||||||
|
|
||||||
|
- name: Lint
|
||||||
|
run: ruff check .
|
||||||
|
|
||||||
|
- name: Test
|
||||||
|
run: pytest tests/ -v --tb=short
|
||||||
|
|
||||||
|
frontend:
|
||||||
|
name: Frontend (Vue)
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
defaults:
|
||||||
|
run:
|
||||||
|
working-directory: web
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v4
|
||||||
|
|
||||||
|
- uses: actions/setup-node@v4
|
||||||
|
with:
|
||||||
|
node-version: '20'
|
||||||
|
cache: npm
|
||||||
|
cache-dependency-path: web/package-lock.json
|
||||||
|
|
||||||
|
- name: Install dependencies
|
||||||
|
run: npm ci
|
||||||
|
|
||||||
|
- name: Type check
|
||||||
|
run: npx vue-tsc --noEmit
|
||||||
|
|
||||||
|
- name: Test
|
||||||
|
run: npm run test
|
||||||
34
.forgejo/workflows/mirror.yml
Normal file
34
.forgejo/workflows/mirror.yml
Normal file
|
|
@ -0,0 +1,34 @@
|
||||||
|
# Mirror push to GitHub and Codeberg on every push to main or tag.
|
||||||
|
# Copied from Circuit-Forge/cf-agents workflows/mirror.yml
|
||||||
|
# Required secrets: GITHUB_MIRROR_TOKEN, CODEBERG_MIRROR_TOKEN
|
||||||
|
|
||||||
|
name: Mirror
|
||||||
|
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
branches: [main]
|
||||||
|
tags: ['v*']
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
mirror:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v4
|
||||||
|
with:
|
||||||
|
fetch-depth: 0
|
||||||
|
|
||||||
|
- name: Mirror to GitHub
|
||||||
|
env:
|
||||||
|
GITHUB_TOKEN: ${{ secrets.GITHUB_MIRROR_TOKEN }}
|
||||||
|
REPO: ${{ github.event.repository.name }}
|
||||||
|
run: |
|
||||||
|
git remote add github "https://x-access-token:${GITHUB_TOKEN}@github.com/CircuitForgeLLC/${REPO}.git"
|
||||||
|
git push github --mirror
|
||||||
|
|
||||||
|
- name: Mirror to Codeberg
|
||||||
|
env:
|
||||||
|
CODEBERG_TOKEN: ${{ secrets.CODEBERG_MIRROR_TOKEN }}
|
||||||
|
REPO: ${{ github.event.repository.name }}
|
||||||
|
run: |
|
||||||
|
git remote add codeberg "https://CircuitForge:${CODEBERG_TOKEN}@codeberg.org/CircuitForge/${REPO}.git"
|
||||||
|
git push codeberg --mirror
|
||||||
71
.forgejo/workflows/release.yml
Normal file
71
.forgejo/workflows/release.yml
Normal file
|
|
@ -0,0 +1,71 @@
|
||||||
|
# Tag-triggered release workflow.
|
||||||
|
# Generates changelog and creates Forgejo release on v* tags.
|
||||||
|
# Copied from Circuit-Forge/cf-agents workflows/release.yml
|
||||||
|
#
|
||||||
|
# Docker push is intentionally disabled — BSL 1.1 registry policy not yet resolved.
|
||||||
|
# Tracked in Circuit-Forge/cf-agents#3. Re-enable the Docker steps when that lands.
|
||||||
|
#
|
||||||
|
# Required secrets: FORGEJO_RELEASE_TOKEN
|
||||||
|
# (GHCR_TOKEN not needed until Docker push is enabled)
|
||||||
|
|
||||||
|
name: Release
|
||||||
|
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
tags: ['v*']
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
release:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v4
|
||||||
|
with:
|
||||||
|
fetch-depth: 0
|
||||||
|
|
||||||
|
# ── Changelog ────────────────────────────────────────────────────────────
|
||||||
|
- name: Generate changelog
|
||||||
|
uses: orhun/git-cliff-action@v3
|
||||||
|
id: cliff
|
||||||
|
with:
|
||||||
|
config: .cliff.toml
|
||||||
|
args: --latest --strip header
|
||||||
|
env:
|
||||||
|
OUTPUT: CHANGES.md
|
||||||
|
|
||||||
|
# ── Docker (disabled — BSL registry policy pending cf-agents#3) ──────────
|
||||||
|
# - name: Set up QEMU
|
||||||
|
# uses: docker/setup-qemu-action@v3
|
||||||
|
# - name: Set up Buildx
|
||||||
|
# uses: docker/setup-buildx-action@v3
|
||||||
|
# - name: Log in to GHCR
|
||||||
|
# uses: docker/login-action@v3
|
||||||
|
# with:
|
||||||
|
# registry: ghcr.io
|
||||||
|
# username: ${{ github.actor }}
|
||||||
|
# password: ${{ secrets.GHCR_TOKEN }}
|
||||||
|
# - name: Build and push Docker image
|
||||||
|
# uses: docker/build-push-action@v6
|
||||||
|
# with:
|
||||||
|
# context: .
|
||||||
|
# push: true
|
||||||
|
# platforms: linux/amd64,linux/arm64
|
||||||
|
# tags: |
|
||||||
|
# ghcr.io/circuitforgellc/peregrine:${{ github.ref_name }}
|
||||||
|
# ghcr.io/circuitforgellc/peregrine:latest
|
||||||
|
# cache-from: type=gha
|
||||||
|
# cache-to: type=gha,mode=max
|
||||||
|
|
||||||
|
# ── Forgejo Release ───────────────────────────────────────────────────────
|
||||||
|
- name: Create Forgejo release
|
||||||
|
env:
|
||||||
|
FORGEJO_TOKEN: ${{ secrets.FORGEJO_RELEASE_TOKEN }}
|
||||||
|
REPO: ${{ github.event.repository.name }}
|
||||||
|
TAG: ${{ github.ref_name }}
|
||||||
|
NOTES: ${{ steps.cliff.outputs.content }}
|
||||||
|
run: |
|
||||||
|
curl -sS -X POST \
|
||||||
|
"https://git.opensourcesolarpunk.com/api/v1/repos/Circuit-Forge/${REPO}/releases" \
|
||||||
|
-H "Authorization: token ${FORGEJO_TOKEN}" \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d "$(jq -n --arg tag "$TAG" --arg body "$NOTES" \
|
||||||
|
'{tag_name: $tag, name: $tag, body: $body}')"
|
||||||
45
.github/workflows/ci.yml
vendored
45
.github/workflows/ci.yml
vendored
|
|
@ -1,3 +1,7 @@
|
||||||
|
# Peregrine CI — runs on GitHub mirror for public credibility badge.
|
||||||
|
# Forgejo (.forgejo/workflows/ci.yml) is the canonical CI — keep these in sync.
|
||||||
|
# No Forgejo-specific secrets used here; circuitforge-core is public on Forgejo.
|
||||||
|
|
||||||
name: CI
|
name: CI
|
||||||
|
|
||||||
on:
|
on:
|
||||||
|
|
@ -7,23 +11,46 @@ on:
|
||||||
branches: [main]
|
branches: [main]
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
test:
|
backend:
|
||||||
|
name: Backend (Python)
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@v4
|
||||||
|
|
||||||
- name: Install system dependencies
|
- uses: actions/setup-python@v5
|
||||||
run: sudo apt-get update -q && sudo apt-get install -y libsqlcipher-dev
|
|
||||||
|
|
||||||
- name: Set up Python
|
|
||||||
uses: actions/setup-python@v5
|
|
||||||
with:
|
with:
|
||||||
python-version: "3.11"
|
python-version: '3.12'
|
||||||
cache: pip
|
cache: pip
|
||||||
|
|
||||||
- name: Install dependencies
|
- name: Install dependencies
|
||||||
run: pip install -r requirements.txt
|
run: pip install -r requirements.txt
|
||||||
|
|
||||||
- name: Run tests
|
- name: Lint
|
||||||
|
run: ruff check .
|
||||||
|
|
||||||
|
- name: Test
|
||||||
run: pytest tests/ -v --tb=short
|
run: pytest tests/ -v --tb=short
|
||||||
|
|
||||||
|
frontend:
|
||||||
|
name: Frontend (Vue)
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
defaults:
|
||||||
|
run:
|
||||||
|
working-directory: web
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v4
|
||||||
|
|
||||||
|
- uses: actions/setup-node@v4
|
||||||
|
with:
|
||||||
|
node-version: '20'
|
||||||
|
cache: npm
|
||||||
|
cache-dependency-path: web/package-lock.json
|
||||||
|
|
||||||
|
- name: Install dependencies
|
||||||
|
run: npm ci
|
||||||
|
|
||||||
|
- name: Type check
|
||||||
|
run: npx vue-tsc --noEmit
|
||||||
|
|
||||||
|
- name: Test
|
||||||
|
run: npm run test
|
||||||
|
|
|
||||||
3
.gitignore
vendored
3
.gitignore
vendored
|
|
@ -40,8 +40,11 @@ pytest-output.txt
|
||||||
docs/superpowers/
|
docs/superpowers/
|
||||||
|
|
||||||
data/email_score.jsonl
|
data/email_score.jsonl
|
||||||
|
data/email_score.jsonl.bad-labels
|
||||||
data/email_label_queue.jsonl
|
data/email_label_queue.jsonl
|
||||||
data/email_compare_sample.jsonl
|
data/email_compare_sample.jsonl
|
||||||
|
data/.feedback_ratelimit.json
|
||||||
|
data/config/
|
||||||
|
|
||||||
config/label_tool.yaml
|
config/label_tool.yaml
|
||||||
config/server.yaml
|
config/server.yaml
|
||||||
|
|
|
||||||
204
CHANGELOG.md
204
CHANGELOG.md
|
|
@ -9,6 +9,210 @@ Format follows [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
|
## [0.8.6] — 2026-04-12
|
||||||
|
|
||||||
|
### Added
|
||||||
|
|
||||||
|
- **Resume Review Modal** — paged tabbed dialog replaces the inline resume review
|
||||||
|
section in the Apply workspace. Pages through Skills diff, Summary diff, one page
|
||||||
|
per experience entry, and a Confirm summary. Color-coded tab status: unvisited
|
||||||
|
(gray), in-progress (indigo), accepted (green), partial (amber), skipped (slate).
|
||||||
|
Full ARIA tabs pattern with focus trap and `Teleport to body`.
|
||||||
|
- **Resume Library** — new `/resumes` page for managing saved resumes. Two-column
|
||||||
|
layout: list sidebar + full-text preview pane. Supports import (.txt, .pdf, .docx,
|
||||||
|
.odt, .yaml), rename (Edit), set as default, download (txt/pdf/yaml), and delete
|
||||||
|
(guarded: disabled when only resume or is default). 5 MB upload limit.
|
||||||
|
- **ResumeLibraryCard** — compact widget shown above the ATS Resume Optimizer in the
|
||||||
|
Apply workspace. Displays the currently active resume for the job (job-specific or
|
||||||
|
global default), with Switch and Manage deep links.
|
||||||
|
- **Resume library API** — `GET/POST /api/resumes`, `GET/PATCH/DELETE /api/resumes/{id}`,
|
||||||
|
`POST /api/resumes/{id}/set-default`, `POST /api/resumes/import`,
|
||||||
|
`GET/PATCH /api/jobs/{job_id}/resume`. `approve_resume` extended with
|
||||||
|
`save_to_library` + `resume_name` params to save optimized resumes directly.
|
||||||
|
- **`resumes` DB migration** — `migrations/005_resumes_table.sql` adds `resumes` table
|
||||||
|
(10 columns) and `resume_id` FK on `jobs`.
|
||||||
|
- **Resumes nav link** — Document icon entry added after Apply in the main nav.
|
||||||
|
|
||||||
|
### Changed
|
||||||
|
|
||||||
|
- Resume optimizer "Awaiting review" state now triggers the Review Modal instead of
|
||||||
|
rendering an inline diff; save-to-library checkbox and name input surfaced on the
|
||||||
|
preview confirmation step.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## [0.8.5] — 2026-04-02
|
||||||
|
|
||||||
|
### Added
|
||||||
|
|
||||||
|
- **Vue onboarding wizard** — 7-step first-run setup replaces the Streamlit wizard
|
||||||
|
in the Vue SPA: Hardware detection → Tier → Resume upload/build → Identity →
|
||||||
|
Inference & API keys → Search preferences → Integrations. Progress saves to
|
||||||
|
`user.yaml` on every step; crash-recovery resumes from the last completed step.
|
||||||
|
- **Wizard API endpoints** — `GET /api/wizard/status`, `POST /api/wizard/step`,
|
||||||
|
`GET /api/wizard/hardware`, `POST /api/wizard/inference/test`,
|
||||||
|
`POST /api/wizard/complete`. Inference test always soft-fails so Ollama being
|
||||||
|
unreachable never blocks setup completion.
|
||||||
|
- **Cloud auto-skip** — cloud instances automatically complete steps 1 (hardware),
|
||||||
|
2 (tier), and 5 (inference) and drop the user directly on the Resume step.
|
||||||
|
- **`wizardGuard` router gate** — all Vue routes require wizard completion; completed
|
||||||
|
users are bounced away from `/setup` to `/`.
|
||||||
|
- **Chip-input search step** — job titles and locations entered as press-Enter/comma
|
||||||
|
chips; validates at least one title before advancing.
|
||||||
|
- **Integrations tile grid** — optional step 7 shows Notion, Calendar, Slack, Discord,
|
||||||
|
Drive with paid-tier badges; skippable on Finish.
|
||||||
|
|
||||||
|
### Fixed
|
||||||
|
|
||||||
|
- **User config isolation: dangerous fallback removed** — `_user_yaml_path()` fell
|
||||||
|
back to `/devl/job-seeker/config/user.yaml` (legacy profile) when `user.yaml`
|
||||||
|
didn't exist at the expected path; new users now get an empty dict instead of
|
||||||
|
another user's data. Affects profile, resume, search, and all wizard endpoints.
|
||||||
|
- **Resume path not user-isolated** — `RESUME_PATH = Path("config/plain_text_resume.yaml")`
|
||||||
|
was a relative CWD path shared across all users. Replaced with `_resume_path()`
|
||||||
|
derived from `_user_yaml_path()` / `STAGING_DB`.
|
||||||
|
- **Resume upload silently returned empty data** — `upload_resume` was passing a
|
||||||
|
file path string to `structure_resume()` which expects raw text; now reads bytes
|
||||||
|
and dispatches to the correct extractor (`extract_text_from_pdf` / `_docx` / `_odt`).
|
||||||
|
- **Wizard resume step read wrong envelope field** — `WizardResumeStep.vue` read
|
||||||
|
`data.experience` but the upload response wraps parsed data under `data.data`.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## [0.8.4] — 2026-04-02
|
||||||
|
|
||||||
|
### Fixed
|
||||||
|
|
||||||
|
- **Cloud: cover letter used wrong user's profile** — `generate_cover_letter.generate()`
|
||||||
|
loaded `_profile` from the global `config/user.yaml` at module import time, so all
|
||||||
|
cloud users got the default user's name, voice, and mission preferences in their
|
||||||
|
generated letters. `generate()` now accepts a `user_yaml_path` parameter; `task_runner`
|
||||||
|
derives it from the per-user config directory (`db_path/../config/user.yaml`) and
|
||||||
|
passes it through. `_build_system_context`, `_build_mission_notes`, `detect_mission_alignment`,
|
||||||
|
`build_prompt`, and `_trim_to_letter_end` all accept a `profile` override so the
|
||||||
|
per-call profile is used end-to-end without breaking CLI mode.
|
||||||
|
- **Apply Workspace: hardcoded config paths in cloud mode** — `4_Apply.py` was loading
|
||||||
|
`_USER_YAML` and `RESUME_YAML` from the repo-root `config/` before `resolve_session()`
|
||||||
|
ran, so cloud users saw the global (Meg's) resume in the Apply tab. Both paths now
|
||||||
|
derive from `get_config_dir()` after session resolution.
|
||||||
|
|
||||||
|
### Changed
|
||||||
|
|
||||||
|
- **Vue SPA open to all tiers** — Vue 3 frontend is no longer gated behind the beta
|
||||||
|
flag; all tier users can switch to the Vue UI from Settings.
|
||||||
|
- **LLM model candidates** — vllm backend now tries Qwen2.5-3B first, Phi-4-mini
|
||||||
|
as fallback (was reversed). cf_orch allocation block added to vllm config.
|
||||||
|
- **Preflight** — removed `vllm` from Docker adoption list; vllm is now managed
|
||||||
|
entirely by cf-orch and should not be stubbed by preflight.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## [0.8.3] — 2026-04-01
|
||||||
|
|
||||||
|
### Fixed
|
||||||
|
- **CI: Forgejo auth** — GitHub Actions `pip install` was failing to fetch
|
||||||
|
`circuitforge-core` from the private Forgejo VCS URL. Added `FORGEJO_TOKEN`
|
||||||
|
repository secret and a `git config insteadOf` step to inject credentials
|
||||||
|
before `pip install`.
|
||||||
|
- **CI: settings API tests** — 6 `test_dev_api_settings` PUT/POST tests were
|
||||||
|
returning HTTP 500 in CI because `_user_yaml_path()` read the module-level
|
||||||
|
`DB_PATH` constant (frozen at import time), so `monkeypatch.setenv("STAGING_DB")`
|
||||||
|
had no effect. Fixed by reading `os.environ` at call time.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## [0.8.2] — 2026-04-01
|
||||||
|
|
||||||
|
### Fixed
|
||||||
|
- **CI pipeline** — `pip install -r requirements.txt` was failing in GitHub Actions
|
||||||
|
because `-e ../circuitforge-core` requires a sibling directory that doesn't exist
|
||||||
|
in a single-repo checkout. Replaced with a `git+https://` VCS URL fallback;
|
||||||
|
`Dockerfile.cfcore` still installs from the local `COPY` to avoid redundant
|
||||||
|
network fetches during Docker builds.
|
||||||
|
- **Vue-nav reload loop** — `sync_ui_cookie()` was calling
|
||||||
|
`window.parent.location.reload()` on every render when `user.yaml` has
|
||||||
|
`ui_preference: vue` but no Caddy proxy is in the traffic path (test instances,
|
||||||
|
bare Docker). Gated the reload on `PEREGRINE_CADDY_PROXY=1`; instances without
|
||||||
|
the env var set the cookie silently and skip the reload.
|
||||||
|
|
||||||
|
### Changed
|
||||||
|
- **cfcore VRAM lease integration** — the task scheduler now acquires a VRAM lease
|
||||||
|
from the cf-orch coordinator before running a batch of LLM tasks and releases it
|
||||||
|
when the batch completes. Visible in the coordinator dashboard at `:7700`.
|
||||||
|
- **`CF_ORCH_URL` env var** — scheduler reads coordinator address from
|
||||||
|
`CF_ORCH_URL` (default `http://localhost:7700`); set to
|
||||||
|
`http://host.docker.internal:7700` in Docker compose files so containers can
|
||||||
|
reach the host coordinator.
|
||||||
|
- **All compose files on `Dockerfile.cfcore`** — `compose.yml`, `compose.cloud.yml`,
|
||||||
|
and `compose.test-cfcore.yml` all use the parent-context build. `build: .` is
|
||||||
|
removed from `compose.yml`.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## [0.8.1] — 2026-04-01
|
||||||
|
|
||||||
|
### Fixed
|
||||||
|
- **Job title suggester silent failure** — when the LLM returned empty arrays or
|
||||||
|
non-JSON text, the spinner would complete with zero UI feedback. Now shows an
|
||||||
|
explicit "No new suggestions found" info message with a resume-upload hint for
|
||||||
|
new users who haven't uploaded a resume yet.
|
||||||
|
- **Suggester exception handling** — catch `Exception` instead of only
|
||||||
|
`RuntimeError` so connection errors and `FileNotFoundError` (missing llm.yaml)
|
||||||
|
surface as error messages rather than crashing the page silently.
|
||||||
|
|
||||||
|
### Added
|
||||||
|
- **`Dockerfile.cfcore`** — parent-context Dockerfile that copies
|
||||||
|
`circuitforge-core/` alongside `peregrine/` before `pip install`, resolving
|
||||||
|
the `-e ../circuitforge-core` editable requirement inside Docker.
|
||||||
|
- **`compose.test-cfcore.yml`** — single-user test instance on port 8516 for
|
||||||
|
smoke-testing cfcore shim integration before promoting to the cloud instance.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## [0.8.0] — 2026-04-01
|
||||||
|
|
||||||
|
### Added
|
||||||
|
- **ATS Resume Optimizer** (gap report free; LLM rewrite paid+)
|
||||||
|
- `scripts/resume_optimizer.py` — full pipeline: TF-IDF gap extraction →
|
||||||
|
`prioritize_gaps` → `rewrite_for_ats` → hallucination guard (anchor-set
|
||||||
|
diffing on employers, institutions, and dates)
|
||||||
|
- `scripts/db.py` — `optimized_resume` + `ats_gap_report` columns;
|
||||||
|
`save_optimized_resume` / `get_optimized_resume` helpers
|
||||||
|
- `GET /api/jobs/{id}/resume_optimizer` — fetch gap report + rewrite
|
||||||
|
- `POST /api/jobs/{id}/resume_optimizer/generate` — queue rewrite task
|
||||||
|
- `GET /api/jobs/{id}/resume_optimizer/task` — poll task status
|
||||||
|
- `web/src/components/ResumeOptimizerPanel.vue` — gap report (all tiers),
|
||||||
|
LLM rewrite section (paid+), hallucination warning badge, `.txt` download
|
||||||
|
- `ResumeOptimizerPanel` integrated into `ApplyWorkspace`
|
||||||
|
|
||||||
|
- **Vue SPA full merge** (closes #8) — `feature/vue-spa` merged to `main`
|
||||||
|
- `dev-api.py` — full FastAPI backend (settings, jobs, interviews, prep,
|
||||||
|
survey, digest, resume optimizer); cloud session middleware (JWT → per-user
|
||||||
|
SQLite); BYOK credential store
|
||||||
|
- `dev_api.py` — symlink → `dev-api.py` for importable module alias
|
||||||
|
- `scripts/job_ranker.py` — two-stage ranking for `/api/jobs/stack`
|
||||||
|
- `scripts/credential_store.py` — per-user BYOK API key management
|
||||||
|
- `scripts/user_profile.py` — `load_user_profile` / `save_user_profile`
|
||||||
|
- `web/src/components/TaskIndicator.vue` + `web/src/stores/tasks.ts` —
|
||||||
|
live background task queue display
|
||||||
|
- `web/public/` — peregrine logo assets (SVG + PNG)
|
||||||
|
|
||||||
|
- **API test suite** — 5 new test modules (622 tests total)
|
||||||
|
- `tests/test_dev_api_settings.py` (38 tests)
|
||||||
|
- `tests/test_dev_api_interviews.py`, `test_dev_api_prep.py`,
|
||||||
|
`test_dev_api_survey.py`, `test_dev_api_digest.py`
|
||||||
|
|
||||||
|
### Fixed
|
||||||
|
- **Cloud DB routing** — `app/pages/1_Job_Review.py`, `5_Interviews.py`,
|
||||||
|
`6_Interview_Prep.py`, `7_Survey.py` were hardcoding `DEFAULT_DB`; now
|
||||||
|
use `get_db_path()` for correct per-user routing in cloud mode (#24)
|
||||||
|
- **Test isolation** — `importlib.reload(dev_api)` in digest/interviews
|
||||||
|
fixtures reset all module globals, silently breaking `monkeypatch.setattr`
|
||||||
|
in subsequent test files; replaced with targeted `monkeypatch.setattr(dev_api,
|
||||||
|
"DB_PATH", tmp_db)` (#26)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
## [0.7.0] — 2026-03-22
|
## [0.7.0] — 2026-03-22
|
||||||
|
|
||||||
### Added
|
### Added
|
||||||
|
|
|
||||||
|
|
@ -34,7 +34,7 @@ full instructions.
|
||||||
```bash
|
```bash
|
||||||
git clone https://git.opensourcesolarpunk.com/pyr0ball/peregrine.git
|
git clone https://git.opensourcesolarpunk.com/pyr0ball/peregrine.git
|
||||||
cd peregrine
|
cd peregrine
|
||||||
./setup.sh # installs deps, activates git hooks
|
./install.sh # installs deps, activates git hooks
|
||||||
./manage.sh start
|
./manage.sh start
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -6,7 +6,7 @@ WORKDIR /app
|
||||||
# System deps for companyScraper (beautifulsoup4, fake-useragent, lxml) and PDF gen
|
# System deps for companyScraper (beautifulsoup4, fake-useragent, lxml) and PDF gen
|
||||||
# libsqlcipher-dev: required to build pysqlcipher3 (SQLCipher AES-256 encryption for cloud mode)
|
# libsqlcipher-dev: required to build pysqlcipher3 (SQLCipher AES-256 encryption for cloud mode)
|
||||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||||
gcc libffi-dev curl libsqlcipher-dev \
|
gcc libffi-dev curl libsqlcipher-dev git \
|
||||||
&& rm -rf /var/lib/apt/lists/*
|
&& rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
COPY requirements.txt .
|
COPY requirements.txt .
|
||||||
|
|
|
||||||
60
Dockerfile.cfcore
Normal file
60
Dockerfile.cfcore
Normal file
|
|
@ -0,0 +1,60 @@
|
||||||
|
# Dockerfile.cfcore — build context must be the PARENT directory of peregrine/
|
||||||
|
#
|
||||||
|
# Used when circuitforge-core is installed from source (not PyPI).
|
||||||
|
# Both repos must be siblings on the build host:
|
||||||
|
# /devl/peregrine/ → WORKDIR /app
|
||||||
|
# /devl/circuitforge-core/ → installed to /circuitforge-core
|
||||||
|
#
|
||||||
|
# Build manually:
|
||||||
|
# docker build -f peregrine/Dockerfile.cfcore -t peregrine-cfcore ..
|
||||||
|
#
|
||||||
|
# Via compose (compose.test-cfcore.yml sets context: ..):
|
||||||
|
# docker compose -f compose.test-cfcore.yml build
|
||||||
|
FROM python:3.11-slim
|
||||||
|
|
||||||
|
WORKDIR /app
|
||||||
|
|
||||||
|
# System deps for companyScraper (beautifulsoup4, fake-useragent, lxml) and PDF gen
|
||||||
|
# libsqlcipher-dev: required to build pysqlcipher3 (SQLCipher AES-256 encryption for cloud mode)
|
||||||
|
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||||
|
gcc libffi-dev curl libsqlcipher-dev \
|
||||||
|
&& rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
|
# Copy circuitforge-core and install it from the local path before requirements.txt.
|
||||||
|
# requirements.txt has a git+https:// fallback URL for CI (where circuitforge-core
|
||||||
|
# is not a sibling directory), but Docker always has the local copy available here.
|
||||||
|
COPY circuitforge-core/ /circuitforge-core/
|
||||||
|
RUN pip install --no-cache-dir /circuitforge-core
|
||||||
|
|
||||||
|
# circuitforge-orch client — needed for LLMRouter cf_orch allocation.
|
||||||
|
# Optional: if the directory doesn't exist the COPY will fail at build time; keep
|
||||||
|
# cf-orch as a sibling of peregrine in the build context.
|
||||||
|
COPY circuitforge-orch/ /circuitforge-orch/
|
||||||
|
RUN pip install --no-cache-dir /circuitforge-orch
|
||||||
|
|
||||||
|
COPY peregrine/requirements.txt .
|
||||||
|
# Skip the cfcore line — already installed above from the local copy
|
||||||
|
RUN grep -v 'circuitforge-core' requirements.txt | pip install --no-cache-dir -r /dev/stdin
|
||||||
|
|
||||||
|
# Install Playwright browser (cached separately from Python deps so requirements
|
||||||
|
# changes don't bust the ~600–900 MB Chromium layer and vice versa)
|
||||||
|
RUN playwright install chromium && playwright install-deps chromium
|
||||||
|
|
||||||
|
# Bundle companyScraper (company research web scraper)
|
||||||
|
COPY peregrine/scrapers/ /app/scrapers/
|
||||||
|
|
||||||
|
COPY peregrine/ .
|
||||||
|
|
||||||
|
# Remove per-user config files that are gitignored but may exist locally.
|
||||||
|
# Defense-in-depth: the parent .dockerignore should already exclude these,
|
||||||
|
# but an explicit rm guarantees they never end up in the cloud image.
|
||||||
|
RUN rm -f config/user.yaml config/plain_text_resume.yaml config/notion.yaml \
|
||||||
|
config/email.yaml config/tokens.yaml config/craigslist.yaml \
|
||||||
|
config/adzuna.yaml .env
|
||||||
|
|
||||||
|
EXPOSE 8501
|
||||||
|
|
||||||
|
CMD ["streamlit", "run", "app/app.py", \
|
||||||
|
"--server.port=8501", \
|
||||||
|
"--server.headless=true", \
|
||||||
|
"--server.fileWatcherType=none"]
|
||||||
153
HANDOFF-xanderland.md
Normal file
153
HANDOFF-xanderland.md
Normal file
|
|
@ -0,0 +1,153 @@
|
||||||
|
# Peregrine → xanderland.tv Setup Handoff
|
||||||
|
|
||||||
|
**Written from:** dev machine (CircuitForge dev env)
|
||||||
|
**Target:** xanderland.tv (beta tester, rootful Podman + systemd)
|
||||||
|
**Date:** 2026-02-27
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## What we're doing
|
||||||
|
|
||||||
|
Getting Peregrine running on the beta tester's server as a Podman container managed by systemd. He already runs SearXNG and other services in the same style — rootful Podman with `--net=host`, `--restart=unless-stopped`, registered as systemd units.
|
||||||
|
|
||||||
|
The script `podman-standalone.sh` in the repo root handles the container setup.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Step 1 — Get the repo onto xanderland.tv
|
||||||
|
|
||||||
|
From navi (or directly if you have a route):
|
||||||
|
|
||||||
|
```bash
|
||||||
|
ssh xanderland.tv "sudo git clone <repo-url> /opt/peregrine"
|
||||||
|
```
|
||||||
|
|
||||||
|
Or if it's already there, just pull:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
ssh xanderland.tv "cd /opt/peregrine && sudo git pull"
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Step 2 — Verify /opt/peregrine looks right
|
||||||
|
|
||||||
|
```bash
|
||||||
|
ssh xanderland.tv "ls /opt/peregrine"
|
||||||
|
```
|
||||||
|
|
||||||
|
Expect to see: `Dockerfile`, `compose.yml`, `manage.sh`, `podman-standalone.sh`, `config/`, `app/`, `scripts/`, etc.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Step 3 — Config
|
||||||
|
|
||||||
|
```bash
|
||||||
|
ssh xanderland.tv
|
||||||
|
cd /opt/peregrine
|
||||||
|
sudo mkdir -p data
|
||||||
|
sudo cp config/llm.yaml.example config/llm.yaml
|
||||||
|
sudo cp config/notion.yaml.example config/notion.yaml # only if he wants Notion sync
|
||||||
|
```
|
||||||
|
|
||||||
|
Then edit `config/llm.yaml` and set `searxng_url` to his existing SearXNG instance
|
||||||
|
(default is `http://localhost:8888` — confirm his actual port).
|
||||||
|
|
||||||
|
He won't need Anthropic/OpenAI keys to start — the setup wizard lets him pick local Ollama
|
||||||
|
or whatever he has running.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Step 4 — Fix DOCS_DIR in the script
|
||||||
|
|
||||||
|
The script defaults `DOCS_DIR=/Library/Documents/JobSearch` which is the original user's path.
|
||||||
|
Update it to wherever his job search documents actually live, or a placeholder empty dir:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
sudo mkdir -p /opt/peregrine/docs # placeholder if he has no docs yet
|
||||||
|
```
|
||||||
|
|
||||||
|
Then edit the script:
|
||||||
|
```bash
|
||||||
|
sudo sed -i 's|DOCS_DIR=.*|DOCS_DIR=/opt/peregrine/docs|' /opt/peregrine/podman-standalone.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Step 5 — Build the image
|
||||||
|
|
||||||
|
```bash
|
||||||
|
ssh xanderland.tv "cd /opt/peregrine && sudo podman build -t localhost/peregrine:latest ."
|
||||||
|
```
|
||||||
|
|
||||||
|
Takes a few minutes on first run (downloads python:3.11-slim, installs deps).
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Step 6 — Run the script
|
||||||
|
|
||||||
|
```bash
|
||||||
|
ssh xanderland.tv "sudo bash /opt/peregrine/podman-standalone.sh"
|
||||||
|
```
|
||||||
|
|
||||||
|
This starts a single container (`peregrine`) with `--net=host` and `--restart=unless-stopped`.
|
||||||
|
SearXNG is NOT included — his existing instance is used.
|
||||||
|
|
||||||
|
Verify it came up:
|
||||||
|
```bash
|
||||||
|
ssh xanderland.tv "sudo podman ps | grep peregrine"
|
||||||
|
ssh xanderland.tv "sudo podman logs peregrine"
|
||||||
|
```
|
||||||
|
|
||||||
|
Health check endpoint: `http://xanderland.tv:8501/_stcore/health`
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Step 7 — Register as a systemd service
|
||||||
|
|
||||||
|
```bash
|
||||||
|
ssh xanderland.tv
|
||||||
|
sudo podman generate systemd --new --name peregrine \
|
||||||
|
| sudo tee /etc/systemd/system/peregrine.service
|
||||||
|
sudo systemctl daemon-reload
|
||||||
|
sudo systemctl enable --now peregrine
|
||||||
|
```
|
||||||
|
|
||||||
|
Confirm:
|
||||||
|
```bash
|
||||||
|
sudo systemctl status peregrine
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Step 8 — First-run wizard
|
||||||
|
|
||||||
|
Open `http://xanderland.tv:8501` in a browser.
|
||||||
|
|
||||||
|
The setup wizard (page 0) will gate the app until `config/user.yaml` is created.
|
||||||
|
He'll fill in his profile — name, resume, LLM backend preferences. This writes
|
||||||
|
`config/user.yaml` and unlocks the rest of the UI.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Troubleshooting
|
||||||
|
|
||||||
|
| Symptom | Check |
|
||||||
|
|---------|-------|
|
||||||
|
| Container exits immediately | `sudo podman logs peregrine` — usually a missing config file |
|
||||||
|
| Port 8501 already in use | `sudo ss -tlnp \| grep 8501` — something else on that port |
|
||||||
|
| SearXNG not reachable | Confirm `searxng_url` in `config/llm.yaml` and that JSON format is enabled in SearXNG settings |
|
||||||
|
| Wizard loops / won't save | `config/` volume mount permissions — `sudo chown -R 1000:1000 /opt/peregrine/config` |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## To update Peregrine later
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd /opt/peregrine
|
||||||
|
sudo git pull
|
||||||
|
sudo podman build -t localhost/peregrine:latest .
|
||||||
|
sudo podman restart peregrine
|
||||||
|
```
|
||||||
|
|
||||||
|
No need to touch the systemd unit — it launches fresh via `--new` in the generate step.
|
||||||
2
Makefile
2
Makefile
|
|
@ -45,7 +45,7 @@ endif
|
||||||
PROFILE_ARG := $(if $(filter remote,$(PROFILE)),,--profile $(PROFILE))
|
PROFILE_ARG := $(if $(filter remote,$(PROFILE)),,--profile $(PROFILE))
|
||||||
|
|
||||||
setup: ## Install dependencies (Docker or Podman + NVIDIA toolkit)
|
setup: ## Install dependencies (Docker or Podman + NVIDIA toolkit)
|
||||||
@bash setup.sh
|
@bash install.sh
|
||||||
|
|
||||||
preflight: ## Check ports + system resources; write .env
|
preflight: ## Check ports + system resources; write .env
|
||||||
@$(PYTHON) scripts/preflight.py
|
@$(PYTHON) scripts/preflight.py
|
||||||
|
|
|
||||||
47
README.md
47
README.md
|
|
@ -1,16 +1,34 @@
|
||||||
# Peregrine
|
# Peregrine
|
||||||
|
|
||||||
> **Primary development** happens at [git.opensourcesolarpunk.com](https://git.opensourcesolarpunk.com/pyr0ball/peregrine) — GitHub and Codeberg are push mirrors. Issues and PRs are welcome on either platform.
|
> **Primary development** happens at [git.opensourcesolarpunk.com](https://git.opensourcesolarpunk.com/Circuit-Forge/peregrine) — GitHub and Codeberg are push mirrors. Issues and PRs are welcome on either platform.
|
||||||
|
|
||||||
[](./LICENSE-BSL)
|
[](./LICENSE-BSL)
|
||||||
[](https://github.com/CircuitForge/peregrine/actions/workflows/ci.yml)
|
[](https://github.com/CircuitForge/peregrine/actions/workflows/ci.yml)
|
||||||
|
[](https://docs.circuitforge.tech/peregrine/)
|
||||||
|
|
||||||
**AI-powered job search pipeline — by [Circuit Forge LLC](https://circuitforge.tech)**
|
**Job search pipeline — by [Circuit Forge LLC](https://circuitforge.tech)**
|
||||||
|
|
||||||
> *"Don't be evil, for real and forever."*
|
> *"Tools for the jobs that the system made hard on purpose."*
|
||||||
|
|
||||||
Automates the full job search lifecycle: discovery → matching → cover letters → applications → interview prep.
|
---
|
||||||
Privacy-first, local-first. Your data never leaves your machine.
|
|
||||||
|
Job search is a second job nobody hired you for.
|
||||||
|
|
||||||
|
ATS filters designed to reject. Job boards that show the same listing eight times. Cover letter number forty-seven for a role that might already be filled. Hours of prep for a phone screen that lasts twelve minutes.
|
||||||
|
|
||||||
|
Peregrine handles the pipeline — discovery, matching, tracking, drafting, and prep — so you can spend your time doing the work you actually want to be doing.
|
||||||
|
|
||||||
|
**LLM support is optional.** The full discovery and tracking pipeline works without one. When you do configure a backend, the LLM drafts the parts that are genuinely miserable — cover letters, company research briefs, interview prep sheets — and waits for your approval before anything goes anywhere.
|
||||||
|
|
||||||
|
### What Peregrine does not do
|
||||||
|
|
||||||
|
Peregrine does **not** submit job applications for you. You still have to go to each employer's site and click apply yourself.
|
||||||
|
|
||||||
|
This is intentional. Automated mass-applying is a bad experience for everyone — it's also a trust violation with employers who took the time to post a real role. Peregrine is a preparation and organization tool, not a bot.
|
||||||
|
|
||||||
|
What it *does* cover is everything before and after that click: finding the jobs, matching them against your resume, generating cover letters and prep materials, and once you've applied — tracking where you stand, classifying the emails that come back, and surfacing company research when an interview lands on your calendar. The submit button is yours. The rest of the grind is ours.
|
||||||
|
|
||||||
|
> **Exception:** [AIHawk](https://github.com/nicolomantini/LinkedIn-Easy-Apply) is a separate, optional tool that handles LinkedIn Easy Apply automation. Peregrine integrates with it for AIHawk-compatible profiles, but it is not part of Peregrine's core pipeline.
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
|
|
@ -19,7 +37,7 @@ Privacy-first, local-first. Your data never leaves your machine.
|
||||||
**1. Clone and install dependencies** (Docker, NVIDIA toolkit if needed):
|
**1. Clone and install dependencies** (Docker, NVIDIA toolkit if needed):
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
git clone https://git.opensourcesolarpunk.com/pyr0ball/peregrine
|
git clone https://git.opensourcesolarpunk.com/Circuit-Forge/peregrine
|
||||||
cd peregrine
|
cd peregrine
|
||||||
./manage.sh setup
|
./manage.sh setup
|
||||||
```
|
```
|
||||||
|
|
@ -42,7 +60,7 @@ make start PROFILE=single-gpu
|
||||||
|
|
||||||
**3.** Open http://localhost:8501 — the setup wizard guides you through the rest.
|
**3.** Open http://localhost:8501 — the setup wizard guides you through the rest.
|
||||||
|
|
||||||
> **macOS / Apple Silicon:** Docker Desktop must be running. For Metal GPU-accelerated inference, install Ollama natively before starting — `setup.sh` will prompt you to do this. See [Apple Silicon GPU](#apple-silicon-gpu) below.
|
> **macOS / Apple Silicon:** Docker Desktop must be running. For Metal GPU-accelerated inference, install Ollama natively before starting — `install.sh` will prompt you to do this. See [Apple Silicon GPU](#apple-silicon-gpu) below.
|
||||||
> **Windows:** Not supported — use WSL2 with Ubuntu.
|
> **Windows:** Not supported — use WSL2 with Ubuntu.
|
||||||
|
|
||||||
### Installing to `/opt` or other system directories
|
### Installing to `/opt` or other system directories
|
||||||
|
|
@ -86,7 +104,7 @@ After `./manage.sh setup`, log out and back in for docker group membership to ta
|
||||||
|
|
||||||
Docker Desktop on macOS runs in a Linux VM — it cannot access the Apple GPU. Metal-accelerated inference requires Ollama to run **natively** on the host.
|
Docker Desktop on macOS runs in a Linux VM — it cannot access the Apple GPU. Metal-accelerated inference requires Ollama to run **natively** on the host.
|
||||||
|
|
||||||
`setup.sh` handles this automatically: it offers to install Ollama via Homebrew, starts it as a background service, and explains what happens next. If Ollama is running on port 11434 when you start Peregrine, preflight detects it, stubs out the Docker Ollama container, and routes inference through the native process — which uses Metal automatically.
|
`install.sh` handles this automatically: it offers to install Ollama via Homebrew, starts it as a background service, and explains what happens next. If Ollama is running on port 11434 when you start Peregrine, preflight detects it, stubs out the Docker Ollama container, and routes inference through the native process — which uses Metal automatically.
|
||||||
|
|
||||||
To do it manually:
|
To do it manually:
|
||||||
|
|
||||||
|
|
@ -129,21 +147,26 @@ Re-enter the wizard any time via **Settings → Developer → Reset wizard**.
|
||||||
| **Company research briefs** | Free with LLM¹ |
|
| **Company research briefs** | Free with LLM¹ |
|
||||||
| **Interview prep & practice Q&A** | Free with LLM¹ |
|
| **Interview prep & practice Q&A** | Free with LLM¹ |
|
||||||
| **Survey assistant** (culture-fit Q&A, screenshot analysis) | Free with LLM¹ |
|
| **Survey assistant** (culture-fit Q&A, screenshot analysis) | Free with LLM¹ |
|
||||||
| **AI wizard helpers** (career summary, bullet expansion, skill suggestions) | Free with LLM¹ |
|
| **Wizard helpers** (career summary, bullet expansion, skill suggestions, job title suggestions, mission notes) | Free with LLM¹ |
|
||||||
| Managed cloud LLM (no API key needed) | Paid |
|
| Managed cloud LLM (no API key needed) | Paid |
|
||||||
| Email sync & auto-classification | Paid |
|
| Email sync & auto-classification | Paid |
|
||||||
|
| LLM-powered keyword blocklist | Paid |
|
||||||
| Job tracking integrations (Notion, Airtable, Google Sheets) | Paid |
|
| Job tracking integrations (Notion, Airtable, Google Sheets) | Paid |
|
||||||
| Calendar sync (Google, Apple) | Paid |
|
| Calendar sync (Google, Apple) | Paid |
|
||||||
| Slack notifications | Paid |
|
| Slack notifications | Paid |
|
||||||
| CircuitForge shared cover-letter model | Paid |
|
| CircuitForge shared cover-letter model | Paid |
|
||||||
|
| Vue 3 SPA — full UI with onboarding wizard, job board, apply workspace, sort/filter, research modal, draft cover letter | Free |
|
||||||
|
| **Voice guidelines** (custom writing style & tone) | Premium with LLM¹ ² |
|
||||||
| Cover letter model fine-tuning (your writing, your model) | Premium |
|
| Cover letter model fine-tuning (your writing, your model) | Premium |
|
||||||
| Multi-user support | Premium |
|
| Multi-user support | Premium |
|
||||||
|
|
||||||
¹ **BYOK unlock:** configure any LLM backend — a local [Ollama](https://ollama.com) or vLLM instance,
|
¹ **BYOK (bring your own key/backend) unlock:** configure any LLM backend — a local [Ollama](https://ollama.com) or vLLM instance,
|
||||||
or your own API key (Anthropic, OpenAI-compatible) — and all AI features marked **Free with LLM**
|
or your own API key (Anthropic, OpenAI-compatible) — and all features marked **Free with LLM** or **Premium with LLM**
|
||||||
unlock at no charge. The paid tier earns its price by providing managed cloud inference so you
|
unlock at no charge. The paid tier earns its price by providing managed cloud inference so you
|
||||||
don't need a key at all, plus integrations and email sync.
|
don't need a key at all, plus integrations and email sync.
|
||||||
|
|
||||||
|
² **Voice guidelines** requires Premium tier without a configured LLM backend. With BYOK, it unlocks at any tier.
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## Email Sync
|
## Email Sync
|
||||||
|
|
@ -201,6 +224,6 @@ Full documentation at: https://docs.circuitforge.tech/peregrine
|
||||||
## License
|
## License
|
||||||
|
|
||||||
Core discovery pipeline: [MIT](LICENSE-MIT)
|
Core discovery pipeline: [MIT](LICENSE-MIT)
|
||||||
AI features (cover letter generation, company research, interview prep, UI): [BSL 1.1](LICENSE-BSL)
|
LLM features (cover letter generation, company research, interview prep, UI): [BSL 1.1](LICENSE-BSL)
|
||||||
|
|
||||||
© 2026 Circuit Forge LLC
|
© 2026 Circuit Forge LLC
|
||||||
|
|
|
||||||
275
app/Home.py
275
app/Home.py
|
|
@ -14,24 +14,22 @@ sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||||
|
|
||||||
from scripts.user_profile import UserProfile
|
from scripts.user_profile import UserProfile
|
||||||
|
|
||||||
_USER_YAML = Path(__file__).parent.parent / "config" / "user.yaml"
|
|
||||||
_profile = UserProfile(_USER_YAML) if UserProfile.exists(_USER_YAML) else None
|
|
||||||
_name = _profile.name if _profile else "Job Seeker"
|
|
||||||
|
|
||||||
from scripts.db import init_db, get_job_counts, purge_jobs, purge_email_data, \
|
from scripts.db import init_db, get_job_counts, purge_jobs, purge_email_data, \
|
||||||
purge_non_remote, archive_jobs, kill_stuck_tasks, get_task_for_job, get_active_tasks, \
|
purge_non_remote, archive_jobs, kill_stuck_tasks, cancel_task, \
|
||||||
insert_job, get_existing_urls
|
get_task_for_job, get_active_tasks, insert_job, get_existing_urls
|
||||||
from scripts.task_runner import submit_task
|
from scripts.task_runner import submit_task
|
||||||
from app.cloud_session import resolve_session, get_db_path
|
from app.cloud_session import resolve_session, get_db_path, get_config_dir
|
||||||
|
|
||||||
_CONFIG_DIR = Path(__file__).parent.parent / "config"
|
|
||||||
_NOTION_CONNECTED = (_CONFIG_DIR / "integrations" / "notion.yaml").exists()
|
|
||||||
|
|
||||||
resolve_session("peregrine")
|
resolve_session("peregrine")
|
||||||
init_db(get_db_path())
|
init_db(get_db_path())
|
||||||
|
|
||||||
|
_CONFIG_DIR = get_config_dir()
|
||||||
|
_USER_YAML = _CONFIG_DIR / "user.yaml"
|
||||||
|
_profile = UserProfile(_USER_YAML) if UserProfile.exists(_USER_YAML) else None
|
||||||
|
_name = _profile.name if _profile else "Job Seeker"
|
||||||
|
|
||||||
def _email_configured() -> bool:
|
def _email_configured() -> bool:
|
||||||
_e = Path(__file__).parent.parent / "config" / "email.yaml"
|
_e = get_config_dir() / "email.yaml"
|
||||||
if not _e.exists():
|
if not _e.exists():
|
||||||
return False
|
return False
|
||||||
import yaml as _yaml
|
import yaml as _yaml
|
||||||
|
|
@ -39,7 +37,7 @@ def _email_configured() -> bool:
|
||||||
return bool(_cfg.get("username") or _cfg.get("user") or _cfg.get("imap_host"))
|
return bool(_cfg.get("username") or _cfg.get("user") or _cfg.get("imap_host"))
|
||||||
|
|
||||||
def _notion_configured() -> bool:
|
def _notion_configured() -> bool:
|
||||||
_n = Path(__file__).parent.parent / "config" / "notion.yaml"
|
_n = get_config_dir() / "notion.yaml"
|
||||||
if not _n.exists():
|
if not _n.exists():
|
||||||
return False
|
return False
|
||||||
import yaml as _yaml
|
import yaml as _yaml
|
||||||
|
|
@ -47,7 +45,7 @@ def _notion_configured() -> bool:
|
||||||
return bool(_cfg.get("token"))
|
return bool(_cfg.get("token"))
|
||||||
|
|
||||||
def _keywords_configured() -> bool:
|
def _keywords_configured() -> bool:
|
||||||
_k = Path(__file__).parent.parent / "config" / "resume_keywords.yaml"
|
_k = get_config_dir() / "resume_keywords.yaml"
|
||||||
if not _k.exists():
|
if not _k.exists():
|
||||||
return False
|
return False
|
||||||
import yaml as _yaml
|
import yaml as _yaml
|
||||||
|
|
@ -134,7 +132,7 @@ def _queue_url_imports(db_path: Path, urls: list) -> int:
|
||||||
|
|
||||||
|
|
||||||
st.title(f"🔍 {_name}'s Job Search")
|
st.title(f"🔍 {_name}'s Job Search")
|
||||||
st.caption("Discover → Review → Sync to Notion")
|
st.caption("Discover → Review → Sync" + (" to Notion" if _notion_configured() else ""))
|
||||||
|
|
||||||
st.divider()
|
st.divider()
|
||||||
|
|
||||||
|
|
@ -146,7 +144,7 @@ def _live_counts():
|
||||||
col1.metric("Pending Review", counts.get("pending", 0))
|
col1.metric("Pending Review", counts.get("pending", 0))
|
||||||
col2.metric("Approved", counts.get("approved", 0))
|
col2.metric("Approved", counts.get("approved", 0))
|
||||||
col3.metric("Applied", counts.get("applied", 0))
|
col3.metric("Applied", counts.get("applied", 0))
|
||||||
col4.metric("Synced to Notion", counts.get("synced", 0))
|
col4.metric("Synced" + (" to Notion" if _notion_configured() else ""), counts.get("synced", 0))
|
||||||
col5.metric("Rejected", counts.get("rejected", 0))
|
col5.metric("Rejected", counts.get("rejected", 0))
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -237,7 +235,7 @@ with mid:
|
||||||
|
|
||||||
with right:
|
with right:
|
||||||
approved_count = get_job_counts(get_db_path()).get("approved", 0)
|
approved_count = get_job_counts(get_db_path()).get("approved", 0)
|
||||||
if _NOTION_CONNECTED:
|
if _notion_configured():
|
||||||
st.subheader("Send to Notion")
|
st.subheader("Send to Notion")
|
||||||
st.caption("Push all approved jobs to your Notion tracking database.")
|
st.caption("Push all approved jobs to your Notion tracking database.")
|
||||||
if approved_count == 0:
|
if approved_count == 0:
|
||||||
|
|
@ -376,177 +374,144 @@ _scrape_status()
|
||||||
|
|
||||||
st.divider()
|
st.divider()
|
||||||
|
|
||||||
# ── Danger zone: purge + re-scrape ────────────────────────────────────────────
|
# ── Danger zone ───────────────────────────────────────────────────────────────
|
||||||
with st.expander("⚠️ Danger Zone", expanded=False):
|
with st.expander("⚠️ Danger Zone", expanded=False):
|
||||||
|
|
||||||
|
# ── Queue reset (the common case) ─────────────────────────────────────────
|
||||||
|
st.markdown("**Queue reset**")
|
||||||
st.caption(
|
st.caption(
|
||||||
"**Purge** permanently deletes jobs from the local database. "
|
"Archive clears your review queue while keeping job URLs for dedup, "
|
||||||
"Applied and synced jobs are never touched."
|
"so the same listings won't resurface on the next discovery run. "
|
||||||
|
"Use hard purge only if you want a full clean slate including dedup history."
|
||||||
)
|
)
|
||||||
|
|
||||||
purge_col, rescrape_col, email_col, tasks_col = st.columns(4)
|
_scope = st.radio(
|
||||||
|
"Clear scope",
|
||||||
|
["Pending only", "Pending + approved (stale search)"],
|
||||||
|
horizontal=True,
|
||||||
|
label_visibility="collapsed",
|
||||||
|
)
|
||||||
|
_scope_statuses = (
|
||||||
|
["pending"] if _scope == "Pending only" else ["pending", "approved"]
|
||||||
|
)
|
||||||
|
|
||||||
with purge_col:
|
_qc1, _qc2, _qc3 = st.columns([2, 2, 4])
|
||||||
st.markdown("**Purge pending & rejected**")
|
if _qc1.button("📦 Archive & reset", use_container_width=True, type="primary"):
|
||||||
st.caption("Removes all _pending_ and _rejected_ listings so the next discovery starts fresh.")
|
st.session_state["confirm_dz"] = "archive"
|
||||||
if st.button("🗑 Purge Pending + Rejected", use_container_width=True):
|
if _qc2.button("🗑 Hard purge (delete)", use_container_width=True):
|
||||||
st.session_state["confirm_purge"] = "partial"
|
st.session_state["confirm_dz"] = "purge"
|
||||||
|
|
||||||
if st.session_state.get("confirm_purge") == "partial":
|
if st.session_state.get("confirm_dz") == "archive":
|
||||||
st.warning("Are you sure? This cannot be undone.")
|
st.info(
|
||||||
c1, c2 = st.columns(2)
|
f"Archive **{', '.join(_scope_statuses)}** jobs? "
|
||||||
if c1.button("Yes, purge", type="primary", use_container_width=True):
|
"URLs are kept for dedup — nothing is permanently deleted."
|
||||||
deleted = purge_jobs(get_db_path(), statuses=["pending", "rejected"])
|
)
|
||||||
st.success(f"Purged {deleted} jobs.")
|
_dc1, _dc2 = st.columns(2)
|
||||||
st.session_state.pop("confirm_purge", None)
|
if _dc1.button("Yes, archive", type="primary", use_container_width=True, key="dz_archive_confirm"):
|
||||||
|
n = archive_jobs(get_db_path(), statuses=_scope_statuses)
|
||||||
|
st.success(f"Archived {n} jobs.")
|
||||||
|
st.session_state.pop("confirm_dz", None)
|
||||||
st.rerun()
|
st.rerun()
|
||||||
if c2.button("Cancel", use_container_width=True):
|
if _dc2.button("Cancel", use_container_width=True, key="dz_archive_cancel"):
|
||||||
st.session_state.pop("confirm_purge", None)
|
st.session_state.pop("confirm_dz", None)
|
||||||
st.rerun()
|
st.rerun()
|
||||||
|
|
||||||
with email_col:
|
if st.session_state.get("confirm_dz") == "purge":
|
||||||
st.markdown("**Purge email data**")
|
st.warning(
|
||||||
st.caption("Clears all email thread logs and email-sourced pending jobs so the next sync starts fresh.")
|
f"Permanently delete **{', '.join(_scope_statuses)}** jobs? "
|
||||||
if st.button("📧 Purge Email Data", use_container_width=True):
|
"This removes the URLs from dedup history too. Cannot be undone."
|
||||||
st.session_state["confirm_purge"] = "email"
|
)
|
||||||
|
_dc1, _dc2 = st.columns(2)
|
||||||
if st.session_state.get("confirm_purge") == "email":
|
if _dc1.button("Yes, delete", type="primary", use_container_width=True, key="dz_purge_confirm"):
|
||||||
st.warning("This deletes all email contacts and email-sourced jobs. Cannot be undone.")
|
n = purge_jobs(get_db_path(), statuses=_scope_statuses)
|
||||||
c1, c2 = st.columns(2)
|
st.success(f"Deleted {n} jobs.")
|
||||||
if c1.button("Yes, purge emails", type="primary", use_container_width=True):
|
st.session_state.pop("confirm_dz", None)
|
||||||
contacts, jobs = purge_email_data(get_db_path())
|
|
||||||
st.success(f"Purged {contacts} email contacts, {jobs} email jobs.")
|
|
||||||
st.session_state.pop("confirm_purge", None)
|
|
||||||
st.rerun()
|
st.rerun()
|
||||||
if c2.button("Cancel ", use_container_width=True):
|
if _dc2.button("Cancel", use_container_width=True, key="dz_purge_cancel"):
|
||||||
st.session_state.pop("confirm_purge", None)
|
st.session_state.pop("confirm_dz", None)
|
||||||
st.rerun()
|
st.rerun()
|
||||||
|
|
||||||
with tasks_col:
|
st.divider()
|
||||||
|
|
||||||
|
# ── Background tasks ──────────────────────────────────────────────────────
|
||||||
_active = get_active_tasks(get_db_path())
|
_active = get_active_tasks(get_db_path())
|
||||||
st.markdown("**Kill stuck tasks**")
|
st.markdown(f"**Background tasks** — {len(_active)} active")
|
||||||
st.caption(f"Force-fail all queued/running background tasks. Currently **{len(_active)}** active.")
|
|
||||||
if st.button("⏹ Kill All Tasks", use_container_width=True, disabled=len(_active) == 0):
|
if _active:
|
||||||
|
_task_icons = {"cover_letter": "✉️", "research": "🔍", "discovery": "🌐", "enrich_descriptions": "📝"}
|
||||||
|
for _t in _active:
|
||||||
|
_tc1, _tc2, _tc3 = st.columns([3, 4, 2])
|
||||||
|
_icon = _task_icons.get(_t["task_type"], "⚙️")
|
||||||
|
_tc1.caption(f"{_icon} `{_t['task_type']}`")
|
||||||
|
_job_label = f"{_t['title']} @ {_t['company']}" if _t.get("title") else f"job #{_t['job_id']}"
|
||||||
|
_tc2.caption(_job_label)
|
||||||
|
_tc3.caption(f"_{_t['status']}_")
|
||||||
|
if st.button("✕ Cancel", key=f"dz_cancel_task_{_t['id']}", use_container_width=True):
|
||||||
|
cancel_task(get_db_path(), _t["id"])
|
||||||
|
st.rerun()
|
||||||
|
st.caption("")
|
||||||
|
|
||||||
|
_kill_col, _ = st.columns([2, 6])
|
||||||
|
if _kill_col.button("⏹ Kill all stuck", use_container_width=True, disabled=len(_active) == 0):
|
||||||
killed = kill_stuck_tasks(get_db_path())
|
killed = kill_stuck_tasks(get_db_path())
|
||||||
st.success(f"Killed {killed} task(s).")
|
st.success(f"Killed {killed} task(s).")
|
||||||
st.rerun()
|
st.rerun()
|
||||||
|
|
||||||
with rescrape_col:
|
|
||||||
st.markdown("**Purge all & re-scrape**")
|
|
||||||
st.caption("Wipes _all_ non-applied, non-synced jobs then immediately runs a fresh discovery.")
|
|
||||||
if st.button("🔄 Purge All + Re-scrape", use_container_width=True):
|
|
||||||
st.session_state["confirm_purge"] = "full"
|
|
||||||
|
|
||||||
if st.session_state.get("confirm_purge") == "full":
|
|
||||||
st.warning("This will delete ALL pending, approved, and rejected jobs, then re-scrape. Applied and synced records are kept.")
|
|
||||||
c1, c2 = st.columns(2)
|
|
||||||
if c1.button("Yes, wipe + scrape", type="primary", use_container_width=True):
|
|
||||||
purge_jobs(get_db_path(), statuses=["pending", "approved", "rejected"])
|
|
||||||
submit_task(get_db_path(), "discovery", 0)
|
|
||||||
st.session_state.pop("confirm_purge", None)
|
|
||||||
st.rerun()
|
|
||||||
if c2.button("Cancel ", use_container_width=True):
|
|
||||||
st.session_state.pop("confirm_purge", None)
|
|
||||||
st.rerun()
|
|
||||||
|
|
||||||
st.divider()
|
st.divider()
|
||||||
|
|
||||||
pending_col, nonremote_col, approved_col, _ = st.columns(4)
|
# ── Rarely needed (collapsed) ─────────────────────────────────────────────
|
||||||
|
with st.expander("More options", expanded=False):
|
||||||
|
_rare1, _rare2, _rare3 = st.columns(3)
|
||||||
|
|
||||||
with pending_col:
|
with _rare1:
|
||||||
st.markdown("**Purge pending review**")
|
st.markdown("**Purge email data**")
|
||||||
st.caption("Removes only _pending_ listings, keeping your rejected history intact.")
|
st.caption("Clears all email thread logs and email-sourced pending jobs.")
|
||||||
if st.button("🗑 Purge Pending Only", use_container_width=True):
|
if st.button("📧 Purge Email Data", use_container_width=True):
|
||||||
st.session_state["confirm_purge"] = "pending_only"
|
st.session_state["confirm_dz"] = "email"
|
||||||
|
if st.session_state.get("confirm_dz") == "email":
|
||||||
if st.session_state.get("confirm_purge") == "pending_only":
|
st.warning("Deletes all email contacts and email-sourced jobs. Cannot be undone.")
|
||||||
st.warning("Deletes all pending jobs. Rejected jobs are kept. Cannot be undone.")
|
_ec1, _ec2 = st.columns(2)
|
||||||
c1, c2 = st.columns(2)
|
if _ec1.button("Yes, purge emails", type="primary", use_container_width=True, key="dz_email_confirm"):
|
||||||
if c1.button("Yes, purge pending", type="primary", use_container_width=True):
|
contacts, jobs = purge_email_data(get_db_path())
|
||||||
deleted = purge_jobs(get_db_path(), statuses=["pending"])
|
st.success(f"Purged {contacts} email contacts, {jobs} email jobs.")
|
||||||
st.success(f"Purged {deleted} pending jobs.")
|
st.session_state.pop("confirm_dz", None)
|
||||||
st.session_state.pop("confirm_purge", None)
|
|
||||||
st.rerun()
|
st.rerun()
|
||||||
if c2.button("Cancel ", use_container_width=True):
|
if _ec2.button("Cancel", use_container_width=True, key="dz_email_cancel"):
|
||||||
st.session_state.pop("confirm_purge", None)
|
st.session_state.pop("confirm_dz", None)
|
||||||
st.rerun()
|
st.rerun()
|
||||||
|
|
||||||
with nonremote_col:
|
with _rare2:
|
||||||
st.markdown("**Purge non-remote**")
|
st.markdown("**Purge non-remote**")
|
||||||
st.caption("Removes pending/approved/rejected jobs where remote is not set. Keeps anything already in the pipeline.")
|
st.caption("Removes pending/approved/rejected on-site listings from the DB.")
|
||||||
if st.button("🏢 Purge On-site Jobs", use_container_width=True):
|
if st.button("🏢 Purge On-site Jobs", use_container_width=True):
|
||||||
st.session_state["confirm_purge"] = "non_remote"
|
st.session_state["confirm_dz"] = "non_remote"
|
||||||
|
if st.session_state.get("confirm_dz") == "non_remote":
|
||||||
if st.session_state.get("confirm_purge") == "non_remote":
|
|
||||||
st.warning("Deletes all non-remote jobs not yet applied to. Cannot be undone.")
|
st.warning("Deletes all non-remote jobs not yet applied to. Cannot be undone.")
|
||||||
c1, c2 = st.columns(2)
|
_rc1, _rc2 = st.columns(2)
|
||||||
if c1.button("Yes, purge on-site", type="primary", use_container_width=True):
|
if _rc1.button("Yes, purge on-site", type="primary", use_container_width=True, key="dz_nonremote_confirm"):
|
||||||
deleted = purge_non_remote(get_db_path())
|
deleted = purge_non_remote(get_db_path())
|
||||||
st.success(f"Purged {deleted} non-remote jobs.")
|
st.success(f"Purged {deleted} non-remote jobs.")
|
||||||
st.session_state.pop("confirm_purge", None)
|
st.session_state.pop("confirm_dz", None)
|
||||||
st.rerun()
|
st.rerun()
|
||||||
if c2.button("Cancel ", use_container_width=True):
|
if _rc2.button("Cancel", use_container_width=True, key="dz_nonremote_cancel"):
|
||||||
st.session_state.pop("confirm_purge", None)
|
st.session_state.pop("confirm_dz", None)
|
||||||
st.rerun()
|
st.rerun()
|
||||||
|
|
||||||
with approved_col:
|
with _rare3:
|
||||||
st.markdown("**Purge approved (unapplied)**")
|
st.markdown("**Wipe all + re-scrape**")
|
||||||
st.caption("Removes _approved_ jobs you haven't applied to yet — e.g. to reset after a review pass.")
|
st.caption("Deletes all non-applied jobs then immediately runs a fresh discovery.")
|
||||||
if st.button("🗑 Purge Approved", use_container_width=True):
|
if st.button("🔄 Wipe + Re-scrape", use_container_width=True):
|
||||||
st.session_state["confirm_purge"] = "approved_only"
|
st.session_state["confirm_dz"] = "rescrape"
|
||||||
|
if st.session_state.get("confirm_dz") == "rescrape":
|
||||||
if st.session_state.get("confirm_purge") == "approved_only":
|
st.warning("Wipes ALL pending, approved, and rejected jobs, then re-scrapes. Applied and synced records are kept.")
|
||||||
st.warning("Deletes all approved-but-not-applied jobs. Cannot be undone.")
|
_wc1, _wc2 = st.columns(2)
|
||||||
c1, c2 = st.columns(2)
|
if _wc1.button("Yes, wipe + scrape", type="primary", use_container_width=True, key="dz_rescrape_confirm"):
|
||||||
if c1.button("Yes, purge approved", type="primary", use_container_width=True):
|
purge_jobs(get_db_path(), statuses=["pending", "approved", "rejected"])
|
||||||
deleted = purge_jobs(get_db_path(), statuses=["approved"])
|
submit_task(get_db_path(), "discovery", 0)
|
||||||
st.success(f"Purged {deleted} approved jobs.")
|
st.session_state.pop("confirm_dz", None)
|
||||||
st.session_state.pop("confirm_purge", None)
|
|
||||||
st.rerun()
|
st.rerun()
|
||||||
if c2.button("Cancel ", use_container_width=True):
|
if _wc2.button("Cancel", use_container_width=True, key="dz_rescrape_cancel"):
|
||||||
st.session_state.pop("confirm_purge", None)
|
st.session_state.pop("confirm_dz", None)
|
||||||
st.rerun()
|
|
||||||
|
|
||||||
st.divider()
|
|
||||||
|
|
||||||
archive_col1, archive_col2, _, _ = st.columns(4)
|
|
||||||
|
|
||||||
with archive_col1:
|
|
||||||
st.markdown("**Archive remaining**")
|
|
||||||
st.caption(
|
|
||||||
"Move all _pending_ and _rejected_ jobs to archived status. "
|
|
||||||
"Archived jobs stay in the DB for dedup — they just won't appear in Job Review."
|
|
||||||
)
|
|
||||||
if st.button("📦 Archive Pending + Rejected", use_container_width=True):
|
|
||||||
st.session_state["confirm_purge"] = "archive_remaining"
|
|
||||||
|
|
||||||
if st.session_state.get("confirm_purge") == "archive_remaining":
|
|
||||||
st.info("Jobs will be archived (not deleted) — URLs are kept for dedup.")
|
|
||||||
c1, c2 = st.columns(2)
|
|
||||||
if c1.button("Yes, archive", type="primary", use_container_width=True):
|
|
||||||
archived = archive_jobs(get_db_path(), statuses=["pending", "rejected"])
|
|
||||||
st.success(f"Archived {archived} jobs.")
|
|
||||||
st.session_state.pop("confirm_purge", None)
|
|
||||||
st.rerun()
|
|
||||||
if c2.button("Cancel ", use_container_width=True):
|
|
||||||
st.session_state.pop("confirm_purge", None)
|
|
||||||
st.rerun()
|
|
||||||
|
|
||||||
with archive_col2:
|
|
||||||
st.markdown("**Archive approved (unapplied)**")
|
|
||||||
st.caption("Archive _approved_ listings you decided to skip — keeps history without cluttering the apply queue.")
|
|
||||||
if st.button("📦 Archive Approved", use_container_width=True):
|
|
||||||
st.session_state["confirm_purge"] = "archive_approved"
|
|
||||||
|
|
||||||
if st.session_state.get("confirm_purge") == "archive_approved":
|
|
||||||
st.info("Approved jobs will be archived (not deleted).")
|
|
||||||
c1, c2 = st.columns(2)
|
|
||||||
if c1.button("Yes, archive approved", type="primary", use_container_width=True):
|
|
||||||
archived = archive_jobs(get_db_path(), statuses=["approved"])
|
|
||||||
st.success(f"Archived {archived} approved jobs.")
|
|
||||||
st.session_state.pop("confirm_purge", None)
|
|
||||||
st.rerun()
|
|
||||||
if c2.button("Cancel ", use_container_width=True):
|
|
||||||
st.session_state.pop("confirm_purge", None)
|
|
||||||
st.rerun()
|
st.rerun()
|
||||||
|
|
||||||
# ── Setup banners ─────────────────────────────────────────────────────────────
|
# ── Setup banners ─────────────────────────────────────────────────────────────
|
||||||
|
|
|
||||||
|
|
@ -17,10 +17,16 @@ sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||||
|
|
||||||
logging.basicConfig(level=logging.WARNING, format="%(name)s %(levelname)s: %(message)s")
|
logging.basicConfig(level=logging.WARNING, format="%(name)s %(levelname)s: %(message)s")
|
||||||
|
|
||||||
|
# Load .env before any os.environ reads — safe to call inside Docker too
|
||||||
|
# (uses setdefault, so Docker-injected vars take precedence over .env values)
|
||||||
|
from circuitforge_core.config.settings import load_env as _load_env
|
||||||
|
_load_env(Path(__file__).parent.parent / ".env")
|
||||||
|
|
||||||
IS_DEMO = os.environ.get("DEMO_MODE", "").lower() in ("1", "true", "yes")
|
IS_DEMO = os.environ.get("DEMO_MODE", "").lower() in ("1", "true", "yes")
|
||||||
|
|
||||||
import streamlit as st
|
import streamlit as st
|
||||||
from scripts.db import DEFAULT_DB, init_db, get_active_tasks
|
from scripts.db import DEFAULT_DB, init_db, get_active_tasks
|
||||||
|
from scripts.db_migrate import migrate_db
|
||||||
from app.feedback import inject_feedback_button
|
from app.feedback import inject_feedback_button
|
||||||
from app.cloud_session import resolve_session, get_db_path, get_config_dir, get_cloud_tier
|
from app.cloud_session import resolve_session, get_db_path, get_config_dir, get_cloud_tier
|
||||||
import sqlite3
|
import sqlite3
|
||||||
|
|
@ -36,6 +42,7 @@ st.set_page_config(
|
||||||
|
|
||||||
resolve_session("peregrine")
|
resolve_session("peregrine")
|
||||||
init_db(get_db_path())
|
init_db(get_db_path())
|
||||||
|
migrate_db(Path(get_db_path()))
|
||||||
|
|
||||||
# Demo tier — initialize once per session (cookie persistence handled client-side)
|
# Demo tier — initialize once per session (cookie persistence handled client-side)
|
||||||
if IS_DEMO and "simulated_tier" not in st.session_state:
|
if IS_DEMO and "simulated_tier" not in st.session_state:
|
||||||
|
|
|
||||||
|
|
@ -203,8 +203,16 @@ def get_config_dir() -> Path:
|
||||||
isolated and never shared across tenants.
|
isolated and never shared across tenants.
|
||||||
Local: repo-level config/ directory.
|
Local: repo-level config/ directory.
|
||||||
"""
|
"""
|
||||||
if CLOUD_MODE and st.session_state.get("db_path"):
|
if CLOUD_MODE:
|
||||||
return Path(st.session_state["db_path"]).parent / "config"
|
db_path = st.session_state.get("db_path")
|
||||||
|
if db_path:
|
||||||
|
return Path(db_path).parent / "config"
|
||||||
|
# Session not resolved yet (resolve_session() should have called st.stop() already).
|
||||||
|
# Return an isolated empty temp dir rather than the repo config, which may contain
|
||||||
|
# another user's data baked into the image.
|
||||||
|
_safe = Path("/tmp/peregrine-cloud-noconfig")
|
||||||
|
_safe.mkdir(exist_ok=True)
|
||||||
|
return _safe
|
||||||
return Path(__file__).parent.parent / "config"
|
return Path(__file__).parent.parent / "config"
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -32,6 +32,13 @@ _DEMO_MODE = os.environ.get("DEMO_MODE", "").lower() in ("1", "true", "yes")
|
||||||
# Example: PEREGRINE_VUE_URL=http://localhost:8506
|
# Example: PEREGRINE_VUE_URL=http://localhost:8506
|
||||||
_VUE_URL = os.environ.get("PEREGRINE_VUE_URL", "").strip().rstrip("/")
|
_VUE_URL = os.environ.get("PEREGRINE_VUE_URL", "").strip().rstrip("/")
|
||||||
|
|
||||||
|
# When True, a window.location.reload() after setting prgn_ui=vue will be
|
||||||
|
# intercepted by Caddy and routed to the Vue SPA. When False (no Caddy in the
|
||||||
|
# traffic path — e.g. test instances, direct Docker exposure), reloading just
|
||||||
|
# comes back to Streamlit and creates an infinite loop. Only set this in
|
||||||
|
# production/staging compose files where Caddy is actually in front.
|
||||||
|
_CADDY_PROXY = os.environ.get("PEREGRINE_CADDY_PROXY", "").lower() in ("1", "true", "yes")
|
||||||
|
|
||||||
_COOKIE_JS = """
|
_COOKIE_JS = """
|
||||||
<script>
|
<script>
|
||||||
(function() {{
|
(function() {{
|
||||||
|
|
@ -50,14 +57,18 @@ def _set_cookie_js(value: str, navigate: bool = False) -> None:
|
||||||
port. Without this, reload() just sends the request back to the same
|
port. Without this, reload() just sends the request back to the same
|
||||||
Streamlit port with no router in between to inspect the cookie.
|
Streamlit port with no router in between to inspect the cookie.
|
||||||
|
|
||||||
When PEREGRINE_VUE_URL is absent (Caddy deployment): navigate=True
|
When PEREGRINE_CADDY_PROXY is set (production/staging): navigate=True
|
||||||
triggers window.location.reload() so Caddy sees the updated cookie on
|
triggers window.location.reload() so Caddy sees the updated cookie on
|
||||||
the next HTTP request and routes accordingly.
|
the next HTTP request and routes accordingly.
|
||||||
|
|
||||||
|
When neither is set (test instances, bare Docker): navigate is suppressed
|
||||||
|
entirely — the cookie is written silently, but no reload is attempted.
|
||||||
|
Reloading without a proxy just bounces back to Streamlit and loops.
|
||||||
"""
|
"""
|
||||||
# components.html() renders in an iframe — window.parent navigates the host page
|
# components.html() renders in an iframe — window.parent navigates the host page
|
||||||
if navigate and value == "vue" and _VUE_URL:
|
if navigate and value == "vue" and _VUE_URL:
|
||||||
nav_js = f"window.parent.location.href = '{_VUE_URL}';"
|
nav_js = f"window.parent.location.href = '{_VUE_URL}';"
|
||||||
elif navigate:
|
elif navigate and _CADDY_PROXY:
|
||||||
nav_js = "window.parent.location.reload();"
|
nav_js = "window.parent.location.reload();"
|
||||||
else:
|
else:
|
||||||
nav_js = ""
|
nav_js = ""
|
||||||
|
|
@ -113,12 +124,6 @@ def sync_ui_cookie(yaml_path: Path, tier: str) -> None:
|
||||||
# UI components must not crash the app — silent fallback to default
|
# UI components must not crash the app — silent fallback to default
|
||||||
pref = "streamlit"
|
pref = "streamlit"
|
||||||
|
|
||||||
# Demo mode: Vue SPA has no demo data wiring — always serve Streamlit.
|
|
||||||
# (The tier downgrade check below is skipped in demo mode, but we must
|
|
||||||
# also block the Vue navigation itself so Caddy doesn't route to a blank SPA.)
|
|
||||||
if pref == "vue" and _DEMO_MODE:
|
|
||||||
pref = "streamlit"
|
|
||||||
|
|
||||||
# Tier downgrade protection (skip in demo — demo bypasses tier gate)
|
# Tier downgrade protection (skip in demo — demo bypasses tier gate)
|
||||||
if pref == "vue" and not _DEMO_MODE and not can_use(tier, "vue_ui_beta"):
|
if pref == "vue" and not _DEMO_MODE and not can_use(tier, "vue_ui_beta"):
|
||||||
if profile is not None:
|
if profile is not None:
|
||||||
|
|
@ -189,7 +194,7 @@ def render_banner(yaml_path: Path, tier: str) -> None:
|
||||||
|
|
||||||
col1, col2, col3 = st.columns([8, 1, 1])
|
col1, col2, col3 = st.columns([8, 1, 1])
|
||||||
with col1:
|
with col1:
|
||||||
st.info("✨ **New Peregrine UI available** — try the modern Vue interface (Beta, Paid tier)")
|
st.info("✨ **New Peregrine UI available** — try the modern Vue interface (Beta)")
|
||||||
with col2:
|
with col2:
|
||||||
if st.button("Try it", key="_ui_banner_try"):
|
if st.button("Try it", key="_ui_banner_try"):
|
||||||
switch_ui(yaml_path, to="vue", tier=tier)
|
switch_ui(yaml_path, to="vue", tier=tier)
|
||||||
|
|
|
||||||
|
|
@ -457,6 +457,11 @@ elif step == 5:
|
||||||
from app.wizard.step_inference import validate
|
from app.wizard.step_inference import validate
|
||||||
|
|
||||||
st.subheader("Step 5 \u2014 Inference & API Keys")
|
st.subheader("Step 5 \u2014 Inference & API Keys")
|
||||||
|
st.info(
|
||||||
|
"**Simplest setup:** set `OLLAMA_HOST` in your `.env` file — "
|
||||||
|
"Peregrine auto-detects it, no config file needed. "
|
||||||
|
"Or use the fields below to configure API keys and endpoints."
|
||||||
|
)
|
||||||
profile = saved_yaml.get("inference_profile", "remote")
|
profile = saved_yaml.get("inference_profile", "remote")
|
||||||
|
|
||||||
if profile == "remote":
|
if profile == "remote":
|
||||||
|
|
@ -466,8 +471,18 @@ elif step == 5:
|
||||||
placeholder="https://api.together.xyz/v1")
|
placeholder="https://api.together.xyz/v1")
|
||||||
openai_key = st.text_input("Endpoint API Key (optional)", type="password",
|
openai_key = st.text_input("Endpoint API Key (optional)", type="password",
|
||||||
key="oai_key") if openai_url else ""
|
key="oai_key") if openai_url else ""
|
||||||
|
ollama_host = st.text_input("Ollama host (optional \u2014 local fallback)",
|
||||||
|
placeholder="http://localhost:11434",
|
||||||
|
key="ollama_host_input")
|
||||||
|
ollama_model = st.text_input("Ollama model (optional)",
|
||||||
|
value="llama3.2:3b",
|
||||||
|
key="ollama_model_input")
|
||||||
else:
|
else:
|
||||||
st.info(f"Local mode ({profile}): Ollama provides inference.")
|
st.info(f"Local mode ({profile}): Ollama provides inference.")
|
||||||
|
import os
|
||||||
|
_ollama_host_env = os.environ.get("OLLAMA_HOST", "")
|
||||||
|
if _ollama_host_env:
|
||||||
|
st.caption(f"OLLAMA_HOST from .env: `{_ollama_host_env}`")
|
||||||
anthropic_key = openai_url = openai_key = ""
|
anthropic_key = openai_url = openai_key = ""
|
||||||
|
|
||||||
with st.expander("Advanced \u2014 Service Ports & Hosts"):
|
with st.expander("Advanced \u2014 Service Ports & Hosts"):
|
||||||
|
|
@ -546,6 +561,14 @@ elif step == 5:
|
||||||
if anthropic_key or openai_url:
|
if anthropic_key or openai_url:
|
||||||
env_path.write_text("\n".join(env_lines) + "\n")
|
env_path.write_text("\n".join(env_lines) + "\n")
|
||||||
|
|
||||||
|
if profile == "remote":
|
||||||
|
if ollama_host:
|
||||||
|
env_lines = _set_env(env_lines, "OLLAMA_HOST", ollama_host)
|
||||||
|
if ollama_model:
|
||||||
|
env_lines = _set_env(env_lines, "OLLAMA_MODEL", ollama_model)
|
||||||
|
if ollama_host or ollama_model:
|
||||||
|
env_path.write_text("\n".join(env_lines) + "\n")
|
||||||
|
|
||||||
_save_yaml({"services": svc, "wizard_step": 5})
|
_save_yaml({"services": svc, "wizard_step": 5})
|
||||||
st.session_state.wizard_step = 6
|
st.session_state.wizard_step = 6
|
||||||
st.rerun()
|
st.rerun()
|
||||||
|
|
@ -631,7 +654,7 @@ elif step == 6:
|
||||||
)
|
)
|
||||||
default_profile = {
|
default_profile = {
|
||||||
"name": "default",
|
"name": "default",
|
||||||
"job_titles": titles,
|
"titles": titles,
|
||||||
"locations": locations,
|
"locations": locations,
|
||||||
"remote_only": False,
|
"remote_only": False,
|
||||||
"boards": ["linkedin", "indeed", "glassdoor", "zip_recruiter"],
|
"boards": ["linkedin", "indeed", "glassdoor", "zip_recruiter"],
|
||||||
|
|
|
||||||
|
|
@ -12,12 +12,15 @@ from scripts.db import (
|
||||||
DEFAULT_DB, init_db, get_jobs_by_status, update_job_status,
|
DEFAULT_DB, init_db, get_jobs_by_status, update_job_status,
|
||||||
update_cover_letter, mark_applied, get_email_leads,
|
update_cover_letter, mark_applied, get_email_leads,
|
||||||
)
|
)
|
||||||
|
from app.cloud_session import resolve_session, get_db_path
|
||||||
|
|
||||||
|
resolve_session("peregrine")
|
||||||
|
|
||||||
st.title("📋 Job Review")
|
st.title("📋 Job Review")
|
||||||
|
|
||||||
init_db(DEFAULT_DB)
|
init_db(get_db_path())
|
||||||
|
|
||||||
_email_leads = get_email_leads(DEFAULT_DB)
|
_email_leads = get_email_leads(get_db_path())
|
||||||
|
|
||||||
# ── Sidebar filters ────────────────────────────────────────────────────────────
|
# ── Sidebar filters ────────────────────────────────────────────────────────────
|
||||||
with st.sidebar:
|
with st.sidebar:
|
||||||
|
|
@ -37,7 +40,7 @@ with st.sidebar:
|
||||||
index=0,
|
index=0,
|
||||||
)
|
)
|
||||||
|
|
||||||
jobs = get_jobs_by_status(DEFAULT_DB, show_status)
|
jobs = get_jobs_by_status(get_db_path(), show_status)
|
||||||
|
|
||||||
if remote_only:
|
if remote_only:
|
||||||
jobs = [j for j in jobs if j.get("is_remote")]
|
jobs = [j for j in jobs if j.get("is_remote")]
|
||||||
|
|
@ -86,11 +89,11 @@ if show_status == "pending" and _email_leads:
|
||||||
with right_l:
|
with right_l:
|
||||||
if st.button("✅ Approve", key=f"el_approve_{lead_id}",
|
if st.button("✅ Approve", key=f"el_approve_{lead_id}",
|
||||||
type="primary", use_container_width=True):
|
type="primary", use_container_width=True):
|
||||||
update_job_status(DEFAULT_DB, [lead_id], "approved")
|
update_job_status(get_db_path(), [lead_id], "approved")
|
||||||
st.rerun()
|
st.rerun()
|
||||||
if st.button("❌ Reject", key=f"el_reject_{lead_id}",
|
if st.button("❌ Reject", key=f"el_reject_{lead_id}",
|
||||||
use_container_width=True):
|
use_container_width=True):
|
||||||
update_job_status(DEFAULT_DB, [lead_id], "rejected")
|
update_job_status(get_db_path(), [lead_id], "rejected")
|
||||||
st.rerun()
|
st.rerun()
|
||||||
st.divider()
|
st.divider()
|
||||||
|
|
||||||
|
|
@ -162,7 +165,7 @@ for job in jobs:
|
||||||
)
|
)
|
||||||
save_col, _ = st.columns([2, 5])
|
save_col, _ = st.columns([2, 5])
|
||||||
if save_col.button("💾 Save draft", key=f"save_cl_{job_id}"):
|
if save_col.button("💾 Save draft", key=f"save_cl_{job_id}"):
|
||||||
update_cover_letter(DEFAULT_DB, job_id, st.session_state[_cl_key])
|
update_cover_letter(get_db_path(), job_id, st.session_state[_cl_key])
|
||||||
st.success("Saved!")
|
st.success("Saved!")
|
||||||
|
|
||||||
# Applied date + cover letter preview (applied/synced)
|
# Applied date + cover letter preview (applied/synced)
|
||||||
|
|
@ -182,11 +185,11 @@ for job in jobs:
|
||||||
if show_status == "pending":
|
if show_status == "pending":
|
||||||
if st.button("✅ Approve", key=f"approve_{job_id}",
|
if st.button("✅ Approve", key=f"approve_{job_id}",
|
||||||
type="primary", use_container_width=True):
|
type="primary", use_container_width=True):
|
||||||
update_job_status(DEFAULT_DB, [job_id], "approved")
|
update_job_status(get_db_path(), [job_id], "approved")
|
||||||
st.rerun()
|
st.rerun()
|
||||||
if st.button("❌ Reject", key=f"reject_{job_id}",
|
if st.button("❌ Reject", key=f"reject_{job_id}",
|
||||||
use_container_width=True):
|
use_container_width=True):
|
||||||
update_job_status(DEFAULT_DB, [job_id], "rejected")
|
update_job_status(get_db_path(), [job_id], "rejected")
|
||||||
st.rerun()
|
st.rerun()
|
||||||
|
|
||||||
elif show_status == "approved":
|
elif show_status == "approved":
|
||||||
|
|
@ -198,6 +201,6 @@ for job in jobs:
|
||||||
use_container_width=True):
|
use_container_width=True):
|
||||||
cl_text = st.session_state.get(f"cl_{job_id}", "")
|
cl_text = st.session_state.get(f"cl_{job_id}", "")
|
||||||
if cl_text:
|
if cl_text:
|
||||||
update_cover_letter(DEFAULT_DB, job_id, cl_text)
|
update_cover_letter(get_db_path(), job_id, cl_text)
|
||||||
mark_applied(DEFAULT_DB, [job_id])
|
mark_applied(get_db_path(), [job_id])
|
||||||
st.rerun()
|
st.rerun()
|
||||||
|
|
|
||||||
|
|
@ -401,21 +401,31 @@ with tab_search:
|
||||||
with st.spinner("Asking LLM for suggestions…"):
|
with st.spinner("Asking LLM for suggestions…"):
|
||||||
try:
|
try:
|
||||||
suggestions = _suggest_search_terms(_current_titles, RESUME_PATH, _blocklist, _user_profile)
|
suggestions = _suggest_search_terms(_current_titles, RESUME_PATH, _blocklist, _user_profile)
|
||||||
except RuntimeError as _e:
|
except Exception as _e:
|
||||||
|
_err_msg = str(_e)
|
||||||
|
if "exhausted" in _err_msg.lower() or isinstance(_e, RuntimeError):
|
||||||
st.warning(
|
st.warning(
|
||||||
f"No LLM backend available: {_e}. "
|
f"No LLM backend available: {_err_msg}. "
|
||||||
"Check that Ollama is running and has GPU access, or enable a cloud backend in Settings → System → LLM.",
|
"Check that Ollama is running and has GPU access, or enable a cloud backend in Settings → System → LLM.",
|
||||||
icon="⚠️",
|
icon="⚠️",
|
||||||
)
|
)
|
||||||
|
else:
|
||||||
|
st.error(f"Suggestion failed: {_err_msg}", icon="🚨")
|
||||||
suggestions = None
|
suggestions = None
|
||||||
if suggestions is not None:
|
if suggestions is not None:
|
||||||
# Add suggested titles to options list (not auto-selected — user picks from dropdown)
|
# Add suggested titles to options list (not auto-selected — user picks from dropdown)
|
||||||
_opts = list(st.session_state.get("_sp_title_options", []))
|
_opts = list(st.session_state.get("_sp_title_options", []))
|
||||||
for _t in suggestions.get("suggested_titles", []):
|
_new_titles = [_t for _t in suggestions.get("suggested_titles", []) if _t not in _opts]
|
||||||
if _t not in _opts:
|
_opts.extend(_new_titles)
|
||||||
_opts.append(_t)
|
|
||||||
st.session_state["_sp_title_options"] = _opts
|
st.session_state["_sp_title_options"] = _opts
|
||||||
st.session_state["_sp_suggestions"] = suggestions
|
st.session_state["_sp_suggestions"] = suggestions
|
||||||
|
if not _new_titles and not suggestions.get("suggested_excludes"):
|
||||||
|
_resume_hint = " Upload your resume in Settings → Resume Profile for better results." if not RESUME_PATH.exists() else ""
|
||||||
|
st.info(
|
||||||
|
f"No new suggestions found — the LLM didn't generate anything new for these titles.{_resume_hint}",
|
||||||
|
icon="ℹ️",
|
||||||
|
)
|
||||||
|
else:
|
||||||
st.rerun()
|
st.rerun()
|
||||||
|
|
||||||
if st.session_state.get("_sp_suggestions"):
|
if st.session_state.get("_sp_suggestions"):
|
||||||
|
|
|
||||||
|
|
@ -15,28 +15,28 @@ import streamlit.components.v1 as components
|
||||||
import yaml
|
import yaml
|
||||||
|
|
||||||
from scripts.user_profile import UserProfile
|
from scripts.user_profile import UserProfile
|
||||||
|
|
||||||
_USER_YAML = Path(__file__).parent.parent.parent / "config" / "user.yaml"
|
|
||||||
_profile = UserProfile(_USER_YAML) if UserProfile.exists(_USER_YAML) else None
|
|
||||||
_name = _profile.name if _profile else "Job Seeker"
|
|
||||||
|
|
||||||
from scripts.db import (
|
from scripts.db import (
|
||||||
DEFAULT_DB, init_db, get_jobs_by_status,
|
DEFAULT_DB, init_db, get_jobs_by_status,
|
||||||
update_cover_letter, mark_applied, update_job_status,
|
update_cover_letter, mark_applied, update_job_status,
|
||||||
get_task_for_job,
|
get_task_for_job,
|
||||||
)
|
)
|
||||||
from scripts.task_runner import submit_task
|
from scripts.task_runner import submit_task
|
||||||
from app.cloud_session import resolve_session, get_db_path
|
from app.cloud_session import resolve_session, get_db_path, get_config_dir
|
||||||
from app.telemetry import log_usage_event
|
from app.telemetry import log_usage_event
|
||||||
|
|
||||||
DOCS_DIR = _profile.docs_dir if _profile else Path.home() / "Documents" / "JobSearch"
|
|
||||||
RESUME_YAML = Path(__file__).parent.parent.parent / "config" / "plain_text_resume.yaml"
|
|
||||||
|
|
||||||
st.title("🚀 Apply Workspace")
|
st.title("🚀 Apply Workspace")
|
||||||
|
|
||||||
resolve_session("peregrine")
|
resolve_session("peregrine")
|
||||||
init_db(get_db_path())
|
init_db(get_db_path())
|
||||||
|
|
||||||
|
_CONFIG_DIR = get_config_dir()
|
||||||
|
_USER_YAML = _CONFIG_DIR / "user.yaml"
|
||||||
|
_profile = UserProfile(_USER_YAML) if UserProfile.exists(_USER_YAML) else None
|
||||||
|
_name = _profile.name if _profile else "Job Seeker"
|
||||||
|
|
||||||
|
DOCS_DIR = _profile.docs_dir if _profile else Path.home() / "Documents" / "JobSearch"
|
||||||
|
RESUME_YAML = _CONFIG_DIR / "plain_text_resume.yaml"
|
||||||
|
|
||||||
# ── PDF generation ─────────────────────────────────────────────────────────────
|
# ── PDF generation ─────────────────────────────────────────────────────────────
|
||||||
def _make_cover_letter_pdf(job: dict, cover_letter: str, output_dir: Path) -> Path:
|
def _make_cover_letter_pdf(job: dict, cover_letter: str, output_dir: Path) -> Path:
|
||||||
from reportlab.lib.pagesizes import letter
|
from reportlab.lib.pagesizes import letter
|
||||||
|
|
|
||||||
|
|
@ -36,6 +36,9 @@ from scripts.db import (
|
||||||
get_unread_stage_signals, dismiss_stage_signal,
|
get_unread_stage_signals, dismiss_stage_signal,
|
||||||
)
|
)
|
||||||
from scripts.task_runner import submit_task
|
from scripts.task_runner import submit_task
|
||||||
|
from app.cloud_session import resolve_session, get_db_path
|
||||||
|
|
||||||
|
resolve_session("peregrine")
|
||||||
|
|
||||||
_CONFIG_DIR = Path(__file__).parent.parent.parent / "config"
|
_CONFIG_DIR = Path(__file__).parent.parent.parent / "config"
|
||||||
_CALENDAR_INTEGRATIONS = ("apple_calendar", "google_calendar")
|
_CALENDAR_INTEGRATIONS = ("apple_calendar", "google_calendar")
|
||||||
|
|
@ -46,23 +49,23 @@ _calendar_connected = any(
|
||||||
|
|
||||||
st.title("🎯 Interviews")
|
st.title("🎯 Interviews")
|
||||||
|
|
||||||
init_db(DEFAULT_DB)
|
init_db(get_db_path())
|
||||||
|
|
||||||
# ── Sidebar: Email sync ────────────────────────────────────────────────────────
|
# ── Sidebar: Email sync ────────────────────────────────────────────────────────
|
||||||
with st.sidebar:
|
with st.sidebar:
|
||||||
st.markdown("### 📧 Email Sync")
|
st.markdown("### 📧 Email Sync")
|
||||||
_email_task = get_task_for_job(DEFAULT_DB, "email_sync", 0)
|
_email_task = get_task_for_job(get_db_path(), "email_sync", 0)
|
||||||
_email_running = _email_task and _email_task["status"] in ("queued", "running")
|
_email_running = _email_task and _email_task["status"] in ("queued", "running")
|
||||||
|
|
||||||
if st.button("🔄 Sync Emails", use_container_width=True, type="primary",
|
if st.button("🔄 Sync Emails", use_container_width=True, type="primary",
|
||||||
disabled=bool(_email_running)):
|
disabled=bool(_email_running)):
|
||||||
submit_task(DEFAULT_DB, "email_sync", 0)
|
submit_task(get_db_path(), "email_sync", 0)
|
||||||
st.rerun()
|
st.rerun()
|
||||||
|
|
||||||
if _email_running:
|
if _email_running:
|
||||||
@st.fragment(run_every=4)
|
@st.fragment(run_every=4)
|
||||||
def _email_sidebar_status():
|
def _email_sidebar_status():
|
||||||
t = get_task_for_job(DEFAULT_DB, "email_sync", 0)
|
t = get_task_for_job(get_db_path(), "email_sync", 0)
|
||||||
if t and t["status"] in ("queued", "running"):
|
if t and t["status"] in ("queued", "running"):
|
||||||
st.info("⏳ Syncing…")
|
st.info("⏳ Syncing…")
|
||||||
else:
|
else:
|
||||||
|
|
@ -99,7 +102,7 @@ STAGE_NEXT_LABEL = {
|
||||||
}
|
}
|
||||||
|
|
||||||
# ── Data ──────────────────────────────────────────────────────────────────────
|
# ── Data ──────────────────────────────────────────────────────────────────────
|
||||||
jobs_by_stage = get_interview_jobs(DEFAULT_DB)
|
jobs_by_stage = get_interview_jobs(get_db_path())
|
||||||
|
|
||||||
# ── Helpers ───────────────────────────────────────────────────────────────────
|
# ── Helpers ───────────────────────────────────────────────────────────────────
|
||||||
def _days_ago(date_str: str | None) -> str:
|
def _days_ago(date_str: str | None) -> str:
|
||||||
|
|
@ -120,8 +123,8 @@ def _days_ago(date_str: str | None) -> str:
|
||||||
def _research_modal(job: dict) -> None:
|
def _research_modal(job: dict) -> None:
|
||||||
job_id = job["id"]
|
job_id = job["id"]
|
||||||
st.caption(f"**{job.get('company')}** — {job.get('title')}")
|
st.caption(f"**{job.get('company')}** — {job.get('title')}")
|
||||||
research = get_research(DEFAULT_DB, job_id=job_id)
|
research = get_research(get_db_path(), job_id=job_id)
|
||||||
task = get_task_for_job(DEFAULT_DB, "company_research", job_id)
|
task = get_task_for_job(get_db_path(), "company_research", job_id)
|
||||||
running = task and task["status"] in ("queued", "running")
|
running = task and task["status"] in ("queued", "running")
|
||||||
|
|
||||||
if running:
|
if running:
|
||||||
|
|
@ -144,7 +147,7 @@ def _research_modal(job: dict) -> None:
|
||||||
"inaccuracies. SearXNG is now available — re-run to get verified facts."
|
"inaccuracies. SearXNG is now available — re-run to get verified facts."
|
||||||
)
|
)
|
||||||
if st.button("🔄 Re-run with live data", key=f"modal_rescrape_{job_id}", type="primary"):
|
if st.button("🔄 Re-run with live data", key=f"modal_rescrape_{job_id}", type="primary"):
|
||||||
submit_task(DEFAULT_DB, "company_research", job_id)
|
submit_task(get_db_path(), "company_research", job_id)
|
||||||
st.rerun()
|
st.rerun()
|
||||||
st.divider()
|
st.divider()
|
||||||
else:
|
else:
|
||||||
|
|
@ -160,14 +163,14 @@ def _research_modal(job: dict) -> None:
|
||||||
)
|
)
|
||||||
st.markdown(research["raw_output"])
|
st.markdown(research["raw_output"])
|
||||||
if st.button("🔄 Refresh", key=f"modal_regen_{job_id}", disabled=bool(running)):
|
if st.button("🔄 Refresh", key=f"modal_regen_{job_id}", disabled=bool(running)):
|
||||||
submit_task(DEFAULT_DB, "company_research", job_id)
|
submit_task(get_db_path(), "company_research", job_id)
|
||||||
st.rerun()
|
st.rerun()
|
||||||
else:
|
else:
|
||||||
st.info("No research brief yet.")
|
st.info("No research brief yet.")
|
||||||
if task and task["status"] == "failed":
|
if task and task["status"] == "failed":
|
||||||
st.error(f"Last attempt failed: {task.get('error', '')}")
|
st.error(f"Last attempt failed: {task.get('error', '')}")
|
||||||
if st.button("🔬 Generate now", key=f"modal_gen_{job_id}"):
|
if st.button("🔬 Generate now", key=f"modal_gen_{job_id}"):
|
||||||
submit_task(DEFAULT_DB, "company_research", job_id)
|
submit_task(get_db_path(), "company_research", job_id)
|
||||||
st.rerun()
|
st.rerun()
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -175,7 +178,7 @@ def _research_modal(job: dict) -> None:
|
||||||
def _email_modal(job: dict) -> None:
|
def _email_modal(job: dict) -> None:
|
||||||
job_id = job["id"]
|
job_id = job["id"]
|
||||||
st.caption(f"**{job.get('company')}** — {job.get('title')}")
|
st.caption(f"**{job.get('company')}** — {job.get('title')}")
|
||||||
contacts = get_contacts(DEFAULT_DB, job_id=job_id)
|
contacts = get_contacts(get_db_path(), job_id=job_id)
|
||||||
|
|
||||||
if not contacts:
|
if not contacts:
|
||||||
st.info("No emails logged yet. Use the form below to add one.")
|
st.info("No emails logged yet. Use the form below to add one.")
|
||||||
|
|
@ -246,7 +249,7 @@ def _email_modal(job: dict) -> None:
|
||||||
body_text = st.text_area("Body / notes", height=80, key=f"body_modal_{job_id}")
|
body_text = st.text_area("Body / notes", height=80, key=f"body_modal_{job_id}")
|
||||||
if st.form_submit_button("📧 Save contact"):
|
if st.form_submit_button("📧 Save contact"):
|
||||||
add_contact(
|
add_contact(
|
||||||
DEFAULT_DB, job_id=job_id,
|
get_db_path(), job_id=job_id,
|
||||||
direction=direction, subject=subject,
|
direction=direction, subject=subject,
|
||||||
from_addr=from_addr, body=body_text, received_at=recv_at,
|
from_addr=from_addr, body=body_text, received_at=recv_at,
|
||||||
)
|
)
|
||||||
|
|
@ -255,7 +258,7 @@ def _email_modal(job: dict) -> None:
|
||||||
def _render_card(job: dict, stage: str, compact: bool = False) -> None:
|
def _render_card(job: dict, stage: str, compact: bool = False) -> None:
|
||||||
"""Render a single job card appropriate for the given stage."""
|
"""Render a single job card appropriate for the given stage."""
|
||||||
job_id = job["id"]
|
job_id = job["id"]
|
||||||
contacts = get_contacts(DEFAULT_DB, job_id=job_id)
|
contacts = get_contacts(get_db_path(), job_id=job_id)
|
||||||
last_contact = contacts[-1] if contacts else None
|
last_contact = contacts[-1] if contacts else None
|
||||||
|
|
||||||
with st.container(border=True):
|
with st.container(border=True):
|
||||||
|
|
@ -278,7 +281,7 @@ def _render_card(job: dict, stage: str, compact: bool = False) -> None:
|
||||||
format="YYYY-MM-DD",
|
format="YYYY-MM-DD",
|
||||||
)
|
)
|
||||||
if st.form_submit_button("📅 Save date"):
|
if st.form_submit_button("📅 Save date"):
|
||||||
set_interview_date(DEFAULT_DB, job_id=job_id, date_str=str(new_date))
|
set_interview_date(get_db_path(), job_id=job_id, date_str=str(new_date))
|
||||||
st.success("Saved!")
|
st.success("Saved!")
|
||||||
st.rerun()
|
st.rerun()
|
||||||
|
|
||||||
|
|
@ -288,7 +291,7 @@ def _render_card(job: dict, stage: str, compact: bool = False) -> None:
|
||||||
_cal_label = "🔄 Update Calendar" if _has_event else "📅 Add to Calendar"
|
_cal_label = "🔄 Update Calendar" if _has_event else "📅 Add to Calendar"
|
||||||
if st.button(_cal_label, key=f"cal_push_{job_id}", use_container_width=True):
|
if st.button(_cal_label, key=f"cal_push_{job_id}", use_container_width=True):
|
||||||
from scripts.calendar_push import push_interview_event
|
from scripts.calendar_push import push_interview_event
|
||||||
result = push_interview_event(DEFAULT_DB, job_id=job_id, config_dir=_CONFIG_DIR)
|
result = push_interview_event(get_db_path(), job_id=job_id, config_dir=_CONFIG_DIR)
|
||||||
if result["ok"]:
|
if result["ok"]:
|
||||||
st.success(f"Event {'updated' if _has_event else 'added'} ({result['provider'].replace('_', ' ').title()})")
|
st.success(f"Event {'updated' if _has_event else 'added'} ({result['provider'].replace('_', ' ').title()})")
|
||||||
st.rerun()
|
st.rerun()
|
||||||
|
|
@ -297,7 +300,7 @@ def _render_card(job: dict, stage: str, compact: bool = False) -> None:
|
||||||
|
|
||||||
if not compact:
|
if not compact:
|
||||||
if stage in ("applied", "phone_screen", "interviewing"):
|
if stage in ("applied", "phone_screen", "interviewing"):
|
||||||
signals = get_unread_stage_signals(DEFAULT_DB, job_id=job_id)
|
signals = get_unread_stage_signals(get_db_path(), job_id=job_id)
|
||||||
if signals:
|
if signals:
|
||||||
sig = signals[-1]
|
sig = signals[-1]
|
||||||
_SIGNAL_TO_STAGE = {
|
_SIGNAL_TO_STAGE = {
|
||||||
|
|
@ -318,23 +321,23 @@ def _render_card(job: dict, stage: str, compact: bool = False) -> None:
|
||||||
if sig["stage_signal"] == "rejected":
|
if sig["stage_signal"] == "rejected":
|
||||||
if b1.button("✗ Reject", key=f"sig_rej_{sig['id']}",
|
if b1.button("✗ Reject", key=f"sig_rej_{sig['id']}",
|
||||||
use_container_width=True):
|
use_container_width=True):
|
||||||
reject_at_stage(DEFAULT_DB, job_id=job_id, rejection_stage=stage)
|
reject_at_stage(get_db_path(), job_id=job_id, rejection_stage=stage)
|
||||||
dismiss_stage_signal(DEFAULT_DB, sig["id"])
|
dismiss_stage_signal(get_db_path(), sig["id"])
|
||||||
st.rerun(scope="app")
|
st.rerun(scope="app")
|
||||||
elif target_stage and b1.button(
|
elif target_stage and b1.button(
|
||||||
f"→ {target_label}", key=f"sig_adv_{sig['id']}",
|
f"→ {target_label}", key=f"sig_adv_{sig['id']}",
|
||||||
use_container_width=True, type="primary",
|
use_container_width=True, type="primary",
|
||||||
):
|
):
|
||||||
if target_stage == "phone_screen" and stage == "applied":
|
if target_stage == "phone_screen" and stage == "applied":
|
||||||
advance_to_stage(DEFAULT_DB, job_id=job_id, stage="phone_screen")
|
advance_to_stage(get_db_path(), job_id=job_id, stage="phone_screen")
|
||||||
submit_task(DEFAULT_DB, "company_research", job_id)
|
submit_task(get_db_path(), "company_research", job_id)
|
||||||
elif target_stage:
|
elif target_stage:
|
||||||
advance_to_stage(DEFAULT_DB, job_id=job_id, stage=target_stage)
|
advance_to_stage(get_db_path(), job_id=job_id, stage=target_stage)
|
||||||
dismiss_stage_signal(DEFAULT_DB, sig["id"])
|
dismiss_stage_signal(get_db_path(), sig["id"])
|
||||||
st.rerun(scope="app")
|
st.rerun(scope="app")
|
||||||
if b2.button("Dismiss", key=f"sig_dis_{sig['id']}",
|
if b2.button("Dismiss", key=f"sig_dis_{sig['id']}",
|
||||||
use_container_width=True):
|
use_container_width=True):
|
||||||
dismiss_stage_signal(DEFAULT_DB, sig["id"])
|
dismiss_stage_signal(get_db_path(), sig["id"])
|
||||||
st.rerun()
|
st.rerun()
|
||||||
|
|
||||||
# Advance / Reject buttons
|
# Advance / Reject buttons
|
||||||
|
|
@ -346,16 +349,16 @@ def _render_card(job: dict, stage: str, compact: bool = False) -> None:
|
||||||
f"→ {next_label}", key=f"adv_{job_id}",
|
f"→ {next_label}", key=f"adv_{job_id}",
|
||||||
use_container_width=True, type="primary",
|
use_container_width=True, type="primary",
|
||||||
):
|
):
|
||||||
advance_to_stage(DEFAULT_DB, job_id=job_id, stage=next_stage)
|
advance_to_stage(get_db_path(), job_id=job_id, stage=next_stage)
|
||||||
if next_stage == "phone_screen":
|
if next_stage == "phone_screen":
|
||||||
submit_task(DEFAULT_DB, "company_research", job_id)
|
submit_task(get_db_path(), "company_research", job_id)
|
||||||
st.rerun(scope="app") # full rerun — card must appear in new column
|
st.rerun(scope="app") # full rerun — card must appear in new column
|
||||||
|
|
||||||
if c2.button(
|
if c2.button(
|
||||||
"✗ Reject", key=f"rej_{job_id}",
|
"✗ Reject", key=f"rej_{job_id}",
|
||||||
use_container_width=True,
|
use_container_width=True,
|
||||||
):
|
):
|
||||||
reject_at_stage(DEFAULT_DB, job_id=job_id, rejection_stage=stage)
|
reject_at_stage(get_db_path(), job_id=job_id, rejection_stage=stage)
|
||||||
st.rerun() # fragment-scope rerun — card disappears without scroll-to-top
|
st.rerun() # fragment-scope rerun — card disappears without scroll-to-top
|
||||||
|
|
||||||
if job.get("url"):
|
if job.get("url"):
|
||||||
|
|
@ -385,7 +388,7 @@ def _render_card(job: dict, stage: str, compact: bool = False) -> None:
|
||||||
@st.fragment
|
@st.fragment
|
||||||
def _card_fragment(job_id: int, stage: str) -> None:
|
def _card_fragment(job_id: int, stage: str) -> None:
|
||||||
"""Re-fetches the job on each fragment rerun; renders nothing if moved/rejected."""
|
"""Re-fetches the job on each fragment rerun; renders nothing if moved/rejected."""
|
||||||
job = get_job_by_id(DEFAULT_DB, job_id)
|
job = get_job_by_id(get_db_path(), job_id)
|
||||||
if job is None or job.get("status") != stage:
|
if job is None or job.get("status") != stage:
|
||||||
return
|
return
|
||||||
_render_card(job, stage)
|
_render_card(job, stage)
|
||||||
|
|
@ -394,11 +397,11 @@ def _card_fragment(job_id: int, stage: str) -> None:
|
||||||
@st.fragment
|
@st.fragment
|
||||||
def _pre_kanban_row_fragment(job_id: int) -> None:
|
def _pre_kanban_row_fragment(job_id: int) -> None:
|
||||||
"""Pre-kanban compact row for applied and survey-stage jobs."""
|
"""Pre-kanban compact row for applied and survey-stage jobs."""
|
||||||
job = get_job_by_id(DEFAULT_DB, job_id)
|
job = get_job_by_id(get_db_path(), job_id)
|
||||||
if job is None or job.get("status") not in ("applied", "survey"):
|
if job is None or job.get("status") not in ("applied", "survey"):
|
||||||
return
|
return
|
||||||
stage = job["status"]
|
stage = job["status"]
|
||||||
contacts = get_contacts(DEFAULT_DB, job_id=job_id)
|
contacts = get_contacts(get_db_path(), job_id=job_id)
|
||||||
last_contact = contacts[-1] if contacts else None
|
last_contact = contacts[-1] if contacts else None
|
||||||
|
|
||||||
with st.container(border=True):
|
with st.container(border=True):
|
||||||
|
|
@ -414,7 +417,7 @@ def _pre_kanban_row_fragment(job_id: int) -> None:
|
||||||
_email_modal(job)
|
_email_modal(job)
|
||||||
|
|
||||||
# Stage signal hint (email-detected next steps)
|
# Stage signal hint (email-detected next steps)
|
||||||
signals = get_unread_stage_signals(DEFAULT_DB, job_id=job_id)
|
signals = get_unread_stage_signals(get_db_path(), job_id=job_id)
|
||||||
if signals:
|
if signals:
|
||||||
sig = signals[-1]
|
sig = signals[-1]
|
||||||
_SIGNAL_TO_STAGE = {
|
_SIGNAL_TO_STAGE = {
|
||||||
|
|
@ -437,15 +440,15 @@ def _pre_kanban_row_fragment(job_id: int) -> None:
|
||||||
use_container_width=True, type="primary",
|
use_container_width=True, type="primary",
|
||||||
):
|
):
|
||||||
if target_stage == "phone_screen":
|
if target_stage == "phone_screen":
|
||||||
advance_to_stage(DEFAULT_DB, job_id=job_id, stage="phone_screen")
|
advance_to_stage(get_db_path(), job_id=job_id, stage="phone_screen")
|
||||||
submit_task(DEFAULT_DB, "company_research", job_id)
|
submit_task(get_db_path(), "company_research", job_id)
|
||||||
else:
|
else:
|
||||||
advance_to_stage(DEFAULT_DB, job_id=job_id, stage=target_stage)
|
advance_to_stage(get_db_path(), job_id=job_id, stage=target_stage)
|
||||||
dismiss_stage_signal(DEFAULT_DB, sig["id"])
|
dismiss_stage_signal(get_db_path(), sig["id"])
|
||||||
st.rerun(scope="app")
|
st.rerun(scope="app")
|
||||||
if s2.button("Dismiss", key=f"sig_dis_pre_{sig['id']}",
|
if s2.button("Dismiss", key=f"sig_dis_pre_{sig['id']}",
|
||||||
use_container_width=True):
|
use_container_width=True):
|
||||||
dismiss_stage_signal(DEFAULT_DB, sig["id"])
|
dismiss_stage_signal(get_db_path(), sig["id"])
|
||||||
st.rerun()
|
st.rerun()
|
||||||
|
|
||||||
with right:
|
with right:
|
||||||
|
|
@ -453,24 +456,24 @@ def _pre_kanban_row_fragment(job_id: int) -> None:
|
||||||
"→ 📞 Phone Screen", key=f"adv_pre_{job_id}",
|
"→ 📞 Phone Screen", key=f"adv_pre_{job_id}",
|
||||||
use_container_width=True, type="primary",
|
use_container_width=True, type="primary",
|
||||||
):
|
):
|
||||||
advance_to_stage(DEFAULT_DB, job_id=job_id, stage="phone_screen")
|
advance_to_stage(get_db_path(), job_id=job_id, stage="phone_screen")
|
||||||
submit_task(DEFAULT_DB, "company_research", job_id)
|
submit_task(get_db_path(), "company_research", job_id)
|
||||||
st.rerun(scope="app")
|
st.rerun(scope="app")
|
||||||
col_a, col_b = st.columns(2)
|
col_a, col_b = st.columns(2)
|
||||||
if stage == "applied" and col_a.button(
|
if stage == "applied" and col_a.button(
|
||||||
"📋 Survey", key=f"to_survey_{job_id}", use_container_width=True,
|
"📋 Survey", key=f"to_survey_{job_id}", use_container_width=True,
|
||||||
):
|
):
|
||||||
advance_to_stage(DEFAULT_DB, job_id=job_id, stage="survey")
|
advance_to_stage(get_db_path(), job_id=job_id, stage="survey")
|
||||||
st.rerun(scope="app")
|
st.rerun(scope="app")
|
||||||
if col_b.button("✗ Reject", key=f"rej_pre_{job_id}", use_container_width=True):
|
if col_b.button("✗ Reject", key=f"rej_pre_{job_id}", use_container_width=True):
|
||||||
reject_at_stage(DEFAULT_DB, job_id=job_id, rejection_stage=stage)
|
reject_at_stage(get_db_path(), job_id=job_id, rejection_stage=stage)
|
||||||
st.rerun()
|
st.rerun()
|
||||||
|
|
||||||
|
|
||||||
@st.fragment
|
@st.fragment
|
||||||
def _hired_card_fragment(job_id: int) -> None:
|
def _hired_card_fragment(job_id: int) -> None:
|
||||||
"""Compact hired job card — shown in the Offer/Hired column."""
|
"""Compact hired job card — shown in the Offer/Hired column."""
|
||||||
job = get_job_by_id(DEFAULT_DB, job_id)
|
job = get_job_by_id(get_db_path(), job_id)
|
||||||
if job is None or job.get("status") != "hired":
|
if job is None or job.get("status") != "hired":
|
||||||
return
|
return
|
||||||
with st.container(border=True):
|
with st.container(border=True):
|
||||||
|
|
|
||||||
|
|
@ -25,11 +25,14 @@ from scripts.db import (
|
||||||
get_task_for_job,
|
get_task_for_job,
|
||||||
)
|
)
|
||||||
from scripts.task_runner import submit_task
|
from scripts.task_runner import submit_task
|
||||||
|
from app.cloud_session import resolve_session, get_db_path
|
||||||
|
|
||||||
init_db(DEFAULT_DB)
|
resolve_session("peregrine")
|
||||||
|
|
||||||
|
init_db(get_db_path())
|
||||||
|
|
||||||
# ── Job selection ─────────────────────────────────────────────────────────────
|
# ── Job selection ─────────────────────────────────────────────────────────────
|
||||||
jobs_by_stage = get_interview_jobs(DEFAULT_DB)
|
jobs_by_stage = get_interview_jobs(get_db_path())
|
||||||
active_stages = ["phone_screen", "interviewing", "offer"]
|
active_stages = ["phone_screen", "interviewing", "offer"]
|
||||||
active_jobs = [
|
active_jobs = [
|
||||||
j for stage in active_stages
|
j for stage in active_stages
|
||||||
|
|
@ -100,10 +103,10 @@ col_prep, col_context = st.columns([2, 3])
|
||||||
# ════════════════════════════════════════════════
|
# ════════════════════════════════════════════════
|
||||||
with col_prep:
|
with col_prep:
|
||||||
|
|
||||||
research = get_research(DEFAULT_DB, job_id=selected_id)
|
research = get_research(get_db_path(), job_id=selected_id)
|
||||||
|
|
||||||
# Refresh / generate research
|
# Refresh / generate research
|
||||||
_res_task = get_task_for_job(DEFAULT_DB, "company_research", selected_id)
|
_res_task = get_task_for_job(get_db_path(), "company_research", selected_id)
|
||||||
_res_running = _res_task and _res_task["status"] in ("queued", "running")
|
_res_running = _res_task and _res_task["status"] in ("queued", "running")
|
||||||
|
|
||||||
if not research:
|
if not research:
|
||||||
|
|
@ -112,13 +115,13 @@ with col_prep:
|
||||||
if _res_task and _res_task["status"] == "failed":
|
if _res_task and _res_task["status"] == "failed":
|
||||||
st.error(f"Last attempt failed: {_res_task.get('error', '')}")
|
st.error(f"Last attempt failed: {_res_task.get('error', '')}")
|
||||||
if st.button("🔬 Generate research brief", type="primary", use_container_width=True):
|
if st.button("🔬 Generate research brief", type="primary", use_container_width=True):
|
||||||
submit_task(DEFAULT_DB, "company_research", selected_id)
|
submit_task(get_db_path(), "company_research", selected_id)
|
||||||
st.rerun()
|
st.rerun()
|
||||||
|
|
||||||
if _res_running:
|
if _res_running:
|
||||||
@st.fragment(run_every=3)
|
@st.fragment(run_every=3)
|
||||||
def _res_status_initial():
|
def _res_status_initial():
|
||||||
t = get_task_for_job(DEFAULT_DB, "company_research", selected_id)
|
t = get_task_for_job(get_db_path(), "company_research", selected_id)
|
||||||
if t and t["status"] in ("queued", "running"):
|
if t and t["status"] in ("queued", "running"):
|
||||||
stage = t.get("stage") or ""
|
stage = t.get("stage") or ""
|
||||||
lbl = "Queued…" if t["status"] == "queued" else (stage or "Generating… this may take 30–60 seconds")
|
lbl = "Queued…" if t["status"] == "queued" else (stage or "Generating… this may take 30–60 seconds")
|
||||||
|
|
@ -133,13 +136,13 @@ with col_prep:
|
||||||
col_ts, col_btn = st.columns([3, 1])
|
col_ts, col_btn = st.columns([3, 1])
|
||||||
col_ts.caption(f"Research generated: {generated_at}")
|
col_ts.caption(f"Research generated: {generated_at}")
|
||||||
if col_btn.button("🔄 Refresh", use_container_width=True, disabled=bool(_res_running)):
|
if col_btn.button("🔄 Refresh", use_container_width=True, disabled=bool(_res_running)):
|
||||||
submit_task(DEFAULT_DB, "company_research", selected_id)
|
submit_task(get_db_path(), "company_research", selected_id)
|
||||||
st.rerun()
|
st.rerun()
|
||||||
|
|
||||||
if _res_running:
|
if _res_running:
|
||||||
@st.fragment(run_every=3)
|
@st.fragment(run_every=3)
|
||||||
def _res_status_refresh():
|
def _res_status_refresh():
|
||||||
t = get_task_for_job(DEFAULT_DB, "company_research", selected_id)
|
t = get_task_for_job(get_db_path(), "company_research", selected_id)
|
||||||
if t and t["status"] in ("queued", "running"):
|
if t and t["status"] in ("queued", "running"):
|
||||||
stage = t.get("stage") or ""
|
stage = t.get("stage") or ""
|
||||||
lbl = "Queued…" if t["status"] == "queued" else (stage or "Refreshing research…")
|
lbl = "Queued…" if t["status"] == "queued" else (stage or "Refreshing research…")
|
||||||
|
|
@ -311,7 +314,7 @@ with col_context:
|
||||||
st.markdown(job.get("description") or "_No description saved for this listing._")
|
st.markdown(job.get("description") or "_No description saved for this listing._")
|
||||||
|
|
||||||
with tab_emails:
|
with tab_emails:
|
||||||
contacts = get_contacts(DEFAULT_DB, job_id=selected_id)
|
contacts = get_contacts(get_db_path(), job_id=selected_id)
|
||||||
if not contacts:
|
if not contacts:
|
||||||
st.info("No contacts logged yet. Use the Interviews page to log emails.")
|
st.info("No contacts logged yet. Use the Interviews page to log emails.")
|
||||||
else:
|
else:
|
||||||
|
|
|
||||||
|
|
@ -22,10 +22,13 @@ from scripts.db import (
|
||||||
insert_survey_response, get_survey_responses,
|
insert_survey_response, get_survey_responses,
|
||||||
)
|
)
|
||||||
from scripts.llm_router import LLMRouter
|
from scripts.llm_router import LLMRouter
|
||||||
|
from app.cloud_session import resolve_session, get_db_path
|
||||||
|
|
||||||
|
resolve_session("peregrine")
|
||||||
|
|
||||||
st.title("📋 Survey Assistant")
|
st.title("📋 Survey Assistant")
|
||||||
|
|
||||||
init_db(DEFAULT_DB)
|
init_db(get_db_path())
|
||||||
|
|
||||||
|
|
||||||
# ── Vision service health check ────────────────────────────────────────────────
|
# ── Vision service health check ────────────────────────────────────────────────
|
||||||
|
|
@ -40,7 +43,7 @@ def _vision_available() -> bool:
|
||||||
vision_up = _vision_available()
|
vision_up = _vision_available()
|
||||||
|
|
||||||
# ── Job selector ───────────────────────────────────────────────────────────────
|
# ── Job selector ───────────────────────────────────────────────────────────────
|
||||||
jobs_by_stage = get_interview_jobs(DEFAULT_DB)
|
jobs_by_stage = get_interview_jobs(get_db_path())
|
||||||
survey_jobs = jobs_by_stage.get("survey", [])
|
survey_jobs = jobs_by_stage.get("survey", [])
|
||||||
other_jobs = (
|
other_jobs = (
|
||||||
jobs_by_stage.get("applied", []) +
|
jobs_by_stage.get("applied", []) +
|
||||||
|
|
@ -61,7 +64,7 @@ selected_job_id = st.selectbox(
|
||||||
format_func=lambda jid: job_labels[jid],
|
format_func=lambda jid: job_labels[jid],
|
||||||
index=0,
|
index=0,
|
||||||
)
|
)
|
||||||
selected_job = get_job_by_id(DEFAULT_DB, selected_job_id)
|
selected_job = get_job_by_id(get_db_path(), selected_job_id)
|
||||||
|
|
||||||
# ── LLM prompt builders ────────────────────────────────────────────────────────
|
# ── LLM prompt builders ────────────────────────────────────────────────────────
|
||||||
_SURVEY_SYSTEM = (
|
_SURVEY_SYSTEM = (
|
||||||
|
|
@ -236,7 +239,7 @@ with right_col:
|
||||||
image_path = str(img_file)
|
image_path = str(img_file)
|
||||||
|
|
||||||
insert_survey_response(
|
insert_survey_response(
|
||||||
DEFAULT_DB,
|
get_db_path(),
|
||||||
job_id=selected_job_id,
|
job_id=selected_job_id,
|
||||||
survey_name=survey_name,
|
survey_name=survey_name,
|
||||||
source=source,
|
source=source,
|
||||||
|
|
@ -256,7 +259,7 @@ with right_col:
|
||||||
# ── History ────────────────────────────────────────────────────────────────────
|
# ── History ────────────────────────────────────────────────────────────────────
|
||||||
st.divider()
|
st.divider()
|
||||||
st.subheader("📂 Response History")
|
st.subheader("📂 Response History")
|
||||||
history = get_survey_responses(DEFAULT_DB, job_id=selected_job_id)
|
history = get_survey_responses(get_db_path(), job_id=selected_job_id)
|
||||||
|
|
||||||
if not history:
|
if not history:
|
||||||
st.caption("No saved responses for this job yet.")
|
st.caption("No saved responses for this job yet.")
|
||||||
|
|
|
||||||
BIN
app/static/peregrine_logo.png
Normal file
BIN
app/static/peregrine_logo.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 298 KiB |
BIN
app/static/peregrine_logo_circle.png
Normal file
BIN
app/static/peregrine_logo_circle.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 276 KiB |
|
|
@ -1,7 +1,7 @@
|
||||||
"""
|
"""
|
||||||
Tier definitions and feature gates for Peregrine.
|
Tier definitions and feature gates for Peregrine.
|
||||||
|
|
||||||
Tiers: free < paid < premium
|
Tiers: free < paid < premium < ultra (ultra reserved; no Peregrine features use it yet)
|
||||||
FEATURES maps feature key → minimum tier required.
|
FEATURES maps feature key → minimum tier required.
|
||||||
Features not in FEATURES are available to all tiers (free).
|
Features not in FEATURES are available to all tiers (free).
|
||||||
|
|
||||||
|
|
@ -25,7 +25,11 @@ from __future__ import annotations
|
||||||
import os as _os
|
import os as _os
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
TIERS = ["free", "paid", "premium"]
|
from circuitforge_core.tiers import (
|
||||||
|
can_use as _core_can_use,
|
||||||
|
TIERS,
|
||||||
|
tier_label as _core_tier_label,
|
||||||
|
)
|
||||||
|
|
||||||
# Maps feature key → minimum tier string required.
|
# Maps feature key → minimum tier string required.
|
||||||
# Features absent from this dict are free (available to all).
|
# Features absent from this dict are free (available to all).
|
||||||
|
|
@ -60,8 +64,8 @@ FEATURES: dict[str, str] = {
|
||||||
"apple_calendar_sync": "paid",
|
"apple_calendar_sync": "paid",
|
||||||
"slack_notifications": "paid",
|
"slack_notifications": "paid",
|
||||||
|
|
||||||
# Beta UI access — stays gated (access management, not compute)
|
# Beta UI access — open to all tiers (access management, not compute)
|
||||||
"vue_ui_beta": "paid",
|
"vue_ui_beta": "free",
|
||||||
}
|
}
|
||||||
|
|
||||||
# Features that unlock when the user supplies any LLM backend (local or BYOK).
|
# Features that unlock when the user supplies any LLM backend (local or BYOK).
|
||||||
|
|
@ -132,25 +136,20 @@ def can_use(
|
||||||
Returns False for unknown/invalid tier strings.
|
Returns False for unknown/invalid tier strings.
|
||||||
"""
|
"""
|
||||||
effective_tier = demo_tier if (demo_tier is not None and _DEMO_MODE) else tier
|
effective_tier = demo_tier if (demo_tier is not None and _DEMO_MODE) else tier
|
||||||
required = FEATURES.get(feature)
|
# Pass Peregrine's BYOK_UNLOCKABLE via has_byok collapse — core's frozenset is empty
|
||||||
if required is None:
|
|
||||||
return True # not gated — available to all
|
|
||||||
if has_byok and feature in BYOK_UNLOCKABLE:
|
if has_byok and feature in BYOK_UNLOCKABLE:
|
||||||
return True
|
return True
|
||||||
try:
|
return _core_can_use(feature, effective_tier, _features=FEATURES)
|
||||||
return TIERS.index(effective_tier) >= TIERS.index(required)
|
|
||||||
except ValueError:
|
|
||||||
return False # invalid tier string
|
|
||||||
|
|
||||||
|
|
||||||
def tier_label(feature: str, has_byok: bool = False) -> str:
|
def tier_label(feature: str, has_byok: bool = False) -> str:
|
||||||
"""Return a display label for a locked feature, or '' if free/unlocked."""
|
"""Return a display label for a locked feature, or '' if free/unlocked."""
|
||||||
if has_byok and feature in BYOK_UNLOCKABLE:
|
if has_byok and feature in BYOK_UNLOCKABLE:
|
||||||
return ""
|
return ""
|
||||||
required = FEATURES.get(feature)
|
raw = _core_tier_label(feature, _features=FEATURES)
|
||||||
if required is None:
|
if not raw or raw == "free":
|
||||||
return ""
|
return ""
|
||||||
return "🔒 Paid" if required == "paid" else "⭐ Premium"
|
return "🔒 Paid" if raw == "paid" else "⭐ Premium"
|
||||||
|
|
||||||
|
|
||||||
def effective_tier(
|
def effective_tier(
|
||||||
|
|
|
||||||
|
|
@ -13,12 +13,15 @@
|
||||||
|
|
||||||
services:
|
services:
|
||||||
app:
|
app:
|
||||||
build: .
|
build:
|
||||||
|
context: ..
|
||||||
|
dockerfile: peregrine/Dockerfile.cfcore
|
||||||
container_name: peregrine-cloud
|
container_name: peregrine-cloud
|
||||||
ports:
|
ports:
|
||||||
- "8505:8501"
|
- "8505:8501"
|
||||||
volumes:
|
volumes:
|
||||||
- /devl/menagerie-data:/devl/menagerie-data # per-user data trees
|
- /devl/menagerie-data:/devl/menagerie-data # per-user data trees
|
||||||
|
- ./config/llm.cloud.yaml:/app/config/llm.yaml:ro # cloud-safe backends only (no claude_code/copilot/anthropic)
|
||||||
environment:
|
environment:
|
||||||
- CLOUD_MODE=true
|
- CLOUD_MODE=true
|
||||||
- CLOUD_DATA_ROOT=/devl/menagerie-data
|
- CLOUD_DATA_ROOT=/devl/menagerie-data
|
||||||
|
|
@ -31,7 +34,10 @@ services:
|
||||||
- DOCS_DIR=/tmp/cloud-docs
|
- DOCS_DIR=/tmp/cloud-docs
|
||||||
- STREAMLIT_SERVER_BASE_URL_PATH=peregrine
|
- STREAMLIT_SERVER_BASE_URL_PATH=peregrine
|
||||||
- PYTHONUNBUFFERED=1
|
- PYTHONUNBUFFERED=1
|
||||||
|
- PEREGRINE_CADDY_PROXY=1
|
||||||
|
- CF_ORCH_URL=http://host.docker.internal:7700
|
||||||
- DEMO_MODE=false
|
- DEMO_MODE=false
|
||||||
|
- FORGEJO_API_TOKEN=${FORGEJO_API_TOKEN:-}
|
||||||
depends_on:
|
depends_on:
|
||||||
searxng:
|
searxng:
|
||||||
condition: service_healthy
|
condition: service_healthy
|
||||||
|
|
@ -39,14 +45,48 @@ services:
|
||||||
- "host.docker.internal:host-gateway"
|
- "host.docker.internal:host-gateway"
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
|
|
||||||
|
api:
|
||||||
|
build:
|
||||||
|
context: ..
|
||||||
|
dockerfile: peregrine/Dockerfile.cfcore
|
||||||
|
command: >
|
||||||
|
bash -c "uvicorn dev_api:app --host 0.0.0.0 --port 8601"
|
||||||
|
ports:
|
||||||
|
- "127.0.0.1:8601:8601" # localhost-only — Caddy + avocet imitate tab
|
||||||
|
volumes:
|
||||||
|
- /devl/menagerie-data:/devl/menagerie-data
|
||||||
|
- ./config/llm.cloud.yaml:/app/config/llm.yaml:ro
|
||||||
|
environment:
|
||||||
|
- CLOUD_MODE=true
|
||||||
|
- CLOUD_DATA_ROOT=/devl/menagerie-data
|
||||||
|
- STAGING_DB=/devl/menagerie-data/cloud-default.db
|
||||||
|
- DIRECTUS_JWT_SECRET=${DIRECTUS_JWT_SECRET}
|
||||||
|
- CF_SERVER_SECRET=${CF_SERVER_SECRET}
|
||||||
|
- PLATFORM_DB_URL=${PLATFORM_DB_URL}
|
||||||
|
- HEIMDALL_URL=${HEIMDALL_URL:-http://cf-license:8000}
|
||||||
|
- HEIMDALL_ADMIN_TOKEN=${HEIMDALL_ADMIN_TOKEN}
|
||||||
|
- PYTHONUNBUFFERED=1
|
||||||
|
- FORGEJO_API_TOKEN=${FORGEJO_API_TOKEN:-}
|
||||||
|
- CF_ORCH_URL=http://host.docker.internal:7700
|
||||||
|
extra_hosts:
|
||||||
|
- "host.docker.internal:host-gateway"
|
||||||
|
restart: unless-stopped
|
||||||
|
|
||||||
web:
|
web:
|
||||||
build:
|
build:
|
||||||
context: .
|
context: .
|
||||||
dockerfile: docker/web/Dockerfile
|
dockerfile: docker/web/Dockerfile
|
||||||
|
args:
|
||||||
|
VITE_BASE_PATH: /peregrine/
|
||||||
ports:
|
ports:
|
||||||
- "8508:80"
|
- "8508:80"
|
||||||
|
depends_on:
|
||||||
|
- api
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
|
|
||||||
|
# cf-orch-agent: not needed in cloud — a host-native agent already runs on :7701
|
||||||
|
# and is registered with the coordinator. app/api reach it via CF_ORCH_URL.
|
||||||
|
|
||||||
searxng:
|
searxng:
|
||||||
image: searxng/searxng:latest
|
image: searxng/searxng:latest
|
||||||
volumes:
|
volumes:
|
||||||
|
|
|
||||||
|
|
@ -42,6 +42,8 @@ services:
|
||||||
build:
|
build:
|
||||||
context: .
|
context: .
|
||||||
dockerfile: docker/web/Dockerfile
|
dockerfile: docker/web/Dockerfile
|
||||||
|
args:
|
||||||
|
VITE_BASE_PATH: /peregrine/
|
||||||
ports:
|
ports:
|
||||||
- "8507:80"
|
- "8507:80"
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
|
|
|
||||||
35
compose.test-cfcore.yml
Normal file
35
compose.test-cfcore.yml
Normal file
|
|
@ -0,0 +1,35 @@
|
||||||
|
# compose.test-cfcore.yml — single-user test instance for circuitforge-core integration
|
||||||
|
#
|
||||||
|
# Run from the PARENT directory of peregrine/ (the build context must include
|
||||||
|
# both peregrine/ and circuitforge-core/ as siblings):
|
||||||
|
#
|
||||||
|
# cd /devl (or /Library/Development/CircuitForge on dev)
|
||||||
|
# docker compose -f peregrine/compose.test-cfcore.yml --project-name peregrine-test up -d
|
||||||
|
# docker compose -f peregrine/compose.test-cfcore.yml --project-name peregrine-test logs -f
|
||||||
|
# docker compose -f peregrine/compose.test-cfcore.yml --project-name peregrine-test down
|
||||||
|
#
|
||||||
|
# UI: http://localhost:8516
|
||||||
|
# Purpose: smoke-test circuitforge-core shims (db, llm_router, tiers, task_scheduler)
|
||||||
|
# before promoting cfcore integration to the production cloud instance.
|
||||||
|
|
||||||
|
services:
|
||||||
|
app:
|
||||||
|
build:
|
||||||
|
context: ..
|
||||||
|
dockerfile: peregrine/Dockerfile.cfcore
|
||||||
|
container_name: peregrine-test-cfcore
|
||||||
|
ports:
|
||||||
|
- "8516:8501"
|
||||||
|
volumes:
|
||||||
|
- /devl/job-seeker:/devl/job-seeker
|
||||||
|
- /devl/job-seeker/config:/app/config
|
||||||
|
- /devl/job-seeker/config/llm.docker.yaml:/app/config/llm.yaml:ro
|
||||||
|
- /devl/job-seeker/config/user.docker.yaml:/app/config/user.yaml:ro
|
||||||
|
environment:
|
||||||
|
- STAGING_DB=/devl/job-seeker/staging.db
|
||||||
|
- PYTHONUNBUFFERED=1
|
||||||
|
- STREAMLIT_SERVER_BASE_URL_PATH=
|
||||||
|
- CF_ORCH_URL=http://host.docker.internal:7700
|
||||||
|
extra_hosts:
|
||||||
|
- "host.docker.internal:host-gateway"
|
||||||
|
restart: "no"
|
||||||
69
compose.yml
69
compose.yml
|
|
@ -1,9 +1,11 @@
|
||||||
# compose.yml — Peregrine by Circuit Forge LLC
|
# compose.yml — Peregrine by Circuit Forge LLC
|
||||||
# Profiles: remote | cpu | single-gpu | dual-gpu-ollama | dual-gpu-vllm | dual-gpu-mixed
|
# Profiles: remote | cpu | single-gpu | dual-gpu-ollama
|
||||||
services:
|
services:
|
||||||
|
|
||||||
app:
|
app:
|
||||||
build: .
|
build:
|
||||||
|
context: ..
|
||||||
|
dockerfile: peregrine/Dockerfile.cfcore
|
||||||
command: >
|
command: >
|
||||||
bash -c "streamlit run app/app.py
|
bash -c "streamlit run app/app.py
|
||||||
--server.port=8501
|
--server.port=8501
|
||||||
|
|
@ -33,6 +35,7 @@ services:
|
||||||
- FORGEJO_API_URL=${FORGEJO_API_URL:-}
|
- FORGEJO_API_URL=${FORGEJO_API_URL:-}
|
||||||
- PYTHONUNBUFFERED=1
|
- PYTHONUNBUFFERED=1
|
||||||
- PYTHONLOGGING=WARNING
|
- PYTHONLOGGING=WARNING
|
||||||
|
- PEREGRINE_CADDY_PROXY=1
|
||||||
depends_on:
|
depends_on:
|
||||||
searxng:
|
searxng:
|
||||||
condition: service_healthy
|
condition: service_healthy
|
||||||
|
|
@ -40,12 +43,38 @@ services:
|
||||||
- "host.docker.internal:host-gateway"
|
- "host.docker.internal:host-gateway"
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
|
|
||||||
|
api:
|
||||||
|
build:
|
||||||
|
context: ..
|
||||||
|
dockerfile: peregrine/Dockerfile.cfcore
|
||||||
|
command: >
|
||||||
|
bash -c "uvicorn dev_api:app --host 0.0.0.0 --port 8601"
|
||||||
|
volumes:
|
||||||
|
- ./config:/app/config
|
||||||
|
- ./data:/app/data
|
||||||
|
- ${DOCS_DIR:-~/Documents/JobSearch}:/docs
|
||||||
|
environment:
|
||||||
|
- STAGING_DB=/app/data/staging.db
|
||||||
|
- DOCS_DIR=/docs
|
||||||
|
- ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY:-}
|
||||||
|
- OPENAI_COMPAT_URL=${OPENAI_COMPAT_URL:-}
|
||||||
|
- OPENAI_COMPAT_KEY=${OPENAI_COMPAT_KEY:-}
|
||||||
|
- PEREGRINE_GPU_COUNT=${PEREGRINE_GPU_COUNT:-0}
|
||||||
|
- PEREGRINE_GPU_NAMES=${PEREGRINE_GPU_NAMES:-}
|
||||||
|
- CF_ORCH_URL=${CF_ORCH_URL:-http://host.docker.internal:7700}
|
||||||
|
- PYTHONUNBUFFERED=1
|
||||||
|
extra_hosts:
|
||||||
|
- "host.docker.internal:host-gateway"
|
||||||
|
restart: unless-stopped
|
||||||
|
|
||||||
web:
|
web:
|
||||||
build:
|
build:
|
||||||
context: .
|
context: .
|
||||||
dockerfile: docker/web/Dockerfile
|
dockerfile: docker/web/Dockerfile
|
||||||
ports:
|
ports:
|
||||||
- "${VUE_PORT:-8506}:80"
|
- "${VUE_PORT:-8506}:80"
|
||||||
|
depends_on:
|
||||||
|
- api
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
|
|
||||||
searxng:
|
searxng:
|
||||||
|
|
@ -101,21 +130,29 @@ services:
|
||||||
profiles: [single-gpu, dual-gpu-ollama, dual-gpu-vllm, dual-gpu-mixed]
|
profiles: [single-gpu, dual-gpu-ollama, dual-gpu-vllm, dual-gpu-mixed]
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
|
|
||||||
vllm:
|
cf-orch-agent:
|
||||||
image: vllm/vllm-openai:latest
|
build:
|
||||||
|
context: ..
|
||||||
|
dockerfile: peregrine/Dockerfile.cfcore
|
||||||
|
command: ["/bin/sh", "/app/docker/cf-orch-agent/start.sh"]
|
||||||
ports:
|
ports:
|
||||||
- "${VLLM_PORT:-8000}:8000"
|
- "${CF_ORCH_AGENT_PORT:-7701}:7701"
|
||||||
volumes:
|
environment:
|
||||||
- ${VLLM_MODELS_DIR:-~/models/vllm}:/models
|
- CF_ORCH_COORDINATOR_URL=${CF_ORCH_COORDINATOR_URL:-http://host.docker.internal:7700}
|
||||||
command: >
|
- CF_ORCH_NODE_ID=${CF_ORCH_NODE_ID:-peregrine}
|
||||||
--model /models/${VLLM_MODEL:-Ouro-1.4B}
|
- CF_ORCH_AGENT_PORT=${CF_ORCH_AGENT_PORT:-7701}
|
||||||
--trust-remote-code
|
- CF_ORCH_ADVERTISE_HOST=${CF_ORCH_ADVERTISE_HOST:-}
|
||||||
--max-model-len 4096
|
- PYTHONUNBUFFERED=1
|
||||||
--gpu-memory-utilization 0.75
|
extra_hosts:
|
||||||
--enforce-eager
|
- "host.docker.internal:host-gateway"
|
||||||
--max-num-seqs 8
|
deploy:
|
||||||
--cpu-offload-gb ${CPU_OFFLOAD_GB:-0}
|
resources:
|
||||||
profiles: [dual-gpu-vllm, dual-gpu-mixed]
|
reservations:
|
||||||
|
devices:
|
||||||
|
- driver: nvidia
|
||||||
|
count: all
|
||||||
|
capabilities: [gpu]
|
||||||
|
profiles: [single-gpu, dual-gpu-ollama, dual-gpu-vllm, dual-gpu-mixed]
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
|
|
||||||
finetune:
|
finetune:
|
||||||
|
|
|
||||||
23
config/label_tool.yaml.example
Normal file
23
config/label_tool.yaml.example
Normal file
|
|
@ -0,0 +1,23 @@
|
||||||
|
# config/label_tool.yaml — Multi-account IMAP config for the email label tool
|
||||||
|
# Copy to config/label_tool.yaml and fill in your credentials.
|
||||||
|
# This file is gitignored.
|
||||||
|
|
||||||
|
accounts:
|
||||||
|
- name: "Gmail"
|
||||||
|
host: "imap.gmail.com"
|
||||||
|
port: 993
|
||||||
|
username: "you@gmail.com"
|
||||||
|
password: "your-app-password" # Use an App Password, not your login password
|
||||||
|
folder: "INBOX"
|
||||||
|
days_back: 90
|
||||||
|
|
||||||
|
- name: "Outlook"
|
||||||
|
host: "outlook.office365.com"
|
||||||
|
port: 993
|
||||||
|
username: "you@outlook.com"
|
||||||
|
password: "your-app-password"
|
||||||
|
folder: "INBOX"
|
||||||
|
days_back: 90
|
||||||
|
|
||||||
|
# Optional: limit emails fetched per account per run (0 = unlimited)
|
||||||
|
max_per_account: 500
|
||||||
72
config/llm.cloud.yaml
Normal file
72
config/llm.cloud.yaml
Normal file
|
|
@ -0,0 +1,72 @@
|
||||||
|
backends:
|
||||||
|
anthropic:
|
||||||
|
api_key_env: ANTHROPIC_API_KEY
|
||||||
|
enabled: false
|
||||||
|
model: claude-sonnet-4-6
|
||||||
|
supports_images: true
|
||||||
|
type: anthropic
|
||||||
|
claude_code:
|
||||||
|
api_key: any
|
||||||
|
base_url: http://localhost:3009/v1
|
||||||
|
enabled: false
|
||||||
|
model: claude-code-terminal
|
||||||
|
supports_images: true
|
||||||
|
type: openai_compat
|
||||||
|
github_copilot:
|
||||||
|
api_key: any
|
||||||
|
base_url: http://localhost:3010/v1
|
||||||
|
enabled: false
|
||||||
|
model: gpt-4o
|
||||||
|
supports_images: false
|
||||||
|
type: openai_compat
|
||||||
|
ollama:
|
||||||
|
api_key: ollama
|
||||||
|
base_url: http://host.docker.internal:11434/v1
|
||||||
|
enabled: true
|
||||||
|
model: llama3.1:8b # generic — no personal fine-tunes in cloud
|
||||||
|
supports_images: false
|
||||||
|
type: openai_compat
|
||||||
|
ollama_research:
|
||||||
|
api_key: ollama
|
||||||
|
base_url: http://host.docker.internal:11434/v1
|
||||||
|
enabled: true
|
||||||
|
model: llama3.1:8b
|
||||||
|
supports_images: false
|
||||||
|
type: openai_compat
|
||||||
|
vision_service:
|
||||||
|
base_url: http://host.docker.internal:8002
|
||||||
|
enabled: true
|
||||||
|
supports_images: true
|
||||||
|
type: vision_service
|
||||||
|
vllm:
|
||||||
|
api_key: ''
|
||||||
|
base_url: http://host.docker.internal:8000/v1
|
||||||
|
enabled: true
|
||||||
|
model: __auto__
|
||||||
|
supports_images: false
|
||||||
|
type: openai_compat
|
||||||
|
cf_orch:
|
||||||
|
service: vllm
|
||||||
|
model_candidates:
|
||||||
|
- Qwen2.5-3B-Instruct
|
||||||
|
ttl_s: 300
|
||||||
|
vllm_research:
|
||||||
|
api_key: ''
|
||||||
|
base_url: http://host.docker.internal:8000/v1
|
||||||
|
enabled: true
|
||||||
|
model: __auto__
|
||||||
|
supports_images: false
|
||||||
|
type: openai_compat
|
||||||
|
cf_orch:
|
||||||
|
service: vllm
|
||||||
|
model_candidates:
|
||||||
|
- Qwen2.5-3B-Instruct
|
||||||
|
ttl_s: 300
|
||||||
|
fallback_order:
|
||||||
|
- vllm
|
||||||
|
- ollama
|
||||||
|
research_fallback_order:
|
||||||
|
- vllm_research
|
||||||
|
- ollama_research
|
||||||
|
vision_fallback_order:
|
||||||
|
- vision_service
|
||||||
|
|
@ -1,4 +1,11 @@
|
||||||
backends:
|
backends:
|
||||||
|
cf_text:
|
||||||
|
api_key: any
|
||||||
|
base_url: http://host.docker.internal:8006/v1
|
||||||
|
enabled: true
|
||||||
|
model: cf-text
|
||||||
|
supports_images: false
|
||||||
|
type: openai_compat
|
||||||
anthropic:
|
anthropic:
|
||||||
api_key_env: ANTHROPIC_API_KEY
|
api_key_env: ANTHROPIC_API_KEY
|
||||||
enabled: false
|
enabled: false
|
||||||
|
|
@ -28,13 +35,13 @@ backends:
|
||||||
type: openai_compat
|
type: openai_compat
|
||||||
ollama_research:
|
ollama_research:
|
||||||
api_key: ollama
|
api_key: ollama
|
||||||
base_url: http://host.docker.internal:11434/v1
|
base_url: http://ollama_research:11434/v1
|
||||||
enabled: true
|
enabled: true
|
||||||
model: llama3.2:3b
|
model: llama3.1:8b
|
||||||
supports_images: false
|
supports_images: false
|
||||||
type: openai_compat
|
type: openai_compat
|
||||||
vision_service:
|
vision_service:
|
||||||
base_url: http://host.docker.internal:8002
|
base_url: http://vision:8002
|
||||||
enabled: true
|
enabled: true
|
||||||
supports_images: true
|
supports_images: true
|
||||||
type: vision_service
|
type: vision_service
|
||||||
|
|
@ -45,6 +52,11 @@ backends:
|
||||||
model: __auto__
|
model: __auto__
|
||||||
supports_images: false
|
supports_images: false
|
||||||
type: openai_compat
|
type: openai_compat
|
||||||
|
cf_orch:
|
||||||
|
service: vllm
|
||||||
|
model_candidates:
|
||||||
|
- Qwen2.5-3B-Instruct
|
||||||
|
ttl_s: 300
|
||||||
vllm_research:
|
vllm_research:
|
||||||
api_key: ''
|
api_key: ''
|
||||||
base_url: http://host.docker.internal:8000/v1
|
base_url: http://host.docker.internal:8000/v1
|
||||||
|
|
@ -53,6 +65,7 @@ backends:
|
||||||
supports_images: false
|
supports_images: false
|
||||||
type: openai_compat
|
type: openai_compat
|
||||||
fallback_order:
|
fallback_order:
|
||||||
|
- cf_text
|
||||||
- ollama
|
- ollama
|
||||||
- claude_code
|
- claude_code
|
||||||
- vllm
|
- vllm
|
||||||
|
|
@ -62,6 +75,7 @@ research_fallback_order:
|
||||||
- claude_code
|
- claude_code
|
||||||
- vllm_research
|
- vllm_research
|
||||||
- ollama_research
|
- ollama_research
|
||||||
|
- cf_text
|
||||||
- github_copilot
|
- github_copilot
|
||||||
- anthropic
|
- anthropic
|
||||||
vision_fallback_order:
|
vision_fallback_order:
|
||||||
|
|
|
||||||
258
config/mission_domains.yaml
Normal file
258
config/mission_domains.yaml
Normal file
|
|
@ -0,0 +1,258 @@
|
||||||
|
# Mission domain signal configuration for cover letter generation.
|
||||||
|
#
|
||||||
|
# When a job description or company name matches signals in a domain,
|
||||||
|
# the cover letter prompt injects a Para 3 hint to reflect genuine personal
|
||||||
|
# alignment. Dict order = match priority (first match wins).
|
||||||
|
#
|
||||||
|
# Users can add custom domains under `mission_preferences` in user.yaml.
|
||||||
|
# Any key in mission_preferences that is NOT listed here is treated as a
|
||||||
|
# user-defined domain: no signal detection, custom note only (skipped if
|
||||||
|
# the job description doesn't contain the key as a literal word).
|
||||||
|
#
|
||||||
|
# Schema per domain:
|
||||||
|
# signals: list[str] — lowercase keywords to scan for in "company + JD"
|
||||||
|
# default_note: str — hint injected when user has no custom note for domain
|
||||||
|
|
||||||
|
domains:
|
||||||
|
music:
|
||||||
|
signals:
|
||||||
|
- music
|
||||||
|
- spotify
|
||||||
|
- tidal
|
||||||
|
- soundcloud
|
||||||
|
- bandcamp
|
||||||
|
- apple music
|
||||||
|
- distrokid
|
||||||
|
- cd baby
|
||||||
|
- landr
|
||||||
|
- beatport
|
||||||
|
- reverb
|
||||||
|
- vinyl
|
||||||
|
- streaming
|
||||||
|
- artist
|
||||||
|
- label
|
||||||
|
- live nation
|
||||||
|
- ticketmaster
|
||||||
|
- aeg
|
||||||
|
- songkick
|
||||||
|
- concert
|
||||||
|
- venue
|
||||||
|
- festival
|
||||||
|
- audio
|
||||||
|
- podcast
|
||||||
|
- studio
|
||||||
|
- record
|
||||||
|
- musician
|
||||||
|
- playlist
|
||||||
|
default_note: >
|
||||||
|
This company is in the music industry — an industry the candidate finds genuinely
|
||||||
|
compelling. Para 3 should warmly and specifically reflect this authentic alignment,
|
||||||
|
not as a generic fan statement, but as an honest statement of where they'd love to
|
||||||
|
apply their skills.
|
||||||
|
|
||||||
|
animal_welfare:
|
||||||
|
signals:
|
||||||
|
- animal
|
||||||
|
- shelter
|
||||||
|
- rescue
|
||||||
|
- humane society
|
||||||
|
- spca
|
||||||
|
- aspca
|
||||||
|
- veterinary
|
||||||
|
- "vet "
|
||||||
|
- wildlife
|
||||||
|
- "pet "
|
||||||
|
- adoption
|
||||||
|
- foster
|
||||||
|
- dog
|
||||||
|
- cat
|
||||||
|
- feline
|
||||||
|
- canine
|
||||||
|
- sanctuary
|
||||||
|
- zoo
|
||||||
|
default_note: >
|
||||||
|
This organization works in animal welfare/rescue — a mission the candidate finds
|
||||||
|
genuinely meaningful. Para 3 should reflect this authentic connection warmly and
|
||||||
|
specifically, tying their skills to this mission.
|
||||||
|
|
||||||
|
education:
|
||||||
|
signals:
|
||||||
|
- education
|
||||||
|
- school
|
||||||
|
- learning
|
||||||
|
- student
|
||||||
|
- edtech
|
||||||
|
- classroom
|
||||||
|
- curriculum
|
||||||
|
- tutoring
|
||||||
|
- academic
|
||||||
|
- university
|
||||||
|
- kids
|
||||||
|
- children
|
||||||
|
- youth
|
||||||
|
- literacy
|
||||||
|
- khan academy
|
||||||
|
- duolingo
|
||||||
|
- chegg
|
||||||
|
- coursera
|
||||||
|
- instructure
|
||||||
|
- canvas lms
|
||||||
|
- clever
|
||||||
|
- district
|
||||||
|
- teacher
|
||||||
|
- k-12
|
||||||
|
- k12
|
||||||
|
- grade
|
||||||
|
- pedagogy
|
||||||
|
default_note: >
|
||||||
|
This company works in education or EdTech — a domain that resonates with the
|
||||||
|
candidate's values. Para 3 should reflect this authentic connection specifically
|
||||||
|
and warmly.
|
||||||
|
|
||||||
|
social_impact:
|
||||||
|
signals:
|
||||||
|
- nonprofit
|
||||||
|
- non-profit
|
||||||
|
- "501(c)"
|
||||||
|
- social impact
|
||||||
|
- mission-driven
|
||||||
|
- public benefit
|
||||||
|
- community
|
||||||
|
- underserved
|
||||||
|
- equity
|
||||||
|
- justice
|
||||||
|
- humanitarian
|
||||||
|
- advocacy
|
||||||
|
- charity
|
||||||
|
- foundation
|
||||||
|
- ngo
|
||||||
|
- social good
|
||||||
|
- civic
|
||||||
|
- public health
|
||||||
|
- mental health
|
||||||
|
- food security
|
||||||
|
- housing
|
||||||
|
- homelessness
|
||||||
|
- poverty
|
||||||
|
- workforce development
|
||||||
|
default_note: >
|
||||||
|
This organization is mission-driven / social impact focused — exactly the kind of
|
||||||
|
cause the candidate cares deeply about. Para 3 should warmly reflect their genuine
|
||||||
|
desire to apply their skills to work that makes a real difference in people's lives.
|
||||||
|
|
||||||
|
# Health listed last — genuine but lower-priority connection.
|
||||||
|
health:
|
||||||
|
signals:
|
||||||
|
- patient
|
||||||
|
- patients
|
||||||
|
- healthcare
|
||||||
|
- health tech
|
||||||
|
- healthtech
|
||||||
|
- pharma
|
||||||
|
- pharmaceutical
|
||||||
|
- clinical
|
||||||
|
- medical
|
||||||
|
- hospital
|
||||||
|
- clinic
|
||||||
|
- therapy
|
||||||
|
- therapist
|
||||||
|
- rare disease
|
||||||
|
- life sciences
|
||||||
|
- life science
|
||||||
|
- treatment
|
||||||
|
- prescription
|
||||||
|
- biotech
|
||||||
|
- biopharma
|
||||||
|
- medtech
|
||||||
|
- behavioral health
|
||||||
|
- population health
|
||||||
|
- care management
|
||||||
|
- care coordination
|
||||||
|
- oncology
|
||||||
|
- specialty pharmacy
|
||||||
|
- provider network
|
||||||
|
- payer
|
||||||
|
- health plan
|
||||||
|
- benefits administration
|
||||||
|
- ehr
|
||||||
|
- emr
|
||||||
|
- fhir
|
||||||
|
- hipaa
|
||||||
|
default_note: >
|
||||||
|
This company works in healthcare, life sciences, or patient care.
|
||||||
|
Do NOT write about the candidate's passion for pharmaceuticals or healthcare as an
|
||||||
|
industry. Instead, Para 3 should reflect genuine care for the PEOPLE these companies
|
||||||
|
exist to serve: those navigating complex, often invisible, or unusual health journeys;
|
||||||
|
patients facing rare or poorly understood conditions; individuals whose situations don't
|
||||||
|
fit a clean category. The connection is to the humans behind the data, not the industry.
|
||||||
|
If the user has provided a personal note, use that to anchor Para 3 specifically.
|
||||||
|
|
||||||
|
# Extended domains — added 2026-04-12
|
||||||
|
|
||||||
|
privacy:
|
||||||
|
signals:
|
||||||
|
- privacy
|
||||||
|
- data rights
|
||||||
|
- surveillance
|
||||||
|
- gdpr
|
||||||
|
- ccpa
|
||||||
|
- anonymity
|
||||||
|
- end-to-end encryption
|
||||||
|
- open source
|
||||||
|
- decentralized
|
||||||
|
- self-hosted
|
||||||
|
- zero knowledge
|
||||||
|
- data sovereignty
|
||||||
|
- digital rights
|
||||||
|
- eff
|
||||||
|
- electronic frontier
|
||||||
|
default_note: >
|
||||||
|
This company operates in the privacy, data rights, or digital rights space —
|
||||||
|
a domain the candidate genuinely cares about. Para 3 should reflect their
|
||||||
|
authentic belief in user autonomy and data sovereignty, not as abstract principle
|
||||||
|
but as something that shapes how they approach their work.
|
||||||
|
|
||||||
|
accessibility:
|
||||||
|
signals:
|
||||||
|
- accessibility
|
||||||
|
- assistive technology
|
||||||
|
- a11y
|
||||||
|
- wcag
|
||||||
|
- screen reader
|
||||||
|
- adaptive technology
|
||||||
|
- disability
|
||||||
|
- neurodivergent
|
||||||
|
- neurodiversity
|
||||||
|
- adhd
|
||||||
|
- autism
|
||||||
|
- inclusive design
|
||||||
|
- universal design
|
||||||
|
- accommodations
|
||||||
|
- ada compliance
|
||||||
|
default_note: >
|
||||||
|
This company works in accessibility or assistive technology — a mission the
|
||||||
|
candidate feels genuine, personal alignment with. Para 3 should reflect authentic
|
||||||
|
investment in building tools and systems that work for everyone, especially those
|
||||||
|
whose needs are most often overlooked in mainstream product development.
|
||||||
|
|
||||||
|
open_source:
|
||||||
|
signals:
|
||||||
|
- open source
|
||||||
|
- open-source
|
||||||
|
- linux foundation
|
||||||
|
- apache foundation
|
||||||
|
- free software
|
||||||
|
- gnu
|
||||||
|
- contributor
|
||||||
|
- maintainer
|
||||||
|
- upstream
|
||||||
|
- community-driven
|
||||||
|
- innersource
|
||||||
|
- copyleft
|
||||||
|
- mozilla
|
||||||
|
- wikimedia
|
||||||
|
default_note: >
|
||||||
|
This organization is rooted in open source culture — a community the candidate
|
||||||
|
actively participates in and believes in. Para 3 should reflect genuine investment
|
||||||
|
in the collaborative, transparent, and community-driven approach to building
|
||||||
|
software that lasts.
|
||||||
4177
dev-api.py
Normal file
4177
dev-api.py
Normal file
File diff suppressed because it is too large
Load diff
1
dev_api.py
Symbolic link
1
dev_api.py
Symbolic link
|
|
@ -0,0 +1 @@
|
||||||
|
dev-api.py
|
||||||
14
docker/cf-orch-agent/start.sh
Normal file
14
docker/cf-orch-agent/start.sh
Normal file
|
|
@ -0,0 +1,14 @@
|
||||||
|
#!/bin/sh
|
||||||
|
# Start the cf-orch agent. Adds --advertise-host only when CF_ORCH_ADVERTISE_HOST is set.
|
||||||
|
set -e
|
||||||
|
|
||||||
|
ARGS="--coordinator ${CF_ORCH_COORDINATOR_URL:-http://host.docker.internal:7700} \
|
||||||
|
--node-id ${CF_ORCH_NODE_ID:-peregrine} \
|
||||||
|
--host 0.0.0.0 \
|
||||||
|
--port ${CF_ORCH_AGENT_PORT:-7701}"
|
||||||
|
|
||||||
|
if [ -n "${CF_ORCH_ADVERTISE_HOST}" ]; then
|
||||||
|
ARGS="$ARGS --advertise-host ${CF_ORCH_ADVERTISE_HOST}"
|
||||||
|
fi
|
||||||
|
|
||||||
|
exec cf-orch agent $ARGS
|
||||||
|
|
@ -4,6 +4,8 @@ WORKDIR /app
|
||||||
COPY web/package*.json ./
|
COPY web/package*.json ./
|
||||||
RUN npm ci --prefer-offline
|
RUN npm ci --prefer-offline
|
||||||
COPY web/ ./
|
COPY web/ ./
|
||||||
|
ARG VITE_BASE_PATH=/
|
||||||
|
ENV VITE_BASE_PATH=${VITE_BASE_PATH}
|
||||||
RUN npm run build
|
RUN npm run build
|
||||||
|
|
||||||
# Stage 2: serve
|
# Stage 2: serve
|
||||||
|
|
|
||||||
|
|
@ -2,12 +2,18 @@ server {
|
||||||
listen 80;
|
listen 80;
|
||||||
server_name _;
|
server_name _;
|
||||||
|
|
||||||
|
client_max_body_size 20m;
|
||||||
|
|
||||||
root /usr/share/nginx/html;
|
root /usr/share/nginx/html;
|
||||||
index index.html;
|
index index.html;
|
||||||
|
|
||||||
# SPA fallback
|
# Proxy API calls to the FastAPI backend service
|
||||||
location / {
|
location /api/ {
|
||||||
try_files $uri $uri/ /index.html;
|
proxy_pass http://api:8601;
|
||||||
|
proxy_set_header Host $host;
|
||||||
|
proxy_set_header X-Real-IP $remote_addr;
|
||||||
|
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||||
|
proxy_read_timeout 120s;
|
||||||
}
|
}
|
||||||
|
|
||||||
# Cache static assets
|
# Cache static assets
|
||||||
|
|
@ -15,4 +21,9 @@ server {
|
||||||
expires 1y;
|
expires 1y;
|
||||||
add_header Cache-Control "public, immutable";
|
add_header Cache-Control "public, immutable";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# SPA fallback — must come after API and assets
|
||||||
|
location / {
|
||||||
|
try_files $uri $uri/ /index.html;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -144,7 +144,7 @@ Shipped in v0.4.0. Ongoing maintenance and known decisions:
|
||||||
|
|
||||||
## Container Runtime
|
## Container Runtime
|
||||||
|
|
||||||
- ~~**Podman support**~~ — ✅ Done: `Makefile` auto-detects `docker compose` / `podman compose` / `podman-compose`; `compose.podman-gpu.yml` CDI override for GPU profiles; `setup.sh` detects existing Podman and skips Docker install.
|
- ~~**Podman support**~~ — ✅ Done: `Makefile` auto-detects `docker compose` / `podman compose` / `podman-compose`; `compose.podman-gpu.yml` CDI override for GPU profiles; `install.sh` detects existing Podman and skips Docker install.
|
||||||
- **FastAPI migration path** — When concurrent-user scale demands it: port Streamlit pages to FastAPI + React/HTMX, keep `scripts/` layer unchanged, replace daemon threads with Celery + Redis. The `scripts/` separation already makes this clean.
|
- **FastAPI migration path** — When concurrent-user scale demands it: port Streamlit pages to FastAPI + React/HTMX, keep `scripts/` layer unchanged, replace daemon threads with Celery + Redis. The `scripts/` separation already makes this clean.
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
|
||||||
|
|
@ -102,6 +102,23 @@ Before opening a pull request:
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
|
## Database Migrations
|
||||||
|
|
||||||
|
Peregrine uses a numbered SQL migration system (Rails-style). Each migration is a `.sql` file in the `migrations/` directory at the repo root, named `NNN_description.sql` (e.g. `002_add_foo_column.sql`). Applied migrations are tracked in a `schema_migrations` table in each user database.
|
||||||
|
|
||||||
|
### Adding a migration
|
||||||
|
|
||||||
|
1. Create `migrations/NNN_description.sql` where `NNN` is the next sequential number (zero-padded to 3 digits).
|
||||||
|
2. Write standard SQL — `CREATE TABLE IF NOT EXISTS`, `ALTER TABLE ADD COLUMN`, etc. Keep each migration idempotent where possible.
|
||||||
|
3. Do **not** modify `scripts/db.py`'s legacy `_MIGRATIONS` lists — those are superseded and will be removed once all active databases have been bootstrapped by the migration runner.
|
||||||
|
4. The runner (`scripts/db_migrate.py`) applies pending migrations at startup automatically (both FastAPI and Streamlit paths call `migrate_db(db_path)`).
|
||||||
|
|
||||||
|
### Rollbacks
|
||||||
|
|
||||||
|
SQLite does not support transactional DDL for all statement types. Write forward-only migrations. If you need to undo a schema change, add a new migration that reverses it.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
## What NOT to Do
|
## What NOT to Do
|
||||||
|
|
||||||
- Do not commit `config/user.yaml`, `config/notion.yaml`, `config/email.yaml`, `config/adzuna.yaml`, or any `config/integrations/*.yaml` — all are gitignored
|
- Do not commit `config/user.yaml`, `config/notion.yaml`, `config/email.yaml`, `config/adzuna.yaml`, or any `config/integrations/*.yaml` — all are gitignored
|
||||||
|
|
|
||||||
|
|
@ -7,7 +7,7 @@ This page walks through a full Peregrine installation from scratch.
|
||||||
## Prerequisites
|
## Prerequisites
|
||||||
|
|
||||||
- **Git** — to clone the repository
|
- **Git** — to clone the repository
|
||||||
- **Internet connection** — `setup.sh` downloads Docker and other dependencies
|
- **Internet connection** — `install.sh` downloads Docker and other dependencies
|
||||||
- **Operating system**: Ubuntu/Debian, Fedora/RHEL, Arch Linux, or macOS (with Docker Desktop)
|
- **Operating system**: Ubuntu/Debian, Fedora/RHEL, Arch Linux, or macOS (with Docker Desktop)
|
||||||
|
|
||||||
!!! warning "Windows"
|
!!! warning "Windows"
|
||||||
|
|
@ -18,19 +18,19 @@ This page walks through a full Peregrine installation from scratch.
|
||||||
## Step 1 — Clone the repository
|
## Step 1 — Clone the repository
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
git clone https://git.circuitforge.io/circuitforge/peregrine
|
git clone https://git.opensourcesolarpunk.com/Circuit-Forge/peregrine
|
||||||
cd peregrine
|
cd peregrine
|
||||||
```
|
```
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## Step 2 — Run setup.sh
|
## Step 2 — Run install.sh
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
bash setup.sh
|
bash install.sh
|
||||||
```
|
```
|
||||||
|
|
||||||
`setup.sh` performs the following automatically:
|
`install.sh` performs the following automatically:
|
||||||
|
|
||||||
1. **Detects your platform** (Ubuntu/Debian, Fedora/RHEL, Arch, macOS)
|
1. **Detects your platform** (Ubuntu/Debian, Fedora/RHEL, Arch, macOS)
|
||||||
2. **Installs Git** if not already present
|
2. **Installs Git** if not already present
|
||||||
|
|
@ -40,10 +40,10 @@ bash setup.sh
|
||||||
6. **Creates `.env` from `.env.example`** — edit `.env` to customise ports and model storage paths before starting
|
6. **Creates `.env` from `.env.example`** — edit `.env` to customise ports and model storage paths before starting
|
||||||
|
|
||||||
!!! note "macOS"
|
!!! note "macOS"
|
||||||
`setup.sh` installs Docker Desktop via Homebrew (`brew install --cask docker`) then exits. Open Docker Desktop, start it, then re-run the script.
|
`install.sh` installs Docker Desktop via Homebrew (`brew install --cask docker`) then exits. Open Docker Desktop, start it, then re-run the script.
|
||||||
|
|
||||||
!!! note "GPU requirement"
|
!!! note "GPU requirement"
|
||||||
For GPU support, `nvidia-smi` must return output before you run `setup.sh`. Install your NVIDIA driver first. The Container Toolkit installation will fail silently if the driver is not present.
|
For GPU support, `nvidia-smi` must return output before you run `install.sh`. Install your NVIDIA driver first. The Container Toolkit installation will fail silently if the driver is not present.
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
|
|
@ -107,7 +107,7 @@ The first-run wizard launches automatically. See [First-Run Wizard](first-run-wi
|
||||||
Only NVIDIA GPUs are supported. AMD ROCm is not currently supported.
|
Only NVIDIA GPUs are supported. AMD ROCm is not currently supported.
|
||||||
|
|
||||||
Requirements:
|
Requirements:
|
||||||
- NVIDIA driver installed and `nvidia-smi` working before running `setup.sh`
|
- NVIDIA driver installed and `nvidia-smi` working before running `install.sh`
|
||||||
- CUDA 12.x recommended (CUDA 11.x may work but is untested)
|
- CUDA 12.x recommended (CUDA 11.x may work but is untested)
|
||||||
- Minimum 8 GB VRAM for `single-gpu` profile with default models
|
- Minimum 8 GB VRAM for `single-gpu` profile with default models
|
||||||
- For `dual-gpu`: GPU 0 is assigned to Ollama, GPU 1 to vLLM
|
- For `dual-gpu`: GPU 0 is assigned to Ollama, GPU 1 to vLLM
|
||||||
|
|
|
||||||
|
|
@ -4,6 +4,8 @@
|
||||||
|
|
||||||
Peregrine automates the full job search lifecycle: discovery, matching, cover letter generation, application tracking, and interview preparation. It is privacy-first and local-first — your data never leaves your machine unless you configure an external integration.
|
Peregrine automates the full job search lifecycle: discovery, matching, cover letter generation, application tracking, and interview preparation. It is privacy-first and local-first — your data never leaves your machine unless you configure an external integration.
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## Quick Start
|
## Quick Start
|
||||||
|
|
@ -12,7 +14,7 @@ Peregrine automates the full job search lifecycle: discovery, matching, cover le
|
||||||
# 1. Clone and install dependencies
|
# 1. Clone and install dependencies
|
||||||
git clone https://git.circuitforge.io/circuitforge/peregrine
|
git clone https://git.circuitforge.io/circuitforge/peregrine
|
||||||
cd peregrine
|
cd peregrine
|
||||||
bash setup.sh
|
bash install.sh
|
||||||
|
|
||||||
# 2. Start Peregrine
|
# 2. Start Peregrine
|
||||||
make start # no GPU, API-only
|
make start # no GPU, API-only
|
||||||
|
|
|
||||||
1
docs/plausible.js
Normal file
1
docs/plausible.js
Normal file
|
|
@ -0,0 +1 @@
|
||||||
|
(function(){var s=document.createElement("script");s.defer=true;s.dataset.domain="docs.circuitforge.tech,circuitforge.tech";s.dataset.api="https://analytics.circuitforge.tech/api/event";s.src="https://analytics.circuitforge.tech/js/script.js";document.head.appendChild(s);})();
|
||||||
|
|
@ -337,7 +337,7 @@ webhook_url: "https://discord.com/api/webhooks/..."
|
||||||
|
|
||||||
## .env
|
## .env
|
||||||
|
|
||||||
Docker port and path overrides. Created from `.env.example` by `setup.sh`. Gitignored.
|
Docker port and path overrides. Created from `.env.example` by `install.sh`. Gitignored.
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Ports (change if defaults conflict with existing services)
|
# Ports (change if defaults conflict with existing services)
|
||||||
|
|
|
||||||
157
docs/reference/forgejo-feedback-schema.md
Normal file
157
docs/reference/forgejo-feedback-schema.md
Normal file
|
|
@ -0,0 +1,157 @@
|
||||||
|
# Forgejo Feedback API — Schema & Bug Bot Setup
|
||||||
|
|
||||||
|
## API Endpoints Used
|
||||||
|
|
||||||
|
| Operation | Method | Endpoint |
|
||||||
|
|-----------|--------|----------|
|
||||||
|
| List labels | GET | `/repos/{owner}/{repo}/labels` |
|
||||||
|
| Create label | POST | `/repos/{owner}/{repo}/labels` |
|
||||||
|
| Create issue | POST | `/repos/{owner}/{repo}/issues` |
|
||||||
|
| Upload attachment | POST | `/repos/{owner}/{repo}/issues/{index}/assets` |
|
||||||
|
| Post comment | POST | `/repos/{owner}/{repo}/issues/{index}/comments` |
|
||||||
|
|
||||||
|
Base URL: `https://git.opensourcesolarpunk.com/api/v1`
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Issue Creation Payload
|
||||||
|
|
||||||
|
```json
|
||||||
|
POST /repos/{owner}/{repo}/issues
|
||||||
|
{
|
||||||
|
"title": "string",
|
||||||
|
"body": "markdown string",
|
||||||
|
"labels": [1, 2, 3] // array of label IDs (not names)
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Response (201):
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"number": 42,
|
||||||
|
"html_url": "https://git.opensourcesolarpunk.com/pyr0ball/peregrine/issues/42"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Issue Body Structure
|
||||||
|
|
||||||
|
The `build_issue_body()` function produces this markdown layout:
|
||||||
|
|
||||||
|
```markdown
|
||||||
|
## 🐛 Bug | ✨ Feature Request | 💬 Other
|
||||||
|
|
||||||
|
<user description>
|
||||||
|
|
||||||
|
### Reproduction Steps ← bug type only, when repro provided
|
||||||
|
|
||||||
|
<repro steps>
|
||||||
|
|
||||||
|
### Context
|
||||||
|
|
||||||
|
- **page:** Home
|
||||||
|
- **version:** v0.2.5-61-ga6d787f ← from `git describe`; "dev" inside Docker
|
||||||
|
- **tier:** free | paid | premium
|
||||||
|
- **llm_backend:** ollama | vllm | claude_code | ...
|
||||||
|
- **os:** Linux-6.8.0-65-generic-x86_64-with-glibc2.39
|
||||||
|
- **timestamp:** 2026-03-06T15:58:29Z
|
||||||
|
|
||||||
|
<details>
|
||||||
|
<summary>App Logs (last 100 lines)</summary>
|
||||||
|
|
||||||
|
```
|
||||||
|
... log content (PII masked) ...
|
||||||
|
```
|
||||||
|
|
||||||
|
</details>
|
||||||
|
|
||||||
|
### Recent Listings ← only when include_diag = True
|
||||||
|
|
||||||
|
- [Title @ Company](url)
|
||||||
|
|
||||||
|
---
|
||||||
|
*Submitted by: Name <email>* ← only when attribution consent checked
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Screenshot Attachment
|
||||||
|
|
||||||
|
Screenshots are uploaded as issue assets, then embedded inline via a follow-up comment:
|
||||||
|
|
||||||
|
```markdown
|
||||||
|
### Screenshot
|
||||||
|
|
||||||
|

|
||||||
|
```
|
||||||
|
|
||||||
|
This keeps the issue body clean and puts the screenshot in a distinct comment.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Labels
|
||||||
|
|
||||||
|
| Label | Color | Applied when |
|
||||||
|
|-------|-------|-------------|
|
||||||
|
| `beta-feedback` | `#0075ca` | Always |
|
||||||
|
| `needs-triage` | `#e4e669` | Always |
|
||||||
|
| `bug` | `#d73a4a` | Type = Bug |
|
||||||
|
| `feature-request` | `#a2eeef` | Type = Feature Request |
|
||||||
|
| `question` | `#d876e3` | Type = Other |
|
||||||
|
|
||||||
|
Labels are looked up by name on each submission; missing ones are auto-created via `_ensure_labels()`.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Bug Bot Account Setup
|
||||||
|
|
||||||
|
The token currently bundled in `.env` is pyr0ball's personal token. For beta distribution,
|
||||||
|
create a dedicated bot account so the token has limited scope and can be rotated independently.
|
||||||
|
|
||||||
|
### Why a bot account?
|
||||||
|
|
||||||
|
- Token gets bundled in beta testers' `.env` — shouldn't be tied to the repo owner's account
|
||||||
|
- Bot can be limited to issue write only (cannot push code, see private repos, etc.)
|
||||||
|
- Token rotation doesn't affect the owner's other integrations
|
||||||
|
|
||||||
|
### Steps (requires Forgejo admin panel — API admin access not available on this token)
|
||||||
|
|
||||||
|
1. **Create bot account** at `https://git.opensourcesolarpunk.com/-/admin/users/new`
|
||||||
|
- Username: `peregrine-bot` (or `cf-bugbot`)
|
||||||
|
- Email: a real address you control (e.g. `bot+peregrine@circuitforge.tech`)
|
||||||
|
- Set a strong password (store in your password manager)
|
||||||
|
- Check "Prohibit login" if you want a pure API-only account
|
||||||
|
|
||||||
|
2. **Add as collaborator** on `pyr0ball/peregrine`:
|
||||||
|
- Settings → Collaborators → Add `peregrine-bot` with **Write** access
|
||||||
|
- Write access is required to create labels; issue creation alone would need only Read+Comment
|
||||||
|
|
||||||
|
3. **Generate API token** (log in as the bot, or use admin impersonation):
|
||||||
|
- User Settings → Applications → Generate New Token
|
||||||
|
- Name: `peregrine-feedback`
|
||||||
|
- Scopes: `issue` (write) — no repo code access needed
|
||||||
|
- Copy the token — it won't be shown again
|
||||||
|
|
||||||
|
4. **Update environment**:
|
||||||
|
```
|
||||||
|
FORGEJO_API_TOKEN=<new bot token>
|
||||||
|
FORGEJO_REPO=pyr0ball/peregrine
|
||||||
|
FORGEJO_API_URL=https://git.opensourcesolarpunk.com/api/v1
|
||||||
|
```
|
||||||
|
Update both `.env` (dev machine) and any beta tester `.env` files.
|
||||||
|
|
||||||
|
5. **Verify** the bot can create issues:
|
||||||
|
```bash
|
||||||
|
curl -s -X POST https://git.opensourcesolarpunk.com/api/v1/repos/pyr0ball/peregrine/issues \
|
||||||
|
-H "Authorization: token <bot-token>" \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{"title":"[TEST] bot token check","body":"safe to close","labels":[]}'
|
||||||
|
```
|
||||||
|
Expected: HTTP 201 with `number` and `html_url` in response.
|
||||||
|
|
||||||
|
### Future: Heimdall token management
|
||||||
|
|
||||||
|
Once Heimdall is live, the bot token should be served by the license server rather than
|
||||||
|
bundled in `.env`. The app fetches it at startup using the user's license key → token is
|
||||||
|
never stored on disk and can be rotated server-side. Track as a future Heimdall feature.
|
||||||
BIN
docs/screenshots/01-dashboard.png
Normal file
BIN
docs/screenshots/01-dashboard.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 91 KiB |
BIN
docs/screenshots/02-review.png
Normal file
BIN
docs/screenshots/02-review.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 70 KiB |
BIN
docs/screenshots/03-apply.png
Normal file
BIN
docs/screenshots/03-apply.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 114 KiB |
File diff suppressed because it is too large
Load diff
|
|
@ -1,700 +0,0 @@
|
||||||
# Jobgether Integration Implementation Plan
|
|
||||||
|
|
||||||
> **For agentic workers:** REQUIRED: Use superpowers:subagent-driven-development (if subagents available) or superpowers:executing-plans to implement this plan. Steps use checkbox (`- [ ]`) syntax for tracking.
|
|
||||||
|
|
||||||
**Goal:** Filter Jobgether listings out of all other scrapers, add a dedicated Jobgether scraper and URL scraper (Playwright-based), and add recruiter-aware cover letter framing for Jobgether jobs.
|
|
||||||
|
|
||||||
**Architecture:** Blocklist config handles filtering with zero code changes. A new `_scrape_jobgether()` in `scrape_url.py` handles manual URL imports via Playwright with URL slug fallback. A new `scripts/custom_boards/jobgether.py` handles discovery. Cover letter framing is an `is_jobgether` flag threaded from `task_runner.py` → `generate()` → `build_prompt()`.
|
|
||||||
|
|
||||||
**Tech Stack:** Python, Playwright (already installed), SQLite, PyTest, YAML config
|
|
||||||
|
|
||||||
**Spec:** `/Library/Development/CircuitForge/peregrine/docs/superpowers/specs/2026-03-15-jobgether-integration-design.md`
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Worktree Setup
|
|
||||||
|
|
||||||
- [ ] **Create worktree for this feature**
|
|
||||||
|
|
||||||
```bash
|
|
||||||
cd /Library/Development/CircuitForge/peregrine
|
|
||||||
git worktree add .worktrees/jobgether-integration -b feature/jobgether-integration
|
|
||||||
```
|
|
||||||
|
|
||||||
All implementation work happens in `/Library/Development/CircuitForge/peregrine/.worktrees/jobgether-integration/`.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Chunk 1: Blocklist filter + scrape_url.py
|
|
||||||
|
|
||||||
### Task 1: Add Jobgether to blocklist
|
|
||||||
|
|
||||||
**Files:**
|
|
||||||
- Modify: `/Library/Development/CircuitForge/peregrine/config/blocklist.yaml`
|
|
||||||
|
|
||||||
- [ ] **Step 1: Edit blocklist.yaml**
|
|
||||||
|
|
||||||
```yaml
|
|
||||||
companies:
|
|
||||||
- jobgether
|
|
||||||
```
|
|
||||||
|
|
||||||
- [ ] **Step 2: Verify the existing `_is_blocklisted` test passes (or write one)**
|
|
||||||
|
|
||||||
Check `/Library/Development/CircuitForge/peregrine/tests/test_discover.py` for existing blocklist tests. If none cover company matching, add:
|
|
||||||
|
|
||||||
```python
|
|
||||||
def test_is_blocklisted_jobgether():
|
|
||||||
from scripts.discover import _is_blocklisted
|
|
||||||
blocklist = {"companies": ["jobgether"], "industries": [], "locations": []}
|
|
||||||
assert _is_blocklisted({"company": "Jobgether", "location": "", "description": ""}, blocklist)
|
|
||||||
assert _is_blocklisted({"company": "jobgether inc", "location": "", "description": ""}, blocklist)
|
|
||||||
assert not _is_blocklisted({"company": "Acme Corp", "location": "", "description": ""}, blocklist)
|
|
||||||
```
|
|
||||||
|
|
||||||
Run: `conda run -n job-seeker python -m pytest tests/test_discover.py -v -k "blocklist"`
|
|
||||||
Expected: PASS
|
|
||||||
|
|
||||||
- [ ] **Step 3: Commit**
|
|
||||||
|
|
||||||
```bash
|
|
||||||
git add config/blocklist.yaml tests/test_discover.py
|
|
||||||
git commit -m "feat: filter Jobgether listings via blocklist"
|
|
||||||
```
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
### Task 2: Add Jobgether detection to scrape_url.py
|
|
||||||
|
|
||||||
**Files:**
|
|
||||||
- Modify: `/Library/Development/CircuitForge/peregrine/scripts/scrape_url.py`
|
|
||||||
- Modify: `/Library/Development/CircuitForge/peregrine/tests/test_scrape_url.py`
|
|
||||||
|
|
||||||
- [ ] **Step 1: Write failing tests**
|
|
||||||
|
|
||||||
In `/Library/Development/CircuitForge/peregrine/tests/test_scrape_url.py`, add:
|
|
||||||
|
|
||||||
```python
|
|
||||||
def test_detect_board_jobgether():
|
|
||||||
from scripts.scrape_url import _detect_board
|
|
||||||
assert _detect_board("https://jobgether.com/offer/69b42d9d24d79271ee0618e8-csm---resware") == "jobgether"
|
|
||||||
assert _detect_board("https://www.jobgether.com/offer/abc-role---company") == "jobgether"
|
|
||||||
|
|
||||||
|
|
||||||
def test_jobgether_slug_company_extraction():
|
|
||||||
from scripts.scrape_url import _company_from_jobgether_url
|
|
||||||
assert _company_from_jobgether_url(
|
|
||||||
"https://jobgether.com/offer/69b42d9d24d79271ee0618e8-customer-success-manager---resware"
|
|
||||||
) == "Resware"
|
|
||||||
assert _company_from_jobgether_url(
|
|
||||||
"https://jobgether.com/offer/abc123-director-of-cs---acme-corp"
|
|
||||||
) == "Acme Corp"
|
|
||||||
assert _company_from_jobgether_url(
|
|
||||||
"https://jobgether.com/offer/abc123-no-separator-here"
|
|
||||||
) == ""
|
|
||||||
|
|
||||||
|
|
||||||
def test_scrape_jobgether_no_playwright(tmp_path):
|
|
||||||
"""When Playwright is unavailable, _scrape_jobgether falls back to URL slug for company."""
|
|
||||||
# Patch playwright.sync_api to None in sys.modules so the local import inside
|
|
||||||
# _scrape_jobgether raises ImportError at call time (local imports run at call time,
|
|
||||||
# not at module load time — so no reload needed).
|
|
||||||
import sys
|
|
||||||
import unittest.mock as mock
|
|
||||||
|
|
||||||
url = "https://jobgether.com/offer/69b42d9d24d79271ee0618e8-customer-success-manager---resware"
|
|
||||||
with mock.patch.dict(sys.modules, {"playwright": None, "playwright.sync_api": None}):
|
|
||||||
from scripts.scrape_url import _scrape_jobgether
|
|
||||||
result = _scrape_jobgether(url)
|
|
||||||
|
|
||||||
assert result.get("company") == "Resware"
|
|
||||||
assert result.get("source") == "jobgether"
|
|
||||||
```
|
|
||||||
|
|
||||||
Run: `conda run -n job-seeker python -m pytest tests/test_scrape_url.py::test_detect_board_jobgether tests/test_scrape_url.py::test_jobgether_slug_company_extraction tests/test_scrape_url.py::test_scrape_jobgether_no_playwright -v`
|
|
||||||
Expected: FAIL (functions not yet defined)
|
|
||||||
|
|
||||||
- [ ] **Step 2: Add `_company_from_jobgether_url()` to scrape_url.py**
|
|
||||||
|
|
||||||
Add after the `_STRIP_PARAMS` block (around line 34):
|
|
||||||
|
|
||||||
```python
|
|
||||||
def _company_from_jobgether_url(url: str) -> str:
|
|
||||||
"""Extract company name from Jobgether offer URL slug.
|
|
||||||
|
|
||||||
Slug format: /offer/{24-hex-hash}-{title-slug}---{company-slug}
|
|
||||||
Triple-dash separator delimits title from company.
|
|
||||||
Returns title-cased company name, or "" if pattern not found.
|
|
||||||
"""
|
|
||||||
m = re.search(r"---([^/?]+)$", urlparse(url).path)
|
|
||||||
if not m:
|
|
||||||
print(f"[scrape_url] Jobgether URL slug: no company separator found in {url}")
|
|
||||||
return ""
|
|
||||||
return m.group(1).replace("-", " ").title()
|
|
||||||
```
|
|
||||||
|
|
||||||
- [ ] **Step 3: Add `"jobgether"` branch to `_detect_board()`**
|
|
||||||
|
|
||||||
In `/Library/Development/CircuitForge/peregrine/scripts/scrape_url.py`, modify `_detect_board()` (add before `return "generic"`):
|
|
||||||
|
|
||||||
```python
|
|
||||||
if "jobgether.com" in url_lower:
|
|
||||||
return "jobgether"
|
|
||||||
```
|
|
||||||
|
|
||||||
- [ ] **Step 4: Add `_scrape_jobgether()` function**
|
|
||||||
|
|
||||||
Add after `_scrape_glassdoor()` (around line 137):
|
|
||||||
|
|
||||||
```python
|
|
||||||
def _scrape_jobgether(url: str) -> dict:
|
|
||||||
"""Scrape a Jobgether offer page using Playwright to bypass 403.
|
|
||||||
|
|
||||||
Falls back to URL slug for company name when Playwright is unavailable.
|
|
||||||
Does not use requests — no raise_for_status().
|
|
||||||
"""
|
|
||||||
try:
|
|
||||||
from playwright.sync_api import sync_playwright
|
|
||||||
except ImportError:
|
|
||||||
company = _company_from_jobgether_url(url)
|
|
||||||
if company:
|
|
||||||
print(f"[scrape_url] Jobgether: Playwright not installed, using slug fallback → {company}")
|
|
||||||
return {"company": company, "source": "jobgether"} if company else {}
|
|
||||||
|
|
||||||
try:
|
|
||||||
with sync_playwright() as p:
|
|
||||||
browser = p.chromium.launch(headless=True)
|
|
||||||
try:
|
|
||||||
ctx = browser.new_context(user_agent=_HEADERS["User-Agent"])
|
|
||||||
page = ctx.new_page()
|
|
||||||
page.goto(url, timeout=30_000)
|
|
||||||
page.wait_for_load_state("networkidle", timeout=20_000)
|
|
||||||
|
|
||||||
result = page.evaluate("""() => {
|
|
||||||
const title = document.querySelector('h1')?.textContent?.trim() || '';
|
|
||||||
const company = document.querySelector('[class*="company"], [class*="employer"], [data-testid*="company"]')
|
|
||||||
?.textContent?.trim() || '';
|
|
||||||
const location = document.querySelector('[class*="location"], [data-testid*="location"]')
|
|
||||||
?.textContent?.trim() || '';
|
|
||||||
const desc = document.querySelector('[class*="description"], [class*="job-desc"], article')
|
|
||||||
?.innerText?.trim() || '';
|
|
||||||
return { title, company, location, description: desc };
|
|
||||||
}""")
|
|
||||||
finally:
|
|
||||||
browser.close()
|
|
||||||
|
|
||||||
# Fall back to slug for company if DOM extraction missed it
|
|
||||||
if not result.get("company"):
|
|
||||||
result["company"] = _company_from_jobgether_url(url)
|
|
||||||
|
|
||||||
result["source"] = "jobgether"
|
|
||||||
return {k: v for k, v in result.items() if v}
|
|
||||||
|
|
||||||
except Exception as exc:
|
|
||||||
print(f"[scrape_url] Jobgether Playwright error for {url}: {exc}")
|
|
||||||
# Last resort: slug fallback
|
|
||||||
company = _company_from_jobgether_url(url)
|
|
||||||
return {"company": company, "source": "jobgether"} if company else {}
|
|
||||||
```
|
|
||||||
|
|
||||||
> ⚠️ **The CSS selectors in the `page.evaluate()` call are placeholders.** Before committing, inspect a live Jobgether offer page (e.g. `https://jobgether.com/offer/<id>-<title-slug>---<company-slug>`) in a browser to find the actual class names for title, company, location, and description. Update the selectors accordingly.
|
|
||||||
|
|
||||||
- [ ] **Step 5: Add dispatch branch in `scrape_job_url()`**
|
|
||||||
|
|
||||||
In the `if board == "linkedin":` dispatch chain (around line 208), add before the `else`:
|
|
||||||
|
|
||||||
```python
|
|
||||||
elif board == "jobgether":
|
|
||||||
fields = _scrape_jobgether(url)
|
|
||||||
```
|
|
||||||
|
|
||||||
- [ ] **Step 6: Run tests to verify they pass**
|
|
||||||
|
|
||||||
Run: `conda run -n job-seeker python -m pytest tests/test_scrape_url.py -v`
|
|
||||||
Expected: All PASS (including pre-existing tests)
|
|
||||||
|
|
||||||
- [ ] **Step 7: Commit**
|
|
||||||
|
|
||||||
```bash
|
|
||||||
git add scripts/scrape_url.py tests/test_scrape_url.py
|
|
||||||
git commit -m "feat: add Jobgether URL detection and scraper to scrape_url.py"
|
|
||||||
```
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Chunk 2: Jobgether custom board scraper
|
|
||||||
|
|
||||||
> ⚠️ **Pre-condition:** Before writing the scraper, inspect `https://jobgether.com/remote-jobs` live to determine the actual URL/filter param format and DOM card selectors. Use the Playwright MCP browser tool or Chrome devtools. Record: (1) the query param for job title search, (2) the job card CSS selectors for title, company, URL, location, salary.
|
|
||||||
|
|
||||||
### Task 3: Inspect Jobgether search live
|
|
||||||
|
|
||||||
**Files:** None (research step)
|
|
||||||
|
|
||||||
- [ ] **Step 1: Navigate to Jobgether remote jobs and inspect search params**
|
|
||||||
|
|
||||||
Using browser devtools or Playwright network capture, navigate to `https://jobgether.com/remote-jobs`, search for "Customer Success Manager", and capture:
|
|
||||||
- The resulting URL (query params)
|
|
||||||
- Network requests (XHR/fetch) if the page uses API calls
|
|
||||||
- CSS selectors for job card elements
|
|
||||||
|
|
||||||
Record findings here before proceeding.
|
|
||||||
|
|
||||||
- [ ] **Step 2: Test a Playwright page.evaluate() extraction manually**
|
|
||||||
|
|
||||||
```python
|
|
||||||
# Run interactively to validate selectors
|
|
||||||
from playwright.sync_api import sync_playwright
|
|
||||||
with sync_playwright() as p:
|
|
||||||
browser = p.chromium.launch(headless=False) # headless=False to see the page
|
|
||||||
page = browser.new_page()
|
|
||||||
page.goto("https://jobgether.com/remote-jobs")
|
|
||||||
page.wait_for_load_state("networkidle")
|
|
||||||
# Test your selectors here
|
|
||||||
cards = page.query_selector_all("[YOUR_CARD_SELECTOR]")
|
|
||||||
print(len(cards))
|
|
||||||
browser.close()
|
|
||||||
```
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
### Task 4: Write jobgether.py scraper
|
|
||||||
|
|
||||||
**Files:**
|
|
||||||
- Create: `/Library/Development/CircuitForge/peregrine/scripts/custom_boards/jobgether.py`
|
|
||||||
- Modify: `/Library/Development/CircuitForge/peregrine/tests/test_discover.py` (or create `tests/test_jobgether.py`)
|
|
||||||
|
|
||||||
- [ ] **Step 1: Write failing test**
|
|
||||||
|
|
||||||
In `/Library/Development/CircuitForge/peregrine/tests/test_discover.py` (or a new `tests/test_jobgether.py`):
|
|
||||||
|
|
||||||
```python
|
|
||||||
def test_jobgether_scraper_returns_empty_on_missing_playwright(monkeypatch):
|
|
||||||
"""Graceful fallback when Playwright is unavailable."""
|
|
||||||
import scripts.custom_boards.jobgether as jg
|
|
||||||
monkeypatch.setattr("scripts.custom_boards.jobgether.sync_playwright", None)
|
|
||||||
result = jg.scrape({"titles": ["Customer Success Manager"]}, "Remote", results_wanted=5)
|
|
||||||
assert result == []
|
|
||||||
|
|
||||||
|
|
||||||
def test_jobgether_scraper_respects_results_wanted(monkeypatch):
|
|
||||||
"""Scraper caps results at results_wanted."""
|
|
||||||
import scripts.custom_boards.jobgether as jg
|
|
||||||
|
|
||||||
fake_jobs = [
|
|
||||||
{"title": f"CSM {i}", "href": f"/offer/abc{i}-csm---acme", "company": f"Acme {i}",
|
|
||||||
"location": "Remote", "is_remote": True, "salary": ""}
|
|
||||||
for i in range(20)
|
|
||||||
]
|
|
||||||
|
|
||||||
class FakePage:
|
|
||||||
def goto(self, *a, **kw): pass
|
|
||||||
def wait_for_load_state(self, *a, **kw): pass
|
|
||||||
def evaluate(self, _): return fake_jobs
|
|
||||||
|
|
||||||
class FakeCtx:
|
|
||||||
def new_page(self): return FakePage()
|
|
||||||
|
|
||||||
class FakeBrowser:
|
|
||||||
def new_context(self, **kw): return FakeCtx()
|
|
||||||
def close(self): pass
|
|
||||||
|
|
||||||
class FakeChromium:
|
|
||||||
def launch(self, **kw): return FakeBrowser()
|
|
||||||
|
|
||||||
class FakeP:
|
|
||||||
chromium = FakeChromium()
|
|
||||||
def __enter__(self): return self
|
|
||||||
def __exit__(self, *a): pass
|
|
||||||
|
|
||||||
monkeypatch.setattr("scripts.custom_boards.jobgether.sync_playwright", lambda: FakeP())
|
|
||||||
result = jg.scrape({"titles": ["CSM"]}, "Remote", results_wanted=5)
|
|
||||||
assert len(result) <= 5
|
|
||||||
```
|
|
||||||
|
|
||||||
Run: `conda run -n job-seeker python -m pytest tests/ -v -k "jobgether"`
|
|
||||||
Expected: FAIL (module not found)
|
|
||||||
|
|
||||||
- [ ] **Step 2: Create `scripts/custom_boards/jobgether.py`**
|
|
||||||
|
|
||||||
```python
|
|
||||||
"""Jobgether scraper — Playwright-based (requires chromium installed).
|
|
||||||
|
|
||||||
Jobgether (jobgether.com) is a remote-work job aggregator. It blocks plain
|
|
||||||
requests with 403, so we use Playwright to render the page and extract cards.
|
|
||||||
|
|
||||||
Install Playwright: conda run -n job-seeker pip install playwright &&
|
|
||||||
conda run -n job-seeker python -m playwright install chromium
|
|
||||||
|
|
||||||
Returns a list of dicts compatible with scripts.db.insert_job().
|
|
||||||
"""
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import re
|
|
||||||
import time
|
|
||||||
from typing import Any
|
|
||||||
|
|
||||||
_BASE = "https://jobgether.com"
|
|
||||||
_SEARCH_PATH = "/remote-jobs"
|
|
||||||
|
|
||||||
# TODO: Replace with confirmed query param key after live inspection (Task 3)
|
|
||||||
_QUERY_PARAM = "search"
|
|
||||||
|
|
||||||
# Module-level import so tests can monkeypatch scripts.custom_boards.jobgether.sync_playwright
|
|
||||||
try:
|
|
||||||
from playwright.sync_api import sync_playwright
|
|
||||||
except ImportError:
|
|
||||||
sync_playwright = None
|
|
||||||
|
|
||||||
|
|
||||||
def scrape(profile: dict, location: str, results_wanted: int = 50) -> list[dict]:
|
|
||||||
"""
|
|
||||||
Scrape job listings from Jobgether using Playwright.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
profile: Search profile dict (uses 'titles').
|
|
||||||
location: Location string — Jobgether is remote-focused; location used
|
|
||||||
only if the site exposes a location filter.
|
|
||||||
results_wanted: Maximum results to return across all titles.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
List of job dicts with keys: title, company, url, source, location,
|
|
||||||
is_remote, salary, description.
|
|
||||||
"""
|
|
||||||
if sync_playwright is None:
|
|
||||||
print(
|
|
||||||
" [jobgether] playwright not installed.\n"
|
|
||||||
" Install: conda run -n job-seeker pip install playwright && "
|
|
||||||
"conda run -n job-seeker python -m playwright install chromium"
|
|
||||||
)
|
|
||||||
return []
|
|
||||||
|
|
||||||
results: list[dict] = []
|
|
||||||
seen_urls: set[str] = set()
|
|
||||||
|
|
||||||
with sync_playwright() as p:
|
|
||||||
browser = p.chromium.launch(headless=True)
|
|
||||||
ctx = browser.new_context(
|
|
||||||
user_agent=(
|
|
||||||
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
|
|
||||||
"(KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36"
|
|
||||||
)
|
|
||||||
)
|
|
||||||
page = ctx.new_page()
|
|
||||||
|
|
||||||
for title in profile.get("titles", []):
|
|
||||||
if len(results) >= results_wanted:
|
|
||||||
break
|
|
||||||
|
|
||||||
# TODO: Confirm URL param format from live inspection (Task 3)
|
|
||||||
url = f"{_BASE}{_SEARCH_PATH}?{_QUERY_PARAM}={title.replace(' ', '+')}"
|
|
||||||
|
|
||||||
try:
|
|
||||||
page.goto(url, timeout=30_000)
|
|
||||||
page.wait_for_load_state("networkidle", timeout=20_000)
|
|
||||||
except Exception as exc:
|
|
||||||
print(f" [jobgether] Page load error for '{title}': {exc}")
|
|
||||||
continue
|
|
||||||
|
|
||||||
# TODO: Replace JS selector with confirmed card selector from Task 3
|
|
||||||
try:
|
|
||||||
raw_jobs: list[dict[str, Any]] = page.evaluate(_extract_jobs_js())
|
|
||||||
except Exception as exc:
|
|
||||||
print(f" [jobgether] JS extract error for '{title}': {exc}")
|
|
||||||
continue
|
|
||||||
|
|
||||||
if not raw_jobs:
|
|
||||||
print(f" [jobgether] No cards found for '{title}' — selector may need updating")
|
|
||||||
continue
|
|
||||||
|
|
||||||
for job in raw_jobs:
|
|
||||||
href = job.get("href", "")
|
|
||||||
if not href:
|
|
||||||
continue
|
|
||||||
full_url = _BASE + href if href.startswith("/") else href
|
|
||||||
if full_url in seen_urls:
|
|
||||||
continue
|
|
||||||
seen_urls.add(full_url)
|
|
||||||
|
|
||||||
results.append({
|
|
||||||
"title": job.get("title", ""),
|
|
||||||
"company": job.get("company", ""),
|
|
||||||
"url": full_url,
|
|
||||||
"source": "jobgether",
|
|
||||||
"location": job.get("location") or "Remote",
|
|
||||||
"is_remote": True, # Jobgether is remote-focused
|
|
||||||
"salary": job.get("salary") or "",
|
|
||||||
"description": "", # not in card view; scrape_url fills in
|
|
||||||
})
|
|
||||||
|
|
||||||
if len(results) >= results_wanted:
|
|
||||||
break
|
|
||||||
|
|
||||||
time.sleep(1) # polite pacing between titles
|
|
||||||
|
|
||||||
browser.close()
|
|
||||||
|
|
||||||
return results[:results_wanted]
|
|
||||||
|
|
||||||
|
|
||||||
def _extract_jobs_js() -> str:
|
|
||||||
"""JS to run in page context — extracts job data from rendered card elements.
|
|
||||||
|
|
||||||
TODO: Replace selectors with confirmed values from Task 3 live inspection.
|
|
||||||
"""
|
|
||||||
return """() => {
|
|
||||||
// TODO: replace '[class*=job-card]' with confirmed card selector
|
|
||||||
const cards = document.querySelectorAll('[class*="job-card"], [data-testid*="job"]');
|
|
||||||
return Array.from(cards).map(card => {
|
|
||||||
// TODO: replace these selectors with confirmed values
|
|
||||||
const titleEl = card.querySelector('h2, h3, [class*="title"]');
|
|
||||||
const companyEl = card.querySelector('[class*="company"], [class*="employer"]');
|
|
||||||
const linkEl = card.querySelector('a');
|
|
||||||
const salaryEl = card.querySelector('[class*="salary"]');
|
|
||||||
const locationEl = card.querySelector('[class*="location"]');
|
|
||||||
return {
|
|
||||||
title: titleEl ? titleEl.textContent.trim() : null,
|
|
||||||
company: companyEl ? companyEl.textContent.trim() : null,
|
|
||||||
href: linkEl ? linkEl.getAttribute('href') : null,
|
|
||||||
salary: salaryEl ? salaryEl.textContent.trim() : null,
|
|
||||||
location: locationEl ? locationEl.textContent.trim() : null,
|
|
||||||
is_remote: true,
|
|
||||||
};
|
|
||||||
}).filter(j => j.title && j.href);
|
|
||||||
}"""
|
|
||||||
```
|
|
||||||
|
|
||||||
- [ ] **Step 3: Run tests**
|
|
||||||
|
|
||||||
Run: `conda run -n job-seeker python -m pytest tests/ -v -k "jobgether"`
|
|
||||||
Expected: PASS
|
|
||||||
|
|
||||||
- [ ] **Step 4: Commit**
|
|
||||||
|
|
||||||
```bash
|
|
||||||
git add scripts/custom_boards/jobgether.py tests/test_discover.py
|
|
||||||
git commit -m "feat: add Jobgether custom board scraper (selectors pending live inspection)"
|
|
||||||
```
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Chunk 3: Registration, config, cover letter framing
|
|
||||||
|
|
||||||
### Task 5: Register scraper in discover.py + update search_profiles.yaml
|
|
||||||
|
|
||||||
**Files:**
|
|
||||||
- Modify: `/Library/Development/CircuitForge/peregrine/scripts/discover.py`
|
|
||||||
- Modify: `/Library/Development/CircuitForge/peregrine/config/search_profiles.yaml`
|
|
||||||
- Modify: `/Library/Development/CircuitForge/peregrine/config/search_profiles.yaml.example` (if it exists)
|
|
||||||
|
|
||||||
- [ ] **Step 1: Add import to discover.py import block (lines 20–22)**
|
|
||||||
|
|
||||||
`jobgether.py` absorbs the Playwright `ImportError` internally (module-level `try/except`), so it always imports successfully. Match the existing pattern exactly:
|
|
||||||
|
|
||||||
```python
|
|
||||||
from scripts.custom_boards import jobgether as _jobgether
|
|
||||||
```
|
|
||||||
|
|
||||||
- [ ] **Step 2: Add to CUSTOM_SCRAPERS dict literal (lines 30–34)**
|
|
||||||
|
|
||||||
```python
|
|
||||||
CUSTOM_SCRAPERS: dict[str, object] = {
|
|
||||||
"adzuna": _adzuna.scrape,
|
|
||||||
"theladders": _theladders.scrape,
|
|
||||||
"craigslist": _craigslist.scrape,
|
|
||||||
"jobgether": _jobgether.scrape,
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
When Playwright is absent, `_jobgether.scrape()` returns `[]` gracefully — no special guard needed in `discover.py`.
|
|
||||||
|
|
||||||
- [ ] **Step 3: Add `jobgether` to remote-eligible profiles in search_profiles.yaml**
|
|
||||||
|
|
||||||
Add `- jobgether` to the `custom_boards` list for every profile that has `Remote` in its `locations`. Based on the current file, that means: `cs_leadership`, `music_industry`, `animal_welfare`, `education`. Do NOT add it to `default` (locations: San Francisco CA only).
|
|
||||||
|
|
||||||
- [ ] **Step 4: Run discover tests**
|
|
||||||
|
|
||||||
Run: `conda run -n job-seeker python -m pytest tests/test_discover.py -v`
|
|
||||||
Expected: All PASS
|
|
||||||
|
|
||||||
- [ ] **Step 5: Commit**
|
|
||||||
|
|
||||||
```bash
|
|
||||||
git add scripts/discover.py config/search_profiles.yaml
|
|
||||||
git commit -m "feat: register Jobgether scraper and add to remote search profiles"
|
|
||||||
```
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
### Task 6: Cover letter recruiter framing
|
|
||||||
|
|
||||||
**Files:**
|
|
||||||
- Modify: `/Library/Development/CircuitForge/peregrine/scripts/generate_cover_letter.py`
|
|
||||||
- Modify: `/Library/Development/CircuitForge/peregrine/scripts/task_runner.py`
|
|
||||||
- Modify: `/Library/Development/CircuitForge/peregrine/tests/test_match.py` or add `tests/test_cover_letter.py`
|
|
||||||
|
|
||||||
- [ ] **Step 1: Write failing test**
|
|
||||||
|
|
||||||
Create or add to `/Library/Development/CircuitForge/peregrine/tests/test_cover_letter.py`:
|
|
||||||
|
|
||||||
```python
|
|
||||||
def test_build_prompt_jobgether_framing_unknown_company():
|
|
||||||
from scripts.generate_cover_letter import build_prompt
|
|
||||||
prompt = build_prompt(
|
|
||||||
title="Customer Success Manager",
|
|
||||||
company="Jobgether",
|
|
||||||
description="CSM role at an undisclosed company.",
|
|
||||||
examples=[],
|
|
||||||
is_jobgether=True,
|
|
||||||
)
|
|
||||||
assert "Your client" in prompt
|
|
||||||
assert "recruiter" in prompt.lower() or "jobgether" in prompt.lower()
|
|
||||||
|
|
||||||
|
|
||||||
def test_build_prompt_jobgether_framing_known_company():
|
|
||||||
from scripts.generate_cover_letter import build_prompt
|
|
||||||
prompt = build_prompt(
|
|
||||||
title="Customer Success Manager",
|
|
||||||
company="Resware",
|
|
||||||
description="CSM role at Resware.",
|
|
||||||
examples=[],
|
|
||||||
is_jobgether=True,
|
|
||||||
)
|
|
||||||
assert "Your client at Resware" in prompt
|
|
||||||
|
|
||||||
|
|
||||||
def test_build_prompt_no_jobgether_framing_by_default():
|
|
||||||
from scripts.generate_cover_letter import build_prompt
|
|
||||||
prompt = build_prompt(
|
|
||||||
title="Customer Success Manager",
|
|
||||||
company="Acme Corp",
|
|
||||||
description="CSM role.",
|
|
||||||
examples=[],
|
|
||||||
)
|
|
||||||
assert "Your client" not in prompt
|
|
||||||
```
|
|
||||||
|
|
||||||
Run: `conda run -n job-seeker python -m pytest tests/test_cover_letter.py -v`
|
|
||||||
Expected: FAIL
|
|
||||||
|
|
||||||
- [ ] **Step 2: Add `is_jobgether` to `build_prompt()` in generate_cover_letter.py**
|
|
||||||
|
|
||||||
Modify the `build_prompt()` signature (line 186):
|
|
||||||
|
|
||||||
```python
|
|
||||||
def build_prompt(
|
|
||||||
title: str,
|
|
||||||
company: str,
|
|
||||||
description: str,
|
|
||||||
examples: list[dict],
|
|
||||||
mission_hint: str | None = None,
|
|
||||||
is_jobgether: bool = False,
|
|
||||||
) -> str:
|
|
||||||
```
|
|
||||||
|
|
||||||
Add the recruiter hint block after the `mission_hint` block (after line 203):
|
|
||||||
|
|
||||||
```python
|
|
||||||
if is_jobgether:
|
|
||||||
if company and company.lower() != "jobgether":
|
|
||||||
recruiter_note = (
|
|
||||||
f"🤝 Recruiter context: This listing is posted by Jobgether on behalf of "
|
|
||||||
f"{company}. Address the cover letter to the Jobgether recruiter, not directly "
|
|
||||||
f"to the hiring company. Use framing like 'Your client at {company} will "
|
|
||||||
f"appreciate...' rather than addressing {company} directly. The role "
|
|
||||||
f"requirements are those of the actual employer."
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
recruiter_note = (
|
|
||||||
"🤝 Recruiter context: This listing is posted by Jobgether on behalf of an "
|
|
||||||
"undisclosed employer. Address the cover letter to the Jobgether recruiter. "
|
|
||||||
"Use framing like 'Your client will appreciate...' rather than addressing "
|
|
||||||
"the company directly."
|
|
||||||
)
|
|
||||||
parts.append(f"{recruiter_note}\n")
|
|
||||||
```
|
|
||||||
|
|
||||||
- [ ] **Step 3: Add `is_jobgether` to `generate()` signature**
|
|
||||||
|
|
||||||
Modify `generate()` (line 233):
|
|
||||||
|
|
||||||
```python
|
|
||||||
def generate(
|
|
||||||
title: str,
|
|
||||||
company: str,
|
|
||||||
description: str = "",
|
|
||||||
previous_result: str = "",
|
|
||||||
feedback: str = "",
|
|
||||||
is_jobgether: bool = False,
|
|
||||||
_router=None,
|
|
||||||
) -> str:
|
|
||||||
```
|
|
||||||
|
|
||||||
Pass it through to `build_prompt()` (line 254):
|
|
||||||
|
|
||||||
```python
|
|
||||||
prompt = build_prompt(title, company, description, examples,
|
|
||||||
mission_hint=mission_hint, is_jobgether=is_jobgether)
|
|
||||||
```
|
|
||||||
|
|
||||||
- [ ] **Step 4: Pass `is_jobgether` from task_runner.py**
|
|
||||||
|
|
||||||
In `/Library/Development/CircuitForge/peregrine/scripts/task_runner.py`, modify the `generate()` call inside the `cover_letter` task block (`elif task_type == "cover_letter":` starts at line 152; the `generate()` call is at ~line 156):
|
|
||||||
|
|
||||||
```python
|
|
||||||
elif task_type == "cover_letter":
|
|
||||||
import json as _json
|
|
||||||
p = _json.loads(params or "{}")
|
|
||||||
from scripts.generate_cover_letter import generate
|
|
||||||
result = generate(
|
|
||||||
job.get("title", ""),
|
|
||||||
job.get("company", ""),
|
|
||||||
job.get("description", ""),
|
|
||||||
previous_result=p.get("previous_result", ""),
|
|
||||||
feedback=p.get("feedback", ""),
|
|
||||||
is_jobgether=job.get("source") == "jobgether",
|
|
||||||
)
|
|
||||||
update_cover_letter(db_path, job_id, result)
|
|
||||||
```
|
|
||||||
|
|
||||||
- [ ] **Step 5: Run tests**
|
|
||||||
|
|
||||||
Run: `conda run -n job-seeker python -m pytest tests/test_cover_letter.py -v`
|
|
||||||
Expected: All PASS
|
|
||||||
|
|
||||||
- [ ] **Step 6: Run full test suite**
|
|
||||||
|
|
||||||
Run: `conda run -n job-seeker python -m pytest tests/ -v`
|
|
||||||
Expected: All PASS
|
|
||||||
|
|
||||||
- [ ] **Step 7: Commit**
|
|
||||||
|
|
||||||
```bash
|
|
||||||
git add scripts/generate_cover_letter.py scripts/task_runner.py tests/test_cover_letter.py
|
|
||||||
git commit -m "feat: add Jobgether recruiter framing to cover letter generation"
|
|
||||||
```
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Final: Merge
|
|
||||||
|
|
||||||
- [ ] **Merge worktree branch to main**
|
|
||||||
|
|
||||||
```bash
|
|
||||||
cd /Library/Development/CircuitForge/peregrine
|
|
||||||
git merge feature/jobgether-integration
|
|
||||||
git worktree remove .worktrees/jobgether-integration
|
|
||||||
```
|
|
||||||
|
|
||||||
- [ ] **Push to remote**
|
|
||||||
|
|
||||||
```bash
|
|
||||||
git push origin main
|
|
||||||
```
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Manual verification after merge
|
|
||||||
|
|
||||||
1. Re-process the stuck Jobgether manual import (job 2286) — delete the old stuck row and re-add the URL via "Add Jobs by URL" on the Home page. Verify the scraper resolves company = "Resware".
|
|
||||||
2. Run a short discovery (`discover.py` with `results_per_board: 5`) and confirm no `company="Jobgether"` rows appear in `staging.db`.
|
|
||||||
3. Generate a cover letter for a Jobgether-sourced job and confirm recruiter framing appears.
|
|
||||||
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
|
|
@ -1,477 +0,0 @@
|
||||||
# LLM Queue Optimizer — Design Spec
|
|
||||||
|
|
||||||
**Date:** 2026-03-14
|
|
||||||
**Branch:** `feature/llm-queue-optimizer`
|
|
||||||
**Closes:** [#2](https://git.opensourcesolarpunk.com/Circuit-Forge/peregrine/issues/2)
|
|
||||||
**Author:** pyr0ball
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Problem
|
|
||||||
|
|
||||||
On single-GPU and CPU-only systems, the background task runner spawns a daemon thread for every task immediately on submission. When a user approves N jobs at once, N threads race to load their respective LLM models simultaneously, causing repeated model swaps and significant latency overhead.
|
|
||||||
|
|
||||||
The root issue is that `submit_task()` is a spawn-per-task model with no scheduling layer. SQLite's `background_tasks` table is a status log, not a consumed work queue.
|
|
||||||
|
|
||||||
Additionally, on restart all `queued` and `running` tasks are cleared to `failed` (inline SQL in `app.py`'s `_startup()`), discarding pending work that had not yet started executing.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Goals
|
|
||||||
|
|
||||||
- Eliminate unnecessary model switching by batching LLM tasks by type
|
|
||||||
- Allow concurrent model execution when VRAM permits multiple models simultaneously
|
|
||||||
- Preserve FIFO ordering within each task type
|
|
||||||
- Survive process restarts — `queued` tasks resume after restart; only `running` tasks (whose results are unknown) are reset to `failed`
|
|
||||||
- Apply to all tiers (no tier gating)
|
|
||||||
- Keep non-LLM tasks (discovery, email sync, scrape, enrich) unaffected — they continue to spawn free threads
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Non-Goals
|
|
||||||
|
|
||||||
- Changing the LLM router fallback chain
|
|
||||||
- Adding new task types
|
|
||||||
- Tier gating on the scheduler
|
|
||||||
- Persistent task history in memory
|
|
||||||
- Durability for non-LLM task types (discovery, email_sync, etc. — these do not survive restarts, same as current behavior)
|
|
||||||
- Dynamic VRAM tracking — `_available_vram` is read once at startup and not refreshed (see Known Limitations)
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Architecture
|
|
||||||
|
|
||||||
### Task Classification
|
|
||||||
|
|
||||||
```python
|
|
||||||
LLM_TASK_TYPES = {"cover_letter", "company_research", "wizard_generate"}
|
|
||||||
```
|
|
||||||
|
|
||||||
The routing rule is: if `task_type in LLM_TASK_TYPES`, route through the scheduler. Everything else spawns a free thread unchanged from the current implementation. **Future task types default to bypass mode** unless explicitly added to `LLM_TASK_TYPES` — which is the safe default (bypass = current behavior).
|
|
||||||
|
|
||||||
`LLM_TASK_TYPES` is defined in `scripts/task_scheduler.py` and imported by `scripts/task_runner.py` for routing. This import direction (task_runner imports from task_scheduler) avoids circular imports because `task_scheduler.py` does **not** import from `task_runner.py`.
|
|
||||||
|
|
||||||
Current non-LLM types (all bypass scheduler): `discovery`, `email_sync`, `scrape_url`, `enrich_descriptions`, `enrich_craigslist`, `prepare_training`.
|
|
||||||
|
|
||||||
### Routing in `submit_task()` — No Circular Import
|
|
||||||
|
|
||||||
The routing split lives entirely in `submit_task()` in `task_runner.py`:
|
|
||||||
|
|
||||||
```python
|
|
||||||
def submit_task(db_path, task_type, job_id=None, params=None):
|
|
||||||
task_id, is_new = insert_task(db_path, task_type, job_id or 0, params=params)
|
|
||||||
if is_new:
|
|
||||||
from scripts.task_scheduler import get_scheduler, LLM_TASK_TYPES
|
|
||||||
if task_type in LLM_TASK_TYPES:
|
|
||||||
get_scheduler(db_path, run_task_fn=_run_task).enqueue(task_id, task_type, job_id or 0, params)
|
|
||||||
else:
|
|
||||||
t = threading.Thread(
|
|
||||||
target=_run_task,
|
|
||||||
args=(db_path, task_id, task_type, job_id or 0, params),
|
|
||||||
daemon=True,
|
|
||||||
)
|
|
||||||
t.start()
|
|
||||||
return task_id, is_new
|
|
||||||
```
|
|
||||||
|
|
||||||
`TaskScheduler.enqueue()` only handles LLM task types and never imports or calls `_run_task`. This eliminates any circular import between `task_runner` and `task_scheduler`.
|
|
||||||
|
|
||||||
### Component Overview
|
|
||||||
|
|
||||||
```
|
|
||||||
submit_task()
|
|
||||||
│
|
|
||||||
├── task_type in LLM_TASK_TYPES?
|
|
||||||
│ │ yes │ no
|
|
||||||
│ ▼ ▼
|
|
||||||
│ get_scheduler().enqueue() spawn free thread (unchanged)
|
|
||||||
│ │
|
|
||||||
│ ▼
|
|
||||||
│ per-type deque
|
|
||||||
│ │
|
|
||||||
│ ▼
|
|
||||||
│ Scheduler loop (daemon thread)
|
|
||||||
│ (wakes on enqueue or batch completion)
|
|
||||||
│ │
|
|
||||||
│ Sort eligible types by queue depth (desc)
|
|
||||||
│ │
|
|
||||||
│ For each type:
|
|
||||||
│ reserved_vram + budget[type] ≤ available_vram?
|
|
||||||
│ │ yes │ no
|
|
||||||
│ ▼ ▼
|
|
||||||
│ Start batch worker skip (wait for slot)
|
|
||||||
│ (serial: one task at a time)
|
|
||||||
│ │
|
|
||||||
│ Batch worker signals done → scheduler re-evaluates
|
|
||||||
```
|
|
||||||
|
|
||||||
### New File: `scripts/task_scheduler.py`
|
|
||||||
|
|
||||||
**State:**
|
|
||||||
|
|
||||||
| Attribute | Type | Purpose |
|
|
||||||
|---|---|---|
|
|
||||||
| `_queues` | `dict[str, deque[TaskSpec]]` | Per-type pending task deques |
|
|
||||||
| `_active` | `dict[str, Thread]` | Currently running batch worker per type |
|
|
||||||
| `_budgets` | `dict[str, float]` | VRAM budget per task type (GB). Loaded at construction by merging `DEFAULT_VRAM_BUDGETS` with `scheduler.vram_budgets` from `config/llm.yaml`. Config path derived from `db_path` (e.g. `db_path.parent.parent / "config/llm.yaml"`). Missing file or key → defaults used as-is. At construction, a warning is logged for any type in `LLM_TASK_TYPES` with no budget entry after the merge. |
|
|
||||||
| `_reserved_vram` | `float` | Sum of `_budgets` values for currently active type batches |
|
|
||||||
| `_available_vram` | `float` | Total VRAM from `get_gpus()` summed across all GPUs at construction; 999.0 on CPU-only systems. Static — not refreshed after startup (see Known Limitations). |
|
|
||||||
| `_max_queue_depth` | `int` | Max tasks per type queue before drops. From `scheduler.max_queue_depth` in config; default 500. |
|
|
||||||
| `_lock` | `threading.Lock` | Protects all mutable scheduler state |
|
|
||||||
| `_wake` | `threading.Event` | Pulsed on enqueue or batch completion |
|
|
||||||
| `_stop` | `threading.Event` | Set by `shutdown()` to terminate the loop |
|
|
||||||
|
|
||||||
**Default VRAM budgets (module-level constant):**
|
|
||||||
|
|
||||||
```python
|
|
||||||
DEFAULT_VRAM_BUDGETS: dict[str, float] = {
|
|
||||||
"cover_letter": 2.5, # alex-cover-writer:latest (~2GB GGUF + headroom)
|
|
||||||
"company_research": 5.0, # llama3.1:8b or vllm model
|
|
||||||
"wizard_generate": 2.5, # same model family as cover_letter
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
At construction, the scheduler validates that every type in `LLM_TASK_TYPES` has an entry
|
|
||||||
in the merged `_budgets`. If any type is missing, a warning is logged:
|
|
||||||
|
|
||||||
```
|
|
||||||
WARNING task_scheduler: No VRAM budget defined for LLM task type 'foo' — defaulting to 0.0 GB (unlimited concurrency for this type)
|
|
||||||
```
|
|
||||||
|
|
||||||
**Scheduler loop:**
|
|
||||||
|
|
||||||
```python
|
|
||||||
while not _stop.is_set():
|
|
||||||
_wake.wait(timeout=30)
|
|
||||||
_wake.clear()
|
|
||||||
|
|
||||||
with _lock:
|
|
||||||
# Defense in depth: reap dead threads not yet cleaned by their finally block.
|
|
||||||
# In the normal path, a batch worker's finally block calls _active.pop() and
|
|
||||||
# decrements _reserved_vram BEFORE firing _wake — so by the time we scan here,
|
|
||||||
# the entry is already gone and there is no double-decrement risk.
|
|
||||||
# This reap only catches threads killed externally (daemon exit on shutdown).
|
|
||||||
for t, thread in list(_active.items()):
|
|
||||||
if not thread.is_alive():
|
|
||||||
_reserved_vram -= _budgets.get(t, 0)
|
|
||||||
del _active[t]
|
|
||||||
|
|
||||||
# Start new batches where VRAM allows
|
|
||||||
candidates = sorted(
|
|
||||||
[t for t in _queues if _queues[t] and t not in _active],
|
|
||||||
key=lambda t: len(_queues[t]),
|
|
||||||
reverse=True,
|
|
||||||
)
|
|
||||||
for task_type in candidates:
|
|
||||||
budget = _budgets.get(task_type, 0)
|
|
||||||
if _reserved_vram + budget <= _available_vram:
|
|
||||||
thread = Thread(target=_batch_worker, args=(task_type,), daemon=True)
|
|
||||||
_active[task_type] = thread
|
|
||||||
_reserved_vram += budget
|
|
||||||
thread.start()
|
|
||||||
```
|
|
||||||
|
|
||||||
**Batch worker:**
|
|
||||||
|
|
||||||
The `finally` block is the single authoritative path for releasing `_reserved_vram` and
|
|
||||||
removing the entry from `_active`. Because `_active.pop` runs in `finally` before
|
|
||||||
`_wake.set()`, the scheduler loop's dead-thread scan will never find this entry —
|
|
||||||
no double-decrement is possible in the normal execution path.
|
|
||||||
|
|
||||||
```python
|
|
||||||
def _batch_worker(task_type: str) -> None:
|
|
||||||
try:
|
|
||||||
while True:
|
|
||||||
with _lock:
|
|
||||||
if not _queues[task_type]:
|
|
||||||
break
|
|
||||||
task = _queues[task_type].popleft()
|
|
||||||
_run_task(db_path, task.id, task_type, task.job_id, task.params)
|
|
||||||
finally:
|
|
||||||
with _lock:
|
|
||||||
_active.pop(task_type, None)
|
|
||||||
_reserved_vram -= _budgets.get(task_type, 0)
|
|
||||||
_wake.set()
|
|
||||||
```
|
|
||||||
|
|
||||||
`_run_task` here refers to `task_runner._run_task`, passed in as a callable at
|
|
||||||
construction (e.g. `self._run_task = run_task_fn`). The caller (`task_runner.py`)
|
|
||||||
passes `_run_task` when constructing the scheduler, avoiding any import of `task_runner`
|
|
||||||
from within `task_scheduler`.
|
|
||||||
|
|
||||||
**`enqueue()` method:**
|
|
||||||
|
|
||||||
`enqueue()` only accepts LLM task types. Non-LLM routing is handled in `submit_task()`
|
|
||||||
before `enqueue()` is called (see Routing section above).
|
|
||||||
|
|
||||||
```python
|
|
||||||
def enqueue(self, task_id: int, task_type: str, job_id: int, params: str | None) -> None:
|
|
||||||
with self._lock:
|
|
||||||
q = self._queues.setdefault(task_type, deque())
|
|
||||||
if len(q) >= self._max_queue_depth:
|
|
||||||
logger.warning(
|
|
||||||
"Queue depth limit reached for %s (max=%d) — task %d dropped",
|
|
||||||
task_type, self._max_queue_depth, task_id,
|
|
||||||
)
|
|
||||||
update_task_status(self._db_path, task_id, "failed",
|
|
||||||
error="Queue depth limit reached")
|
|
||||||
return
|
|
||||||
q.append(TaskSpec(task_id, job_id, params))
|
|
||||||
self._wake.set()
|
|
||||||
```
|
|
||||||
|
|
||||||
When a task is dropped at the depth limit, `update_task_status()` marks it `failed` in
|
|
||||||
SQLite immediately — the row inserted by `insert_task()` is never left as a permanent
|
|
||||||
ghost in `queued` state.
|
|
||||||
|
|
||||||
**Singleton access — thread-safe initialization:**
|
|
||||||
|
|
||||||
```python
|
|
||||||
_scheduler: TaskScheduler | None = None
|
|
||||||
_scheduler_lock = threading.Lock()
|
|
||||||
|
|
||||||
def get_scheduler(db_path: Path, run_task_fn=None) -> TaskScheduler:
|
|
||||||
global _scheduler
|
|
||||||
if _scheduler is None:  # fast path — avoids lock on steady state
|
|
||||||
with _scheduler_lock:
|
|
||||||
if _scheduler is None:  # re-check under lock (double-checked locking)
|
|
||||||
_scheduler = TaskScheduler(db_path, run_task_fn)
|
|
||||||
_scheduler.start()
|
|
||||||
return _scheduler
|
|
||||||
|
|
||||||
def reset_scheduler() -> None:
|
|
||||||
"""Tear down and clear singleton. Test teardown only."""
|
|
||||||
global _scheduler
|
|
||||||
with _scheduler_lock:
|
|
||||||
if _scheduler:
|
|
||||||
_scheduler.shutdown()
|
|
||||||
_scheduler = None
|
|
||||||
```
|
|
||||||
|
|
||||||
The safety guarantee comes from the **inner `with _scheduler_lock:` block and re-check**,
|
|
||||||
not from GIL atomicity. The outer `if _scheduler is None` is a performance optimization
|
|
||||||
(avoid acquiring the lock on every `submit_task()` call once the scheduler is running).
|
|
||||||
Two threads racing at startup will both pass the outer check, but only one will win the
|
|
||||||
inner lock and construct the scheduler; the other will see a non-None value on its
|
|
||||||
inner re-check and return the already-constructed instance.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Required Call Ordering in `app.py`
|
|
||||||
|
|
||||||
`reset_running_tasks()` **must complete before** `get_scheduler()` is ever called.
|
|
||||||
The scheduler's durability query reads `status='queued'` rows; if `reset_running_tasks()`
|
|
||||||
has not yet run, a row stuck in `status='running'` from a prior crash would be loaded
|
|
||||||
into the deque and re-executed, producing a duplicate result.
|
|
||||||
|
|
||||||
In practice, the first call to `get_scheduler()` is triggered by the `submit_task()` call
|
|
||||||
inside `_startup()`'s SearXNG auto-recovery block — not by a user action. The ordering
|
|
||||||
holds because `reset_running_tasks()` is called on an earlier line within the same
|
|
||||||
`_startup()` function body. **Do not reorder these calls.**
|
|
||||||
|
|
||||||
```python
|
|
||||||
@st.cache_resource
|
|
||||||
def _startup() -> None:
|
|
||||||
# Step 1: Reset interrupted tasks — MUST come first
|
|
||||||
from scripts.db import reset_running_tasks
|
|
||||||
reset_running_tasks(get_db_path())
|
|
||||||
|
|
||||||
# Step 2 (later in same function): SearXNG re-queue calls submit_task(),
|
|
||||||
# which triggers get_scheduler() for the first time. Ordering is guaranteed
|
|
||||||
# because _startup() runs synchronously and step 1 is already complete.
|
|
||||||
conn = sqlite3.connect(get_db_path())
|
|
||||||
# ... existing SearXNG re-queue logic using conn ...
|
|
||||||
conn.close()
|
|
||||||
```
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Changes to Existing Files
|
|
||||||
|
|
||||||
### `scripts/task_runner.py`
|
|
||||||
|
|
||||||
`submit_task()` gains routing logic; `_run_task` is passed to the scheduler at first call:
|
|
||||||
|
|
||||||
```python
|
|
||||||
def submit_task(db_path, task_type, job_id=None, params=None):
|
|
||||||
task_id, is_new = insert_task(db_path, task_type, job_id or 0, params=params)
|
|
||||||
if is_new:
|
|
||||||
from scripts.task_scheduler import get_scheduler, LLM_TASK_TYPES
|
|
||||||
if task_type in LLM_TASK_TYPES:
|
|
||||||
get_scheduler(db_path, run_task_fn=_run_task).enqueue(
|
|
||||||
task_id, task_type, job_id or 0, params
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
t = threading.Thread(
|
|
||||||
target=_run_task,
|
|
||||||
args=(db_path, task_id, task_type, job_id or 0, params),
|
|
||||||
daemon=True,
|
|
||||||
)
|
|
||||||
t.start()
|
|
||||||
return task_id, is_new
|
|
||||||
```
|
|
||||||
|
|
||||||
`get_scheduler()` accepts `run_task_fn` only on first call (when constructing); subsequent
|
|
||||||
calls ignore it (singleton already initialized). `_run_task()` and all handler branches
|
|
||||||
remain unchanged.
|
|
||||||
|
|
||||||
### `scripts/db.py`
|
|
||||||
|
|
||||||
Add `reset_running_tasks()` alongside the existing `kill_stuck_tasks()`. Like
|
|
||||||
`kill_stuck_tasks()`, it uses a plain `sqlite3.connect()` — consistent with the
|
|
||||||
existing pattern in this file, and appropriate because this call happens before the
|
|
||||||
app's connection pooling is established:
|
|
||||||
|
|
||||||
```python
|
|
||||||
def reset_running_tasks(db_path: Path = DEFAULT_DB) -> int:
|
|
||||||
"""On restart: mark in-flight tasks failed. Queued tasks survive for the scheduler."""
|
|
||||||
conn = sqlite3.connect(db_path)
|
|
||||||
count = conn.execute(
|
|
||||||
"UPDATE background_tasks SET status='failed', error='Interrupted by restart',"
|
|
||||||
" finished_at=datetime('now') WHERE status='running'"
|
|
||||||
).rowcount
|
|
||||||
conn.commit()
|
|
||||||
conn.close()
|
|
||||||
return count
|
|
||||||
```
|
|
||||||
|
|
||||||
### `app/app.py`
|
|
||||||
|
|
||||||
Inside `_startup()`, replace the inline SQL block that wipes both `queued` and `running`
|
|
||||||
rows with a call to `reset_running_tasks()`. The replacement must be the **first operation
|
|
||||||
in `_startup()`** — before the SearXNG re-queue logic that calls `submit_task()`:
|
|
||||||
|
|
||||||
```python
|
|
||||||
# REMOVE this block:
|
|
||||||
conn.execute(
|
|
||||||
"UPDATE background_tasks SET status='failed', error='Interrupted by server restart',"
|
|
||||||
" finished_at=datetime('now') WHERE status IN ('queued','running')"
|
|
||||||
)
|
|
||||||
|
|
||||||
# ADD at the top of _startup(), before any submit_task() calls:
|
|
||||||
from scripts.db import reset_running_tasks
|
|
||||||
reset_running_tasks(get_db_path())
|
|
||||||
```
|
|
||||||
|
|
||||||
The existing `conn` used for subsequent SearXNG logic is unaffected — `reset_running_tasks()`
|
|
||||||
opens and closes its own connection.
|
|
||||||
|
|
||||||
### `config/llm.yaml.example`
|
|
||||||
|
|
||||||
Add `scheduler:` section:
|
|
||||||
|
|
||||||
```yaml
|
|
||||||
scheduler:
|
|
||||||
vram_budgets:
|
|
||||||
cover_letter: 2.5 # alex-cover-writer:latest (~2GB GGUF + headroom)
|
|
||||||
company_research: 5.0 # llama3.1:8b or vllm model
|
|
||||||
wizard_generate: 2.5 # same model family as cover_letter
|
|
||||||
max_queue_depth: 500
|
|
||||||
```
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Data Model
|
|
||||||
|
|
||||||
No schema changes. The existing `background_tasks` table supports all scheduler needs:
|
|
||||||
|
|
||||||
| Column | Scheduler use |
|
|
||||||
|---|---|
|
|
||||||
| `task_type` | Queue routing — determines which deque receives the task |
|
|
||||||
| `status` | `queued` → in deque; `running` → batch worker executing; `completed`/`failed` → done |
|
|
||||||
| `created_at` | FIFO ordering within type (durability startup query sorts by this) |
|
|
||||||
| `params` | Passed through to `_run_task()` unchanged |
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Durability
|
|
||||||
|
|
||||||
Scope: **LLM task types only** (`cover_letter`, `company_research`, `wizard_generate`).
|
|
||||||
Non-LLM tasks do not survive restarts, same as current behavior.
|
|
||||||
|
|
||||||
On construction, `TaskScheduler.__init__()` queries:
|
|
||||||
|
|
||||||
```sql
|
|
||||||
SELECT id, task_type, job_id, params
|
|
||||||
FROM background_tasks
|
|
||||||
WHERE status = 'queued'
|
|
||||||
AND task_type IN ('cover_letter', 'company_research', 'wizard_generate')
|
|
||||||
ORDER BY created_at ASC
|
|
||||||
```
|
|
||||||
|
|
||||||
Results are pushed onto their respective deques. This query runs inside `__init__` before
|
|
||||||
`start()` is called (before the scheduler loop thread exists), so there is no concurrency
|
|
||||||
concern with deque population.
|
|
||||||
|
|
||||||
`running` rows are reset to `failed` by `reset_running_tasks()` before `get_scheduler()`
|
|
||||||
is called — see Required Call Ordering above.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Known Limitations
|
|
||||||
|
|
||||||
**Static `_available_vram`:** Total GPU VRAM is read from `get_gpus()` once at scheduler
|
|
||||||
construction and never refreshed. Changes after startup — another process releasing VRAM,
|
|
||||||
a GPU going offline, Ollama unloading a model — are not reflected. The scheduler's
|
|
||||||
correctness depends on per-task VRAM budgets being conservative estimates of **peak model
|
|
||||||
footprint** (not free VRAM at a given moment). On a system where Ollama and vLLM share
|
|
||||||
the GPU, budgets should account for both models potentially resident simultaneously.
|
|
||||||
Dynamic VRAM polling is a future enhancement.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Memory Safety
|
|
||||||
|
|
||||||
- **`finally` block owns VRAM release** — batch worker always decrements `_reserved_vram`
|
|
||||||
and removes its `_active` entry before firing `_wake`, even on exception. The scheduler
|
|
||||||
loop's dead-thread scan is defense in depth for externally-killed daemons only; it cannot
|
|
||||||
double-decrement because `_active.pop` in `finally` runs first.
|
|
||||||
- **Max queue depth with DB cleanup** — `enqueue()` rejects tasks past `max_queue_depth`,
|
|
||||||
logs a warning, and immediately marks the dropped task `failed` in SQLite to prevent
|
|
||||||
permanent ghost rows in `queued` state.
|
|
||||||
- **No in-memory history** — deques hold only pending `TaskSpec` namedtuples. Completed
|
|
||||||
and failed state lives exclusively in SQLite. Memory footprint is `O(pending tasks)`.
|
|
||||||
- **Thread-safe singleton** — double-checked locking with `_scheduler_lock` prevents
|
|
||||||
double-construction. Safety comes from the inner lock + re-check; the outer `None`
|
|
||||||
check is a performance optimization only.
|
|
||||||
- **Missing budget warning** — any `LLM_TASK_TYPES` entry with no budget entry after
|
|
||||||
config merge logs a warning at construction; defaults to 0.0 GB (unlimited concurrency
|
|
||||||
for that type). This prevents silent incorrect scheduling for future task types.
|
|
||||||
- **`reset_scheduler()`** — explicit teardown for test isolation: sets `_stop`, joins
|
|
||||||
scheduler thread with timeout, clears module-level reference under `_scheduler_lock`.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Testing (`tests/test_task_scheduler.py`)
|
|
||||||
|
|
||||||
All tests mock `_run_task` to avoid real LLM calls. `reset_scheduler()` is called in
|
|
||||||
an `autouse` fixture for isolation between test cases.
|
|
||||||
|
|
||||||
| Test | What it verifies |
|
|
||||||
|---|---|
|
|
||||||
| `test_deepest_queue_wins_first_slot` | N cover_letter + M research enqueued (N > M); cover_letter batch starts first when `_available_vram` only fits one model budget, because it has the deeper queue |
|
|
||||||
| `test_fifo_within_type` | Arrival order preserved within a type batch |
|
|
||||||
| `test_concurrent_batches_when_vram_allows` | Two type batches start simultaneously when `_available_vram` fits both budgets combined |
|
|
||||||
| `test_new_tasks_picked_up_mid_batch` | Task enqueued via `enqueue()` while a batch is active is consumed by the running worker in the same batch |
|
|
||||||
| `test_worker_crash_releases_vram` | `_run_task` raises; `_reserved_vram` returns to 0; scheduler continues; no double-decrement |
|
|
||||||
| `test_non_llm_tasks_bypass_scheduler` | `discovery`, `email_sync` etc. spawn free threads via `submit_task()`; scheduler deques untouched |
|
|
||||||
| `test_durability_llm_tasks_on_startup` | DB has existing `queued` LLM-type rows; scheduler loads them into deques on construction |
|
|
||||||
| `test_durability_excludes_non_llm` | `queued` non-LLM rows in DB are not loaded into deques on startup |
|
|
||||||
| `test_running_rows_reset_before_scheduler` | `reset_running_tasks()` sets `running` → `failed`; `queued` rows untouched |
|
|
||||||
| `test_max_queue_depth_marks_failed` | Enqueue past limit logs warning, does not add to deque, and marks task `failed` in DB |
|
|
||||||
| `test_missing_budget_logs_warning` | Type in `LLM_TASK_TYPES` with no budget entry at construction logs a warning |
|
|
||||||
| `test_singleton_thread_safe` | Concurrent calls to `get_scheduler()` produce exactly one scheduler instance |
|
|
||||||
| `test_reset_scheduler_cleans_up` | `reset_scheduler()` stops loop thread; no lingering threads after call |
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Files Touched
|
|
||||||
|
|
||||||
| File | Change |
|
|
||||||
|---|---|
|
|
||||||
| `scripts/task_scheduler.py` | **New** — ~180 lines |
|
|
||||||
| `scripts/task_runner.py` | `submit_task()` routing shim — ~12 lines changed |
|
|
||||||
| `scripts/db.py` | `reset_running_tasks()` added — ~10 lines |
|
|
||||||
| `app/app.py` | `_startup()`: inline SQL block → `reset_running_tasks()` call, placed first |
|
|
||||||
| `config/llm.yaml.example` | Add `scheduler:` section |
|
|
||||||
| `tests/test_task_scheduler.py` | **New** — ~240 lines |
|
|
||||||
|
|
@ -1,173 +0,0 @@
|
||||||
# Jobgether Integration Design
|
|
||||||
|
|
||||||
**Date:** 2026-03-15
|
|
||||||
**Status:** Approved
|
|
||||||
**Scope:** Peregrine — discovery pipeline + manual URL import
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Problem
|
|
||||||
|
|
||||||
Jobgether is a job aggregator that posts listings on LinkedIn and other boards with `company = "Jobgether"` rather than the actual employer. This causes two problems:
|
|
||||||
|
|
||||||
1. **Misleading listings** — Jobs appear to be at "Jobgether" rather than the real hiring company. Meg sees "Jobgether" as employer throughout the pipeline (Job Review, cover letters, company research).
|
|
||||||
2. **Broken manual import** — Direct `jobgether.com` URLs return HTTP 403 when scraped with plain `requests`, leaving jobs stuck as `title = "Importing…"`.
|
|
||||||
|
|
||||||
**Evidence from DB:** 29+ Jobgether-sourced LinkedIn listings with `company = "Jobgether"`. Actual employer is intentionally withheld by Jobgether's business model ("on behalf of a partner company").
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Decision: Option A — Filter + Dedicated Scraper
|
|
||||||
|
|
||||||
Drop Jobgether listings from other scrapers entirely and replace with a direct Jobgether scraper that retrieves accurate company names. Existing Jobgether-via-LinkedIn listings in the DB are left as-is for manual review/rejection.
|
|
||||||
|
|
||||||
**Why not Option B (follow-through):** LinkedIn→Jobgether→employer is a two-hop chain where the employer is deliberately hidden. Jobgether blocks `requests`. Not worth the complexity for unreliable data.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Components
|
|
||||||
|
|
||||||
### 1. Jobgether company filter — `config/blocklist.yaml`
|
|
||||||
|
|
||||||
Add `"jobgether"` to the `companies` list in `config/blocklist.yaml`. The existing `_is_blocklisted()` function in `discover.py` already performs a partial case-insensitive match on the company field and applies to all scrapers (JobSpy boards + all custom boards). No code change required.
|
|
||||||
|
|
||||||
```yaml
|
|
||||||
companies:
|
|
||||||
- jobgether
|
|
||||||
```
|
|
||||||
|
|
||||||
This is the correct mechanism — it is user-visible, config-driven, and applies uniformly. Log output already reports blocklisted jobs per run.
|
|
||||||
|
|
||||||
### 2. URL handling in `scrape_url.py`
|
|
||||||
|
|
||||||
Three changes required:
|
|
||||||
|
|
||||||
**a) `_detect_board()`** — add `"jobgether"` branch returning `"jobgether"` when `"jobgether.com"` is in the URL. Must be added before the `return "generic"` fallback.
|
|
||||||
|
|
||||||
**b) dispatch block in `scrape_job_url()`** — add `elif board == "jobgether": fields = _scrape_jobgether(url)` to the `if/elif` chain (lines 208–215). Without this, the new `_detect_board()` branch silently falls through to `_scrape_generic()`.
|
|
||||||
|
|
||||||
**c) `_scrape_jobgether(url)`** — Playwright-based scraper to bypass 403. Extracts:
|
|
||||||
- `title` — job title from page heading
|
|
||||||
- `company` — actual employer name (visible on Jobgether offer pages)
|
|
||||||
- `location` — remote/location info
|
|
||||||
- `description` — full job description
|
|
||||||
- `source = "jobgether"`
|
|
||||||
|
|
||||||
Playwright errors (`playwright.sync_api.Error`, `TimeoutError`) are not subclasses of `requests.RequestException` but are caught by the existing broad `except Exception` handler in `scrape_job_url()` — no changes needed to the error handling block.
|
|
||||||
|
|
||||||
**URL slug fallback for company name (manual import path only):** Jobgether offer URLs follow the pattern:
|
|
||||||
```
|
|
||||||
https://jobgether.com/offer/{24-hex-hash}-{title-slug}---{company-slug}
|
|
||||||
```
|
|
||||||
When Playwright is unavailable, parse `company-slug` using:
|
|
||||||
```python
|
|
||||||
m = re.search(r'---([^/?]+)$', parsed_path)
|
|
||||||
company = m.group(1).replace("-", " ").title() if m else ""
|
|
||||||
```
|
|
||||||
Example: `/offer/69b42d9d24d79271ee0618e8-customer-success-manager---resware` → `"Resware"`.
|
|
||||||
|
|
||||||
This fallback is scoped to `_scrape_jobgether()` in `scrape_url.py` only; the discovery scraper always gets company name from the rendered DOM. `_scrape_jobgether()` does not make any `requests` calls — there is no `raise_for_status()` — so the `requests.RequestException` handler in `scrape_job_url()` is irrelevant to this path; only the broad `except Exception` applies.
|
|
||||||
|
|
||||||
**Pre-implementation checkpoint:** Confirm that Jobgether offer URLs have no tracking query params beyond UTM (already covered by `_STRIP_PARAMS`). No `canonicalize_url()` changes are expected but verify before implementation.
|
|
||||||
|
|
||||||
### 3. `scripts/custom_boards/jobgether.py`
|
|
||||||
|
|
||||||
Playwright-based search scraper following the same interface as `theladders.py`:
|
|
||||||
|
|
||||||
```python
|
|
||||||
def scrape(profile: dict, location: str, results_wanted: int = 50) -> list[dict]
|
|
||||||
```
|
|
||||||
|
|
||||||
- Base URL: `https://jobgether.com/remote-jobs`
|
|
||||||
- Search strategy: iterate over `profile["titles"]`, apply search/filter params
|
|
||||||
- **Pre-condition — do not begin implementation of this file until live URL inspection is complete.** Use browser dev tools or a Playwright `page.on("request")` capture to determine the actual query parameter format for title/location filtering. Jobgether may use URL query params, path segments, or JS-driven state — this cannot be assumed from the URL alone.
|
|
||||||
- Extraction: job cards from rendered DOM (Playwright `page.evaluate()`)
|
|
||||||
- Returns standard job dicts: `title, company, url, source, location, is_remote, salary, description`
|
|
||||||
- `source = "jobgether"`
|
|
||||||
- Graceful `ImportError` handling if Playwright not installed (same pattern as `theladders.py`)
|
|
||||||
- Polite pacing: 1s sleep between title iterations
|
|
||||||
- Company name comes from DOM; URL slug parse is not needed in this path
|
|
||||||
|
|
||||||
### 4. Registration + config
|
|
||||||
|
|
||||||
**`discover.py` — import block (lines 20–22):**
|
|
||||||
```python
|
|
||||||
from scripts.custom_boards import jobgether as _jobgether
|
|
||||||
```
|
|
||||||
|
|
||||||
**`discover.py` — `CUSTOM_SCRAPERS` dict literal (lines 30–34):**
|
|
||||||
```python
|
|
||||||
CUSTOM_SCRAPERS: dict[str, object] = {
|
|
||||||
"adzuna": _adzuna.scrape,
|
|
||||||
"theladders": _theladders.scrape,
|
|
||||||
"craigslist": _craigslist.scrape,
|
|
||||||
"jobgether": _jobgether.scrape, # ← add this line
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
**`config/search_profiles.yaml` (and `.example`):**
|
|
||||||
Add `jobgether` to `custom_boards` for any profile that includes `Remote` in its `locations` list. Jobgether is a remote-work-focused aggregator; adding it to location-specific non-remote profiles is not useful. Do not add a `custom_boards` key to profiles that don't already have one unless they are remote-eligible.
|
|
||||||
```yaml
|
|
||||||
custom_boards:
|
|
||||||
- jobgether
|
|
||||||
```
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Data Flow
|
|
||||||
|
|
||||||
```
|
|
||||||
discover.py
|
|
||||||
├── JobSpy boards → _is_blocklisted(company="jobgether") → drop → DB insert
|
|
||||||
├── custom: adzuna → _is_blocklisted(company="jobgether") → drop → DB insert
|
|
||||||
├── custom: theladders → _is_blocklisted(company="jobgether") → drop → DB insert
|
|
||||||
├── custom: craigslist → _is_blocklisted(company="jobgether") → drop → DB insert
|
|
||||||
└── custom: jobgether → (company = real employer, never "jobgether") → DB insert
|
|
||||||
|
|
||||||
scrape_url.py
|
|
||||||
└── jobgether.com URL → _detect_board() = "jobgether"
|
|
||||||
→ _scrape_jobgether()
|
|
||||||
├── Playwright available → full job fields from page
|
|
||||||
└── Playwright unavailable → company from URL slug only
|
|
||||||
```
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Implementation Notes
|
|
||||||
|
|
||||||
- **Slug fallback None-guard:** The regex `r'---([^/?]+)$'` returns a wrong value (not `None`) if the URL slug doesn't follow the expected format. Add a logged warning and return `""` rather than title-casing garbage.
|
|
||||||
- **Import guard in `discover.py`:** Wrap the `jobgether` import with `try/except ImportError`, setting `_jobgether = None`, and gate the `CUSTOM_SCRAPERS` registration with `if _jobgether is not None`. This ensures that a missing Playwright installation degrades gracefully — the `jobgether` scraper is simply not registered — instead of an `ImportError` raised in `jobgether.py` crashing discovery at startup.
|
|
||||||
|
|
||||||
### 5. Cover letter recruiter framing — `scripts/generate_cover_letter.py`
|
|
||||||
|
|
||||||
When `source = "jobgether"`, inject a system hint that shifts the cover letter addressee from the employer to the Jobgether recruiter. Use Policy A: recruiter framing applies for all Jobgether-sourced jobs regardless of whether the real company name was resolved.
|
|
||||||
|
|
||||||
- If company is known (e.g. "Resware"): *"Your client at Resware will appreciate..."*
|
|
||||||
- If company is unknown: *"Your client will appreciate..."*
|
|
||||||
|
|
||||||
The real company name is always stored in the DB as resolved by the scraper — this is internal knowledge only. The framing shift is purely in the generated letter text, not in how the job is stored or displayed.
|
|
||||||
|
|
||||||
Implementation: add an `is_jobgether` flag to the cover letter prompt context (same pattern as `mission_hint` injection). Add a conditional block in the system prompt / Para 1 instructions when the flag is true.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Out of Scope
|
|
||||||
|
|
||||||
- Retroactively fixing existing `company = "Jobgether"` rows in the DB (left for manual review/rejection)
|
|
||||||
- Jobgether discovery scraper — **decided against during implementation (2026-03-15)**: Cloudflare Turnstile blocks all headless browsers on all Jobgether pages; `filter-api.jobgether.com` requires auth; `robots.txt` blocks all bots. The email digest → manual URL paste → slug company extraction flow covers the actual use case.
|
|
||||||
- Jobgether authentication / logged-in scraping
|
|
||||||
- Pagination
|
|
||||||
- Dedup between Jobgether and other boards (existing URL dedup handles this)
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Files Changed
|
|
||||||
|
|
||||||
| File | Change |
|
|
||||||
|------|--------|
|
|
||||||
| `config/blocklist.yaml` | Add `"jobgether"` to `companies` list |
|
|
||||||
| `scripts/discover.py` | Add import + entry in `CUSTOM_SCRAPERS` dict literal |
|
|
||||||
| `scripts/scrape_url.py` | Add `_detect_board` branch, dispatch branch, `_scrape_jobgether()` |
|
|
||||||
| `scripts/custom_boards/jobgether.py` | New file — Playwright search scraper |
|
|
||||||
| `config/search_profiles.yaml` | Add `jobgether` to `custom_boards` |
|
|
||||||
| `config/search_profiles.yaml.example` | Same |
|
|
||||||
|
|
@ -1,258 +0,0 @@
|
||||||
# UI Switcher — Design Spec
|
|
||||||
**Date:** 2026-03-22
|
|
||||||
**Status:** Approved
|
|
||||||
**Scope:** Peregrine v0.7.0
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Overview
|
|
||||||
|
|
||||||
Add a Reddit-style UI switcher that lets paid-tier users opt into the new Vue 3 SPA while the Streamlit UI remains the default. The Vue SPA ships merged into `main` (gated behind a paid-tier feature flag), served by a new nginx Docker service alongside Streamlit. The demo instance gets both the UI switcher (open to all visitors) and a simulated tier switcher so demo visitors can explore all feature tiers.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Decisions
|
|
||||||
|
|
||||||
| Question | Decision |
|
|
||||||
|---|---|
|
|
||||||
| Switcher placement | Banner (once per session, dismissible) + Settings → System toggle |
|
|
||||||
| Vue SPA serving | New `web` Docker service (nginx) in all three compose files |
|
|
||||||
| Preference persistence | JS cookie (`prgn_ui`) as Caddy routing signal; `user.yaml` as durability layer |
|
|
||||||
| Switching mechanism | JS cookie injection via `st.components.v1.html()` (Streamlit→Vue); client-side JS (Vue→Streamlit) |
|
|
||||||
| Tier gate | `vue_ui_beta: "paid"` in `tiers.py`; bypassed in `DEMO_MODE` |
|
|
||||||
| Branch strategy | Merge `feature-vue-spa` → `main` now; future Vue work uses `feature/vue-*` → `main` PRs |
|
|
||||||
| Demo UI switcher | Open to all demo visitors (no tier gate) |
|
|
||||||
| Demo tier switcher | Slim full-width toolbar above nav; cookie-based persistence (`prgn_demo_tier`) |
|
|
||||||
| Banner dismissal | Uses existing `dismissed_banners` list in `user.yaml` (key: `ui_switcher_beta`) |
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Port Reference
|
|
||||||
|
|
||||||
| Compose file | Host port | Purpose |
|
|
||||||
|---|---|---|
|
|
||||||
| `compose.yml` | 8501 | Personal dev instance |
|
|
||||||
| `compose.demo.yml` | 8504 | Demo (`demo.circuitforge.tech`) |
|
|
||||||
| `compose.cloud.yml` | 8505 | Cloud managed (`menagerie.circuitforge.tech`) |
|
|
||||||
| `compose.yml` (web) | 8506 | Vue SPA — dev |
|
|
||||||
| `compose.demo.yml` (web) | 8507 | Vue SPA — demo |
|
|
||||||
| `compose.cloud.yml` (web) | 8508 | Vue SPA — cloud |
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Architecture
|
|
||||||
|
|
||||||
Six additive components — nothing removed from the existing stack.
|
|
||||||
|
|
||||||
### 1. `web` Docker service
|
|
||||||
|
|
||||||
A minimal nginx container serving the Vue SPA `dist/` build. Added to `compose.yml`, `compose.demo.yml`, and `compose.cloud.yml`.
|
|
||||||
|
|
||||||
- `docker/web/Dockerfile` — `FROM nginx:alpine`, copies `nginx.conf`, copies `web/dist/` into `/usr/share/nginx/html/`
|
|
||||||
- `docker/web/nginx.conf` — standard SPA config with `try_files $uri /index.html` fallback
|
|
||||||
- Build step is image-baked (not a bind-mount): `docker compose build web` runs `vite build` in `web/` via a multi-stage Dockerfile, then copies the resulting `dist/` into the nginx image. This ensures a fresh clone + `manage.sh start` works without a separate manual build step.
|
|
||||||
- `manage.sh` updated: `build` target runs `docker compose build web app` so both are built together.
|
|
||||||
|
|
||||||
### 2. Caddy cookie routing
|
|
||||||
|
|
||||||
Caddy inspects the `prgn_ui` cookie on all Peregrine requests. Two vhost blocks require changes:
|
|
||||||
|
|
||||||
**`menagerie.circuitforge.tech` (cloud, port 8505/8508):**
|
|
||||||
```
|
|
||||||
handle /peregrine* {
|
|
||||||
@no_session not header Cookie *cf_session*
|
|
||||||
redir @no_session https://circuitforge.tech/login?next={uri} 302
|
|
||||||
|
|
||||||
@vue_ui header Cookie *prgn_ui=vue*
|
|
||||||
handle @vue_ui {
|
|
||||||
reverse_proxy http://host.docker.internal:8508
|
|
||||||
}
|
|
||||||
handle {
|
|
||||||
reverse_proxy http://host.docker.internal:8505
|
|
||||||
}
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
**`demo.circuitforge.tech` (demo, port 8504/8507):**
|
|
||||||
```
|
|
||||||
handle /peregrine* {
|
|
||||||
@vue_ui header Cookie *prgn_ui=vue*
|
|
||||||
handle @vue_ui {
|
|
||||||
reverse_proxy http://host.docker.internal:8507
|
|
||||||
}
|
|
||||||
handle {
|
|
||||||
reverse_proxy http://host.docker.internal:8504
|
|
||||||
}
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
Error handling: a `handle_errors { ... }` block on each vhost catches 502 from the Vue SPA service, redirects to the Streamlit upstream with `?ui_fallback=1`, and includes a `Set-Cookie: prgn_ui=streamlit; Path=/` response header to clear the routing cookie.
|
|
||||||
|
|
||||||
### 3. Streamlit switch mechanism
|
|
||||||
|
|
||||||
New module `app/components/ui_switcher.py`:
|
|
||||||
|
|
||||||
- `sync_ui_cookie()` — called **in the render pass** (after `pg.run()` in `app.py`), not inside the cached startup hook. Reads `user.yaml.ui_preference`; injects JS to set/clear `prgn_ui` cookie. Cookie/user.yaml conflict: **cookie wins** — if `prgn_ui` cookie is already present, writes user.yaml to match before re-injecting. If `DEMO_MODE`, skips tier check. If not `DEMO_MODE` and not `can_use("vue_ui_beta")`, resets preference to `streamlit` and clears cookie.
|
|
||||||
- `switch_ui(to: str)` — writes `user.yaml.ui_preference`, calls `sync_ui_cookie()`, then `st.rerun()`.
|
|
||||||
- `render_banner()` — dismissible banner shown to eligible users when `ui_switcher_beta` is not in `user_profile.dismissed_banners`. On dismiss: appends `ui_switcher_beta` to `dismissed_banners`, saves `user.yaml`. On "Try it": calls `switch_ui("vue")`. Also detects `?ui_fallback=1` in `st.query_params` and shows a toast ("New UI temporarily unavailable — switched back to Classic") then clears the param.
|
|
||||||
- `render_settings_toggle()` — toggle in Settings → System → Deployment expander. Calls `switch_ui()` on change.
|
|
||||||
|
|
||||||
### 4. Vue SPA switch-back
|
|
||||||
|
|
||||||
New `web/src/components/ClassicUIButton.vue`:
|
|
||||||
|
|
||||||
```js
|
|
||||||
function switchToClassic() {
|
|
||||||
document.cookie = 'prgn_ui=streamlit; path=/; SameSite=Lax';
|
|
||||||
const url = new URL(window.location.href);
|
|
||||||
url.searchParams.set('prgn_switch', 'streamlit');
|
|
||||||
window.location.href = url.toString();
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
**Why the query param?** Streamlit cannot read HTTP cookies from Python — only client-side JS can. The `?prgn_switch=streamlit` param acts as a bridge: `sync_ui_cookie()` reads it via `st.query_params`, updates user.yaml to match, then clears the param. The cookie is set by the JS before the navigation so Caddy routes the request to Streamlit, and the param ensures user.yaml stays consistent with the cookie.
|
|
||||||
|
|
||||||
### 5. Tier gate
|
|
||||||
|
|
||||||
`app/wizard/tiers.py`:
|
|
||||||
|
|
||||||
```python
|
|
||||||
FEATURES: dict[str, str] = {
|
|
||||||
...
|
|
||||||
"vue_ui_beta": "paid", # add this
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
Not in `BYOK_UNLOCKABLE` — the Vue UI has no LLM dependency; the gate is purely about beta access management.
|
|
||||||
|
|
||||||
`can_use()` signature change — keyword-only argument with a safe default:
|
|
||||||
|
|
||||||
```python
|
|
||||||
def can_use(
|
|
||||||
tier: str,
|
|
||||||
feature: str,
|
|
||||||
has_byok: bool = False,
|
|
||||||
*,
|
|
||||||
demo_tier: str | None = None,
|
|
||||||
) -> bool:
|
|
||||||
effective_tier = demo_tier if (demo_tier and DEMO_MODE_FLAG) else tier
|
|
||||||
...
|
|
||||||
```
|
|
||||||
|
|
||||||
Argument order preserved from the existing implementation (`tier` first, `feature` second) — no existing call sites need updating. `DEMO_MODE_FLAG` is read from the environment, not from `st.session_state`, so this function is safe to call from background task threads and tests. `st.session_state.simulated_tier` is only read by the **caller** (`render_banner()`, `render_settings_toggle()`, page feature gates) which then passes it as `demo_tier=`.
|
|
||||||
|
|
||||||
### 6. Demo toolbar
|
|
||||||
|
|
||||||
New module `app/components/demo_toolbar.py`:
|
|
||||||
|
|
||||||
- `render_demo_toolbar()` — slim full-width bar rendered at the top of `app.py`'s render pass when `DEMO_MODE=true`. Shows `🎭 Demo mode · Free · Paid · Premium` pills with the active tier highlighted.
|
|
||||||
- `set_simulated_tier(tier: str)` — injects JS to set `prgn_demo_tier` cookie, updates `st.session_state.simulated_tier`, calls `st.rerun()`.
|
|
||||||
- Initialization: on each page load in demo mode, `app.py` reads `prgn_demo_tier` from `st.query_params` or the cookie (via a JS→hidden Streamlit input bridge, same pattern used by existing components) and sets `st.session_state.simulated_tier`. **Default if not set: `paid`** — shows the full feature set immediately on first demo load.
|
|
||||||
|
|
||||||
`useFeatureFlag.ts` (Vue SPA, `web/src/composables/`) is **demo-toolbar only** — it reads `prgn_demo_tier` cookie for the visual indicator in the Vue SPA's ClassicUIButton area. It is **not** an authoritative feature gate. All real feature gating in the Vue SPA will use a future `/api/features` endpoint (tracked under issue #8). This composable exists solely so the demo toolbar's simulated tier is visually consistent when the user has switched to the Vue SPA.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## File Changes
|
|
||||||
|
|
||||||
### New files
|
|
||||||
| File | Purpose |
|
|
||||||
|---|---|
|
|
||||||
| `app/components/ui_switcher.py` | `sync_ui_cookie`, `switch_ui`, `render_banner`, `render_settings_toggle` |
|
|
||||||
| `app/components/demo_toolbar.py` | `render_demo_toolbar`, `set_simulated_tier` |
|
|
||||||
| `docker/web/Dockerfile` | Multi-stage: `node` build stage → `nginx:alpine` serve stage |
|
|
||||||
| `docker/web/nginx.conf` | SPA-aware nginx config |
|
|
||||||
| `web/` | Vue SPA source (merged from `feature-vue-spa` worktree) |
|
|
||||||
| `web/src/components/ClassicUIButton.vue` | Switch-back button for Vue SPA nav |
|
|
||||||
| `web/src/composables/useFeatureFlag.ts` | Demo toolbar tier display (not a production gate) |
|
|
||||||
|
|
||||||
### Modified files
|
|
||||||
| File | Change |
|
|
||||||
|---|---|
|
|
||||||
| `app/app.py` | Call `sync_ui_cookie()` + `render_demo_toolbar()` + `render_banner()` in render pass |
|
|
||||||
| `app/wizard/tiers.py` | Add `vue_ui_beta: "paid"` to `FEATURES`; add `demo_tier` keyword arg to `can_use()` |
|
|
||||||
| `app/pages/2_Settings.py` | Add `render_settings_toggle()` in System → Deployment expander |
|
|
||||||
| `config/user.yaml.example` | Add `ui_preference: streamlit` |
|
|
||||||
| `scripts/user_profile.py` | Add `ui_preference` field to schema (default: `streamlit`) |
|
|
||||||
| `compose.yml` | Add `web` service (port 8506) |
|
|
||||||
| `compose.demo.yml` | Add `web` service (port 8507) |
|
|
||||||
| `compose.cloud.yml` | Add `web` service (port 8508) |
|
|
||||||
| `manage.sh` | `build` target includes `web` service |
|
|
||||||
| `/devl/caddy-proxy/Caddyfile` | Cookie routing in `menagerie.circuitforge.tech` + `demo.circuitforge.tech` peregrine blocks |
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Data Flow
|
|
||||||
|
|
||||||
### Streamlit → Vue
|
|
||||||
```
|
|
||||||
User clicks "Try it" banner or Settings toggle
|
|
||||||
→ switch_ui(to="vue")
|
|
||||||
→ write user.yaml: ui_preference: vue
|
|
||||||
→ sync_ui_cookie(): inject JS → document.cookie = 'prgn_ui=vue; path=/'
|
|
||||||
→ st.rerun()
|
|
||||||
→ browser reloads → Caddy sees prgn_ui=vue → :8508/:8507 (Vue SPA)
|
|
||||||
```
|
|
||||||
|
|
||||||
### Vue → Streamlit
|
|
||||||
```
|
|
||||||
User clicks "Classic UI" in Vue nav
|
|
||||||
→ document.cookie = 'prgn_ui=streamlit; path=/'
|
|
||||||
→ navigate to current URL + ?prgn_switch=streamlit
|
|
||||||
→ Caddy sees prgn_ui=streamlit → :8505/:8504 (Streamlit)
|
|
||||||
→ app.py render pass: sync_ui_cookie() sees ?prgn_switch=streamlit in st.query_params
|
|
||||||
→ writes user.yaml: ui_preference: streamlit
|
|
||||||
→ clears query param
|
|
||||||
→ injects JS to re-confirm cookie
|
|
||||||
```
|
|
||||||
|
|
||||||
### Demo tier switch
|
|
||||||
```
|
|
||||||
User clicks tier pill in demo toolbar
|
|
||||||
→ set_simulated_tier("paid")
|
|
||||||
→ inject JS → document.cookie = 'prgn_demo_tier=paid; path=/'
|
|
||||||
→ st.session_state.simulated_tier = "paid"
|
|
||||||
→ st.rerun()
|
|
||||||
→ render_banner() / page feature gates call can_use(..., demo_tier=st.session_state.simulated_tier)
|
|
||||||
```
|
|
||||||
|
|
||||||
### Cookie cleared (durability)
|
|
||||||
```
|
|
||||||
Browser cookies cleared
|
|
||||||
→ next Streamlit load: sync_ui_cookie() reads user.yaml: ui_preference: vue
|
|
||||||
→ re-injects prgn_ui=vue cookie
|
|
||||||
→ next navigation: Caddy routes to Vue SPA
|
|
||||||
```
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Error Handling
|
|
||||||
|
|
||||||
| Scenario | Handling |
|
|
||||||
|---|---|
|
|
||||||
| Vue SPA service down (502) | Caddy `handle_errors` sets `Set-Cookie: prgn_ui=streamlit` + redirects to Streamlit with `?ui_fallback=1` |
|
|
||||||
| `?ui_fallback=1` detected | `render_banner()` shows toast "New UI temporarily unavailable — switched back to Classic"; calls `switch_ui("streamlit")` |
|
|
||||||
| user.yaml missing/malformed | `sync_ui_cookie()` try/except defaults to `streamlit`; no crash |
|
|
||||||
| Cookie/user.yaml conflict | Cookie wins — `sync_ui_cookie()` writes user.yaml to match cookie if present |
|
|
||||||
| Tier downgrade with vue cookie | `sync_ui_cookie()` detects `not can_use("vue_ui_beta")` → clears cookie + resets user.yaml |
|
|
||||||
| Demo toolbar in non-demo mode | `render_demo_toolbar()` only called when `DEMO_MODE=true`; `prgn_demo_tier` ignored by `can_use()` outside demo |
|
|
||||||
| `can_use()` called from background thread | `demo_tier` param defaults to `None`; `DEMO_MODE_FLAG` is env-only — no `st.session_state` access in the function body; thread-safe |
|
|
||||||
| First demo load (no cookie yet) | `st.session_state.simulated_tier` initialized to `"paid"` if `prgn_demo_tier` cookie absent |
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Testing
|
|
||||||
|
|
||||||
- **Unit**: `sync_ui_cookie()` with all three conflict cases; `can_use("vue_ui_beta")` for free/paid/premium/demo tiers; `set_simulated_tier()` state transitions; `can_use()` called with `demo_tier=` from a non-Streamlit context (no `RuntimeError`)
|
|
||||||
- **Integration**: Caddy routing with mocked cookie headers (both directions); 502 fallback redirect + cookie clear chain
|
|
||||||
- **E2E**: Streamlit→Vue switch → verify served from Vue SPA port; Vue→Streamlit → verify Streamlit port; demo tier pill → verify feature gate state changes; cookie persistence across Streamlit restart; fresh clone `./manage.sh start` builds and serves Vue SPA correctly
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## Out of Scope
|
|
||||||
|
|
||||||
- Vue SPA feature parity with Streamlit (tracked under issue #8)
|
|
||||||
- Removing the Streamlit UI (v1 GA milestone)
|
|
||||||
- `old.peregrine.circuitforge.tech` subdomain alias (not needed — cookie approach is sufficient)
|
|
||||||
- Authoritative Vue-side feature gating via `/api/features` endpoint (post-parity, issue #8)
|
|
||||||
- Fine-tuned model or integrations gating in the Vue SPA (future work)
|
|
||||||
|
|
@ -1,5 +1,7 @@
|
||||||
# Apply Workspace
|
# Apply Workspace
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
The Apply Workspace is where you generate cover letters, export application documents, and record that you have applied to a job.
|
The Apply Workspace is where you generate cover letters, export application documents, and record that you have applied to a job.
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,7 @@
|
||||||
# Job Review
|
# Job Review
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
The Job Review page is where you approve or reject newly discovered jobs before they enter the application pipeline.
|
The Job Review page is where you approve or reject newly discovered jobs before they enter the application pipeline.
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
|
||||||
174
docs/vue-spa-migration.md
Normal file
174
docs/vue-spa-migration.md
Normal file
|
|
@ -0,0 +1,174 @@
|
||||||
|
# Peregrine Vue 3 SPA Migration
|
||||||
|
|
||||||
|
**Branch:** `feature/vue-spa`
|
||||||
|
**Issue:** #8 — Vue 3 SPA frontend (Paid Tier GA milestone)
|
||||||
|
**Worktree:** `.worktrees/feature-vue-spa/`
|
||||||
|
**Reference:** `avocet/docs/vue-port-gotchas.md` (15 battle-tested gotchas)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## What We're Replacing
|
||||||
|
|
||||||
|
The current Streamlit UI (`app/app.py` + `app/pages/`) is an internal tool built for speed of development. The Vue SPA replaces it with a proper frontend — faster, more accessible, and extensible for the Paid Tier. The FastAPI backend already exists (partially, from the cloud managed instance work); the Vue SPA will consume it.
|
||||||
|
|
||||||
|
### Pages to Port
|
||||||
|
|
||||||
|
| Streamlit file | Vue view | Route | Notes |
|
||||||
|
|---|---|---|---|
|
||||||
|
| `app/Home.py` | `HomeView.vue` | `/` | Dashboard, discovery trigger, sync status |
|
||||||
|
| `app/pages/1_Job_Review.py` | `JobReviewView.vue` | `/review` | Batch approve/reject; primary daily-driver view |
|
||||||
|
| `app/pages/4_Apply.py` | `ApplyView.vue` | `/apply` | Cover letter gen + PDF + mark applied |
|
||||||
|
| `app/pages/5_Interviews.py` | `InterviewsView.vue` | `/interviews` | Kanban: phone_screen → offer → hired |
|
||||||
|
| `app/pages/6_Interview_Prep.py` | `InterviewPrepView.vue` | `/prep` | Live reference sheet + practice Q&A |
|
||||||
|
| `app/pages/7_Survey.py` | `SurveyView.vue` | `/survey` | Culture-fit survey assist + screenshot |
|
||||||
|
| `app/pages/2_Settings.py` | `SettingsView.vue` | `/settings` | 6 tabs: Profile, Resume, Search, System, Fine-Tune, License |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Avocet Lessons Applied — What We Fixed Before Starting
|
||||||
|
|
||||||
|
The avocet SPA was the testbed. These bugs were found and fixed there; Peregrine's scaffold already incorporates all fixes. See `avocet/docs/vue-port-gotchas.md` for the full writeup.
|
||||||
|
|
||||||
|
### Applied at scaffold level (baked in — you don't need to think about these)
|
||||||
|
|
||||||
|
| # | Gotcha | How it's fixed in this scaffold |
|
||||||
|
|---|--------|----------------------------------|
|
||||||
|
| 1 | `id="app"` on App.vue root → nested `#app` elements, broken CSS specificity | `App.vue` root uses `class="app-root"`. `#app` in `index.html` is mount target only. |
|
||||||
|
| 3 | `overflow-x: hidden` on html → creates scroll container → 15px scrollbar jitter on Linux | `peregrine.css`: `html { overflow-x: clip }` |
|
||||||
|
| 4 | UnoCSS `presetAttributify` generates CSS for bare attribute names like `h2` | `uno.config.ts`: `presetAttributify({ prefix: 'un-', prefixedOnly: true })` |
|
||||||
|
| 5 | Theme variable name mismatches cause dark mode to silently fall back to hardcoded colors | `peregrine.css` alias map: `--color-bg → var(--color-surface)`, `--color-text-secondary → var(--color-text-muted)` |
|
||||||
|
| 7 | SPA cache: browser caches `index.html` indefinitely → old asset hashes → 404 on rebuild | FastAPI must register explicit `GET /` with no-cache headers before `StaticFiles` mount (see FastAPI section below) |
|
||||||
|
| 9 | `navigator.vibrate()` not supported on desktop/Safari — throws on call | `useHaptics.ts` guards with `'vibrate' in navigator` |
|
||||||
|
| 10 | Pinia options store = Vue 2 migration path | All stores use setup store form: `defineStore('id', () => { ... })` |
|
||||||
|
| 12 | `matchMedia`, `vibrate`, `ResizeObserver` absent in jsdom → composable tests throw | `test-setup.ts` stubs all three |
|
||||||
|
| 13 | `100vh` ignores mobile browser chrome | `App.vue`: `min-height: 100dvh` |
|
||||||
|
|
||||||
|
### Must actively avoid when writing new components
|
||||||
|
|
||||||
|
| # | Gotcha | Rule |
|
||||||
|
|---|--------|------|
|
||||||
|
| 2 | `transition: all` + spring easing → every CSS property bounces → layout explosion | Always enumerate: `transition: background 200ms ease, transform 250ms cubic-bezier(...)` |
|
||||||
|
| 6 | Keyboard composables called with snapshot arrays → keys don't work after async data loads | Accept `getLabels: () => labels.value` (reactive getter), not `labels: []` (snapshot) |
|
||||||
|
| 8 | Font reflow at ~780ms shifts layout measurements taken in `onMounted` | Measure layout in `document.fonts.ready` promise or after 1s timeout |
|
||||||
|
| 11 | `useSwipe` from `@vueuse/core` fires on desktop trackpad pointer events, not just touch | Add `pointer-type === 'touch'` guard if you need touch-only behavior |
|
||||||
|
| 14 | Rebuild workflow confusion | `cd web && npm run build` → refresh browser. Only restart FastAPI if `app/api.py` changed. |
|
||||||
|
| 15 | `:global(ancestor) .descendant` in `<style scoped>` → Vue drops the descendant entirely | Never use `:global(X) .Y` in scoped CSS. Use JS gate or CSS custom property token. |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## FastAPI Integration
|
||||||
|
|
||||||
|
### SPA serving (gotcha #7)
|
||||||
|
|
||||||
|
When the Vue SPA is built, FastAPI needs to serve it. Register the explicit `/` route **before** the `StaticFiles` mount, otherwise `index.html` gets cached and old asset hashes cause 404s after rebuild:
|
||||||
|
|
||||||
|
```python
|
||||||
|
from pathlib import Path
|
||||||
|
from fastapi.responses import FileResponse
|
||||||
|
from fastapi.staticfiles import StaticFiles
|
||||||
|
|
||||||
|
_DIST = Path(__file__).parent.parent / "web" / "dist"
|
||||||
|
_NO_CACHE = {
|
||||||
|
"Cache-Control": "no-cache, no-store, must-revalidate",
|
||||||
|
"Pragma": "no-cache",
|
||||||
|
}
|
||||||
|
|
||||||
|
@app.get("/")
|
||||||
|
def spa_root():
|
||||||
|
return FileResponse(_DIST / "index.html", headers=_NO_CACHE)
|
||||||
|
|
||||||
|
# Must come after the explicit route above
|
||||||
|
app.mount("/", StaticFiles(directory=str(_DIST), html=True), name="spa")
|
||||||
|
```
|
||||||
|
|
||||||
|
Hashed assets (`/assets/index-abc123.js`) can be cached aggressively — their filenames change with content. Only `index.html` needs no-cache.
|
||||||
|
|
||||||
|
### API prefix
|
||||||
|
|
||||||
|
Vue Router uses HTML5 history mode. All `/api/*` routes must be registered on FastAPI before the `StaticFiles` mount. Vue routes (`/`, `/review`, `/apply`, etc.) are handled client-side; FastAPI's `html=True` on `StaticFiles` serves `index.html` for any unmatched path.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Peregrine-Specific Considerations
|
||||||
|
|
||||||
|
### Auth & license gating
|
||||||
|
|
||||||
|
The Streamlit UI uses `app/wizard/tiers.py` for tier gating. In the Vue SPA, tier state should be fetched from a `GET /api/license/status` endpoint on mount and stored in a Pinia store. Components check `licenseStore.tier` to gate features.
|
||||||
|
|
||||||
|
### Discovery trigger
|
||||||
|
|
||||||
|
The "Start Discovery" button on Home triggers `python scripts/discover.py` as a background process. The Vue version should use SSE (same pattern as avocet's finetune SSE) to stream progress back in real-time. The `useApiSSE` composable is already wired for this.
|
||||||
|
|
||||||
|
### Job Review — card stack UX
|
||||||
|
|
||||||
|
This is the daily-driver view. Consider the avocet ASMR bucket pattern here — approve/reject could transform into buckets on drag pickup. The motion tokens (`--transition-spring`, `--transition-dismiss`) are pre-defined in `peregrine.css`. The `useHaptics` composable is ready.
|
||||||
|
|
||||||
|
### Kanban (Interviews view)
|
||||||
|
|
||||||
|
The drag-to-column kanban is a strong candidate for `@vueuse/core`'s `useDraggable`. Watch for the `useSwipe` gotcha #11 — use pointer-type guards if drag behavior differs between touch and mouse.
|
||||||
|
|
||||||
|
### Settings — 6 tabs
|
||||||
|
|
||||||
|
Use a tab component with reactive route query params (`/settings?tab=license`) so direct links work and the page is shareable/bookmarkable.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Build & Dev Workflow
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# From worktree root
|
||||||
|
cd web
|
||||||
|
npm install # first time only
|
||||||
|
npm run dev # Vite dev server at :5173 (proxies /api/* to FastAPI at :8502)
|
||||||
|
npm run build # output to web/dist/
|
||||||
|
npm run test # Vitest unit tests
|
||||||
|
```
|
||||||
|
|
||||||
|
FastAPI serves the built `dist/` on the main port. During dev, configure Vite to proxy `/api` to the running FastAPI:
|
||||||
|
|
||||||
|
```ts
|
||||||
|
// vite.config.ts addition for dev proxy
|
||||||
|
server: {
|
||||||
|
proxy: {
|
||||||
|
'/api': 'http://localhost:8502',
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
After `npm run build`, just refresh the browser — no FastAPI restart needed unless `app/api.py` changed (gotcha #14).
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Implementation Order
|
||||||
|
|
||||||
|
Suggested sequence — validate the full stack before porting complex pages:
|
||||||
|
|
||||||
|
1. **FastAPI SPA endpoint** — serve `web/dist/` with correct cache headers
|
||||||
|
2. **App shell** — nav, routing, hacker mode, motion toggle work end-to-end
|
||||||
|
3. **Home view** — dashboard widgets, discovery trigger with SSE progress
|
||||||
|
4. **Job Review** — most-used view; gets the most polish
|
||||||
|
5. **Settings** — license tab is the blocker for tier gating in other views
|
||||||
|
6. **Apply Workspace** — cover letter gen + PDF export
|
||||||
|
7. **Interviews kanban** — drag-to-column + calendar sync
|
||||||
|
8. **Interview Prep** — reference sheet, practice Q&A
|
||||||
|
9. **Survey Assistant** — screenshot + text paste
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Checklist
|
||||||
|
|
||||||
|
Copy of the avocet gotchas checklist (all pre-applied at scaffold level are checked):
|
||||||
|
|
||||||
|
- [x] App.vue root element: use `.app-root` class, NOT `id="app"`
|
||||||
|
- [ ] No `transition: all` with spring easings — enumerate properties explicitly
|
||||||
|
- [ ] No `:global(ancestor) .descendant` in scoped CSS — Vue drops the descendant
|
||||||
|
- [x] `overflow-x: clip` on html, `overflow-x: hidden` on body
|
||||||
|
- [x] UnoCSS `presetAttributify`: `prefixedOnly: true`
|
||||||
|
- [x] Product CSS aliases: `--color-bg`, `--color-text-secondary` mapped in `peregrine.css`
|
||||||
|
- [ ] Keyboard composables: accept reactive getters, not snapshot arrays
|
||||||
|
- [x] FastAPI SPA serving pattern documented — apply when wiring FastAPI
|
||||||
|
- [ ] Font reflow: measure layout after `document.fonts.ready` or 1s timeout
|
||||||
|
- [x] Haptics: guard `navigator.vibrate` with feature detection
|
||||||
|
- [x] Pinia: use setup store form (function syntax)
|
||||||
|
- [x] Tests: mock matchMedia, vibrate, ResizeObserver in test-setup.ts
|
||||||
|
- [x] `min-height: 100dvh` on full-height layout containers
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
name: job-seeker
|
name: cf
|
||||||
# Recreate: conda env create -f environment.yml
|
# Recreate: conda env create -f environment.yml
|
||||||
# Update pinned snapshot: conda env export --no-builds > environment.yml
|
# Update pinned snapshot: conda env export --no-builds > environment.yml
|
||||||
channels:
|
channels:
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,5 @@
|
||||||
#!/usr/bin/env bash
|
#!/usr/bin/env bash
|
||||||
# setup.sh — Peregrine dependency installer
|
# install.sh — Peregrine dependency installer
|
||||||
# Installs Docker, Docker Compose v2, and (optionally) NVIDIA Container Toolkit.
|
# Installs Docker, Docker Compose v2, and (optionally) NVIDIA Container Toolkit.
|
||||||
# Supports: Ubuntu/Debian, Fedora/RHEL/CentOS, Arch Linux, macOS (Homebrew).
|
# Supports: Ubuntu/Debian, Fedora/RHEL/CentOS, Arch Linux, macOS (Homebrew).
|
||||||
# Windows: not supported — use WSL2 with Ubuntu.
|
# Windows: not supported — use WSL2 with Ubuntu.
|
||||||
|
|
@ -90,15 +90,11 @@ configure_git_safe_dir() {
|
||||||
}
|
}
|
||||||
|
|
||||||
activate_git_hooks() {
|
activate_git_hooks() {
|
||||||
local repo_dir hooks_installer
|
local repo_dir
|
||||||
repo_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
repo_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||||
hooks_installer="/Library/Development/CircuitForge/circuitforge-hooks/install.sh"
|
if [[ -d "$repo_dir/.githooks" ]]; then
|
||||||
if [[ -f "$hooks_installer" ]]; then
|
|
||||||
bash "$hooks_installer" --quiet
|
|
||||||
success "CircuitForge hooks activated (circuitforge-hooks)."
|
|
||||||
elif [[ -d "$repo_dir/.githooks" ]]; then
|
|
||||||
git -C "$repo_dir" config core.hooksPath .githooks
|
git -C "$repo_dir" config core.hooksPath .githooks
|
||||||
success "Git hooks activated (.githooks/) — circuitforge-hooks not found, using local fallback."
|
success "Git hooks activated (.githooks/)."
|
||||||
fi
|
fi
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -341,6 +337,31 @@ setup_env() {
|
||||||
fi
|
fi
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# ── License key (optional) ────────────────────────────────────────────────────
|
||||||
|
# Interactively offer to record an optional CircuitForge license key in .env.
# Silently does nothing for non-TTY installs or before setup_env() has run.
capture_license_key() {
  [[ ! -t 0 ]] && return  # skip in non-interactive installs (curl | bash)

  local env_file
  env_file="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/.env"
  [[ ! -f "$env_file" ]] && return  # setup_env() creates it; nothing to write into yet

  echo ""
  info "License key (optional)"
  echo -e " Peregrine works without a key for personal self-hosted use."
  echo -e " Paid-tier users: enter your ${YELLOW}CFG-XXXX-…${NC} key to unlock cloud LLM and integrations."
  echo ""
  read -rp " CircuitForge license key [press Enter to skip]: " _key || true

  # Nothing entered — the user skipped; leave .env untouched.
  [[ -z "$_key" ]] && return

  if echo "$_key" | grep -qE '^CFG-[A-Z0-9]{4}-[A-Z0-9]{4}-[A-Z0-9]{4}-[A-Z0-9]{4}$'; then
    # Valid shape: persist the key and point at the hosted license server.
    _update_env_key "$env_file" "CF_LICENSE_KEY" "$_key"
    _update_env_key "$env_file" "HEIMDALL_URL" "https://license.circuitforge.tech"
    success "License key saved — paid-tier features enabled."
  else
    warn "Key format looks wrong (expected CFG-XXXX-AAAA-BBBB-CCCC) — skipping."
    info "Add it manually to .env as CF_LICENSE_KEY= later."
  fi
}
|
||||||
|
|
||||||
# ── Model weights storage ───────────────────────────────────────────────────────
|
# ── Model weights storage ───────────────────────────────────────────────────────
|
||||||
_update_env_key() {
|
_update_env_key() {
|
||||||
# Portable in-place key=value update for .env files (Linux + macOS).
|
# Portable in-place key=value update for .env files (Linux + macOS).
|
||||||
|
|
@ -416,8 +437,15 @@ main() {
|
||||||
fi
|
fi
|
||||||
install_ollama_macos
|
install_ollama_macos
|
||||||
setup_env
|
setup_env
|
||||||
|
capture_license_key
|
||||||
configure_model_paths
|
configure_model_paths
|
||||||
|
|
||||||
|
# Read the actual port from .env so next-steps reflects any customisation
|
||||||
|
local _script_dir _port
|
||||||
|
_script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||||
|
_port="$(grep -E '^STREAMLIT_PORT=' "$_script_dir/.env" 2>/dev/null | cut -d= -f2-)"
|
||||||
|
_port="${_port:-8502}"
|
||||||
|
|
||||||
echo ""
|
echo ""
|
||||||
success "All dependencies installed."
|
success "All dependencies installed."
|
||||||
echo ""
|
echo ""
|
||||||
|
|
@ -429,7 +457,7 @@ main() {
|
||||||
else
|
else
|
||||||
echo -e " ${YELLOW}./manage.sh start --profile cpu${NC} # local Ollama inference (CPU)"
|
echo -e " ${YELLOW}./manage.sh start --profile cpu${NC} # local Ollama inference (CPU)"
|
||||||
fi
|
fi
|
||||||
echo -e " 2. Open ${YELLOW}http://localhost:8501${NC} — the setup wizard will guide you"
|
echo -e " 2. Open ${YELLOW}http://localhost:${_port}${NC} — the setup wizard will guide you"
|
||||||
echo -e " (Tip: edit ${YELLOW}.env${NC} any time to adjust ports or model paths)"
|
echo -e " (Tip: edit ${YELLOW}.env${NC} any time to adjust ports or model paths)"
|
||||||
echo ""
|
echo ""
|
||||||
if groups "$USER" 2>/dev/null | grep -q docker; then
|
if groups "$USER" 2>/dev/null | grep -q docker; then
|
||||||
|
|
@ -84,7 +84,7 @@ case "$CMD" in
|
||||||
|
|
||||||
setup)
|
setup)
|
||||||
info "Running dependency installer..."
|
info "Running dependency installer..."
|
||||||
bash setup.sh
|
bash install.sh
|
||||||
;;
|
;;
|
||||||
|
|
||||||
preflight)
|
preflight)
|
||||||
|
|
@ -94,7 +94,7 @@ case "$CMD" in
|
||||||
|
|
||||||
models)
|
models)
|
||||||
info "Checking ollama models..."
|
info "Checking ollama models..."
|
||||||
conda run -n job-seeker python scripts/preflight.py --models-only
|
conda run -n cf python scripts/preflight.py --models-only
|
||||||
success "Model check complete."
|
success "Model check complete."
|
||||||
;;
|
;;
|
||||||
|
|
||||||
|
|
@ -190,7 +190,7 @@ case "$CMD" in
|
||||||
RUNNER=""
|
RUNNER=""
|
||||||
fi
|
fi
|
||||||
info "Running E2E tests (mode=${MODE}, headless=${HEADLESS})..."
|
info "Running E2E tests (mode=${MODE}, headless=${HEADLESS})..."
|
||||||
$RUNNER conda run -n job-seeker pytest tests/e2e/ \
|
$RUNNER conda run -n cf pytest tests/e2e/ \
|
||||||
--mode="${MODE}" \
|
--mode="${MODE}" \
|
||||||
--json-report \
|
--json-report \
|
||||||
--json-report-file="${RESULTS_DIR}/report.json" \
|
--json-report-file="${RESULTS_DIR}/report.json" \
|
||||||
|
|
|
||||||
97
migrations/001_baseline.sql
Normal file
97
migrations/001_baseline.sql
Normal file
|
|
@ -0,0 +1,97 @@
|
||||||
|
-- Migration 001: Baseline schema
|
||||||
|
-- Captures the full schema as of v0.8.5 (all columns including those added via ALTER TABLE)
|
||||||
|
|
||||||
|
CREATE TABLE IF NOT EXISTS jobs (
|
||||||
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||||
|
title TEXT,
|
||||||
|
company TEXT,
|
||||||
|
url TEXT UNIQUE,
|
||||||
|
source TEXT,
|
||||||
|
location TEXT,
|
||||||
|
is_remote INTEGER DEFAULT 0,
|
||||||
|
salary TEXT,
|
||||||
|
description TEXT,
|
||||||
|
match_score REAL,
|
||||||
|
keyword_gaps TEXT,
|
||||||
|
date_found TEXT,
|
||||||
|
status TEXT DEFAULT 'pending',
|
||||||
|
notion_page_id TEXT,
|
||||||
|
cover_letter TEXT,
|
||||||
|
applied_at TEXT,
|
||||||
|
interview_date TEXT,
|
||||||
|
rejection_stage TEXT,
|
||||||
|
phone_screen_at TEXT,
|
||||||
|
interviewing_at TEXT,
|
||||||
|
offer_at TEXT,
|
||||||
|
hired_at TEXT,
|
||||||
|
survey_at TEXT,
|
||||||
|
calendar_event_id TEXT,
|
||||||
|
optimized_resume TEXT,
|
||||||
|
ats_gap_report TEXT
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE TABLE IF NOT EXISTS job_contacts (
|
||||||
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||||
|
job_id INTEGER,
|
||||||
|
direction TEXT,
|
||||||
|
subject TEXT,
|
||||||
|
from_addr TEXT,
|
||||||
|
to_addr TEXT,
|
||||||
|
body TEXT,
|
||||||
|
received_at TEXT,
|
||||||
|
is_response_needed INTEGER DEFAULT 0,
|
||||||
|
responded_at TEXT,
|
||||||
|
message_id TEXT,
|
||||||
|
stage_signal TEXT,
|
||||||
|
suggestion_dismissed INTEGER DEFAULT 0
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE TABLE IF NOT EXISTS company_research (
|
||||||
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||||
|
job_id INTEGER UNIQUE,
|
||||||
|
generated_at TEXT,
|
||||||
|
company_brief TEXT,
|
||||||
|
ceo_brief TEXT,
|
||||||
|
talking_points TEXT,
|
||||||
|
raw_output TEXT,
|
||||||
|
tech_brief TEXT,
|
||||||
|
funding_brief TEXT,
|
||||||
|
competitors_brief TEXT,
|
||||||
|
red_flags TEXT,
|
||||||
|
scrape_used INTEGER DEFAULT 0,
|
||||||
|
accessibility_brief TEXT
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE TABLE IF NOT EXISTS background_tasks (
|
||||||
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||||
|
task_type TEXT,
|
||||||
|
job_id INTEGER,
|
||||||
|
params TEXT,
|
||||||
|
status TEXT DEFAULT 'pending',
|
||||||
|
error TEXT,
|
||||||
|
created_at TEXT,
|
||||||
|
started_at TEXT,
|
||||||
|
finished_at TEXT,
|
||||||
|
stage TEXT,
|
||||||
|
updated_at TEXT
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE TABLE IF NOT EXISTS survey_responses (
|
||||||
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||||
|
job_id INTEGER,
|
||||||
|
survey_name TEXT,
|
||||||
|
received_at TEXT,
|
||||||
|
source TEXT,
|
||||||
|
raw_input TEXT,
|
||||||
|
image_path TEXT,
|
||||||
|
mode TEXT,
|
||||||
|
llm_output TEXT,
|
||||||
|
reported_score REAL,
|
||||||
|
created_at TEXT
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE TABLE IF NOT EXISTS digest_queue (
|
||||||
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||||
|
job_contact_id INTEGER UNIQUE,
|
||||||
|
created_at TEXT
|
||||||
|
);
|
||||||
7
migrations/002_ats_resume_columns.sql
Normal file
7
migrations/002_ats_resume_columns.sql
Normal file
|
|
@ -0,0 +1,7 @@
|
||||||
|
-- Add ATS resume optimizer columns introduced in v0.8.x.
|
||||||
|
-- Existing DBs that were created before the baseline included these columns
|
||||||
|
-- need this migration to add them. Safe to run on new DBs: IF NOT EXISTS guards
|
||||||
|
-- are not available for ADD COLUMN in SQLite, so we use a try/ignore pattern
|
||||||
|
-- at the application level (db_migrate.py wraps each migration in a transaction).
|
||||||
|
ALTER TABLE jobs ADD COLUMN optimized_resume TEXT;
|
||||||
|
ALTER TABLE jobs ADD COLUMN ats_gap_report TEXT;
|
||||||
3
migrations/003_resume_review.sql
Normal file
3
migrations/003_resume_review.sql
Normal file
|
|
@ -0,0 +1,3 @@
|
||||||
|
-- Resume review draft and version archive columns (migration 003)
|
||||||
|
ALTER TABLE jobs ADD COLUMN resume_draft_json TEXT;
|
||||||
|
ALTER TABLE jobs ADD COLUMN resume_archive_json TEXT;
|
||||||
5
migrations/004_resume_final_struct.sql
Normal file
5
migrations/004_resume_final_struct.sql
Normal file
|
|
@ -0,0 +1,5 @@
|
||||||
|
-- Migration 004: add resume_final_struct to jobs table
|
||||||
|
-- Stores the approved resume as a structured JSON dict alongside the plain text
|
||||||
|
-- (resume_optimized_text). Enables YAML export and future re-processing without
|
||||||
|
-- re-parsing the plain text.
|
||||||
|
ALTER TABLE jobs ADD COLUMN resume_final_struct TEXT;
|
||||||
17
migrations/005_resumes_table.sql
Normal file
17
migrations/005_resumes_table.sql
Normal file
|
|
@ -0,0 +1,17 @@
|
||||||
|
-- 005_resumes_table.sql
|
||||||
|
-- Resume library: named saved resumes per user (optimizer output, imports, manual)
|
||||||
|
|
||||||
|
CREATE TABLE IF NOT EXISTS resumes (
|
||||||
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||||
|
name TEXT NOT NULL,
|
||||||
|
source TEXT NOT NULL DEFAULT 'manual',
|
||||||
|
job_id INTEGER REFERENCES jobs(id),
|
||||||
|
text TEXT NOT NULL,
|
||||||
|
struct_json TEXT,
|
||||||
|
word_count INTEGER,
|
||||||
|
is_default INTEGER NOT NULL DEFAULT 0,
|
||||||
|
created_at TEXT NOT NULL DEFAULT (datetime('now')),
|
||||||
|
updated_at TEXT NOT NULL DEFAULT (datetime('now'))
|
||||||
|
);
|
||||||
|
|
||||||
|
ALTER TABLE jobs ADD COLUMN resume_id INTEGER REFERENCES resumes(id);
|
||||||
6
migrations/006_date_posted.sql
Normal file
6
migrations/006_date_posted.sql
Normal file
|
|
@ -0,0 +1,6 @@
|
||||||
|
-- 006_date_posted.sql
|
||||||
|
-- Add date_posted column for shadow listing detection (stale/shadow score feature).
|
||||||
|
-- New DBs already have this column from the CREATE TABLE statement in db.py;
|
||||||
|
-- this migration adds it to existing user DBs.
|
||||||
|
|
||||||
|
ALTER TABLE jobs ADD COLUMN date_posted TEXT;
|
||||||
3
migrations/007_resume_sync.sql
Normal file
3
migrations/007_resume_sync.sql
Normal file
|
|
@ -0,0 +1,3 @@
|
||||||
|
-- 007_resume_sync.sql
|
||||||
|
-- Add synced_at to resumes: ISO datetime of last library↔profile sync, null = never synced.
|
||||||
|
ALTER TABLE resumes ADD COLUMN synced_at TEXT;
|
||||||
|
|
@ -70,3 +70,6 @@ nav:
|
||||||
- Tier System: reference/tier-system.md
|
- Tier System: reference/tier-system.md
|
||||||
- LLM Router: reference/llm-router.md
|
- LLM Router: reference/llm-router.md
|
||||||
- Config Files: reference/config-files.md
|
- Config Files: reference/config-files.md
|
||||||
|
|
||||||
|
extra_javascript:
|
||||||
|
- plausible.js
|
||||||
|
|
|
||||||
92
podman-standalone.sh
Executable file
92
podman-standalone.sh
Executable file
|
|
@ -0,0 +1,92 @@
|
||||||
|
#!/usr/bin/env bash
|
||||||
|
# podman-standalone.sh — Peregrine rootful Podman setup (no Compose)
|
||||||
|
#
|
||||||
|
# For beta testers running system Podman (non-rootless) with systemd.
|
||||||
|
# Mirrors the manage.sh "remote" profile: app + SearXNG only.
|
||||||
|
# Ollama/vLLM/vision are expected as host services if needed.
|
||||||
|
#
|
||||||
|
# ── Prerequisites ────────────────────────────────────────────────────────────
|
||||||
|
# 1. Clone the repo:
|
||||||
|
# sudo git clone <repo-url> /opt/peregrine
|
||||||
|
#
|
||||||
|
# 2. Build the app image:
|
||||||
|
# cd /opt/peregrine && sudo podman build -t localhost/peregrine:latest .
|
||||||
|
#
|
||||||
|
# 3. Create a config directory and copy the example configs:
|
||||||
|
# sudo mkdir -p /opt/peregrine/{config,data}
|
||||||
|
#      for f in /opt/peregrine/config/*.example; do sudo cp "$f" "${f%.example}"; done
|
||||||
|
# # Edit /opt/peregrine/config/llm.yaml, notion.yaml, etc. as needed
|
||||||
|
#
|
||||||
|
# 4. Run this script:
|
||||||
|
# sudo bash /opt/peregrine/podman-standalone.sh
|
||||||
|
#
|
||||||
|
# ── After setup — generate systemd unit files ────────────────────────────────
|
||||||
|
# sudo podman generate systemd --new --name peregrine-searxng \
|
||||||
|
# | sudo tee /etc/systemd/system/peregrine-searxng.service
|
||||||
|
# sudo podman generate systemd --new --name peregrine \
|
||||||
|
# | sudo tee /etc/systemd/system/peregrine.service
|
||||||
|
# sudo systemctl daemon-reload
|
||||||
|
# sudo systemctl enable --now peregrine-searxng peregrine
|
||||||
|
#
|
||||||
|
# ── SearXNG ──────────────────────────────────────────────────────────────────
|
||||||
|
# Peregrine expects a SearXNG instance with JSON format enabled.
|
||||||
|
# If you already run one, skip the SearXNG container and set the URL in
|
||||||
|
# config/llm.yaml (searxng_url key). The default is http://localhost:8888.
|
||||||
|
#
|
||||||
|
# ── Ports ────────────────────────────────────────────────────────────────────
|
||||||
|
# Peregrine UI → http://localhost:8501
|
||||||
|
#
|
||||||
|
# ── To use a different Streamlit port ────────────────────────────────────────
|
||||||
|
# Uncomment the CMD override at the bottom of the peregrine run block and
|
||||||
|
# set PORT= to your desired port. The Dockerfile default is 8501.
|
||||||
|
#
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
REPO_DIR=/opt/peregrine
|
||||||
|
DATA_DIR=/opt/peregrine/data
|
||||||
|
DOCS_DIR=/Library/Documents/JobSearch # ← adjust to your docs path
|
||||||
|
TZ=America/Los_Angeles
|
||||||
|
|
||||||
|
# ── Peregrine App ─────────────────────────────────────────────────────────────
|
||||||
|
# Image is built locally — no registry auto-update label.
|
||||||
|
# To update: sudo podman build -t localhost/peregrine:latest /opt/peregrine
|
||||||
|
# sudo podman restart peregrine
|
||||||
|
#
|
||||||
|
# Env vars: ANTHROPIC_API_KEY, OPENAI_COMPAT_URL, OPENAI_COMPAT_KEY are
|
||||||
|
# optional — only needed if you're using those backends in config/llm.yaml.
|
||||||
|
#
|
||||||
|
sudo podman run -d \
|
||||||
|
--name=peregrine \
|
||||||
|
--restart=unless-stopped \
|
||||||
|
--net=host \
|
||||||
|
-v ${REPO_DIR}/config:/app/config:Z \
|
||||||
|
-v ${DATA_DIR}:/app/data:Z \
|
||||||
|
-v ${DOCS_DIR}:/docs:z \
|
||||||
|
-e STAGING_DB=/app/data/staging.db \
|
||||||
|
-e DOCS_DIR=/docs \
|
||||||
|
-e PYTHONUNBUFFERED=1 \
|
||||||
|
-e PYTHONLOGGING=WARNING \
|
||||||
|
-e TZ=${TZ} \
|
||||||
|
--health-cmd="curl -f http://localhost:8501/_stcore/health || exit 1" \
|
||||||
|
--health-interval=30s \
|
||||||
|
--health-timeout=10s \
|
||||||
|
--health-start-period=60s \
|
||||||
|
--health-retries=3 \
|
||||||
|
localhost/peregrine:latest
|
||||||
|
# To override the default port (8501), uncomment and edit the line below,
|
||||||
|
# then remove the image name above and place it at the end of the CMD:
|
||||||
|
# streamlit run app/app.py --server.port=8501 --server.headless=true --server.fileWatcherType=none
|
||||||
|
|
||||||
|
echo ""
|
||||||
|
echo "Peregrine is starting up."
|
||||||
|
echo " App: http://localhost:8501"
|
||||||
|
echo ""
|
||||||
|
echo "Check container health with:"
|
||||||
|
echo " sudo podman ps"
|
||||||
|
echo " sudo podman logs peregrine"
|
||||||
|
echo ""
|
||||||
|
echo "To register as a systemd service:"
|
||||||
|
echo " sudo podman generate systemd --new --name peregrine \\"
|
||||||
|
echo " | sudo tee /etc/systemd/system/peregrine.service"
|
||||||
|
echo " sudo systemctl daemon-reload"
|
||||||
|
echo " sudo systemctl enable --now peregrine"
|
||||||
|
|
@ -2,6 +2,15 @@
|
||||||
# Extracted from environment.yml for Docker pip installs
|
# Extracted from environment.yml for Docker pip installs
|
||||||
# Keep in sync with environment.yml
|
# Keep in sync with environment.yml
|
||||||
|
|
||||||
|
# ── CircuitForge shared core ───────────────────────────────────────────────
|
||||||
|
# Requires circuitforge-core >= 0.8.0 (config.load_env, db, tasks; resources moved to circuitforge-orch).
|
||||||
|
# Local dev / Docker (parent-context build): path install works because
|
||||||
|
# circuitforge-core/ is a sibling directory.
|
||||||
|
# CI / fresh checkouts: falls back to the Forgejo VCS URL below.
|
||||||
|
# To use local editable install run: pip install -e ../circuitforge-core
|
||||||
|
# TODO: pin to a release tag (>= v0.8.0, matching the requirement above) once cf-core cuts one.
|
||||||
|
git+https://git.opensourcesolarpunk.com/Circuit-Forge/circuitforge-core.git@main
|
||||||
|
|
||||||
# ── Web UI ────────────────────────────────────────────────────────────────
|
# ── Web UI ────────────────────────────────────────────────────────────────
|
||||||
streamlit>=1.35
|
streamlit>=1.35
|
||||||
watchdog
|
watchdog
|
||||||
|
|
@ -78,3 +87,10 @@ lxml
|
||||||
# ── Documentation ────────────────────────────────────────────────────────
|
# ── Documentation ────────────────────────────────────────────────────────
|
||||||
mkdocs>=1.5
|
mkdocs>=1.5
|
||||||
mkdocs-material>=9.5
|
mkdocs-material>=9.5
|
||||||
|
|
||||||
|
# ── Vue SPA API backend ──────────────────────────────────────────────────
|
||||||
|
fastapi>=0.100.0
|
||||||
|
uvicorn[standard]>=0.20.0
|
||||||
|
PyJWT>=2.8.0
|
||||||
|
cryptography>=40.0.0
|
||||||
|
python-multipart>=0.0.6
|
||||||
|
|
|
||||||
|
|
@ -277,7 +277,8 @@ def _load_resume_and_keywords() -> tuple[dict, list[str]]:
|
||||||
return resume, keywords
|
return resume, keywords
|
||||||
|
|
||||||
|
|
||||||
def research_company(job: dict, use_scraper: bool = True, on_stage=None) -> dict:
|
def research_company(job: dict, use_scraper: bool = True, on_stage=None,
|
||||||
|
config_path: "Path | None" = None) -> dict:
|
||||||
"""
|
"""
|
||||||
Generate a pre-interview research brief for a job.
|
Generate a pre-interview research brief for a job.
|
||||||
|
|
||||||
|
|
@ -295,7 +296,7 @@ def research_company(job: dict, use_scraper: bool = True, on_stage=None) -> dict
|
||||||
"""
|
"""
|
||||||
from scripts.llm_router import LLMRouter
|
from scripts.llm_router import LLMRouter
|
||||||
|
|
||||||
router = LLMRouter()
|
router = LLMRouter(config_path=config_path) if config_path else LLMRouter()
|
||||||
research_order = router.config.get("research_fallback_order") or router.config["fallback_order"]
|
research_order = router.config.get("research_fallback_order") or router.config["fallback_order"]
|
||||||
company = job.get("company") or "the company"
|
company = job.get("company") or "the company"
|
||||||
title = job.get("title") or "this role"
|
title = job.get("title") or "this role"
|
||||||
|
|
|
||||||
198
scripts/credential_store.py
Normal file
198
scripts/credential_store.py
Normal file
|
|
@ -0,0 +1,198 @@
|
||||||
|
"""
|
||||||
|
Credential store abstraction for Peregrine.
|
||||||
|
|
||||||
|
Backends (set via CREDENTIAL_BACKEND env var):
|
||||||
|
auto → try keyring, fall back to file (default)
|
||||||
|
keyring → python-keyring (OS Keychain / SecretService / libsecret)
|
||||||
|
file → Fernet-encrypted JSON in config/credentials/ (key at config/.credential_key)
|
||||||
|
|
||||||
|
Env var references:
|
||||||
|
Any stored value matching ${VAR_NAME} is resolved from os.environ at read time.
|
||||||
|
Users can store "${IMAP_PASSWORD}" as the credential value; it is never treated
|
||||||
|
as the actual secret — only the env var it points to is used.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
_ENV_REF = re.compile(r'^\$\{([A-Z_][A-Z0-9_]*)\}$')
|
||||||
|
|
||||||
|
_PROJECT_ROOT = Path(__file__).parent.parent
|
||||||
|
CRED_DIR = _PROJECT_ROOT / "config" / "credentials"
|
||||||
|
KEY_PATH = _PROJECT_ROOT / "config" / ".credential_key"
|
||||||
|
|
||||||
|
|
||||||
|
def _resolve_env_ref(value: str) -> Optional[str]:
    """Resolve a ``${VAR_NAME}`` reference against os.environ.

    Returns the referenced env var's value (None if the var is unset, with a
    warning logged), or None when *value* is not a reference at all.
    """
    match = _ENV_REF.match(value)
    if match is None:
        return None
    resolved = os.environ.get(match.group(1))
    if resolved is None:
        # The stored credential points at an env var that isn't exported.
        logger.warning("Credential reference %s is set but env var is not defined", value)
    return resolved
|
||||||
|
|
||||||
|
|
||||||
|
def _get_backend() -> str:
    """Return the active credential backend name ("keyring" or "file").

    CREDENTIAL_BACKEND overrides auto-detection when set to anything other
    than "auto". In auto mode, a usable keyring backend is probed for; any
    failure (missing package, null/fail keyring) selects the file backend.
    """
    chosen = os.environ.get("CREDENTIAL_BACKEND", "auto").lower()
    if chosen != "auto":
        return chosen
    # Auto: probe keyring; fall back to file on any problem.
    try:
        import keyring
        backend_name = type(keyring.get_keyring()).__name__.lower()
        # The null/fail keyring can't actually store anything — reject it.
        if "fail" in backend_name or "null" in backend_name:
            raise RuntimeError("No usable keyring backend found")
    except Exception:
        return "file"
    return "keyring"
|
||||||
|
|
||||||
|
|
||||||
|
def _get_fernet():
    """Return a Fernet cipher, creating the key file on first use.

    Returns None when the ``cryptography`` package is unavailable, which
    signals the file backend to store plaintext (with a warning) instead.
    """
    try:
        from cryptography.fernet import Fernet
    except ImportError:
        return None

    if not KEY_PATH.exists():
        new_key = Fernet.generate_key()
        KEY_PATH.parent.mkdir(parents=True, exist_ok=True)
        # os.open with 0o600 so the key file is owner-readable only,
        # regardless of the process umask.
        fd = os.open(str(KEY_PATH), os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
        with os.fdopen(fd, "wb") as fh:
            fh.write(new_key)
        logger.info("Generated new credential encryption key at %s", KEY_PATH)
        return Fernet(new_key)

    # strip() tolerates a trailing newline added by manual key edits.
    return Fernet(KEY_PATH.read_bytes().strip())
|
||||||
|
|
||||||
|
|
||||||
|
def _file_read(service: str) -> dict:
    """Load the stored credential mapping for *service*.

    Decrypts with Fernet when available, falling back to plaintext JSON for
    legacy files. Returns an empty dict when the file is absent or unreadable.
    """
    path = CRED_DIR / f"{service}.json"
    if not path.exists():
        return {}
    blob = path.read_bytes()
    cipher = _get_fernet()

    if cipher is None:
        # cryptography not installed — the file can only be plaintext JSON.
        try:
            return json.loads(blob.decode())
        except Exception:
            return {}

    try:
        return json.loads(cipher.decrypt(blob))
    except Exception:
        # May be an older plaintext file — try reading as text
        try:
            return json.loads(blob.decode())
        except Exception:
            logger.error("Failed to read credentials for service %s", service)
            return {}
|
||||||
|
|
||||||
|
|
||||||
|
def _file_write(service: str, data: dict) -> None:
    """Persist the credential mapping for *service*, encrypting when possible.

    Without the ``cryptography`` package the payload is written as plaintext
    JSON (a warning is logged); either way the file is created mode 0o600.
    """
    CRED_DIR.mkdir(parents=True, exist_ok=True)
    path = CRED_DIR / f"{service}.json"
    cipher = _get_fernet()

    if cipher:
        payload = cipher.encrypt(json.dumps(data).encode())
    else:
        logger.warning(
            "cryptography package not installed — storing credentials as plaintext with chmod 600. "
            "Install with: pip install cryptography"
        )
        payload = json.dumps(data).encode()

    # os.open with 0o600 keeps the file private to the owner regardless of umask.
    fd = os.open(str(path), os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    with os.fdopen(fd, "wb") as fh:
        fh.write(payload)
|
||||||
|
|
||||||
|
|
||||||
|
def get_credential(service: str, key: str) -> Optional[str]:
    """
    Look up a credential for (service, key).

    Stored values of the form ``${VAR_NAME}`` are treated as env var
    references and resolved from os.environ at call time; a reference whose
    env var is unset yields None rather than the literal placeholder.
    """
    stored: Optional[str] = None

    if _get_backend() == "keyring":
        try:
            import keyring
            stored = keyring.get_password(service, key)
        except Exception as e:
            logger.error("keyring get failed for %s/%s: %s", service, key, e)
    else:  # file
        stored = _file_read(service).get(key)

    if stored is None:
        return None

    # Resolve env var references transparently
    resolved = _resolve_env_ref(stored)
    if resolved is not None:
        return resolved
    if _ENV_REF.match(stored):
        return None  # reference defined but env var not set
    return stored
|
||||||
|
|
||||||
|
|
||||||
|
def set_credential(service: str, key: str, value: str) -> None:
    """
    Persist a credential under (service, key).

    *value* may be a literal secret or a ``${VAR_NAME}`` reference; references
    are stored verbatim and resolved when read back. Empty values are ignored.
    """
    if not value:
        return

    if _get_backend() == "keyring":
        try:
            import keyring
            keyring.set_password(service, key, value)
            return
        except Exception as e:
            # Keyring looked usable but the write failed — fall through to file.
            logger.error("keyring set failed for %s/%s: %s — falling back to file", service, key, e)

    # file backend (also the keyring-failure fallback path)
    stored = _file_read(service)
    stored[key] = value
    _file_write(service, stored)
|
||||||
|
|
||||||
|
|
||||||
|
def delete_credential(service: str, key: str) -> None:
    """Remove a stored credential; delete the service file once it empties."""
    if _get_backend() == "keyring":
        try:
            import keyring
            keyring.delete_password(service, key)
            return
        except Exception:
            # Keyring unavailable or key absent — fall through to the file backend.
            pass

    remaining = _file_read(service)
    remaining.pop(key, None)
    if remaining:
        _file_write(service, remaining)
        return
    # Last entry removed — drop the (now empty) credentials file entirely.
    path = CRED_DIR / f"{service}.json"
    if path.exists():
        path.unlink()
|
||||||
|
|
@ -70,7 +70,7 @@ def scrape(profile: dict, location: str, results_wanted: int = 50) -> list[dict]
|
||||||
print(f" [adzuna] Skipped — {exc}")
|
print(f" [adzuna] Skipped — {exc}")
|
||||||
return []
|
return []
|
||||||
|
|
||||||
titles = profile.get("titles", [])
|
titles = profile.get("titles") or profile.get("job_titles", [])
|
||||||
hours_old = profile.get("hours_old", 240)
|
hours_old = profile.get("hours_old", 240)
|
||||||
max_days_old = max(1, hours_old // 24)
|
max_days_old = max(1, hours_old // 24)
|
||||||
is_remote_search = location.lower() == "remote"
|
is_remote_search = location.lower() == "remote"
|
||||||
|
|
|
||||||
|
|
@ -121,7 +121,7 @@ def scrape(profile: dict, location: str, results_wanted: int = 50) -> list[dict]
|
||||||
return []
|
return []
|
||||||
metros = [metro]
|
metros = [metro]
|
||||||
|
|
||||||
titles: list[str] = profile.get("titles", [])
|
titles: list[str] = profile.get("titles") or profile.get("job_titles", [])
|
||||||
hours_old: int = profile.get("hours_old", 240)
|
hours_old: int = profile.get("hours_old", 240)
|
||||||
cutoff = datetime.now(tz=timezone.utc).timestamp() - (hours_old * 3600)
|
cutoff = datetime.now(tz=timezone.utc).timestamp() - (hours_old * 3600)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -107,7 +107,7 @@ def scrape(profile: dict, location: str, results_wanted: int = 50) -> list[dict]
|
||||||
)
|
)
|
||||||
page = ctx.new_page()
|
page = ctx.new_page()
|
||||||
|
|
||||||
for title in profile.get("titles", []):
|
for title in (profile.get("titles") or profile.get("job_titles", [])):
|
||||||
if len(results) >= results_wanted:
|
if len(results) >= results_wanted:
|
||||||
break
|
break
|
||||||
|
|
||||||
|
|
|
||||||
396
scripts/db.py
396
scripts/db.py
|
|
@ -9,30 +9,14 @@ from datetime import datetime
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
|
from circuitforge_core.db import get_connection as _cf_get_connection
|
||||||
|
|
||||||
DEFAULT_DB = Path(os.environ.get("STAGING_DB", Path(__file__).parent.parent / "staging.db"))
|
DEFAULT_DB = Path(os.environ.get("STAGING_DB", Path(__file__).parent.parent / "staging.db"))
|
||||||
|
|
||||||
|
|
||||||
def get_connection(db_path: Path = DEFAULT_DB, key: str = "") -> "sqlite3.Connection":
|
def get_connection(db_path: Path = DEFAULT_DB, key: str = "") -> "sqlite3.Connection":
|
||||||
"""
|
"""Thin shim — delegates to circuitforge_core.db.get_connection."""
|
||||||
Open a database connection.
|
return _cf_get_connection(db_path, key)
|
||||||
|
|
||||||
In cloud mode with a key: uses SQLCipher (AES-256 encrypted, API-identical to sqlite3).
|
|
||||||
Otherwise: vanilla sqlite3.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
db_path: Path to the SQLite/SQLCipher database file.
|
|
||||||
key: SQLCipher encryption key (hex string). Empty = unencrypted.
|
|
||||||
"""
|
|
||||||
import os as _os
|
|
||||||
cloud_mode = _os.environ.get("CLOUD_MODE", "").lower() in ("1", "true", "yes")
|
|
||||||
if cloud_mode and key:
|
|
||||||
from pysqlcipher3 import dbapi2 as _sqlcipher
|
|
||||||
conn = _sqlcipher.connect(str(db_path))
|
|
||||||
conn.execute(f"PRAGMA key='{key}'")
|
|
||||||
return conn
|
|
||||||
else:
|
|
||||||
import sqlite3 as _sqlite3
|
|
||||||
return _sqlite3.connect(str(db_path))
|
|
||||||
|
|
||||||
|
|
||||||
CREATE_JOBS = """
|
CREATE_JOBS = """
|
||||||
|
|
@ -137,6 +121,41 @@ CREATE TABLE IF NOT EXISTS survey_responses (
|
||||||
);
|
);
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
CREATE_DIGEST_QUEUE = """
|
||||||
|
CREATE TABLE IF NOT EXISTS digest_queue (
|
||||||
|
id INTEGER PRIMARY KEY,
|
||||||
|
job_contact_id INTEGER NOT NULL REFERENCES job_contacts(id),
|
||||||
|
created_at TEXT DEFAULT (datetime('now')),
|
||||||
|
UNIQUE(job_contact_id)
|
||||||
|
)
|
||||||
|
"""
|
||||||
|
|
||||||
|
CREATE_REFERENCES = """
|
||||||
|
CREATE TABLE IF NOT EXISTS references_ (
|
||||||
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||||
|
name TEXT NOT NULL,
|
||||||
|
relationship TEXT,
|
||||||
|
company TEXT,
|
||||||
|
email TEXT,
|
||||||
|
phone TEXT,
|
||||||
|
notes TEXT,
|
||||||
|
tags TEXT DEFAULT '[]',
|
||||||
|
created_at TEXT DEFAULT (datetime('now')),
|
||||||
|
updated_at TEXT DEFAULT (datetime('now'))
|
||||||
|
);
|
||||||
|
"""
|
||||||
|
|
||||||
|
CREATE_JOB_REFERENCES = """
|
||||||
|
CREATE TABLE IF NOT EXISTS job_references (
|
||||||
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||||
|
job_id INTEGER NOT NULL REFERENCES jobs(id) ON DELETE CASCADE,
|
||||||
|
reference_id INTEGER NOT NULL REFERENCES references_(id) ON DELETE CASCADE,
|
||||||
|
prep_email TEXT,
|
||||||
|
rec_letter TEXT,
|
||||||
|
UNIQUE(job_id, reference_id)
|
||||||
|
);
|
||||||
|
"""
|
||||||
|
|
||||||
_MIGRATIONS = [
|
_MIGRATIONS = [
|
||||||
("cover_letter", "TEXT"),
|
("cover_letter", "TEXT"),
|
||||||
("applied_at", "TEXT"),
|
("applied_at", "TEXT"),
|
||||||
|
|
@ -148,6 +167,10 @@ _MIGRATIONS = [
|
||||||
("hired_at", "TEXT"),
|
("hired_at", "TEXT"),
|
||||||
("survey_at", "TEXT"),
|
("survey_at", "TEXT"),
|
||||||
("calendar_event_id", "TEXT"),
|
("calendar_event_id", "TEXT"),
|
||||||
|
("optimized_resume", "TEXT"), # ATS-rewritten resume text (paid tier)
|
||||||
|
("ats_gap_report", "TEXT"), # JSON gap report (free tier)
|
||||||
|
("date_posted", "TEXT"), # Original posting date from job board (shadow listing detection)
|
||||||
|
("hired_feedback", "TEXT"), # JSON: optional post-hire "what helped" response
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -181,6 +204,9 @@ def _migrate_db(db_path: Path) -> None:
|
||||||
conn.execute("ALTER TABLE background_tasks ADD COLUMN params TEXT")
|
conn.execute("ALTER TABLE background_tasks ADD COLUMN params TEXT")
|
||||||
except sqlite3.OperationalError:
|
except sqlite3.OperationalError:
|
||||||
pass # column already exists
|
pass # column already exists
|
||||||
|
# Ensure references tables exist (CREATE IF NOT EXISTS is idempotent)
|
||||||
|
conn.execute(CREATE_REFERENCES)
|
||||||
|
conn.execute(CREATE_JOB_REFERENCES)
|
||||||
conn.commit()
|
conn.commit()
|
||||||
conn.close()
|
conn.close()
|
||||||
|
|
||||||
|
|
@ -193,6 +219,9 @@ def init_db(db_path: Path = DEFAULT_DB) -> None:
|
||||||
conn.execute(CREATE_COMPANY_RESEARCH)
|
conn.execute(CREATE_COMPANY_RESEARCH)
|
||||||
conn.execute(CREATE_BACKGROUND_TASKS)
|
conn.execute(CREATE_BACKGROUND_TASKS)
|
||||||
conn.execute(CREATE_SURVEY_RESPONSES)
|
conn.execute(CREATE_SURVEY_RESPONSES)
|
||||||
|
conn.execute(CREATE_DIGEST_QUEUE)
|
||||||
|
conn.execute(CREATE_REFERENCES)
|
||||||
|
conn.execute(CREATE_JOB_REFERENCES)
|
||||||
conn.commit()
|
conn.commit()
|
||||||
conn.close()
|
conn.close()
|
||||||
_migrate_db(db_path)
|
_migrate_db(db_path)
|
||||||
|
|
@ -206,8 +235,8 @@ def insert_job(db_path: Path = DEFAULT_DB, job: dict = None) -> Optional[int]:
|
||||||
try:
|
try:
|
||||||
cursor = conn.execute(
|
cursor = conn.execute(
|
||||||
"""INSERT INTO jobs
|
"""INSERT INTO jobs
|
||||||
(title, company, url, source, location, is_remote, salary, description, date_found)
|
(title, company, url, source, location, is_remote, salary, description, date_found, date_posted)
|
||||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)""",
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
|
||||||
(
|
(
|
||||||
job.get("title", ""),
|
job.get("title", ""),
|
||||||
job.get("company", ""),
|
job.get("company", ""),
|
||||||
|
|
@ -218,6 +247,7 @@ def insert_job(db_path: Path = DEFAULT_DB, job: dict = None) -> Optional[int]:
|
||||||
job.get("salary", ""),
|
job.get("salary", ""),
|
||||||
job.get("description", ""),
|
job.get("description", ""),
|
||||||
job.get("date_found", ""),
|
job.get("date_found", ""),
|
||||||
|
job.get("date_posted", "") or "",
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
conn.commit()
|
conn.commit()
|
||||||
|
|
@ -317,6 +347,128 @@ def update_cover_letter(db_path: Path = DEFAULT_DB, job_id: int = None, text: st
|
||||||
conn.close()
|
conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
def save_optimized_resume(db_path: Path = DEFAULT_DB, job_id: int = None,
|
||||||
|
text: str = "", gap_report: str = "") -> None:
|
||||||
|
"""Persist ATS-optimized resume text and/or gap report for a job."""
|
||||||
|
if job_id is None:
|
||||||
|
return
|
||||||
|
conn = sqlite3.connect(db_path)
|
||||||
|
conn.execute(
|
||||||
|
"UPDATE jobs SET optimized_resume = ?, ats_gap_report = ? WHERE id = ?",
|
||||||
|
(text or None, gap_report or None, job_id),
|
||||||
|
)
|
||||||
|
conn.commit()
|
||||||
|
conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
def get_optimized_resume(db_path: Path = DEFAULT_DB, job_id: int = None) -> dict:
|
||||||
|
"""Return optimized_resume and ats_gap_report for a job, or empty strings if absent."""
|
||||||
|
if job_id is None:
|
||||||
|
return {"optimized_resume": "", "ats_gap_report": ""}
|
||||||
|
conn = sqlite3.connect(db_path)
|
||||||
|
conn.row_factory = sqlite3.Row
|
||||||
|
row = conn.execute(
|
||||||
|
"SELECT optimized_resume, ats_gap_report FROM jobs WHERE id = ?", (job_id,)
|
||||||
|
).fetchone()
|
||||||
|
conn.close()
|
||||||
|
if not row:
|
||||||
|
return {"optimized_resume": "", "ats_gap_report": ""}
|
||||||
|
return {
|
||||||
|
"optimized_resume": row["optimized_resume"] or "",
|
||||||
|
"ats_gap_report": row["ats_gap_report"] or "",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def save_resume_draft(db_path: Path = DEFAULT_DB, job_id: int = None,
|
||||||
|
draft_json: str = "") -> None:
|
||||||
|
"""Persist a structured resume review draft (awaiting user approval)."""
|
||||||
|
if job_id is None:
|
||||||
|
return
|
||||||
|
conn = sqlite3.connect(db_path)
|
||||||
|
conn.execute(
|
||||||
|
"UPDATE jobs SET resume_draft_json = ? WHERE id = ?",
|
||||||
|
(draft_json or None, job_id),
|
||||||
|
)
|
||||||
|
conn.commit()
|
||||||
|
conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
def get_resume_draft(db_path: Path = DEFAULT_DB, job_id: int = None) -> dict | None:
|
||||||
|
"""Return the pending review draft, or None if no draft is waiting."""
|
||||||
|
if job_id is None:
|
||||||
|
return None
|
||||||
|
conn = sqlite3.connect(db_path)
|
||||||
|
conn.row_factory = sqlite3.Row
|
||||||
|
row = conn.execute(
|
||||||
|
"SELECT resume_draft_json FROM jobs WHERE id = ?", (job_id,)
|
||||||
|
).fetchone()
|
||||||
|
conn.close()
|
||||||
|
if not row or not row["resume_draft_json"]:
|
||||||
|
return None
|
||||||
|
import json
|
||||||
|
try:
|
||||||
|
return json.loads(row["resume_draft_json"])
|
||||||
|
except Exception:
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def finalize_resume(db_path: Path = DEFAULT_DB, job_id: int = None,
|
||||||
|
final_text: str = "") -> None:
|
||||||
|
"""Save approved resume text, archive the previous version, and clear draft."""
|
||||||
|
if job_id is None:
|
||||||
|
return
|
||||||
|
import json
|
||||||
|
conn = sqlite3.connect(db_path)
|
||||||
|
conn.row_factory = sqlite3.Row
|
||||||
|
row = conn.execute(
|
||||||
|
"SELECT optimized_resume, resume_archive_json FROM jobs WHERE id = ?", (job_id,)
|
||||||
|
).fetchone()
|
||||||
|
conn.row_factory = None
|
||||||
|
|
||||||
|
# Archive current finalized version if present
|
||||||
|
archive: list = []
|
||||||
|
if row:
|
||||||
|
if row["resume_archive_json"]:
|
||||||
|
try:
|
||||||
|
archive = json.loads(row["resume_archive_json"])
|
||||||
|
except Exception:
|
||||||
|
archive = []
|
||||||
|
if row["optimized_resume"]:
|
||||||
|
from datetime import datetime
|
||||||
|
archive.append({
|
||||||
|
"archived_at": datetime.now().isoformat()[:16],
|
||||||
|
"text": row["optimized_resume"],
|
||||||
|
})
|
||||||
|
|
||||||
|
conn.execute(
|
||||||
|
"UPDATE jobs SET optimized_resume = ?, resume_draft_json = NULL, "
|
||||||
|
"resume_archive_json = ? WHERE id = ?",
|
||||||
|
(final_text, json.dumps(archive), job_id),
|
||||||
|
)
|
||||||
|
conn.commit()
|
||||||
|
conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
def get_resume_archive(db_path: Path = DEFAULT_DB, job_id: int = None) -> list:
|
||||||
|
"""Return list of past finalized resume versions (newest archived first)."""
|
||||||
|
if job_id is None:
|
||||||
|
return []
|
||||||
|
conn = sqlite3.connect(db_path)
|
||||||
|
conn.row_factory = sqlite3.Row
|
||||||
|
row = conn.execute(
|
||||||
|
"SELECT resume_archive_json FROM jobs WHERE id = ?", (job_id,)
|
||||||
|
).fetchone()
|
||||||
|
conn.close()
|
||||||
|
if not row or not row["resume_archive_json"]:
|
||||||
|
return []
|
||||||
|
import json
|
||||||
|
try:
|
||||||
|
entries = json.loads(row["resume_archive_json"])
|
||||||
|
return list(reversed(entries)) # newest first
|
||||||
|
except Exception:
|
||||||
|
return []
|
||||||
|
|
||||||
|
|
||||||
_UPDATABLE_JOB_COLS = {
|
_UPDATABLE_JOB_COLS = {
|
||||||
"title", "company", "url", "source", "location", "is_remote",
|
"title", "company", "url", "source", "location", "is_remote",
|
||||||
"salary", "description", "match_score", "keyword_gaps",
|
"salary", "description", "match_score", "keyword_gaps",
|
||||||
|
|
@ -355,6 +507,19 @@ def mark_applied(db_path: Path = DEFAULT_DB, ids: list[int] = None) -> None:
|
||||||
conn.close()
|
conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
def cancel_task(db_path: Path = DEFAULT_DB, task_id: int = 0) -> bool:
|
||||||
|
"""Cancel a single queued/running task by id. Returns True if a row was updated."""
|
||||||
|
conn = sqlite3.connect(db_path)
|
||||||
|
count = conn.execute(
|
||||||
|
"UPDATE background_tasks SET status='failed', error='Cancelled by user',"
|
||||||
|
" finished_at=datetime('now') WHERE id=? AND status IN ('queued','running')",
|
||||||
|
(task_id,),
|
||||||
|
).rowcount
|
||||||
|
conn.commit()
|
||||||
|
conn.close()
|
||||||
|
return count > 0
|
||||||
|
|
||||||
|
|
||||||
def kill_stuck_tasks(db_path: Path = DEFAULT_DB) -> int:
|
def kill_stuck_tasks(db_path: Path = DEFAULT_DB) -> int:
|
||||||
"""Mark all queued/running background tasks as failed. Returns count killed."""
|
"""Mark all queued/running background tasks as failed. Returns count killed."""
|
||||||
conn = sqlite3.connect(db_path)
|
conn = sqlite3.connect(db_path)
|
||||||
|
|
@ -790,3 +955,190 @@ def get_task_for_job(db_path: Path = DEFAULT_DB, task_type: str = "",
|
||||||
).fetchone()
|
).fetchone()
|
||||||
conn.close()
|
conn.close()
|
||||||
return dict(row) if row else None
|
return dict(row) if row else None
|
||||||
|
|
||||||
|
|
||||||
|
# ── Resume library helpers ────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
def _resume_as_dict(row) -> dict:
|
||||||
|
"""Convert a sqlite3.Row from the resumes table to a plain dict."""
|
||||||
|
return {
|
||||||
|
"id": row["id"],
|
||||||
|
"name": row["name"],
|
||||||
|
"source": row["source"],
|
||||||
|
"job_id": row["job_id"],
|
||||||
|
"text": row["text"],
|
||||||
|
"struct_json": row["struct_json"],
|
||||||
|
"word_count": row["word_count"],
|
||||||
|
"is_default": row["is_default"],
|
||||||
|
"created_at": row["created_at"],
|
||||||
|
"updated_at": row["updated_at"],
|
||||||
|
"synced_at": row["synced_at"] if "synced_at" in row.keys() else None,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def create_resume(
|
||||||
|
db_path: Path = DEFAULT_DB,
|
||||||
|
name: str = "",
|
||||||
|
text: str = "",
|
||||||
|
source: str = "manual",
|
||||||
|
job_id: int | None = None,
|
||||||
|
struct_json: str | None = None,
|
||||||
|
) -> dict:
|
||||||
|
"""Insert a new resume into the library. Returns the created row as a dict."""
|
||||||
|
word_count = len(text.split()) if text else 0
|
||||||
|
conn = sqlite3.connect(db_path)
|
||||||
|
conn.row_factory = sqlite3.Row
|
||||||
|
try:
|
||||||
|
cur = conn.execute(
|
||||||
|
"""INSERT INTO resumes (name, source, job_id, text, struct_json, word_count)
|
||||||
|
VALUES (?, ?, ?, ?, ?, ?)""",
|
||||||
|
(name, source, job_id, text, struct_json, word_count),
|
||||||
|
)
|
||||||
|
conn.commit()
|
||||||
|
row = conn.execute("SELECT * FROM resumes WHERE id=?", (cur.lastrowid,)).fetchone()
|
||||||
|
return _resume_as_dict(row)
|
||||||
|
finally:
|
||||||
|
conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
def list_resumes(db_path: Path = DEFAULT_DB) -> list[dict]:
|
||||||
|
"""Return all resumes ordered by default-first then newest-first."""
|
||||||
|
conn = sqlite3.connect(db_path)
|
||||||
|
conn.row_factory = sqlite3.Row
|
||||||
|
try:
|
||||||
|
rows = conn.execute(
|
||||||
|
"SELECT * FROM resumes ORDER BY is_default DESC, created_at DESC"
|
||||||
|
).fetchall()
|
||||||
|
return [_resume_as_dict(r) for r in rows]
|
||||||
|
finally:
|
||||||
|
conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
def get_resume(db_path: Path = DEFAULT_DB, resume_id: int = 0) -> dict | None:
|
||||||
|
"""Return a single resume by id, or None if not found."""
|
||||||
|
conn = sqlite3.connect(db_path)
|
||||||
|
conn.row_factory = sqlite3.Row
|
||||||
|
try:
|
||||||
|
row = conn.execute("SELECT * FROM resumes WHERE id=?", (resume_id,)).fetchone()
|
||||||
|
return _resume_as_dict(row) if row else None
|
||||||
|
finally:
|
||||||
|
conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
def update_resume(
|
||||||
|
db_path: Path = DEFAULT_DB,
|
||||||
|
resume_id: int = 0,
|
||||||
|
name: str | None = None,
|
||||||
|
text: str | None = None,
|
||||||
|
) -> dict | None:
|
||||||
|
"""Update name and/or text of a resume. Returns updated row or None."""
|
||||||
|
conn = sqlite3.connect(db_path)
|
||||||
|
conn.row_factory = sqlite3.Row
|
||||||
|
try:
|
||||||
|
if name is not None:
|
||||||
|
conn.execute(
|
||||||
|
"UPDATE resumes SET name=?, updated_at=datetime('now') WHERE id=?",
|
||||||
|
(name, resume_id),
|
||||||
|
)
|
||||||
|
if text is not None:
|
||||||
|
word_count = len(text.split())
|
||||||
|
conn.execute(
|
||||||
|
"UPDATE resumes SET text=?, word_count=?, updated_at=datetime('now') WHERE id=?",
|
||||||
|
(text, word_count, resume_id),
|
||||||
|
)
|
||||||
|
conn.commit()
|
||||||
|
row = conn.execute("SELECT * FROM resumes WHERE id=?", (resume_id,)).fetchone()
|
||||||
|
return _resume_as_dict(row) if row else None
|
||||||
|
finally:
|
||||||
|
conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
def delete_resume(db_path: Path = DEFAULT_DB, resume_id: int = 0) -> None:
|
||||||
|
"""Delete a resume by id."""
|
||||||
|
conn = sqlite3.connect(db_path)
|
||||||
|
try:
|
||||||
|
conn.execute("DELETE FROM resumes WHERE id=?", (resume_id,))
|
||||||
|
conn.commit()
|
||||||
|
finally:
|
||||||
|
conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
def set_default_resume(db_path: Path = DEFAULT_DB, resume_id: int = 0) -> None:
|
||||||
|
"""Set one resume as default, clearing the flag on all others."""
|
||||||
|
conn = sqlite3.connect(db_path)
|
||||||
|
try:
|
||||||
|
conn.execute("UPDATE resumes SET is_default=0")
|
||||||
|
conn.execute("UPDATE resumes SET is_default=1 WHERE id=?", (resume_id,))
|
||||||
|
conn.commit()
|
||||||
|
finally:
|
||||||
|
conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
def update_resume_synced_at(db_path: Path = DEFAULT_DB, resume_id: int = 0) -> None:
|
||||||
|
"""Mark a library entry as synced to the profile (library→profile direction)."""
|
||||||
|
conn = sqlite3.connect(db_path)
|
||||||
|
try:
|
||||||
|
conn.execute(
|
||||||
|
"UPDATE resumes SET synced_at=datetime('now') WHERE id=?",
|
||||||
|
(resume_id,),
|
||||||
|
)
|
||||||
|
conn.commit()
|
||||||
|
finally:
|
||||||
|
conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
def update_resume_content(
|
||||||
|
db_path: Path = DEFAULT_DB,
|
||||||
|
resume_id: int = 0,
|
||||||
|
text: str = "",
|
||||||
|
struct_json: str | None = None,
|
||||||
|
) -> None:
|
||||||
|
"""Update text, struct_json, and synced_at for a library entry.
|
||||||
|
|
||||||
|
Called by the profile→library sync path (PUT /api/settings/resume).
|
||||||
|
"""
|
||||||
|
word_count = len(text.split()) if text else 0
|
||||||
|
conn = sqlite3.connect(db_path)
|
||||||
|
try:
|
||||||
|
conn.execute(
|
||||||
|
"""UPDATE resumes
|
||||||
|
SET text=?, struct_json=?, word_count=?,
|
||||||
|
synced_at=datetime('now'), updated_at=datetime('now')
|
||||||
|
WHERE id=?""",
|
||||||
|
(text, struct_json, word_count, resume_id),
|
||||||
|
)
|
||||||
|
conn.commit()
|
||||||
|
finally:
|
||||||
|
conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
def get_job_resume(db_path: Path = DEFAULT_DB, job_id: int = 0) -> dict | None:
|
||||||
|
"""Return the resume for a job: job-specific first, then default, then None."""
|
||||||
|
conn = sqlite3.connect(db_path)
|
||||||
|
conn.row_factory = sqlite3.Row
|
||||||
|
try:
|
||||||
|
row = conn.execute(
|
||||||
|
"""SELECT r.* FROM resumes r
|
||||||
|
JOIN jobs j ON j.resume_id = r.id
|
||||||
|
WHERE j.id=?""",
|
||||||
|
(job_id,),
|
||||||
|
).fetchone()
|
||||||
|
if row:
|
||||||
|
return _resume_as_dict(row)
|
||||||
|
row = conn.execute(
|
||||||
|
"SELECT * FROM resumes WHERE is_default=1 LIMIT 1"
|
||||||
|
).fetchone()
|
||||||
|
return _resume_as_dict(row) if row else None
|
||||||
|
finally:
|
||||||
|
conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
def set_job_resume(db_path: Path = DEFAULT_DB, job_id: int = 0, resume_id: int = 0) -> None:
|
||||||
|
"""Attach a specific resume to a job (overrides default for that job)."""
|
||||||
|
conn = sqlite3.connect(db_path)
|
||||||
|
try:
|
||||||
|
conn.execute("UPDATE jobs SET resume_id=? WHERE id=?", (resume_id, job_id))
|
||||||
|
conn.commit()
|
||||||
|
finally:
|
||||||
|
conn.close()
|
||||||
|
|
|
||||||
122
scripts/db_migrate.py
Normal file
122
scripts/db_migrate.py
Normal file
|
|
@ -0,0 +1,122 @@
|
||||||
|
"""
|
||||||
|
db_migrate.py — Rails-style numbered SQL migration runner for Peregrine user DBs.
|
||||||
|
|
||||||
|
Migration files live in migrations/ (sibling to this script's parent directory),
|
||||||
|
named NNN_description.sql (e.g. 001_baseline.sql). They are applied in sorted
|
||||||
|
order and tracked in the schema_migrations table so each runs exactly once.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
from scripts.db_migrate import migrate_db
|
||||||
|
migrate_db(Path("/path/to/user.db"))
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import sqlite3
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# Resolved at import time: peregrine repo root / migrations/
|
||||||
|
_MIGRATIONS_DIR = Path(__file__).parent.parent / "migrations"
|
||||||
|
|
||||||
|
_CREATE_MIGRATIONS_TABLE = """
|
||||||
|
CREATE TABLE IF NOT EXISTS schema_migrations (
|
||||||
|
version TEXT PRIMARY KEY,
|
||||||
|
applied_at TEXT NOT NULL DEFAULT (datetime('now'))
|
||||||
|
)
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
def migrate_db(db_path: Path) -> list[str]:
|
||||||
|
"""Apply any pending migrations to db_path. Returns list of applied versions."""
|
||||||
|
applied: list[str] = []
|
||||||
|
|
||||||
|
con = sqlite3.connect(db_path)
|
||||||
|
try:
|
||||||
|
con.execute(_CREATE_MIGRATIONS_TABLE)
|
||||||
|
con.commit()
|
||||||
|
|
||||||
|
if not _MIGRATIONS_DIR.is_dir():
|
||||||
|
log.warning("migrations/ directory not found at %s — skipping", _MIGRATIONS_DIR)
|
||||||
|
return applied
|
||||||
|
|
||||||
|
migration_files = sorted(_MIGRATIONS_DIR.glob("*.sql"))
|
||||||
|
if not migration_files:
|
||||||
|
return applied
|
||||||
|
|
||||||
|
already_applied = {
|
||||||
|
row[0] for row in con.execute("SELECT version FROM schema_migrations")
|
||||||
|
}
|
||||||
|
|
||||||
|
for path in migration_files:
|
||||||
|
version = path.stem # e.g. "001_baseline"
|
||||||
|
if version in already_applied:
|
||||||
|
continue
|
||||||
|
|
||||||
|
sql = path.read_text(encoding="utf-8")
|
||||||
|
log.info("Applying migration %s to %s", version, db_path.name)
|
||||||
|
try:
|
||||||
|
# Execute statements individually so that ALTER TABLE ADD COLUMN
|
||||||
|
# errors caused by already-existing columns (pre-migration DBs
|
||||||
|
# created from a newer schema) are treated as no-ops rather than
|
||||||
|
# fatal failures.
|
||||||
|
statements = [s.strip() for s in sql.split(";") if s.strip()]
|
||||||
|
for stmt in statements:
|
||||||
|
# Strip leading SQL comment lines (-- ...) before processing.
|
||||||
|
# Checking startswith("--") on the raw chunk would skip entire
|
||||||
|
# multi-line statements whose first line is a comment.
|
||||||
|
stripped_lines = [
|
||||||
|
ln for ln in stmt.splitlines()
|
||||||
|
if not ln.strip().startswith("--")
|
||||||
|
]
|
||||||
|
stmt = "\n".join(stripped_lines).strip()
|
||||||
|
if not stmt:
|
||||||
|
continue
|
||||||
|
# Pre-check: if this is ADD COLUMN and the column already exists, skip.
|
||||||
|
# This guards against schema_migrations being ahead of the actual schema
|
||||||
|
# (e.g. DB reset after migrations were recorded).
|
||||||
|
stmt_upper = stmt.upper()
|
||||||
|
if "ALTER TABLE" in stmt_upper and "ADD COLUMN" in stmt_upper:
|
||||||
|
# Extract table name and column name from the statement
|
||||||
|
import re as _re
|
||||||
|
m = _re.match(
|
||||||
|
r"ALTER\s+TABLE\s+(\w+)\s+ADD\s+COLUMN\s+(\w+)",
|
||||||
|
stmt, _re.IGNORECASE
|
||||||
|
)
|
||||||
|
if m:
|
||||||
|
tbl, col = m.group(1), m.group(2)
|
||||||
|
existing = {
|
||||||
|
row[1]
|
||||||
|
for row in con.execute(f"PRAGMA table_info({tbl})")
|
||||||
|
}
|
||||||
|
if col in existing:
|
||||||
|
log.info(
|
||||||
|
"Migration %s: column %s.%s already exists, skipping",
|
||||||
|
version, tbl, col,
|
||||||
|
)
|
||||||
|
continue
|
||||||
|
try:
|
||||||
|
con.execute(stmt)
|
||||||
|
except sqlite3.OperationalError as stmt_exc:
|
||||||
|
msg = str(stmt_exc).lower()
|
||||||
|
if "duplicate column name" in msg or "already exists" in msg:
|
||||||
|
log.info(
|
||||||
|
"Migration %s: statement already applied, skipping: %s",
|
||||||
|
version, stmt_exc,
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
raise
|
||||||
|
con.execute(
|
||||||
|
"INSERT INTO schema_migrations (version) VALUES (?)", (version,)
|
||||||
|
)
|
||||||
|
con.commit()
|
||||||
|
applied.append(version)
|
||||||
|
log.info("Migration %s applied successfully", version)
|
||||||
|
except Exception as exc:
|
||||||
|
con.rollback()
|
||||||
|
log.error("Migration %s failed: %s", version, exc)
|
||||||
|
raise RuntimeError(f"Migration {version} failed: {exc}") from exc
|
||||||
|
finally:
|
||||||
|
con.close()
|
||||||
|
|
||||||
|
return applied
|
||||||
|
|
@ -34,17 +34,48 @@ CUSTOM_SCRAPERS: dict[str, object] = {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def load_config() -> tuple[dict, dict]:
|
def _normalize_profiles(raw: dict) -> dict:
|
||||||
profiles = yaml.safe_load(PROFILES_CFG.read_text())
|
"""Normalize search_profiles.yaml to the canonical {profiles: [...]} format.
|
||||||
notion_cfg = yaml.safe_load(NOTION_CFG.read_text())
|
|
||||||
|
The onboarding wizard (pre-fix) wrote a flat `default: {...}` structure.
|
||||||
|
Canonical format is `profiles: [{name, titles/job_titles, boards, ...}]`.
|
||||||
|
This converts on load so both formats work without a migration.
|
||||||
|
"""
|
||||||
|
if "profiles" in raw:
|
||||||
|
return raw
|
||||||
|
# Wizard-written format: top-level keys are profile names (usually "default")
|
||||||
|
profiles = []
|
||||||
|
for name, body in raw.items():
|
||||||
|
if not isinstance(body, dict):
|
||||||
|
continue
|
||||||
|
# job_boards: [{name, enabled}] → boards: [name] (enabled only)
|
||||||
|
job_boards = body.pop("job_boards", None)
|
||||||
|
if job_boards and "boards" not in body:
|
||||||
|
body["boards"] = [b["name"] for b in job_boards if b.get("enabled", True)]
|
||||||
|
# blocklist_* keys live in load_blocklist, not per-profile — drop them
|
||||||
|
body.pop("blocklist_companies", None)
|
||||||
|
body.pop("blocklist_industries", None)
|
||||||
|
body.pop("blocklist_locations", None)
|
||||||
|
profiles.append({"name": name, **body})
|
||||||
|
return {"profiles": profiles}
|
||||||
|
|
||||||
|
|
||||||
|
def load_config(config_dir: Path | None = None) -> tuple[dict, dict]:
|
||||||
|
cfg = config_dir or CONFIG_DIR
|
||||||
|
profiles_path = cfg / "search_profiles.yaml"
|
||||||
|
notion_path = cfg / "notion.yaml"
|
||||||
|
raw = yaml.safe_load(profiles_path.read_text()) or {}
|
||||||
|
profiles = _normalize_profiles(raw)
|
||||||
|
notion_cfg = yaml.safe_load(notion_path.read_text()) if notion_path.exists() else {"field_map": {}, "token": None, "database_id": None}
|
||||||
return profiles, notion_cfg
|
return profiles, notion_cfg
|
||||||
|
|
||||||
|
|
||||||
def load_blocklist() -> dict:
|
def load_blocklist(config_dir: Path | None = None) -> dict:
|
||||||
"""Load global blocklist config. Returns dict with companies, industries, locations lists."""
|
"""Load global blocklist config. Returns dict with companies, industries, locations lists."""
|
||||||
if not BLOCKLIST_CFG.exists():
|
blocklist_path = (config_dir or CONFIG_DIR) / "blocklist.yaml"
|
||||||
|
if not blocklist_path.exists():
|
||||||
return {"companies": [], "industries": [], "locations": []}
|
return {"companies": [], "industries": [], "locations": []}
|
||||||
raw = yaml.safe_load(BLOCKLIST_CFG.read_text()) or {}
|
raw = yaml.safe_load(blocklist_path.read_text()) or {}
|
||||||
return {
|
return {
|
||||||
"companies": [c.lower() for c in raw.get("companies", []) if c],
|
"companies": [c.lower() for c in raw.get("companies", []) if c],
|
||||||
"industries": [i.lower() for i in raw.get("industries", []) if i],
|
"industries": [i.lower() for i in raw.get("industries", []) if i],
|
||||||
|
|
@ -117,10 +148,15 @@ def push_to_notion(notion: Client, db_id: str, job: dict, fm: dict) -> None:
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def run_discovery(db_path: Path = DEFAULT_DB, notion_push: bool = False) -> None:
|
def run_discovery(db_path: Path = DEFAULT_DB, notion_push: bool = False, config_dir: Path | None = None) -> None:
|
||||||
profiles_cfg, notion_cfg = load_config()
|
# In cloud mode, config_dir is the per-user config directory derived from db_path.
|
||||||
fm = notion_cfg["field_map"]
|
# Falls back to the app-level /app/config for single-tenant deployments.
|
||||||
blocklist = load_blocklist()
|
resolved_cfg = config_dir or Path(db_path).parent / "config"
|
||||||
|
if not resolved_cfg.exists():
|
||||||
|
resolved_cfg = CONFIG_DIR
|
||||||
|
profiles_cfg, notion_cfg = load_config(resolved_cfg)
|
||||||
|
fm = notion_cfg.get("field_map") or {}
|
||||||
|
blocklist = load_blocklist(resolved_cfg)
|
||||||
|
|
||||||
_bl_summary = {k: len(v) for k, v in blocklist.items() if v}
|
_bl_summary = {k: len(v) for k, v in blocklist.items() if v}
|
||||||
if _bl_summary:
|
if _bl_summary:
|
||||||
|
|
@ -196,20 +232,59 @@ def run_discovery(db_path: Path = DEFAULT_DB, notion_push: bool = False) -> None
|
||||||
exclude_kw = [kw.lower() for kw in profile.get("exclude_keywords", [])]
|
exclude_kw = [kw.lower() for kw in profile.get("exclude_keywords", [])]
|
||||||
results_per_board = profile.get("results_per_board", 25)
|
results_per_board = profile.get("results_per_board", 25)
|
||||||
|
|
||||||
|
# Map remote_preference → JobSpy is_remote param:
|
||||||
|
# 'remote' → True (remote-only listings)
|
||||||
|
# 'onsite' → False (on-site-only listings)
|
||||||
|
# 'both' → None (no filter — JobSpy default)
|
||||||
|
_rp = profile.get("remote_preference", "both")
|
||||||
|
_is_remote: bool | None = True if _rp == "remote" else (False if _rp == "onsite" else None)
|
||||||
|
|
||||||
|
# When filtering for remote-only, also drop hybrid roles at the description level.
|
||||||
|
# Job boards (especially LinkedIn) tag hybrid listings as is_remote=True, so the
|
||||||
|
# board-side filter alone is not reliable. We match specific work-arrangement
|
||||||
|
# phrases to avoid false positives like "hybrid cloud" or "hybrid architecture".
|
||||||
|
_HYBRID_PHRASES = [
|
||||||
|
"hybrid role", "hybrid position", "hybrid work", "hybrid schedule",
|
||||||
|
"hybrid model", "hybrid arrangement", "hybrid opportunity",
|
||||||
|
"in-office/remote", "in office/remote", "remote/in-office",
|
||||||
|
"remote/office", "office/remote",
|
||||||
|
"days in office", "days per week in", "days onsite", "days on-site",
|
||||||
|
"required to be in office", "required in office",
|
||||||
|
]
|
||||||
|
if _rp == "remote":
|
||||||
|
exclude_kw = exclude_kw + _HYBRID_PHRASES
|
||||||
|
|
||||||
for location in profile["locations"]:
|
for location in profile["locations"]:
|
||||||
|
|
||||||
# ── JobSpy boards ──────────────────────────────────────────────────
|
# ── JobSpy boards ──────────────────────────────────────────────────
|
||||||
if boards:
|
if boards:
|
||||||
print(f" [jobspy] {location} — boards: {', '.join(boards)}")
|
# Validate boards against the installed JobSpy Site enum.
|
||||||
|
# One unsupported name in the list aborts the entire scrape_jobs() call.
|
||||||
try:
|
try:
|
||||||
jobs: pd.DataFrame = scrape_jobs(
|
from jobspy import Site as _Site
|
||||||
site_name=boards,
|
_valid = {s.value for s in _Site}
|
||||||
search_term=" OR ".join(f'"{t}"' for t in profile["titles"]),
|
_filtered = [b for b in boards if b in _valid]
|
||||||
|
_dropped = [b for b in boards if b not in _valid]
|
||||||
|
if _dropped:
|
||||||
|
print(f" [jobspy] Skipping unsupported boards: {', '.join(_dropped)}")
|
||||||
|
except ImportError:
|
||||||
|
_filtered = boards # fallback: pass through unchanged
|
||||||
|
if not _filtered:
|
||||||
|
print(f" [jobspy] No valid boards for {location} — skipping")
|
||||||
|
continue
|
||||||
|
print(f" [jobspy] {location} — boards: {', '.join(_filtered)}")
|
||||||
|
try:
|
||||||
|
jobspy_kwargs: dict = dict(
|
||||||
|
site_name=_filtered,
|
||||||
|
search_term=" OR ".join(f'"{t}"' for t in (profile.get("titles") or profile.get("job_titles", []))),
|
||||||
location=location,
|
location=location,
|
||||||
results_wanted=results_per_board,
|
results_wanted=results_per_board,
|
||||||
hours_old=profile.get("hours_old", 72),
|
hours_old=profile.get("hours_old", 72),
|
||||||
linkedin_fetch_description=True,
|
linkedin_fetch_description=True,
|
||||||
)
|
)
|
||||||
|
if _is_remote is not None:
|
||||||
|
jobspy_kwargs["is_remote"] = _is_remote
|
||||||
|
jobs: pd.DataFrame = scrape_jobs(**jobspy_kwargs)
|
||||||
print(f" [jobspy] {len(jobs)} raw results")
|
print(f" [jobspy] {len(jobs)} raw results")
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
print(f" [jobspy] ERROR: {exc}")
|
print(f" [jobspy] ERROR: {exc}")
|
||||||
|
|
@ -232,6 +307,10 @@ def run_discovery(db_path: Path = DEFAULT_DB, notion_push: bool = False) -> None
|
||||||
elif job_dict.get("salary_source") and str(job_dict["salary_source"]) not in ("nan", "None", ""):
|
elif job_dict.get("salary_source") and str(job_dict["salary_source"]) not in ("nan", "None", ""):
|
||||||
salary_str = str(job_dict["salary_source"])
|
salary_str = str(job_dict["salary_source"])
|
||||||
|
|
||||||
|
_dp = job_dict.get("date_posted")
|
||||||
|
date_posted_str = (
|
||||||
|
_dp.isoformat() if hasattr(_dp, "isoformat") else str(_dp)
|
||||||
|
) if _dp and str(_dp) not in ("nan", "None", "") else ""
|
||||||
row = {
|
row = {
|
||||||
"url": url,
|
"url": url,
|
||||||
"title": _s(job_dict.get("title")),
|
"title": _s(job_dict.get("title")),
|
||||||
|
|
@ -241,6 +320,7 @@ def run_discovery(db_path: Path = DEFAULT_DB, notion_push: bool = False) -> None
|
||||||
"is_remote": bool(job_dict.get("is_remote", False)),
|
"is_remote": bool(job_dict.get("is_remote", False)),
|
||||||
"salary": salary_str,
|
"salary": salary_str,
|
||||||
"description": _s(job_dict.get("description")),
|
"description": _s(job_dict.get("description")),
|
||||||
|
"date_posted": date_posted_str,
|
||||||
"_exclude_kw": exclude_kw,
|
"_exclude_kw": exclude_kw,
|
||||||
}
|
}
|
||||||
if _insert_if_new(row, _s(job_dict.get("site"))):
|
if _insert_if_new(row, _s(job_dict.get("site"))):
|
||||||
|
|
|
||||||
|
|
@ -16,6 +16,8 @@ import re
|
||||||
import sys
|
import sys
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
import yaml
|
||||||
|
|
||||||
sys.path.insert(0, str(Path(__file__).parent.parent))
|
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||||
|
|
||||||
from scripts.user_profile import UserProfile
|
from scripts.user_profile import UserProfile
|
||||||
|
|
@ -26,130 +28,89 @@ LETTERS_DIR = _profile.docs_dir if _profile else Path.home() / "Documents" / "Jo
|
||||||
LETTER_GLOB = "*Cover Letter*.md"
|
LETTER_GLOB = "*Cover Letter*.md"
|
||||||
|
|
||||||
# Background injected into every prompt so the model has the candidate's facts
|
# Background injected into every prompt so the model has the candidate's facts
|
||||||
def _build_system_context() -> str:
|
def _build_system_context(profile=None) -> str:
|
||||||
if not _profile:
|
p = profile or _profile
|
||||||
|
if not p:
|
||||||
return "You are a professional cover letter writer. Write in first person."
|
return "You are a professional cover letter writer. Write in first person."
|
||||||
parts = [f"You are writing cover letters for {_profile.name}. {_profile.career_summary}"]
|
parts = [f"You are writing cover letters for {p.name}. {p.career_summary}"]
|
||||||
if _profile.candidate_voice:
|
if p.candidate_voice:
|
||||||
parts.append(
|
parts.append(
|
||||||
f"Voice and personality: {_profile.candidate_voice} "
|
f"Voice and personality: {p.candidate_voice} "
|
||||||
"Write in a way that reflects these authentic traits — not as a checklist, "
|
"Write in a way that reflects these authentic traits — not as a checklist, "
|
||||||
"but as a natural expression of who this person is."
|
"but as a natural expression of who this person is."
|
||||||
)
|
)
|
||||||
return " ".join(parts)
|
return " ".join(parts)
|
||||||
|
|
||||||
SYSTEM_CONTEXT = _build_system_context()
|
SYSTEM_CONTEXT = _build_system_context()
|
||||||
|
_candidate = _profile.name if _profile else "the candidate"
|
||||||
|
|
||||||
|
|
||||||
# ── Mission-alignment detection ───────────────────────────────────────────────
|
# ── Mission-alignment detection ───────────────────────────────────────────────
|
||||||
# When a company/JD signals one of these preferred industries, the cover letter
|
# Domains and their keyword signals are loaded from config/mission_domains.yaml.
|
||||||
# prompt injects a hint so Para 3 can reflect genuine personal connection.
|
|
||||||
# This does NOT disclose any personal disability or family information.
|
# This does NOT disclose any personal disability or family information.
|
||||||
|
|
||||||
|
_MISSION_DOMAINS_PATH = Path(__file__).parent.parent / "config" / "mission_domains.yaml"
|
||||||
|
|
||||||
|
|
||||||
|
def load_mission_domains(path: Path | None = None) -> dict[str, dict]:
|
||||||
|
"""Load mission domain config from YAML. Returns dict keyed by domain name."""
|
||||||
|
p = path or _MISSION_DOMAINS_PATH
|
||||||
|
if not p.exists():
|
||||||
|
return {}
|
||||||
|
with p.open(encoding="utf-8") as fh:
|
||||||
|
data = yaml.safe_load(fh)
|
||||||
|
return data.get("domains", {}) if data else {}
|
||||||
|
|
||||||
|
|
||||||
|
_MISSION_DOMAINS: dict[str, dict] = load_mission_domains()
|
||||||
_MISSION_SIGNALS: dict[str, list[str]] = {
|
_MISSION_SIGNALS: dict[str, list[str]] = {
|
||||||
"music": [
|
domain: cfg.get("signals", []) for domain, cfg in _MISSION_DOMAINS.items()
|
||||||
"music", "spotify", "tidal", "soundcloud", "bandcamp", "apple music",
|
|
||||||
"distrokid", "cd baby", "landr", "beatport", "reverb", "vinyl",
|
|
||||||
"streaming", "artist", "label", "live nation", "ticketmaster", "aeg",
|
|
||||||
"songkick", "concert", "venue", "festival", "audio", "podcast",
|
|
||||||
"studio", "record", "musician", "playlist",
|
|
||||||
],
|
|
||||||
"animal_welfare": [
|
|
||||||
"animal", "shelter", "rescue", "humane society", "spca", "aspca",
|
|
||||||
"veterinary", "vet ", "wildlife", "pet ", "adoption", "foster",
|
|
||||||
"dog", "cat", "feline", "canine", "sanctuary", "zoo",
|
|
||||||
],
|
|
||||||
"education": [
|
|
||||||
"education", "school", "learning", "student", "edtech", "classroom",
|
|
||||||
"curriculum", "tutoring", "academic", "university", "kids", "children",
|
|
||||||
"youth", "literacy", "khan academy", "duolingo", "chegg", "coursera",
|
|
||||||
"instructure", "canvas lms", "clever", "district", "teacher",
|
|
||||||
"k-12", "k12", "grade", "pedagogy",
|
|
||||||
],
|
|
||||||
"social_impact": [
|
|
||||||
"nonprofit", "non-profit", "501(c)", "social impact", "mission-driven",
|
|
||||||
"public benefit", "community", "underserved", "equity", "justice",
|
|
||||||
"humanitarian", "advocacy", "charity", "foundation", "ngo",
|
|
||||||
"social good", "civic", "public health", "mental health", "food security",
|
|
||||||
"housing", "homelessness", "poverty", "workforce development",
|
|
||||||
],
|
|
||||||
# Health is listed last — it's a genuine but lower-priority connection than
|
|
||||||
# music/animals/education/social_impact. detect_mission_alignment returns on first
|
|
||||||
# match, so dict order = preference order.
|
|
||||||
"health": [
|
|
||||||
"patient", "patients", "healthcare", "health tech", "healthtech",
|
|
||||||
"pharma", "pharmaceutical", "clinical", "medical",
|
|
||||||
"hospital", "clinic", "therapy", "therapist",
|
|
||||||
"rare disease", "life sciences", "life science",
|
|
||||||
"treatment", "prescription", "biotech", "biopharma", "medtech",
|
|
||||||
"behavioral health", "population health",
|
|
||||||
"care management", "care coordination", "oncology", "specialty pharmacy",
|
|
||||||
"provider network", "payer", "health plan", "benefits administration",
|
|
||||||
"ehr", "emr", "fhir", "hipaa",
|
|
||||||
],
|
|
||||||
}
|
|
||||||
|
|
||||||
_candidate = _profile.name if _profile else "the candidate"
|
|
||||||
|
|
||||||
_MISSION_DEFAULTS: dict[str, str] = {
|
|
||||||
"music": (
|
|
||||||
f"This company is in the music industry — an industry {_candidate} finds genuinely "
|
|
||||||
"compelling. Para 3 should warmly and specifically reflect this authentic alignment, "
|
|
||||||
"not as a generic fan statement, but as an honest statement of where they'd love to "
|
|
||||||
"apply their skills."
|
|
||||||
),
|
|
||||||
"animal_welfare": (
|
|
||||||
f"This organization works in animal welfare/rescue — a mission {_candidate} finds "
|
|
||||||
"genuinely meaningful. Para 3 should reflect this authentic connection warmly and "
|
|
||||||
"specifically, tying their skills to this mission."
|
|
||||||
),
|
|
||||||
"education": (
|
|
||||||
f"This company works in education or EdTech — a domain that resonates with "
|
|
||||||
f"{_candidate}'s values. Para 3 should reflect this authentic connection specifically "
|
|
||||||
"and warmly."
|
|
||||||
),
|
|
||||||
"social_impact": (
|
|
||||||
f"This organization is mission-driven / social impact focused — exactly the kind of "
|
|
||||||
f"cause {_candidate} cares deeply about. Para 3 should warmly reflect their genuine "
|
|
||||||
"desire to apply their skills to work that makes a real difference in people's lives."
|
|
||||||
),
|
|
||||||
"health": (
|
|
||||||
f"This company works in healthcare, life sciences, or patient care. "
|
|
||||||
f"Do NOT write about {_candidate}'s passion for pharmaceuticals or healthcare as an "
|
|
||||||
"industry. Instead, Para 3 should reflect genuine care for the PEOPLE these companies "
|
|
||||||
"exist to serve — those navigating complex, often invisible, or unusual health journeys; "
|
|
||||||
"patients facing rare or poorly understood conditions; individuals whose situations don't "
|
|
||||||
"fit a clean category. The connection is to the humans behind the data, not the industry. "
|
|
||||||
"If the user has provided a personal note, use that to anchor Para 3 specifically."
|
|
||||||
),
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def _build_mission_notes() -> dict[str, str]:
|
def _build_mission_notes(profile=None, candidate_name: str | None = None) -> dict[str, str]:
|
||||||
"""Merge user's custom mission notes with generic defaults."""
|
"""Merge user's custom mission notes with YAML defaults.
|
||||||
prefs = _profile.mission_preferences if _profile else {}
|
|
||||||
notes = {}
|
For domains defined in mission_domains.yaml the default_note is used when
|
||||||
for industry, default_note in _MISSION_DEFAULTS.items():
|
the user has not provided a custom note in user.yaml mission_preferences.
|
||||||
custom = (prefs.get(industry) or "").strip()
|
|
||||||
|
For user-defined domains (keys in mission_preferences that are NOT in the
|
||||||
|
YAML config), the custom note is used as-is; no signal detection applies.
|
||||||
|
"""
|
||||||
|
p = profile or _profile
|
||||||
|
name = candidate_name or (p.name if p else "the candidate")
|
||||||
|
prefs = p.mission_preferences if p else {}
|
||||||
|
notes: dict[str, str] = {}
|
||||||
|
|
||||||
|
for domain, cfg in _MISSION_DOMAINS.items():
|
||||||
|
default_note = (cfg.get("default_note") or "").strip()
|
||||||
|
custom = (prefs.get(domain) or "").strip()
|
||||||
if custom:
|
if custom:
|
||||||
notes[industry] = (
|
notes[domain] = (
|
||||||
f"Mission alignment — {_candidate} shared: \"{custom}\". "
|
f"Mission alignment — {name} shared: \"{custom}\". "
|
||||||
"Para 3 should warmly and specifically reflect this authentic connection."
|
"Para 3 should warmly and specifically reflect this authentic connection."
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
notes[industry] = default_note
|
notes[domain] = default_note
|
||||||
|
|
||||||
return notes
|
return notes
|
||||||
|
|
||||||
|
|
||||||
_MISSION_NOTES = _build_mission_notes()
|
_MISSION_NOTES = _build_mission_notes()
|
||||||
|
|
||||||
|
|
||||||
def detect_mission_alignment(company: str, description: str) -> str | None:
|
def detect_mission_alignment(
|
||||||
"""Return a mission hint string if company/JD matches a preferred industry, else None."""
|
company: str, description: str, mission_notes: dict | None = None
|
||||||
|
) -> str | None:
|
||||||
|
"""Return a mission hint string if company/JD matches a configured domain, else None.
|
||||||
|
|
||||||
|
Checks domains in YAML file order (dict order = match priority).
|
||||||
|
"""
|
||||||
|
notes = mission_notes if mission_notes is not None else _MISSION_NOTES
|
||||||
text = f"{company} {description}".lower()
|
text = f"{company} {description}".lower()
|
||||||
for industry, signals in _MISSION_SIGNALS.items():
|
for domain, signals in _MISSION_SIGNALS.items():
|
||||||
if any(sig in text for sig in signals):
|
if any(sig in text for sig in signals):
|
||||||
return _MISSION_NOTES[industry]
|
return notes.get(domain)
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -190,10 +151,14 @@ def build_prompt(
|
||||||
examples: list[dict],
|
examples: list[dict],
|
||||||
mission_hint: str | None = None,
|
mission_hint: str | None = None,
|
||||||
is_jobgether: bool = False,
|
is_jobgether: bool = False,
|
||||||
|
system_context: str | None = None,
|
||||||
|
candidate_name: str | None = None,
|
||||||
) -> str:
|
) -> str:
|
||||||
parts = [SYSTEM_CONTEXT.strip(), ""]
|
ctx = system_context if system_context is not None else SYSTEM_CONTEXT
|
||||||
|
name = candidate_name or _candidate
|
||||||
|
parts = [ctx.strip(), ""]
|
||||||
if examples:
|
if examples:
|
||||||
parts.append(f"=== STYLE EXAMPLES ({_candidate}'s past letters) ===\n")
|
parts.append(f"=== STYLE EXAMPLES ({name}'s past letters) ===\n")
|
||||||
for i, ex in enumerate(examples, 1):
|
for i, ex in enumerate(examples, 1):
|
||||||
parts.append(f"--- Example {i} ({ex['company']}) ---")
|
parts.append(f"--- Example {i} ({ex['company']}) ---")
|
||||||
parts.append(ex["text"])
|
parts.append(ex["text"])
|
||||||
|
|
@ -231,13 +196,14 @@ def build_prompt(
|
||||||
return "\n".join(parts)
|
return "\n".join(parts)
|
||||||
|
|
||||||
|
|
||||||
def _trim_to_letter_end(text: str) -> str:
|
def _trim_to_letter_end(text: str, profile=None) -> str:
|
||||||
"""Remove repetitive hallucinated content after the first complete sign-off.
|
"""Remove repetitive hallucinated content after the first complete sign-off.
|
||||||
|
|
||||||
Fine-tuned models sometimes loop after completing the letter. This cuts at
|
Fine-tuned models sometimes loop after completing the letter. This cuts at
|
||||||
the first closing + candidate name so only the intended letter is saved.
|
the first closing + candidate name so only the intended letter is saved.
|
||||||
"""
|
"""
|
||||||
candidate_first = (_profile.name.split()[0] if _profile else "").strip()
|
p = profile or _profile
|
||||||
|
candidate_first = (p.name.split()[0] if p else "").strip()
|
||||||
pattern = (
|
pattern = (
|
||||||
r'(?:Warm regards|Sincerely|Best regards|Kind regards|Thank you)[,.]?\s*\n+\s*'
|
r'(?:Warm regards|Sincerely|Best regards|Kind regards|Thank you)[,.]?\s*\n+\s*'
|
||||||
+ (re.escape(candidate_first) if candidate_first else r'\w+(?:\s+\w+)?')
|
+ (re.escape(candidate_first) if candidate_first else r'\w+(?:\s+\w+)?')
|
||||||
|
|
@ -257,6 +223,8 @@ def generate(
|
||||||
feedback: str = "",
|
feedback: str = "",
|
||||||
is_jobgether: bool = False,
|
is_jobgether: bool = False,
|
||||||
_router=None,
|
_router=None,
|
||||||
|
config_path: "Path | None" = None,
|
||||||
|
user_yaml_path: "Path | None" = None,
|
||||||
) -> str:
|
) -> str:
|
||||||
"""Generate a cover letter and return it as a string.
|
"""Generate a cover letter and return it as a string.
|
||||||
|
|
||||||
|
|
@ -264,15 +232,29 @@ def generate(
|
||||||
and requested changes are appended to the prompt so the LLM revises rather
|
and requested changes are appended to the prompt so the LLM revises rather
|
||||||
than starting from scratch.
|
than starting from scratch.
|
||||||
|
|
||||||
|
user_yaml_path overrides the module-level profile — required in cloud mode
|
||||||
|
so each user's name/voice/mission prefs are used instead of the global default.
|
||||||
|
|
||||||
_router is an optional pre-built LLMRouter (used in tests to avoid real LLM calls).
|
_router is an optional pre-built LLMRouter (used in tests to avoid real LLM calls).
|
||||||
"""
|
"""
|
||||||
|
# Per-call profile override (cloud mode: each user has their own user.yaml)
|
||||||
|
if user_yaml_path and Path(user_yaml_path).exists():
|
||||||
|
_prof = UserProfile(Path(user_yaml_path))
|
||||||
|
else:
|
||||||
|
_prof = _profile
|
||||||
|
|
||||||
|
sys_ctx = _build_system_context(_prof)
|
||||||
|
mission_notes = _build_mission_notes(_prof, candidate_name=(_prof.name if _prof else None))
|
||||||
|
candidate_name = _prof.name if _prof else _candidate
|
||||||
|
|
||||||
corpus = load_corpus()
|
corpus = load_corpus()
|
||||||
examples = find_similar_letters(description or f"{title} {company}", corpus)
|
examples = find_similar_letters(description or f"{title} {company}", corpus)
|
||||||
mission_hint = detect_mission_alignment(company, description)
|
mission_hint = detect_mission_alignment(company, description, mission_notes=mission_notes)
|
||||||
if mission_hint:
|
if mission_hint:
|
||||||
print(f"[cover-letter] Mission alignment detected for {company}", file=sys.stderr)
|
print(f"[cover-letter] Mission alignment detected for {company}", file=sys.stderr)
|
||||||
prompt = build_prompt(title, company, description, examples,
|
prompt = build_prompt(title, company, description, examples,
|
||||||
mission_hint=mission_hint, is_jobgether=is_jobgether)
|
mission_hint=mission_hint, is_jobgether=is_jobgether,
|
||||||
|
system_context=sys_ctx, candidate_name=candidate_name)
|
||||||
|
|
||||||
if previous_result:
|
if previous_result:
|
||||||
prompt += f"\n\n---\nPrevious draft:\n{previous_result}"
|
prompt += f"\n\n---\nPrevious draft:\n{previous_result}"
|
||||||
|
|
@ -281,8 +263,9 @@ def generate(
|
||||||
|
|
||||||
if _router is None:
|
if _router is None:
|
||||||
sys.path.insert(0, str(Path(__file__).parent.parent))
|
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||||
from scripts.llm_router import LLMRouter
|
from scripts.llm_router import LLMRouter, CONFIG_PATH
|
||||||
_router = LLMRouter()
|
resolved = config_path if (config_path and Path(config_path).exists()) else CONFIG_PATH
|
||||||
|
_router = LLMRouter(resolved)
|
||||||
|
|
||||||
print(f"[cover-letter] Generating for: {title} @ {company}", file=sys.stderr)
|
print(f"[cover-letter] Generating for: {title} @ {company}", file=sys.stderr)
|
||||||
print(f"[cover-letter] Style examples: {[e['company'] for e in examples]}", file=sys.stderr)
|
print(f"[cover-letter] Style examples: {[e['company'] for e in examples]}", file=sys.stderr)
|
||||||
|
|
@ -292,7 +275,7 @@ def generate(
|
||||||
# max_tokens=1200 caps generation at ~900 words — enough for any cover letter
|
# max_tokens=1200 caps generation at ~900 words — enough for any cover letter
|
||||||
# and prevents fine-tuned models from looping into repetitive garbage output.
|
# and prevents fine-tuned models from looping into repetitive garbage output.
|
||||||
result = _router.complete(prompt, max_tokens=1200)
|
result = _router.complete(prompt, max_tokens=1200)
|
||||||
return _trim_to_letter_end(result)
|
return _trim_to_letter_end(result, _prof)
|
||||||
|
|
||||||
|
|
||||||
def main() -> None:
|
def main() -> None:
|
||||||
|
|
|
||||||
|
|
@ -698,21 +698,43 @@ def _parse_message(conn: imaplib.IMAP4, uid: bytes) -> Optional[dict]:
|
||||||
return None
|
return None
|
||||||
msg = email.message_from_bytes(data[0][1])
|
msg = email.message_from_bytes(data[0][1])
|
||||||
|
|
||||||
body = ""
|
# Prefer text/html (preserves href attributes for digest link extraction);
|
||||||
|
# fall back to text/plain if no HTML part exists.
|
||||||
|
html_body = ""
|
||||||
|
plain_body = ""
|
||||||
if msg.is_multipart():
|
if msg.is_multipart():
|
||||||
for part in msg.walk():
|
for part in msg.walk():
|
||||||
if part.get_content_type() == "text/plain":
|
ct = part.get_content_type()
|
||||||
|
if ct == "text/html" and not html_body:
|
||||||
try:
|
try:
|
||||||
body = part.get_payload(decode=True).decode("utf-8", errors="replace")
|
html_body = part.get_payload(decode=True).decode("utf-8", errors="replace")
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
elif ct == "text/plain" and not plain_body:
|
||||||
|
try:
|
||||||
|
plain_body = part.get_payload(decode=True).decode("utf-8", errors="replace")
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
break
|
|
||||||
else:
|
else:
|
||||||
|
ct = msg.get_content_type()
|
||||||
try:
|
try:
|
||||||
body = msg.get_payload(decode=True).decode("utf-8", errors="replace")
|
raw = msg.get_payload(decode=True).decode("utf-8", errors="replace")
|
||||||
|
if ct == "text/html":
|
||||||
|
html_body = raw
|
||||||
|
else:
|
||||||
|
plain_body = raw
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
if html_body:
|
||||||
|
# Strip <head>…</head> (CSS, meta, title) and any stray <style> blocks.
|
||||||
|
# Keeps <body> HTML intact so href attributes survive for digest extraction.
|
||||||
|
body = re.sub(r"<head[\s\S]*?</head>", "", html_body, flags=re.I)
|
||||||
|
body = re.sub(r"<style[\s\S]*?</style>", "", body, flags=re.I)
|
||||||
|
body = re.sub(r"<script[\s\S]*?</script>", "", body, flags=re.I)
|
||||||
|
else:
|
||||||
|
body = plain_body
|
||||||
|
|
||||||
mid = msg.get("Message-ID", "").strip()
|
mid = msg.get("Message-ID", "").strip()
|
||||||
if not mid:
|
if not mid:
|
||||||
return None # No Message-ID → can't dedup; skip to avoid repeat inserts
|
return None # No Message-ID → can't dedup; skip to avoid repeat inserts
|
||||||
|
|
@ -723,7 +745,7 @@ def _parse_message(conn: imaplib.IMAP4, uid: bytes) -> Optional[dict]:
|
||||||
"from_addr": _decode_str(msg.get("From")),
|
"from_addr": _decode_str(msg.get("From")),
|
||||||
"to_addr": _decode_str(msg.get("To")),
|
"to_addr": _decode_str(msg.get("To")),
|
||||||
"date": _decode_str(msg.get("Date")),
|
"date": _decode_str(msg.get("Date")),
|
||||||
"body": body[:4000],
|
"body": body, # no truncation — digest emails need full content
|
||||||
}
|
}
|
||||||
except Exception:
|
except Exception:
|
||||||
return None
|
return None
|
||||||
|
|
|
||||||
313
scripts/job_ranker.py
Normal file
313
scripts/job_ranker.py
Normal file
|
|
@ -0,0 +1,313 @@
|
||||||
|
"""Job ranking engine — two-stage discovery → review pipeline.
|
||||||
|
|
||||||
|
Stage 1 (discover.py) scrapes a wide corpus and stores everything as 'pending'.
|
||||||
|
Stage 2 (this module) scores the corpus; GET /api/jobs/stack returns top-N best
|
||||||
|
matches for the user's current review session.
|
||||||
|
|
||||||
|
All signal functions return a float in [0, 1]. The final stack_score is 0–100.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
from scripts.job_ranker import rank_jobs
|
||||||
|
ranked = rank_jobs(jobs, search_titles, salary_min, salary_max, user_level)
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import math
|
||||||
|
import re
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
|
||||||
|
|
||||||
|
# ── TUNING ─────────────────────────────────────────────────────────────────────
|
||||||
|
# Adjust these constants to change how jobs are ranked.
|
||||||
|
# All individual signal scores are normalised to [0, 1] before weighting.
|
||||||
|
# Weights should sum to ≤ 1.0; the remainder is unallocated slack.
|
||||||
|
|
||||||
|
W_RESUME_MATCH = 0.40 # TF-IDF cosine similarity stored as match_score (0–100 → 0–1)
|
||||||
|
W_TITLE_MATCH = 0.30 # seniority-aware title + domain keyword overlap
|
||||||
|
W_RECENCY = 0.15 # freshness — exponential decay from date_found
|
||||||
|
W_SALARY_FIT = 0.10 # salary range overlap vs user target (neutral when unknown)
|
||||||
|
W_DESC_QUALITY = 0.05 # posting completeness — penalises stub / ghost posts
|
||||||
|
|
||||||
|
# Keyword gap penalty: each missing keyword from the resume match costs points.
|
||||||
|
# Gaps are already partially captured by W_RESUME_MATCH (same TF-IDF source),
|
||||||
|
# so this is a soft nudge, not a hard filter.
|
||||||
|
GAP_PENALTY_PER_KEYWORD: float = 0.5 # points off per gap keyword (0–100 scale)
|
||||||
|
GAP_MAX_PENALTY: float = 5.0 # hard cap so a gap-heavy job can still rank
|
||||||
|
|
||||||
|
# Recency half-life: score halves every N days past date_found
|
||||||
|
RECENCY_HALF_LIFE: int = 7 # days
|
||||||
|
|
||||||
|
# Description word-count thresholds
|
||||||
|
DESC_MIN_WORDS: int = 50 # below this → scaled penalty
|
||||||
|
DESC_TARGET_WORDS: int = 200 # at or above → full quality score
|
||||||
|
# ── END TUNING ─────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
# ── Seniority level map ────────────────────────────────────────────────────────
|
||||||
|
# (level, [keyword substrings that identify that level])
|
||||||
|
# Matched on " <lower_title> " with a space-padded check to avoid false hits.
|
||||||
|
# Level 3 is the default (mid-level, no seniority modifier in title).
|
||||||
|
_SENIORITY_MAP: list[tuple[int, list[str]]] = [
|
||||||
|
(1, ["intern", "internship", "trainee", "apprentice", "co-op", "coop"]),
|
||||||
|
(2, ["entry level", "entry-level", "junior", "jr ", "jr.", "associate "]),
|
||||||
|
(3, ["mid level", "mid-level", "intermediate"]),
|
||||||
|
(4, ["senior ", "senior,", "sr ", "sr.", " lead ", "lead,", " ii ", " iii ",
|
||||||
|
"specialist", "experienced"]),
|
||||||
|
(5, ["staff ", "principal ", "architect ", "expert ", "distinguished"]),
|
||||||
|
(6, ["director", "head of ", "manager ", "vice president", " vp "]),
|
||||||
|
(7, ["chief", "cto", "cio", "cpo", "president", "founder"]),
|
||||||
|
]
|
||||||
|
|
||||||
|
# job_level − user_level → scoring multiplier
|
||||||
|
# Positive delta = job is more senior (stretch up = encouraged)
|
||||||
|
# Negative delta = job is below the user's level
|
||||||
|
_LEVEL_MULTIPLIER: dict[int, float] = {
|
||||||
|
-4: 0.05, -3: 0.10, -2: 0.25, -1: 0.65,
|
||||||
|
0: 1.00,
|
||||||
|
1: 0.90, 2: 0.65, 3: 0.25, 4: 0.05,
|
||||||
|
}
|
||||||
|
_DEFAULT_LEVEL_MULTIPLIER = 0.05
|
||||||
|
|
||||||
|
|
||||||
|
# ── Seniority helpers ─────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def infer_seniority(title: str) -> int:
|
||||||
|
"""Return seniority level 1–7 for a job or resume title. Defaults to 3."""
|
||||||
|
padded = f" {title.lower()} "
|
||||||
|
# Iterate highest → lowest so "Senior Lead" resolves to 4, not 6
|
||||||
|
for level, keywords in reversed(_SENIORITY_MAP):
|
||||||
|
for kw in keywords:
|
||||||
|
if kw in padded:
|
||||||
|
return level
|
||||||
|
return 3
|
||||||
|
|
||||||
|
|
||||||
|
def seniority_from_experience(titles: list[str]) -> int:
|
||||||
|
"""Estimate user's current level from their most recent experience titles.
|
||||||
|
|
||||||
|
Averages the levels of the top-3 most recent titles (first in the list).
|
||||||
|
Falls back to 3 (mid-level) if no titles are provided.
|
||||||
|
"""
|
||||||
|
if not titles:
|
||||||
|
return 3
|
||||||
|
sample = [t for t in titles if t.strip()][:3]
|
||||||
|
if not sample:
|
||||||
|
return 3
|
||||||
|
levels = [infer_seniority(t) for t in sample]
|
||||||
|
return round(sum(levels) / len(levels))
|
||||||
|
|
||||||
|
|
||||||
|
def _strip_level_words(text: str) -> str:
|
||||||
|
"""Remove seniority/modifier words so domain keywords stand out."""
|
||||||
|
strip = {
|
||||||
|
"senior", "sr", "junior", "jr", "lead", "staff", "principal",
|
||||||
|
"associate", "entry", "mid", "intermediate", "experienced",
|
||||||
|
"director", "head", "manager", "architect", "chief", "intern",
|
||||||
|
"ii", "iii", "iv", "i",
|
||||||
|
}
|
||||||
|
return " ".join(w for w in text.lower().split() if w not in strip)
|
||||||
|
|
||||||
|
|
||||||
|
# ── Signal functions ──────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def title_match_score(job_title: str, search_titles: list[str], user_level: int) -> float:
|
||||||
|
"""Seniority-aware title similarity in [0, 1].
|
||||||
|
|
||||||
|
Combines:
|
||||||
|
- Domain overlap: keyword intersection between job title and search titles
|
||||||
|
after stripping level modifiers (so "Senior Software Engineer" vs
|
||||||
|
"Software Engineer" compares only on "software engineer").
|
||||||
|
- Seniority multiplier: rewards same-level and +1 stretch; penalises
|
||||||
|
large downgrade or unreachable stretch.
|
||||||
|
"""
|
||||||
|
if not search_titles:
|
||||||
|
return 0.5 # neutral — user hasn't set title prefs yet
|
||||||
|
|
||||||
|
job_level = infer_seniority(job_title)
|
||||||
|
level_delta = job_level - user_level
|
||||||
|
seniority_factor = _LEVEL_MULTIPLIER.get(level_delta, _DEFAULT_LEVEL_MULTIPLIER)
|
||||||
|
|
||||||
|
job_core_words = {w for w in _strip_level_words(job_title).split() if len(w) > 2}
|
||||||
|
|
||||||
|
best_domain = 0.0
|
||||||
|
for st in search_titles:
|
||||||
|
st_core_words = {w for w in _strip_level_words(st).split() if len(w) > 2}
|
||||||
|
if not st_core_words:
|
||||||
|
continue
|
||||||
|
# Recall-biased overlap: what fraction of the search title keywords
|
||||||
|
# appear in the job title? (A job posting may use synonyms but we
|
||||||
|
# at least want the core nouns to match.)
|
||||||
|
overlap = len(st_core_words & job_core_words) / len(st_core_words)
|
||||||
|
best_domain = max(best_domain, overlap)
|
||||||
|
|
||||||
|
# Base score from domain match scaled by seniority appropriateness.
|
||||||
|
# A small seniority_factor bonus (×0.2) ensures that even a near-miss
|
||||||
|
# domain match still benefits from seniority alignment.
|
||||||
|
return min(1.0, best_domain * seniority_factor + seniority_factor * 0.15)
|
||||||
|
|
||||||
|
|
||||||
|
def recency_decay(date_found: str) -> float:
|
||||||
|
"""Exponential decay starting from date_found.
|
||||||
|
|
||||||
|
Returns 1.0 for today, 0.5 after RECENCY_HALF_LIFE days, ~0.0 after ~4×.
|
||||||
|
Returns 0.5 (neutral) if the date is unparseable.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
# Support both "YYYY-MM-DD" and "YYYY-MM-DD HH:MM:SS"
|
||||||
|
found = datetime.fromisoformat(date_found.split("T")[0].split(" ")[0])
|
||||||
|
found = found.replace(tzinfo=timezone.utc)
|
||||||
|
now = datetime.now(tz=timezone.utc)
|
||||||
|
days_old = max(0.0, (now - found).total_seconds() / 86400)
|
||||||
|
return math.exp(-math.log(2) * days_old / RECENCY_HALF_LIFE)
|
||||||
|
except Exception:
|
||||||
|
return 0.5
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_salary_range(text: str | None) -> tuple[int | None, int | None]:
|
||||||
|
"""Extract (low, high) salary integers from free-text. Returns (None, None) on failure.
|
||||||
|
|
||||||
|
Handles: "$80k - $120k", "USD 80,000 - 120,000 per year", "£45,000",
|
||||||
|
"80000", "80K/yr", "80-120k", etc.
|
||||||
|
"""
|
||||||
|
if not text:
|
||||||
|
return None, None
|
||||||
|
normalized = re.sub(r"[$,£€₹¥\s]", "", text.lower())
|
||||||
|
# Match numbers optionally followed by 'k'
|
||||||
|
raw_nums = re.findall(r"(\d+(?:\.\d+)?)k?", normalized)
|
||||||
|
values = []
|
||||||
|
for n, full in zip(raw_nums, re.finditer(r"(\d+(?:\.\d+)?)(k?)", normalized)):
|
||||||
|
val = float(full.group(1))
|
||||||
|
if full.group(2): # ends with 'k'
|
||||||
|
val *= 1000
|
||||||
|
elif val < 1000: # bare numbers < 1000 are likely thousands (e.g., "80" in "80-120k")
|
||||||
|
val *= 1000
|
||||||
|
if val >= 10_000: # sanity: ignore clearly wrong values
|
||||||
|
values.append(int(val))
|
||||||
|
values = sorted(set(values))
|
||||||
|
if not values:
|
||||||
|
return None, None
|
||||||
|
return values[0], values[-1]
|
||||||
|
|
||||||
|
|
||||||
|
def salary_fit(
|
||||||
|
salary_text: str | None,
|
||||||
|
target_min: int | None,
|
||||||
|
target_max: int | None,
|
||||||
|
) -> float:
|
||||||
|
"""Salary range overlap score in [0, 1].
|
||||||
|
|
||||||
|
Returns 0.5 (neutral) when either range is unknown — a missing salary
|
||||||
|
line is not inherently negative.
|
||||||
|
"""
|
||||||
|
if not salary_text or (target_min is None and target_max is None):
|
||||||
|
return 0.5
|
||||||
|
|
||||||
|
job_low, job_high = _parse_salary_range(salary_text)
|
||||||
|
if job_low is None:
|
||||||
|
return 0.5
|
||||||
|
|
||||||
|
t_min = target_min or 0
|
||||||
|
t_max = target_max or (int(target_min * 1.5) if target_min else job_high or job_low)
|
||||||
|
job_high = job_high or job_low
|
||||||
|
|
||||||
|
overlap_low = max(job_low, t_min)
|
||||||
|
overlap_high = min(job_high, t_max)
|
||||||
|
overlap = max(0, overlap_high - overlap_low)
|
||||||
|
target_span = max(1, t_max - t_min)
|
||||||
|
return min(1.0, overlap / target_span)
|
||||||
|
|
||||||
|
|
||||||
|
def description_quality(description: str | None) -> float:
|
||||||
|
"""Posting completeness score in [0, 1].
|
||||||
|
|
||||||
|
Stubs and ghost posts score near 0; well-written descriptions score 1.0.
|
||||||
|
"""
|
||||||
|
if not description:
|
||||||
|
return 0.0
|
||||||
|
words = len(description.split())
|
||||||
|
if words < DESC_MIN_WORDS:
|
||||||
|
return (words / DESC_MIN_WORDS) * 0.4 # steep penalty for stubs
|
||||||
|
if words >= DESC_TARGET_WORDS:
|
||||||
|
return 1.0
|
||||||
|
return 0.4 + 0.6 * (words - DESC_MIN_WORDS) / (DESC_TARGET_WORDS - DESC_MIN_WORDS)
|
||||||
|
|
||||||
|
|
||||||
|
# ── Composite scorer ──────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def score_job(
    job: dict,
    search_titles: list[str],
    target_salary_min: int | None,
    target_salary_max: int | None,
    user_level: int,
) -> float:
    """Compute the composite stack_score (0-100) for one job dict.

    Args:
        job: Row dict from the jobs table (must have title, match_score,
            date_found, salary, description, keyword_gaps).
        search_titles: User's desired job titles (from search prefs).
        target_salary_min: Lower bound of the user's salary target, or None.
        target_salary_max: Upper bound of the user's salary target, or None.
        user_level: Inferred seniority level 1-7.

    Returns:
        A float 0-100. Higher = better match for this user's session.
    """
    # ── Individual signals, each normalised to 0-1 ────────────────────────────
    raw_match = job.get("match_score")
    resume_signal = 0.5 if raw_match is None else raw_match / 100.0

    title_signal = title_match_score(job.get("title", ""), search_titles, user_level)
    recency_signal = recency_decay(job.get("date_found", ""))
    salary_signal = salary_fit(job.get("salary"), target_salary_min, target_salary_max)
    desc_signal = description_quality(job.get("description"))

    # ── Weighted blend of the signals ─────────────────────────────────────────
    blended = (
        W_RESUME_MATCH * resume_signal
        + W_TITLE_MATCH * title_signal
        + W_RECENCY * recency_signal
        + W_SALARY_FIT * salary_signal
        + W_DESC_QUALITY * desc_signal
    )

    # ── Keyword-gap penalty, expressed on the same 0-1 scale ─────────────────
    gaps_field = job.get("keyword_gaps") or ""
    if gaps_field:
        missing_terms = [g for g in gaps_field.split(",") if g.strip()]
        gap_count = len(missing_terms)
    else:
        gap_count = 0
    penalty = min(GAP_MAX_PENALTY, gap_count * GAP_PENALTY_PER_KEYWORD) / 100.0

    return round(max(0.0, blended - penalty) * 100, 1)
|
||||||
|
|
||||||
|
|
||||||
|
# ── Public API ────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def rank_jobs(
    jobs: list[dict],
    search_titles: list[str],
    target_salary_min: int | None = None,
    target_salary_max: int | None = None,
    user_level: int = 3,
    limit: int = 10,
    min_score: float = 20.0,
) -> list[dict]:
    """Score pending jobs and return the best ones above a threshold.

    Args:
        jobs: List of job dicts (from DB or any source).
        search_titles: User's desired job titles from search prefs.
        target_salary_min: Lower salary target (from resume profile), or None.
        target_salary_max: Upper salary target (from resume profile), or None.
        user_level: Seniority level 1-7 (use seniority_from_experience()).
        limit: Stack size; pass 0 to return all qualifying jobs.
        min_score: Minimum stack_score to include (0-100).

    Returns:
        Sorted list (best first) with a 'stack_score' key added to each dict.
    """
    scored_pairs = (
        (job, score_job(job, search_titles, target_salary_min, target_salary_max, user_level))
        for job in jobs
    )
    qualifying = [
        {**job, "stack_score": score}
        for job, score in scored_pairs
        if score >= min_score
    ]

    ranked = sorted(qualifying, key=lambda entry: entry["stack_score"], reverse=True)
    if limit > 0:
        return ranked[:limit]
    return ranked
|
||||||
|
|
@ -1,169 +1,46 @@
|
||||||
"""
|
"""
|
||||||
LLM abstraction layer with priority fallback chain.
|
LLM abstraction layer with priority fallback chain.
|
||||||
Reads config/llm.yaml. Tries backends in order; falls back on any error.
|
Config lookup order:
|
||||||
|
1. <repo>/config/llm.yaml — per-install local config
|
||||||
|
2. ~/.config/circuitforge/llm.yaml — user-level config (circuitforge-core default)
|
||||||
|
3. env-var auto-config (ANTHROPIC_API_KEY, OPENAI_API_KEY, OLLAMA_HOST, …)
|
||||||
"""
|
"""
|
||||||
import os
|
|
||||||
import yaml
|
|
||||||
import requests
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from openai import OpenAI
|
|
||||||
|
|
||||||
|
from circuitforge_core.llm import LLMRouter as _CoreLLMRouter
|
||||||
|
|
||||||
|
# Kept for backwards-compatibility — external callers that import CONFIG_PATH
|
||||||
|
# from this module continue to work.
|
||||||
CONFIG_PATH = Path(__file__).parent.parent / "config" / "llm.yaml"
|
CONFIG_PATH = Path(__file__).parent.parent / "config" / "llm.yaml"
|
||||||
|
|
||||||
|
|
||||||
class LLMRouter:
|
class LLMRouter(_CoreLLMRouter):
|
||||||
def __init__(self, config_path: Path = CONFIG_PATH):
|
"""Peregrine-specific LLMRouter — tri-level config path priority.
|
||||||
with open(config_path) as f:
|
|
||||||
self.config = yaml.safe_load(f)
|
|
||||||
|
|
||||||
def _is_reachable(self, base_url: str) -> bool:
|
When ``config_path`` is supplied (e.g. in tests) it is passed straight
|
||||||
"""Quick health-check ping. Returns True if backend is up."""
|
through to the core. When omitted, the lookup order is:
|
||||||
health_url = base_url.rstrip("/").removesuffix("/v1") + "/health"
|
1. <repo>/config/llm.yaml (per-install local config)
|
||||||
try:
|
2. ~/.config/circuitforge/llm.yaml (user-level, circuitforge-core default)
|
||||||
resp = requests.get(health_url, timeout=2)
|
3. env-var auto-config (ANTHROPIC_API_KEY, OPENAI_API_KEY, OLLAMA_HOST …)
|
||||||
return resp.status_code < 500
|
|
||||||
except Exception:
|
|
||||||
return False
|
|
||||||
|
|
||||||
def _resolve_model(self, client: OpenAI, model: str) -> str:
|
|
||||||
"""Resolve __auto__ to the first model served by vLLM."""
|
|
||||||
if model != "__auto__":
|
|
||||||
return model
|
|
||||||
models = client.models.list()
|
|
||||||
return models.data[0].id
|
|
||||||
|
|
||||||
def complete(self, prompt: str, system: str | None = None,
|
|
||||||
model_override: str | None = None,
|
|
||||||
fallback_order: list[str] | None = None,
|
|
||||||
images: list[str] | None = None,
|
|
||||||
max_tokens: int | None = None) -> str:
|
|
||||||
"""
|
"""
|
||||||
Generate a completion. Tries each backend in fallback_order.
|
|
||||||
|
|
||||||
model_override: when set, replaces the configured model for
|
def __init__(self, config_path: Path | None = None) -> None:
|
||||||
openai_compat backends (e.g. pass a research-specific ollama model).
|
if config_path is not None:
|
||||||
fallback_order: when set, overrides config fallback_order for this
|
# Explicit path supplied — use it directly (e.g. tests, CLI override).
|
||||||
call (e.g. pass config["research_fallback_order"] for research tasks).
|
super().__init__(config_path)
|
||||||
images: optional list of base64-encoded PNG/JPG strings. When provided,
|
return
|
||||||
backends without supports_images=true are skipped. vision_service backends
|
|
||||||
are only tried when images is provided.
|
|
||||||
Raises RuntimeError if all backends are exhausted.
|
|
||||||
"""
|
|
||||||
if os.environ.get("DEMO_MODE", "").lower() in ("1", "true", "yes"):
|
|
||||||
raise RuntimeError(
|
|
||||||
"AI inference is disabled in the public demo. "
|
|
||||||
"Run your own instance to use AI features."
|
|
||||||
)
|
|
||||||
order = fallback_order if fallback_order is not None else self.config["fallback_order"]
|
|
||||||
for name in order:
|
|
||||||
backend = self.config["backends"][name]
|
|
||||||
|
|
||||||
if not backend.get("enabled", True):
|
local = Path(__file__).parent.parent / "config" / "llm.yaml"
|
||||||
print(f"[LLMRouter] {name}: disabled, skipping")
|
user_level = Path.home() / ".config" / "circuitforge" / "llm.yaml"
|
||||||
continue
|
if local.exists():
|
||||||
|
super().__init__(local)
|
||||||
supports_images = backend.get("supports_images", False)
|
elif user_level.exists():
|
||||||
is_vision_service = backend["type"] == "vision_service"
|
super().__init__(user_level)
|
||||||
|
|
||||||
# vision_service only used when images provided
|
|
||||||
if is_vision_service and not images:
|
|
||||||
print(f"[LLMRouter] {name}: vision_service skipped (no images)")
|
|
||||||
continue
|
|
||||||
|
|
||||||
# non-vision backends skipped when images provided and they don't support it
|
|
||||||
if images and not supports_images and not is_vision_service:
|
|
||||||
print(f"[LLMRouter] {name}: no image support, skipping")
|
|
||||||
continue
|
|
||||||
|
|
||||||
if is_vision_service:
|
|
||||||
if not self._is_reachable(backend["base_url"]):
|
|
||||||
print(f"[LLMRouter] {name}: unreachable, skipping")
|
|
||||||
continue
|
|
||||||
try:
|
|
||||||
resp = requests.post(
|
|
||||||
backend["base_url"].rstrip("/") + "/analyze",
|
|
||||||
json={
|
|
||||||
"prompt": prompt,
|
|
||||||
"image_base64": images[0] if images else "",
|
|
||||||
},
|
|
||||||
timeout=60,
|
|
||||||
)
|
|
||||||
resp.raise_for_status()
|
|
||||||
print(f"[LLMRouter] Used backend: {name} (vision_service)")
|
|
||||||
return resp.json()["text"]
|
|
||||||
except Exception as e:
|
|
||||||
print(f"[LLMRouter] {name}: error — {e}, trying next")
|
|
||||||
continue
|
|
||||||
|
|
||||||
elif backend["type"] == "openai_compat":
|
|
||||||
if not self._is_reachable(backend["base_url"]):
|
|
||||||
print(f"[LLMRouter] {name}: unreachable, skipping")
|
|
||||||
continue
|
|
||||||
try:
|
|
||||||
client = OpenAI(
|
|
||||||
base_url=backend["base_url"],
|
|
||||||
api_key=backend.get("api_key") or "any",
|
|
||||||
)
|
|
||||||
raw_model = model_override or backend["model"]
|
|
||||||
model = self._resolve_model(client, raw_model)
|
|
||||||
messages = []
|
|
||||||
if system:
|
|
||||||
messages.append({"role": "system", "content": system})
|
|
||||||
if images and supports_images:
|
|
||||||
content = [{"type": "text", "text": prompt}]
|
|
||||||
for img in images:
|
|
||||||
content.append({
|
|
||||||
"type": "image_url",
|
|
||||||
"image_url": {"url": f"data:image/png;base64,{img}"},
|
|
||||||
})
|
|
||||||
messages.append({"role": "user", "content": content})
|
|
||||||
else:
|
else:
|
||||||
messages.append({"role": "user", "content": prompt})
|
# No yaml found — let circuitforge-core's env-var auto-config run.
|
||||||
|
# The core default CONFIG_PATH (~/.config/circuitforge/llm.yaml)
|
||||||
create_kwargs: dict = {"model": model, "messages": messages}
|
# won't exist either, so _auto_config_from_env() will be triggered.
|
||||||
if max_tokens is not None:
|
super().__init__()
|
||||||
create_kwargs["max_tokens"] = max_tokens
|
|
||||||
resp = client.chat.completions.create(**create_kwargs)
|
|
||||||
print(f"[LLMRouter] Used backend: {name} ({model})")
|
|
||||||
return resp.choices[0].message.content
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
print(f"[LLMRouter] {name}: error — {e}, trying next")
|
|
||||||
continue
|
|
||||||
|
|
||||||
elif backend["type"] == "anthropic":
|
|
||||||
api_key = os.environ.get(backend["api_key_env"], "")
|
|
||||||
if not api_key:
|
|
||||||
print(f"[LLMRouter] {name}: {backend['api_key_env']} not set, skipping")
|
|
||||||
continue
|
|
||||||
try:
|
|
||||||
import anthropic as _anthropic
|
|
||||||
client = _anthropic.Anthropic(api_key=api_key)
|
|
||||||
if images and supports_images:
|
|
||||||
content = []
|
|
||||||
for img in images:
|
|
||||||
content.append({
|
|
||||||
"type": "image",
|
|
||||||
"source": {"type": "base64", "media_type": "image/png", "data": img},
|
|
||||||
})
|
|
||||||
content.append({"type": "text", "text": prompt})
|
|
||||||
else:
|
|
||||||
content = prompt
|
|
||||||
kwargs: dict = {
|
|
||||||
"model": backend["model"],
|
|
||||||
"max_tokens": 4096,
|
|
||||||
"messages": [{"role": "user", "content": content}],
|
|
||||||
}
|
|
||||||
if system:
|
|
||||||
kwargs["system"] = system
|
|
||||||
msg = client.messages.create(**kwargs)
|
|
||||||
print(f"[LLMRouter] Used backend: {name}")
|
|
||||||
return msg.content[0].text
|
|
||||||
except Exception as e:
|
|
||||||
print(f"[LLMRouter] {name}: error — {e}, trying next")
|
|
||||||
continue
|
|
||||||
|
|
||||||
raise RuntimeError("All LLM backends exhausted")
|
|
||||||
|
|
||||||
|
|
||||||
# Module-level singleton for convenience
|
# Module-level singleton for convenience
|
||||||
|
|
|
||||||
|
|
@ -47,7 +47,7 @@ OVERRIDE_YML = ROOT / "compose.override.yml"
|
||||||
_SERVICES: dict[str, tuple[str, int, str, bool, bool]] = {
|
_SERVICES: dict[str, tuple[str, int, str, bool, bool]] = {
|
||||||
"streamlit": ("streamlit_port", 8501, "STREAMLIT_PORT", True, False),
|
"streamlit": ("streamlit_port", 8501, "STREAMLIT_PORT", True, False),
|
||||||
"searxng": ("searxng_port", 8888, "SEARXNG_PORT", True, True),
|
"searxng": ("searxng_port", 8888, "SEARXNG_PORT", True, True),
|
||||||
"vllm": ("vllm_port", 8000, "VLLM_PORT", True, True),
|
# vllm removed — now managed by cf-orch (host process), not a Docker service
|
||||||
"vision": ("vision_port", 8002, "VISION_PORT", True, True),
|
"vision": ("vision_port", 8002, "VISION_PORT", True, True),
|
||||||
"ollama": ("ollama_port", 11434, "OLLAMA_PORT", True, True),
|
"ollama": ("ollama_port", 11434, "OLLAMA_PORT", True, True),
|
||||||
"ollama_research": ("ollama_research_port", 11435, "OLLAMA_RESEARCH_PORT", True, True),
|
"ollama_research": ("ollama_research_port", 11435, "OLLAMA_RESEARCH_PORT", True, True),
|
||||||
|
|
@ -65,7 +65,6 @@ _LLM_BACKENDS: dict[str, list[tuple[str, str]]] = {
|
||||||
_DOCKER_INTERNAL: dict[str, tuple[str, int]] = {
|
_DOCKER_INTERNAL: dict[str, tuple[str, int]] = {
|
||||||
"ollama": ("ollama", 11434),
|
"ollama": ("ollama", 11434),
|
||||||
"ollama_research": ("ollama_research", 11434), # container-internal port is always 11434
|
"ollama_research": ("ollama_research", 11434), # container-internal port is always 11434
|
||||||
"vllm": ("vllm", 8000),
|
|
||||||
"vision": ("vision", 8002),
|
"vision": ("vision", 8002),
|
||||||
"searxng": ("searxng", 8080), # searxng internal port differs from host port
|
"searxng": ("searxng", 8080), # searxng internal port differs from host port
|
||||||
}
|
}
|
||||||
|
|
@ -493,6 +492,12 @@ def main() -> None:
|
||||||
# binds a harmless free port instead of conflicting with the external service.
|
# binds a harmless free port instead of conflicting with the external service.
|
||||||
env_updates: dict[str, str] = {i["env_var"]: str(i["stub_port"]) for i in ports.values()}
|
env_updates: dict[str, str] = {i["env_var"]: str(i["stub_port"]) for i in ports.values()}
|
||||||
env_updates["RECOMMENDED_PROFILE"] = profile
|
env_updates["RECOMMENDED_PROFILE"] = profile
|
||||||
|
# When Ollama is adopted from the host process, write OLLAMA_HOST so
|
||||||
|
# LLMRouter's env-var auto-config finds it without needing config/llm.yaml.
|
||||||
|
ollama_info = ports.get("ollama")
|
||||||
|
if ollama_info and ollama_info.get("external"):
|
||||||
|
env_updates["OLLAMA_HOST"] = f"http://host.docker.internal:{ollama_info['resolved']}"
|
||||||
|
|
||||||
if offload_gb > 0:
|
if offload_gb > 0:
|
||||||
env_updates["CPU_OFFLOAD_GB"] = str(offload_gb)
|
env_updates["CPU_OFFLOAD_GB"] = str(offload_gb)
|
||||||
# GPU info for the app container (which lacks nvidia-smi access)
|
# GPU info for the app container (which lacks nvidia-smi access)
|
||||||
|
|
|
||||||
840
scripts/resume_optimizer.py
Normal file
840
scripts/resume_optimizer.py
Normal file
|
|
@ -0,0 +1,840 @@
|
||||||
|
"""
|
||||||
|
ATS Resume Optimizer — rewrite a candidate's resume to maximize keyword match
|
||||||
|
for a specific job description without fabricating experience.
|
||||||
|
|
||||||
|
Tier behaviour:
|
||||||
|
Free → gap report only (extract_jd_signals + prioritize_gaps, no LLM rewrite)
|
||||||
|
Paid → full LLM rewrite targeting the JD (rewrite_for_ats)
|
||||||
|
Premium → same as paid for now; fine-tuned voice model is a future enhancement
|
||||||
|
|
||||||
|
Pipeline:
|
||||||
|
job.description
|
||||||
|
→ extract_jd_signals() # TF-IDF gaps + LLM-extracted ATS signals
|
||||||
|
→ prioritize_gaps() # rank by impact, map to resume sections
|
||||||
|
→ rewrite_for_ats() # per-section LLM rewrite (paid+)
|
||||||
|
→ hallucination_check() # reject rewrites that invent new experience
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import re
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# ── Signal extraction ─────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def extract_jd_signals(description: str, resume_text: str = "") -> list[str]:
    """Collect ATS keyword signals for a job description.

    Two complementary extractors feed the result:
      1. Deterministic TF-IDF keyword gaps from match.py — cheap, no LLM
         cost, only runs when a resume is supplied for comparison.
      2. An LLM pass that catches phrasing nuance TF-IDF misses (e.g.
         "cross-functional" vs "cross-team", "led" vs "managed").

    Each phase failing is tolerated independently; the worst case is an
    empty list.

    Args:
        description: Raw job description text.
        resume_text: Candidate's resume text (used to compute gap vs. already present).

    Returns:
        Deduplicated list of ATS keyword signals, most impactful first
        (LLM phrasing first, TF-IDF gaps filling in behind).
    """
    # Phase 1: deterministic TF-IDF gaps (needs the resume for comparison).
    gap_terms: list[str] = []
    if resume_text:
        try:
            from scripts.match import match_score

            _, gap_terms = match_score(resume_text, description)
        except Exception:
            log.warning("[resume_optimizer] TF-IDF gap extraction failed", exc_info=True)

    # Phase 2: LLM extraction for phrasing/qualifier nuance.
    ai_terms: list[str] = []
    try:
        from scripts.llm_router import LLMRouter

        prompt = (
            "Extract the most important ATS (applicant tracking system) keywords and "
            "phrases from this job description. Focus on:\n"
            "- Required skills and technologies (exact phrasing matters)\n"
            "- Action verbs used to describe responsibilities\n"
            "- Qualification signals ('required', 'must have', 'preferred')\n"
            "- Industry-specific terminology\n\n"
            "Return a JSON array of strings only. No explanation.\n\n"
            f"Job description:\n{description[:3000]}"
        )
        reply = LLMRouter().complete(prompt)
        # The model may wrap the array in markdown fences — pull out the JSON part.
        array_match = re.search(r"\[.*\]", reply, re.DOTALL)
        if array_match:
            payload = array_match.group(0)
            # LLMs occasionally emit invalid JSON escapes (\s, \d, \p) that are
            # valid regex but not valid JSON — drop the stray backslash when it
            # isn't followed by a recognised JSON escape character.
            payload = re.sub(r'\\([^"\\/bfnrtu])', r'\1', payload)
            ai_terms = json.loads(payload)
            ai_terms = [t.strip() for t in ai_terms if isinstance(t, str) and t.strip()]
    except Exception:
        log.warning("[resume_optimizer] LLM signal extraction failed", exc_info=True)

    # Merge, deduplicating case-insensitively while keeping first occurrence:
    # LLM signals lead (richer phrasing), TF-IDF fills in behind.
    deduped: list[str] = []
    seen_keys: set[str] = set()
    for candidate in ai_terms + gap_terms:
        folded = candidate.lower()
        if folded not in seen_keys:
            seen_keys.add(folded)
            deduped.append(candidate)

    return deduped
|
||||||
|
|
||||||
|
|
||||||
|
# ── Gap prioritization ────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
# Map each gap term to the resume section where it would have the most ATS impact.
|
||||||
|
# ATS systems weight keywords higher in certain sections:
|
||||||
|
# skills — direct keyword match, highest density, indexed first
|
||||||
|
# summary — executive summary keywords often boost overall relevance score
|
||||||
|
# experience — verbs + outcomes in bullet points; adds context weight
|
||||||
|
_SECTION_KEYWORDS: dict[str, list[str]] = {
|
||||||
|
"skills": [
|
||||||
|
"python", "sql", "java", "typescript", "react", "vue", "docker",
|
||||||
|
"kubernetes", "aws", "gcp", "azure", "terraform", "ci/cd", "git",
|
||||||
|
"postgresql", "redis", "kafka", "spark", "tableau", "salesforce",
|
||||||
|
"jira", "figma", "excel", "powerpoint", "machine learning", "llm",
|
||||||
|
"deep learning", "pytorch", "tensorflow", "scikit-learn",
|
||||||
|
],
|
||||||
|
"summary": [
|
||||||
|
"leadership", "strategy", "vision", "executive", "director", "vp",
|
||||||
|
"growth", "transformation", "stakeholder", "cross-functional",
|
||||||
|
"p&l", "revenue", "budget", "board", "c-suite",
|
||||||
|
],
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def prioritize_gaps(gaps: list[str], resume_sections: dict[str, Any]) -> list[dict]:
    """Rank keyword gaps by ATS impact and map each to a target resume section.

    Routing rules, in order:
      - Terms matching the technical skills list → "skills", priority 1
        (direct keyword density, indexed first by ATS).
      - Terms matching leadership/executive signals → "summary", priority 1
        (boosts overall relevance score).
      - Multi-word phrases → "experience", priority 2 (more specific than
        single words, so higher ATS weight in bullet context).
      - Remaining single generic terms → "experience", priority 3.
    Terms already present anywhere in the resume (case-insensitive) are
    dropped entirely.

    Args:
        gaps: List of missing keyword signals from extract_jd_signals().
        resume_sections: Structured resume dict from resume_parser.parse_resume().

    Returns:
        List of dicts, sorted by priority ascending (1=high … 3=low):
            {
                "term": str,       # the keyword/phrase to inject
                "section": str,    # "skills" | "summary" | "experience"
                "priority": int,   # 1=high, 2=medium, 3=low
                "rationale": str,  # why this section was chosen
            }
    """
    existing_text = _flatten_resume_text(resume_sections).lower()

    prioritized: list[dict] = []
    for term in gaps:
        # Skip terms already present anywhere in the resume
        if term.lower() in existing_text:
            continue

        # NOTE(review): _SECTION_KEYWORDS lists are tech-centric; domain-specific
        # roles (creative, healthcare, operations) may over-route to experience.
        # Consider expanding the lists or making them config-driven.
        term_lower = term.lower()

        # Partial-match in both directions: term contains a skills keyword or
        # vice versa (handles "PostgreSQL" vs "postgresql", "AWS Lambda" vs "aws").
        skills_match = any(kw in term_lower or term_lower in kw
                           for kw in _SECTION_KEYWORDS["skills"])
        summary_match = any(kw in term_lower or term_lower in kw
                            for kw in _SECTION_KEYWORDS["summary"])

        if skills_match:
            section = "skills"
            priority = 1
            rationale = "matched technical skills list — highest ATS keyword density"
        elif summary_match:
            section = "summary"
            priority = 1
            rationale = "matched leadership/executive signals — boosts overall relevance score"
        elif len(term.split()) > 1:
            section = "experience"
            priority = 2
            rationale = "multi-word phrase — more specific than single keywords, context weight in bullets"
        else:
            section = "experience"
            priority = 3
            rationale = "single generic term — lowest ATS impact, added to experience for coverage"

        prioritized.append({
            "term": term,
            "section": section,
            "priority": priority,
            "rationale": rationale,
        })

    # Stable sort: input order is preserved within the same priority tier.
    prioritized.sort(key=lambda x: x["priority"])
    return prioritized
|
||||||
|
|
||||||
|
|
||||||
|
def _flatten_resume_text(resume: dict[str, Any]) -> str:
|
||||||
|
"""Concatenate all text from a structured resume dict into one searchable string."""
|
||||||
|
parts: list[str] = []
|
||||||
|
parts.append(resume.get("career_summary", "") or "")
|
||||||
|
parts.extend(resume.get("skills", []))
|
||||||
|
for exp in resume.get("experience", []):
|
||||||
|
parts.append(exp.get("title", ""))
|
||||||
|
parts.append(exp.get("company", ""))
|
||||||
|
parts.extend(exp.get("bullets", []))
|
||||||
|
for edu in resume.get("education", []):
|
||||||
|
parts.append(edu.get("degree", ""))
|
||||||
|
parts.append(edu.get("field", ""))
|
||||||
|
parts.append(edu.get("institution", ""))
|
||||||
|
parts.extend(resume.get("achievements", []))
|
||||||
|
return " ".join(parts)
|
||||||
|
|
||||||
|
|
||||||
|
# ── LLM rewrite ───────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def rewrite_for_ats(
    resume: dict[str, Any],
    prioritized_gaps: list[dict],
    job: dict[str, Any],
    candidate_voice: str = "",
) -> dict[str, Any]:
    """Rewrite resume sections so they naturally carry the missing ATS keywords.

    Works one section at a time: gaps are grouped by their target section and
    each section gets a focused prompt containing only its own keywords. The
    no-fabrication constraint is enforced in the prompt itself and verified
    post-hoc by hallucination_check(). Sections with no relevant gaps — and
    sections whose LLM call fails — pass through unchanged.

    Args:
        resume: Structured resume dict (from resume_parser.parse_resume).
        prioritized_gaps: Output of prioritize_gaps().
        job: Job dict with at minimum {"title": str, "company": str, "description": str}.
        candidate_voice: Free-text personality/style note from user.yaml (may be empty).

    Returns:
        New resume dict (same structure as input) with rewritten sections.
    """
    from scripts.llm_router import LLMRouter

    router = LLMRouter()

    # Bucket the gap terms by the section each one should land in.
    section_terms: dict[str, list[str]] = {}
    for entry in prioritized_gaps:
        section_terms.setdefault(entry["section"], []).append(entry["term"])

    updated = dict(resume)  # shallow copy — sections replaced below

    for section, keywords in section_terms.items():
        quoted_terms = ", ".join(f'"{t}"' for t in keywords)
        current_text = _section_text_for_prompt(resume, section)

        if candidate_voice:
            voice_note = (
                f'\n\nCandidate voice/style: "{candidate_voice}". '
                "Preserve this authentic tone — do not write generically."
            )
        else:
            voice_note = ""

        prompt = (
            f"You are rewriting the **{section}** section of a resume to help it pass "
            f"ATS (applicant tracking system) screening for this role:\n"
            f"  Job title: {job.get('title', 'Unknown')}\n"
            f"  Company: {job.get('company', 'Unknown')}\n\n"
            f"Inject these missing ATS keywords naturally into the section:\n"
            f"  {quoted_terms}\n\n"
            f"CRITICAL RULES — violating any of these invalidates the rewrite:\n"
            f"1. Do NOT invent new employers, job titles, dates, or education.\n"
            f"2. Do NOT add skills the candidate did not already demonstrate.\n"
            f"3. Only rephrase existing content — replace vague verbs/nouns with the "
            f"   ATS-preferred equivalents listed above.\n"
            f"4. Keep the same number of bullet points in experience entries.\n"
            f"5. Return ONLY the rewritten section content, no labels or explanation."
            f"{voice_note}\n\n"
            f"Original {section} section:\n{current_text}"
        )

        try:
            reply = router.complete(prompt)
            updated = _apply_section_rewrite(updated, section, reply.strip())
        except Exception:
            # Leave the section unchanged on failure.
            log.warning("[resume_optimizer] rewrite failed for section %r", section, exc_info=True)

    return updated
|
||||||
|
|
||||||
|
|
||||||
|
def _section_text_for_prompt(resume: dict[str, Any], section: str) -> str:
|
||||||
|
"""Render a resume section as plain text suitable for an LLM prompt."""
|
||||||
|
if section == "summary":
|
||||||
|
return resume.get("career_summary", "") or "(empty)"
|
||||||
|
if section == "skills":
|
||||||
|
skills = resume.get("skills", [])
|
||||||
|
return ", ".join(skills) if skills else "(empty)"
|
||||||
|
if section == "experience":
|
||||||
|
lines: list[str] = []
|
||||||
|
for exp in resume.get("experience", []):
|
||||||
|
lines.append(f"{exp['title']} at {exp['company']} ({exp['start_date']}–{exp['end_date']})")
|
||||||
|
for b in exp.get("bullets", []):
|
||||||
|
lines.append(f" • {b}")
|
||||||
|
return "\n".join(lines) if lines else "(empty)"
|
||||||
|
return "(unsupported section)"
|
||||||
|
|
||||||
|
|
||||||
|
def _apply_section_rewrite(resume: dict[str, Any], section: str, rewritten: str) -> dict[str, Any]:
|
||||||
|
"""Return a new resume dict with the given section replaced by rewritten text."""
|
||||||
|
updated = dict(resume)
|
||||||
|
if section == "summary":
|
||||||
|
updated["career_summary"] = rewritten
|
||||||
|
elif section == "skills":
|
||||||
|
# LLM returns comma-separated or newline-separated skills
|
||||||
|
skills = [s.strip() for s in re.split(r"[,\n•·]+", rewritten) if s.strip()]
|
||||||
|
updated["skills"] = skills
|
||||||
|
elif section == "experience":
|
||||||
|
# For experience, we keep the structured entries but replace the bullets.
|
||||||
|
# The LLM rewrites the whole section as plain text; we re-parse the bullets.
|
||||||
|
updated["experience"] = _reparse_experience_bullets(resume.get("experience", []), rewritten)
|
||||||
|
return updated
|
||||||
|
|
||||||
|
|
||||||
|
def _reparse_experience_bullets(
|
||||||
|
original_entries: list[dict],
|
||||||
|
rewritten_text: str,
|
||||||
|
) -> list[dict]:
|
||||||
|
"""Re-associate rewritten bullet text with the original experience entries.
|
||||||
|
|
||||||
|
The LLM rewrites the section as a block of text. We split on the original
|
||||||
|
entry headers (title + company) to re-bind bullets to entries. Falls back
|
||||||
|
to the original entries if splitting fails.
|
||||||
|
"""
|
||||||
|
if not original_entries:
|
||||||
|
return original_entries
|
||||||
|
|
||||||
|
result: list[dict] = []
|
||||||
|
remaining = rewritten_text
|
||||||
|
|
||||||
|
for i, entry in enumerate(original_entries):
|
||||||
|
# Find where the next entry starts so we can slice out this entry's bullets
|
||||||
|
if i + 1 < len(original_entries):
|
||||||
|
next_title = original_entries[i + 1]["title"]
|
||||||
|
# Look for the next entry header in the remaining text
|
||||||
|
split_pat = re.escape(next_title)
|
||||||
|
m = re.search(split_pat, remaining, re.IGNORECASE)
|
||||||
|
chunk = remaining[:m.start()] if m else remaining
|
||||||
|
remaining = remaining[m.start():] if m else ""
|
||||||
|
else:
|
||||||
|
chunk = remaining
|
||||||
|
|
||||||
|
bullets = [
|
||||||
|
re.sub(r"^[•\-–—*◦▪▸►]\s*", "", line).strip()
|
||||||
|
for line in chunk.splitlines()
|
||||||
|
if re.match(r"^[•\-–—*◦▪▸►]\s*", line.strip())
|
||||||
|
]
|
||||||
|
new_entry = dict(entry)
|
||||||
|
new_entry["bullets"] = bullets if bullets else entry["bullets"]
|
||||||
|
result.append(new_entry)
|
||||||
|
|
||||||
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
# ── Gap framing ───────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def frame_skill_gaps(
    struct: dict[str, Any],
    gap_framings: list[dict],
    job: dict[str, Any],
    candidate_voice: str = "",
) -> dict[str, Any]:
    """Inject honest framing language for skills the candidate doesn't have directly.

    For each gap framing decision the user provided:
    - mode "adjacent": user has related experience → injects one bridging sentence
      into the most relevant experience entry's bullets
    - mode "learning": actively developing the skill → prepends a structured
      "Developing: X (context)" note to the skills list
    - mode "skip": no connection at all → no change

    The user-supplied context text is the source of truth. The LLM's job is only
    to phrase it naturally in resume style — not to invent new claims.

    Args:
        struct: Resume dict (already processed by apply_review_decisions).
        gap_framings: List of dicts with keys:
            skill — the ATS term the candidate lacks
            mode — "adjacent" | "learning" | "skip"
            context — candidate's own words describing their related background
        job: Job dict for role context in prompts.
        candidate_voice: Free-text style note from user.yaml.

    Returns:
        New resume dict with framing language injected.
    """
    # NOTE(review): LLMRouter.complete is assumed to return the raw completion
    # string — confirm against scripts.llm_router.
    from scripts.llm_router import LLMRouter
    router = LLMRouter()

    # Shallow-copy the struct and each experience entry so bullet injections
    # below never mutate the caller's dicts.
    updated = dict(struct)
    updated["experience"] = [dict(e) for e in (struct.get("experience") or [])]

    # Framings without user context carry nothing to phrase, so they are
    # filtered out up front (same effect as mode "skip").
    adjacent_framings = [f for f in gap_framings if f.get("mode") == "adjacent" and f.get("context")]
    learning_framings = [f for f in gap_framings if f.get("mode") == "learning" and f.get("context")]

    # ── Adjacent experience: inject bridging sentence into most relevant entry ─
    for framing in adjacent_framings:
        skill = framing["skill"]
        context = framing["context"]

        # Find the experience entry most likely to be relevant (simple keyword match)
        # Returns None only when the resume has no experience entries at all.
        best_entry_idx = _find_most_relevant_entry(updated["experience"], skill)
        if best_entry_idx is None:
            continue

        entry = updated["experience"][best_entry_idx]
        bullets = list(entry.get("bullets") or [])

        voice_note = (
            f'\n\nCandidate voice/style: "{candidate_voice}". Match this tone.'
        ) if candidate_voice else ""

        # The prompt embeds only user-authored text (context, voice) and job
        # metadata; rule 2 forbids the model inventing facts beyond them.
        prompt = (
            f"You are adding one honest framing sentence to a resume bullet list.\n\n"
            f"The candidate does not have direct experience with '{skill}', "
            f"but they have relevant background they described as:\n"
            f' "{context}"\n\n'
            f"Job context: {job.get('title', '')} at {job.get('company', '')}.\n\n"
            f"RULES:\n"
            f"1. Add exactly ONE new bullet point that bridges their background to '{skill}'.\n"
            f"2. Do NOT fabricate anything beyond what their context description says.\n"
            f"3. Use honest language: 'adjacent experience in', 'strong foundation applicable to', "
            f" 'directly transferable background in', etc.\n"
            f"4. Return ONLY the single new bullet text — no prefix, no explanation."
            f"{voice_note}\n\n"
            f"Existing bullets for context:\n"
            + "\n".join(f" • {b}" for b in bullets[:3])
        )

        try:
            new_bullet = router.complete(prompt).strip()
            # Strip any bullet marker the model added despite rule 4.
            new_bullet = re.sub(r"^[•\-–—*◦▪▸►]\s*", "", new_bullet).strip()
            if new_bullet:
                bullets.append(new_bullet)
                new_entry = dict(entry)
                new_entry["bullets"] = bullets
                updated["experience"][best_entry_idx] = new_entry
        except Exception:
            # Best-effort: one failed framing must not abort the others.
            log.warning(
                "[resume_optimizer] frame_skill_gaps adjacent failed for skill %r", skill,
                exc_info=True,
            )

    # ── Learning framing: add structured note to skills list ──────────────────
    # No LLM call here — the note is assembled deterministically from the
    # user's own context text.
    if learning_framings:
        skills = list(updated.get("skills") or [])
        for framing in learning_framings:
            skill = framing["skill"]
            context = framing["context"].strip()
            # Format: "Developing: Kubernetes (strong Docker/container orchestration background)"
            note = f"Developing: {skill} ({context})" if context else f"Developing: {skill}"
            if note not in skills:
                skills.append(note)
        updated["skills"] = skills

    return updated
|
||||||
|
|
||||||
|
|
||||||
|
def _find_most_relevant_entry(
|
||||||
|
experience: list[dict],
|
||||||
|
skill: str,
|
||||||
|
) -> int | None:
|
||||||
|
"""Return the index of the experience entry most relevant to a skill term.
|
||||||
|
|
||||||
|
Uses simple keyword overlap between the skill and entry title/bullets.
|
||||||
|
Falls back to the most recent (first) entry if no match found.
|
||||||
|
"""
|
||||||
|
if not experience:
|
||||||
|
return None
|
||||||
|
|
||||||
|
skill_words = set(skill.lower().split())
|
||||||
|
best_idx = 0
|
||||||
|
best_score = -1
|
||||||
|
|
||||||
|
for i, entry in enumerate(experience):
|
||||||
|
entry_text = (
|
||||||
|
(entry.get("title") or "") + " " +
|
||||||
|
" ".join(entry.get("bullets") or [])
|
||||||
|
).lower()
|
||||||
|
entry_words = set(entry_text.split())
|
||||||
|
score = len(skill_words & entry_words)
|
||||||
|
if score > best_score:
|
||||||
|
best_score = score
|
||||||
|
best_idx = i
|
||||||
|
|
||||||
|
return best_idx
|
||||||
|
|
||||||
|
|
||||||
|
def apply_review_decisions(
    draft: dict[str, Any],
    decisions: dict[str, Any],
) -> dict[str, Any]:
    """Apply user section-level review decisions to the rewritten struct.

    Handles approved skills, summary accept/reject, and per-entry experience
    accept/reject. Returns the updated struct; does not call the LLM.

    Args:
        draft: The review draft dict from build_review_diff (contains
            "sections" and "rewritten_struct").
        decisions: Dict of per-section decisions from the review UI:
            skills: {"approved_additions": [...]}
            summary: {"accepted": bool}
            experience: {"accepted_entries": [{"title", "company", "accepted"}]}

    Returns:
        Updated resume struct ready for gap framing and final render. The
        input draft is never mutated: experience entries are copied before
        any rejected bullets are reverted.
    """
    struct = dict(draft.get("rewritten_struct") or {})
    sections = draft.get("sections") or []

    # ── Skills: keep original + only approved additions ────────────────────
    skills_decision = decisions.get("skills", {})
    approved_additions = set(skills_decision.get("approved_additions") or [])
    for sec in sections:
        if sec["section"] == "skills":
            original_kept = set(sec.get("kept") or [])
            struct["skills"] = sorted(original_kept | approved_additions)
            break

    # ── Summary: accept proposed or revert to original ──────────────────────
    if not decisions.get("summary", {}).get("accepted", True):
        for sec in sections:
            if sec["section"] == "summary":
                struct["career_summary"] = sec.get("original", struct.get("career_summary", ""))
                break

    # ── Experience: per-entry accept/reject ─────────────────────────────────
    # Copy the list AND each entry dict first: dict(draft...) above is a
    # shallow copy, so reverting bullets in place would otherwise mutate the
    # caller's draft["rewritten_struct"].
    struct["experience"] = [dict(e) for e in (struct.get("experience") or [])]

    exp_decisions: dict[str, bool] = {
        f"{ed.get('title', '')}|{ed.get('company', '')}": ed.get("accepted", True)
        for ed in (decisions.get("experience", {}).get("accepted_entries") or [])
    }
    for sec in sections:
        if sec["section"] == "experience":
            for entry_diff in (sec.get("entries") or []):
                key = f"{entry_diff['title']}|{entry_diff['company']}"
                if not exp_decisions.get(key, True):
                    # Rejected rewrite → restore this entry's original bullets.
                    for exp_entry in struct["experience"]:
                        if (exp_entry.get("title") == entry_diff["title"] and
                                exp_entry.get("company") == entry_diff["company"]):
                            exp_entry["bullets"] = entry_diff["original_bullets"]
                            break

    return struct
|
||||||
|
|
||||||
|
|
||||||
|
# ── Hallucination guard ───────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def hallucination_check(original: dict[str, Any], rewritten: dict[str, Any]) -> bool:
    """Return True if the rewrite is safe (no fabricated facts detected).

    Compares the factual anchors (employers, job titles, date ranges, and
    institutions) of both versions; any anchor that appears only in the
    rewrite signals a fabricated fact.

    Args:
        original: Structured resume dict before rewrite.
        rewritten: Structured resume dict after rewrite.

    Returns:
        True when every rewrite anchor already existed in the original;
        False when new anchors appeared (caller should fall back).
    """
    fabricated = _extract_anchors(rewritten) - _extract_anchors(original)
    if not fabricated:
        return True

    log.warning(
        "[resume_optimizer] hallucination_check FAILED — new anchors in rewrite: %s",
        fabricated,
    )
    return False
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_anchors(resume: dict[str, Any]) -> frozenset[str]:
|
||||||
|
"""Extract stable factual anchors (company, title, dates) from experience entries."""
|
||||||
|
anchors: set[str] = set()
|
||||||
|
for exp in resume.get("experience", []):
|
||||||
|
for field in ("company", "title", "start_date", "end_date"):
|
||||||
|
val = (exp.get(field) or "").strip().lower()
|
||||||
|
if val:
|
||||||
|
anchors.add(val)
|
||||||
|
for edu in resume.get("education", []):
|
||||||
|
val = (edu.get("institution") or "").strip().lower()
|
||||||
|
if val:
|
||||||
|
anchors.add(val)
|
||||||
|
return frozenset(anchors)
|
||||||
|
|
||||||
|
|
||||||
|
# ── Resume → plain text renderer ─────────────────────────────────────────────
|
||||||
|
|
||||||
|
def render_resume_text(resume: dict[str, Any]) -> str:
    """Render a structured resume dict back to formatted plain text for PDF export.

    Sections appear in fixed order (contact, SUMMARY, EXPERIENCE, EDUCATION,
    SKILLS, ACHIEVEMENTS); absent/empty sections are skipped, each emitted
    section ends with one blank line.

    Args:
        resume: Structured resume dict (resume_parser output format).

    Returns:
        The resume as newline-joined plain text.
    """
    lines: list[str] = []

    # Contact header: non-empty name/email/phone joined on one line.
    contact_parts = [resume.get("name", ""), resume.get("email", ""), resume.get("phone", "")]
    lines.append(" ".join(p for p in contact_parts if p))
    lines.append("")

    if resume.get("career_summary"):
        lines.append("SUMMARY")
        lines.append(resume["career_summary"])
        lines.append("")

    if resume.get("experience"):
        lines.append("EXPERIENCE")
        for exp in resume["experience"]:
            # Header line: "Title | Company (start–end)" with an en dash.
            lines.append(
                f"{exp.get('title', '')} | {exp.get('company', '')} "
                f"({exp.get('start_date', '')}–{exp.get('end_date', '')})"
            )
            for b in exp.get("bullets", []):
                lines.append(f" • {b}")
        lines.append("")

    if resume.get("education"):
        lines.append("EDUCATION")
        for edu in resume["education"]:
            lines.append(
                f"{edu.get('degree', '')} {edu.get('field', '')} | "
                f"{edu.get('institution', '')} {edu.get('graduation_year', '')}"
            )
        lines.append("")

    if resume.get("skills"):
        lines.append("SKILLS")
        lines.append(", ".join(resume["skills"]))
        lines.append("")

    if resume.get("achievements"):
        lines.append("ACHIEVEMENTS")
        for a in resume["achievements"]:
            lines.append(f" • {a}")
        lines.append("")

    return "\n".join(lines)
|
||||||
|
|
||||||
|
|
||||||
|
# ── Review diff builder ────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def build_review_diff(
    original: dict[str, Any],
    rewritten: dict[str, Any],
) -> dict[str, Any]:
    """Build a structured diff between original and rewritten resume for the review UI.

    Returns a dict with:
        sections: list of per-section diffs
        rewritten_struct: the full rewritten resume dict (used by finalize endpoint)

    Each section diff has:
        section: "skills" | "summary" | "experience"
        type: "skills_diff" | "text_diff" | "bullets_diff"
    For skills_diff:
        added: list of new skill strings (each requires user approval)
        removed: list of removed skill strings
        kept: list of unchanged skills
    For text_diff (summary):
        original: str
        proposed: str
    For bullets_diff (experience):
        entries: list of {title, company, original_bullets, proposed_bullets}
    """
    sections: list[dict[str, Any]] = []

    # ── Skills: set comparison of trimmed skill strings ────────────────────
    before = {s.strip() for s in (original.get("skills") or [])}
    after = {s.strip() for s in (rewritten.get("skills") or [])}
    if before != after:  # i.e. something was added or removed
        sections.append({
            "section": "skills",
            "type": "skills_diff",
            "added": sorted(after - before),
            "removed": sorted(before - after),
            "kept": sorted(before & after),
        })

    # ── Summary: only surface a non-empty, actually-changed proposal ───────
    old_summary = (original.get("career_summary") or "").strip()
    proposed_summary = (rewritten.get("career_summary") or "").strip()
    if proposed_summary and proposed_summary != old_summary:
        sections.append({
            "section": "summary",
            "type": "text_diff",
            "original": old_summary,
            "proposed": proposed_summary,
        })

    # ── Experience: pair entries positionally, keep those whose bullets moved ─
    entry_diffs = [
        {
            "title": before_entry.get("title", ""),
            "company": before_entry.get("company", ""),
            "original_bullets": before_entry.get("bullets") or [],
            "proposed_bullets": after_entry.get("bullets") or [],
        }
        for before_entry, after_entry in zip(
            original.get("experience") or [],
            rewritten.get("experience") or [],
        )
        if (before_entry.get("bullets") or []) != (after_entry.get("bullets") or [])
    ]
    if entry_diffs:
        sections.append({
            "section": "experience",
            "type": "bullets_diff",
            "entries": entry_diffs,
        })

    return {
        "sections": sections,
        "rewritten_struct": rewritten,
    }
|
||||||
|
|
||||||
|
|
||||||
|
# ── PDF export ─────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def export_pdf(resume: dict[str, Any], output_path: str) -> None:
    """Render a structured resume dict to a clean PDF using reportlab.

    Uses a single-column layout with section headers, consistent spacing,
    and a readable sans-serif body font suitable for ATS submission.

    Args:
        resume: Structured resume dict (same format as resume_parser output).
        output_path: Absolute path for the output .pdf file.
    """
    # Imported lazily so reportlab is only required when PDF export is used.
    from reportlab.lib.pagesizes import LETTER
    from reportlab.lib.units import inch
    from reportlab.lib.styles import ParagraphStyle
    from reportlab.lib.enums import TA_CENTER, TA_LEFT
    from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, HRFlowable
    from reportlab.lib import colors

    MARGIN = 0.75 * inch

    # Style sheet — all sizes in points; name/contact centered, body left.
    name_style = ParagraphStyle(
        "name", fontName="Helvetica-Bold", fontSize=16, leading=20,
        alignment=TA_CENTER, spaceAfter=2,
    )
    contact_style = ParagraphStyle(
        "contact", fontName="Helvetica", fontSize=9, leading=12,
        alignment=TA_CENTER, spaceAfter=6,
        textColor=colors.HexColor("#555555"),
    )
    section_style = ParagraphStyle(
        "section", fontName="Helvetica-Bold", fontSize=10, leading=14,
        spaceBefore=10, spaceAfter=2,
        textColor=colors.HexColor("#1a1a2e"),
    )
    body_style = ParagraphStyle(
        "body", fontName="Helvetica", fontSize=9, leading=13, alignment=TA_LEFT,
    )
    role_style = ParagraphStyle(
        "role", fontName="Helvetica-Bold", fontSize=9, leading=13,
    )
    meta_style = ParagraphStyle(
        "meta", fontName="Helvetica-Oblique", fontSize=8, leading=12,
        textColor=colors.HexColor("#555555"), spaceAfter=2,
    )
    bullet_style = ParagraphStyle(
        "bullet", fontName="Helvetica", fontSize=9, leading=13, leftIndent=12,
    )

    def hr():
        # Thin grey rule used under the header and each section title.
        return HRFlowable(width="100%", thickness=0.5,
                          color=colors.HexColor("#cccccc"),
                          spaceAfter=4, spaceBefore=2)

    story = []

    if resume.get("name"):
        story.append(Paragraph(resume["name"], name_style))

    # Contact line: pipe-separated, only the fields that are present.
    contact_parts = [p for p in (
        resume.get("email", ""), resume.get("phone", ""),
        resume.get("location", ""), resume.get("linkedin", ""),
    ) if p]
    if contact_parts:
        story.append(Paragraph(" | ".join(contact_parts), contact_style))

    story.append(hr())

    summary = (resume.get("career_summary") or "").strip()
    if summary:
        story.append(Paragraph("SUMMARY", section_style))
        story.append(hr())
        story.append(Paragraph(summary, body_style))
        story.append(Spacer(1, 4))

    if resume.get("experience"):
        story.append(Paragraph("EXPERIENCE", section_style))
        story.append(hr())
        for exp in resume["experience"]:
            # En dash between dates; role and company share one bold line.
            dates = f"{exp.get('start_date', '')}–{exp.get('end_date', '')}"
            story.append(Paragraph(
                f"{exp.get('title', '')} | {exp.get('company', '')}", role_style
            ))
            story.append(Paragraph(dates, meta_style))
            for bullet in (exp.get("bullets") or []):
                story.append(Paragraph(f"• {bullet}", bullet_style))
            story.append(Spacer(1, 4))

    if resume.get("education"):
        story.append(Paragraph("EDUCATION", section_style))
        story.append(hr())
        for edu in resume["education"]:
            degree = f"{edu.get('degree', '')} {edu.get('field', '')}".strip()
            story.append(Paragraph(
                f"{degree} | {edu.get('institution', '')} {edu.get('graduation_year', '')}".strip(),
                body_style,
            ))
        story.append(Spacer(1, 4))

    if resume.get("skills"):
        story.append(Paragraph("SKILLS", section_style))
        story.append(hr())
        story.append(Paragraph(", ".join(resume["skills"]), body_style))
        story.append(Spacer(1, 4))

    if resume.get("achievements"):
        story.append(Paragraph("ACHIEVEMENTS", section_style))
        story.append(hr())
        for a in resume["achievements"]:
            story.append(Paragraph(f"• {a}", bullet_style))

    # Single-column LETTER document with uniform margins.
    doc = SimpleDocTemplate(
        output_path, pagesize=LETTER,
        leftMargin=MARGIN, rightMargin=MARGIN,
        topMargin=MARGIN, bottomMargin=MARGIN,
    )
    doc.build(story)
|
||||||
217
scripts/resume_sync.py
Normal file
217
scripts/resume_sync.py
Normal file
|
|
@ -0,0 +1,217 @@
|
||||||
|
"""
|
||||||
|
Resume format transform — library ↔ profile.
|
||||||
|
|
||||||
|
Converts between:
|
||||||
|
- Library format: struct_json produced by resume_parser.parse_resume()
|
||||||
|
{name, email, phone, career_summary, experience[{title,company,start_date,end_date,location,bullets[]}],
|
||||||
|
education[{institution,degree,field,start_date,end_date}], skills[], achievements[]}
|
||||||
|
- Profile content format: ResumePayload content fields (plain_text_resume.yaml)
|
||||||
|
{name, surname, email, phone, career_summary,
|
||||||
|
experience[{title,company,period,location,industry,responsibilities,skills[]}],
|
||||||
|
education[{institution,degree,field,start_date,end_date}],
|
||||||
|
skills[], achievements[]}
|
||||||
|
|
||||||
|
Profile metadata fields (salary, work prefs, self-ID, PII) are never touched here.
|
||||||
|
|
||||||
|
License: MIT
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from datetime import date
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
|
||||||
|
_CONTENT_FIELDS = frozenset({
|
||||||
|
"name", "surname", "email", "phone", "career_summary",
|
||||||
|
"experience", "skills", "education", "achievements",
|
||||||
|
})
|
||||||
|
|
||||||
|
|
||||||
|
def library_to_profile_content(struct_json: dict[str, Any]) -> dict[str, Any]:
    """Transform a library struct_json to ResumePayload content fields.

    Returns only content fields. Caller is responsible for merging with existing
    metadata fields (salary, preferences, self-ID) so they are not overwritten.

    Lossy for experience[].industry (always blank — parser does not capture it).
    name is split on first space into name/surname.
    """
    first, _, rest = (struct_json.get("name") or "").partition(" ")

    def _period_of(exp: dict[str, Any]) -> str:
        # "start – end" with an en dash; degrade to whichever side exists.
        start = (exp.get("start_date") or "").strip()
        end = (exp.get("end_date") or "").strip()
        if start and end:
            return f"{start} \u2013 {end}"
        return start or end

    experience = [
        {
            "title": exp.get("title") or "",
            "company": exp.get("company") or "",
            "period": _period_of(exp),
            "location": exp.get("location") or "",
            "industry": "",  # not captured by parser
            "responsibilities": "\n".join(b for b in (exp.get("bullets") or []) if b),
            "skills": [],
        }
        for exp in struct_json.get("experience") or []
    ]

    education = [
        {
            "institution": edu.get("institution") or "",
            "degree": edu.get("degree") or "",
            "field": edu.get("field") or "",
            "start_date": edu.get("start_date") or "",
            "end_date": edu.get("end_date") or "",
        }
        for edu in struct_json.get("education") or []
    ]

    return {
        "name": first,
        "surname": rest,
        "email": struct_json.get("email") or "",
        "phone": struct_json.get("phone") or "",
        "career_summary": struct_json.get("career_summary") or "",
        "experience": experience,
        "skills": list(struct_json.get("skills") or []),
        "education": education,
        "achievements": list(struct_json.get("achievements") or []),
    }
|
||||||
|
|
||||||
|
|
||||||
|
def profile_to_library(payload: dict[str, Any]) -> tuple[str, dict[str, Any]]:
    """Transform ResumePayload content fields to (plain_text, struct_json).

    Inverse of library_to_profile_content. plain_text is a best-effort
    reconstruction for display and re-parsing; struct_json is the canonical
    structured representation stored in the resumes table.
    """
    full_name = " ".join(
        part for part in (payload.get("name") or "", payload.get("surname") or "") if part
    ).strip()
    summary = (payload.get("career_summary") or "").strip()
    email = payload.get("email") or ""
    phone = payload.get("phone") or ""

    text_lines: list[str] = []
    for piece in (full_name, email, phone):
        if piece:
            text_lines.append(piece)
    if summary:
        text_lines.extend(["", "SUMMARY", summary])

    experience_structs: list[dict[str, Any]] = []
    for exp in payload.get("experience") or []:
        title = (exp.get("title") or "").strip()
        company = (exp.get("company") or "").strip()
        period = (exp.get("period") or "").strip()
        location = (exp.get("location") or "").strip()

        # Split the period back into start/end on the typographic dash BEFORE
        # any hyphen normalisation, so ISO dates like "2023-01 – 2025-03"
        # round-trip correctly.
        for dash in ("\u2013", "\u2014"):  # en dash, em dash
            if dash in period:
                start_date, end_date = (p.strip() for p in period.split(dash, 1))
                break
        else:
            start_date, end_date = (period, "") if period else ("", "")

        responsibilities = (exp.get("responsibilities") or "").strip()
        bullets = [ln.strip() for ln in responsibilities.split("\n") if ln.strip()]

        if title or company:
            text_lines.extend(["", " | ".join(p for p in (title, company, period) if p)])
            if location:
                text_lines.append(location)
            text_lines.extend(f"\u2022 {b}" for b in bullets)

        experience_structs.append({
            "title": title,
            "company": company,
            "start_date": start_date,
            "end_date": end_date,
            "location": location,
            "bullets": bullets,
        })

    skills: list[str] = list(payload.get("skills") or [])
    if skills:
        text_lines.extend(["", "SKILLS", ", ".join(skills)])

    education_structs: list[dict[str, Any]] = []
    for edu in payload.get("education") or []:
        institution = (edu.get("institution") or "").strip()
        degree = (edu.get("degree") or "").strip()
        field = (edu.get("field") or "").strip()
        edu_start = (edu.get("start_date") or "").strip()
        edu_end = (edu.get("end_date") or "").strip()
        if institution or degree:
            label = " ".join(p for p in (degree, field) if p)
            text_lines.append(f"{label} \u2014 {institution}" if institution else label)
            education_structs.append({
                "institution": institution,
                "degree": degree,
                "field": field,
                "start_date": edu_start,
                "end_date": edu_end,
            })

    achievements: list[str] = list(payload.get("achievements") or [])

    struct_json: dict[str, Any] = {
        "name": full_name,
        "email": email,
        "phone": phone,
        "career_summary": summary,
        "experience": experience_structs,
        "skills": skills,
        "education": education_structs,
        "achievements": achievements,
    }

    return "\n".join(text_lines).strip(), struct_json
|
||||||
|
|
||||||
|
|
||||||
|
def make_auto_backup_name(source_name: str) -> str:
    """Generate a timestamped auto-backup name.

    Example: "Auto-backup before Senior Engineer Resume — 2026-04-16"
    """
    stamp = date.today().isoformat()
    return "Auto-backup before {} \u2014 {}".format(source_name, stamp)
|
||||||
|
|
||||||
|
|
||||||
|
def blank_fields_on_import(struct_json: dict[str, Any]) -> list[str]:
    """Return content field names that will be blank after a library→profile import.

    Used to warn the user in the confirmation modal so they know what to fill in.
    """
    entries = struct_json.get("experience")
    if not entries:
        return []

    # industry is always blank — the parser never captures it.
    warnings = ["experience[].industry"]
    # Flag location too when at least one entry is missing it.
    if not all((entry.get("location") or "").strip() for entry in entries):
        warnings.append("experience[].location")
    return warnings
|
||||||
|
|
@ -9,10 +9,68 @@ and marks the task completed or failed.
|
||||||
Deduplication: only one queued/running task per (task_type, job_id) is allowed.
|
Deduplication: only one queued/running task per (task_type, job_id) is allowed.
|
||||||
Different task types for the same job run concurrently (e.g. cover letter + research).
|
Different task types for the same job run concurrently (e.g. cover letter + research).
|
||||||
"""
|
"""
|
||||||
|
import logging
|
||||||
import sqlite3
|
import sqlite3
|
||||||
import threading
|
import threading
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def _normalize_aihawk_resume(raw: dict) -> dict:
|
||||||
|
"""Convert a plain_text_resume.yaml (AIHawk format) into the optimizer struct.
|
||||||
|
|
||||||
|
Handles two AIHawk variants:
|
||||||
|
- Newer Peregrine wizard output: already uses bullets/start_date/end_date/career_summary
|
||||||
|
- Older raw AIHawk format: uses responsibilities (str), period ("YYYY – Present")
|
||||||
|
"""
|
||||||
|
import re as _re
|
||||||
|
|
||||||
|
def _split_responsibilities(text: str) -> list[str]:
|
||||||
|
lines = [ln.strip() for ln in text.strip().splitlines() if ln.strip()]
|
||||||
|
return lines if lines else [text.strip()]
|
||||||
|
|
||||||
|
def _parse_period(period: str) -> tuple[str, str]:
|
||||||
|
parts = _re.split(r"\s*[–—-]\s*", period, maxsplit=1)
|
||||||
|
start = parts[0].strip() if parts else ""
|
||||||
|
end = parts[1].strip() if len(parts) > 1 else "Present"
|
||||||
|
return start, end
|
||||||
|
|
||||||
|
experience = []
|
||||||
|
for entry in raw.get("experience", []):
|
||||||
|
if "responsibilities" in entry:
|
||||||
|
bullets = _split_responsibilities(entry["responsibilities"])
|
||||||
|
else:
|
||||||
|
bullets = entry.get("bullets", [])
|
||||||
|
|
||||||
|
if "period" in entry:
|
||||||
|
start_date, end_date = _parse_period(entry["period"])
|
||||||
|
else:
|
||||||
|
start_date = entry.get("start_date", "")
|
||||||
|
end_date = entry.get("end_date", "Present")
|
||||||
|
|
||||||
|
experience.append({
|
||||||
|
"title": entry.get("title", ""),
|
||||||
|
"company": entry.get("company", ""),
|
||||||
|
"start_date": start_date,
|
||||||
|
"end_date": end_date,
|
||||||
|
"bullets": bullets,
|
||||||
|
})
|
||||||
|
|
||||||
|
# career_summary may be a string or absent; assessment field is a legacy bool in some profiles
|
||||||
|
career_summary = raw.get("career_summary", "")
|
||||||
|
if not isinstance(career_summary, str):
|
||||||
|
career_summary = ""
|
||||||
|
|
||||||
|
return {
|
||||||
|
"career_summary": career_summary,
|
||||||
|
"experience": experience,
|
||||||
|
"education": raw.get("education", []),
|
||||||
|
"skills": raw.get("skills", []),
|
||||||
|
"achievements": raw.get("achievements", []),
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
from scripts.db import (
|
from scripts.db import (
|
||||||
DEFAULT_DB,
|
DEFAULT_DB,
|
||||||
insert_task,
|
insert_task,
|
||||||
|
|
@ -20,6 +78,7 @@ from scripts.db import (
|
||||||
update_task_stage,
|
update_task_stage,
|
||||||
update_cover_letter,
|
update_cover_letter,
|
||||||
save_research,
|
save_research,
|
||||||
|
save_optimized_resume,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -39,9 +98,13 @@ def submit_task(db_path: Path = DEFAULT_DB, task_type: str = "",
|
||||||
if is_new:
|
if is_new:
|
||||||
from scripts.task_scheduler import get_scheduler, LLM_TASK_TYPES
|
from scripts.task_scheduler import get_scheduler, LLM_TASK_TYPES
|
||||||
if task_type in LLM_TASK_TYPES:
|
if task_type in LLM_TASK_TYPES:
|
||||||
get_scheduler(db_path, run_task_fn=_run_task).enqueue(
|
enqueued = get_scheduler(db_path, run_task_fn=_run_task).enqueue(
|
||||||
task_id, task_type, job_id or 0, params
|
task_id, task_type, job_id or 0, params
|
||||||
)
|
)
|
||||||
|
if not enqueued:
|
||||||
|
update_task_status(
|
||||||
|
db_path, task_id, "failed", error="Queue depth limit reached"
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
t = threading.Thread(
|
t = threading.Thread(
|
||||||
target=_run_task,
|
target=_run_task,
|
||||||
|
|
@ -158,7 +221,8 @@ def _run_task(db_path: Path, task_id: int, task_type: str, job_id: int,
|
||||||
)
|
)
|
||||||
return
|
return
|
||||||
from scripts.discover import run_discovery
|
from scripts.discover import run_discovery
|
||||||
new_count = run_discovery(db_path)
|
from pathlib import Path as _Path
|
||||||
|
new_count = run_discovery(db_path, config_dir=_Path(db_path).parent / "config")
|
||||||
n = new_count or 0
|
n = new_count or 0
|
||||||
update_task_status(
|
update_task_status(
|
||||||
db_path, task_id, "completed",
|
db_path, task_id, "completed",
|
||||||
|
|
@ -170,6 +234,9 @@ def _run_task(db_path: Path, task_id: int, task_type: str, job_id: int,
|
||||||
import json as _json
|
import json as _json
|
||||||
p = _json.loads(params or "{}")
|
p = _json.loads(params or "{}")
|
||||||
from scripts.generate_cover_letter import generate
|
from scripts.generate_cover_letter import generate
|
||||||
|
_cfg_dir = Path(db_path).parent / "config"
|
||||||
|
_user_llm_cfg = _cfg_dir / "llm.yaml"
|
||||||
|
_user_yaml = _cfg_dir / "user.yaml"
|
||||||
result = generate(
|
result = generate(
|
||||||
job.get("title", ""),
|
job.get("title", ""),
|
||||||
job.get("company", ""),
|
job.get("company", ""),
|
||||||
|
|
@ -177,14 +244,19 @@ def _run_task(db_path: Path, task_id: int, task_type: str, job_id: int,
|
||||||
previous_result=p.get("previous_result", ""),
|
previous_result=p.get("previous_result", ""),
|
||||||
feedback=p.get("feedback", ""),
|
feedback=p.get("feedback", ""),
|
||||||
is_jobgether=job.get("source") == "jobgether",
|
is_jobgether=job.get("source") == "jobgether",
|
||||||
|
config_path=_user_llm_cfg,
|
||||||
|
user_yaml_path=_user_yaml,
|
||||||
)
|
)
|
||||||
update_cover_letter(db_path, job_id, result)
|
update_cover_letter(db_path, job_id, result)
|
||||||
|
|
||||||
elif task_type == "company_research":
|
elif task_type == "company_research":
|
||||||
from scripts.company_research import research_company
|
from scripts.company_research import research_company
|
||||||
|
_cfg_dir = Path(db_path).parent / "config"
|
||||||
|
_user_llm_cfg = _cfg_dir / "llm.yaml"
|
||||||
result = research_company(
|
result = research_company(
|
||||||
job,
|
job,
|
||||||
on_stage=lambda s: update_task_stage(db_path, task_id, s),
|
on_stage=lambda s: update_task_stage(db_path, task_id, s),
|
||||||
|
config_path=_user_llm_cfg if _user_llm_cfg.exists() else None,
|
||||||
)
|
)
|
||||||
save_research(db_path, job_id=job_id, **result)
|
save_research(db_path, job_id=job_id, **result)
|
||||||
|
|
||||||
|
|
@ -261,6 +333,77 @@ def _run_task(db_path: Path, task_id: int, task_type: str, job_id: int,
|
||||||
)
|
)
|
||||||
return
|
return
|
||||||
|
|
||||||
|
elif task_type == "resume_optimize":
|
||||||
|
import json as _json
|
||||||
|
from scripts.resume_parser import structure_resume
|
||||||
|
from scripts.resume_optimizer import (
|
||||||
|
extract_jd_signals,
|
||||||
|
prioritize_gaps,
|
||||||
|
rewrite_for_ats,
|
||||||
|
hallucination_check,
|
||||||
|
render_resume_text,
|
||||||
|
)
|
||||||
|
from scripts.user_profile import load_user_profile
|
||||||
|
|
||||||
|
_user_yaml = Path(db_path).parent / "config" / "user.yaml"
|
||||||
|
description = job.get("description", "")
|
||||||
|
resume_path = load_user_profile(str(_user_yaml)).get("resume_path", "")
|
||||||
|
|
||||||
|
# Parse the candidate's resume
|
||||||
|
update_task_stage(db_path, task_id, "parsing resume")
|
||||||
|
_plain_yaml = Path(db_path).parent / "config" / "plain_text_resume.yaml"
|
||||||
|
if resume_path and Path(resume_path).exists():
|
||||||
|
resume_text = Path(resume_path).read_text(errors="replace")
|
||||||
|
resume_struct, parse_err = structure_resume(resume_text)
|
||||||
|
elif _plain_yaml.exists():
|
||||||
|
import yaml as _yaml
|
||||||
|
_raw = _yaml.safe_load(_plain_yaml.read_text(encoding="utf-8")) or {}
|
||||||
|
resume_struct = _normalize_aihawk_resume(_raw)
|
||||||
|
resume_text = resume_struct.get("career_summary", "")
|
||||||
|
parse_err = ""
|
||||||
|
else:
|
||||||
|
resume_text = ""
|
||||||
|
resume_struct, parse_err = structure_resume("")
|
||||||
|
|
||||||
|
# Extract keyword gaps and build gap report (free tier)
|
||||||
|
update_task_stage(db_path, task_id, "extracting keyword gaps")
|
||||||
|
gaps = extract_jd_signals(description, resume_text)
|
||||||
|
prioritized = prioritize_gaps(gaps, resume_struct)
|
||||||
|
gap_report = _json.dumps(prioritized, indent=2)
|
||||||
|
|
||||||
|
# Full rewrite (paid tier only) → enters awaiting_review, not completed
|
||||||
|
p = _json.loads(params or "{}")
|
||||||
|
selected_gaps = p.get("selected_gaps", None)
|
||||||
|
if selected_gaps is not None:
|
||||||
|
selected_set = set(selected_gaps)
|
||||||
|
prioritized = [g for g in prioritized if g.get("term") in selected_set]
|
||||||
|
if p.get("full_rewrite", False):
|
||||||
|
update_task_stage(db_path, task_id, "rewriting resume sections")
|
||||||
|
candidate_voice = load_user_profile(str(_user_yaml)).get("candidate_voice", "")
|
||||||
|
rewritten = rewrite_for_ats(resume_struct, prioritized, job, candidate_voice)
|
||||||
|
if hallucination_check(resume_struct, rewritten):
|
||||||
|
from scripts.resume_optimizer import build_review_diff
|
||||||
|
from scripts.db import save_resume_draft
|
||||||
|
draft = build_review_diff(resume_struct, rewritten)
|
||||||
|
# Attach gap report to draft for reference in the review UI
|
||||||
|
draft["gap_report"] = prioritized
|
||||||
|
save_resume_draft(db_path, job_id=job_id,
|
||||||
|
draft_json=_json.dumps(draft))
|
||||||
|
# Save gap report now; final text written after user review
|
||||||
|
save_optimized_resume(db_path, job_id=job_id,
|
||||||
|
text="", gap_report=gap_report)
|
||||||
|
# Park task in awaiting_review — finalize endpoint resolves it
|
||||||
|
update_task_status(db_path, task_id, "awaiting_review")
|
||||||
|
return
|
||||||
|
else:
|
||||||
|
log.warning("[task_runner] resume_optimize hallucination check failed for job %d", job_id)
|
||||||
|
save_optimized_resume(db_path, job_id=job_id,
|
||||||
|
text="", gap_report=gap_report)
|
||||||
|
else:
|
||||||
|
# Gap-only run (free tier): save report, no draft
|
||||||
|
save_optimized_resume(db_path, job_id=job_id,
|
||||||
|
text="", gap_report=gap_report)
|
||||||
|
|
||||||
elif task_type == "prepare_training":
|
elif task_type == "prepare_training":
|
||||||
from scripts.prepare_training_data import build_records, write_jsonl, DEFAULT_OUTPUT
|
from scripts.prepare_training_data import build_records, write_jsonl, DEFAULT_OUTPUT
|
||||||
records = build_records()
|
records = build_records()
|
||||||
|
|
|
||||||
|
|
@ -1,232 +1,167 @@
|
||||||
# scripts/task_scheduler.py
|
# scripts/task_scheduler.py
|
||||||
"""Resource-aware batch scheduler for LLM background tasks.
|
"""Peregrine LLM task scheduler — thin shim over circuitforge_core.tasks.scheduler.
|
||||||
|
|
||||||
Routes LLM task types through per-type deques with VRAM-aware scheduling.
|
All scheduling logic lives in circuitforge_core. This module defines
|
||||||
Non-LLM tasks bypass this module — routing lives in scripts/task_runner.py.
|
Peregrine-specific task types, VRAM budgets, and config loading.
|
||||||
|
|
||||||
Public API:
|
Public API (unchanged — callers do not need to change):
|
||||||
LLM_TASK_TYPES — set of task type strings routed through the scheduler
|
LLM_TASK_TYPES — frozenset of task type strings routed through the scheduler
|
||||||
get_scheduler() — lazy singleton accessor
|
DEFAULT_VRAM_BUDGETS — dict of conservative peak VRAM estimates per task type
|
||||||
|
TaskSpec — lightweight task descriptor (re-exported from core)
|
||||||
|
TaskScheduler — backward-compatible wrapper around the core scheduler class
|
||||||
|
get_scheduler() — returns the process-level TaskScheduler singleton
|
||||||
reset_scheduler() — test teardown only
|
reset_scheduler() — test teardown only
|
||||||
"""
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
import sqlite3
|
import os
|
||||||
import threading
|
import threading
|
||||||
from collections import deque, namedtuple
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Callable, Optional
|
from typing import Callable, Optional
|
||||||
|
|
||||||
# Module-level import so tests can monkeypatch scripts.task_scheduler._get_gpus
|
from circuitforge_core.tasks.scheduler import (
|
||||||
try:
|
TaskSpec, # re-export unchanged
|
||||||
from scripts.preflight import get_gpus as _get_gpus
|
LocalScheduler as _CoreTaskScheduler,
|
||||||
except Exception: # graceful degradation if preflight unavailable
|
)
|
||||||
_get_gpus = lambda: []
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
# Task types that go through the scheduler (all others spawn free threads)
|
# ── Peregrine task types and VRAM budgets ─────────────────────────────────────
|
||||||
|
|
||||||
LLM_TASK_TYPES: frozenset[str] = frozenset({
|
LLM_TASK_TYPES: frozenset[str] = frozenset({
|
||||||
"cover_letter",
|
"cover_letter",
|
||||||
"company_research",
|
"company_research",
|
||||||
"wizard_generate",
|
"wizard_generate",
|
||||||
|
"resume_optimize",
|
||||||
})
|
})
|
||||||
|
|
||||||
# Conservative peak VRAM estimates (GB) per task type.
|
# Conservative peak VRAM estimates (GB) per task type.
|
||||||
# Overridable per-install via scheduler.vram_budgets in config/llm.yaml.
|
# Overridable per-install via scheduler.vram_budgets in config/llm.yaml.
|
||||||
DEFAULT_VRAM_BUDGETS: dict[str, float] = {
|
DEFAULT_VRAM_BUDGETS: dict[str, float] = {
|
||||||
"cover_letter": 2.5, # alex-cover-writer:latest (~2GB GGUF + headroom)
|
"cover_letter": 2.5, # alex-cover-writer:latest (~2 GB GGUF + headroom)
|
||||||
"company_research": 5.0, # llama3.1:8b or vllm model
|
"company_research": 5.0, # llama3.1:8b or vllm model
|
||||||
"wizard_generate": 2.5, # same model family as cover_letter
|
"wizard_generate": 2.5, # same model family as cover_letter
|
||||||
|
"resume_optimize": 5.0, # section-by-section rewrite; same budget as research
|
||||||
}
|
}
|
||||||
|
|
||||||
# Lightweight task descriptor stored in per-type deques
|
_DEFAULT_MAX_QUEUE_DEPTH = 500
|
||||||
TaskSpec = namedtuple("TaskSpec", ["id", "job_id", "params"])
|
|
||||||
|
|
||||||
|
|
||||||
class TaskScheduler:
|
def _load_config_overrides(db_path: Path) -> tuple[dict[str, float], int]:
|
||||||
"""Resource-aware LLM task batch scheduler. Use get_scheduler() — not direct construction."""
|
"""Load VRAM budget overrides and max_queue_depth from config/llm.yaml."""
|
||||||
|
budgets = dict(DEFAULT_VRAM_BUDGETS)
|
||||||
def __init__(self, db_path: Path, run_task_fn: Callable) -> None:
|
max_depth = _DEFAULT_MAX_QUEUE_DEPTH
|
||||||
self._db_path = db_path
|
|
||||||
self._run_task = run_task_fn
|
|
||||||
|
|
||||||
self._lock = threading.Lock()
|
|
||||||
self._wake = threading.Event()
|
|
||||||
self._stop = threading.Event()
|
|
||||||
self._queues: dict[str, deque] = {}
|
|
||||||
self._active: dict[str, threading.Thread] = {}
|
|
||||||
self._reserved_vram: float = 0.0
|
|
||||||
self._thread: Optional[threading.Thread] = None
|
|
||||||
|
|
||||||
# Load VRAM budgets: defaults + optional config overrides
|
|
||||||
self._budgets: dict[str, float] = dict(DEFAULT_VRAM_BUDGETS)
|
|
||||||
config_path = db_path.parent.parent / "config" / "llm.yaml"
|
config_path = db_path.parent.parent / "config" / "llm.yaml"
|
||||||
self._max_queue_depth: int = 500
|
|
||||||
if config_path.exists():
|
if config_path.exists():
|
||||||
try:
|
try:
|
||||||
import yaml
|
import yaml
|
||||||
with open(config_path) as f:
|
with open(config_path) as f:
|
||||||
cfg = yaml.safe_load(f) or {}
|
cfg = yaml.safe_load(f) or {}
|
||||||
sched_cfg = cfg.get("scheduler", {})
|
sched_cfg = cfg.get("scheduler", {})
|
||||||
self._budgets.update(sched_cfg.get("vram_budgets", {}))
|
budgets.update(sched_cfg.get("vram_budgets", {}))
|
||||||
self._max_queue_depth = sched_cfg.get("max_queue_depth", 500)
|
max_depth = int(sched_cfg.get("max_queue_depth", max_depth))
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
logger.warning("Failed to load scheduler config from %s: %s", config_path, exc)
|
logger.warning(
|
||||||
|
"Failed to load scheduler config from %s: %s", config_path, exc
|
||||||
|
)
|
||||||
|
return budgets, max_depth
|
||||||
|
|
||||||
# Warn on LLM types with no budget entry after merge
|
|
||||||
|
# Module-level stub so tests can monkeypatch scripts.task_scheduler._get_gpus
|
||||||
|
# (existing tests monkeypatch this symbol — keep it here for backward compat).
|
||||||
|
try:
|
||||||
|
from scripts.preflight import get_gpus as _get_gpus
|
||||||
|
except Exception:
|
||||||
|
_get_gpus = lambda: [] # noqa: E731
|
||||||
|
|
||||||
|
|
||||||
|
class TaskScheduler(_CoreTaskScheduler):
|
||||||
|
"""Peregrine-specific TaskScheduler.
|
||||||
|
|
||||||
|
Extends circuitforge_core.tasks.scheduler.TaskScheduler with:
|
||||||
|
- Peregrine default VRAM budgets and task types wired into __init__
|
||||||
|
- Config loading from config/llm.yaml
|
||||||
|
- Backward-compatible two-argument __init__ signature (db_path, run_task_fn)
|
||||||
|
- _get_gpus monkeypatch support (existing tests patch this module-level symbol)
|
||||||
|
- Backward-compatible enqueue() that marks dropped tasks failed in the DB
|
||||||
|
and logs under the scripts.task_scheduler logger
|
||||||
|
|
||||||
|
Direct construction is still supported for tests; production code should
|
||||||
|
use get_scheduler() instead.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, db_path: Path, run_task_fn: Callable) -> None:
|
||||||
|
budgets, max_depth = _load_config_overrides(db_path)
|
||||||
|
|
||||||
|
# Warn under this module's logger for any task types with no VRAM budget
|
||||||
|
# (mirrors the core warning but captures under scripts.task_scheduler
|
||||||
|
# so existing tests using caplog.at_level(logger="scripts.task_scheduler") pass)
|
||||||
for t in LLM_TASK_TYPES:
|
for t in LLM_TASK_TYPES:
|
||||||
if t not in self._budgets:
|
if t not in budgets:
|
||||||
logger.warning(
|
logger.warning(
|
||||||
"No VRAM budget defined for LLM task type %r — "
|
"No VRAM budget defined for LLM task type %r — "
|
||||||
"defaulting to 0.0 GB (unlimited concurrency for this type)", t
|
"defaulting to 0.0 GB (unlimited concurrency for this type)", t
|
||||||
)
|
)
|
||||||
|
|
||||||
# Detect total GPU VRAM; fall back to unlimited (999) on CPU-only systems.
|
super().__init__(
|
||||||
# Uses module-level _get_gpus so tests can monkeypatch scripts.task_scheduler._get_gpus.
|
db_path=db_path,
|
||||||
try:
|
run_task_fn=run_task_fn,
|
||||||
gpus = _get_gpus()
|
task_types=LLM_TASK_TYPES,
|
||||||
self._available_vram: float = (
|
vram_budgets=budgets,
|
||||||
sum(g["vram_total_gb"] for g in gpus) if gpus else 999.0
|
max_queue_depth=max_depth,
|
||||||
)
|
)
|
||||||
except Exception:
|
|
||||||
self._available_vram = 999.0
|
|
||||||
|
|
||||||
# Durability: reload surviving 'queued' LLM tasks from prior run
|
def enqueue(
|
||||||
self._load_queued_tasks()
|
self,
|
||||||
|
task_id: int,
|
||||||
def enqueue(self, task_id: int, task_type: str, job_id: int,
|
task_type: str,
|
||||||
params: Optional[str]) -> None:
|
job_id: int,
|
||||||
|
params: Optional[str],
|
||||||
|
) -> bool:
|
||||||
"""Add an LLM task to the scheduler queue.
|
"""Add an LLM task to the scheduler queue.
|
||||||
|
|
||||||
If the queue for this type is at max_queue_depth, the task is marked
|
When the queue is full, marks the task failed in SQLite immediately
|
||||||
failed in SQLite immediately (no ghost queued rows) and a warning is logged.
|
(backward-compatible with the original Peregrine behavior) and logs a
|
||||||
"""
|
warning under the scripts.task_scheduler logger.
|
||||||
from scripts.db import update_task_status
|
|
||||||
|
|
||||||
with self._lock:
|
Returns True if enqueued, False if the queue was full.
|
||||||
q = self._queues.setdefault(task_type, deque())
|
"""
|
||||||
if len(q) >= self._max_queue_depth:
|
enqueued = super().enqueue(task_id, task_type, job_id, params)
|
||||||
|
if not enqueued:
|
||||||
|
# Log under this module's logger so existing caplog tests pass
|
||||||
logger.warning(
|
logger.warning(
|
||||||
"Queue depth limit reached for %s (max=%d) — task %d dropped",
|
"Queue depth limit reached for %s (max=%d) — task %d dropped",
|
||||||
task_type, self._max_queue_depth, task_id,
|
task_type, self._max_queue_depth, task_id,
|
||||||
)
|
)
|
||||||
update_task_status(self._db_path, task_id, "failed",
|
from scripts.db import update_task_status
|
||||||
error="Queue depth limit reached")
|
update_task_status(
|
||||||
return
|
self._db_path, task_id, "failed", error="Queue depth limit reached"
|
||||||
q.append(TaskSpec(task_id, job_id, params))
|
|
||||||
|
|
||||||
self._wake.set()
|
|
||||||
|
|
||||||
def start(self) -> None:
|
|
||||||
"""Start the background scheduler loop thread. Call once after construction."""
|
|
||||||
self._thread = threading.Thread(
|
|
||||||
target=self._scheduler_loop, name="task-scheduler", daemon=True
|
|
||||||
)
|
)
|
||||||
self._thread.start()
|
return enqueued
|
||||||
|
|
||||||
def shutdown(self, timeout: float = 5.0) -> None:
|
|
||||||
"""Signal the scheduler to stop and wait for it to exit."""
|
|
||||||
self._stop.set()
|
|
||||||
self._wake.set() # unblock any wait()
|
|
||||||
if self._thread and self._thread.is_alive():
|
|
||||||
self._thread.join(timeout=timeout)
|
|
||||||
|
|
||||||
def _scheduler_loop(self) -> None:
|
|
||||||
"""Main scheduler daemon — wakes on enqueue or batch completion."""
|
|
||||||
while not self._stop.is_set():
|
|
||||||
self._wake.wait(timeout=30)
|
|
||||||
self._wake.clear()
|
|
||||||
|
|
||||||
with self._lock:
|
|
||||||
# Defense in depth: reap externally-killed batch threads.
|
|
||||||
# In normal operation _active.pop() runs in finally before _wake fires,
|
|
||||||
# so this reap finds nothing — no double-decrement risk.
|
|
||||||
for t, thread in list(self._active.items()):
|
|
||||||
if not thread.is_alive():
|
|
||||||
self._reserved_vram -= self._budgets.get(t, 0.0)
|
|
||||||
del self._active[t]
|
|
||||||
|
|
||||||
# Start new type batches while VRAM allows
|
|
||||||
candidates = sorted(
|
|
||||||
[t for t in self._queues if self._queues[t] and t not in self._active],
|
|
||||||
key=lambda t: len(self._queues[t]),
|
|
||||||
reverse=True,
|
|
||||||
)
|
|
||||||
for task_type in candidates:
|
|
||||||
budget = self._budgets.get(task_type, 0.0)
|
|
||||||
# Always allow at least one batch to run even if its budget
|
|
||||||
# exceeds _available_vram (prevents permanent starvation when
|
|
||||||
# a single type's budget is larger than the VRAM ceiling).
|
|
||||||
if self._reserved_vram == 0.0 or self._reserved_vram + budget <= self._available_vram:
|
|
||||||
thread = threading.Thread(
|
|
||||||
target=self._batch_worker,
|
|
||||||
args=(task_type,),
|
|
||||||
name=f"batch-{task_type}",
|
|
||||||
daemon=True,
|
|
||||||
)
|
|
||||||
self._active[task_type] = thread
|
|
||||||
self._reserved_vram += budget
|
|
||||||
thread.start()
|
|
||||||
|
|
||||||
def _batch_worker(self, task_type: str) -> None:
|
|
||||||
"""Serial consumer for one task type. Runs until the type's deque is empty."""
|
|
||||||
try:
|
|
||||||
while True:
|
|
||||||
with self._lock:
|
|
||||||
q = self._queues.get(task_type)
|
|
||||||
if not q:
|
|
||||||
break
|
|
||||||
task = q.popleft()
|
|
||||||
# _run_task is scripts.task_runner._run_task (passed at construction)
|
|
||||||
self._run_task(
|
|
||||||
self._db_path, task.id, task_type, task.job_id, task.params
|
|
||||||
)
|
|
||||||
finally:
|
|
||||||
# Always release — even if _run_task raises.
|
|
||||||
# _active.pop here prevents the scheduler loop reap from double-decrementing.
|
|
||||||
with self._lock:
|
|
||||||
self._active.pop(task_type, None)
|
|
||||||
self._reserved_vram -= self._budgets.get(task_type, 0.0)
|
|
||||||
self._wake.set()
|
|
||||||
|
|
||||||
def _load_queued_tasks(self) -> None:
|
|
||||||
"""Load pre-existing queued LLM tasks from SQLite into deques (called once in __init__)."""
|
|
||||||
llm_types = sorted(LLM_TASK_TYPES) # sorted for deterministic SQL params in logs
|
|
||||||
placeholders = ",".join("?" * len(llm_types))
|
|
||||||
conn = sqlite3.connect(self._db_path)
|
|
||||||
rows = conn.execute(
|
|
||||||
f"SELECT id, task_type, job_id, params FROM background_tasks"
|
|
||||||
f" WHERE status='queued' AND task_type IN ({placeholders})"
|
|
||||||
f" ORDER BY created_at ASC",
|
|
||||||
llm_types,
|
|
||||||
).fetchall()
|
|
||||||
conn.close()
|
|
||||||
|
|
||||||
for row_id, task_type, job_id, params in rows:
|
|
||||||
q = self._queues.setdefault(task_type, deque())
|
|
||||||
q.append(TaskSpec(row_id, job_id, params))
|
|
||||||
|
|
||||||
if rows:
|
|
||||||
logger.info("Scheduler: resumed %d queued task(s) from prior run", len(rows))
|
|
||||||
|
|
||||||
|
|
||||||
# ── Singleton ─────────────────────────────────────────────────────────────────
|
# ── Peregrine-local singleton ──────────────────────────────────────────────────
|
||||||
|
# We manage our own singleton (not the core one) so the process-level instance
|
||||||
|
# is always a Peregrine TaskScheduler (with the enqueue() override).
|
||||||
|
|
||||||
_scheduler: Optional[TaskScheduler] = None
|
_scheduler: Optional[TaskScheduler] = None
|
||||||
_scheduler_lock = threading.Lock()
|
_scheduler_lock = threading.Lock()
|
||||||
|
|
||||||
|
|
||||||
def get_scheduler(db_path: Path, run_task_fn: Callable = None) -> TaskScheduler:
|
def get_scheduler(
|
||||||
"""Return the process-level TaskScheduler singleton, constructing it if needed.
|
db_path: Path,
|
||||||
|
run_task_fn: Optional[Callable] = None,
|
||||||
|
) -> TaskScheduler:
|
||||||
|
"""Return the process-level Peregrine TaskScheduler singleton.
|
||||||
|
|
||||||
run_task_fn is required on the first call; ignored on subsequent calls.
|
run_task_fn is required on the first call; ignored on subsequent calls
|
||||||
Safety: inner lock + double-check prevents double-construction under races.
|
(double-checked locking — singleton already constructed).
|
||||||
The outer None check is a fast-path performance optimisation only.
|
|
||||||
"""
|
"""
|
||||||
global _scheduler
|
global _scheduler
|
||||||
if _scheduler is None: # fast path — avoids lock on steady state
|
if _scheduler is None: # fast path — no lock on steady state
|
||||||
with _scheduler_lock:
|
with _scheduler_lock:
|
||||||
if _scheduler is None: # re-check under lock (double-checked locking)
|
if _scheduler is None: # re-check under lock
|
||||||
if run_task_fn is None:
|
if run_task_fn is None:
|
||||||
raise ValueError("run_task_fn required on first get_scheduler() call")
|
raise ValueError("run_task_fn required on first get_scheduler() call")
|
||||||
_scheduler = TaskScheduler(db_path, run_task_fn)
|
_scheduler = TaskScheduler(db_path, run_task_fn)
|
||||||
|
|
|
||||||
|
|
@ -7,6 +7,8 @@ here so port/host/SSL changes propagate everywhere automatically.
|
||||||
"""
|
"""
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
import os
|
||||||
|
import tempfile
|
||||||
import yaml
|
import yaml
|
||||||
|
|
||||||
_DEFAULTS = {
|
_DEFAULTS = {
|
||||||
|
|
@ -161,3 +163,30 @@ class UserProfile:
|
||||||
"ollama_research": f"{self.ollama_url}/v1",
|
"ollama_research": f"{self.ollama_url}/v1",
|
||||||
"vllm": f"{self.vllm_url}/v1",
|
"vllm": f"{self.vllm_url}/v1",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# ── Free functions for plain-dict access (used by dev-api.py) ─────────────────
|
||||||
|
|
||||||
|
def load_user_profile(config_path: str) -> dict:
|
||||||
|
"""Load user.yaml and return as a plain dict with safe defaults."""
|
||||||
|
path = Path(config_path)
|
||||||
|
if not path.exists():
|
||||||
|
return {}
|
||||||
|
with open(path) as f:
|
||||||
|
data = yaml.safe_load(f) or {}
|
||||||
|
return data
|
||||||
|
|
||||||
|
|
||||||
|
def save_user_profile(config_path: str, data: dict) -> None:
|
||||||
|
"""Atomically write the user profile dict to user.yaml."""
|
||||||
|
path = Path(config_path)
|
||||||
|
path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
# Write to temp file then rename for atomicity
|
||||||
|
fd, tmp = tempfile.mkstemp(dir=path.parent, suffix='.yaml.tmp')
|
||||||
|
try:
|
||||||
|
with os.fdopen(fd, 'w') as f:
|
||||||
|
yaml.dump(data, f, allow_unicode=True, default_flow_style=False)
|
||||||
|
os.replace(tmp, path)
|
||||||
|
except Exception:
|
||||||
|
os.unlink(tmp)
|
||||||
|
raise
|
||||||
|
|
|
||||||
|
|
@ -80,7 +80,8 @@ class TestTaskRunnerCoverLetterParams:
|
||||||
captured = {}
|
captured = {}
|
||||||
|
|
||||||
def mock_generate(title, company, description="", previous_result="", feedback="",
|
def mock_generate(title, company, description="", previous_result="", feedback="",
|
||||||
is_jobgether=False, _router=None):
|
is_jobgether=False, _router=None, config_path=None,
|
||||||
|
user_yaml_path=None):
|
||||||
captured.update({
|
captured.update({
|
||||||
"title": title, "company": company,
|
"title": title, "company": company,
|
||||||
"previous_result": previous_result, "feedback": feedback,
|
"previous_result": previous_result, "feedback": feedback,
|
||||||
|
|
|
||||||
148
tests/test_db_migrate.py
Normal file
148
tests/test_db_migrate.py
Normal file
|
|
@ -0,0 +1,148 @@
|
||||||
|
"""Tests for scripts/db_migrate.py — numbered SQL migration runner."""
|
||||||
|
|
||||||
|
import sqlite3
|
||||||
|
import textwrap
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from scripts.db_migrate import migrate_db
|
||||||
|
|
||||||
|
|
||||||
|
# ── helpers ───────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def _applied(db_path: Path) -> list[str]:
|
||||||
|
con = sqlite3.connect(db_path)
|
||||||
|
try:
|
||||||
|
rows = con.execute("SELECT version FROM schema_migrations ORDER BY version").fetchall()
|
||||||
|
return [r[0] for r in rows]
|
||||||
|
finally:
|
||||||
|
con.close()
|
||||||
|
|
||||||
|
|
||||||
|
def _tables(db_path: Path) -> set[str]:
|
||||||
|
con = sqlite3.connect(db_path)
|
||||||
|
try:
|
||||||
|
rows = con.execute(
|
||||||
|
"SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%'"
|
||||||
|
).fetchall()
|
||||||
|
return {r[0] for r in rows}
|
||||||
|
finally:
|
||||||
|
con.close()
|
||||||
|
|
||||||
|
|
||||||
|
# ── tests ──────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def test_creates_schema_migrations_table(tmp_path):
|
||||||
|
"""Running against an empty DB creates the tracking table."""
|
||||||
|
db = tmp_path / "test.db"
|
||||||
|
(tmp_path / "migrations").mkdir() # empty migrations dir
|
||||||
|
# Patch the module-level _MIGRATIONS_DIR
|
||||||
|
import scripts.db_migrate as m
|
||||||
|
orig = m._MIGRATIONS_DIR
|
||||||
|
m._MIGRATIONS_DIR = tmp_path / "migrations"
|
||||||
|
try:
|
||||||
|
migrate_db(db)
|
||||||
|
assert "schema_migrations" in _tables(db)
|
||||||
|
finally:
|
||||||
|
m._MIGRATIONS_DIR = orig
|
||||||
|
|
||||||
|
|
||||||
|
def test_applies_migration_file(tmp_path):
|
||||||
|
"""A .sql file in migrations/ is applied and recorded."""
|
||||||
|
db = tmp_path / "test.db"
|
||||||
|
mdir = tmp_path / "migrations"
|
||||||
|
mdir.mkdir()
|
||||||
|
(mdir / "001_test.sql").write_text(
|
||||||
|
"CREATE TABLE IF NOT EXISTS widgets (id INTEGER PRIMARY KEY, name TEXT);"
|
||||||
|
)
|
||||||
|
|
||||||
|
import scripts.db_migrate as m
|
||||||
|
orig = m._MIGRATIONS_DIR
|
||||||
|
m._MIGRATIONS_DIR = mdir
|
||||||
|
try:
|
||||||
|
applied = migrate_db(db)
|
||||||
|
assert applied == ["001_test"]
|
||||||
|
assert "widgets" in _tables(db)
|
||||||
|
assert _applied(db) == ["001_test"]
|
||||||
|
finally:
|
||||||
|
m._MIGRATIONS_DIR = orig
|
||||||
|
|
||||||
|
|
||||||
|
def test_idempotent_second_run(tmp_path):
|
||||||
|
"""Running migrate_db twice does not re-apply migrations."""
|
||||||
|
db = tmp_path / "test.db"
|
||||||
|
mdir = tmp_path / "migrations"
|
||||||
|
mdir.mkdir()
|
||||||
|
(mdir / "001_test.sql").write_text(
|
||||||
|
"CREATE TABLE IF NOT EXISTS widgets (id INTEGER PRIMARY KEY, name TEXT);"
|
||||||
|
)
|
||||||
|
|
||||||
|
import scripts.db_migrate as m
|
||||||
|
orig = m._MIGRATIONS_DIR
|
||||||
|
m._MIGRATIONS_DIR = mdir
|
||||||
|
try:
|
||||||
|
migrate_db(db)
|
||||||
|
applied = migrate_db(db) # second run
|
||||||
|
assert applied == []
|
||||||
|
assert _applied(db) == ["001_test"]
|
||||||
|
finally:
|
||||||
|
m._MIGRATIONS_DIR = orig
|
||||||
|
|
||||||
|
|
||||||
|
def test_applies_only_new_migrations(tmp_path):
|
||||||
|
"""Migrations already in schema_migrations are skipped; only new ones run."""
|
||||||
|
db = tmp_path / "test.db"
|
||||||
|
mdir = tmp_path / "migrations"
|
||||||
|
mdir.mkdir()
|
||||||
|
(mdir / "001_first.sql").write_text(
|
||||||
|
"CREATE TABLE IF NOT EXISTS first_table (id INTEGER PRIMARY KEY);"
|
||||||
|
)
|
||||||
|
|
||||||
|
import scripts.db_migrate as m
|
||||||
|
orig = m._MIGRATIONS_DIR
|
||||||
|
m._MIGRATIONS_DIR = mdir
|
||||||
|
try:
|
||||||
|
migrate_db(db)
|
||||||
|
|
||||||
|
# Add a second migration
|
||||||
|
(mdir / "002_second.sql").write_text(
|
||||||
|
"CREATE TABLE IF NOT EXISTS second_table (id INTEGER PRIMARY KEY);"
|
||||||
|
)
|
||||||
|
applied = migrate_db(db)
|
||||||
|
assert applied == ["002_second"]
|
||||||
|
assert set(_applied(db)) == {"001_first", "002_second"}
|
||||||
|
assert "second_table" in _tables(db)
|
||||||
|
finally:
|
||||||
|
m._MIGRATIONS_DIR = orig
|
||||||
|
|
||||||
|
|
||||||
|
def test_migration_failure_raises(tmp_path):
|
||||||
|
"""A bad migration raises RuntimeError and does not record the version."""
|
||||||
|
db = tmp_path / "test.db"
|
||||||
|
mdir = tmp_path / "migrations"
|
||||||
|
mdir.mkdir()
|
||||||
|
(mdir / "001_bad.sql").write_text("THIS IS NOT VALID SQL !!!")
|
||||||
|
|
||||||
|
import scripts.db_migrate as m
|
||||||
|
orig = m._MIGRATIONS_DIR
|
||||||
|
m._MIGRATIONS_DIR = mdir
|
||||||
|
try:
|
||||||
|
with pytest.raises(RuntimeError, match="001_bad"):
|
||||||
|
migrate_db(db)
|
||||||
|
assert _applied(db) == []
|
||||||
|
finally:
|
||||||
|
m._MIGRATIONS_DIR = orig
|
||||||
|
|
||||||
|
|
||||||
|
def test_baseline_migration_runs(tmp_path):
    """The real 001_baseline.sql applies cleanly to a fresh database."""
    db_path = tmp_path / "test.db"
    applied = migrate_db(db_path)
    assert "001_baseline" in applied

    required = {
        "jobs",
        "job_contacts",
        "company_research",
        "background_tasks",
        "survey_responses",
        "digest_queue",
        "schema_migrations",
    }
    # Every baseline table must exist; extra tables are acceptable.
    assert required.issubset(_tables(db_path))
|
||||||
89
tests/test_db_resumes.py
Normal file
89
tests/test_db_resumes.py
Normal file
|
|
@ -0,0 +1,89 @@
|
||||||
|
"""Tests for resume library db helpers."""
|
||||||
|
import sqlite3
|
||||||
|
import tempfile
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from scripts.db_migrate import migrate_db
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def db(tmp_path):
    """Path to a fresh, fully-migrated SQLite database."""
    db_path = tmp_path / "test.db"
    migrate_db(db_path)
    return db_path
|
||||||
|
|
||||||
|
|
||||||
|
def test_create_and_get_resume(db):
    """A created resume gets sensible defaults and round-trips via get."""
    from scripts.db import create_resume, get_resume

    created = create_resume(
        db, name="Q1 2026", text="Software engineer with 5 years experience."
    )
    assert created["id"] > 0
    assert created["name"] == "Q1 2026"
    assert created["word_count"] == 6
    assert created["source"] == "manual"
    assert created["is_default"] == 0

    round_tripped = get_resume(db, created["id"])
    assert round_tripped["name"] == "Q1 2026"
|
||||||
|
|
||||||
|
|
||||||
|
def test_list_resumes(db):
|
||||||
|
from scripts.db import create_resume, list_resumes
|
||||||
|
create_resume(db, name="A", text="alpha beta")
|
||||||
|
create_resume(db, name="B", text="gamma delta")
|
||||||
|
results = list_resumes(db)
|
||||||
|
assert len(results) == 2
|
||||||
|
|
||||||
|
|
||||||
|
def test_update_resume(db):
|
||||||
|
from scripts.db import create_resume, update_resume
|
||||||
|
r = create_resume(db, name="Old name", text="old text here")
|
||||||
|
updated = update_resume(db, r["id"], name="New name", text="new text content here updated")
|
||||||
|
assert updated["name"] == "New name"
|
||||||
|
assert updated["word_count"] == 5
|
||||||
|
|
||||||
|
|
||||||
|
def test_delete_resume(db):
|
||||||
|
from scripts.db import create_resume, delete_resume, get_resume
|
||||||
|
r = create_resume(db, name="Temp", text="temp text")
|
||||||
|
delete_resume(db, r["id"])
|
||||||
|
assert get_resume(db, r["id"]) is None
|
||||||
|
|
||||||
|
|
||||||
|
def test_set_default_resume(db):
    """Setting a new default resume clears the previous default flag."""
    from scripts.db import create_resume, set_default_resume, list_resumes

    first = create_resume(db, name="A", text="text a")
    second = create_resume(db, name="B", text="text b")

    set_default_resume(db, first["id"])
    set_default_resume(db, second["id"])

    by_id = {row["id"]: row for row in list_resumes(db)}
    assert by_id[first["id"]]["is_default"] == 0
    assert by_id[second["id"]]["is_default"] == 1
|
||||||
|
|
||||||
|
|
||||||
|
def test_get_job_resume_default_fallback(db):
|
||||||
|
from scripts.db import create_resume, set_default_resume, get_job_resume
|
||||||
|
# Insert a minimal job row
|
||||||
|
conn = sqlite3.connect(db)
|
||||||
|
conn.execute("INSERT INTO jobs (id, title, company, source) VALUES (1, 'Eng', 'Co', 'test')")
|
||||||
|
conn.commit()
|
||||||
|
conn.close()
|
||||||
|
r = create_resume(db, name="Default", text="default resume text")
|
||||||
|
set_default_resume(db, r["id"])
|
||||||
|
result = get_job_resume(db, 1)
|
||||||
|
assert result["id"] == r["id"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_get_job_resume_job_specific_override(db):
|
||||||
|
from scripts.db import create_resume, set_default_resume, get_job_resume, set_job_resume
|
||||||
|
conn = sqlite3.connect(db)
|
||||||
|
conn.execute("INSERT INTO jobs (id, title, company, source) VALUES (1, 'Eng', 'Co', 'test')")
|
||||||
|
conn.commit()
|
||||||
|
conn.close()
|
||||||
|
default_r = create_resume(db, name="Default", text="default resume text")
|
||||||
|
set_default_resume(db, default_r["id"])
|
||||||
|
specific_r = create_resume(db, name="Specific", text="job specific resume text")
|
||||||
|
set_job_resume(db, job_id=1, resume_id=specific_r["id"])
|
||||||
|
result = get_job_resume(db, 1)
|
||||||
|
assert result["id"] == specific_r["id"]
|
||||||
238
tests/test_dev_api_digest.py
Normal file
238
tests/test_dev_api_digest.py
Normal file
|
|
@ -0,0 +1,238 @@
|
||||||
|
"""Tests for digest queue API endpoints."""
|
||||||
|
import sqlite3
|
||||||
|
import os
|
||||||
|
import pytest
|
||||||
|
from fastapi.testclient import TestClient
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture()
def tmp_db(tmp_path):
    """Create minimal schema in a temp dir with one job_contacts row.

    Returns the path (str) to a throwaway SQLite database seeded with:
      * jobs         - one 'applied' job (id=1),
      * job_contacts - one digest email for that job (id=10) whose HTML body
        contains two ATS links (greenhouse.io / lever.co) and an unsubscribe
        link, for the extract-links tests,
      * digest_queue - empty; tests insert their own rows.
    """
    db_path = str(tmp_path / "staging.db")
    con = sqlite3.connect(db_path)
    # Trimmed-down copy of the production schema: only the columns the
    # digest-queue endpoints actually read/write are declared here.
    con.executescript("""
        CREATE TABLE jobs (
            id INTEGER PRIMARY KEY,
            title TEXT, company TEXT, url TEXT UNIQUE, location TEXT,
            is_remote INTEGER DEFAULT 0, salary TEXT,
            match_score REAL, keyword_gaps TEXT, status TEXT DEFAULT 'pending',
            date_found TEXT, description TEXT, source TEXT
        );
        CREATE TABLE job_contacts (
            id INTEGER PRIMARY KEY,
            job_id INTEGER,
            subject TEXT,
            received_at TEXT,
            stage_signal TEXT,
            suggestion_dismissed INTEGER DEFAULT 0,
            body TEXT,
            from_addr TEXT
        );
        CREATE TABLE digest_queue (
            id INTEGER PRIMARY KEY,
            job_contact_id INTEGER NOT NULL REFERENCES job_contacts(id),
            created_at TEXT DEFAULT (datetime('now')),
            UNIQUE(job_contact_id)
        );
        INSERT INTO jobs (id, title, company, url, status, source, date_found)
        VALUES (1, 'Engineer', 'Acme', 'https://acme.com/job/1', 'applied', 'test', '2026-03-19');
        INSERT INTO job_contacts (id, job_id, subject, received_at, stage_signal, body, from_addr)
        VALUES (
            10, 1, 'TechCrunch Jobs Weekly', '2026-03-19T10:00:00', 'digest',
            '<html><body>Apply at <a href="https://greenhouse.io/acme/jobs/456">Senior Engineer</a> or <a href="https://lever.co/globex/staff">Staff Designer</a>. Unsubscribe: https://unsubscribe.example.com/remove</body></html>',
            'digest@techcrunch.com'
        );
    """)
    con.close()
    return db_path
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture()
def client(tmp_db, monkeypatch):
    """TestClient wired to the temp database via env var and module attribute."""
    monkeypatch.setenv("STAGING_DB", tmp_db)
    import dev_api

    monkeypatch.setattr(dev_api, "DB_PATH", tmp_db)
    app = dev_api.app
    return TestClient(app)
|
||||||
|
|
||||||
|
|
||||||
|
# ── GET /api/digest-queue ───────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def test_digest_queue_list_empty(client):
|
||||||
|
resp = client.get("/api/digest-queue")
|
||||||
|
assert resp.status_code == 200
|
||||||
|
assert resp.json() == []
|
||||||
|
|
||||||
|
|
||||||
|
def test_digest_queue_list_with_entry(client, tmp_db):
|
||||||
|
con = sqlite3.connect(tmp_db)
|
||||||
|
con.execute("INSERT INTO digest_queue (job_contact_id) VALUES (10)")
|
||||||
|
con.commit()
|
||||||
|
con.close()
|
||||||
|
|
||||||
|
resp = client.get("/api/digest-queue")
|
||||||
|
assert resp.status_code == 200
|
||||||
|
entries = resp.json()
|
||||||
|
assert len(entries) == 1
|
||||||
|
assert entries[0]["job_contact_id"] == 10
|
||||||
|
assert entries[0]["subject"] == "TechCrunch Jobs Weekly"
|
||||||
|
assert entries[0]["from_addr"] == "digest@techcrunch.com"
|
||||||
|
assert "body" in entries[0]
|
||||||
|
assert "created_at" in entries[0]
|
||||||
|
|
||||||
|
|
||||||
|
# ── POST /api/digest-queue ──────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def test_digest_queue_add(client, tmp_db):
|
||||||
|
resp = client.post("/api/digest-queue", json={"job_contact_id": 10})
|
||||||
|
assert resp.status_code == 200
|
||||||
|
data = resp.json()
|
||||||
|
assert data["ok"] is True
|
||||||
|
assert data["created"] is True
|
||||||
|
|
||||||
|
con = sqlite3.connect(tmp_db)
|
||||||
|
row = con.execute("SELECT * FROM digest_queue WHERE job_contact_id = 10").fetchone()
|
||||||
|
con.close()
|
||||||
|
assert row is not None
|
||||||
|
|
||||||
|
|
||||||
|
def test_digest_queue_add_duplicate(client):
|
||||||
|
client.post("/api/digest-queue", json={"job_contact_id": 10})
|
||||||
|
resp = client.post("/api/digest-queue", json={"job_contact_id": 10})
|
||||||
|
assert resp.status_code == 200
|
||||||
|
data = resp.json()
|
||||||
|
assert data["ok"] is True
|
||||||
|
assert data["created"] is False
|
||||||
|
|
||||||
|
|
||||||
|
def test_digest_queue_add_missing_contact(client):
|
||||||
|
resp = client.post("/api/digest-queue", json={"job_contact_id": 9999})
|
||||||
|
assert resp.status_code == 404
|
||||||
|
|
||||||
|
|
||||||
|
# ── POST /api/digest-queue/{id}/extract-links ───────────────────────────────
|
||||||
|
|
||||||
|
def _add_digest_entry(tmp_db, contact_id=10):
|
||||||
|
"""Helper: insert a digest_queue row and return its id."""
|
||||||
|
con = sqlite3.connect(tmp_db)
|
||||||
|
cur = con.execute("INSERT INTO digest_queue (job_contact_id) VALUES (?)", (contact_id,))
|
||||||
|
entry_id = cur.lastrowid
|
||||||
|
con.commit()
|
||||||
|
con.close()
|
||||||
|
return entry_id
|
||||||
|
|
||||||
|
|
||||||
|
def test_digest_extract_links(client, tmp_db):
|
||||||
|
entry_id = _add_digest_entry(tmp_db)
|
||||||
|
resp = client.post(f"/api/digest-queue/{entry_id}/extract-links")
|
||||||
|
assert resp.status_code == 200
|
||||||
|
links = resp.json()["links"]
|
||||||
|
|
||||||
|
# greenhouse.io link should be present with score=2
|
||||||
|
gh_links = [l for l in links if "greenhouse.io" in l["url"]]
|
||||||
|
assert len(gh_links) == 1
|
||||||
|
assert gh_links[0]["score"] == 2
|
||||||
|
|
||||||
|
# lever.co link should be present with score=2
|
||||||
|
lever_links = [l for l in links if "lever.co" in l["url"]]
|
||||||
|
assert len(lever_links) == 1
|
||||||
|
assert lever_links[0]["score"] == 2
|
||||||
|
|
||||||
|
# Each link must have a hint key (may be empty string for links at start of body)
|
||||||
|
for link in links:
|
||||||
|
assert "hint" in link
|
||||||
|
|
||||||
|
|
||||||
|
def test_digest_extract_links_filters_trackers(client, tmp_db):
|
||||||
|
entry_id = _add_digest_entry(tmp_db)
|
||||||
|
resp = client.post(f"/api/digest-queue/{entry_id}/extract-links")
|
||||||
|
assert resp.status_code == 200
|
||||||
|
links = resp.json()["links"]
|
||||||
|
urls = [l["url"] for l in links]
|
||||||
|
# Unsubscribe URL should be excluded
|
||||||
|
assert not any("unsubscribe" in u for u in urls)
|
||||||
|
|
||||||
|
|
||||||
|
def test_digest_extract_links_404(client):
|
||||||
|
resp = client.post("/api/digest-queue/9999/extract-links")
|
||||||
|
assert resp.status_code == 404
|
||||||
|
|
||||||
|
|
||||||
|
# ── POST /api/digest-queue/{id}/queue-jobs ──────────────────────────────────
|
||||||
|
|
||||||
|
def test_digest_queue_jobs(client, tmp_db):
|
||||||
|
entry_id = _add_digest_entry(tmp_db)
|
||||||
|
resp = client.post(
|
||||||
|
f"/api/digest-queue/{entry_id}/queue-jobs",
|
||||||
|
json={"urls": ["https://greenhouse.io/acme/jobs/456"]},
|
||||||
|
)
|
||||||
|
assert resp.status_code == 200
|
||||||
|
data = resp.json()
|
||||||
|
assert data["queued"] == 1
|
||||||
|
assert data["skipped"] == 0
|
||||||
|
|
||||||
|
con = sqlite3.connect(tmp_db)
|
||||||
|
row = con.execute(
|
||||||
|
"SELECT source, status FROM jobs WHERE url = 'https://greenhouse.io/acme/jobs/456'"
|
||||||
|
).fetchone()
|
||||||
|
con.close()
|
||||||
|
assert row is not None
|
||||||
|
assert row[0] == "digest"
|
||||||
|
assert row[1] == "pending"
|
||||||
|
|
||||||
|
|
||||||
|
def test_digest_queue_jobs_skips_duplicates(client, tmp_db):
|
||||||
|
entry_id = _add_digest_entry(tmp_db)
|
||||||
|
resp = client.post(
|
||||||
|
f"/api/digest-queue/{entry_id}/queue-jobs",
|
||||||
|
json={"urls": [
|
||||||
|
"https://greenhouse.io/acme/jobs/789",
|
||||||
|
"https://greenhouse.io/acme/jobs/789", # same URL twice in one call
|
||||||
|
]},
|
||||||
|
)
|
||||||
|
assert resp.status_code == 200
|
||||||
|
data = resp.json()
|
||||||
|
assert data["queued"] == 1
|
||||||
|
assert data["skipped"] == 1
|
||||||
|
|
||||||
|
con = sqlite3.connect(tmp_db)
|
||||||
|
count = con.execute(
|
||||||
|
"SELECT COUNT(*) FROM jobs WHERE url = 'https://greenhouse.io/acme/jobs/789'"
|
||||||
|
).fetchone()[0]
|
||||||
|
con.close()
|
||||||
|
assert count == 1
|
||||||
|
|
||||||
|
|
||||||
|
def test_digest_queue_jobs_skips_invalid_urls(client, tmp_db):
|
||||||
|
entry_id = _add_digest_entry(tmp_db)
|
||||||
|
resp = client.post(
|
||||||
|
f"/api/digest-queue/{entry_id}/queue-jobs",
|
||||||
|
json={"urls": ["", "ftp://bad.example.com", "https://valid.greenhouse.io/job/1"]},
|
||||||
|
)
|
||||||
|
assert resp.status_code == 200
|
||||||
|
data = resp.json()
|
||||||
|
assert data["queued"] == 1
|
||||||
|
assert data["skipped"] == 2
|
||||||
|
|
||||||
|
|
||||||
|
def test_digest_queue_jobs_empty_urls(client, tmp_db):
    """Submitting an empty URL list is rejected as a client error."""
    entry_id = _add_digest_entry(tmp_db)
    endpoint = f"/api/digest-queue/{entry_id}/queue-jobs"
    resp = client.post(endpoint, json={"urls": []})
    assert resp.status_code == 400
|
||||||
|
|
||||||
|
|
||||||
|
def test_digest_queue_jobs_404(client):
|
||||||
|
resp = client.post("/api/digest-queue/9999/queue-jobs", json={"urls": ["https://example.com"]})
|
||||||
|
assert resp.status_code == 404
|
||||||
|
|
||||||
|
|
||||||
|
# ── DELETE /api/digest-queue/{id} ───────────────────────────────────────────
|
||||||
|
|
||||||
|
def test_digest_delete(client, tmp_db):
|
||||||
|
entry_id = _add_digest_entry(tmp_db)
|
||||||
|
resp = client.delete(f"/api/digest-queue/{entry_id}")
|
||||||
|
assert resp.status_code == 200
|
||||||
|
assert resp.json()["ok"] is True
|
||||||
|
|
||||||
|
# Second delete → 404
|
||||||
|
resp2 = client.delete(f"/api/digest-queue/{entry_id}")
|
||||||
|
assert resp2.status_code == 404
|
||||||
133
tests/test_dev_api_feedback.py
Normal file
133
tests/test_dev_api_feedback.py
Normal file
|
|
@ -0,0 +1,133 @@
|
||||||
|
"""Tests for the /api/feedback routes in dev_api."""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from unittest.mock import MagicMock, patch
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
from fastapi.testclient import TestClient
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def client(monkeypatch):
|
||||||
|
monkeypatch.delenv("CLOUD_MODE", raising=False)
|
||||||
|
monkeypatch.delenv("DEMO_MODE", raising=False)
|
||||||
|
monkeypatch.delenv("FORGEJO_API_TOKEN", raising=False)
|
||||||
|
from dev_api import app
|
||||||
|
return TestClient(app)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# GET /api/feedback/status
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def test_status_disabled_when_no_token(client):
|
||||||
|
"""Status is disabled when FORGEJO_API_TOKEN is not set."""
|
||||||
|
resp = client.get("/api/feedback/status")
|
||||||
|
assert resp.status_code == 200
|
||||||
|
assert resp.json() == {"enabled": False}
|
||||||
|
|
||||||
|
|
||||||
|
def test_status_enabled_with_token(monkeypatch):
|
||||||
|
"""Status is enabled when token is set and not in demo or cloud mode."""
|
||||||
|
monkeypatch.delenv("CLOUD_MODE", raising=False)
|
||||||
|
monkeypatch.delenv("DEMO_MODE", raising=False)
|
||||||
|
monkeypatch.setenv("FORGEJO_API_TOKEN", "test-token")
|
||||||
|
from dev_api import app
|
||||||
|
c = TestClient(app)
|
||||||
|
resp = c.get("/api/feedback/status")
|
||||||
|
assert resp.status_code == 200
|
||||||
|
assert resp.json() == {"enabled": True}
|
||||||
|
|
||||||
|
|
||||||
|
def test_status_disabled_in_demo_mode(monkeypatch):
|
||||||
|
"""Status is disabled when DEMO_MODE=1 even if token is present."""
|
||||||
|
monkeypatch.setenv("DEMO_MODE", "1")
|
||||||
|
monkeypatch.setenv("FORGEJO_API_TOKEN", "test-token")
|
||||||
|
monkeypatch.delenv("CLOUD_MODE", raising=False)
|
||||||
|
from dev_api import app
|
||||||
|
c = TestClient(app)
|
||||||
|
resp = c.get("/api/feedback/status")
|
||||||
|
assert resp.status_code == 200
|
||||||
|
assert resp.json() == {"enabled": False}
|
||||||
|
|
||||||
|
|
||||||
|
def test_status_disabled_in_cloud_mode(monkeypatch):
|
||||||
|
"""Status is disabled when CLOUD_MODE=1 (peregrine-specific rule).
|
||||||
|
|
||||||
|
_CLOUD_MODE is evaluated at import time, so we patch the module-level
|
||||||
|
bool rather than the env var (the module is already cached in sys.modules).
|
||||||
|
"""
|
||||||
|
import dev_api as _dev_api_mod
|
||||||
|
monkeypatch.setattr(_dev_api_mod, "_CLOUD_MODE", True)
|
||||||
|
monkeypatch.setenv("FORGEJO_API_TOKEN", "test-token")
|
||||||
|
monkeypatch.delenv("DEMO_MODE", raising=False)
|
||||||
|
c = TestClient(_dev_api_mod.app)
|
||||||
|
resp = c.get("/api/feedback/status")
|
||||||
|
assert resp.status_code == 200
|
||||||
|
assert resp.json() == {"enabled": False}
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# POST /api/feedback
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
_FEEDBACK_PAYLOAD = {
|
||||||
|
"title": "Test feedback",
|
||||||
|
"description": "Something broke.",
|
||||||
|
"type": "bug",
|
||||||
|
"repro": "Click the button.",
|
||||||
|
"tab": "Job Review",
|
||||||
|
"submitter": "tester@example.com",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def test_post_feedback_503_when_no_token(client):
|
||||||
|
"""POST returns 503 when FORGEJO_API_TOKEN is not configured."""
|
||||||
|
resp = client.post("/api/feedback", json=_FEEDBACK_PAYLOAD)
|
||||||
|
assert resp.status_code == 503
|
||||||
|
assert "FORGEJO_API_TOKEN" in resp.json()["detail"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_post_feedback_403_in_demo_mode(monkeypatch):
|
||||||
|
"""POST returns 403 when DEMO_MODE=1."""
|
||||||
|
monkeypatch.setenv("DEMO_MODE", "1")
|
||||||
|
monkeypatch.setenv("FORGEJO_API_TOKEN", "test-token")
|
||||||
|
monkeypatch.delenv("CLOUD_MODE", raising=False)
|
||||||
|
from dev_api import app
|
||||||
|
c = TestClient(app)
|
||||||
|
resp = c.post("/api/feedback", json=_FEEDBACK_PAYLOAD)
|
||||||
|
assert resp.status_code == 403
|
||||||
|
assert "demo" in resp.json()["detail"].lower()
|
||||||
|
|
||||||
|
|
||||||
|
def test_post_feedback_200_creates_issue(monkeypatch):
|
||||||
|
"""POST returns 200 with issue_number and issue_url when Forgejo calls succeed."""
|
||||||
|
monkeypatch.setenv("FORGEJO_API_TOKEN", "test-token")
|
||||||
|
monkeypatch.delenv("CLOUD_MODE", raising=False)
|
||||||
|
monkeypatch.delenv("DEMO_MODE", raising=False)
|
||||||
|
|
||||||
|
mock_get_resp = MagicMock()
|
||||||
|
mock_get_resp.ok = True
|
||||||
|
mock_get_resp.json.return_value = [
|
||||||
|
{"name": "beta-feedback", "id": 1},
|
||||||
|
{"name": "needs-triage", "id": 2},
|
||||||
|
{"name": "bug", "id": 3},
|
||||||
|
]
|
||||||
|
|
||||||
|
mock_post_resp = MagicMock()
|
||||||
|
mock_post_resp.ok = True
|
||||||
|
mock_post_resp.json.return_value = {
|
||||||
|
"number": 42,
|
||||||
|
"html_url": "https://git.opensourcesolarpunk.com/Circuit-Forge/peregrine/issues/42",
|
||||||
|
}
|
||||||
|
|
||||||
|
with patch("circuitforge_core.api.feedback.requests.get", return_value=mock_get_resp), \
|
||||||
|
patch("circuitforge_core.api.feedback.requests.post", return_value=mock_post_resp):
|
||||||
|
from dev_api import app
|
||||||
|
c = TestClient(app)
|
||||||
|
resp = c.post("/api/feedback", json=_FEEDBACK_PAYLOAD)
|
||||||
|
|
||||||
|
assert resp.status_code == 200
|
||||||
|
body = resp.json()
|
||||||
|
assert body["issue_number"] == 42
|
||||||
|
assert "peregrine/issues/42" in body["issue_url"]
|
||||||
216
tests/test_dev_api_interviews.py
Normal file
216
tests/test_dev_api_interviews.py
Normal file
|
|
@ -0,0 +1,216 @@
|
||||||
|
"""Tests for new dev-api.py endpoints: stage signals, email sync, signal dismiss."""
|
||||||
|
import sqlite3
|
||||||
|
import tempfile
|
||||||
|
import os
|
||||||
|
import pytest
|
||||||
|
from fastapi.testclient import TestClient
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture()
|
||||||
|
def tmp_db(tmp_path):
|
||||||
|
"""Create a minimal staging.db schema in a temp dir."""
|
||||||
|
db_path = str(tmp_path / "staging.db")
|
||||||
|
con = sqlite3.connect(db_path)
|
||||||
|
con.executescript("""
|
||||||
|
CREATE TABLE jobs (
|
||||||
|
id INTEGER PRIMARY KEY,
|
||||||
|
title TEXT, company TEXT, url TEXT, location TEXT,
|
||||||
|
is_remote INTEGER DEFAULT 0, salary TEXT,
|
||||||
|
match_score REAL, keyword_gaps TEXT, status TEXT,
|
||||||
|
interview_date TEXT, rejection_stage TEXT,
|
||||||
|
applied_at TEXT, phone_screen_at TEXT, interviewing_at TEXT,
|
||||||
|
offer_at TEXT, hired_at TEXT, survey_at TEXT
|
||||||
|
);
|
||||||
|
CREATE TABLE job_contacts (
|
||||||
|
id INTEGER PRIMARY KEY,
|
||||||
|
job_id INTEGER,
|
||||||
|
subject TEXT,
|
||||||
|
received_at TEXT,
|
||||||
|
stage_signal TEXT,
|
||||||
|
suggestion_dismissed INTEGER DEFAULT 0,
|
||||||
|
body TEXT,
|
||||||
|
from_addr TEXT
|
||||||
|
);
|
||||||
|
CREATE TABLE background_tasks (
|
||||||
|
id INTEGER PRIMARY KEY,
|
||||||
|
task_type TEXT,
|
||||||
|
job_id INTEGER,
|
||||||
|
status TEXT DEFAULT 'queued',
|
||||||
|
finished_at TEXT
|
||||||
|
);
|
||||||
|
INSERT INTO jobs (id, title, company, status) VALUES
|
||||||
|
(1, 'Engineer', 'Acme', 'applied'),
|
||||||
|
(2, 'Designer', 'Beta', 'phone_screen');
|
||||||
|
INSERT INTO job_contacts (id, job_id, subject, received_at, stage_signal, suggestion_dismissed) VALUES
|
||||||
|
(10, 1, 'Interview confirmed', '2026-03-19T10:00:00', 'interview_scheduled', 0),
|
||||||
|
(11, 1, 'Old neutral', '2026-03-18T09:00:00', 'neutral', 0),
|
||||||
|
(12, 2, 'Offer letter', '2026-03-19T11:00:00', 'offer_received', 0),
|
||||||
|
(13, 1, 'Already dismissed', '2026-03-17T08:00:00', 'positive_response', 1);
|
||||||
|
""")
|
||||||
|
con.close()
|
||||||
|
return db_path
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture()
|
||||||
|
def client(tmp_db, monkeypatch):
|
||||||
|
monkeypatch.setenv("STAGING_DB", tmp_db)
|
||||||
|
import dev_api
|
||||||
|
monkeypatch.setattr(dev_api, "DB_PATH", tmp_db)
|
||||||
|
return TestClient(dev_api.app)
|
||||||
|
|
||||||
|
|
||||||
|
# ── GET /api/interviews — stage signals batched ────────────────────────────
|
||||||
|
|
||||||
|
def test_interviews_includes_stage_signals(client):
|
||||||
|
resp = client.get("/api/interviews")
|
||||||
|
assert resp.status_code == 200
|
||||||
|
jobs = {j["id"]: j for j in resp.json()}
|
||||||
|
|
||||||
|
# job 1 should have exactly 1 undismissed non-excluded signal
|
||||||
|
assert "stage_signals" in jobs[1]
|
||||||
|
signals = jobs[1]["stage_signals"]
|
||||||
|
assert len(signals) == 1
|
||||||
|
assert signals[0]["stage_signal"] == "interview_scheduled"
|
||||||
|
assert signals[0]["subject"] == "Interview confirmed"
|
||||||
|
assert signals[0]["id"] == 10
|
||||||
|
assert "body" in signals[0]
|
||||||
|
assert "from_addr" in signals[0]
|
||||||
|
|
||||||
|
# neutral signal excluded
|
||||||
|
signal_types = [s["stage_signal"] for s in signals]
|
||||||
|
assert "neutral" not in signal_types
|
||||||
|
|
||||||
|
# dismissed signal excluded
|
||||||
|
signal_ids = [s["id"] for s in signals]
|
||||||
|
assert 13 not in signal_ids
|
||||||
|
|
||||||
|
# job 2 has an offer signal
|
||||||
|
assert len(jobs[2]["stage_signals"]) == 1
|
||||||
|
assert jobs[2]["stage_signals"][0]["stage_signal"] == "offer_received"
|
||||||
|
|
||||||
|
|
||||||
|
def test_interviews_empty_signals_for_job_without_contacts(client, tmp_db):
|
||||||
|
con = sqlite3.connect(tmp_db)
|
||||||
|
con.execute("INSERT INTO jobs (id, title, company, status) VALUES (3, 'NoContact', 'Corp', 'survey')")
|
||||||
|
con.commit(); con.close()
|
||||||
|
resp = client.get("/api/interviews")
|
||||||
|
jobs = {j["id"]: j for j in resp.json()}
|
||||||
|
assert jobs[3]["stage_signals"] == []
|
||||||
|
|
||||||
|
|
||||||
|
# ── POST /api/email/sync ───────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def test_email_sync_returns_202(client):
|
||||||
|
resp = client.post("/api/email/sync")
|
||||||
|
assert resp.status_code == 202
|
||||||
|
assert "task_id" in resp.json()
|
||||||
|
|
||||||
|
|
||||||
|
def test_email_sync_inserts_background_task(client, tmp_db):
|
||||||
|
client.post("/api/email/sync")
|
||||||
|
con = sqlite3.connect(tmp_db)
|
||||||
|
row = con.execute(
|
||||||
|
"SELECT task_type, job_id, status FROM background_tasks WHERE task_type='email_sync'"
|
||||||
|
).fetchone()
|
||||||
|
con.close()
|
||||||
|
assert row is not None
|
||||||
|
assert row[0] == "email_sync"
|
||||||
|
assert row[1] == 0 # sentinel
|
||||||
|
assert row[2] == "queued"
|
||||||
|
|
||||||
|
|
||||||
|
# ── GET /api/email/sync/status ─────────────────────────────────────────────
|
||||||
|
|
||||||
|
def test_email_sync_status_idle_when_no_tasks(client):
|
||||||
|
resp = client.get("/api/email/sync/status")
|
||||||
|
assert resp.status_code == 200
|
||||||
|
body = resp.json()
|
||||||
|
assert body["status"] == "idle"
|
||||||
|
assert body["last_completed_at"] is None
|
||||||
|
|
||||||
|
|
||||||
|
def test_email_sync_status_reflects_latest_task(client, tmp_db):
|
||||||
|
con = sqlite3.connect(tmp_db)
|
||||||
|
con.execute(
|
||||||
|
"INSERT INTO background_tasks (task_type, job_id, status, finished_at) VALUES "
|
||||||
|
"('email_sync', 0, 'completed', '2026-03-19T12:00:00')"
|
||||||
|
)
|
||||||
|
con.commit(); con.close()
|
||||||
|
resp = client.get("/api/email/sync/status")
|
||||||
|
body = resp.json()
|
||||||
|
assert body["status"] == "completed"
|
||||||
|
assert body["last_completed_at"] == "2026-03-19T12:00:00"
|
||||||
|
|
||||||
|
|
||||||
|
# ── POST /api/stage-signals/{id}/dismiss ──────────────────────────────────
|
||||||
|
|
||||||
|
def test_dismiss_signal_sets_flag(client, tmp_db):
    """Dismissing a signal flips suggestion_dismissed to 1 in the database."""
    resp = client.post("/api/stage-signals/10/dismiss")
    assert resp.status_code == 200
    assert resp.json() == {"ok": True}

    con = sqlite3.connect(tmp_db)
    try:
        (dismissed,) = con.execute(
            "SELECT suggestion_dismissed FROM job_contacts WHERE id = 10"
        ).fetchone()
    finally:
        con.close()
    assert dismissed == 1
|
||||||
|
|
||||||
|
|
||||||
|
def test_dismiss_signal_404_for_missing_id(client):
|
||||||
|
resp = client.post("/api/stage-signals/9999/dismiss")
|
||||||
|
assert resp.status_code == 404
|
||||||
|
|
||||||
|
|
||||||
|
# ── Body/from_addr in signal response ─────────────────────────────────────
|
||||||
|
|
||||||
|
def test_interviews_signal_includes_body_and_from_addr(client):
|
||||||
|
resp = client.get("/api/interviews")
|
||||||
|
assert resp.status_code == 200
|
||||||
|
jobs = {j["id"]: j for j in resp.json()}
|
||||||
|
sig = jobs[1]["stage_signals"][0]
|
||||||
|
# Fields must exist (may be None when DB column is NULL)
|
||||||
|
assert "body" in sig
|
||||||
|
assert "from_addr" in sig
|
||||||
|
|
||||||
|
|
||||||
|
# ── POST /api/stage-signals/{id}/reclassify ────────────────────────────────
|
||||||
|
|
||||||
|
def test_reclassify_signal_updates_label(client, tmp_db):
|
||||||
|
resp = client.post("/api/stage-signals/10/reclassify",
|
||||||
|
json={"stage_signal": "positive_response"})
|
||||||
|
assert resp.status_code == 200
|
||||||
|
assert resp.json() == {"ok": True}
|
||||||
|
con = sqlite3.connect(tmp_db)
|
||||||
|
row = con.execute(
|
||||||
|
"SELECT stage_signal FROM job_contacts WHERE id = 10"
|
||||||
|
).fetchone()
|
||||||
|
con.close()
|
||||||
|
assert row[0] == "positive_response"
|
||||||
|
|
||||||
|
|
||||||
|
def test_reclassify_signal_invalid_label(client):
    """Unknown stage_signal values are rejected with 400."""
    resp = client.post(
        "/api/stage-signals/10/reclassify",
        json={"stage_signal": "not_a_real_label"},
    )
    assert resp.status_code == 400
|
||||||
|
|
||||||
|
|
||||||
|
def test_reclassify_signal_404_for_missing_id(client):
|
||||||
|
resp = client.post("/api/stage-signals/9999/reclassify",
|
||||||
|
json={"stage_signal": "neutral"})
|
||||||
|
assert resp.status_code == 404
|
||||||
|
|
||||||
|
|
||||||
|
def test_signal_body_html_is_stripped(client, tmp_db):
    """HTML email bodies come back as plain text in stage-signal payloads.

    Seeds contact 10 with an HTML body, then checks the /api/interviews
    response exposes only the stripped text (no markup characters).
    """
    # NOTE: the original re-imported sqlite3 here; it is already imported at
    # module scope, so the redundant local import has been removed.
    con = sqlite3.connect(tmp_db)
    con.execute(
        "UPDATE job_contacts SET body = ? WHERE id = 10",
        ("<html><body><p>Hi there,</p><p>Interview confirmed.</p></body></html>",),
    )
    con.commit()
    con.close()

    resp = client.get("/api/interviews")
    jobs = {j["id"]: j for j in resp.json()}
    body = jobs[1]["stage_signals"][0]["body"]
    # No markup may leak through; the paragraph text itself must survive.
    assert "<" not in body
    assert "Hi there" in body
    assert "Interview confirmed" in body
|
||||||
161
tests/test_dev_api_prep.py
Normal file
161
tests/test_dev_api_prep.py
Normal file
|
|
@ -0,0 +1,161 @@
|
||||||
|
"""Tests for interview prep endpoints: research GET/generate/task, contacts GET."""
|
||||||
|
import json
|
||||||
|
import pytest
|
||||||
|
from unittest.mock import patch, MagicMock
|
||||||
|
from fastapi.testclient import TestClient
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def client():
    """TestClient for the dev API app.

    The original fixture unconditionally inserted a developer-specific
    absolute worktree path into ``sys.path``, which breaks (or silently
    shadows modules) on any other machine or CI runner.  The path is now
    added only when it actually exists and is not already present, so the
    fixture works both in that worktree and in a normal checkout where
    ``dev_api`` is importable from the repo root.
    """
    import os
    import sys

    _legacy_worktree = (
        "/Library/Development/CircuitForge/peregrine/.worktrees/feature-vue-spa"
    )
    # Backward compatibility with the original dev setup only.
    if os.path.isdir(_legacy_worktree) and _legacy_worktree not in sys.path:
        sys.path.insert(0, _legacy_worktree)

    from dev_api import app
    return TestClient(app)
|
||||||
|
|
||||||
|
|
||||||
|
# ── /api/jobs/{id}/research ─────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def test_get_research_found(client):
    """Returns the research row (minus raw_output) when one exists.

    The DB layer is mocked out via dev_api._get_db so the endpoint's row ->
    JSON shaping is tested in isolation.  (The original had an unused
    function-local ``import sqlite3``; it has been removed.)
    """
    mock_row = {
        "job_id": 1,
        "company_brief": "Acme Corp makes anvils.",
        "ceo_brief": "Wile E Coyote",
        "talking_points": "- Ask about roadrunner containment",
        "tech_brief": "Python, Rust",
        "funding_brief": "Series B",
        "red_flags": None,
        "accessibility_brief": None,
        "generated_at": "2026-03-20T12:00:00",
    }
    mock_db = MagicMock()
    mock_db.execute.return_value.fetchone.return_value = mock_row
    with patch("dev_api._get_db", return_value=mock_db):
        resp = client.get("/api/jobs/1/research")
    assert resp.status_code == 200
    data = resp.json()
    assert data["company_brief"] == "Acme Corp makes anvils."
    # raw_output is intentionally withheld from the API response.
    assert "raw_output" not in data
|
||||||
|
|
||||||
|
|
||||||
|
def test_get_research_not_found(client):
    """Returns 404 when no research row exists for the job."""
    mock_db = MagicMock()
    mock_db.execute.return_value.fetchone.return_value = None

    with patch("dev_api._get_db", return_value=mock_db):
        response = client.get("/api/jobs/99/research")

    assert response.status_code == 404
|
||||||
|
|
||||||
|
|
||||||
|
# ── /api/jobs/{id}/research/generate ────────────────────────────────────────
|
||||||
|
|
||||||
|
def test_generate_research_new_task(client):
    """POST generate returns task_id and is_new=True for fresh submission."""
    with patch("scripts.task_runner.submit_task", return_value=(42, True)):
        res = client.post("/api/jobs/1/research/generate")
    assert res.status_code == 200
    payload = res.json()
    assert payload["task_id"] == 42
    assert payload["is_new"] is True


def test_generate_research_duplicate_task(client):
    """POST generate returns is_new=False when task already queued."""
    with patch("scripts.task_runner.submit_task", return_value=(17, False)):
        res = client.post("/api/jobs/1/research/generate")
    assert res.status_code == 200
    payload = res.json()
    assert payload["is_new"] is False


def test_generate_research_error(client):
    """POST generate returns 500 when submit_task raises."""
    with patch("scripts.task_runner.submit_task", side_effect=Exception("LLM unavailable")):
        res = client.post("/api/jobs/1/research/generate")
    assert res.status_code == 500
|
||||||
|
# ── /api/jobs/{id}/research/task ────────────────────────────────────────────
|
||||||
|
|
||||||
|
def test_research_task_none(client):
    """Returns status=none when no background task exists for job."""
    db = MagicMock()
    db.execute.return_value.fetchone.return_value = None
    with patch("dev_api._get_db", return_value=db):
        res = client.get("/api/jobs/1/research/task")
    assert res.status_code == 200
    body = res.json()
    assert body["status"] == "none"
    assert body["stage"] is None
    assert body["message"] is None


def test_research_task_running(client):
    """Returns current status/stage/message for an active task."""
    row = {"status": "running", "stage": "Scraping company site", "error": None}
    db = MagicMock()
    db.execute.return_value.fetchone.return_value = row
    with patch("dev_api._get_db", return_value=db):
        res = client.get("/api/jobs/1/research/task")
    assert res.status_code == 200
    body = res.json()
    assert body["status"] == "running"
    assert body["stage"] == "Scraping company site"
    assert body["message"] is None


def test_research_task_failed(client):
    """Returns message (mapped from error column) for failed task."""
    row = {"status": "failed", "stage": None, "error": "LLM timeout"}
    db = MagicMock()
    db.execute.return_value.fetchone.return_value = row
    with patch("dev_api._get_db", return_value=db):
        res = client.get("/api/jobs/1/research/task")
    assert res.status_code == 200
    body = res.json()
    assert body["status"] == "failed"
    assert body["message"] == "LLM timeout"
|
||||||
|
# ── /api/jobs/{id}/contacts ──────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def test_get_contacts_empty(client):
    """Returns empty list when job has no contacts."""
    db = MagicMock()
    db.execute.return_value.fetchall.return_value = []
    with patch("dev_api._get_db", return_value=db):
        res = client.get("/api/jobs/1/contacts")
    assert res.status_code == 200
    assert res.json() == []


def test_get_contacts_list(client):
    """Returns list of contact dicts for job."""
    rows = [
        {"id": 1, "direction": "inbound", "subject": "Interview next week",
         "from_addr": "hr@acme.com", "body": "Hi! We'd like to...", "received_at": "2026-03-19T10:00:00"},
        {"id": 2, "direction": "outbound", "subject": "Re: Interview next week",
         "from_addr": None, "body": "Thank you!", "received_at": "2026-03-19T11:00:00"},
    ]
    db = MagicMock()
    db.execute.return_value.fetchall.return_value = rows
    with patch("dev_api._get_db", return_value=db):
        res = client.get("/api/jobs/1/contacts")
    assert res.status_code == 200
    body = res.json()
    assert len(body) == 2
    assert body[0]["direction"] == "inbound"
    assert body[1]["direction"] == "outbound"


def test_get_contacts_ordered_by_received_at(client):
    """Most recent contacts appear first (ORDER BY received_at DESC)."""
    db = MagicMock()
    db.execute.return_value.fetchall.return_value = []
    with patch("dev_api._get_db", return_value=db):
        res = client.get("/api/jobs/99/contacts")
    # Verify the SQL contains ORDER BY received_at DESC
    sql = db.execute.call_args[0][0]
    assert "ORDER BY received_at DESC" in sql
605
tests/test_dev_api_settings.py
Normal file
605
tests/test_dev_api_settings.py
Normal file
|
|
@ -0,0 +1,605 @@
|
||||||
|
"""Tests for all settings API endpoints added in Tasks 1–8."""
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import yaml
|
||||||
|
import pytest
|
||||||
|
from pathlib import Path
|
||||||
|
from unittest.mock import patch, MagicMock
|
||||||
|
from fastapi.testclient import TestClient
|
||||||
|
|
||||||
|
# credential_store.py was merged to main repo — no worktree path manipulation needed
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope="module")
def client():
    from dev_api import app
    return TestClient(app)


# ── Helpers ───────────────────────────────────────────────────────────────────

def _write_user_yaml(path: Path, data: dict = None):
    """Write a minimal user.yaml to the given path."""
    path.parent.mkdir(parents=True, exist_ok=True)
    payload = data or {"name": "Test User", "email": "test@example.com"}
    with open(path, "w") as fh:
        yaml.dump(payload, fh)
|
||||||
|
# ── GET /api/config/app ───────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def test_app_config_returns_expected_keys(client):
    """Returns 200 with isCloud, tier, and inferenceProfile in valid values."""
    res = client.get("/api/config/app")
    assert res.status_code == 200
    body = res.json()
    for key in ("isCloud", "tier", "inferenceProfile"):
        assert key in body
    assert body["tier"] in {"free", "paid", "premium", "ultra"}
    assert body["inferenceProfile"] in {"remote", "cpu", "single-gpu", "dual-gpu"}


def test_app_config_iscloud_env(client):
    """isCloud reflects CLOUD_MODE env var."""
    with patch.dict(os.environ, {"CLOUD_MODE": "true"}):
        res = client.get("/api/config/app")
    assert res.json()["isCloud"] is True


def test_app_config_invalid_tier_falls_back_to_free(client):
    """Unknown APP_TIER falls back to 'free'."""
    with patch.dict(os.environ, {"APP_TIER": "enterprise"}):
        res = client.get("/api/config/app")
    assert res.json()["tier"] == "free"
||||||
|
# ── GET/PUT /api/settings/profile ─────────────────────────────────────────────
|
||||||
|
|
||||||
|
def test_get_profile_returns_fields(tmp_path, monkeypatch):
    """GET /api/settings/profile returns dict with expected profile fields."""
    db_dir = tmp_path / "db"
    db_dir.mkdir()
    cfg_dir = db_dir / "config"
    cfg_dir.mkdir()
    _write_user_yaml(cfg_dir / "user.yaml", {"name": "Alice", "email": "alice@example.com"})
    monkeypatch.setenv("STAGING_DB", str(db_dir / "staging.db"))

    from dev_api import app
    api = TestClient(app)
    res = api.get("/api/settings/profile")
    assert res.status_code == 200
    body = res.json()
    for key in ("name", "email", "career_summary", "mission_preferences"):
        assert key in body


def test_put_get_profile_roundtrip(tmp_path, monkeypatch):
    """PUT then GET profile round-trip: saved name is returned."""
    db_dir = tmp_path / "db"
    db_dir.mkdir()
    cfg_dir = db_dir / "config"
    cfg_dir.mkdir()
    _write_user_yaml(cfg_dir / "user.yaml")
    monkeypatch.setenv("STAGING_DB", str(db_dir / "staging.db"))

    from dev_api import app
    api = TestClient(app)
    saved = api.put("/api/settings/profile", json={
        "name": "Bob Builder",
        "email": "bob@example.com",
        "phone": "555-1234",
        "linkedin_url": "",
        "career_summary": "Builder of things",
        "candidate_voice": "",
        "inference_profile": "cpu",
        "mission_preferences": [],
        "nda_companies": [],
        "accessibility_focus": False,
        "lgbtq_focus": False,
    })
    assert saved.status_code == 200
    assert saved.json()["ok"] is True

    fetched = api.get("/api/settings/profile")
    assert fetched.status_code == 200
    assert fetched.json()["name"] == "Bob Builder"
||||||
|
# ── GET /api/settings/resume ──────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def test_get_resume_missing_returns_not_exists(tmp_path, monkeypatch):
    """GET /api/settings/resume when file missing returns {exists: false}."""
    fake_path = tmp_path / "config" / "plain_text_resume.yaml"
    # Path is never created, so the endpoint should report absence.
    monkeypatch.setattr("dev_api._resume_path", lambda: fake_path)

    from dev_api import app
    api = TestClient(app)
    res = api.get("/api/settings/resume")
    assert res.status_code == 200
    assert res.json() == {"exists": False}


def test_post_resume_blank_creates_file(tmp_path, monkeypatch):
    """POST /api/settings/resume/blank creates the file."""
    fake_path = tmp_path / "config" / "plain_text_resume.yaml"
    monkeypatch.setattr("dev_api._resume_path", lambda: fake_path)

    from dev_api import app
    api = TestClient(app)
    res = api.post("/api/settings/resume/blank")
    assert res.status_code == 200
    assert res.json()["ok"] is True
    assert fake_path.exists()


def test_get_resume_after_blank_returns_exists(tmp_path, monkeypatch):
    """GET /api/settings/resume after blank creation returns {exists: true}."""
    fake_path = tmp_path / "config" / "plain_text_resume.yaml"
    monkeypatch.setattr("dev_api._resume_path", lambda: fake_path)

    from dev_api import app
    api = TestClient(app)
    # First create the blank file, then GET should report it exists.
    api.post("/api/settings/resume/blank")
    res = api.get("/api/settings/resume")
    assert res.status_code == 200
    assert res.json()["exists"] is True


def test_post_resume_sync_identity(tmp_path, monkeypatch):
    """POST /api/settings/resume/sync-identity returns 200."""
    db_dir = tmp_path / "db"
    db_dir.mkdir()
    cfg_dir = db_dir / "config"
    cfg_dir.mkdir()
    _write_user_yaml(cfg_dir / "user.yaml")
    monkeypatch.setenv("STAGING_DB", str(db_dir / "staging.db"))

    from dev_api import app
    api = TestClient(app)
    res = api.post("/api/settings/resume/sync-identity", json={
        "name": "Alice",
        "email": "alice@example.com",
        "phone": "555-0000",
        "linkedin_url": "https://linkedin.com/in/alice",
    })
    assert res.status_code == 200
    assert res.json()["ok"] is True
||||||
|
# ── GET/PUT /api/settings/search ──────────────────────────────────────────────
|
||||||
|
|
||||||
|
def test_get_search_prefs_returns_dict(tmp_path, monkeypatch):
    """GET /api/settings/search returns a dict with expected fields."""
    fake_path = tmp_path / "config" / "search_profiles.yaml"
    fake_path.parent.mkdir(parents=True, exist_ok=True)
    with open(fake_path, "w") as fh:
        yaml.dump({"default": {"remote_preference": "remote",
                               "job_boards": [{"name": "linkedin", "enabled": True}]}}, fh)
    monkeypatch.setattr("dev_api._search_prefs_path", lambda: fake_path)

    from dev_api import app
    api = TestClient(app)
    res = api.get("/api/settings/search")
    assert res.status_code == 200
    body = res.json()
    assert "remote_preference" in body
    assert "job_boards" in body


def test_put_get_search_roundtrip(tmp_path, monkeypatch):
    """PUT then GET search prefs round-trip: saved field is returned."""
    fake_path = tmp_path / "config" / "search_profiles.yaml"
    fake_path.parent.mkdir(parents=True, exist_ok=True)
    monkeypatch.setattr("dev_api._search_prefs_path", lambda: fake_path)

    from dev_api import app
    api = TestClient(app)
    saved = api.put("/api/settings/search", json={
        "remote_preference": "remote",
        "job_titles": ["Engineer"],
        "locations": ["Remote"],
        "exclude_keywords": [],
        "job_boards": [],
        "custom_board_urls": [],
        "blocklist_companies": [],
        "blocklist_industries": [],
        "blocklist_locations": [],
    })
    assert saved.status_code == 200
    assert saved.json()["ok"] is True

    fetched = api.get("/api/settings/search")
    assert fetched.status_code == 200
    assert fetched.json()["remote_preference"] == "remote"


def test_get_search_missing_file_returns_empty(tmp_path, monkeypatch):
    """GET /api/settings/search when file missing returns empty dict."""
    fake_path = tmp_path / "config" / "search_profiles.yaml"
    monkeypatch.setattr("dev_api._search_prefs_path", lambda: fake_path)

    from dev_api import app
    api = TestClient(app)
    res = api.get("/api/settings/search")
    assert res.status_code == 200
    assert res.json() == {}
||||||
|
# ── GET/PUT /api/settings/system/llm ─────────────────────────────────────────
|
||||||
|
|
||||||
|
def test_get_llm_config_returns_backends_and_byok(tmp_path, monkeypatch):
    """GET /api/settings/system/llm returns backends list and byok_acknowledged."""
    db_dir = tmp_path / "db"
    db_dir.mkdir()
    cfg_dir = db_dir / "config"
    cfg_dir.mkdir()
    _write_user_yaml(cfg_dir / "user.yaml")
    monkeypatch.setenv("STAGING_DB", str(db_dir / "staging.db"))

    fake_llm_path = tmp_path / "llm.yaml"
    with open(fake_llm_path, "w") as fh:
        yaml.dump({"backends": [{"name": "ollama", "enabled": True}]}, fh)
    monkeypatch.setattr("dev_api.LLM_CONFIG_PATH", fake_llm_path)

    from dev_api import app
    api = TestClient(app)
    res = api.get("/api/settings/system/llm")
    assert res.status_code == 200
    body = res.json()
    assert "backends" in body
    assert isinstance(body["backends"], list)
    assert "byok_acknowledged" in body


def test_byok_ack_adds_backend(tmp_path, monkeypatch):
    """POST byok-ack with backends list then GET shows backend in byok_acknowledged."""
    db_dir = tmp_path / "db"
    db_dir.mkdir()
    cfg_dir = db_dir / "config"
    cfg_dir.mkdir()
    _write_user_yaml(cfg_dir / "user.yaml", {"name": "Test", "byok_acknowledged_backends": []})
    monkeypatch.setenv("STAGING_DB", str(db_dir / "staging.db"))

    fake_llm_path = tmp_path / "llm.yaml"
    monkeypatch.setattr("dev_api.LLM_CONFIG_PATH", fake_llm_path)

    from dev_api import app
    api = TestClient(app)
    ack = api.post("/api/settings/system/llm/byok-ack", json={"backends": ["anthropic"]})
    assert ack.status_code == 200
    assert ack.json()["ok"] is True

    fetched = api.get("/api/settings/system/llm")
    assert fetched.status_code == 200
    assert "anthropic" in fetched.json()["byok_acknowledged"]


def test_put_llm_config_returns_ok(tmp_path, monkeypatch):
    """PUT /api/settings/system/llm returns ok."""
    db_dir = tmp_path / "db"
    db_dir.mkdir()
    cfg_dir = db_dir / "config"
    cfg_dir.mkdir()
    _write_user_yaml(cfg_dir / "user.yaml")
    monkeypatch.setenv("STAGING_DB", str(db_dir / "staging.db"))

    fake_llm_path = tmp_path / "llm.yaml"
    monkeypatch.setattr("dev_api.LLM_CONFIG_PATH", fake_llm_path)

    from dev_api import app
    api = TestClient(app)
    res = api.put("/api/settings/system/llm", json={
        "backends": [{"name": "ollama", "enabled": True, "url": "http://localhost:11434"}],
    })
    assert res.status_code == 200
    assert res.json()["ok"] is True
||||||
|
# ── GET /api/settings/system/services ────────────────────────────────────────
|
||||||
|
|
||||||
|
def test_get_services_returns_list(client):
    """GET /api/settings/system/services returns a list."""
    resp = client.get("/api/settings/system/services")
    assert resp.status_code == 200
    assert isinstance(resp.json(), list)


def test_get_services_cpu_profile(client):
    """Services list with INFERENCE_PROFILE=cpu contains cpu-compatible services."""
    with patch.dict(os.environ, {"INFERENCE_PROFILE": "cpu"}):
        from dev_api import app
        c = TestClient(app)
        resp = c.get("/api/settings/system/services")
    assert resp.status_code == 200
    data = resp.json()
    assert isinstance(data, list)
    # BUG FIX: the original assertion `"ollama" in names or len(names) >= 0`
    # was vacuously true (len() is always >= 0), so it tested nothing.
    # The service set varies by environment, so assert something that is
    # actually falsifiable: every entry is well-formed with a non-empty name.
    names = [s["name"] for s in data]
    assert all(isinstance(n, str) and n for n in names)
||||||
|
# ── GET /api/settings/system/email ───────────────────────────────────────────
|
||||||
|
|
||||||
|
def test_get_email_has_password_set_bool(tmp_path, monkeypatch):
    """GET /api/settings/system/email has password_set (bool) and no password key."""
    fake_email_path = tmp_path / "email.yaml"
    monkeypatch.setattr("dev_api._config_dir", lambda: fake_email_path.parent)
    with patch("dev_api.get_credential", return_value=None):
        from dev_api import app
        api = TestClient(app)
        res = api.get("/api/settings/system/email")
    assert res.status_code == 200
    body = res.json()
    assert "password_set" in body
    assert isinstance(body["password_set"], bool)
    # The raw secret must never be exposed by the API.
    assert "password" not in body


def test_get_email_password_set_true_when_stored(tmp_path, monkeypatch):
    """password_set is True when credential is stored."""
    fake_email_path = tmp_path / "email.yaml"
    monkeypatch.setattr("dev_api._config_dir", lambda: fake_email_path.parent)
    with patch("dev_api.get_credential", return_value="secret"):
        from dev_api import app
        api = TestClient(app)
        res = api.get("/api/settings/system/email")
    assert res.status_code == 200
    assert res.json()["password_set"] is True


def test_test_email_bad_host_returns_ok_false(client):
    """POST /api/settings/system/email/test with bad host returns {ok: false}, not 500."""
    with patch("dev_api.get_credential", return_value="fakepassword"):
        res = client.post("/api/settings/system/email/test", json={
            "host": "imap.nonexistent-host-xyz.invalid",
            "port": 993,
            "ssl": True,
            "username": "test@nonexistent.invalid",
        })
    assert res.status_code == 200
    assert res.json()["ok"] is False


def test_test_email_missing_host_returns_ok_false(client):
    """POST email/test with missing host returns {ok: false}."""
    with patch("dev_api.get_credential", return_value=None):
        res = client.post("/api/settings/system/email/test", json={
            "host": "",
            "username": "",
            "port": 993,
            "ssl": True,
        })
    assert res.status_code == 200
    assert res.json()["ok"] is False
||||||
|
# ── GET /api/settings/fine-tune/status ───────────────────────────────────────
|
||||||
|
|
||||||
|
def test_finetune_status_returns_status_and_pairs_count(client):
    """GET /api/settings/fine-tune/status returns status and pairs_count."""
    # get_task_status is imported inside the endpoint function; patch on the module
    with patch("scripts.task_runner.get_task_status", return_value=None, create=True):
        res = client.get("/api/settings/fine-tune/status")
    assert res.status_code == 200
    body = res.json()
    assert "status" in body
    assert "pairs_count" in body


def test_finetune_status_idle_when_no_task(tmp_path, monkeypatch):
    """Status is 'idle' and pairs_count is 0 when no task exists."""
    missing_jsonl = tmp_path / "cover_letters.jsonl"  # does not exist -> 0 pairs
    monkeypatch.setattr("dev_api._TRAINING_JSONL", missing_jsonl)
    with patch("scripts.task_runner.get_task_status", return_value=None, create=True):
        from dev_api import app
        api = TestClient(app)
        res = api.get("/api/settings/fine-tune/status")
    assert res.status_code == 200
    body = res.json()
    assert body["status"] == "idle"
    assert body["pairs_count"] == 0
||||||
|
# ── GET /api/settings/license ────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def test_get_license_returns_tier_and_active(tmp_path, monkeypatch):
    """GET /api/settings/license returns tier and active fields."""
    fake_license = tmp_path / "license.yaml"
    monkeypatch.setattr("dev_api._license_path", lambda: fake_license)

    from dev_api import app
    api = TestClient(app)
    res = api.get("/api/settings/license")
    assert res.status_code == 200
    body = res.json()
    assert "tier" in body
    assert "active" in body


def test_get_license_defaults_to_free(tmp_path, monkeypatch):
    """GET /api/settings/license defaults to free tier when no file."""
    fake_license = tmp_path / "license.yaml"
    monkeypatch.setattr("dev_api._license_path", lambda: fake_license)

    from dev_api import app
    api = TestClient(app)
    res = api.get("/api/settings/license")
    assert res.status_code == 200
    body = res.json()
    assert body["tier"] == "free"
    assert body["active"] is False


def test_activate_license_valid_key_returns_ok(tmp_path, monkeypatch):
    """POST activate with valid key format returns {ok: true}."""
    fake_license = tmp_path / "license.yaml"
    monkeypatch.setattr("dev_api._license_path", lambda: fake_license)

    from dev_api import app
    api = TestClient(app)
    res = api.post("/api/settings/license/activate", json={"key": "CFG-PRNG-A1B2-C3D4-E5F6"})
    assert res.status_code == 200
    assert res.json()["ok"] is True


def test_activate_license_invalid_key_returns_ok_false(tmp_path, monkeypatch):
    """POST activate with bad key format returns {ok: false}."""
    fake_license = tmp_path / "license.yaml"
    monkeypatch.setattr("dev_api._license_path", lambda: fake_license)

    from dev_api import app
    api = TestClient(app)
    res = api.post("/api/settings/license/activate", json={"key": "BADKEY"})
    assert res.status_code == 200
    assert res.json()["ok"] is False


def test_deactivate_license_returns_ok(tmp_path, monkeypatch):
    """POST /api/settings/license/deactivate returns 200 with ok."""
    fake_license = tmp_path / "license.yaml"
    monkeypatch.setattr("dev_api._license_path", lambda: fake_license)

    from dev_api import app
    api = TestClient(app)
    res = api.post("/api/settings/license/deactivate")
    assert res.status_code == 200
    assert res.json()["ok"] is True


def test_activate_then_deactivate(tmp_path, monkeypatch):
    """Activate then deactivate: active goes False."""
    fake_license = tmp_path / "license.yaml"
    monkeypatch.setattr("dev_api._license_path", lambda: fake_license)

    from dev_api import app
    api = TestClient(app)
    api.post("/api/settings/license/activate", json={"key": "CFG-PRNG-A1B2-C3D4-E5F6"})
    api.post("/api/settings/license/deactivate")

    res = api.get("/api/settings/license")
    assert res.status_code == 200
    assert res.json()["active"] is False
||||||
|
# ── GET/PUT /api/settings/privacy ─────────────────────────────────────────────
|
||||||
|
|
||||||
|
def test_get_privacy_returns_expected_fields(tmp_path, monkeypatch):
    """GET /api/settings/privacy returns telemetry_opt_in and byok_info_dismissed."""
    db_dir = tmp_path / "db"
    db_dir.mkdir()
    cfg_dir = db_dir / "config"
    cfg_dir.mkdir()
    _write_user_yaml(cfg_dir / "user.yaml")
    monkeypatch.setenv("STAGING_DB", str(db_dir / "staging.db"))

    from dev_api import app
    api = TestClient(app)
    res = api.get("/api/settings/privacy")
    assert res.status_code == 200
    body = res.json()
    assert "telemetry_opt_in" in body
    assert "byok_info_dismissed" in body


def test_put_get_privacy_roundtrip(tmp_path, monkeypatch):
    """PUT then GET privacy round-trip: saved values are returned."""
    db_dir = tmp_path / "db"
    db_dir.mkdir()
    cfg_dir = db_dir / "config"
    cfg_dir.mkdir()
    _write_user_yaml(cfg_dir / "user.yaml")
    monkeypatch.setenv("STAGING_DB", str(db_dir / "staging.db"))

    from dev_api import app
    api = TestClient(app)
    saved = api.put("/api/settings/privacy", json={
        "telemetry_opt_in": True,
        "byok_info_dismissed": True,
    })
    assert saved.status_code == 200
    assert saved.json()["ok"] is True

    fetched = api.get("/api/settings/privacy")
    assert fetched.status_code == 200
    body = fetched.json()
    assert body["telemetry_opt_in"] is True
    assert body["byok_info_dismissed"] is True
|
||||||
|
# ── GET /api/settings/developer ──────────────────────────────────────────────
|
||||||
|
|
||||||
|
def test_get_developer_returns_expected_fields(tmp_path, monkeypatch):
    """GET /api/settings/developer returns dev_tier_override and hf_token_set."""
    db_dir = tmp_path / "db"
    db_dir.mkdir()
    cfg_dir = db_dir / "config"
    cfg_dir.mkdir()
    _write_user_yaml(cfg_dir / "user.yaml")
    monkeypatch.setenv("STAGING_DB", str(db_dir / "staging.db"))
    monkeypatch.setattr("dev_api._tokens_path", lambda: tmp_path / "tokens.yaml")

    from dev_api import app
    api = TestClient(app)
    res = api.get("/api/settings/developer")
    assert res.status_code == 200
    body = res.json()
    assert "dev_tier_override" in body
    assert "hf_token_set" in body
    assert isinstance(body["hf_token_set"], bool)


def test_put_dev_tier_then_get(tmp_path, monkeypatch):
    """PUT dev tier to 'paid' then GET shows dev_tier_override as 'paid'."""
    db_dir = tmp_path / "db"
    db_dir.mkdir()
    cfg_dir = db_dir / "config"
    cfg_dir.mkdir()
    _write_user_yaml(cfg_dir / "user.yaml")
    monkeypatch.setenv("STAGING_DB", str(db_dir / "staging.db"))
    monkeypatch.setattr("dev_api._tokens_path", lambda: tmp_path / "tokens.yaml")

    from dev_api import app
    api = TestClient(app)
    saved = api.put("/api/settings/developer/tier", json={"tier": "paid"})
    assert saved.status_code == 200
    assert saved.json()["ok"] is True

    fetched = api.get("/api/settings/developer")
    assert fetched.status_code == 200
    assert fetched.json()["dev_tier_override"] == "paid"


def test_wizard_reset_returns_ok(tmp_path, monkeypatch):
    """POST /api/settings/developer/wizard-reset returns 200 with ok."""
    db_dir = tmp_path / "db"
    db_dir.mkdir()
    cfg_dir = db_dir / "config"
    cfg_dir.mkdir()
    _write_user_yaml(cfg_dir / "user.yaml", {"name": "Test", "wizard_complete": True})
    monkeypatch.setenv("STAGING_DB", str(db_dir / "staging.db"))

    from dev_api import app
    api = TestClient(app)
    res = api.post("/api/settings/developer/wizard-reset")
    assert res.status_code == 200
    assert res.json()["ok"] is True
Some files were not shown because too many files have changed in this diff Show more
Loading…
Reference in a new issue