Compare commits
No commits in common. "main" and "feature/vue-streamlit-parity" have entirely different histories.
main
...
feature/vue-streamlit-parity
44
.cliff.toml
|
|
@ -1,44 +0,0 @@
|
|||
# git-cliff changelog configuration for Peregrine
|
||||
# See: https://git-cliff.org/docs/configuration
|
||||
|
||||
[changelog]
|
||||
header = """
|
||||
# Changelog\n
|
||||
"""
|
||||
body = """
|
||||
{% if version %}\
|
||||
## [{{ version | trim_start_matches(pat="v") }}] - {{ timestamp | date(format="%Y-%m-%d") }}
|
||||
{% else %}\
|
||||
## [Unreleased]
|
||||
{% endif %}\
|
||||
{% for group, commits in commits | group_by(attribute="group") %}
|
||||
### {{ group | upper_first }}
|
||||
{% for commit in commits %}
|
||||
- {% if commit.scope %}**{{ commit.scope }}:** {% endif %}{{ commit.message | upper_first }}\
|
||||
{% endfor %}
|
||||
{% endfor %}\n
|
||||
"""
|
||||
trim = true
|
||||
|
||||
[git]
|
||||
conventional_commits = true
|
||||
filter_unconventional = true
|
||||
split_commits = false
|
||||
commit_preprocessors = []
|
||||
commit_parsers = [
|
||||
{ message = "^feat", group = "Features" },
|
||||
{ message = "^fix", group = "Bug Fixes" },
|
||||
{ message = "^perf", group = "Performance" },
|
||||
{ message = "^refactor", group = "Refactoring" },
|
||||
{ message = "^docs", group = "Documentation" },
|
||||
{ message = "^test", group = "Testing" },
|
||||
{ message = "^chore", group = "Chores" },
|
||||
{ message = "^ci", group = "CI/CD" },
|
||||
{ message = "^revert", group = "Reverts" },
|
||||
]
|
||||
filter_commits = false
|
||||
tag_pattern = "v[0-9].*"
|
||||
skip_tags = ""
|
||||
ignore_tags = ""
|
||||
topo_order = false
|
||||
sort_commits = "oldest"
|
||||
32
.env.example
|
|
@ -2,10 +2,9 @@
|
|||
# Auto-generated by the setup wizard, or fill in manually.
|
||||
# NEVER commit .env to git.
|
||||
|
||||
STREAMLIT_PORT=8502
|
||||
STREAMLIT_PORT=8501
|
||||
OLLAMA_PORT=11434
|
||||
VLLM_PORT=8000
|
||||
CF_TEXT_PORT=8006
|
||||
SEARXNG_PORT=8888
|
||||
VISION_PORT=8002
|
||||
VISION_MODEL=vikhyatk/moondream2
|
||||
|
|
@ -16,19 +15,10 @@ OLLAMA_MODELS_DIR=~/models/ollama
|
|||
VLLM_MODELS_DIR=~/models/vllm # override with full path to your model dir
|
||||
VLLM_MODEL=Ouro-1.4B # cover letters — fast 1.4B model
|
||||
VLLM_RESEARCH_MODEL=Ouro-2.6B-Thinking # research — reasoning 2.6B model; restart vllm to switch
|
||||
CF_TEXT_MODEL=/Library/Assets/LLM/qwen2.5-3b-instruct-q4_k_m.gguf # cf-text GGUF model; set to "mock" to disable
|
||||
VLLM_MAX_MODEL_LEN=4096 # increase to 8192 for Thinking models with long CoT
|
||||
VLLM_GPU_MEM_UTIL=0.75 # lower to 0.6 if sharing GPU with other services
|
||||
OLLAMA_DEFAULT_MODEL=llama3.2:3b
|
||||
|
||||
# ── LLM env-var auto-config (alternative to config/llm.yaml) ─────────────────
|
||||
# Set any of these to configure LLM backends without needing a config/llm.yaml.
|
||||
# Priority: Anthropic > OpenAI-compat > Ollama (always tried as local fallback).
|
||||
OLLAMA_HOST=http://localhost:11434 # Ollama host; override if on a different machine
|
||||
OLLAMA_MODEL=llama3.2:3b # model to request from Ollama
|
||||
OPENAI_MODEL=gpt-4o-mini # model override for OpenAI-compat backend
|
||||
ANTHROPIC_MODEL=claude-haiku-4-5-20251001 # model override for Anthropic backend
|
||||
|
||||
# API keys (required for remote profile)
|
||||
ANTHROPIC_API_KEY=
|
||||
OPENAI_COMPAT_URL=
|
||||
|
|
@ -41,26 +31,6 @@ FORGEJO_API_URL=https://git.opensourcesolarpunk.com/api/v1
|
|||
# GITHUB_TOKEN= # future — enable when public mirror is active
|
||||
# GITHUB_REPO= # future
|
||||
|
||||
# ── CF-hosted coordinator (Paid+ tier) ───────────────────────────────────────
|
||||
# Set CF_LICENSE_KEY to authenticate with the hosted coordinator.
|
||||
# Leave both blank for local self-hosted cf-orch or bare-metal inference.
|
||||
CF_LICENSE_KEY=
|
||||
GPU_SERVER_URL=https://orch.circuitforge.tech
|
||||
# CF_ORCH_URL is also accepted as a backward-compat alias for GPU_SERVER_URL
|
||||
|
||||
# cf-orch agent — GPU profiles only (single-gpu, dual-gpu-*)
|
||||
# The agent registers this node with the cf-orch coordinator and reports VRAM stats.
|
||||
# CF_ORCH_COORDINATOR_URL: coordinator the agent registers with
|
||||
# CF_ORCH_NODE_ID: name shown on the dashboard (default: peregrine)
|
||||
# CF_ORCH_AGENT_PORT: host port for the agent HTTP server (default: 7701)
|
||||
# CF_ORCH_ADVERTISE_HOST: IP the coordinator uses to reach back to this agent.
|
||||
# Defaults to 127.0.0.1 (same-host coordinator).
|
||||
# Set to your host LAN IP for a remote coordinator.
|
||||
CF_ORCH_COORDINATOR_URL=http://localhost:7700
|
||||
CF_ORCH_NODE_ID=peregrine
|
||||
CF_ORCH_AGENT_PORT=7701
|
||||
#CF_ORCH_ADVERTISE_HOST=10.1.10.71
|
||||
|
||||
# Cloud multi-tenancy (compose.cloud.yml only — do not set for local installs)
|
||||
CLOUD_MODE=false
|
||||
CLOUD_DATA_ROOT=/devl/menagerie-data
|
||||
|
|
|
|||
|
|
@ -1,63 +0,0 @@
|
|||
# Peregrine CI — lint, type-check, test on PR/push
|
||||
# Full-stack: FastAPI (Python) + Vue 3 SPA (Node)
|
||||
# Adapted from Circuit-Forge/cf-agents workflows/ci.yml (cf-agents#4 tracks the
|
||||
# upstream ci-fullstack.yml variant; update this file when that lands).
|
||||
|
||||
name: CI
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [main, 'feature/**', 'fix/**', 'freeze/**']
|
||||
pull_request:
|
||||
branches: [main]
|
||||
|
||||
jobs:
|
||||
backend:
|
||||
name: Backend (Python)
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: '3.12'
|
||||
cache: pip
|
||||
|
||||
- name: Install system dependencies
|
||||
run: sudo apt-get update -q && sudo apt-get install -y libsqlcipher-dev
|
||||
|
||||
- name: Install dependencies
|
||||
run: pip install -r requirements.txt
|
||||
|
||||
- name: Install lint tools
|
||||
run: pip install ruff
|
||||
|
||||
- name: Lint
|
||||
run: ruff check .
|
||||
|
||||
- name: Test
|
||||
run: pytest tests/ -v --tb=short
|
||||
|
||||
frontend:
|
||||
name: Frontend (Vue)
|
||||
runs-on: ubuntu-latest
|
||||
defaults:
|
||||
run:
|
||||
working-directory: web
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- uses: actions/setup-node@v4
|
||||
with:
|
||||
node-version: '20'
|
||||
cache: npm
|
||||
cache-dependency-path: web/package-lock.json
|
||||
|
||||
- name: Install dependencies
|
||||
run: npm ci
|
||||
|
||||
- name: Type check
|
||||
run: npx vue-tsc --noEmit
|
||||
|
||||
- name: Test
|
||||
run: npm run test
|
||||
|
|
@ -1,35 +0,0 @@
|
|||
# Mirror push to GitHub and Codeberg on every push to main or tag.
|
||||
# Copied from Circuit-Forge/cf-agents workflows/mirror.yml
|
||||
# Required secrets: GH_MIRROR_TOKEN, CODEBERG_MIRROR_TOKEN
|
||||
# Note: Forgejo reserves the GITHUB_* prefix for secret names — use GH_* instead.
|
||||
|
||||
name: Mirror
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [main]
|
||||
tags: ['v*']
|
||||
|
||||
jobs:
|
||||
mirror:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Mirror to GitHub
|
||||
env:
|
||||
GH_MIRROR_PAT: ${{ secrets.GH_MIRROR_TOKEN }}
|
||||
REPO: ${{ github.event.repository.name }}
|
||||
run: |
|
||||
git remote add github "https://x-access-token:${GH_MIRROR_PAT}@github.com/CircuitForgeLLC/${REPO}.git"
|
||||
git push github --mirror
|
||||
|
||||
- name: Mirror to Codeberg
|
||||
env:
|
||||
CODEBERG_TOKEN: ${{ secrets.CODEBERG_MIRROR_TOKEN }}
|
||||
REPO: ${{ github.event.repository.name }}
|
||||
run: |
|
||||
git remote add codeberg "https://CircuitForge:${CODEBERG_TOKEN}@codeberg.org/CircuitForge/${REPO}.git"
|
||||
git push codeberg --mirror
|
||||
|
|
@ -1,71 +0,0 @@
|
|||
# Tag-triggered release workflow.
|
||||
# Generates changelog and creates Forgejo release on v* tags.
|
||||
# Copied from Circuit-Forge/cf-agents workflows/release.yml
|
||||
#
|
||||
# Docker push is intentionally disabled — BSL 1.1 registry policy not yet resolved.
|
||||
# Tracked in Circuit-Forge/cf-agents#3. Re-enable the Docker steps when that lands.
|
||||
#
|
||||
# Required secrets: FORGEJO_RELEASE_TOKEN
|
||||
# (GHCR_TOKEN not needed until Docker push is enabled)
|
||||
|
||||
name: Release
|
||||
|
||||
on:
|
||||
push:
|
||||
tags: ['v*']
|
||||
|
||||
jobs:
|
||||
release:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
# ── Changelog ────────────────────────────────────────────────────────────
|
||||
- name: Generate changelog
|
||||
uses: orhun/git-cliff-action@v3
|
||||
id: cliff
|
||||
with:
|
||||
config: .cliff.toml
|
||||
args: --latest --strip header
|
||||
env:
|
||||
OUTPUT: CHANGES.md
|
||||
|
||||
# ── Docker (disabled — BSL registry policy pending cf-agents#3) ──────────
|
||||
# - name: Set up QEMU
|
||||
# uses: docker/setup-qemu-action@v3
|
||||
# - name: Set up Buildx
|
||||
# uses: docker/setup-buildx-action@v3
|
||||
# - name: Log in to GHCR
|
||||
# uses: docker/login-action@v3
|
||||
# with:
|
||||
# registry: ghcr.io
|
||||
# username: ${{ github.actor }}
|
||||
# password: ${{ secrets.GHCR_TOKEN }}
|
||||
# - name: Build and push Docker image
|
||||
# uses: docker/build-push-action@v6
|
||||
# with:
|
||||
# context: .
|
||||
# push: true
|
||||
# platforms: linux/amd64,linux/arm64
|
||||
# tags: |
|
||||
# ghcr.io/circuitforgellc/peregrine:${{ github.ref_name }}
|
||||
# ghcr.io/circuitforgellc/peregrine:latest
|
||||
# cache-from: type=gha
|
||||
# cache-to: type=gha,mode=max
|
||||
|
||||
# ── Forgejo Release ───────────────────────────────────────────────────────
|
||||
- name: Create Forgejo release
|
||||
env:
|
||||
FORGEJO_TOKEN: ${{ secrets.FORGEJO_RELEASE_TOKEN }}
|
||||
REPO: ${{ github.event.repository.name }}
|
||||
TAG: ${{ github.ref_name }}
|
||||
NOTES: ${{ steps.cliff.outputs.content }}
|
||||
run: |
|
||||
curl -sS -X POST \
|
||||
"https://git.opensourcesolarpunk.com/api/v1/repos/Circuit-Forge/${REPO}/releases" \
|
||||
-H "Authorization: token ${FORGEJO_TOKEN}" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "$(jq -n --arg tag "$TAG" --arg body "$NOTES" \
|
||||
'{tag_name: $tag, name: $tag, body: $body}')"
|
||||
51
.github/workflows/ci.yml
vendored
|
|
@ -1,7 +1,3 @@
|
|||
# Peregrine CI — runs on GitHub mirror for public credibility badge.
|
||||
# Forgejo (.forgejo/workflows/ci.yml) is the canonical CI — keep these in sync.
|
||||
# No Forgejo-specific secrets used here; circuitforge-core is public on Forgejo.
|
||||
|
||||
name: CI
|
||||
|
||||
on:
|
||||
|
|
@ -11,46 +7,29 @@ on:
|
|||
branches: [main]
|
||||
|
||||
jobs:
|
||||
backend:
|
||||
name: Backend (Python)
|
||||
test:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- uses: actions/setup-python@v5
|
||||
- name: Install system dependencies
|
||||
run: sudo apt-get update -q && sudo apt-get install -y libsqlcipher-dev
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: '3.12'
|
||||
python-version: "3.11"
|
||||
cache: pip
|
||||
|
||||
- name: Configure git credentials for Forgejo
|
||||
env:
|
||||
FORGEJO_TOKEN: ${{ secrets.FORGEJO_TOKEN }}
|
||||
run: |
|
||||
git config --global url."https://oauth2:${FORGEJO_TOKEN}@git.opensourcesolarpunk.com/".insteadOf "https://git.opensourcesolarpunk.com/"
|
||||
|
||||
- name: Install dependencies
|
||||
run: pip install -r requirements.txt
|
||||
|
||||
- name: Lint
|
||||
run: ruff check .
|
||||
|
||||
- name: Test
|
||||
- name: Run tests
|
||||
run: pytest tests/ -v --tb=short
|
||||
|
||||
frontend:
|
||||
name: Frontend (Vue)
|
||||
runs-on: ubuntu-latest
|
||||
defaults:
|
||||
run:
|
||||
working-directory: web
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- uses: actions/setup-node@v4
|
||||
with:
|
||||
node-version: '20'
|
||||
cache: npm
|
||||
cache-dependency-path: web/package-lock.json
|
||||
|
||||
- name: Install dependencies
|
||||
run: npm ci
|
||||
|
||||
- name: Type check
|
||||
run: npx vue-tsc --noEmit
|
||||
|
||||
- name: Test
|
||||
run: npm run test
|
||||
|
|
|
|||
3
.gitignore
vendored
|
|
@ -40,11 +40,8 @@ pytest-output.txt
|
|||
docs/superpowers/
|
||||
|
||||
data/email_score.jsonl
|
||||
data/email_score.jsonl.bad-labels
|
||||
data/email_label_queue.jsonl
|
||||
data/email_compare_sample.jsonl
|
||||
data/.feedback_ratelimit.json
|
||||
data/config/
|
||||
|
||||
config/label_tool.yaml
|
||||
config/server.yaml
|
||||
|
|
|
|||
257
CHANGELOG.md
|
|
@ -9,263 +9,6 @@ Format follows [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
|
|||
|
||||
---
|
||||
|
||||
## [0.9.5] — 2026-05-08
|
||||
|
||||
### Fixed
|
||||
|
||||
- **Theme: dark/explicit themes show correct page background** — `index.html` inline style
|
||||
set `html, body { background: #eaeff8 }` hardcoded. `body` paints on top of `html`, so
|
||||
even when `html { background: var(--color-surface) }` correctly resolved to `#16202e` in
|
||||
dark mode, the body's hardcoded light background covered it — producing dark cards on a
|
||||
light page. Fixed by: (1) removing body background from the inline style; (2) adding a
|
||||
tiny blocking `<script>` that reads `cf-theme` / `cf-hacker-mode` from localStorage and
|
||||
sets `data-theme` on `<html>` before first paint; (3) adding
|
||||
`html[data-theme="dark"|"solarized-dark"|"hacker"]` rules so FOUT prevention fires the
|
||||
right background immediately on load.
|
||||
|
||||
---
|
||||
|
||||
## [0.9.4] — 2026-05-08
|
||||
|
||||
### Added
|
||||
|
||||
- **Messages view — expandable email timeline** — click any email item to lazy-load
|
||||
and read the full body inline (HTML stripped to plain text via `DOMParser`).
|
||||
Bodies are fetched on-demand via the new `GET /api/contacts/{id}` endpoint to avoid
|
||||
loading 50KB+ email bodies on every page view.
|
||||
- **Messages view — compose bar** — action buttons (Log call, Log note, Use template,
|
||||
Draft reply with LLM, Call via Osprey) moved from the always-visible header into a
|
||||
sticky bottom compose bar triggered by a + New toggle. Reduces visual clutter when
|
||||
just reading the thread.
|
||||
- **Home view — "Skip review" checkbox** — when adding jobs by URL, a checkbox (default
|
||||
on) sends them directly to the Apply queue, bypassing Job Review.
|
||||
- **ContactsView — sync status** — shows last completed sync time and a spinner when
|
||||
an email sync is running.
|
||||
- **imap_sync: Indeed alert parser** — `parse_indeed_alert()` extracts job title,
|
||||
company, location, salary, and canonical URL from Indeed Job Alert digest emails.
|
||||
- **scrape_url: Oracle HCM support** — Playwright-based scraper for Oracle HCM
|
||||
CandidateExperience portals (React SPAs requiring JS execution).
|
||||
- **manage.sh** — compose engine auto-detection (docker compose / podman compose /
|
||||
podman-compose), `build` command, and cloud/demo stack shortcuts.
|
||||
- **theme.css** — `--color-overlay` token for modal/dialog backdrops.
|
||||
|
||||
### Fixed
|
||||
|
||||
- **Messages view layout** — changed `height: 100%` to `height: 100dvh` with a mobile
|
||||
override for the 56px tab bar. `height: 100%` was resolving to "shrink-wrap" because
|
||||
`.app-main` has no explicit height; compose bar is now correctly pinned to the bottom.
|
||||
- **Accessibility: danger button contrast** — `btn--danger` used `color: white` on
|
||||
`--app-accent` (Talon Orange), yielding 2.8:1 contrast (fails WCAG AA 4.5:1 for
|
||||
normal text). Fixed to `color: var(--app-accent-text)` (dark navy, 5.5:1).
|
||||
- **Accessibility: warning badge contrast** — `tab-badge` in Job Review used `color: white`
|
||||
on `--color-warning` (amber). Same fix applied.
|
||||
- **Theme: Interviews signal banners** — hardcoded `rgba(245,158,11,…)` / `rgba(39,174,…)`
|
||||
/ `rgba(192,57,…)` replaced with `color-mix()` against `--color-warning/success/error`.
|
||||
- **Theme: Interviews signal count** — `color: #e67e22` hardcode replaced with
|
||||
`var(--app-accent)`.
|
||||
- **Theme: References academic tag chip** — `color: #7c3aed` hardcode replaced with
|
||||
`var(--status-synced)`; background uses `color-mix()` with the same token.
|
||||
- **Theme: Interviews signal-move button** — `color: #fff` on `--color-primary` fails
|
||||
in dark mode (light green bg); fixed to `var(--color-text-inverse)`.
|
||||
- **Modal backdrops** — `rgba(0,0,0,0.5)` replaced with `var(--color-overlay)` for
|
||||
theme consistency.
|
||||
|
||||
---
|
||||
|
||||
## [0.9.3] — 2026-05-05
|
||||
|
||||
### Added
|
||||
|
||||
- **Editable resume review** — proposed summary and experience bullets in the review modal
|
||||
are now editable text areas. Edits flow through `apply_review_decisions()` and override
|
||||
the LLM output in the final resume struct. Preview textarea in Apply Workspace is also
|
||||
editable, with manual changes preserved through the approve step via `preview_text_override`.
|
||||
|
||||
### Fixed
|
||||
|
||||
- **Double bullets in resume optimizer** — `_section_text_for_prompt` now strips existing
|
||||
bullet characters before prefixing with `•`, and `_reparse_experience_bullets` uses a
|
||||
greedy strip regex so `• •` patterns can no longer survive parsing.
|
||||
- **Asterisk markup in summary** — added `_clean_summary_markup()` to strip LLM-generated
|
||||
markdown bullet chars (`*`, `-`, etc.) from career summary output; injected no-markdown
|
||||
rule into the LLM prompt's CRITICAL RULES list.
|
||||
- **Light theme dark CSS bleed** — `peregrine.css` media dark override now scoped to
|
||||
`:root:not([data-theme])` (auto mode only) instead of `:root:not([data-theme="hacker"])`.
|
||||
Fixes dark navy `--app-primary-light`/`--app-accent-light` bleeding into light themes
|
||||
(light, solarized-light, colorblind) on dark-OS machines.
|
||||
|
||||
---
|
||||
|
||||
## [0.9.2] — 2026-05-02
|
||||
|
||||
### Added
|
||||
|
||||
- **Cover letter training export** (#111) — opt-in consent gate (`training_export_opt_in`
|
||||
in `user.yaml`, default off) lets users export applied-job cover letters as Alpaca-format
|
||||
JSONL for local fine-tuning. Per-job exclude/restore curation in Settings → Fine-Tune.
|
||||
Streaming JSONL download merges DB pairs with any previously uploaded file pairs.
|
||||
Cloud fine-tune Phase 2 stub (501) reserved for cf-orch integration.
|
||||
- **WizardTrainingStep** — new onboarding consent step inserted between Resume and Identity;
|
||||
skippable, opt-in default off, cloud-aware privacy copy.
|
||||
- **a11y:** confirmed-state toggle (no optimistic DOM divergence), visible Premium tier gate
|
||||
with upgrade link, `aria-live` region on pairs list, cloud-aware consent copy.
|
||||
|
||||
---
|
||||
|
||||
## [0.9.0] — 2026-04-20
|
||||
|
||||
### Added
|
||||
|
||||
- **Messaging tab** (#74) — per-job communication timeline replacing `/contacts`.
|
||||
Unified view of IMAP emails (`job_contacts`) and manually logged entries (`messages`).
|
||||
Log calls and in-person notes with timestamp. Message template library with 4 built-in
|
||||
templates (follow-up, thank-you, accommodation request, withdrawal) and user-created
|
||||
templates with `{{token}}` substitution. LLM draft reply for inbound emails (BYOK-unlockable,
|
||||
BSL 1.1). Draft approval flow with inline editing and one-click clipboard copy. Osprey
|
||||
IVR stub button (Phase 2 placeholder with easter egg). `migrations/008_messaging.sql`.
|
||||
- **Public demo experience** (#103) — full read-only demo mode at `demo.circuitforge.tech/peregrine`.
|
||||
`IS_DEMO=true` write-blocks all mutating API endpoints with a toast notification.
|
||||
Ephemeral seed data via tmpfs + `demo/seed.sql` (resets on container start). WelcomeModal
|
||||
on first visit (localStorage-gated). Per-view HintChips guiding new users through the
|
||||
job search flow (localStorage-dismissed). DemoBanner with accessible CTA buttons
|
||||
(WCAG-compliant contrast in light and dark themes). `migrations/006_missing_columns.sql`.
|
||||
- **References tracker and recommendation letter system** (#96) — track professional
|
||||
references and generate LLM-drafted recommendation request letters.
|
||||
- **Shadow listing detector** — flags duplicate or aggregator-reposted job listings.
|
||||
- **Hired feedback widget** — capture post-hire notes and retrospective feedback on jobs.
|
||||
- **Interview prep Q&A** — LLM-generated practice questions for the selected job.
|
||||
- **Resume library ↔ profile sync** — `POST /api/resumes/{id}/apply-to-profile` pushes
|
||||
a library resume into the active profile; `PUT /api/settings/resume` syncs edits back
|
||||
to the default library entry. `ResumeSyncConfirmModal` shows a before/after diff.
|
||||
`ResumeProfileView` extended with career summary, education, and achievements sections.
|
||||
`migrations/007_resume_sync.sql` adds `synced_at` to `resumes`.
|
||||
- **Plausible analytics** — lightweight privacy-preserving analytics in Vue SPA and docs.
|
||||
- **cf_text / cf_voice LLM backends** — wire trunk service backends in `llm.yaml`.
|
||||
- **Mission alignment domains** — load preferred company domains from
|
||||
`config/mission_domains.yaml` rather than hardcoded values.
|
||||
- **GitHub Actions CI** — workflow for public credibility badge (`ci.yml`).
|
||||
- **`CF_APP_NAME` cloud annotation** — coordinator pipeline attribution for multi-product
|
||||
cloud deployments.
|
||||
|
||||
### Changed
|
||||
|
||||
- `/contacts` route now redirects to `/messages`; nav item renamed to "Messages" and the "Contacts"
|
||||
label removed. `ContactsView.vue` preserved for reference, router points to `MessagingView`.
|
||||
- Survey `/analyze` endpoint is now fully async via the task queue (no blocking LLM call
|
||||
on the request thread).
|
||||
- nginx config adds `/peregrine/` base-path routing for subdirectory deployments.
|
||||
- `compose.demo.yml` updated for Vue/FastAPI architecture with tmpfs demo volume.
|
||||
|
||||
### Fixed
|
||||
|
||||
- Tier bypass and draft body persistence after page navigation.
|
||||
- `canDraftLlm` cleanup and message list `limit` cap.
|
||||
- DemoBanner button contrast — semantic surface token instead of hardcoded white.
|
||||
- Period split in `profile_to_library` now handles ISO date strings containing hyphens.
|
||||
- Cloud startup sweeps all user DBs for pending migrations on deploy.
|
||||
- Resume import strips CID glyph references via `resume_parser` extractors.
|
||||
- Survey and interview tests updated for `hired_feedback` column and async analyze flow.
|
||||
|
||||
---
|
||||
|
||||
## [0.8.6] — 2026-04-12
|
||||
|
||||
### Added
|
||||
|
||||
- **Resume Review Modal** — paged tabbed dialog replaces the inline resume review
|
||||
section in the Apply workspace. Pages through Skills diff, Summary diff, one page
|
||||
per experience entry, and a Confirm summary. Color-coded tab status: unvisited
|
||||
(gray), in-progress (indigo), accepted (green), partial (amber), skipped (slate).
|
||||
Full ARIA tabs pattern with focus trap and `Teleport to body`.
|
||||
- **Resume Library** — new `/resumes` page for managing saved resumes. Two-column
|
||||
layout: list sidebar + full-text preview pane. Supports import (.txt, .pdf, .docx,
|
||||
.odt, .yaml), rename (Edit), set as default, download (txt/pdf/yaml), and delete
|
||||
(guarded: disabled when it is the only resume or is the default). 5 MB upload limit.
|
||||
- **ResumeLibraryCard** — compact widget shown above the ATS Resume Optimizer in the
|
||||
Apply workspace. Displays the currently active resume for the job (job-specific or
|
||||
global default), with Switch and Manage deep links.
|
||||
- **Resume library API** — `GET/POST /api/resumes`, `GET/PATCH/DELETE /api/resumes/{id}`,
|
||||
`POST /api/resumes/{id}/set-default`, `POST /api/resumes/import`,
|
||||
`GET/PATCH /api/jobs/{job_id}/resume`. `approve_resume` extended with
|
||||
`save_to_library` + `resume_name` params to save optimized resumes directly.
|
||||
- **`resumes` DB migration** — `migrations/005_resumes_table.sql` adds `resumes` table
|
||||
(10 columns) and `resume_id` FK on `jobs`.
|
||||
- **Resumes nav link** — Document icon entry added after Apply in the main nav.
|
||||
|
||||
### Changed
|
||||
|
||||
- Resume optimizer "Awaiting review" state now triggers the Review Modal instead of
|
||||
rendering an inline diff; save-to-library checkbox and name input surfaced on the
|
||||
preview confirmation step.
|
||||
|
||||
---
|
||||
|
||||
## [0.8.5] — 2026-04-02
|
||||
|
||||
### Added
|
||||
|
||||
- **Vue onboarding wizard** — 7-step first-run setup replaces the Streamlit wizard
|
||||
in the Vue SPA: Hardware detection → Tier → Resume upload/build → Identity →
|
||||
Inference & API keys → Search preferences → Integrations. Progress saves to
|
||||
`user.yaml` on every step; crash-recovery resumes from the last completed step.
|
||||
- **Wizard API endpoints** — `GET /api/wizard/status`, `POST /api/wizard/step`,
|
||||
`GET /api/wizard/hardware`, `POST /api/wizard/inference/test`,
|
||||
`POST /api/wizard/complete`. Inference test always soft-fails so Ollama being
|
||||
unreachable never blocks setup completion.
|
||||
- **Cloud auto-skip** — cloud instances automatically complete steps 1 (hardware),
|
||||
2 (tier), and 5 (inference) and drop the user directly on the Resume step.
|
||||
- **`wizardGuard` router gate** — all Vue routes require wizard completion; completed
|
||||
users are bounced away from `/setup` to `/`.
|
||||
- **Chip-input search step** — job titles and locations entered as press-Enter/comma
|
||||
chips; validates at least one title before advancing.
|
||||
- **Integrations tile grid** — optional step 7 shows Notion, Calendar, Slack, Discord,
|
||||
Drive with paid-tier badges; skippable on Finish.
|
||||
|
||||
### Fixed
|
||||
|
||||
- **User config isolation: dangerous fallback removed** — `_user_yaml_path()` fell
|
||||
back to `/devl/job-seeker/config/user.yaml` (legacy profile) when `user.yaml`
|
||||
didn't exist at the expected path; new users now get an empty dict instead of
|
||||
another user's data. Affects profile, resume, search, and all wizard endpoints.
|
||||
- **Resume path not user-isolated** — `RESUME_PATH = Path("config/plain_text_resume.yaml")`
|
||||
was a relative CWD path shared across all users. Replaced with `_resume_path()`
|
||||
derived from `_user_yaml_path()` / `STAGING_DB`.
|
||||
- **Resume upload silently returned empty data** — `upload_resume` was passing a
|
||||
file path string to `structure_resume()` which expects raw text; now reads bytes
|
||||
and dispatches to the correct extractor (`extract_text_from_pdf` / `_docx` / `_odt`).
|
||||
- **Wizard resume step read wrong envelope field** — `WizardResumeStep.vue` read
|
||||
`data.experience` but the upload response wraps parsed data under `data.data`.
|
||||
|
||||
---
|
||||
|
||||
## [0.8.4] — 2026-04-02
|
||||
|
||||
### Fixed
|
||||
|
||||
- **Cloud: cover letter used wrong user's profile** — `generate_cover_letter.generate()`
|
||||
loaded `_profile` from the global `config/user.yaml` at module import time, so all
|
||||
cloud users got the default user's name, voice, and mission preferences in their
|
||||
generated letters. `generate()` now accepts a `user_yaml_path` parameter; `task_runner`
|
||||
derives it from the per-user config directory (`db_path/../config/user.yaml`) and
|
||||
passes it through. `_build_system_context`, `_build_mission_notes`, `detect_mission_alignment`,
|
||||
`build_prompt`, and `_trim_to_letter_end` all accept a `profile` override so the
|
||||
per-call profile is used end-to-end without breaking CLI mode.
|
||||
- **Apply Workspace: hardcoded config paths in cloud mode** — `4_Apply.py` was loading
|
||||
`_USER_YAML` and `RESUME_YAML` from the repo-root `config/` before `resolve_session()`
|
||||
ran, so cloud users saw the global (Meg's) resume in the Apply tab. Both paths now
|
||||
derive from `get_config_dir()` after session resolution.
|
||||
|
||||
### Changed
|
||||
|
||||
- **Vue SPA open to all tiers** — Vue 3 frontend is no longer gated behind the beta
|
||||
flag; all tier users can switch to the Vue UI from Settings.
|
||||
- **LLM model candidates** — vllm backend now tries Qwen2.5-3B first, Phi-4-mini
|
||||
as fallback (was reversed). cf_orch allocation block added to vllm config.
|
||||
- **Preflight** — removed `vllm` from Docker adoption list; vllm is now managed
|
||||
entirely by cf-orch and should not be stubbed by preflight.
|
||||
|
||||
---
|
||||
|
||||
## [0.8.3] — 2026-04-01
|
||||
|
||||
### Fixed
|
||||
|
|
|
|||
|
|
@ -34,7 +34,7 @@ full instructions.
|
|||
```bash
|
||||
git clone https://git.opensourcesolarpunk.com/pyr0ball/peregrine.git
|
||||
cd peregrine
|
||||
./install.sh # installs deps, activates git hooks
|
||||
./setup.sh # installs deps, activates git hooks
|
||||
./manage.sh start
|
||||
```
|
||||
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@ WORKDIR /app
|
|||
# System deps for companyScraper (beautifulsoup4, fake-useragent, lxml) and PDF gen
|
||||
# libsqlcipher-dev: required to build pysqlcipher3 (SQLCipher AES-256 encryption for cloud mode)
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
gcc libffi-dev curl libsqlcipher-dev git \
|
||||
gcc libffi-dev curl libsqlcipher-dev \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
COPY requirements.txt .
|
||||
|
|
|
|||
|
|
@ -26,12 +26,6 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
|
|||
COPY circuitforge-core/ /circuitforge-core/
|
||||
RUN pip install --no-cache-dir /circuitforge-core
|
||||
|
||||
# circuitforge-orch client — needed for LLMRouter cf_orch allocation.
|
||||
# NOTE: if the directory doesn't exist the COPY will fail at build time; keep
|
||||
# cf-orch as a sibling of peregrine in the build context.
|
||||
COPY circuitforge-orch/ /circuitforge-orch/
|
||||
RUN pip install --no-cache-dir /circuitforge-orch
|
||||
|
||||
COPY peregrine/requirements.txt .
|
||||
# Skip the cfcore line — already installed above from the local copy
|
||||
RUN grep -v 'circuitforge-core' requirements.txt | pip install --no-cache-dir -r /dev/stdin
|
||||
|
|
@ -45,13 +39,6 @@ COPY peregrine/scrapers/ /app/scrapers/
|
|||
|
||||
COPY peregrine/ .
|
||||
|
||||
# Remove per-user config files that are gitignored but may exist locally.
|
||||
# Defense-in-depth: the parent .dockerignore should already exclude these,
|
||||
# but an explicit rm guarantees they never end up in the cloud image.
|
||||
RUN rm -f config/user.yaml config/plain_text_resume.yaml config/notion.yaml \
|
||||
config/email.yaml config/tokens.yaml config/craigslist.yaml \
|
||||
config/adzuna.yaml .env
|
||||
|
||||
EXPOSE 8501
|
||||
|
||||
CMD ["streamlit", "run", "app/app.py", \
|
||||
|
|
|
|||
|
|
@ -1,153 +0,0 @@
|
|||
# Peregrine → xanderland.tv Setup Handoff
|
||||
|
||||
**Written from:** dev machine (CircuitForge dev env)
|
||||
**Target:** xanderland.tv (beta tester, rootful Podman + systemd)
|
||||
**Date:** 2026-02-27
|
||||
|
||||
---
|
||||
|
||||
## What we're doing
|
||||
|
||||
Getting Peregrine running on the beta tester's server as a Podman container managed by systemd. He already runs SearXNG and other services in the same style — rootful Podman with `--net=host`, `--restart=unless-stopped`, registered as systemd units.
|
||||
|
||||
The script `podman-standalone.sh` in the repo root handles the container setup.
|
||||
|
||||
---
|
||||
|
||||
## Step 1 — Get the repo onto xanderland.tv
|
||||
|
||||
From navi (or directly if you have a route):
|
||||
|
||||
```bash
|
||||
ssh xanderland.tv "sudo git clone <repo-url> /opt/peregrine"
|
||||
```
|
||||
|
||||
Or if it's already there, just pull:
|
||||
|
||||
```bash
|
||||
ssh xanderland.tv "cd /opt/peregrine && sudo git pull"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Step 2 — Verify /opt/peregrine looks right
|
||||
|
||||
```bash
|
||||
ssh xanderland.tv "ls /opt/peregrine"
|
||||
```
|
||||
|
||||
Expect to see: `Dockerfile`, `compose.yml`, `manage.sh`, `podman-standalone.sh`, `config/`, `app/`, `scripts/`, etc.
|
||||
|
||||
---
|
||||
|
||||
## Step 3 — Config
|
||||
|
||||
```bash
|
||||
ssh xanderland.tv
|
||||
cd /opt/peregrine
|
||||
sudo mkdir -p data
|
||||
sudo cp config/llm.yaml.example config/llm.yaml
|
||||
sudo cp config/notion.yaml.example config/notion.yaml # only if he wants Notion sync
|
||||
```
|
||||
|
||||
Then edit `config/llm.yaml` and set `searxng_url` to his existing SearXNG instance
|
||||
(default is `http://localhost:8888` — confirm his actual port).
|
||||
|
||||
He won't need Anthropic/OpenAI keys to start — the setup wizard lets him pick local Ollama
|
||||
or whatever he has running.
|
||||
|
||||
---
|
||||
|
||||
## Step 4 — Fix DOCS_DIR in the script
|
||||
|
||||
The script defaults to `DOCS_DIR=/Library/Documents/JobSearch`, which is the original user's path.
|
||||
Update it to wherever his job search documents actually live, or a placeholder empty dir:
|
||||
|
||||
```bash
|
||||
sudo mkdir -p /opt/peregrine/docs # placeholder if he has no docs yet
|
||||
```
|
||||
|
||||
Then edit the script:
|
||||
```bash
|
||||
sudo sed -i 's|DOCS_DIR=.*|DOCS_DIR=/opt/peregrine/docs|' /opt/peregrine/podman-standalone.sh
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Step 5 — Build the image
|
||||
|
||||
```bash
|
||||
ssh xanderland.tv "cd /opt/peregrine && sudo podman build -t localhost/peregrine:latest ."
|
||||
```
|
||||
|
||||
Takes a few minutes on first run (downloads python:3.11-slim, installs deps).
|
||||
|
||||
---
|
||||
|
||||
## Step 6 — Run the script
|
||||
|
||||
```bash
|
||||
ssh xanderland.tv "sudo bash /opt/peregrine/podman-standalone.sh"
|
||||
```
|
||||
|
||||
This starts a single container (`peregrine`) with `--net=host` and `--restart=unless-stopped`.
|
||||
SearXNG is NOT included — his existing instance is used.
|
||||
|
||||
Verify it came up:
|
||||
```bash
|
||||
ssh xanderland.tv "sudo podman ps | grep peregrine"
|
||||
ssh xanderland.tv "sudo podman logs peregrine"
|
||||
```
|
||||
|
||||
Health check endpoint: `http://xanderland.tv:8501/_stcore/health`
|
||||
|
||||
---
|
||||
|
||||
## Step 7 — Register as a systemd service
|
||||
|
||||
```bash
|
||||
ssh xanderland.tv
|
||||
sudo podman generate systemd --new --name peregrine \
|
||||
| sudo tee /etc/systemd/system/peregrine.service
|
||||
sudo systemctl daemon-reload
|
||||
sudo systemctl enable --now peregrine
|
||||
```
|
||||
|
||||
Confirm:
|
||||
```bash
|
||||
sudo systemctl status peregrine
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Step 8 — First-run wizard
|
||||
|
||||
Open `http://xanderland.tv:8501` in a browser.
|
||||
|
||||
The setup wizard (page 0) will gate the app until `config/user.yaml` is created.
|
||||
He'll fill in his profile — name, resume, LLM backend preferences. This writes
|
||||
`config/user.yaml` and unlocks the rest of the UI.
|
||||
|
||||
---
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
| Symptom | Check |
|
||||
|---------|-------|
|
||||
| Container exits immediately | `sudo podman logs peregrine` — usually a missing config file |
|
||||
| Port 8501 already in use | `sudo ss -tlnp \| grep 8501` — something else on that port |
|
||||
| SearXNG not reachable | Confirm `searxng_url` in `config/llm.yaml` and that JSON format is enabled in SearXNG settings |
|
||||
| Wizard loops / won't save | `config/` volume mount permissions — `sudo chown -R 1000:1000 /opt/peregrine/config` |
|
||||
|
||||
---
|
||||
|
||||
## To update Peregrine later
|
||||
|
||||
```bash
|
||||
cd /opt/peregrine
|
||||
sudo git pull
|
||||
sudo podman build -t localhost/peregrine:latest .
|
||||
sudo podman restart peregrine
|
||||
```
|
||||
|
||||
No need to regenerate the systemd unit — it was generated with `--new`, so the service creates a fresh container from the current image each time it starts.
|
||||
2
Makefile
|
|
@ -45,7 +45,7 @@ endif
|
|||
PROFILE_ARG := $(if $(filter remote,$(PROFILE)),,--profile $(PROFILE))
|
||||
|
||||
setup: ## Install dependencies (Docker or Podman + NVIDIA toolkit)
|
||||
@bash install.sh
|
||||
@bash setup.sh
|
||||
|
||||
preflight: ## Check ports + system resources; write .env
|
||||
@$(PYTHON) scripts/preflight.py
|
||||
|
|
|
|||
253
README.md
|
|
@ -1,143 +1,195 @@
|
|||
<div align="center">
|
||||
<img src="web/public/peregrine.svg" alt="Peregrine" width="120" />
|
||||
# Peregrine
|
||||
|
||||
<h1>Peregrine</h1>
|
||||
> **Primary development** happens at [git.opensourcesolarpunk.com](https://git.opensourcesolarpunk.com/Circuit-Forge/peregrine) — GitHub and Codeberg are push mirrors. Issues and PRs are welcome on either platform.
|
||||
|
||||
<p><strong>Job search pipeline — by <a href="https://circuitforge.tech">Circuit Forge LLC</a></strong></p>
|
||||
[](./LICENSE-BSL)
|
||||
[](https://github.com/CircuitForge/peregrine/actions/workflows/ci.yml)
|
||||
|
||||
<p><em>AI for the tasks the system made hard on purpose.</em></p>
|
||||
**Job search pipeline — by [Circuit Forge LLC](https://circuitforge.tech)**
|
||||
|
||||
[](#license)
|
||||
[](https://github.com/CircuitForgeLLC/peregrine/actions/workflows/ci.yml)
|
||||
[](https://docs.circuitforge.tech/peregrine/)
|
||||
[](https://git.opensourcesolarpunk.com/Circuit-Forge/peregrine/releases)
|
||||
|
||||
<p>
|
||||
<a href="https://demo.circuitforge.tech/peregrine"><strong>Live Demo</strong></a> —
|
||||
no account required, nothing saved |
|
||||
<a href="https://docs.circuitforge.tech/peregrine/">Docs</a> |
|
||||
<a href="https://git.opensourcesolarpunk.com/Circuit-Forge/peregrine/issues">Issues</a>
|
||||
</p>
|
||||
|
||||
<blockquote>
|
||||
<strong>Primary development</strong> happens at
|
||||
<a href="https://git.opensourcesolarpunk.com/Circuit-Forge/peregrine">git.opensourcesolarpunk.com/Circuit-Forge/peregrine</a>.
|
||||
GitHub and Codeberg are push mirrors. Issues and PRs are welcome on any platform.
|
||||
</blockquote>
|
||||
</div>
|
||||
> *"Tools for the jobs that the system made hard on purpose."*
|
||||
|
||||
---
|
||||
|
||||
<table>
|
||||
<tr>
|
||||
<td><img src="docs/screenshots/01-dashboard.png" alt="Dashboard with pipeline stats and discovery controls"/></td>
|
||||
<td><img src="docs/screenshots/02-review.png" alt="Job review — approve, skip, or reject with keyboard shortcuts"/></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><img src="docs/screenshots/03-apply.png" alt="Apply workspace with LLM-drafted cover letter"/></td>
|
||||
<td><img src="docs/screenshots/04-interviews.png" alt="Interview kanban with company research and recruiter emails"/></td>
|
||||
</tr>
|
||||
</table>
|
||||
Job search is a second job nobody hired you for.
|
||||
|
||||
---
|
||||
ATS filters designed to reject. Job boards that show the same listing eight times. Cover letter number forty-seven for a role that might already be filled. Hours of prep for a phone screen that lasts twelve minutes.
|
||||
|
||||
## Why Peregrine?
|
||||
Peregrine handles the pipeline — discovery, matching, tracking, drafting, and prep — so you can spend your time doing the work you actually want to be doing.
|
||||
|
||||
Job search is a second job nobody hired you for. ATS (applicant tracking system) filters designed to reject. Boards that show the same listing eight times. Cover letter number forty-seven for a role that might already be filled. Hours of prep for a phone screen that lasts twelve minutes.
|
||||
**LLM support is optional.** The full discovery and tracking pipeline works without one. When you do configure a backend, the LLM drafts the parts that are genuinely miserable — cover letters, company research briefs, interview prep sheets — and waits for your approval before anything goes anywhere.
|
||||
|
||||
- **Handles the full pipeline.** Discover, filter, match, draft, track — one tool, one database, no duct tape.
|
||||
- **LLM is optional and local-first.** Discovery and tracking work with no LLM at all. When you do configure one, it runs on your hardware by default. Cloud inference is a fallback, not the default path.
|
||||
- **Ghost-post detection baked in.** Listings that have been open too long or look like sourcing traps get flagged before you spend time on them.
|
||||
- **Human approval at every step.** LLM drafts cover letters and research briefs; you approve before anything goes anywhere. Peregrine never submits an application on your behalf.
|
||||
- **Privacy · Safety · Accessibility** are architectural constraints, not aspirational copy. No PII (personally identifiable information) logging, no behavioral profiling, no dark patterns.
|
||||
### What Peregrine does not do
|
||||
|
||||
Peregrine does **not** submit job applications for you. You still have to go to each employer's site and click apply yourself.
|
||||
|
||||
This is intentional. Automated mass-applying is a bad experience for everyone — it's also a trust violation with employers who took the time to post a real role. Peregrine is a preparation and organization tool, not a bot.
|
||||
|
||||
What it *does* cover is everything before and after that click: finding the jobs, matching them against your resume, generating cover letters and prep materials, and once you've applied — tracking where you stand, classifying the emails that come back, and surfacing company research when an interview lands on your calendar. The submit button is yours. The rest of the grind is ours.
|
||||
|
||||
> **Exception:** [AIHawk](https://github.com/nicolomantini/LinkedIn-Easy-Apply) is a separate, optional tool that handles LinkedIn Easy Apply automation. Peregrine integrates with it for AIHawk-compatible profiles, but it is not part of Peregrine's core pipeline.
|
||||
|
||||
---
|
||||
|
||||
## Quick Start
|
||||
|
||||
One-line install:
|
||||
|
||||
```bash
|
||||
bash <(curl -fsSL https://git.opensourcesolarpunk.com/Circuit-Forge/peregrine/raw/branch/main/install.sh)
|
||||
```
|
||||
|
||||
Or clone and run manually:
|
||||
**1. Clone and install dependencies** (Docker, NVIDIA toolkit if needed):
|
||||
|
||||
```bash
|
||||
git clone https://git.opensourcesolarpunk.com/Circuit-Forge/peregrine
|
||||
cd peregrine
|
||||
./manage.sh setup
|
||||
./manage.sh start
|
||||
```
|
||||
|
||||
Open **http://localhost:8502** — the setup wizard walks you through the rest.
|
||||
|
||||
> **macOS / Apple Silicon:** install Ollama natively via Homebrew before starting for Metal GPU-accelerated inference. `install.sh` handles this automatically.
|
||||
> **Windows:** use WSL2 with Ubuntu.
|
||||
|
||||
### Inference profiles
|
||||
**2. Start Peregrine:**
|
||||
|
||||
```bash
|
||||
./manage.sh start # remote — no GPU; LLM calls go to Anthropic / OpenAI
|
||||
./manage.sh start --profile cpu # local Ollama on CPU (or Metal via native Ollama on macOS)
|
||||
./manage.sh start --profile single-gpu # Ollama + vision on GPU 0 (NVIDIA only)
|
||||
./manage.sh start --profile dual-gpu # Ollama + vLLM on two NVIDIA GPUs
|
||||
./manage.sh start # remote profile (API-only, no GPU)
|
||||
./manage.sh start --profile cpu # local Ollama (CPU, or Metal GPU on Apple Silicon — see below)
|
||||
./manage.sh start --profile single-gpu # Ollama + Vision on GPU 0 (NVIDIA only)
|
||||
./manage.sh start --profile dual-gpu # Ollama + Vision + vLLM (GPU 0 + 1) (NVIDIA only)
|
||||
```
|
||||
|
||||
Or use `make` directly:
|
||||
|
||||
```bash
|
||||
make start # remote profile
|
||||
make start PROFILE=single-gpu
|
||||
```
|
||||
|
||||
**3.** Open http://localhost:8501 — the setup wizard guides you through the rest.
|
||||
|
||||
> **macOS / Apple Silicon:** Docker Desktop must be running. For Metal GPU-accelerated inference, install Ollama natively before starting — `setup.sh` will prompt you to do this. See [Apple Silicon GPU](#apple-silicon-gpu) below.
|
||||
> **Windows:** Not supported — use WSL2 with Ubuntu.
|
||||
|
||||
### Installing to `/opt` or other system directories
|
||||
|
||||
If you clone into a root-owned directory (e.g. `sudo git clone ... /opt/peregrine`), two things need fixing:
|
||||
|
||||
**1. Git ownership warning** (`fatal: detected dubious ownership`) — `./manage.sh setup` fixes this automatically. If you need git to work *before* running setup:
|
||||
|
||||
```bash
|
||||
git config --global --add safe.directory /opt/peregrine
|
||||
```
|
||||
|
||||
**2. Preflight write access** — preflight writes `.env` and `compose.override.yml` into the repo directory. Fix ownership once:
|
||||
|
||||
```bash
|
||||
sudo chown -R $USER:$USER /opt/peregrine
|
||||
```
|
||||
|
||||
After that, run everything without `sudo`.
|
||||
|
||||
### Podman
|
||||
|
||||
Podman is rootless by default — **no `sudo` needed.** `./manage.sh setup` will configure `podman-compose` if it isn't already present.
|
||||
|
||||
### Docker
|
||||
|
||||
After `./manage.sh setup`, log out and back in for docker group membership to take effect. Until then, prefix commands with `sudo`. After re-login, `sudo` is no longer required.
|
||||
|
||||
---
|
||||
|
||||
## Inference Profiles
|
||||
|
||||
| Profile | Services started | Use case |
|
||||
|---------|-----------------|----------|
|
||||
| `remote` | app + searxng | No GPU; LLM calls go to Anthropic / OpenAI |
|
||||
| `cpu` | app + ollama + searxng | No GPU; local models on CPU. On Apple Silicon, use with native Ollama for Metal acceleration — see below. |
|
||||
| `single-gpu` | app + ollama + vision + searxng | One **NVIDIA** GPU: cover letters, research, vision |
|
||||
| `dual-gpu` | app + ollama + vllm + vision + searxng | Two **NVIDIA** GPUs: GPU 0 = Ollama, GPU 1 = vLLM |
|
||||
|
||||
### Apple Silicon GPU
|
||||
|
||||
Docker Desktop on macOS runs in a Linux VM — it cannot access the Apple GPU. Metal-accelerated inference requires Ollama to run **natively** on the host.
|
||||
|
||||
`setup.sh` handles this automatically: it offers to install Ollama via Homebrew, starts it as a background service, and explains what happens next. If Ollama is running on port 11434 when you start Peregrine, preflight detects it, stubs out the Docker Ollama container, and routes inference through the native process — which uses Metal automatically.
|
||||
|
||||
To do it manually:
|
||||
|
||||
```bash
|
||||
brew install ollama
|
||||
brew services start ollama # starts at login, uses Metal GPU
|
||||
./manage.sh start --profile cpu # preflight adopts native Ollama; Docker container is skipped
|
||||
```
|
||||
|
||||
The `cpu` profile label is a slight misnomer in this context — Ollama will be running on the GPU. `single-gpu` and `dual-gpu` profiles are NVIDIA-specific and not applicable on Mac.
|
||||
|
||||
---
|
||||
|
||||
## First-Run Wizard
|
||||
|
||||
On first launch the setup wizard walks through seven steps:
|
||||
|
||||
1. **Hardware** — detects NVIDIA GPUs (Linux) or Apple Silicon GPU (macOS) and recommends a profile
|
||||
2. **Tier** — choose free, paid, or premium (or use `dev_tier_override` for local testing)
|
||||
3. **Identity** — name, email, phone, LinkedIn, career summary
|
||||
4. **Resume** — upload a PDF/DOCX for LLM parsing, or use the guided form builder
|
||||
5. **Inference** — configure LLM backends and API keys
|
||||
6. **Search** — job titles, locations, boards, keywords, blocklist
|
||||
7. **Integrations** — optional cloud storage, calendar, and notification services
|
||||
|
||||
Wizard state is saved after each step — a crash or browser close resumes where you left off.
|
||||
Re-enter the wizard any time via **Settings → Developer → Reset wizard**.
|
||||
|
||||
---
|
||||
|
||||
## Features
|
||||
|
||||
| Feature | Tier |
|
||||
|---------|------|
|
||||
| Job discovery — LinkedIn, Indeed, Glassdoor, Adzuna, The Ladders | Free |
|
||||
| Ghost-post detection | Free |
|
||||
| Resume keyword matching and gap analysis | Free |
|
||||
| Document storage sync (Google Drive, Dropbox, OneDrive, Nextcloud) | Free |
|
||||
| Job discovery (JobSpy + custom boards) | Free |
|
||||
| Resume keyword matching & gap analysis | Free |
|
||||
| Document storage sync (Google Drive, Dropbox, OneDrive, MEGA, Nextcloud) | Free |
|
||||
| Webhook notifications (Discord, Home Assistant) | Free |
|
||||
| Vue 3 SPA — full UI with onboarding wizard, job board, apply workspace, interview kanban | Free |
|
||||
| **Cover letter generation** | Free with LLM¹ |
|
||||
| **Company research briefs** | Free with LLM¹ |
|
||||
| **Interview prep and practice Q&A** | Free with LLM¹ |
|
||||
| **Interview prep & practice Q&A** | Free with LLM¹ |
|
||||
| **Survey assistant** (culture-fit Q&A, screenshot analysis) | Free with LLM¹ |
|
||||
| **Wizard helpers** (career summary, bullet expansion, skill suggestions, job title suggestions, mission notes) | Free with LLM¹ |
|
||||
| Managed cloud LLM (no API key needed) | Paid |
|
||||
| Email sync and auto-classification | Paid |
|
||||
| Email sync & auto-classification | Paid |
|
||||
| LLM-powered keyword blocklist | Paid |
|
||||
| Job tracking integrations (Notion, Airtable, Google Sheets) | Paid |
|
||||
| Calendar sync (Google, Apple) | Paid |
|
||||
| Slack notifications | Paid |
|
||||
| CircuitForge shared cover-letter model | Paid |
|
||||
| **Voice guidelines** (custom writing style and tone) | Premium with LLM¹ |
|
||||
| Cover letter model fine-tuning — your writing, your model | Premium |
|
||||
| Vue 3 SPA beta UI | Paid |
|
||||
| **Voice guidelines** (custom writing style & tone) | Premium with LLM¹ ² |
|
||||
| Cover letter model fine-tuning (your writing, your model) | Premium |
|
||||
| Multi-user support | Premium |
|
||||
| Human-in-the-loop operator (CAPTCHAs, phone calls, wet signatures) | Ultra |
|
||||
|
||||
¹ **BYOK (bring your own key) unlock:** configure any LLM backend — a local [Ollama](https://ollama.com) or vLLM instance, or your own API key (Anthropic, OpenAI-compatible) — and all "Free with LLM" and "Premium with LLM" features unlock at no charge.
|
||||
¹ **BYOK (bring your own key/backend) unlock:** configure any LLM backend — a local [Ollama](https://ollama.com) or vLLM instance,
|
||||
or your own API key (Anthropic, OpenAI-compatible) — and all features marked **Free with LLM** or **Premium with LLM**
|
||||
unlock at no charge. The paid tier earns its price by providing managed cloud inference so you
|
||||
don't need a key at all, plus integrations and email sync.
|
||||
|
||||
² Without a configured LLM backend, **Voice guidelines** requires the Premium tier. With BYOK, it unlocks at any tier.
|
||||
|
||||
---
|
||||
|
||||
## What Peregrine does not do
|
||||
## Email Sync
|
||||
|
||||
Peregrine does **not** submit job applications for you. You still click apply on the employer's site.
|
||||
Monitors your inbox for job-related emails and automatically updates job stages (interview requests, rejections, survey links, offers).
|
||||
|
||||
This is intentional. Automated mass-applying is a bad experience for everyone and a trust violation with employers who posted a real role. The submit button is yours. The rest of the grind is ours.
|
||||
Configure in **Settings → Email**. Requires IMAP access and, for Gmail, an App Password.
|
||||
|
||||
---
|
||||
|
||||
## Stack
|
||||
## Integrations
|
||||
|
||||
| Layer | Technology |
|
||||
|-------|-----------|
|
||||
| Frontend | Vue 3 SPA (Vite) |
|
||||
| Backend | FastAPI + Python |
|
||||
| Database | SQLite (local, per-user) |
|
||||
| Job scraping | [JobSpy](https://github.com/Bunsly/JobSpy) + custom board scrapers |
|
||||
| LLM inference | Ollama, vLLM, Anthropic, OpenAI-compatible — configurable fallback chain |
|
||||
| Vision | moondream2 (survey screenshot analysis) |
|
||||
| Container | Docker / Podman |
|
||||
Connect external services in **Settings → Integrations**:
|
||||
|
||||
- **Job tracking:** Notion, Airtable, Google Sheets
|
||||
- **Document storage:** Google Drive, Dropbox, OneDrive, MEGA, Nextcloud
|
||||
- **Calendar:** Google Calendar, Apple Calendar (CalDAV)
|
||||
- **Notifications:** Slack, Discord (webhook), Home Assistant
|
||||
|
||||
---
|
||||
|
||||
## manage.sh reference
|
||||
## CLI Reference (`manage.sh`)
|
||||
|
||||
`manage.sh` is the single entry point for all common operations — no need to remember Make targets or Docker commands.
|
||||
|
||||
```
|
||||
./manage.sh setup Install Docker/Podman + NVIDIA toolkit
|
||||
|
|
@ -146,38 +198,31 @@ This is intentional. Automated mass-applying is a bad experience for everyone an
|
|||
./manage.sh restart Restart all services
|
||||
./manage.sh status Show running containers
|
||||
./manage.sh logs [service] Tail logs (default: app)
|
||||
./manage.sh update Pull latest images and rebuild app container
|
||||
./manage.sh update Pull latest images + rebuild app container
|
||||
./manage.sh preflight Check ports + resources; write .env
|
||||
./manage.sh test Run test suite
|
||||
./manage.sh prepare-training Scan docs for cover letters — outputs training JSONL
|
||||
./manage.sh finetune Run LoRA fine-tune (requires single-gpu profile or higher)
|
||||
./manage.sh prepare-training Scan docs for cover letters → training JSONL
|
||||
./manage.sh finetune Run LoRA fine-tune (needs --profile single-gpu+)
|
||||
./manage.sh open Open the web UI in your browser
|
||||
./manage.sh clean Remove containers, images, volumes (asks to confirm)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Documentation
|
||||
## Developer Docs
|
||||
|
||||
Full docs at **[docs.circuitforge.tech/peregrine](https://docs.circuitforge.tech/peregrine)**
|
||||
Full documentation at: https://docs.circuitforge.tech/peregrine
|
||||
|
||||
Bug reports and feature requests: [Forgejo issues](https://git.opensourcesolarpunk.com/Circuit-Forge/peregrine/issues)
|
||||
|
||||
---
|
||||
|
||||
## Contributing
|
||||
|
||||
Contributions are welcome. The discovery pipeline — scrapers, board integrations, matching logic — is MIT-licensed. Fork it, extend it, send PRs. AI features are BSL 1.1. See the [contributing guide](https://docs.circuitforge.tech/peregrine/developer-guide/contributing/) for conventions.
|
||||
- [Installation guide](https://docs.circuitforge.tech/peregrine/getting-started/installation/)
|
||||
- [Adding a custom job board scraper](https://docs.circuitforge.tech/peregrine/developer-guide/adding-scrapers/)
|
||||
- [Adding an integration](https://docs.circuitforge.tech/peregrine/developer-guide/adding-integrations/)
|
||||
- [Contributing](https://docs.circuitforge.tech/peregrine/developer-guide/contributing/)
|
||||
|
||||
---
|
||||
|
||||
## License
|
||||
|
||||
Peregrine uses a split license:
|
||||
|
||||
| Component | License |
|
||||
|-----------|---------|
|
||||
| Discovery pipeline — scrapers, matching, tracking | [MIT](LICENSE-MIT) |
|
||||
| LLM features — cover letter generation, company research, interview prep, survey assistant, fine-tuning | [BSL 1.1](LICENSE-BSL) — free for personal non-commercial self-hosting; commercial use or SaaS re-hosting requires a paid license; converts to MIT after four years |
|
||||
|
||||
Fine-tuned model weights are proprietary and per-user — not redistributable.
|
||||
Core discovery pipeline: [MIT](LICENSE-MIT)
|
||||
LLM features (cover letter generation, company research, interview prep, UI): [BSL 1.1](LICENSE-BSL)
|
||||
|
||||
© 2026 Circuit Forge LLC
|
||||
|
|
|
|||
297
app/Home.py
|
|
@ -14,22 +14,24 @@ sys.path.insert(0, str(Path(__file__).parent.parent))
|
|||
|
||||
from scripts.user_profile import UserProfile
|
||||
|
||||
_USER_YAML = Path(__file__).parent.parent / "config" / "user.yaml"
|
||||
_profile = UserProfile(_USER_YAML) if UserProfile.exists(_USER_YAML) else None
|
||||
_name = _profile.name if _profile else "Job Seeker"
|
||||
|
||||
from scripts.db import init_db, get_job_counts, purge_jobs, purge_email_data, \
|
||||
purge_non_remote, archive_jobs, kill_stuck_tasks, cancel_task, \
|
||||
get_task_for_job, get_active_tasks, insert_job, get_existing_urls
|
||||
purge_non_remote, archive_jobs, kill_stuck_tasks, get_task_for_job, get_active_tasks, \
|
||||
insert_job, get_existing_urls
|
||||
from scripts.task_runner import submit_task
|
||||
from app.cloud_session import resolve_session, get_db_path, get_config_dir
|
||||
from app.cloud_session import resolve_session, get_db_path
|
||||
|
||||
_CONFIG_DIR = Path(__file__).parent.parent / "config"
|
||||
_NOTION_CONNECTED = (_CONFIG_DIR / "integrations" / "notion.yaml").exists()
|
||||
|
||||
resolve_session("peregrine")
|
||||
init_db(get_db_path())
|
||||
|
||||
_CONFIG_DIR = get_config_dir()
|
||||
_USER_YAML = _CONFIG_DIR / "user.yaml"
|
||||
_profile = UserProfile(_USER_YAML) if UserProfile.exists(_USER_YAML) else None
|
||||
_name = _profile.name if _profile else "Job Seeker"
|
||||
|
||||
def _email_configured() -> bool:
|
||||
_e = get_config_dir() / "email.yaml"
|
||||
_e = Path(__file__).parent.parent / "config" / "email.yaml"
|
||||
if not _e.exists():
|
||||
return False
|
||||
import yaml as _yaml
|
||||
|
|
@ -37,7 +39,7 @@ def _email_configured() -> bool:
|
|||
return bool(_cfg.get("username") or _cfg.get("user") or _cfg.get("imap_host"))
|
||||
|
||||
def _notion_configured() -> bool:
|
||||
_n = get_config_dir() / "notion.yaml"
|
||||
_n = Path(__file__).parent.parent / "config" / "notion.yaml"
|
||||
if not _n.exists():
|
||||
return False
|
||||
import yaml as _yaml
|
||||
|
|
@ -45,7 +47,7 @@ def _notion_configured() -> bool:
|
|||
return bool(_cfg.get("token"))
|
||||
|
||||
def _keywords_configured() -> bool:
|
||||
_k = get_config_dir() / "resume_keywords.yaml"
|
||||
_k = Path(__file__).parent.parent / "config" / "resume_keywords.yaml"
|
||||
if not _k.exists():
|
||||
return False
|
||||
import yaml as _yaml
|
||||
|
|
@ -132,7 +134,7 @@ def _queue_url_imports(db_path: Path, urls: list) -> int:
|
|||
|
||||
|
||||
st.title(f"🔍 {_name}'s Job Search")
|
||||
st.caption("Discover → Review → Sync" + (" to Notion" if _notion_configured() else ""))
|
||||
st.caption("Discover → Review → Sync to Notion")
|
||||
|
||||
st.divider()
|
||||
|
||||
|
|
@ -144,7 +146,7 @@ def _live_counts():
|
|||
col1.metric("Pending Review", counts.get("pending", 0))
|
||||
col2.metric("Approved", counts.get("approved", 0))
|
||||
col3.metric("Applied", counts.get("applied", 0))
|
||||
col4.metric("Synced" + (" to Notion" if _notion_configured() else ""), counts.get("synced", 0))
|
||||
col4.metric("Synced to Notion", counts.get("synced", 0))
|
||||
col5.metric("Rejected", counts.get("rejected", 0))
|
||||
|
||||
|
||||
|
|
@ -235,7 +237,7 @@ with mid:
|
|||
|
||||
with right:
|
||||
approved_count = get_job_counts(get_db_path()).get("approved", 0)
|
||||
if _notion_configured():
|
||||
if _NOTION_CONNECTED:
|
||||
st.subheader("Send to Notion")
|
||||
st.caption("Push all approved jobs to your Notion tracking database.")
|
||||
if approved_count == 0:
|
||||
|
|
@ -374,144 +376,177 @@ _scrape_status()
|
|||
|
||||
st.divider()
|
||||
|
||||
# ── Danger zone ───────────────────────────────────────────────────────────────
|
||||
# ── Danger zone: purge + re-scrape ────────────────────────────────────────────
|
||||
with st.expander("⚠️ Danger Zone", expanded=False):
|
||||
|
||||
# ── Queue reset (the common case) ─────────────────────────────────────────
|
||||
st.markdown("**Queue reset**")
|
||||
st.caption(
|
||||
"Archive clears your review queue while keeping job URLs for dedup, "
|
||||
"so the same listings won't resurface on the next discovery run. "
|
||||
"Use hard purge only if you want a full clean slate including dedup history."
|
||||
"**Purge** permanently deletes jobs from the local database. "
|
||||
"Applied and synced jobs are never touched."
|
||||
)
|
||||
|
||||
_scope = st.radio(
|
||||
"Clear scope",
|
||||
["Pending only", "Pending + approved (stale search)"],
|
||||
horizontal=True,
|
||||
label_visibility="collapsed",
|
||||
)
|
||||
_scope_statuses = (
|
||||
["pending"] if _scope == "Pending only" else ["pending", "approved"]
|
||||
)
|
||||
purge_col, rescrape_col, email_col, tasks_col = st.columns(4)
|
||||
|
||||
_qc1, _qc2, _qc3 = st.columns([2, 2, 4])
|
||||
if _qc1.button("📦 Archive & reset", use_container_width=True, type="primary"):
|
||||
st.session_state["confirm_dz"] = "archive"
|
||||
if _qc2.button("🗑 Hard purge (delete)", use_container_width=True):
|
||||
st.session_state["confirm_dz"] = "purge"
|
||||
with purge_col:
|
||||
st.markdown("**Purge pending & rejected**")
|
||||
st.caption("Removes all _pending_ and _rejected_ listings so the next discovery starts fresh.")
|
||||
if st.button("🗑 Purge Pending + Rejected", use_container_width=True):
|
||||
st.session_state["confirm_purge"] = "partial"
|
||||
|
||||
if st.session_state.get("confirm_dz") == "archive":
|
||||
st.info(
|
||||
f"Archive **{', '.join(_scope_statuses)}** jobs? "
|
||||
"URLs are kept for dedup — nothing is permanently deleted."
|
||||
)
|
||||
_dc1, _dc2 = st.columns(2)
|
||||
if _dc1.button("Yes, archive", type="primary", use_container_width=True, key="dz_archive_confirm"):
|
||||
n = archive_jobs(get_db_path(), statuses=_scope_statuses)
|
||||
st.success(f"Archived {n} jobs.")
|
||||
st.session_state.pop("confirm_dz", None)
|
||||
if st.session_state.get("confirm_purge") == "partial":
|
||||
st.warning("Are you sure? This cannot be undone.")
|
||||
c1, c2 = st.columns(2)
|
||||
if c1.button("Yes, purge", type="primary", use_container_width=True):
|
||||
deleted = purge_jobs(get_db_path(), statuses=["pending", "rejected"])
|
||||
st.success(f"Purged {deleted} jobs.")
|
||||
st.session_state.pop("confirm_purge", None)
|
||||
st.rerun()
|
||||
if _dc2.button("Cancel", use_container_width=True, key="dz_archive_cancel"):
|
||||
st.session_state.pop("confirm_dz", None)
|
||||
if c2.button("Cancel", use_container_width=True):
|
||||
st.session_state.pop("confirm_purge", None)
|
||||
st.rerun()
|
||||
|
||||
if st.session_state.get("confirm_dz") == "purge":
|
||||
st.warning(
|
||||
f"Permanently delete **{', '.join(_scope_statuses)}** jobs? "
|
||||
"This removes the URLs from dedup history too. Cannot be undone."
|
||||
)
|
||||
_dc1, _dc2 = st.columns(2)
|
||||
if _dc1.button("Yes, delete", type="primary", use_container_width=True, key="dz_purge_confirm"):
|
||||
n = purge_jobs(get_db_path(), statuses=_scope_statuses)
|
||||
st.success(f"Deleted {n} jobs.")
|
||||
st.session_state.pop("confirm_dz", None)
|
||||
with email_col:
|
||||
st.markdown("**Purge email data**")
|
||||
st.caption("Clears all email thread logs and email-sourced pending jobs so the next sync starts fresh.")
|
||||
if st.button("📧 Purge Email Data", use_container_width=True):
|
||||
st.session_state["confirm_purge"] = "email"
|
||||
|
||||
if st.session_state.get("confirm_purge") == "email":
|
||||
st.warning("This deletes all email contacts and email-sourced jobs. Cannot be undone.")
|
||||
c1, c2 = st.columns(2)
|
||||
if c1.button("Yes, purge emails", type="primary", use_container_width=True):
|
||||
contacts, jobs = purge_email_data(get_db_path())
|
||||
st.success(f"Purged {contacts} email contacts, {jobs} email jobs.")
|
||||
st.session_state.pop("confirm_purge", None)
|
||||
st.rerun()
|
||||
if _dc2.button("Cancel", use_container_width=True, key="dz_purge_cancel"):
|
||||
st.session_state.pop("confirm_dz", None)
|
||||
if c2.button("Cancel ", use_container_width=True):
|
||||
st.session_state.pop("confirm_purge", None)
|
||||
st.rerun()
|
||||
|
||||
st.divider()
|
||||
|
||||
# ── Background tasks ──────────────────────────────────────────────────────
|
||||
with tasks_col:
|
||||
_active = get_active_tasks(get_db_path())
|
||||
st.markdown(f"**Background tasks** — {len(_active)} active")
|
||||
|
||||
if _active:
|
||||
_task_icons = {"cover_letter": "✉️", "research": "🔍", "discovery": "🌐", "enrich_descriptions": "📝"}
|
||||
for _t in _active:
|
||||
_tc1, _tc2, _tc3 = st.columns([3, 4, 2])
|
||||
_icon = _task_icons.get(_t["task_type"], "⚙️")
|
||||
_tc1.caption(f"{_icon} `{_t['task_type']}`")
|
||||
_job_label = f"{_t['title']} @ {_t['company']}" if _t.get("title") else f"job #{_t['job_id']}"
|
||||
_tc2.caption(_job_label)
|
||||
_tc3.caption(f"_{_t['status']}_")
|
||||
if st.button("✕ Cancel", key=f"dz_cancel_task_{_t['id']}", use_container_width=True):
|
||||
cancel_task(get_db_path(), _t["id"])
|
||||
st.rerun()
|
||||
st.caption("")
|
||||
|
||||
_kill_col, _ = st.columns([2, 6])
|
||||
if _kill_col.button("⏹ Kill all stuck", use_container_width=True, disabled=len(_active) == 0):
|
||||
st.markdown("**Kill stuck tasks**")
|
||||
st.caption(f"Force-fail all queued/running background tasks. Currently **{len(_active)}** active.")
|
||||
if st.button("⏹ Kill All Tasks", use_container_width=True, disabled=len(_active) == 0):
|
||||
killed = kill_stuck_tasks(get_db_path())
|
||||
st.success(f"Killed {killed} task(s).")
|
||||
st.rerun()
|
||||
|
||||
st.divider()
|
||||
with rescrape_col:
|
||||
st.markdown("**Purge all & re-scrape**")
|
||||
st.caption("Wipes _all_ non-applied, non-synced jobs then immediately runs a fresh discovery.")
|
||||
if st.button("🔄 Purge All + Re-scrape", use_container_width=True):
|
||||
st.session_state["confirm_purge"] = "full"
|
||||
|
||||
# ── Rarely needed (collapsed) ─────────────────────────────────────────────
|
||||
with st.expander("More options", expanded=False):
|
||||
_rare1, _rare2, _rare3 = st.columns(3)
|
||||
|
||||
with _rare1:
|
||||
st.markdown("**Purge email data**")
|
||||
st.caption("Clears all email thread logs and email-sourced pending jobs.")
|
||||
if st.button("📧 Purge Email Data", use_container_width=True):
|
||||
st.session_state["confirm_dz"] = "email"
|
||||
if st.session_state.get("confirm_dz") == "email":
|
||||
st.warning("Deletes all email contacts and email-sourced jobs. Cannot be undone.")
|
||||
_ec1, _ec2 = st.columns(2)
|
||||
if _ec1.button("Yes, purge emails", type="primary", use_container_width=True, key="dz_email_confirm"):
|
||||
contacts, jobs = purge_email_data(get_db_path())
|
||||
st.success(f"Purged {contacts} email contacts, {jobs} email jobs.")
|
||||
st.session_state.pop("confirm_dz", None)
|
||||
st.rerun()
|
||||
if _ec2.button("Cancel", use_container_width=True, key="dz_email_cancel"):
|
||||
st.session_state.pop("confirm_dz", None)
|
||||
st.rerun()
|
||||
|
||||
with _rare2:
|
||||
st.markdown("**Purge non-remote**")
|
||||
st.caption("Removes pending/approved/rejected on-site listings from the DB.")
|
||||
if st.button("🏢 Purge On-site Jobs", use_container_width=True):
|
||||
st.session_state["confirm_dz"] = "non_remote"
|
||||
if st.session_state.get("confirm_dz") == "non_remote":
|
||||
st.warning("Deletes all non-remote jobs not yet applied to. Cannot be undone.")
|
||||
_rc1, _rc2 = st.columns(2)
|
||||
if _rc1.button("Yes, purge on-site", type="primary", use_container_width=True, key="dz_nonremote_confirm"):
|
||||
deleted = purge_non_remote(get_db_path())
|
||||
st.success(f"Purged {deleted} non-remote jobs.")
|
||||
st.session_state.pop("confirm_dz", None)
|
||||
st.rerun()
|
||||
if _rc2.button("Cancel", use_container_width=True, key="dz_nonremote_cancel"):
|
||||
st.session_state.pop("confirm_dz", None)
|
||||
st.rerun()
|
||||
|
||||
with _rare3:
|
||||
st.markdown("**Wipe all + re-scrape**")
|
||||
st.caption("Deletes all non-applied jobs then immediately runs a fresh discovery.")
|
||||
if st.button("🔄 Wipe + Re-scrape", use_container_width=True):
|
||||
st.session_state["confirm_dz"] = "rescrape"
|
||||
if st.session_state.get("confirm_dz") == "rescrape":
|
||||
st.warning("Wipes ALL pending, approved, and rejected jobs, then re-scrapes. Applied and synced records are kept.")
|
||||
_wc1, _wc2 = st.columns(2)
|
||||
if _wc1.button("Yes, wipe + scrape", type="primary", use_container_width=True, key="dz_rescrape_confirm"):
|
||||
if st.session_state.get("confirm_purge") == "full":
|
||||
st.warning("This will delete ALL pending, approved, and rejected jobs, then re-scrape. Applied and synced records are kept.")
|
||||
c1, c2 = st.columns(2)
|
||||
if c1.button("Yes, wipe + scrape", type="primary", use_container_width=True):
|
||||
purge_jobs(get_db_path(), statuses=["pending", "approved", "rejected"])
|
||||
submit_task(get_db_path(), "discovery", 0)
|
||||
st.session_state.pop("confirm_dz", None)
|
||||
st.session_state.pop("confirm_purge", None)
|
||||
st.rerun()
|
||||
if _wc2.button("Cancel", use_container_width=True, key="dz_rescrape_cancel"):
|
||||
st.session_state.pop("confirm_dz", None)
|
||||
if c2.button("Cancel ", use_container_width=True):
|
||||
st.session_state.pop("confirm_purge", None)
|
||||
st.rerun()
|
||||
|
||||
st.divider()
|
||||
|
||||
pending_col, nonremote_col, approved_col, _ = st.columns(4)
|
||||
|
||||
with pending_col:
|
||||
st.markdown("**Purge pending review**")
|
||||
st.caption("Removes only _pending_ listings, keeping your rejected history intact.")
|
||||
if st.button("🗑 Purge Pending Only", use_container_width=True):
|
||||
st.session_state["confirm_purge"] = "pending_only"
|
||||
|
||||
if st.session_state.get("confirm_purge") == "pending_only":
|
||||
st.warning("Deletes all pending jobs. Rejected jobs are kept. Cannot be undone.")
|
||||
c1, c2 = st.columns(2)
|
||||
if c1.button("Yes, purge pending", type="primary", use_container_width=True):
|
||||
deleted = purge_jobs(get_db_path(), statuses=["pending"])
|
||||
st.success(f"Purged {deleted} pending jobs.")
|
||||
st.session_state.pop("confirm_purge", None)
|
||||
st.rerun()
|
||||
if c2.button("Cancel ", use_container_width=True):
|
||||
st.session_state.pop("confirm_purge", None)
|
||||
st.rerun()
|
||||
|
||||
with nonremote_col:
|
||||
st.markdown("**Purge non-remote**")
|
||||
st.caption("Removes pending/approved/rejected jobs where remote is not set. Keeps anything already in the pipeline.")
|
||||
if st.button("🏢 Purge On-site Jobs", use_container_width=True):
|
||||
st.session_state["confirm_purge"] = "non_remote"
|
||||
|
||||
if st.session_state.get("confirm_purge") == "non_remote":
|
||||
st.warning("Deletes all non-remote jobs not yet applied to. Cannot be undone.")
|
||||
c1, c2 = st.columns(2)
|
||||
if c1.button("Yes, purge on-site", type="primary", use_container_width=True):
|
||||
deleted = purge_non_remote(get_db_path())
|
||||
st.success(f"Purged {deleted} non-remote jobs.")
|
||||
st.session_state.pop("confirm_purge", None)
|
||||
st.rerun()
|
||||
if c2.button("Cancel ", use_container_width=True):
|
||||
st.session_state.pop("confirm_purge", None)
|
||||
st.rerun()
|
||||
|
||||
with approved_col:
|
||||
st.markdown("**Purge approved (unapplied)**")
|
||||
st.caption("Removes _approved_ jobs you haven't applied to yet — e.g. to reset after a review pass.")
|
||||
if st.button("🗑 Purge Approved", use_container_width=True):
|
||||
st.session_state["confirm_purge"] = "approved_only"
|
||||
|
||||
if st.session_state.get("confirm_purge") == "approved_only":
|
||||
st.warning("Deletes all approved-but-not-applied jobs. Cannot be undone.")
|
||||
c1, c2 = st.columns(2)
|
||||
if c1.button("Yes, purge approved", type="primary", use_container_width=True):
|
||||
deleted = purge_jobs(get_db_path(), statuses=["approved"])
|
||||
st.success(f"Purged {deleted} approved jobs.")
|
||||
st.session_state.pop("confirm_purge", None)
|
||||
st.rerun()
|
||||
if c2.button("Cancel ", use_container_width=True):
|
||||
st.session_state.pop("confirm_purge", None)
|
||||
st.rerun()
|
||||
|
||||
st.divider()
|
||||
|
||||
archive_col1, archive_col2, _, _ = st.columns(4)
|
||||
|
||||
with archive_col1:
|
||||
st.markdown("**Archive remaining**")
|
||||
st.caption(
|
||||
"Move all _pending_ and _rejected_ jobs to archived status. "
|
||||
"Archived jobs stay in the DB for dedup — they just won't appear in Job Review."
|
||||
)
|
||||
if st.button("📦 Archive Pending + Rejected", use_container_width=True):
|
||||
st.session_state["confirm_purge"] = "archive_remaining"
|
||||
|
||||
if st.session_state.get("confirm_purge") == "archive_remaining":
|
||||
st.info("Jobs will be archived (not deleted) — URLs are kept for dedup.")
|
||||
c1, c2 = st.columns(2)
|
||||
if c1.button("Yes, archive", type="primary", use_container_width=True):
|
||||
archived = archive_jobs(get_db_path(), statuses=["pending", "rejected"])
|
||||
st.success(f"Archived {archived} jobs.")
|
||||
st.session_state.pop("confirm_purge", None)
|
||||
st.rerun()
|
||||
if c2.button("Cancel ", use_container_width=True):
|
||||
st.session_state.pop("confirm_purge", None)
|
||||
st.rerun()
|
||||
|
||||
with archive_col2:
|
||||
st.markdown("**Archive approved (unapplied)**")
|
||||
st.caption("Archive _approved_ listings you decided to skip — keeps history without cluttering the apply queue.")
|
||||
if st.button("📦 Archive Approved", use_container_width=True):
|
||||
st.session_state["confirm_purge"] = "archive_approved"
|
||||
|
||||
if st.session_state.get("confirm_purge") == "archive_approved":
|
||||
st.info("Approved jobs will be archived (not deleted).")
|
||||
c1, c2 = st.columns(2)
|
||||
if c1.button("Yes, archive approved", type="primary", use_container_width=True):
|
||||
archived = archive_jobs(get_db_path(), statuses=["approved"])
|
||||
st.success(f"Archived {archived} approved jobs.")
|
||||
st.session_state.pop("confirm_purge", None)
|
||||
st.rerun()
|
||||
if c2.button("Cancel ", use_container_width=True):
|
||||
st.session_state.pop("confirm_purge", None)
|
||||
st.rerun()
|
||||
|
||||
# ── Setup banners ─────────────────────────────────────────────────────────────
|
||||
|
|
|
|||
|
|
@ -17,16 +17,10 @@ sys.path.insert(0, str(Path(__file__).parent.parent))
|
|||
|
||||
logging.basicConfig(level=logging.WARNING, format="%(name)s %(levelname)s: %(message)s")
|
||||
|
||||
# Load .env before any os.environ reads — safe to call inside Docker too
|
||||
# (uses setdefault, so Docker-injected vars take precedence over .env values)
|
||||
from circuitforge_core.config.settings import load_env as _load_env
|
||||
_load_env(Path(__file__).parent.parent / ".env")
|
||||
|
||||
IS_DEMO = os.environ.get("DEMO_MODE", "").lower() in ("1", "true", "yes")
|
||||
|
||||
import streamlit as st
|
||||
from scripts.db import DEFAULT_DB, init_db, get_active_tasks
|
||||
from scripts.db_migrate import migrate_db
|
||||
from app.feedback import inject_feedback_button
|
||||
from app.cloud_session import resolve_session, get_db_path, get_config_dir, get_cloud_tier
|
||||
import sqlite3
|
||||
|
|
@ -42,7 +36,6 @@ st.set_page_config(
|
|||
|
||||
resolve_session("peregrine")
|
||||
init_db(get_db_path())
|
||||
migrate_db(Path(get_db_path()))
|
||||
|
||||
# Demo tier — initialize once per session (cookie persistence handled client-side)
|
||||
if IS_DEMO and "simulated_tier" not in st.session_state:
|
||||
|
|
|
|||
|
|
@ -203,16 +203,8 @@ def get_config_dir() -> Path:
|
|||
isolated and never shared across tenants.
|
||||
Local: repo-level config/ directory.
|
||||
"""
|
||||
if CLOUD_MODE:
|
||||
db_path = st.session_state.get("db_path")
|
||||
if db_path:
|
||||
return Path(db_path).parent / "config"
|
||||
# Session not resolved yet (resolve_session() should have called st.stop() already).
|
||||
# Return an isolated empty temp dir rather than the repo config, which may contain
|
||||
# another user's data baked into the image.
|
||||
_safe = Path("/tmp/peregrine-cloud-noconfig")
|
||||
_safe.mkdir(exist_ok=True)
|
||||
return _safe
|
||||
if CLOUD_MODE and st.session_state.get("db_path"):
|
||||
return Path(st.session_state["db_path"]).parent / "config"
|
||||
return Path(__file__).parent.parent / "config"
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -124,6 +124,12 @@ def sync_ui_cookie(yaml_path: Path, tier: str) -> None:
|
|||
# UI components must not crash the app — silent fallback to default
|
||||
pref = "streamlit"
|
||||
|
||||
# Demo mode: Vue SPA has no demo data wiring — always serve Streamlit.
|
||||
# (The tier downgrade check below is skipped in demo mode, but we must
|
||||
# also block the Vue navigation itself so Caddy doesn't route to a blank SPA.)
|
||||
if pref == "vue" and _DEMO_MODE:
|
||||
pref = "streamlit"
|
||||
|
||||
# Tier downgrade protection (skip in demo — demo bypasses tier gate)
|
||||
if pref == "vue" and not _DEMO_MODE and not can_use(tier, "vue_ui_beta"):
|
||||
if profile is not None:
|
||||
|
|
|
|||
|
|
@ -457,11 +457,6 @@ elif step == 5:
|
|||
from app.wizard.step_inference import validate
|
||||
|
||||
st.subheader("Step 5 \u2014 Inference & API Keys")
|
||||
st.info(
|
||||
"**Simplest setup:** set `OLLAMA_HOST` in your `.env` file — "
|
||||
"Peregrine auto-detects it, no config file needed. "
|
||||
"Or use the fields below to configure API keys and endpoints."
|
||||
)
|
||||
profile = saved_yaml.get("inference_profile", "remote")
|
||||
|
||||
if profile == "remote":
|
||||
|
|
@ -471,18 +466,8 @@ elif step == 5:
|
|||
placeholder="https://api.together.xyz/v1")
|
||||
openai_key = st.text_input("Endpoint API Key (optional)", type="password",
|
||||
key="oai_key") if openai_url else ""
|
||||
ollama_host = st.text_input("Ollama host (optional \u2014 local fallback)",
|
||||
placeholder="http://localhost:11434",
|
||||
key="ollama_host_input")
|
||||
ollama_model = st.text_input("Ollama model (optional)",
|
||||
value="llama3.2:3b",
|
||||
key="ollama_model_input")
|
||||
else:
|
||||
st.info(f"Local mode ({profile}): Ollama provides inference.")
|
||||
import os
|
||||
_ollama_host_env = os.environ.get("OLLAMA_HOST", "")
|
||||
if _ollama_host_env:
|
||||
st.caption(f"OLLAMA_HOST from .env: `{_ollama_host_env}`")
|
||||
anthropic_key = openai_url = openai_key = ""
|
||||
|
||||
with st.expander("Advanced \u2014 Service Ports & Hosts"):
|
||||
|
|
@ -561,14 +546,6 @@ elif step == 5:
|
|||
if anthropic_key or openai_url:
|
||||
env_path.write_text("\n".join(env_lines) + "\n")
|
||||
|
||||
if profile == "remote":
|
||||
if ollama_host:
|
||||
env_lines = _set_env(env_lines, "OLLAMA_HOST", ollama_host)
|
||||
if ollama_model:
|
||||
env_lines = _set_env(env_lines, "OLLAMA_MODEL", ollama_model)
|
||||
if ollama_host or ollama_model:
|
||||
env_path.write_text("\n".join(env_lines) + "\n")
|
||||
|
||||
_save_yaml({"services": svc, "wizard_step": 5})
|
||||
st.session_state.wizard_step = 6
|
||||
st.rerun()
|
||||
|
|
|
|||
|
Before Width: | Height: | Size: 298 KiB |
|
Before Width: | Height: | Size: 276 KiB |
|
|
@ -49,7 +49,6 @@ FEATURES: dict[str, str] = {
|
|||
"company_research": "paid",
|
||||
"interview_prep": "paid",
|
||||
"survey_assistant": "paid",
|
||||
"llm_reply_draft": "paid",
|
||||
|
||||
# Orchestration / infrastructure — stays gated
|
||||
"email_classifier": "paid",
|
||||
|
|
@ -82,7 +81,6 @@ BYOK_UNLOCKABLE: frozenset[str] = frozenset({
|
|||
"company_research",
|
||||
"interview_prep",
|
||||
"survey_assistant",
|
||||
"llm_reply_draft",
|
||||
})
|
||||
|
||||
# Demo mode flag — read from environment at module load time.
|
||||
|
|
|
|||
|
|
@ -6,40 +6,41 @@
|
|||
# Caddy injects the Directus session cookie as X-CF-Session header before forwarding.
|
||||
# cloud_session.py resolves user_id → per-user db_path at session init.
|
||||
#
|
||||
# Services: api (FastAPI :8601), web (Vue :8508), searxng (internal)
|
||||
# Streamlit app service removed — Vue+FastAPI is the only frontend (peregrine#104).
|
||||
#
|
||||
# Usage:
|
||||
# docker compose -f compose.cloud.yml --project-name peregrine-cloud up -d
|
||||
# docker compose -f compose.cloud.yml --project-name peregrine-cloud down
|
||||
# docker compose -f compose.cloud.yml --project-name peregrine-cloud logs api -f
|
||||
# docker compose -f compose.cloud.yml --project-name peregrine-cloud logs app -f
|
||||
|
||||
services:
|
||||
api:
|
||||
app:
|
||||
build:
|
||||
context: ..
|
||||
dockerfile: peregrine/Dockerfile.cfcore
|
||||
command: >
|
||||
bash -c "uvicorn dev_api:app --host 0.0.0.0 --port 8601"
|
||||
container_name: peregrine-cloud
|
||||
ports:
|
||||
- "8601:8601" # LAN-accessible — Caddy gates the public route; Kuma monitors this port directly
|
||||
- "8505:8501"
|
||||
volumes:
|
||||
- /devl/menagerie-data:/devl/menagerie-data
|
||||
- ./config/llm.cloud.yaml:/app/config/llm.yaml:ro
|
||||
- /devl/menagerie-data:/devl/menagerie-data # per-user data trees
|
||||
- ./config/llm.cloud.yaml:/app/config/llm.yaml:ro # cloud-safe backends only (no claude_code/copilot/anthropic)
|
||||
environment:
|
||||
- CLOUD_MODE=true
|
||||
- CLOUD_DATA_ROOT=/devl/menagerie-data
|
||||
- STAGING_DB=/devl/menagerie-data/cloud-default.db
|
||||
- DIRECTUS_JWT_SECRET=${DIRECTUS_JWT_SECRET}
|
||||
- CF_SERVER_SECRET=${CF_SERVER_SECRET}
|
||||
- PLATFORM_DB_URL=${PLATFORM_DB_URL}
|
||||
- HEIMDALL_URL=${HEIMDALL_URL:-http://cf-license:8000}
|
||||
- HEIMDALL_ADMIN_TOKEN=${HEIMDALL_ADMIN_TOKEN}
|
||||
- STAGING_DB=/devl/menagerie-data/cloud-default.db # fallback only — never used
|
||||
- DOCS_DIR=/tmp/cloud-docs
|
||||
- STREAMLIT_SERVER_BASE_URL_PATH=peregrine
|
||||
- PYTHONUNBUFFERED=1
|
||||
- PEREGRINE_CADDY_PROXY=1
|
||||
- CF_ORCH_URL=http://host.docker.internal:7700
|
||||
- DEMO_MODE=false
|
||||
- FORGEJO_API_TOKEN=${FORGEJO_API_TOKEN:-}
|
||||
- GPU_SERVER_URL=${GPU_SERVER_URL:-http://host.docker.internal:7700}
|
||||
- CF_ORCH_URL=${CF_ORCH_URL:-${GPU_SERVER_URL:-http://host.docker.internal:7700}}
|
||||
- CF_APP_NAME=peregrine
|
||||
depends_on:
|
||||
searxng:
|
||||
condition: service_healthy
|
||||
extra_hosts:
|
||||
- "host.docker.internal:host-gateway"
|
||||
restart: unless-stopped
|
||||
|
|
@ -52,13 +53,8 @@ services:
|
|||
VITE_BASE_PATH: /peregrine/
|
||||
ports:
|
||||
- "8508:80"
|
||||
depends_on:
|
||||
- api
|
||||
restart: unless-stopped
|
||||
|
||||
# cf-orch-agent: not needed in cloud — a host-native agent already runs on :7701
|
||||
# and is registered with the coordinator. app/api reach it via CF_ORCH_URL.
|
||||
|
||||
searxng:
|
||||
image: searxng/searxng:latest
|
||||
volumes:
|
||||
|
|
|
|||
|
|
@ -15,21 +15,19 @@
|
|||
|
||||
services:
|
||||
|
||||
api:
|
||||
app:
|
||||
build: .
|
||||
command: >
|
||||
bash -c "uvicorn dev_api:app --host 0.0.0.0 --port 8601"
|
||||
ports:
|
||||
- "8504:8501"
|
||||
volumes:
|
||||
- ./demo/config:/app/config
|
||||
- ./demo:/app/demo:ro # seed.sql lives here; read-only
|
||||
# /app/data is tmpfs — ephemeral, resets on every container start
|
||||
tmpfs:
|
||||
- /app/data
|
||||
- ./demo/data:/app/data
|
||||
# No /docs mount — demo has no personal documents
|
||||
environment:
|
||||
- DEMO_MODE=true
|
||||
- STAGING_DB=/app/data/staging.db
|
||||
- DEMO_SEED_FILE=/app/demo/seed.sql
|
||||
- DOCS_DIR=/tmp/demo-docs
|
||||
- STREAMLIT_SERVER_BASE_URL_PATH=peregrine
|
||||
- PYTHONUNBUFFERED=1
|
||||
- PYTHONLOGGING=WARNING
|
||||
# No API keys — inference is blocked by DEMO_MODE before any key is needed
|
||||
|
|
@ -39,7 +37,6 @@ services:
|
|||
extra_hosts:
|
||||
- "host.docker.internal:host-gateway"
|
||||
restart: unless-stopped
|
||||
# No host port — nginx proxies /api/ → api:8601 internally
|
||||
|
||||
web:
|
||||
build:
|
||||
|
|
@ -48,9 +45,7 @@ services:
|
|||
args:
|
||||
VITE_BASE_PATH: /peregrine/
|
||||
ports:
|
||||
- "8504:80" # demo.circuitforge.tech/peregrine* → host:8504
|
||||
depends_on:
|
||||
- api
|
||||
- "8507:80"
|
||||
restart: unless-stopped
|
||||
|
||||
searxng:
|
||||
|
|
|
|||
|
|
@ -29,8 +29,7 @@ services:
|
|||
- STAGING_DB=/devl/job-seeker/staging.db
|
||||
- PYTHONUNBUFFERED=1
|
||||
- STREAMLIT_SERVER_BASE_URL_PATH=
|
||||
- GPU_SERVER_URL=${GPU_SERVER_URL:-http://host.docker.internal:7700}
|
||||
- CF_ORCH_URL=${CF_ORCH_URL:-${GPU_SERVER_URL:-http://host.docker.internal:7700}}
|
||||
- CF_ORCH_URL=http://host.docker.internal:7700
|
||||
extra_hosts:
|
||||
- "host.docker.internal:host-gateway"
|
||||
restart: "no"
|
||||
|
|
|
|||
71
compose.yml
|
|
@ -1,7 +1,48 @@
|
|||
# compose.yml — Peregrine by Circuit Forge LLC
|
||||
# Streamlit (app service) removed — Vue+FastAPI is the only frontend (#104)
|
||||
# Profiles: remote | cpu | single-gpu | dual-gpu-ollama
|
||||
services:
|
||||
|
||||
app:
|
||||
build:
|
||||
context: ..
|
||||
dockerfile: peregrine/Dockerfile.cfcore
|
||||
command: >
|
||||
bash -c "streamlit run app/app.py
|
||||
--server.port=8501
|
||||
--server.headless=true
|
||||
--server.fileWatcherType=none
|
||||
2>&1 | tee /app/data/.streamlit.log"
|
||||
ports:
|
||||
- "${STREAMLIT_PORT:-8501}:8501"
|
||||
volumes:
|
||||
- ./config:/app/config
|
||||
- ./data:/app/data
|
||||
- ${DOCS_DIR:-~/Documents/JobSearch}:/docs
|
||||
- /var/run/docker.sock:/var/run/docker.sock
|
||||
- /usr/bin/docker:/usr/bin/docker:ro
|
||||
environment:
|
||||
- STAGING_DB=/app/data/staging.db
|
||||
- DOCS_DIR=/docs
|
||||
- ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY:-}
|
||||
- OPENAI_COMPAT_URL=${OPENAI_COMPAT_URL:-}
|
||||
- OPENAI_COMPAT_KEY=${OPENAI_COMPAT_KEY:-}
|
||||
- PEREGRINE_GPU_COUNT=${PEREGRINE_GPU_COUNT:-0}
|
||||
- PEREGRINE_GPU_NAMES=${PEREGRINE_GPU_NAMES:-}
|
||||
- RECOMMENDED_PROFILE=${RECOMMENDED_PROFILE:-remote}
|
||||
- STREAMLIT_SERVER_BASE_URL_PATH=${STREAMLIT_BASE_URL_PATH:-}
|
||||
- FORGEJO_API_TOKEN=${FORGEJO_API_TOKEN:-}
|
||||
- FORGEJO_REPO=${FORGEJO_REPO:-}
|
||||
- FORGEJO_API_URL=${FORGEJO_API_URL:-}
|
||||
- PYTHONUNBUFFERED=1
|
||||
- PYTHONLOGGING=WARNING
|
||||
- PEREGRINE_CADDY_PROXY=1
|
||||
depends_on:
|
||||
searxng:
|
||||
condition: service_healthy
|
||||
extra_hosts:
|
||||
- "host.docker.internal:host-gateway"
|
||||
restart: unless-stopped
|
||||
|
||||
api:
|
||||
build:
|
||||
context: ..
|
||||
|
|
@ -20,9 +61,6 @@ services:
|
|||
- OPENAI_COMPAT_KEY=${OPENAI_COMPAT_KEY:-}
|
||||
- PEREGRINE_GPU_COUNT=${PEREGRINE_GPU_COUNT:-0}
|
||||
- PEREGRINE_GPU_NAMES=${PEREGRINE_GPU_NAMES:-}
|
||||
- GPU_SERVER_URL=${GPU_SERVER_URL:-${CF_ORCH_URL:-http://host.docker.internal:7700}}
|
||||
- CF_ORCH_URL=${CF_ORCH_URL:-${GPU_SERVER_URL:-http://host.docker.internal:7700}}
|
||||
- CF_APP_NAME=peregrine
|
||||
- PYTHONUNBUFFERED=1
|
||||
extra_hosts:
|
||||
- "host.docker.internal:host-gateway"
|
||||
|
|
@ -91,31 +129,6 @@ services:
|
|||
profiles: [single-gpu, dual-gpu-ollama, dual-gpu-vllm, dual-gpu-mixed]
|
||||
restart: unless-stopped
|
||||
|
||||
cf-orch-agent:
|
||||
build:
|
||||
context: ..
|
||||
dockerfile: peregrine/Dockerfile.cfcore
|
||||
command: ["/bin/sh", "/app/docker/cf-orch-agent/start.sh"]
|
||||
ports:
|
||||
- "${CF_ORCH_AGENT_PORT:-7701}:7701"
|
||||
environment:
|
||||
- CF_ORCH_COORDINATOR_URL=${CF_ORCH_COORDINATOR_URL:-http://host.docker.internal:7700}
|
||||
- CF_ORCH_NODE_ID=${CF_ORCH_NODE_ID:-peregrine}
|
||||
- CF_ORCH_AGENT_PORT=${CF_ORCH_AGENT_PORT:-7701}
|
||||
- CF_ORCH_ADVERTISE_HOST=${CF_ORCH_ADVERTISE_HOST:-}
|
||||
- PYTHONUNBUFFERED=1
|
||||
extra_hosts:
|
||||
- "host.docker.internal:host-gateway"
|
||||
deploy:
|
||||
resources:
|
||||
reservations:
|
||||
devices:
|
||||
- driver: nvidia
|
||||
count: all
|
||||
capabilities: [gpu]
|
||||
profiles: [single-gpu, dual-gpu-ollama, dual-gpu-vllm, dual-gpu-mixed]
|
||||
restart: unless-stopped
|
||||
|
||||
finetune:
|
||||
build:
|
||||
context: .
|
||||
|
|
|
|||
|
|
@ -1,23 +0,0 @@
|
|||
# config/label_tool.yaml — Multi-account IMAP config for the email label tool
|
||||
# Copy to config/label_tool.yaml and fill in your credentials.
|
||||
# This file is gitignored.
|
||||
|
||||
accounts:
|
||||
- name: "Gmail"
|
||||
host: "imap.gmail.com"
|
||||
port: 993
|
||||
username: "you@gmail.com"
|
||||
password: "your-app-password" # Use an App Password, not your login password
|
||||
folder: "INBOX"
|
||||
days_back: 90
|
||||
|
||||
- name: "Outlook"
|
||||
host: "outlook.office365.com"
|
||||
port: 993
|
||||
username: "you@outlook.com"
|
||||
password: "your-app-password"
|
||||
folder: "INBOX"
|
||||
days_back: 90
|
||||
|
||||
# Optional: limit emails fetched per account per run (0 = unlimited)
|
||||
max_per_account: 500
|
||||
|
|
@ -45,11 +45,6 @@ backends:
|
|||
model: __auto__
|
||||
supports_images: false
|
||||
type: openai_compat
|
||||
cf_orch:
|
||||
service: vllm
|
||||
model_candidates:
|
||||
- Qwen2.5-3B-Instruct
|
||||
ttl_s: 300
|
||||
vllm_research:
|
||||
api_key: ''
|
||||
base_url: http://host.docker.internal:8000/v1
|
||||
|
|
@ -57,11 +52,6 @@ backends:
|
|||
model: __auto__
|
||||
supports_images: false
|
||||
type: openai_compat
|
||||
cf_orch:
|
||||
service: vllm
|
||||
model_candidates:
|
||||
- Qwen2.5-3B-Instruct
|
||||
ttl_s: 300
|
||||
fallback_order:
|
||||
- vllm
|
||||
- ollama
|
||||
|
|
|
|||
|
|
@ -1,11 +1,4 @@
|
|||
backends:
|
||||
cf_text:
|
||||
api_key: any
|
||||
base_url: http://host.docker.internal:8006/v1
|
||||
enabled: true
|
||||
model: cf-text
|
||||
supports_images: false
|
||||
type: openai_compat
|
||||
anthropic:
|
||||
api_key_env: ANTHROPIC_API_KEY
|
||||
enabled: false
|
||||
|
|
@ -41,7 +34,7 @@ backends:
|
|||
supports_images: false
|
||||
type: openai_compat
|
||||
vision_service:
|
||||
base_url: http://vision:8002
|
||||
base_url: http://host.docker.internal:8002
|
||||
enabled: true
|
||||
supports_images: true
|
||||
type: vision_service
|
||||
|
|
@ -65,7 +58,6 @@ backends:
|
|||
supports_images: false
|
||||
type: openai_compat
|
||||
fallback_order:
|
||||
- cf_text
|
||||
- ollama
|
||||
- claude_code
|
||||
- vllm
|
||||
|
|
@ -75,7 +67,6 @@ research_fallback_order:
|
|||
- claude_code
|
||||
- vllm_research
|
||||
- ollama_research
|
||||
- cf_text
|
||||
- github_copilot
|
||||
- anthropic
|
||||
vision_fallback_order:
|
||||
|
|
|
|||
|
|
@ -45,89 +45,6 @@ backends:
|
|||
enabled: false
|
||||
type: vision_service
|
||||
supports_images: true
|
||||
|
||||
# ── cf-orch task-routed backends (preferred for GPU inference) ────────────
|
||||
# Use these when GPU_SERVER_URL is configured. The coordinator resolves
|
||||
# product+task → model_id → node via assignments.yaml; no model IDs needed here.
|
||||
# Set enabled: true once GPU_SERVER_URL is configured.
|
||||
cf_cover_letter:
|
||||
type: openai_compat
|
||||
enabled: false
|
||||
base_url: http://localhost:8008/v1 # fallback when cf-orch is unavailable
|
||||
model: __auto__
|
||||
api_key: any
|
||||
supports_images: false
|
||||
cf_orch:
|
||||
product: peregrine
|
||||
task: cover_letter
|
||||
ttl_s: 3600
|
||||
|
||||
cf_ats_rewrite:
|
||||
type: openai_compat
|
||||
enabled: false
|
||||
base_url: http://localhost:8008/v1
|
||||
model: __auto__
|
||||
api_key: any
|
||||
supports_images: false
|
||||
cf_orch:
|
||||
product: peregrine
|
||||
task: ats_rewrite
|
||||
ttl_s: 3600
|
||||
|
||||
cf_job_research:
|
||||
type: openai_compat
|
||||
enabled: false
|
||||
base_url: http://localhost:8008/v1
|
||||
model: __auto__
|
||||
api_key: any
|
||||
supports_images: false
|
||||
cf_orch:
|
||||
product: peregrine
|
||||
task: job_research
|
||||
ttl_s: 3600
|
||||
|
||||
cf_interview_prep:
|
||||
type: openai_compat
|
||||
enabled: false
|
||||
base_url: http://localhost:8008/v1
|
||||
model: __auto__
|
||||
api_key: any
|
||||
supports_images: false
|
||||
cf_orch:
|
||||
product: peregrine
|
||||
task: interview_prep
|
||||
ttl_s: 3600
|
||||
|
||||
# ── cf-orch trunk services (service-based, legacy) ─────────────────────────
|
||||
# Generic service allocation — use the task-routed backends above when possible.
|
||||
# Set GPU_SERVER_URL (env) or url below; leave enabled: false if cf-orch is
|
||||
# not deployed in your environment.
|
||||
cf_text:
|
||||
type: openai_compat
|
||||
enabled: false
|
||||
base_url: http://localhost:8008/v1 # fallback when cf-orch is not available
|
||||
model: __auto__
|
||||
api_key: any
|
||||
supports_images: false
|
||||
cf_orch:
|
||||
service: cf-text
|
||||
# model_candidates: leave empty to use the service's default_model,
|
||||
# or specify an alias from the node's catalog (e.g. "qwen2.5-3b").
|
||||
model_candidates: []
|
||||
ttl_s: 3600
|
||||
|
||||
cf_voice:
|
||||
type: openai_compat
|
||||
enabled: false
|
||||
base_url: http://localhost:8009/v1 # fallback when cf-orch is not available
|
||||
model: __auto__
|
||||
api_key: any
|
||||
supports_images: false
|
||||
cf_orch:
|
||||
service: cf-voice
|
||||
model_candidates: []
|
||||
ttl_s: 3600
|
||||
|
||||
fallback_order:
|
||||
- ollama
|
||||
- claude_code
|
||||
|
|
|
|||
|
|
@ -1,258 +0,0 @@
|
|||
# Mission domain signal configuration for cover letter generation.
|
||||
#
|
||||
# When a job description or company name matches signals in a domain,
|
||||
# the cover letter prompt injects a Para 3 hint to reflect genuine personal
|
||||
# alignment. Dict order = match priority (first match wins).
|
||||
#
|
||||
# Users can add custom domains under `mission_preferences` in user.yaml.
|
||||
# Any key in mission_preferences that is NOT listed here is treated as a
|
||||
# user-defined domain: it has no signal list, so it matches only when the job
|
||||
# description contains the key itself as a literal word; only the custom note is used.
|
||||
#
|
||||
# Schema per domain:
|
||||
# signals: list[str] — lowercase keywords to scan for in "company + JD"
|
||||
# default_note: str — hint injected when user has no custom note for domain
|
||||
|
||||
domains:
|
||||
music:
|
||||
signals:
|
||||
- music
|
||||
- spotify
|
||||
- tidal
|
||||
- soundcloud
|
||||
- bandcamp
|
||||
- apple music
|
||||
- distrokid
|
||||
- cd baby
|
||||
- landr
|
||||
- beatport
|
||||
- reverb
|
||||
- vinyl
|
||||
- streaming
|
||||
- artist
|
||||
- label
|
||||
- live nation
|
||||
- ticketmaster
|
||||
- aeg
|
||||
- songkick
|
||||
- concert
|
||||
- venue
|
||||
- festival
|
||||
- audio
|
||||
- podcast
|
||||
- studio
|
||||
- record
|
||||
- musician
|
||||
- playlist
|
||||
default_note: >
|
||||
This company is in the music industry — an industry the candidate finds genuinely
|
||||
compelling. Para 3 should warmly and specifically reflect this authentic alignment,
|
||||
not as a generic fan statement, but as an honest statement of where they'd love to
|
||||
apply their skills.
|
||||
|
||||
animal_welfare:
|
||||
signals:
|
||||
- animal
|
||||
- shelter
|
||||
- rescue
|
||||
- humane society
|
||||
- spca
|
||||
- aspca
|
||||
- veterinary
|
||||
- "vet "
|
||||
- wildlife
|
||||
- "pet "
|
||||
- adoption
|
||||
- foster
|
||||
- dog
|
||||
- cat
|
||||
- feline
|
||||
- canine
|
||||
- sanctuary
|
||||
- zoo
|
||||
default_note: >
|
||||
This organization works in animal welfare/rescue — a mission the candidate finds
|
||||
genuinely meaningful. Para 3 should reflect this authentic connection warmly and
|
||||
specifically, tying their skills to this mission.
|
||||
|
||||
education:
|
||||
signals:
|
||||
- education
|
||||
- school
|
||||
- learning
|
||||
- student
|
||||
- edtech
|
||||
- classroom
|
||||
- curriculum
|
||||
- tutoring
|
||||
- academic
|
||||
- university
|
||||
- kids
|
||||
- children
|
||||
- youth
|
||||
- literacy
|
||||
- khan academy
|
||||
- duolingo
|
||||
- chegg
|
||||
- coursera
|
||||
- instructure
|
||||
- canvas lms
|
||||
- clever
|
||||
- district
|
||||
- teacher
|
||||
- k-12
|
||||
- k12
|
||||
- grade
|
||||
- pedagogy
|
||||
default_note: >
|
||||
This company works in education or EdTech — a domain that resonates with the
|
||||
candidate's values. Para 3 should reflect this authentic connection specifically
|
||||
and warmly.
|
||||
|
||||
social_impact:
|
||||
signals:
|
||||
- nonprofit
|
||||
- non-profit
|
||||
- "501(c)"
|
||||
- social impact
|
||||
- mission-driven
|
||||
- public benefit
|
||||
- community
|
||||
- underserved
|
||||
- equity
|
||||
- justice
|
||||
- humanitarian
|
||||
- advocacy
|
||||
- charity
|
||||
- foundation
|
||||
- ngo
|
||||
- social good
|
||||
- civic
|
||||
- public health
|
||||
- mental health
|
||||
- food security
|
||||
- housing
|
||||
- homelessness
|
||||
- poverty
|
||||
- workforce development
|
||||
default_note: >
|
||||
This organization is mission-driven / social impact focused — exactly the kind of
|
||||
cause the candidate cares deeply about. Para 3 should warmly reflect their genuine
|
||||
desire to apply their skills to work that makes a real difference in people's lives.
|
||||
|
||||
# Health comes last among the core domains (the extended domains below follow it) — genuine but lower-priority connection.
|
||||
health:
|
||||
signals:
|
||||
- patient
|
||||
- patients
|
||||
- healthcare
|
||||
- health tech
|
||||
- healthtech
|
||||
- pharma
|
||||
- pharmaceutical
|
||||
- clinical
|
||||
- medical
|
||||
- hospital
|
||||
- clinic
|
||||
- therapy
|
||||
- therapist
|
||||
- rare disease
|
||||
- life sciences
|
||||
- life science
|
||||
- treatment
|
||||
- prescription
|
||||
- biotech
|
||||
- biopharma
|
||||
- medtech
|
||||
- behavioral health
|
||||
- population health
|
||||
- care management
|
||||
- care coordination
|
||||
- oncology
|
||||
- specialty pharmacy
|
||||
- provider network
|
||||
- payer
|
||||
- health plan
|
||||
- benefits administration
|
||||
- ehr
|
||||
- emr
|
||||
- fhir
|
||||
- hipaa
|
||||
default_note: >
|
||||
This company works in healthcare, life sciences, or patient care.
|
||||
Do NOT write about the candidate's passion for pharmaceuticals or healthcare as an
|
||||
industry. Instead, Para 3 should reflect genuine care for the PEOPLE these companies
|
||||
exist to serve: those navigating complex, often invisible, or unusual health journeys;
|
||||
patients facing rare or poorly understood conditions; individuals whose situations don't
|
||||
fit a clean category. The connection is to the humans behind the data, not the industry.
|
||||
If the user has provided a personal note, use that to anchor Para 3 specifically.
|
||||
|
||||
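# Extended domains — added 2026-04-12. Matched after every domain above (first match wins). NOTE(review): "open source" is a signal under both privacy and open_source; privacy is listed first, so it takes that phrase.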
|
||||
|
||||
privacy:
|
||||
signals:
|
||||
- privacy
|
||||
- data rights
|
||||
- surveillance
|
||||
- gdpr
|
||||
- ccpa
|
||||
- anonymity
|
||||
- end-to-end encryption
|
||||
- open source
|
||||
- decentralized
|
||||
- self-hosted
|
||||
- zero knowledge
|
||||
- data sovereignty
|
||||
- digital rights
|
||||
- eff
|
||||
- electronic frontier
|
||||
default_note: >
|
||||
This company operates in the privacy, data rights, or digital rights space —
|
||||
a domain the candidate genuinely cares about. Para 3 should reflect their
|
||||
authentic belief in user autonomy and data sovereignty, not as abstract principle
|
||||
but as something that shapes how they approach their work.
|
||||
|
||||
accessibility:
|
||||
signals:
|
||||
- accessibility
|
||||
- assistive technology
|
||||
- a11y
|
||||
- wcag
|
||||
- screen reader
|
||||
- adaptive technology
|
||||
- disability
|
||||
- neurodivergent
|
||||
- neurodiversity
|
||||
- adhd
|
||||
- autism
|
||||
- inclusive design
|
||||
- universal design
|
||||
- accommodations
|
||||
- ada compliance
|
||||
default_note: >
|
||||
This company works in accessibility or assistive technology — a mission the
|
||||
candidate feels genuine, personal alignment with. Para 3 should reflect authentic
|
||||
investment in building tools and systems that work for everyone, especially those
|
||||
whose needs are most often overlooked in mainstream product development.
|
||||
|
||||
open_source:
|
||||
signals:
|
||||
- open source
|
||||
- open-source
|
||||
- linux foundation
|
||||
- apache foundation
|
||||
- free software
|
||||
- gnu
|
||||
- contributor
|
||||
- maintainer
|
||||
- upstream
|
||||
- community-driven
|
||||
- innersource
|
||||
- copyleft
|
||||
- mozilla
|
||||
- wikimedia
|
||||
default_note: >
|
||||
This organization is rooted in open source culture — a community the candidate
|
||||
actively participates in and believes in. Para 3 should reflect genuine investment
|
||||
in the collaborative, transparent, and community-driven approach to building
|
||||
software that lasts.
|
||||
|
|
@ -1,11 +1,9 @@
|
|||
candidate_accessibility_focus: false
|
||||
candidate_lgbtq_focus: false
|
||||
candidate_voice: Clear, direct, and human. Focuses on impact over jargon. Avoids
|
||||
buzzwords and lets the work speak.
|
||||
career_summary: 'Senior UX Designer with 6 years of experience designing for music,
|
||||
education, and media products. Strong background in cross-platform design systems,
|
||||
user research, and 0-to-1 feature development. Passionate about making complex
|
||||
digital experiences feel effortless.
|
||||
candidate_voice: Clear, direct, and human. Focuses on impact over jargon.
|
||||
career_summary: 'Experienced software engineer with a background in full-stack development,
|
||||
cloud infrastructure, and data pipelines. Passionate about building tools that help
|
||||
people navigate complex systems.
|
||||
|
||||
'
|
||||
dev_tier_override: null
|
||||
|
|
@ -18,9 +16,9 @@ inference_profile: remote
|
|||
linkedin: ''
|
||||
mission_preferences:
|
||||
animal_welfare: ''
|
||||
education: Education technology is where design decisions have long-term impact on how people learn.
|
||||
education: ''
|
||||
health: ''
|
||||
music: Love designing for music and audio discovery — it combines craft with genuine emotional resonance.
|
||||
music: ''
|
||||
social_impact: Want my work to reach people who need it most.
|
||||
name: Demo User
|
||||
nda_companies: []
|
||||
|
|
|
|||
259
demo/seed.sql
|
|
@ -1,259 +0,0 @@
|
|||
-- jobs
|
||||
INSERT INTO jobs (title, company, url, source, location, is_remote, salary, match_score, status, date_found, date_posted, cover_letter, applied_at, phone_screen_at, interviewing_at, offer_at, hired_at, interview_date, rejection_stage, hired_feedback) VALUES ('UX Designer', 'Spotify', 'https://www.linkedin.com/jobs/view/1000001', 'linkedin', 'Remote', '1', '$110k–$140k', '94.0', 'approved', '2026-04-14', '2026-04-12', 'Dear Hiring Manager,
|
||||
|
||||
I''m excited to apply for the UX Designer role at Spotify. With five years of
|
||||
experience designing for music discovery and cross-platform experiences, I''ve
|
||||
consistently shipped features that make complex audio content feel effortless to
|
||||
navigate. At my last role I led a redesign of the playlist creation flow that
|
||||
reduced drop-off by 31%.
|
||||
|
||||
Spotify''s commitment to artist and listener discovery — and its recent push into
|
||||
audiobooks and podcast tooling — aligns directly with the kind of cross-format
|
||||
design challenges I''m most energised by.
|
||||
|
||||
I''d love to bring that focus to your product design team.
|
||||
|
||||
Warm regards,
|
||||
[Your name]
|
||||
', NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL);
|
||||
INSERT INTO jobs (title, company, url, source, location, is_remote, salary, match_score, status, date_found, date_posted, cover_letter, applied_at, phone_screen_at, interviewing_at, offer_at, hired_at, interview_date, rejection_stage, hired_feedback) VALUES ('Product Designer', 'Duolingo', 'https://www.linkedin.com/jobs/view/1000002', 'linkedin', 'Pittsburgh, PA', '0', '$95k–$120k', '87.0', 'approved', '2026-04-13', '2026-04-10', 'Draft in progress — cover letter generating…', NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL);
|
||||
INSERT INTO jobs (title, company, url, source, location, is_remote, salary, match_score, status, date_found, date_posted, cover_letter, applied_at, phone_screen_at, interviewing_at, offer_at, hired_at, interview_date, rejection_stage, hired_feedback) VALUES ('UX Lead', 'NPR', 'https://www.indeed.com/viewjob?jk=1000003', 'indeed', 'Washington, DC', '1', '$120k–$150k', '81.0', 'approved', '2026-04-12', '2026-04-08', NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL);
|
||||
INSERT INTO jobs (title, company, url, source, location, is_remote, salary, match_score, status, date_found, date_posted, cover_letter, applied_at, phone_screen_at, interviewing_at, offer_at, hired_at, interview_date, rejection_stage, hired_feedback) VALUES ('Senior UX Designer', 'Mozilla', 'https://www.linkedin.com/jobs/view/1000004', 'linkedin', 'Remote', '1', '$105k–$130k', '81.0', 'pending', '2026-04-13', '2026-03-12', NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL);
|
||||
INSERT INTO jobs (title, company, url, source, location, is_remote, salary, match_score, status, date_found, date_posted, cover_letter, applied_at, phone_screen_at, interviewing_at, offer_at, hired_at, interview_date, rejection_stage, hired_feedback) VALUES ('Interaction Designer', 'Figma', 'https://www.indeed.com/viewjob?jk=1000005', 'indeed', 'San Francisco, CA', '1', '$115k–$145k', '78.0', 'pending', '2026-04-11', '2026-04-09', NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL);
|
||||
INSERT INTO jobs (title, company, url, source, location, is_remote, salary, match_score, status, date_found, date_posted, cover_letter, applied_at, phone_screen_at, interviewing_at, offer_at, hired_at, interview_date, rejection_stage, hired_feedback) VALUES ('Product Designer II', 'Notion', 'https://www.linkedin.com/jobs/view/1000006', 'linkedin', 'Remote', '1', '$100k–$130k', '76.0', 'pending', '2026-04-10', '2026-04-07', NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL);
|
||||
INSERT INTO jobs (title, company, url, source, location, is_remote, salary, match_score, status, date_found, date_posted, cover_letter, applied_at, phone_screen_at, interviewing_at, offer_at, hired_at, interview_date, rejection_stage, hired_feedback) VALUES ('UX Designer', 'Stripe', 'https://www.linkedin.com/jobs/view/1000007', 'linkedin', 'Remote', '1', '$120k–$150k', '74.0', 'pending', '2026-04-09', '2026-04-06', NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL);
|
||||
INSERT INTO jobs (title, company, url, source, location, is_remote, salary, match_score, status, date_found, date_posted, cover_letter, applied_at, phone_screen_at, interviewing_at, offer_at, hired_at, interview_date, rejection_stage, hired_feedback) VALUES ('UI/UX Designer', 'Canva', 'https://www.indeed.com/viewjob?jk=1000008', 'indeed', 'Remote', '1', '$90k–$115k', '72.0', 'pending', '2026-04-08', '2026-04-05', NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL);
|
||||
INSERT INTO jobs (title, company, url, source, location, is_remote, salary, match_score, status, date_found, date_posted, cover_letter, applied_at, phone_screen_at, interviewing_at, offer_at, hired_at, interview_date, rejection_stage, hired_feedback) VALUES ('Senior Product Designer', 'Asana', 'https://www.linkedin.com/jobs/view/1000009', 'linkedin', 'San Francisco, CA', '1', '$125k–$155k', '69.0', 'pending', '2026-04-07', '2026-04-04', NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL);
|
||||
INSERT INTO jobs (title, company, url, source, location, is_remote, salary, match_score, status, date_found, date_posted, cover_letter, applied_at, phone_screen_at, interviewing_at, offer_at, hired_at, interview_date, rejection_stage, hired_feedback) VALUES ('UX Researcher', 'Intercom', 'https://www.indeed.com/viewjob?jk=1000010', 'indeed', 'Remote', '1', '$95k–$120k', '67.0', 'pending', '2026-04-06', '2026-04-03', NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL);
|
||||
INSERT INTO jobs (title, company, url, source, location, is_remote, salary, match_score, status, date_found, date_posted, cover_letter, applied_at, phone_screen_at, interviewing_at, offer_at, hired_at, interview_date, rejection_stage, hired_feedback) VALUES ('Product Designer', 'Linear', 'https://www.linkedin.com/jobs/view/1000011', 'linkedin', 'Remote', '1', '$110k–$135k', '65.0', 'pending', '2026-04-05', '2026-04-02', NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL);
|
||||
INSERT INTO jobs (title, company, url, source, location, is_remote, salary, match_score, status, date_found, date_posted, cover_letter, applied_at, phone_screen_at, interviewing_at, offer_at, hired_at, interview_date, rejection_stage, hired_feedback) VALUES ('UX Designer', 'Loom', 'https://www.indeed.com/viewjob?jk=1000012', 'indeed', 'Remote', '1', '$90k–$110k', '62.0', 'pending', '2026-04-04', '2026-04-01', NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL);
|
||||
INSERT INTO jobs (title, company, url, source, location, is_remote, salary, match_score, status, date_found, date_posted, cover_letter, applied_at, phone_screen_at, interviewing_at, offer_at, hired_at, interview_date, rejection_stage, hired_feedback) VALUES ('Senior Product Designer', 'Asana', 'https://www.asana.com/jobs/1000013', 'linkedin', 'San Francisco, CA', '1', '$125k–$155k', '91.0', 'phone_screen', '2026-04-01', '2026-03-30', NULL, '2026-04-08', '2026-04-15', NULL, NULL, NULL, '2026-04-15T14:00:00', NULL, NULL);
|
||||
INSERT INTO jobs (title, company, url, source, location, is_remote, salary, match_score, status, date_found, date_posted, cover_letter, applied_at, phone_screen_at, interviewing_at, offer_at, hired_at, interview_date, rejection_stage, hired_feedback) VALUES ('Product Designer', 'Notion', 'https://www.notion.so/jobs/1000014', 'indeed', 'Remote', '1', '$100k–$130k', '88.0', 'interviewing', '2026-03-25', '2026-03-23', NULL, '2026-04-01', '2026-04-05', '2026-04-12', NULL, NULL, '2026-04-22T10:00:00', NULL, NULL);
|
||||
INSERT INTO jobs (title, company, url, source, location, is_remote, salary, match_score, status, date_found, date_posted, cover_letter, applied_at, phone_screen_at, interviewing_at, offer_at, hired_at, interview_date, rejection_stage, hired_feedback) VALUES ('Design Systems Designer', 'Figma', 'https://www.figma.com/jobs/1000015', 'linkedin', 'San Francisco, CA', '1', '$130k–$160k', '96.0', 'hired', '2026-03-01', '2026-02-27', NULL, '2026-03-08', '2026-03-14', '2026-03-21', '2026-04-01', '2026-04-08', NULL, NULL, '{"factors":["clear_scope","great_manager","mission_aligned"],"notes":"Excited about design systems work. Salary met expectations."}');
|
||||
INSERT INTO jobs (title, company, url, source, location, is_remote, salary, match_score, status, date_found, date_posted, cover_letter, applied_at, phone_screen_at, interviewing_at, offer_at, hired_at, interview_date, rejection_stage, hired_feedback) VALUES ('UX Designer', 'Slack', 'https://slack.com/jobs/1000016', 'indeed', 'Remote', '1', '$115k–$140k', '79.0', 'applied', '2026-03-18', '2026-03-16', NULL, '2026-03-28', NULL, NULL, NULL, NULL, NULL, NULL, NULL);
|
||||
|
||||
-- job_contacts
|
||||
INSERT INTO job_contacts (job_id, direction, subject, from_addr, to_addr, received_at, stage_signal) VALUES (1, 'inbound', 'Excited to connect — UX Designer role at Spotify', 'jamie.chen@spotify.com', 'you@example.com', '2026-04-12', 'positive_response');
|
||||
INSERT INTO job_contacts (job_id, direction, subject, from_addr, to_addr, received_at, stage_signal) VALUES (1, 'outbound', 'Re: Excited to connect — UX Designer role at Spotify', 'you@example.com', 'jamie.chen@spotify.com', '2026-04-13', NULL);
|
||||
INSERT INTO job_contacts (job_id, direction, subject, from_addr, to_addr, received_at, stage_signal) VALUES (13, 'inbound', 'Interview Confirmation — Senior Product Designer', 'recruiting@asana.com', 'you@example.com', '2026-04-13', 'interview_scheduled');
|
||||
INSERT INTO job_contacts (job_id, direction, subject, from_addr, to_addr, received_at, stage_signal) VALUES (14, 'inbound', 'Your panel interview is confirmed for Apr 22', 'recruiting@notion.so', 'you@example.com', '2026-04-12', 'interview_scheduled');
|
||||
INSERT INTO job_contacts (job_id, direction, subject, from_addr, to_addr, received_at, stage_signal) VALUES (14, 'inbound', 'Pre-interview prep resources', 'marcus.webb@notion.so', 'you@example.com', '2026-04-13', 'positive_response');
|
||||
INSERT INTO job_contacts (job_id, direction, subject, from_addr, to_addr, received_at, stage_signal) VALUES (15, 'inbound', 'Figma Design Systems — Offer Letter', 'offers@figma.com', 'you@example.com', '2026-04-01', 'offer_received');
|
||||
INSERT INTO job_contacts (job_id, direction, subject, from_addr, to_addr, received_at, stage_signal) VALUES (15, 'outbound', 'Re: Figma Design Systems — Offer Letter (acceptance)', 'you@example.com', 'offers@figma.com', '2026-04-05', NULL);
|
||||
INSERT INTO job_contacts (job_id, direction, subject, from_addr, to_addr, received_at, stage_signal) VALUES (15, 'inbound', 'Welcome to Figma! Onboarding next steps', 'onboarding@figma.com', 'you@example.com', '2026-04-08', NULL);
|
||||
INSERT INTO job_contacts (job_id, direction, subject, from_addr, to_addr, received_at, stage_signal) VALUES (16, 'inbound', 'Thanks for applying to Slack', 'noreply@slack.com', 'you@example.com', '2026-03-28', NULL);
|
||||
|
||||
-- references_
|
||||
INSERT INTO references_ (name, email, role, company, relationship, notes, tags, prep_email) VALUES ('Dr. Priya Nair', 'priya.nair@example.com', 'Director of Design', 'Acme Corp', 'former_manager', 'Managed me for 3 years on the consumer app redesign. Enthusiastic reference.', '["manager","design"]', 'Hi Priya,
|
||||
|
||||
I hope you''re doing well! I''m currently interviewing for a few senior UX roles and would be so grateful if you''d be willing to serve as a reference.
|
||||
|
||||
Thank you!
|
||||
[Your name]');
|
||||
INSERT INTO references_ (name, email, role, company, relationship, notes, tags, prep_email) VALUES ('Sam Torres', 'sam.torres@example.com', 'Senior Product Designer', 'Acme Corp', 'former_colleague', 'Worked together on design systems. Great at speaking to collaborative process.', '["colleague","design_systems"]', NULL);
|
||||
INSERT INTO references_ (name, email, role, company, relationship, notes, tags, prep_email) VALUES ('Jordan Kim', 'jordan.kim@example.com', 'VP of Product', 'Streamline Inc', 'former_manager', 'Led the product team I was embedded in. Can speak to business impact of design work.', '["manager","product"]', NULL);
|
||||
|
||||
-- resumes
|
||||
INSERT INTO resumes (name, source, job_id, text, struct_json, word_count, is_default) VALUES (
|
||||
'Base Resume',
|
||||
'uploaded',
|
||||
NULL,
|
||||
'ALEX RIVERA
|
||||
UX Designer · Product Design · Design Systems
|
||||
alex.rivera@example.com · linkedin.com/in/alexrivera · Portfolio: alexrivera.design
|
||||
|
||||
SUMMARY
|
||||
Senior UX Designer with 6 years of experience designing for music, education, and media platforms. Led 0-to-1 product work and redesigned high-traffic flows used by tens of millions of users. Deep background in user research, interaction design, and cross-platform design systems. Strong collaborator with engineering and product — comfortable in ambiguity, methodical about process.
|
||||
|
||||
EXPERIENCE
|
||||
|
||||
Senior UX Designer — StreamNote (2023–present)
|
||||
- Led redesign of the core listening queue, reducing abandonment by 31% across mobile and web
|
||||
- Built and maintained a component library (Figma tokens + React) used by 8 product squads
|
||||
- Ran 60+ moderated user research sessions; findings shaped 3 major product bets
|
||||
- Partnered with ML team to design recommendation transparency features for power users
|
||||
|
||||
UX Designer — EduPath (2021–2023)
|
||||
- Designed the onboarding and early-habit loop for a K–12 learning app (2.4M DAU)
|
||||
- Shipped streak redesign that improved D7 retention by 18%
|
||||
- Drove accessibility audit and remediation (WCAG 2.1 AA); filed and closed 47 issues
|
||||
- Mentored 2 junior designers; led weekly design critique
|
||||
|
||||
Product Designer — Signal Media (2019–2021)
|
||||
- Designed editorial tools and reader-facing article experiences for a digital news publisher
|
||||
- Prototyped and shipped a "read later" feature that became the #2 most-used feature within 90 days
|
||||
- Collaborated with editorial and engineering to establish a shared component system (reduced new-story design time by 60%)
|
||||
|
||||
SKILLS
|
||||
Figma · Prototyping · User Research · Usability Testing · Design Systems · Interaction Design
|
||||
Accessibility (WCAG 2.1) · Cross-Platform (iOS/Android/Web) · React (collaboration-level) · SQL (basic)
|
||||
Workshop Facilitation · Stakeholder Communication
|
||||
|
||||
EDUCATION
|
||||
B.F.A. Graphic Design, Minor in Human-Computer Interaction — State University of the Arts, 2019
|
||||
|
||||
SELECTED PROJECTS
|
||||
Playlist Flow Redesign (StreamNote) — reduced creation drop-off 31%, won internal design award
|
||||
D7 Retention Streak (EduPath) — +18% weekly retention; featured in company all-hands
|
||||
Accessibility Audit (EduPath) — full WCAG 2.1 AA remediation across iOS, Android, web',
|
||||
'{"contact":{"name":"Alex Rivera","email":"alex.rivera@example.com","linkedin":"linkedin.com/in/alexrivera","portfolio":"alexrivera.design"},"summary":"Senior UX Designer with 6 years of experience designing for music, education, and media platforms.","experience":[{"company":"StreamNote","title":"Senior UX Designer","dates":"2023–present","bullets":["Led redesign of core listening queue, reducing abandonment by 31%","Built component library used by 8 product squads","Ran 60+ moderated user research sessions"]},{"company":"EduPath","title":"UX Designer","dates":"2021–2023","bullets":["Designed onboarding and early-habit loop for K–12 app (2.4M DAU)","Shipped streak redesign that improved D7 retention by 18%","Drove accessibility audit (WCAG 2.1 AA)"]},{"company":"Signal Media","title":"Product Designer","dates":"2019–2021","bullets":["Designed editorial tools and reader-facing article experiences","Prototyped and shipped read-later feature (top 2 used within 90 days)"]}],"education":[{"institution":"State University of the Arts","degree":"B.F.A. Graphic Design, Minor in HCI","year":"2019"}],"skills":["Figma","Prototyping","User Research","Usability Testing","Design Systems","Interaction Design","Accessibility (WCAG 2.1)","Cross-Platform","React","SQL","Workshop Facilitation"]}',
|
||||
320,
|
||||
1
|
||||
);
|
||||
|
||||
-- ATS resume optimizer data for approved jobs (Spotify=1, Duolingo=2, NPR=3)
|
||||
-- Spotify: gap report highlights audio/podcast tooling keywords; optimized resume is tailored to those keywords
|
||||
UPDATE jobs SET
|
||||
ats_gap_report = '[{"term":"audio UX","section":"experience","priority":3,"rationale":"Spotify''s JD emphasizes audio product experience; resume mentions music broadly but not audio-specific UX patterns"},{"term":"podcast design","section":"experience","priority":2,"rationale":"Spotify is investing heavily in podcast tooling; related experience at Signal Media could be framed around audio content"},{"term":"cross-platform mobile","section":"skills","priority":2,"rationale":"JD specifies iOS and Android explicitly; resume lists cross-platform but not mobile-first framing"},{"term":"A/B testing","section":"experience","priority":1,"rationale":"JD mentions data-driven iteration; resume does not reference experimentation framework"}]',
|
||||
optimized_resume = 'ALEX RIVERA
|
||||
UX Designer · Audio Product · Cross-Platform Design
|
||||
alex.rivera@example.com · linkedin.com/in/alexrivera · Portfolio: alexrivera.design
|
||||
|
||||
SUMMARY
|
||||
Senior UX Designer specializing in audio and media product design. 6 years of experience shipping cross-platform features used by millions — with a focus on music discovery, content navigation, and habit-forming interactions. Comfortable moving from user research to pixel-perfect specs to cross-functional alignment.
|
||||
|
||||
EXPERIENCE
|
||||
|
||||
Senior UX Designer — StreamNote (2023–present)
|
||||
- Led redesign of the core listening queue (audio UX) — reduced abandonment 31% across iOS, Android, and web
|
||||
- Designed podcast chapter navigation prototype; validated with 8 user sessions, handed off to eng in Q3
|
||||
- Built Figma component library (tokens + variants) used by 8 product squads — cut design-to-dev handoff time by 40%
|
||||
- Drove A/B test framework with data team: 12 experiments shipped; 7 reached statistical significance
|
||||
|
||||
UX Designer — EduPath (2021–2023)
|
||||
- Designed cross-platform onboarding (iOS/Android/web) for K–12 learning app, 2.4M DAU
|
||||
- Shipped streak redesign with 3 A/B variants — winning variant improved D7 retention by 18%
|
||||
- Full WCAG 2.1 AA remediation across all platforms; filed and closed 47 issues
|
||||
|
||||
Product Designer — Signal Media (2019–2021)
|
||||
- Designed audio and editorial experiences for a digital media publisher
|
||||
- Prototyped and shipped "listen later" feature for podcast content — #2 most-used feature within 90 days
|
||||
- Established shared design system that reduced new-story design time by 60%
|
||||
|
||||
SKILLS
|
||||
Figma · Audio UX · Podcast Design · Cross-Platform (iOS/Android/Web) · Design Systems
|
||||
A/B Testing · User Research · Usability Testing · Accessibility (WCAG 2.1) · Interaction Design
|
||||
|
||||
EDUCATION
|
||||
B.F.A. Graphic Design, Minor in HCI — State University of the Arts, 2019'
|
||||
WHERE id = 1;
|
||||
|
||||
-- Duolingo: gap report highlights gamification, retention, and learning science keywords
|
||||
UPDATE jobs SET
|
||||
ats_gap_report = '[{"term":"gamification","section":"experience","priority":3,"rationale":"Duolingo''s entire product is built on gamification mechanics; streak work at EduPath is highly relevant but not explicitly framed"},{"term":"streak mechanics","section":"experience","priority":3,"rationale":"Duolingo invented the streak; EduPath streak redesign is directly applicable and should be foregrounded"},{"term":"learning science","section":"experience","priority":2,"rationale":"JD references behavioral psychology; resume does not mention research-backed habit design"},{"term":"localization","section":"skills","priority":1,"rationale":"Duolingo ships to 40+ languages; internationalization experience or awareness would strengthen application"}]',
|
||||
optimized_resume = 'ALEX RIVERA
|
||||
UX Designer · Gamification · Learning Products
|
||||
alex.rivera@example.com · linkedin.com/in/alexrivera · Portfolio: alexrivera.design
|
||||
|
||||
SUMMARY
|
||||
UX Designer with 6 years of experience in education and media products. Designed habit-forming experiences grounded in behavioral research — streak systems, onboarding flows, and retention mechanics — for apps with millions of daily active users. Passionate about learning products that feel like play.
|
||||
|
||||
EXPERIENCE
|
||||
|
||||
UX Designer — EduPath (2021–2023)
|
||||
- Redesigned streak and gamification mechanics for K–12 learning app (2.4M DAU) — D7 retention +18%
|
||||
- Applied behavioral science principles (variable reward, loss aversion, social proof) to onboarding flow redesign
|
||||
- Led 30+ user research sessions with students, parents, and teachers; findings shaped product roadmap for 2 quarters
|
||||
- Drove WCAG 2.1 AA accessibility remediation — 47 issues filed and closed across iOS, Android, web
|
||||
|
||||
Senior UX Designer — StreamNote (2023–present)
|
||||
- Designed habit-reinforcing listening queue with personalized recommendations surface — abandonment -31%
|
||||
- Built and scaled Figma design system used by 8 squads; reduced design-to-dev cycle by 40%
|
||||
- Ran A/B tests with data team; 12 experiments across retention and discovery features
|
||||
|
||||
Product Designer — Signal Media (2019–2021)
|
||||
- Designed reader engagement and content-return mechanics for digital news platform
|
||||
- "Read later" feature reached #2 usage within 90 days of launch
|
||||
|
||||
SKILLS
|
||||
Figma · Gamification Design · Habit & Retention Mechanics · User Research · Behavioral UX
|
||||
Learning Products · Accessibility (WCAG 2.1) · Cross-Platform (iOS/Android/Web) · Design Systems
|
||||
|
||||
EDUCATION
|
||||
B.F.A. Graphic Design, Minor in HCI — State University of the Arts, 2019'
|
||||
WHERE id = 2;
|
||||
|
||||
-- NPR: gap report highlights public media, accessibility, and editorial tool experience
|
||||
UPDATE jobs SET
|
||||
ats_gap_report = '[{"term":"public media","section":"experience","priority":3,"rationale":"NPR is a public media org; framing experience around mission-driven media rather than commercial products strengthens fit"},{"term":"editorial tools","section":"experience","priority":3,"rationale":"NPR''s UX Lead role includes internal tools for journalists; Signal Media editorial tools work is directly applicable"},{"term":"accessibility standards","section":"experience","priority":2,"rationale":"NPR serves a broad public audience including listeners with disabilities; WCAG work at EduPath should be prominent"},{"term":"content discovery","section":"experience","priority":2,"rationale":"NPR''s JD mentions listener discovery; StreamNote queue redesign is relevant framing"}]',
|
||||
optimized_resume = 'ALEX RIVERA
|
||||
UX Lead · Public Media · Accessible Design
|
||||
alex.rivera@example.com · linkedin.com/in/alexrivera · Portfolio: alexrivera.design
|
||||
|
||||
SUMMARY
|
||||
Senior UX Designer with 6 years of experience in media, education, and content platforms. Led design for editorial tools, content discovery surfaces, and accessible experiences for mission-driven organizations. Believes design has an obligation to reach all users — especially the ones the industry tends to forget.
|
||||
|
||||
EXPERIENCE
|
||||
|
||||
Senior UX Designer — StreamNote (2023–present)
|
||||
- Led content discovery redesign (listening queue, personalized surfaces) — abandonment -31%
|
||||
- Designed and shipped podcast chapter navigation as a 0-to-1 feature
|
||||
- Built scalable Figma component library used by 8 cross-functional squads
|
||||
- Ran 60+ moderated research sessions; regularly presented findings to CPO and VP Product
|
||||
|
||||
Product Designer — Signal Media (2019–2021)
|
||||
- Designed editorial authoring tools used daily by 120+ journalists — reduced story publish time by 35%
|
||||
- Shipped "read later" feature for a digital news publisher — #2 most-used feature within 90 days
|
||||
- Established shared design system that cut new-template design time by 60%
|
||||
|
||||
UX Designer — EduPath (2021–2023)
|
||||
- Led full WCAG 2.1 AA accessibility audit and remediation across iOS, Android, and web
|
||||
- Designed onboarding and retention flows for a public K–12 learning app (2.4M DAU)
|
||||
- D7 retention +18% following streak redesign; results shared at company all-hands
|
||||
|
||||
SKILLS
|
||||
Figma · Editorial & Publishing Tools · Content Discovery UX · Accessibility (WCAG 2.1 AA)
|
||||
Public-Facing Product Design · User Research · Cross-Platform · Design Systems
|
||||
|
||||
EDUCATION
|
||||
B.F.A. Graphic Design, Minor in HCI — State University of the Arts, 2019'
|
||||
WHERE id = 3;
|
||||
|
||||
-- company_research for interview-stage jobs
|
||||
-- Job 13: Asana (phone_screen, interview 2026-04-15)
|
||||
INSERT INTO company_research (job_id, generated_at, company_brief, ceo_brief, talking_points, tech_brief, funding_brief, competitors_brief, red_flags, accessibility_brief, scrape_used, raw_output) VALUES (
|
||||
13,
|
||||
'2026-04-14T09:00:00',
|
||||
'Asana is a work management platform founded in 2008 by Dustin Moskovitz and Justin Rosenstein (both ex-Facebook). Headquartered in San Francisco, Asana went public on the NYSE in September 2020 via a direct listing. The product focuses on project and task management for teams, with a strong emphasis on clarity of ownership and cross-functional coordination. It serves over 130,000 paying customers across 190+ countries. Asana''s design philosophy centers on removing ambiguity from work — a principle that directly shapes product design decisions. The company has made significant investments in AI-assisted task management through its "AI Studio" features, launched in 2024.',
|
||||
'Dustin Moskovitz, co-founder and CEO, is known for a thoughtful management style and genuine interest in org design and well-being at work. He is a co-founder of the effective altruism movement and the Open Philanthropy Project. Expect questions and conversation that reflect a values-driven culture — mission alignment matters here. Anne Raimondi is COO and a well-regarded operations leader.',
|
||||
'["Asana''s design team works closely with the Core Product and Platform squads — ask how design embeds with engineering","Recent focus on AI features (AI Studio, smart task assignment) — familiarity with AI UX patterns will land well","Asana''s brand voice is unusually distinct — understand their design language before the call","Ask about the cross-functional collaboration model: how does design influence roadmap priority?","The role is hybrid SF — clarify expectations around in-office days upfront"]',
|
||||
'Asana is built primarily on React (frontend), Python and PHP (backend), and uses a proprietary data model (the Asana object graph) that drives their real-time sync. Their design team uses Figma heavily. They have invested in their own design system ("Alchemy") which underpins the entire product.',
|
||||
'Asana went public via direct listing (NYSE: ASAN) in September 2020. Revenue in FY2025 was approximately $726M, with consistent double-digit YoY growth. The company has been investing in profitability — operating losses have narrowed significantly. No recent acquisition activity.',
|
||||
'Primary competitors: Monday.com, ClickUp, Notion (project management use cases), Jira (for engineering teams), and Microsoft Project. Asana differentiates on simplicity, clear ownership model, and enterprise reliability over raw feature count.',
|
||||
NULL,
|
||||
'Asana has published an accessibility statement and maintains WCAG 2.1 AA compliance across their core product. Their employee ERGs include groups for disability and neurodiversity. The company scores above average on Glassdoor for work-life balance. Their San Francisco HQ has dedicated quiet spaces and standing desks.',
|
||||
0,
|
||||
'Asana company research generated for phone screen 2026-04-15. Sources: public filings, company blog, Glassdoor.'
|
||||
);
|
||||
|
||||
-- Job 14: Notion (interviewing, panel 2026-04-22)
|
||||
INSERT INTO company_research (job_id, generated_at, company_brief, ceo_brief, talking_points, tech_brief, funding_brief, competitors_brief, red_flags, accessibility_brief, scrape_used, raw_output) VALUES (
|
||||
14,
|
||||
'2026-04-11T14:30:00',
|
||||
'Notion is an all-in-one workspace tool combining notes, docs, wikis, and project management. Founded in 2013, relaunched in 2018 after a near-failure. Headquartered in San Francisco, with a significant remote-first culture. Notion reached a $10B valuation in its 2021 funding round and has since focused on consolidation and profitability. The product is unusually design-forward — Notion''s UI is considered a benchmark in the industry for flexibility without overwhelming complexity. Their 2023–2024 push into AI (Notion AI) added LLM-powered writing and summarization directly into the workspace. The product design team is small-but-influential and works closely with the founders.',
|
||||
'Ivan Zhao is co-founder and CEO, known for being deeply product-focused and aesthetically driven. He has described Notion as an attempt to make software feel like a craftsman''s tool. Akshay Kothari is co-founder and COO. The culture reflects the founders'' values: deliberate, high-craft, opinionated. Expect the panel to include designers or PMs who will probe your design sensibility and taste.',
|
||||
'["Notion''s design team is small and influential — expect ownership of end-to-end features, not component-level work","AI features (Notion AI) are a major current initiative — come with opinions on how AI should integrate into a workspace without disrupting user flow","Notion''s design language is a competitive moat — study it carefully before the panel","Panel likely includes a PM, a senior designer, and possibly a founder — tailor your portfolio walk to each audience","Ask about the product design team structure: how many designers, how do they embed with eng, what does the IC path look like?"]',
|
||||
'Notion is built on a React frontend with a custom block-based data model. Their backend uses Postgres and Kafka for real-time sync. Notion AI uses third-party LLM providers (Anthropic, OpenAI) via API. The design team uses Figma and maintains a well-documented internal design system.',
|
||||
'Notion raised $275M at a $10B valuation in October 2021 (led by Sequoia and Coatue). The company has not announced further funding rounds; public commentary suggests a path to profitability. ARR estimated at $300–500M as of 2024.',
|
||||
'Competitors include Confluence (Atlassian), Coda, Linear (for engineering-focused workflows), Obsidian (local-first notes), and increasingly Asana and ClickUp for project management use cases. Notion''s differentiator is its flexible block model and strong brand identity with knowledge workers.',
|
||||
'Some employee reviews mention that the small team size means high ownership but also that projects can pivot quickly. Design headcount has been stable post-2022 layoffs. Worth asking about team stability in the panel.',
|
||||
'Notion has made public commitments to WCAG 2.1 AA compliance but has received community feedback that keyboard navigation in the block editor has gaps. Their 2024 accessibility roadmap addressed the most commonly reported issues. The company has a neurodiversity ERG and remote-first culture (async-friendly).',
|
||||
0,
|
||||
'Notion company research generated for panel interview 2026-04-22. Sources: public filings, company blog, community accessibility reports.'
|
||||
);
|
||||
|
||||
-- Job 15: Figma (hired — research used during interview cycle)
|
||||
INSERT INTO company_research (job_id, generated_at, company_brief, ceo_brief, talking_points, tech_brief, funding_brief, competitors_brief, red_flags, accessibility_brief, scrape_used, raw_output) VALUES (
|
||||
15,
|
||||
'2026-03-13T11:00:00',
|
||||
'Figma is the leading browser-based design tool, founded in 2012 by Dylan Field and Evan Wallace. Headquartered in San Francisco. Figma disrupted the design tool market with its collaborative, multiplayer approach — Google Docs for design. The product includes Figma Design, FigJam (whiteboarding), and Dev Mode (engineering handoff). Adobe''s attempted $20B acquisition was blocked by UK and EU regulators in 2023; Figma received a $1B termination fee. Post-Adobe, Figma has accelerated independent investment in AI features and a new "Figma Make" prototyping tool. The Design Systems team (the role you accepted) is responsible for the core component and token infrastructure used across all Figma products.',
|
||||
'Dylan Field, co-founder and CEO, is known for being deeply technical and product-obsessed. He joined the board of OpenAI. Post-Adobe-deal fallout, Field has been publicly focused on Figma''s independent growth trajectory. Expect a culture of high standards and genuine product craft. Noah Levin leads the design org.',
|
||||
'["You are joining the Design Systems team — the infrastructure team for Figma''s own product design","Your work will directly impact every other designer at Figma — high visibility, high leverage","Figma uses its own product (dogfooding) — you will be designing in Figma for Figma","Key initiative: AI-assisted component generation in Figma Make — design systems input is critical","You are the first external hire in this role since the Adobe deal fell through — ask about team direction post-acquisition"]',
|
||||
'Figma''s frontend is React with a custom WebGL rendering engine (written in Rust + WASM) for the canvas. This is some of the most sophisticated browser-based graphics code in production. Dev Mode connects to GitHub, Storybook, and VS Code. The design system team works in Figma and outputs tokens that connect to code via Figma''s token pipeline.',
|
||||
'Figma received a $1B termination fee from Adobe when the acquisition was blocked in late 2023. The company raised $200M at a $10B valuation in 2021. With the termination fee and strong ARR, Figma is well-capitalized for independent growth. No IPO timeline announced publicly.',
|
||||
'Primary competitor is Sketch (declining market share), with Adobe XD effectively sunset. Framer is a growing competitor for prototyping. Penpot (open-source) is gaining traction in privacy-conscious and European markets. Figma''s multiplayer and browser-based approach remains a strong moat.',
|
||||
NULL,
|
||||
'Figma has an active accessibility team and public blog posts on designing accessible components. Their design system (the one you will be contributing to) includes built-in accessibility annotations and ARIA guidance. The company has disability and neurodiversity ERGs. Remote-friendly with SF HQ.',
|
||||
0,
|
||||
'Figma company research generated for interviewing stage 2026-03-13. Sources: company blog, public filings, design community.'
|
||||
);
|
||||
3164
dev-api.py
|
|
@ -1,14 +0,0 @@
|
|||
#!/bin/sh
|
||||
# Start the cf-orch agent. Adds --advertise-host only when CF_ORCH_ADVERTISE_HOST is set.
|
||||
set -e
|
||||
|
||||
ARGS="--coordinator ${CF_ORCH_COORDINATOR_URL:-http://host.docker.internal:7700} \
|
||||
--node-id ${CF_ORCH_NODE_ID:-peregrine} \
|
||||
--host 0.0.0.0 \
|
||||
--port ${CF_ORCH_AGENT_PORT:-7701}"
|
||||
|
||||
if [ -n "${CF_ORCH_ADVERTISE_HOST}" ]; then
|
||||
ARGS="$ARGS --advertise-host ${CF_ORCH_ADVERTISE_HOST}"
|
||||
fi
|
||||
|
||||
exec cf-orch agent $ARGS
|
||||
|
|
@ -2,8 +2,6 @@ server {
|
|||
listen 80;
|
||||
server_name _;
|
||||
|
||||
client_max_body_size 20m;
|
||||
|
||||
root /usr/share/nginx/html;
|
||||
index index.html;
|
||||
|
||||
|
|
@ -22,19 +20,6 @@ server {
|
|||
add_header Cache-Control "public, immutable";
|
||||
}
|
||||
|
||||
# Handle /peregrine/ base path — used when accessed directly (no Caddy prefix stripping).
|
||||
# The ^~ modifier stops nginx from checking regex locations once this prefix matches, so assets at /peregrine/assets/... are served correctly.
|
||||
location ^~ /peregrine/assets/ {
|
||||
alias /usr/share/nginx/html/assets/;
|
||||
expires 1y;
|
||||
add_header Cache-Control "public, immutable";
|
||||
}
|
||||
|
||||
location /peregrine/ {
|
||||
alias /usr/share/nginx/html/;
|
||||
try_files $uri $uri/ /index.html;
|
||||
}
|
||||
|
||||
# SPA fallback — must come after API and assets
|
||||
location / {
|
||||
try_files $uri $uri/ /index.html;
|
||||
|
|
|
|||
|
|
@ -144,7 +144,7 @@ Shipped in v0.4.0. Ongoing maintenance and known decisions:
|
|||
|
||||
## Container Runtime
|
||||
|
||||
- ~~**Podman support**~~ — ✅ Done: `Makefile` auto-detects `docker compose` / `podman compose` / `podman-compose`; `compose.podman-gpu.yml` CDI override for GPU profiles; `install.sh` detects existing Podman and skips Docker install.
|
||||
- ~~**Podman support**~~ — ✅ Done: `Makefile` auto-detects `docker compose` / `podman compose` / `podman-compose`; `compose.podman-gpu.yml` CDI override for GPU profiles; `setup.sh` detects existing Podman and skips Docker install.
|
||||
- **FastAPI migration path** — When concurrent-user scale demands it: port Streamlit pages to FastAPI + React/HTMX, keep `scripts/` layer unchanged, replace daemon threads with Celery + Redis. The `scripts/` separation already makes this clean.
|
||||
|
||||
---
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ Thank you for your interest in contributing to Peregrine. This guide covers the
|
|||
## Fork and Clone
|
||||
|
||||
```bash
|
||||
git clone https://git.opensourcesolarpunk.com/Circuit-Forge/peregrine
|
||||
git clone https://git.circuitforge.io/circuitforge/peregrine
|
||||
cd peregrine
|
||||
```
|
||||
|
||||
|
|
@ -102,23 +102,6 @@ Before opening a pull request:
|
|||
|
||||
---
|
||||
|
||||
## Database Migrations
|
||||
|
||||
Peregrine uses a numbered SQL migration system (Rails-style). Each migration is a `.sql` file in the `migrations/` directory at the repo root, named `NNN_description.sql` (e.g. `002_add_foo_column.sql`). Applied migrations are tracked in a `schema_migrations` table in each user database.
|
||||
|
||||
### Adding a migration
|
||||
|
||||
1. Create `migrations/NNN_description.sql` where `NNN` is the next sequential number (zero-padded to 3 digits).
|
||||
2. Write standard SQL — `CREATE TABLE IF NOT EXISTS`, `ALTER TABLE ADD COLUMN`, etc. Keep each migration idempotent where possible.
|
||||
3. Do **not** modify `scripts/db.py`'s legacy `_MIGRATIONS` lists — those are superseded and will be removed once all active databases have been bootstrapped by the migration runner.
|
||||
4. The runner (`scripts/db_migrate.py`) applies pending migrations at startup automatically (both FastAPI and Streamlit paths call `migrate_db(db_path)`).
|
||||
|
||||
### Rollbacks
|
||||
|
||||
SQLite does not support transactional DDL for all statement types. Write forward-only migrations. If you need to undo a schema change, add a new migration that reverses it.
|
||||
|
||||
---
|
||||
|
||||
## What NOT to Do
|
||||
|
||||
- Do not commit `config/user.yaml`, `config/notion.yaml`, `config/email.yaml`, `config/adzuna.yaml`, or any `config/integrations/*.yaml` — all are gitignored
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@ This page walks through a full Peregrine installation from scratch.
|
|||
## Prerequisites
|
||||
|
||||
- **Git** — to clone the repository
|
||||
- **Internet connection** — `install.sh` downloads Docker and other dependencies
|
||||
- **Internet connection** — `setup.sh` downloads Docker and other dependencies
|
||||
- **Operating system**: Ubuntu/Debian, Fedora/RHEL, Arch Linux, or macOS (with Docker Desktop)
|
||||
|
||||
!!! warning "Windows"
|
||||
|
|
@ -18,19 +18,19 @@ This page walks through a full Peregrine installation from scratch.
|
|||
## Step 1 — Clone the repository
|
||||
|
||||
```bash
|
||||
git clone https://git.opensourcesolarpunk.com/Circuit-Forge/peregrine
|
||||
git clone https://git.circuitforge.io/circuitforge/peregrine
|
||||
cd peregrine
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Step 2 — Run install.sh
|
||||
## Step 2 — Run setup.sh
|
||||
|
||||
```bash
|
||||
bash install.sh
|
||||
bash setup.sh
|
||||
```
|
||||
|
||||
`install.sh` performs the following automatically:
|
||||
`setup.sh` performs the following automatically:
|
||||
|
||||
1. **Detects your platform** (Ubuntu/Debian, Fedora/RHEL, Arch, macOS)
|
||||
2. **Installs Git** if not already present
|
||||
|
|
@ -40,10 +40,10 @@ bash install.sh
|
|||
6. **Creates `.env` from `.env.example`** — edit `.env` to customise ports and model storage paths before starting
|
||||
|
||||
!!! note "macOS"
|
||||
`install.sh` installs Docker Desktop via Homebrew (`brew install --cask docker`) then exits. Open Docker Desktop, start it, then re-run the script.
|
||||
`setup.sh` installs Docker Desktop via Homebrew (`brew install --cask docker`) then exits. Open Docker Desktop, start it, then re-run the script.
|
||||
|
||||
!!! note "GPU requirement"
|
||||
For GPU support, `nvidia-smi` must return output before you run `install.sh`. Install your NVIDIA driver first. The Container Toolkit installation will fail silently if the driver is not present.
|
||||
For GPU support, `nvidia-smi` must return output before you run `setup.sh`. Install your NVIDIA driver first. The Container Toolkit installation will fail silently if the driver is not present.
|
||||
|
||||
---
|
||||
|
||||
|
|
@ -107,7 +107,7 @@ The first-run wizard launches automatically. See [First-Run Wizard](first-run-wi
|
|||
Only NVIDIA GPUs are supported. AMD ROCm is not currently supported.
|
||||
|
||||
Requirements:
|
||||
- NVIDIA driver installed and `nvidia-smi` working before running `install.sh`
|
||||
- NVIDIA driver installed and `nvidia-smi` working before running `setup.sh`
|
||||
- CUDA 12.x recommended (CUDA 11.x may work but is untested)
|
||||
- Minimum 8 GB VRAM for `single-gpu` profile with default models
|
||||
- For `dual-gpu`: GPU 0 is assigned to Ollama, GPU 1 to vLLM
|
||||
|
|
|
|||
|
|
@ -4,17 +4,15 @@
|
|||
|
||||
Peregrine automates the full job search lifecycle: discovery, matching, cover letter generation, application tracking, and interview preparation. It is privacy-first and local-first — your data never leaves your machine unless you configure an external integration.
|
||||
|
||||

|
||||
|
||||
---
|
||||
|
||||
## Quick Start
|
||||
|
||||
```bash
|
||||
# 1. Clone and install dependencies
|
||||
git clone https://git.opensourcesolarpunk.com/Circuit-Forge/peregrine
|
||||
git clone https://git.circuitforge.io/circuitforge/peregrine
|
||||
cd peregrine
|
||||
bash install.sh
|
||||
bash setup.sh
|
||||
|
||||
# 2. Start Peregrine
|
||||
make start # no GPU, API-only
|
||||
|
|
@ -31,23 +29,20 @@ The first-run wizard guides you through hardware detection, tier selection, iden
|
|||
|
||||
## Feature Overview
|
||||
|
||||
| Feature | Free | Paid† | Premium |
|
||||
|---------|------|-------|---------|
|
||||
| Feature | Free | Paid | Premium |
|
||||
|---------|------|------|---------|
|
||||
| Job discovery (JobSpy + custom boards) | Yes | Yes | Yes |
|
||||
| Resume keyword matching | Yes | Yes | Yes |
|
||||
| Cover letter generation | BYOK‡ | Yes | Yes |
|
||||
| Company research briefs | BYOK‡ | Yes | Yes |
|
||||
| Interview prep & practice Q&A | BYOK‡ | Yes | Yes |
|
||||
| Cover letter generation | - | Yes | Yes |
|
||||
| Company research briefs | - | Yes | Yes |
|
||||
| Interview prep & practice Q&A | - | Yes | Yes |
|
||||
| Email sync & auto-classification | - | Yes | Yes |
|
||||
| Survey assistant (culture-fit Q&A) | BYOK‡ | Yes | Yes |
|
||||
| Survey assistant (culture-fit Q&A) | - | Yes | Yes |
|
||||
| Integration connectors (Notion, Airtable, etc.) | Partial | Yes | Yes |
|
||||
| Calendar sync (Google, Apple) | - | Yes | Yes |
|
||||
| Cover letter model fine-tuning | - | - | Yes |
|
||||
| Multi-user support | - | - | Yes |
|
||||
|
||||
† **Paid** gives access to CircuitForge's hosted inference — no API key required.
|
||||
‡ **BYOK** (bring your own key) — configure any LLM backend in `config/llm.yaml` (local Ollama/vLLM or an API key) and these features unlock at no charge, regardless of tier.
|
||||
|
||||
See [Tier System](reference/tier-system.md) for the full feature gate table.
|
||||
|
||||
---
|
||||
|
|
@ -63,8 +58,8 @@ See [Tier System](reference/tier-system.md) for the full feature gate table.
|
|||
|
||||
## License
|
||||
|
||||
Core discovery pipeline: [MIT](https://git.opensourcesolarpunk.com/Circuit-Forge/peregrine/src/branch/main/LICENSE-MIT)
|
||||
Core discovery pipeline: [MIT](https://git.circuitforge.io/circuitforge/peregrine/src/branch/main/LICENSE-MIT)
|
||||
|
||||
AI features (cover letter generation, company research, interview prep, UI): [BSL 1.1](https://git.opensourcesolarpunk.com/Circuit-Forge/peregrine/src/branch/main/LICENSE-BSL)
|
||||
AI features (cover letter generation, company research, interview prep, UI): [BSL 1.1](https://git.circuitforge.io/circuitforge/peregrine/src/branch/main/LICENSE-BSL)
|
||||
|
||||
© 2026 Circuit Forge LLC
|
||||
|
|
|
|||
|
|
@ -1 +0,0 @@
|
|||
(function(){var s=document.createElement("script");s.defer=true;s.dataset.domain="docs.circuitforge.tech,circuitforge.tech";s.dataset.api="https://analytics.circuitforge.tech/api/event";s.src="https://analytics.circuitforge.tech/js/script.js";document.head.appendChild(s);})();
|
||||
|
|
@ -337,7 +337,7 @@ webhook_url: "https://discord.com/api/webhooks/..."
|
|||
|
||||
## .env
|
||||
|
||||
Docker port and path overrides. Created from `.env.example` by `install.sh`. Gitignored.
|
||||
Docker port and path overrides. Created from `.env.example` by `setup.sh`. Gitignored.
|
||||
|
||||
```bash
|
||||
# Ports (change if defaults conflict with existing services)
|
||||
|
|
|
|||
|
|
@ -1,157 +0,0 @@
|
|||
# Forgejo Feedback API — Schema & Bug Bot Setup
|
||||
|
||||
## API Endpoints Used
|
||||
|
||||
| Operation | Method | Endpoint |
|
||||
|-----------|--------|----------|
|
||||
| List labels | GET | `/repos/{owner}/{repo}/labels` |
|
||||
| Create label | POST | `/repos/{owner}/{repo}/labels` |
|
||||
| Create issue | POST | `/repos/{owner}/{repo}/issues` |
|
||||
| Upload attachment | POST | `/repos/{owner}/{repo}/issues/{index}/assets` |
|
||||
| Post comment | POST | `/repos/{owner}/{repo}/issues/{index}/comments` |
|
||||
|
||||
Base URL: `https://git.opensourcesolarpunk.com/api/v1`
|
||||
|
||||
---
|
||||
|
||||
## Issue Creation Payload
|
||||
|
||||
```json
|
||||
POST /repos/{owner}/{repo}/issues
|
||||
{
|
||||
"title": "string",
|
||||
"body": "markdown string",
|
||||
"labels": [1, 2, 3] // array of label IDs (not names)
|
||||
}
|
||||
```
|
||||
|
||||
Response (201):
|
||||
```json
|
||||
{
|
||||
"number": 42,
|
||||
"html_url": "https://git.opensourcesolarpunk.com/pyr0ball/peregrine/issues/42"
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Issue Body Structure
|
||||
|
||||
The `build_issue_body()` function produces this markdown layout:
|
||||
|
||||
```markdown
|
||||
## 🐛 Bug | ✨ Feature Request | 💬 Other
|
||||
|
||||
<user description>
|
||||
|
||||
### Reproduction Steps ← bug type only, when repro provided
|
||||
|
||||
<repro steps>
|
||||
|
||||
### Context
|
||||
|
||||
- **page:** Home
|
||||
- **version:** v0.2.5-61-ga6d787f ← from `git describe`; "dev" inside Docker
|
||||
- **tier:** free | paid | premium
|
||||
- **llm_backend:** ollama | vllm | claude_code | ...
|
||||
- **os:** Linux-6.8.0-65-generic-x86_64-with-glibc2.39
|
||||
- **timestamp:** 2026-03-06T15:58:29Z
|
||||
|
||||
<details>
|
||||
<summary>App Logs (last 100 lines)</summary>
|
||||
|
||||
```
|
||||
... log content (PII masked) ...
|
||||
```
|
||||
|
||||
</details>
|
||||
|
||||
### Recent Listings ← only when include_diag = True
|
||||
|
||||
- [Title @ Company](url)
|
||||
|
||||
---
|
||||
*Submitted by: Name <email>* ← only when attribution consent checked
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Screenshot Attachment
|
||||
|
||||
Screenshots are uploaded as issue assets, then embedded inline via a follow-up comment:
|
||||
|
||||
```markdown
|
||||
### Screenshot
|
||||
|
||||

|
||||
```
|
||||
|
||||
This keeps the issue body clean and puts the screenshot in a distinct comment.
|
||||
|
||||
---
|
||||
|
||||
## Labels
|
||||
|
||||
| Label | Color | Applied when |
|
||||
|-------|-------|-------------|
|
||||
| `beta-feedback` | `#0075ca` | Always |
|
||||
| `needs-triage` | `#e4e669` | Always |
|
||||
| `bug` | `#d73a4a` | Type = Bug |
|
||||
| `feature-request` | `#a2eeef` | Type = Feature Request |
|
||||
| `question` | `#d876e3` | Type = Other |
|
||||
|
||||
Labels are looked up by name on each submission; missing ones are auto-created via `_ensure_labels()`.
|
||||
|
||||
---
|
||||
|
||||
## Bug Bot Account Setup
|
||||
|
||||
The token currently bundled in `.env` is pyr0ball's personal token. For beta distribution,
|
||||
create a dedicated bot account so the token has limited scope and can be rotated independently.
|
||||
|
||||
### Why a bot account?
|
||||
|
||||
- Token gets bundled in beta testers' `.env` — shouldn't be tied to the repo owner's account
|
||||
- Bot can be limited to issue write only (cannot push code, see private repos, etc.)
|
||||
- Token rotation doesn't affect the owner's other integrations
|
||||
|
||||
### Steps (requires Forgejo admin panel — API admin access not available on this token)
|
||||
|
||||
1. **Create bot account** at `https://git.opensourcesolarpunk.com/-/admin/users/new`
|
||||
- Username: `peregrine-bot` (or `cf-bugbot`)
|
||||
- Email: a real address you control (e.g. `bot+peregrine@circuitforge.tech`)
|
||||
- Set a strong password (store in your password manager)
|
||||
- Check "Prohibit login" if you want a pure API-only account
|
||||
|
||||
2. **Add as collaborator** on `pyr0ball/peregrine`:
|
||||
- Settings → Collaborators → Add `peregrine-bot` with **Write** access
|
||||
- Write access is required to create labels; issue creation alone would need only Read+Comment
|
||||
|
||||
3. **Generate API token** (log in as the bot, or use admin impersonation):
|
||||
- User Settings → Applications → Generate New Token
|
||||
- Name: `peregrine-feedback`
|
||||
- Scopes: `issue` (write) — no repo code access needed
|
||||
- Copy the token — it won't be shown again
|
||||
|
||||
4. **Update environment**:
|
||||
```
|
||||
FORGEJO_API_TOKEN=<new bot token>
|
||||
FORGEJO_REPO=pyr0ball/peregrine
|
||||
FORGEJO_API_URL=https://git.opensourcesolarpunk.com/api/v1
|
||||
```
|
||||
Update both `.env` (dev machine) and any beta tester `.env` files.
|
||||
|
||||
5. **Verify** the bot can create issues:
|
||||
```bash
|
||||
curl -s -X POST https://git.opensourcesolarpunk.com/api/v1/repos/pyr0ball/peregrine/issues \
|
||||
-H "Authorization: token <bot-token>" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"title":"[TEST] bot token check","body":"safe to close","labels":[]}'
|
||||
```
|
||||
Expected: HTTP 201 with `number` and `html_url` in response.
|
||||
|
||||
### Future: Heimdall token management
|
||||
|
||||
Once Heimdall is live, the bot token should be served by the license server rather than
|
||||
bundled in `.env`. The app would fetch it at startup using the user's license key, so the token is
|
||||
never stored on disk and can be rotated server-side. Track as a future Heimdall feature.
|
||||
|
Before Width: | Height: | Size: 94 KiB |
|
Before Width: | Height: | Size: 220 KiB |
|
Before Width: | Height: | Size: 72 KiB |
|
Before Width: | Height: | Size: 99 KiB |
|
Before Width: | Height: | Size: 98 KiB |
|
Before Width: | Height: | Size: 35 KiB |
|
Before Width: | Height: | Size: 73 KiB |
|
|
@ -1,7 +1,5 @@
|
|||
# Apply Workspace
|
||||
|
||||

|
||||
|
||||
The Apply Workspace is where you generate cover letters, export application documents, and record that you have applied to a job.
|
||||
|
||||
---
|
||||
|
|
|
|||
|
|
@ -1,7 +1,5 @@
|
|||
# Job Review
|
||||
|
||||

|
||||
|
||||
The Job Review page is where you approve or reject newly discovered jobs before they enter the application pipeline.
|
||||
|
||||
---
|
||||
|
|
|
|||
73
manage.sh
|
|
@ -15,11 +15,6 @@ cd "$SCRIPT_DIR"
|
|||
|
||||
PROFILE="${PROFILE:-remote}"
|
||||
|
||||
# ── Compose engine detection ──────────────────────────────────────────────────
|
||||
COMPOSE="$(command -v docker >/dev/null 2>&1 && docker compose version >/dev/null 2>&1 \
|
||||
&& echo "docker compose" \
|
||||
|| (command -v podman >/dev/null 2>&1 && echo "podman compose" || echo "podman-compose"))"
|
||||
|
||||
# ── Usage ────────────────────────────────────────────────────────────────────
|
||||
usage() {
|
||||
echo ""
|
||||
|
|
@ -33,10 +28,9 @@ usage() {
|
|||
echo -e " ${GREEN}start${NC} Start Peregrine (preflight → up)"
|
||||
echo -e " ${GREEN}stop${NC} Stop all services"
|
||||
echo -e " ${GREEN}restart${NC} Restart all services"
|
||||
echo -e " ${GREEN}build [service]${NC} Rebuild image(s) without restarting (default: api web)"
|
||||
echo -e " ${GREEN}status${NC} Show running containers"
|
||||
echo -e " ${GREEN}logs [service]${NC} Tail logs (default: api)"
|
||||
echo -e " ${GREEN}update${NC} Pull latest images + rebuild"
|
||||
echo -e " ${GREEN}logs [service]${NC} Tail logs (default: app)"
|
||||
echo -e " ${GREEN}update${NC} Pull latest images + rebuild app"
|
||||
echo -e " ${GREEN}preflight${NC} Check ports + resources; write .env"
|
||||
echo -e " ${GREEN}models${NC} Check ollama models in config; pull any missing"
|
||||
echo -e " ${GREEN}test${NC} Run test suite"
|
||||
|
|
@ -47,12 +41,6 @@ usage() {
|
|||
echo -e " ${GREEN}clean${NC} Remove containers, images, volumes (DESTRUCTIVE)"
|
||||
echo -e " ${GREEN}open${NC} Open the web UI in your browser"
|
||||
echo ""
|
||||
echo -e " Cloud / demo commands:"
|
||||
echo -e " ${GREEN}cloud-start${NC} Start the cloud stack (peregrine-cloud)"
|
||||
echo -e " ${GREEN}cloud-restart${NC} Rebuild + restart the cloud stack"
|
||||
echo -e " ${GREEN}demo-start${NC} Start the demo stack (peregrine-demo)"
|
||||
echo -e " ${GREEN}demo-restart${NC} Rebuild + restart the demo stack"
|
||||
echo ""
|
||||
echo " Profiles (set via --profile or PROFILE env var):"
|
||||
echo " remote API-only, no local inference (default)"
|
||||
echo " cpu Local Ollama inference on CPU"
|
||||
|
|
@ -82,7 +70,7 @@ while [[ $# -gt 0 ]]; do
|
|||
esac
|
||||
done
|
||||
|
||||
SERVICE="${1:-api}" # used by `logs` command
|
||||
SERVICE="${1:-app}" # used by `logs` command
|
||||
|
||||
# ── Dependency guard ──────────────────────────────────────────────────────────
|
||||
# Commands that delegate to make; others (status, logs, update, open, setup) run fine without it.
|
||||
|
|
@ -96,7 +84,7 @@ case "$CMD" in
|
|||
|
||||
setup)
|
||||
info "Running dependency installer..."
|
||||
bash install.sh
|
||||
bash setup.sh
|
||||
;;
|
||||
|
||||
preflight)
|
||||
|
|
@ -113,7 +101,7 @@ case "$CMD" in
|
|||
start)
|
||||
info "Starting Peregrine (PROFILE=${PROFILE})..."
|
||||
make start PROFILE="$PROFILE"
|
||||
# `cut` exits 0 even when grep matched nothing, so a `|| echo` fallback on the
# pipeline never fires without pipefail; apply the default on the empty result.
PORT="$(grep -m1 '^VUE_PORT=' .env 2>/dev/null | cut -d= -f2 || true)"
PORT="${PORT:-8506}"
|
||||
# `cut` exits 0 even when grep matched nothing, so a `|| echo` fallback on the
# pipeline never fires without pipefail; apply the default on the empty result.
PORT="$(grep -m1 '^STREAMLIT_PORT=' .env 2>/dev/null | cut -d= -f2 || true)"
PORT="${PORT:-8501}"
|
||||
success "Peregrine is up → http://localhost:${PORT}"
|
||||
;;
|
||||
|
||||
|
|
@ -126,30 +114,33 @@ case "$CMD" in
|
|||
restart)
|
||||
info "Restarting (PROFILE=${PROFILE})..."
|
||||
make restart PROFILE="$PROFILE"
|
||||
# `cut` exits 0 even when grep matched nothing, so a `|| echo` fallback on the
# pipeline never fires without pipefail; apply the default on the empty result.
PORT="$(grep -m1 '^VUE_PORT=' .env 2>/dev/null | cut -d= -f2 || true)"
PORT="${PORT:-8506}"
|
||||
# `cut` exits 0 even when grep matched nothing, so a `|| echo` fallback on the
# pipeline never fires without pipefail; apply the default on the empty result.
PORT="$(grep -m1 '^STREAMLIT_PORT=' .env 2>/dev/null | cut -d= -f2 || true)"
PORT="${PORT:-8501}"
|
||||
success "Peregrine restarted → http://localhost:${PORT}"
|
||||
;;
|
||||
|
||||
status)
|
||||
# Auto-detect compose engine same way Makefile does
|
||||
COMPOSE="$(command -v docker >/dev/null 2>&1 && docker compose version >/dev/null 2>&1 \
|
||||
&& echo "docker compose" \
|
||||
|| (command -v podman >/dev/null 2>&1 && echo "podman compose" || echo "podman-compose"))"
|
||||
$COMPOSE ps
|
||||
;;
|
||||
|
||||
logs)
|
||||
COMPOSE="$(command -v docker >/dev/null 2>&1 && docker compose version >/dev/null 2>&1 \
|
||||
&& echo "docker compose" \
|
||||
|| (command -v podman >/dev/null 2>&1 && echo "podman compose" || echo "podman-compose"))"
|
||||
info "Tailing logs for: ${SERVICE}"
|
||||
$COMPOSE logs -f "$SERVICE"
|
||||
;;
|
||||
|
||||
build)
|
||||
BUILD_SVC="$([[ "${SERVICE}" == "api" ]] && echo "api web" || echo "${SERVICE}")"
|
||||
info "Building ${BUILD_SVC}..."
|
||||
$COMPOSE build $BUILD_SVC
|
||||
success "Build complete. Run './manage.sh restart' to apply."
|
||||
;;
|
||||
|
||||
update)
|
||||
info "Pulling latest images and rebuilding..."
|
||||
info "Pulling latest images and rebuilding app..."
|
||||
COMPOSE="$(command -v docker >/dev/null 2>&1 && docker compose version >/dev/null 2>&1 \
|
||||
&& echo "docker compose" \
|
||||
|| (command -v podman >/dev/null 2>&1 && echo "podman compose" || echo "podman-compose"))"
|
||||
$COMPOSE pull searxng ollama 2>/dev/null || true
|
||||
$COMPOSE build api web
|
||||
$COMPOSE build app web
|
||||
success "Update complete. Run './manage.sh restart' to apply."
|
||||
;;
|
||||
|
||||
|
|
@ -176,7 +167,7 @@ case "$CMD" in
|
|||
;;
|
||||
|
||||
open)
|
||||
# `cut` exits 0 even when grep matched nothing, so a `|| echo` fallback on the
# pipeline never fires without pipefail; apply the default on the empty result.
PORT="$(grep -m1 '^VUE_PORT=' .env 2>/dev/null | cut -d= -f2 || true)"
PORT="${PORT:-8506}"
|
||||
# `cut` exits 0 even when grep matched nothing, so a `|| echo` fallback on the
# pipeline never fires without pipefail; apply the default on the empty result.
PORT="$(grep -m1 '^STREAMLIT_PORT=' .env 2>/dev/null | cut -d= -f2 || true)"
PORT="${PORT:-8501}"
|
||||
URL="http://localhost:${PORT}"
|
||||
info "Opening ${URL}"
|
||||
if command -v xdg-open &>/dev/null; then
|
||||
|
|
@ -206,32 +197,6 @@ case "$CMD" in
|
|||
-v "${@:3}"
|
||||
;;
|
||||
|
||||
cloud-start)
|
||||
info "Starting cloud stack (peregrine-cloud)..."
|
||||
$COMPOSE -f compose.cloud.yml --project-name peregrine-cloud up -d
|
||||
success "Cloud stack up → http://localhost:8508"
|
||||
;;
|
||||
|
||||
cloud-restart)
|
||||
info "Rebuilding + restarting cloud stack (peregrine-cloud)..."
|
||||
$COMPOSE -f compose.cloud.yml --project-name peregrine-cloud build api web
|
||||
$COMPOSE -f compose.cloud.yml --project-name peregrine-cloud up -d
|
||||
success "Cloud stack restarted → http://localhost:8508"
|
||||
;;
|
||||
|
||||
demo-start)
|
||||
info "Starting demo stack (peregrine-demo)..."
|
||||
$COMPOSE -f compose.demo.yml --project-name peregrine-demo up -d
|
||||
success "Demo stack up → http://localhost:8504"
|
||||
;;
|
||||
|
||||
demo-restart)
|
||||
info "Rebuilding + restarting demo stack (peregrine-demo)..."
|
||||
$COMPOSE -f compose.demo.yml --project-name peregrine-demo build api web
|
||||
$COMPOSE -f compose.demo.yml --project-name peregrine-demo up -d
|
||||
success "Demo stack restarted → http://localhost:8504"
|
||||
;;
|
||||
|
||||
help|--help|-h)
|
||||
usage
|
||||
;;
|
||||
|
|
|
|||
|
|
@ -1,97 +0,0 @@
|
|||
-- Migration 001: Baseline schema
|
||||
-- Captures the full schema as of v0.8.5 (all columns including those added via ALTER TABLE)
|
||||
|
||||
CREATE TABLE IF NOT EXISTS jobs (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
title TEXT,
|
||||
company TEXT,
|
||||
url TEXT UNIQUE,
|
||||
source TEXT,
|
||||
location TEXT,
|
||||
is_remote INTEGER DEFAULT 0,
|
||||
salary TEXT,
|
||||
description TEXT,
|
||||
match_score REAL,
|
||||
keyword_gaps TEXT,
|
||||
date_found TEXT,
|
||||
status TEXT DEFAULT 'pending',
|
||||
notion_page_id TEXT,
|
||||
cover_letter TEXT,
|
||||
applied_at TEXT,
|
||||
interview_date TEXT,
|
||||
rejection_stage TEXT,
|
||||
phone_screen_at TEXT,
|
||||
interviewing_at TEXT,
|
||||
offer_at TEXT,
|
||||
hired_at TEXT,
|
||||
survey_at TEXT,
|
||||
calendar_event_id TEXT,
|
||||
optimized_resume TEXT,
|
||||
ats_gap_report TEXT
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS job_contacts (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
job_id INTEGER,
|
||||
direction TEXT,
|
||||
subject TEXT,
|
||||
from_addr TEXT,
|
||||
to_addr TEXT,
|
||||
body TEXT,
|
||||
received_at TEXT,
|
||||
is_response_needed INTEGER DEFAULT 0,
|
||||
responded_at TEXT,
|
||||
message_id TEXT,
|
||||
stage_signal TEXT,
|
||||
suggestion_dismissed INTEGER DEFAULT 0
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS company_research (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
job_id INTEGER UNIQUE,
|
||||
generated_at TEXT,
|
||||
company_brief TEXT,
|
||||
ceo_brief TEXT,
|
||||
talking_points TEXT,
|
||||
raw_output TEXT,
|
||||
tech_brief TEXT,
|
||||
funding_brief TEXT,
|
||||
competitors_brief TEXT,
|
||||
red_flags TEXT,
|
||||
scrape_used INTEGER DEFAULT 0,
|
||||
accessibility_brief TEXT
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS background_tasks (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
task_type TEXT,
|
||||
job_id INTEGER,
|
||||
params TEXT,
|
||||
status TEXT DEFAULT 'pending',
|
||||
error TEXT,
|
||||
created_at TEXT,
|
||||
started_at TEXT,
|
||||
finished_at TEXT,
|
||||
stage TEXT,
|
||||
updated_at TEXT
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS survey_responses (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
job_id INTEGER,
|
||||
survey_name TEXT,
|
||||
received_at TEXT,
|
||||
source TEXT,
|
||||
raw_input TEXT,
|
||||
image_path TEXT,
|
||||
mode TEXT,
|
||||
llm_output TEXT,
|
||||
reported_score REAL,
|
||||
created_at TEXT
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS digest_queue (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
job_contact_id INTEGER UNIQUE,
|
||||
created_at TEXT
|
||||
);
|
||||
|
|
@ -1,7 +0,0 @@
|
|||
-- Add ATS resume optimizer columns introduced in v0.8.x.
|
||||
-- Existing DBs that were created before the baseline included these columns
|
||||
-- need this migration to add them. Safe to run on new DBs: IF NOT EXISTS guards
|
||||
-- are not available for ADD COLUMN in SQLite, so we use a try/ignore pattern
|
||||
-- at the application level (db_migrate.py wraps each migration in a transaction).
|
||||
ALTER TABLE jobs ADD COLUMN optimized_resume TEXT;
|
||||
ALTER TABLE jobs ADD COLUMN ats_gap_report TEXT;
|
||||
|
|
@ -1,3 +0,0 @@
|
|||
-- Resume review draft and version archive columns (migration 003)
|
||||
ALTER TABLE jobs ADD COLUMN resume_draft_json TEXT;
|
||||
ALTER TABLE jobs ADD COLUMN resume_archive_json TEXT;
|
||||
|
|
@ -1,5 +0,0 @@
|
|||
-- Migration 004: add resume_final_struct to jobs table
|
||||
-- Stores the approved resume as a structured JSON dict alongside the plain text
|
||||
-- (resume_optimized_text). Enables YAML export and future re-processing without
|
||||
-- re-parsing the plain text.
|
||||
ALTER TABLE jobs ADD COLUMN resume_final_struct TEXT;
|
||||
|
|
@ -1,17 +0,0 @@
|
|||
-- 005_resumes_table.sql
|
||||
-- Resume library: named saved resumes per user (optimizer output, imports, manual)
|
||||
|
||||
CREATE TABLE IF NOT EXISTS resumes (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
name TEXT NOT NULL,
|
||||
source TEXT NOT NULL DEFAULT 'manual',
|
||||
job_id INTEGER REFERENCES jobs(id),
|
||||
text TEXT NOT NULL,
|
||||
struct_json TEXT,
|
||||
word_count INTEGER,
|
||||
is_default INTEGER NOT NULL DEFAULT 0,
|
||||
created_at TEXT NOT NULL DEFAULT (datetime('now')),
|
||||
updated_at TEXT NOT NULL DEFAULT (datetime('now'))
|
||||
);
|
||||
|
||||
ALTER TABLE jobs ADD COLUMN resume_id INTEGER REFERENCES resumes(id);
|
||||
|
|
@ -1,6 +0,0 @@
|
|||
-- 006_date_posted.sql
|
||||
-- Add date_posted column for shadow listing detection (stale/shadow score feature).
|
||||
-- New DBs already have this column from the CREATE TABLE statement in db.py;
|
||||
-- this migration adds it to existing user DBs.
|
||||
|
||||
ALTER TABLE jobs ADD COLUMN date_posted TEXT;
|
||||
|
|
@ -1,22 +0,0 @@
|
|||
-- Migration 006: Add columns and tables present in the live DB but missing from migrations
|
||||
-- These were added via direct ALTER TABLE after the v0.8.5 baseline was written.
|
||||
|
||||
-- date_posted: used for ghost-post shadow-score detection
|
||||
ALTER TABLE jobs ADD COLUMN date_posted TEXT;
|
||||
|
||||
-- hired_feedback: JSON blob saved when a job reaches the 'hired' outcome
|
||||
ALTER TABLE jobs ADD COLUMN hired_feedback TEXT;
|
||||
|
||||
-- references_ table: contacts who can provide references for applications
|
||||
CREATE TABLE IF NOT EXISTS references_ (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
name TEXT NOT NULL,
|
||||
relationship TEXT,
|
||||
company TEXT,
|
||||
email TEXT,
|
||||
phone TEXT,
|
||||
notes TEXT,
|
||||
tags TEXT,
|
||||
prep_email TEXT,
|
||||
role TEXT
|
||||
);
|
||||
|
|
@ -1,3 +0,0 @@
|
|||
-- 007_resume_sync.sql
|
||||
-- Add synced_at to resumes: ISO datetime of last library↔profile sync, null = never synced.
|
||||
ALTER TABLE resumes ADD COLUMN synced_at TEXT;
|
||||
|
|
@ -1,97 +0,0 @@
|
|||
-- messages: manual log entries and LLM drafts
|
||||
CREATE TABLE IF NOT EXISTS messages (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
job_id INTEGER REFERENCES jobs(id) ON DELETE SET NULL,
|
||||
job_contact_id INTEGER REFERENCES job_contacts(id) ON DELETE SET NULL,
|
||||
type TEXT NOT NULL DEFAULT 'email',
|
||||
direction TEXT,
|
||||
subject TEXT,
|
||||
body TEXT,
|
||||
from_addr TEXT,
|
||||
to_addr TEXT,
|
||||
logged_at TEXT NOT NULL DEFAULT (datetime('now')),
|
||||
approved_at TEXT,
|
||||
template_id INTEGER REFERENCES message_templates(id) ON DELETE SET NULL,
|
||||
osprey_call_id TEXT
|
||||
);
|
||||
|
||||
-- message_templates: built-in seeds and user-created templates
|
||||
CREATE TABLE IF NOT EXISTS message_templates (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
key TEXT UNIQUE,
|
||||
title TEXT NOT NULL,
|
||||
category TEXT NOT NULL DEFAULT 'custom',
|
||||
subject_template TEXT,
|
||||
body_template TEXT NOT NULL,
|
||||
is_builtin INTEGER NOT NULL DEFAULT 0,
|
||||
is_community INTEGER NOT NULL DEFAULT 0,
|
||||
community_source TEXT,
|
||||
created_at TEXT NOT NULL DEFAULT (datetime('now')),
|
||||
updated_at TEXT NOT NULL DEFAULT (datetime('now'))
|
||||
);
|
||||
|
||||
INSERT OR IGNORE INTO message_templates
|
||||
(key, title, category, subject_template, body_template, is_builtin)
|
||||
VALUES
|
||||
(
|
||||
'follow_up',
|
||||
'Following up on my application',
|
||||
'follow_up',
|
||||
'Following up — {{role}} application',
|
||||
'Hi {{recruiter_name}},
|
||||
|
||||
I wanted to follow up on my application for the {{role}} position at {{company}}. I remain very interested in the opportunity and would welcome the chance to discuss my background further.
|
||||
|
||||
Please let me know if there is anything else you need from me.
|
||||
|
||||
Best regards,
|
||||
{{name}}',
|
||||
1
|
||||
),
|
||||
(
|
||||
'thank_you',
|
||||
'Thank you for the interview',
|
||||
'thank_you',
|
||||
'Thank you — {{role}} interview',
|
||||
'Hi {{recruiter_name}},
|
||||
|
||||
Thank you for taking the time to speak with me about the {{role}} role at {{company}}. I enjoyed learning more about the team and the work you are doing.
|
||||
|
||||
I am very excited about this opportunity and look forward to hearing about the next steps.
|
||||
|
||||
Best regards,
|
||||
{{name}}',
|
||||
1
|
||||
),
|
||||
(
|
||||
'accommodation_request',
|
||||
'Accommodation request',
|
||||
'accommodation',
|
||||
'Accommodation request — {{role}} interview',
|
||||
'Hi {{recruiter_name}},
|
||||
|
||||
I am writing to request a reasonable accommodation for my upcoming interview for the {{role}} position. Specifically, I would appreciate:
|
||||
|
||||
{{accommodation_details}}
|
||||
|
||||
Please let me know if you need any additional information. I am happy to discuss this further.
|
||||
|
||||
Thank you,
|
||||
{{name}}',
|
||||
1
|
||||
),
|
||||
(
|
||||
'withdrawal',
|
||||
'Withdrawing my application',
|
||||
'withdrawal',
|
||||
'Application withdrawal — {{role}}',
|
||||
'Hi {{recruiter_name}},
|
||||
|
||||
I am writing to let you know that I would like to withdraw my application for the {{role}} position at {{company}}.
|
||||
|
||||
Thank you for your time and consideration. I wish you and the team all the best.
|
||||
|
||||
Best regards,
|
||||
{{name}}',
|
||||
1
|
||||
)
|
||||
|
|
@ -1 +0,0 @@
|
|||
ALTER TABLE jobs ADD COLUMN excluded_from_training INTEGER DEFAULT 0;
|
||||
|
|
@ -70,6 +70,3 @@ nav:
|
|||
- Tier System: reference/tier-system.md
|
||||
- LLM Router: reference/llm-router.md
|
||||
- Config Files: reference/config-files.md
|
||||
|
||||
extra_javascript:
|
||||
- plausible.js
|
||||
|
|
|
|||
|
|
@ -1,92 +0,0 @@
|
|||
#!/usr/bin/env bash
|
||||
# podman-standalone.sh — Peregrine rootful Podman setup (no Compose)
|
||||
#
|
||||
# For beta testers running system Podman (non-rootless) with systemd.
|
||||
# Mirrors the manage.sh "remote" profile: app + SearXNG only.
|
||||
# Ollama/vLLM/vision are expected as host services if needed.
|
||||
#
|
||||
# ── Prerequisites ────────────────────────────────────────────────────────────
|
||||
# 1. Clone the repo:
|
||||
# sudo git clone <repo-url> /opt/peregrine
|
||||
#
|
||||
# 2. Build the app image:
|
||||
# cd /opt/peregrine && sudo podman build -t localhost/peregrine:latest .
|
||||
#
|
||||
# 3. Create a config directory and copy the example configs:
|
||||
# sudo mkdir -p /opt/peregrine/{config,data}
|
||||
# sudo cp /opt/peregrine/config/*.example /opt/peregrine/config/
|
||||
# # Edit /opt/peregrine/config/llm.yaml, notion.yaml, etc. as needed
|
||||
#
|
||||
# 4. Run this script:
|
||||
# sudo bash /opt/peregrine/podman-standalone.sh
|
||||
#
|
||||
# ── After setup — generate systemd unit files ────────────────────────────────
|
||||
# sudo podman generate systemd --new --name peregrine-searxng \
|
||||
# | sudo tee /etc/systemd/system/peregrine-searxng.service
|
||||
# sudo podman generate systemd --new --name peregrine \
|
||||
# | sudo tee /etc/systemd/system/peregrine.service
|
||||
# sudo systemctl daemon-reload
|
||||
# sudo systemctl enable --now peregrine-searxng peregrine
|
||||
#
|
||||
# ── SearXNG ──────────────────────────────────────────────────────────────────
|
||||
# Peregrine expects a SearXNG instance with JSON format enabled.
|
||||
# If you already run one, skip the SearXNG container and set the URL in
|
||||
# config/llm.yaml (searxng_url key). The default is http://localhost:8888.
|
||||
#
|
||||
# ── Ports ────────────────────────────────────────────────────────────────────
|
||||
# Peregrine UI → http://localhost:8501
|
||||
#
|
||||
# ── To use a different Streamlit port ────────────────────────────────────────
|
||||
# Uncomment the CMD override at the bottom of the peregrine run block and
|
||||
# change --server.port= (and the port in the --health-cmd URL) to your desired port. The Dockerfile default is 8501.
|
||||
#
|
||||
set -euo pipefail
|
||||
|
||||
REPO_DIR=/opt/peregrine
|
||||
DATA_DIR=/opt/peregrine/data
|
||||
DOCS_DIR=/Library/Documents/JobSearch # ← adjust to your docs path
|
||||
TZ=America/Los_Angeles
|
||||
|
||||
# ── Peregrine App ─────────────────────────────────────────────────────────────
|
||||
# Image is built locally — no registry auto-update label.
|
||||
# To update: sudo podman build -t localhost/peregrine:latest /opt/peregrine
|
||||
# sudo podman restart peregrine
|
||||
#
|
||||
# Env vars: ANTHROPIC_API_KEY, OPENAI_COMPAT_URL, OPENAI_COMPAT_KEY are
|
||||
# optional — only needed if you're using those backends in config/llm.yaml.
|
||||
#
|
||||
# Start the Peregrine app container on the host network.
# Every expansion is quoted so that a path containing spaces (for example a
# customised DOCS_DIR) is passed to podman as a single argument.
sudo podman run -d \
  --name=peregrine \
  --restart=unless-stopped \
  --net=host \
  -v "${REPO_DIR}/config:/app/config:Z" \
  -v "${DATA_DIR}:/app/data:Z" \
  -v "${DOCS_DIR}:/docs:z" \
  -e STAGING_DB=/app/data/staging.db \
  -e DOCS_DIR=/docs \
  -e PYTHONUNBUFFERED=1 \
  -e PYTHONLOGGING=WARNING \
  -e TZ="${TZ}" \
  --health-cmd="curl -f http://localhost:8501/_stcore/health || exit 1" \
  --health-interval=30s \
  --health-timeout=10s \
  --health-start-period=60s \
  --health-retries=3 \
  localhost/peregrine:latest
|
||||
# To override the default port (8501), uncomment and edit the line below,
|
||||
# then remove the image name above and place it at the end of the CMD:
|
||||
# streamlit run app/app.py --server.port=8501 --server.headless=true --server.fileWatcherType=none
|
||||
|
||||
echo ""
|
||||
echo "Peregrine is starting up."
|
||||
echo " App: http://localhost:8501"
|
||||
echo ""
|
||||
echo "Check container health with:"
|
||||
echo " sudo podman ps"
|
||||
echo " sudo podman logs peregrine"
|
||||
echo ""
|
||||
echo "To register as a systemd service:"
|
||||
echo " sudo podman generate systemd --new --name peregrine \\"
|
||||
echo " | sudo tee /etc/systemd/system/peregrine.service"
|
||||
echo " sudo systemctl daemon-reload"
|
||||
echo " sudo systemctl enable --now peregrine"
|
||||
|
|
@ -1,33 +0,0 @@
|
|||
[tool.ruff]
|
||||
# app/ is the deprecated Streamlit UI (replaced by Vue+FastAPI).
|
||||
# No new work goes there; exclude from linting rather than accumulate suppressions.
|
||||
exclude = ["app/"]
|
||||
|
||||
[tool.ruff.lint.per-file-ignores]
|
||||
# dev-api.py / dev_api.py (symlink): E702 semicolons in compact Pydantic model
|
||||
# definitions — intentional style for dense data models with many simple fields.
|
||||
# E402: mid-file module-level imports are intentional in dev-api.py for test patchability.
|
||||
"dev-api.py" = ["E702", "E402"]
|
||||
"dev_api.py" = ["E702", "E402"]
|
||||
|
||||
# finetune_local.py: E402 ML libs (torch, datasets, trl) are imported after
|
||||
# runtime CUDA / Unsloth availability checks — conditional import pattern.
|
||||
"scripts/finetune_local.py" = ["E402", "E741"]
|
||||
|
||||
# scripts/: E402 mid-file imports used for lazy loading or post-env-setup imports.
|
||||
"scripts/task_runner.py" = ["E402"]
|
||||
"scripts/migrate.py" = ["E741"]
|
||||
|
||||
# scrapers/: third-party script; minimal changes policy.
|
||||
"scrapers/companyScraper.py" = ["E722"]
|
||||
|
||||
# tools/: deprecated label tool copy (canonical in avocet); suppress style warnings.
|
||||
"tools/label_tool.py" = ["E741"]
|
||||
|
||||
# tests/: F841 unused variables are the standard mock-patch capture pattern
|
||||
# (e.g., `original_fn = obj.method` before monkeypatching).
|
||||
# E741 ambiguous `l` names and E402 conditional imports are common in test fixtures.
|
||||
# E702 compact `con.commit(); con.close()` is a common SQLite test helper idiom.
|
||||
"tests/**" = ["F841", "E741", "E402", "E702"]
|
||||
"tests/test_wizard_steps.py" = ["F841", "E741", "E402", "E702"]
|
||||
"scripts/test_email_classify.py" = ["E402", "F841"]
|
||||
|
|
@ -3,12 +3,10 @@
|
|||
# Keep in sync with environment.yml
|
||||
|
||||
# ── CircuitForge shared core ───────────────────────────────────────────────
|
||||
# Requires circuitforge-core >= 0.8.0 (config.load_env, db, tasks; resources moved to circuitforge-orch).
|
||||
# Local dev / Docker (parent-context build): path install works because
|
||||
# circuitforge-core/ is a sibling directory.
|
||||
# CI / fresh checkouts: falls back to the Forgejo VCS URL below.
|
||||
# To use local editable install run: pip install -e ../circuitforge-core
|
||||
# TODO: pin to @v0.7.0 tag once cf-core cuts a release tag.
|
||||
git+https://git.opensourcesolarpunk.com/Circuit-Forge/circuitforge-core.git@main
|
||||
|
||||
# ── Web UI ────────────────────────────────────────────────────────────────
|
||||
|
|
|
|||
|
|
@ -1,89 +0,0 @@
|
|||
"""
|
||||
Peregrine cloud session — thin wrapper around circuitforge_core.cloud_session.
|
||||
|
||||
Sets request-scoped ContextVars with the authenticated user_id, tier, and
|
||||
custom writing model so that _allocate_orch_async in llm.py can forward them
|
||||
to cf-orch without any service function signature changes.
|
||||
|
||||
Usage — add to main.py once:
|
||||
|
||||
from app.cloud_session import session_middleware_dep
|
||||
app = FastAPI(..., dependencies=[Depends(session_middleware_dep)])
|
||||
|
||||
From that point, any route (and every service/llm function it calls)
|
||||
has access to the current user context via llm.get_request_*() helpers.
|
||||
|
||||
Writing model resolution order (first match wins):
|
||||
1. USER_WRITING_MODELS env var — JSON dict mapping Directus UUID → model name
|
||||
e.g. USER_WRITING_MODELS={"5b99ca9f-...": "meghan-letter-writer:latest"}
|
||||
Use this for Monday; no Heimdall changes required.
|
||||
2. session.meta["custom_writing_model"] — returned by Heimdall resolve endpoint
|
||||
once Heimdall is updated to expose user_preferences fields.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
|
||||
from fastapi import Depends, Request, Response
|
||||
|
||||
from circuitforge_core.cloud_session import CloudSessionFactory, CloudUser, detect_byok
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
__all__ = ["CloudUser", "get_session", "require_tier", "session_middleware_dep"]
|
||||
|
||||
# JSON dict mapping Directus user UUID → custom writing model name.
|
||||
# Used until Heimdall's resolve endpoint exposes user_preferences.
|
||||
def _load_user_writing_models() -> dict[str, str]:
|
||||
raw = os.environ.get("USER_WRITING_MODELS", "").strip()
|
||||
if not raw:
|
||||
return {}
|
||||
try:
|
||||
return json.loads(raw)
|
||||
except json.JSONDecodeError:
|
||||
log.warning("USER_WRITING_MODELS is not valid JSON — ignoring")
|
||||
return {}
|
||||
|
||||
_USER_WRITING_MODELS: dict[str, str] = _load_user_writing_models()
|
||||
|
||||
|
||||
_factory = CloudSessionFactory(
|
||||
product="peregrine",
|
||||
byok_detector=detect_byok,
|
||||
)
|
||||
|
||||
get_session = _factory.dependency()
|
||||
require_tier = _factory.require_tier
|
||||
|
||||
|
||||
def session_middleware_dep(request: Request, response: Response) -> None:
    """Resolve the caller's session and publish it to the request-scoped ContextVars.

    Intended to be registered once as a global FastAPI dependency::

        app = FastAPI(..., dependencies=[Depends(session_middleware_dep)])

    Values published through the ``app.llm`` setters:
      - user_id: the session's user id, or None when it is missing, equals
        "local" / "local-dev", or starts with "anon-"
      - tier: ``session.tier`` exactly as resolved by the session factory
      - writing_model: the USER_WRITING_MODELS entry for this user when there is
        one, otherwise ``session.meta["custom_writing_model"]`` (None if neither)

    NOTE(review): this is a plain ``def`` dependency. FastAPI executes those in a
    worker thread, which presumably operates on a copy of the request context —
    confirm that values set here are actually visible to the route handler.
    """
    from app.llm import set_request_tier, set_request_user_id, set_request_writing_model

    session = _factory.resolve(request, response)

    # Local, dev and anonymous sessions share the common catalog, so only a real
    # cloud id is forwarded.
    raw_id = session.user_id
    uses_shared_catalog = (
        raw_id in (None, "local", "local-dev") or (raw_id or "").startswith("anon-")
    )
    set_request_user_id(None if uses_shared_catalog else raw_id)
    set_request_tier(session.tier)

    # The env-var map takes precedence; session meta is consulted only when the
    # map has no truthy entry for this user.
    mapped_model = _USER_WRITING_MODELS.get(session.user_id)
    set_request_writing_model(mapped_model or session.meta.get("custom_writing_model"))
|
||||
|
|
@ -1,843 +0,0 @@
|
|||
"""LiteLLM wrapper for multi-provider AI support."""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
from contextlib import asynccontextmanager
|
||||
from contextvars import ContextVar
|
||||
from dataclasses import dataclass
|
||||
from typing import Any
|
||||
|
||||
import httpx
|
||||
import litellm
|
||||
from pydantic import BaseModel
|
||||
|
||||
from app.config import settings
|
||||
|
||||
# LLM timeout configuration (seconds) - base values
|
||||
LLM_TIMEOUT_HEALTH_CHECK = 30
|
||||
LLM_TIMEOUT_COMPLETION = 120
|
||||
LLM_TIMEOUT_JSON = 180 # JSON completions may take longer
|
||||
|
||||
# LLM-004: OpenRouter JSON-capable models (explicit allowlist)
|
||||
OPENROUTER_JSON_CAPABLE_MODELS = {
|
||||
# Anthropic models
|
||||
"anthropic/claude-3-opus",
|
||||
"anthropic/claude-3-sonnet",
|
||||
"anthropic/claude-3-haiku",
|
||||
"anthropic/claude-3.5-sonnet",
|
||||
"anthropic/claude-3.5-haiku",
|
||||
"anthropic/claude-haiku-4-5-20251001",
|
||||
"anthropic/claude-sonnet-4-20250514",
|
||||
"anthropic/claude-opus-4-20250514",
|
||||
# OpenAI models
|
||||
"openai/gpt-4-turbo",
|
||||
"openai/gpt-4",
|
||||
"openai/gpt-4o",
|
||||
"openai/gpt-4o-mini",
|
||||
"openai/gpt-3.5-turbo",
|
||||
"openai/gpt-5-nano-2025-08-07",
|
||||
# Google models
|
||||
"google/gemini-pro",
|
||||
"google/gemini-1.5-pro",
|
||||
"google/gemini-1.5-flash",
|
||||
"google/gemini-2.0-flash",
|
||||
"google/gemini-3-flash-preview",
|
||||
# DeepSeek models
|
||||
"deepseek/deepseek-chat",
|
||||
"deepseek/deepseek-reasoner",
|
||||
# Mistral models
|
||||
"mistralai/mistral-large",
|
||||
"mistralai/mistral-medium",
|
||||
}
|
||||
|
||||
# JSON-010: JSON extraction safety limits
|
||||
MAX_JSON_EXTRACTION_RECURSION = 10
|
||||
MAX_JSON_CONTENT_SIZE = 1024 * 1024 # 1MB
|
||||
|
||||
# Per-request caller identity. session_middleware_dep writes these once per request;
# the user id is read back inside _allocate_orch_async. ContextVar is used because
# concurrent async requests each run in their own task with their own copy.
_request_user_id: ContextVar[str | None] = ContextVar("request_user_id", default=None)
_request_tier: ContextVar[str | None] = ContextVar("request_tier", default=None)
# Custom writing model for premium/ultra users, sourced from Heimdall license key meta.
# Stays None for every other tier, in which case complete() uses the shared base model.
_request_writing_model: ContextVar[str | None] = ContextVar("request_writing_model", default=None)

_PREMIUM_TIERS: frozenset[str] = frozenset({"premium", "ultra"})


def set_request_user_id(user_id: str | None) -> None:
    """Record *user_id* for the current request context."""
    _request_user_id.set(user_id)


def get_request_user_id() -> str | None:
    """Return the user id recorded for the current request context, or None."""
    return _request_user_id.get()


def set_request_tier(tier: str | None) -> None:
    """Record *tier* for the current request context."""
    _request_tier.set(tier)


def get_request_tier() -> str | None:
    """Return the tier recorded for the current request context, or None."""
    return _request_tier.get()


def set_request_writing_model(model: str | None) -> None:
    """Record the custom writing *model* for the current request context."""
    _request_writing_model.set(model)


def get_request_writing_model() -> str | None:
    """Return the custom writing model for the current request context, or None."""
    return _request_writing_model.get()
|
||||
|
||||
|
||||
class LLMConfig(BaseModel):
    """Settings that identify one LLM backend: provider, model and credentials."""

    # Required fields.
    provider: str
    model: str
    api_key: str

    # Optional base-URL override; None when no override is configured.
    api_base: str | None = None
|
||||
|
||||
|
||||
@dataclass
|
||||
class _OrchAllocation:
|
||||
allocation_id: str
|
||||
url: str
|
||||
service: str
|
||||
|
||||
|
||||
@asynccontextmanager
async def _allocate_orch_async(
    coordinator_url: str,
    service: str,
    model_candidates: list[str],
    ttl_s: float,
    caller: str,
):
    """Allocate a cf-orch service for the duration of an ``async with`` block.

    POSTs to ``/api/services/{service}/allocate`` on the coordinator, yields an
    ``_OrchAllocation`` built from the response, and on exit sends a DELETE for
    that allocation. The request-scoped user id is included when one is set.

    Raises:
        RuntimeError: if the allocate request does not return a success status.
    """
    async with httpx.AsyncClient(timeout=120.0) as client:
        request_body: dict[str, Any] = {
            "model_candidates": model_candidates,
            "ttl_s": ttl_s,
            "caller": caller,
        }
        current_user = get_request_user_id()
        if current_user:
            request_body["user_id"] = current_user

        service_root = f"{coordinator_url.rstrip('/')}/api/services/{service}"
        reply = await client.post(f"{service_root}/allocate", json=request_body)
        if not reply.is_success:
            raise RuntimeError(
                f"cf-orch allocation failed for {service!r}: "
                f"HTTP {reply.status_code} — {reply.text[:200]}"
            )

        body = reply.json()
        alloc = _OrchAllocation(
            allocation_id=body["allocation_id"],
            url=body["url"],
            service=service,
        )
        try:
            yield alloc
        finally:
            # Release is best-effort: a failure is logged at debug level and
            # never replaces whatever the caller's block raised.
            try:
                await client.delete(
                    f"{service_root}/allocations/{alloc.allocation_id}",
                    timeout=10.0,
                )
            except Exception as exc:
                logging.debug("cf-orch release failed (non-fatal): %s", exc)
|
||||
|
||||
|
||||
def _normalize_api_base(provider: str, api_base: str | None) -> str | None:
|
||||
"""Normalize api_base for LiteLLM provider-specific expectations.
|
||||
|
||||
When using proxies/aggregators, users often paste a base URL that already
|
||||
includes a version segment (e.g., `/v1`). Some LiteLLM provider handlers
|
||||
append those segments internally, which can lead to duplicated paths like
|
||||
`/v1/v1/...` and cause 404s.
|
||||
"""
|
||||
if not api_base:
|
||||
return None
|
||||
|
||||
base = api_base.strip()
|
||||
if not base:
|
||||
return None
|
||||
|
||||
base = base.rstrip("/")
|
||||
|
||||
# Anthropic handler appends '/v1/messages'. If base already ends with '/v1',
|
||||
# strip it to avoid '/v1/v1/messages'.
|
||||
if provider == "anthropic" and base.endswith("/v1"):
|
||||
base = base[: -len("/v1")].rstrip("/")
|
||||
|
||||
# Gemini handler appends '/v1/models/...'. If base already ends with '/v1',
|
||||
# strip it to avoid '/v1/v1/models/...'.
|
||||
if provider == "gemini" and base.endswith("/v1"):
|
||||
base = base[: -len("/v1")].rstrip("/")
|
||||
|
||||
return base or None
|
||||
|
||||
|
||||
def _extract_text_parts(value: Any, depth: int = 0, max_depth: int = 10) -> list[str]:
|
||||
"""Recursively extract text segments from nested response structures.
|
||||
|
||||
Handles strings, lists, dicts with 'text'/'content'/'value' keys, and objects
|
||||
with text/content attributes. Limits recursion depth to avoid cycles.
|
||||
|
||||
Args:
|
||||
value: Input value that may contain text in strings, lists, dicts, or objects.
|
||||
depth: Current recursion depth.
|
||||
max_depth: Maximum recursion depth before returning no content.
|
||||
|
||||
Returns:
|
||||
A list of extracted text segments.
|
||||
"""
|
||||
if depth >= max_depth:
|
||||
return []
|
||||
|
||||
if value is None:
|
||||
return []
|
||||
|
||||
if isinstance(value, str):
|
||||
return [value]
|
||||
|
||||
if isinstance(value, list):
|
||||
parts: list[str] = []
|
||||
next_depth = depth + 1
|
||||
for item in value:
|
||||
parts.extend(_extract_text_parts(item, next_depth, max_depth))
|
||||
return parts
|
||||
|
||||
if isinstance(value, dict):
|
||||
next_depth = depth + 1
|
||||
if "text" in value:
|
||||
return _extract_text_parts(value.get("text"), next_depth, max_depth)
|
||||
if "content" in value:
|
||||
return _extract_text_parts(value.get("content"), next_depth, max_depth)
|
||||
if "value" in value:
|
||||
return _extract_text_parts(value.get("value"), next_depth, max_depth)
|
||||
return []
|
||||
|
||||
next_depth = depth + 1
|
||||
if hasattr(value, "text"):
|
||||
return _extract_text_parts(getattr(value, "text"), next_depth, max_depth)
|
||||
if hasattr(value, "content"):
|
||||
return _extract_text_parts(getattr(value, "content"), next_depth, max_depth)
|
||||
|
||||
return []
|
||||
|
||||
|
||||
def _join_text_parts(parts: list[str]) -> str | None:
|
||||
"""Join text parts with newlines, filtering empty strings.
|
||||
|
||||
Args:
|
||||
parts: Candidate text segments.
|
||||
|
||||
Returns:
|
||||
Joined string or None if the result is empty.
|
||||
"""
|
||||
joined = "\n".join(part for part in parts if part).strip()
|
||||
return joined or None
|
||||
|
||||
|
||||
def _extract_message_text(message: Any) -> str | None:
    """Return the plain text carried by a message, or None if there is none.

    The message may be an object exposing a ``content`` attribute or a dict
    with a 'content' key; anything else is treated as having no content.
    """
    if hasattr(message, "content"):
        raw = message.content
    elif isinstance(message, dict):
        raw = message.get("content")
    else:
        raw = None
    return _join_text_parts(_extract_text_parts(raw))
|
||||
|
||||
|
||||
def _extract_choice_text(choice: Any) -> str | None:
    """Return the plain text carried by a completion choice.

    Sources are tried in this order and the first non-empty one wins:
    the choice's message content, then its ``text``, then its ``delta``.
    Each source is looked up as an attribute and, for dict choices, as a key.

    Args:
        choice: Choice object or dict.

    Returns:
        The extracted text, or None when every source is empty or missing.
    """
    if hasattr(choice, "message"):
        message = choice.message
    elif isinstance(choice, dict):
        message = choice.get("message")
    else:
        message = None

    text = _extract_message_text(message)
    if text:
        return text

    is_mapping = isinstance(choice, dict)
    for field in ("text", "delta"):
        # Attribute access first, then the dict key of the same name.
        if hasattr(choice, field):
            text = _join_text_parts(_extract_text_parts(getattr(choice, field)))
            if text:
                return text
        if is_mapping and field in choice:
            text = _join_text_parts(_extract_text_parts(choice.get(field)))
            if text:
                return text

    return None
|
||||
|
||||
|
||||
def _to_code_block(content: str | None, language: str = "text") -> str:
|
||||
"""Wrap content in a markdown code block for client display."""
|
||||
text = (content or "").strip()
|
||||
if not text:
|
||||
text = "<empty>"
|
||||
return f"```{language}\n{text}\n```"
|
||||
|
||||
|
||||
def _load_stored_config() -> dict:
    """Load the persisted configuration from ``settings.config_path``.

    Returns:
        The parsed JSON object, or an empty dict when the file is missing,
        unreadable, not valid JSON, or does not contain a JSON object.
    """
    try:
        loaded = json.loads(settings.config_path.read_text())
    except (OSError, ValueError):
        # OSError covers a missing or unreadable file (no separate exists()
        # check, so there is no check-then-read race). ValueError covers both
        # json.JSONDecodeError and UnicodeDecodeError.
        return {}
    # Callers use dict methods on the result; valid JSON such as `[]` or
    # `null` must not leak through.
    return loaded if isinstance(loaded, dict) else {}
|
||||
|
||||
|
||||
def get_llm_config() -> LLMConfig:
    """Build the effective LLM configuration.

    Each field comes from the stored config file when the key is present
    there, otherwise from the corresponding ``settings`` attribute.
    """
    overrides = _load_stored_config()
    fallbacks = {
        "provider": settings.llm_provider,
        "model": settings.llm_model,
        "api_key": settings.llm_api_key,
        "api_base": settings.llm_api_base,
    }
    resolved = {
        field: overrides.get(field, fallback) for field, fallback in fallbacks.items()
    }
    return LLMConfig(**resolved)
|
||||
|
||||
|
||||
def get_model_name(config: LLMConfig) -> str:
    """Translate a provider/model pair into a LiteLLM model identifier.

    OpenRouter models always carry the 'openrouter/' prefix (added unless
    already present), because they use nested names such as
    'openrouter/anthropic/claude-3.5-sonnet'. For every other provider a
    model that already starts with a known provider prefix is returned
    unchanged; otherwise the provider's own prefix (if it has one) is added.
    """
    provider = config.provider
    model = config.model

    prefix = {
        "openai": "",  # OpenAI models are passed through without a prefix
        "anthropic": "anthropic/",
        "openrouter": "openrouter/",
        "gemini": "gemini/",
        "deepseek": "deepseek/",
        "ollama": "ollama/",
    }.get(provider, "")

    if provider == "openrouter":
        return model if model.startswith("openrouter/") else f"openrouter/{model}"

    # A model that already names a provider is left alone, whatever the
    # configured provider is.
    already_prefixed = ("openrouter/", "anthropic/", "gemini/", "deepseek/", "ollama/")
    if model.startswith(already_prefixed):
        return model

    # An empty prefix (OpenAI or unknown provider) leaves the model untouched.
    return f"{prefix}{model}"
|
||||
|
||||
|
||||
def _supports_temperature(provider: str, model: str) -> bool:
|
||||
"""Return whether passing `temperature` is supported for this model/provider combo.
|
||||
|
||||
Some models (e.g., OpenAI gpt-5 family) reject temperature values other than 1,
|
||||
and LiteLLM may error when temperature is passed.
|
||||
"""
|
||||
_ = provider
|
||||
model_lower = model.lower()
|
||||
if "gpt-5" in model_lower:
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
def _get_reasoning_effort(provider: str, model: str) -> str | None:
|
||||
"""Return a default reasoning_effort for models that require it.
|
||||
|
||||
Some OpenAI gpt-5 models may return empty message.content unless a supported
|
||||
`reasoning_effort` is explicitly set. This keeps downstream JSON parsing reliable.
|
||||
"""
|
||||
_ = provider
|
||||
model_lower = model.lower()
|
||||
if "gpt-5" in model_lower:
|
||||
return "minimal"
|
||||
return None
|
||||
|
||||
|
||||
async def check_llm_health(
    config: LLMConfig | None = None,
    *,
    include_details: bool = False,
    test_prompt: str | None = None,
) -> dict[str, Any]:
    """Probe the configured LLM provider with a tiny completion request.

    Args:
        config: Configuration to test; the current configuration is loaded
            when omitted.
        include_details: When true, the report also carries the prompt, the
            model output and (on failure) the error text, each rendered as a
            code block.
        test_prompt: Prompt to send; defaults to "Hi".

    Returns:
        A report dict with 'healthy', 'provider' and 'model'. Unhealthy
        reports carry an 'error_code': 'api_key_missing', 'empty_content',
        'duplicate_v1_path', 'not_found_404', 'html_response' or
        'health_check_failed'. Reports built from a response also carry
        'response_model'.
    """
    if config is None:
        config = get_llm_config()

    identity: dict[str, Any] = {"provider": config.provider, "model": config.model}

    # Every provider except Ollama needs an API key before we even try.
    if config.provider != "ollama" and not config.api_key:
        return {"healthy": False, **identity, "error_code": "api_key_missing"}

    model_name = get_model_name(config)
    prompt = test_prompt or "Hi"

    def _with_details(
        report: dict[str, Any],
        model_output: str | None,
        error_detail: str | None = None,
    ) -> dict[str, Any]:
        """Attach the optional diagnostic fields when the caller asked for them."""
        if include_details:
            report["test_prompt"] = _to_code_block(prompt)
            report["model_output"] = _to_code_block(model_output)
            if error_detail is not None:
                report["error_detail"] = _to_code_block(error_detail)
        return report

    try:
        # The API key is passed per request rather than through os.environ to
        # avoid races on global state.
        request: dict[str, Any] = {
            "model": model_name,
            "messages": [{"role": "user", "content": prompt}],
            "max_tokens": 16,
            "api_key": config.api_key,
            "api_base": _normalize_api_base(config.provider, config.api_base),
            "timeout": LLM_TIMEOUT_HEALTH_CHECK,
        }
        effort = _get_reasoning_effort(config.provider, model_name)
        if effort:
            request["reasoning_effort"] = effort

        response = await litellm.acompletion(**request)
        content = _extract_choice_text(response.choices[0])

        if content:
            report: dict[str, Any] = {
                "healthy": True,
                **identity,
                "response_model": response.model if response else None,
            }
            return _with_details(report, content)

        # LLM-003: an empty response marks the provider as unhealthy.
        logging.warning(
            "LLM health check returned empty content",
            extra={"provider": config.provider, "model": config.model},
        )
        report = {
            "healthy": False,
            **identity,
            "response_model": response.model if response else None,
            "error_code": "empty_content",
            "message": "LLM returned empty response",
        }
        return _with_details(report, None)
    except Exception as exc:
        # Full details go to the server log; the client only gets a coarse
        # error code unless include_details was requested.
        logging.exception(
            "LLM health check failed",
            extra={"provider": config.provider, "model": config.model},
        )

        detail = str(exc)
        lowered = detail.lower()
        if "404" in detail and "/v1/v1/" in detail:
            error_code = "duplicate_v1_path"
        elif "404" in detail:
            error_code = "not_found_404"
        elif "<!doctype html" in lowered or "<html" in lowered:
            error_code = "html_response"
        else:
            error_code = "health_check_failed"

        report = {"healthy": False, **identity, "error_code": error_code}
        return _with_details(report, None, detail)
|
||||
|
||||
|
||||
async def complete(
    prompt: str,
    system_prompt: str | None = None,
    config: LLMConfig | None = None,
    max_tokens: int = 4096,
    temperature: float = 0.7,
) -> str:
    """Make a completion request to the LLM and return its text.

    When no config is passed and the CF_ORCH_URL environment variable is set,
    an orchestrator allocation is attempted first and the request is re-issued
    against the allocated endpoint. If anything in that path raises, a warning
    is logged and the request falls back to the default configuration.

    Args:
        prompt: User message content.
        system_prompt: Optional system message placed before the user message.
        config: Explicit LLM configuration; bypasses the orchestrator path.
        max_tokens: Token limit forwarded to the provider.
        temperature: Sampling temperature; only sent when the model supports it.

    Returns:
        The non-empty text of the first choice.

    Raises:
        ValueError: If the call fails or the response is empty. The message is
            generic; the original error is chained as the cause and logged.
    """
    if config is None:
        cf_orch_url = os.environ.get("CF_ORCH_URL", "").strip()
        if cf_orch_url:
            try:
                # Premium/ultra users get their personal fine-tuned writing model as the
                # first candidate; the base model is the fallback so cf-orch can
                # degrade gracefully if the personal model isn't loaded yet.
                tier = get_request_tier()
                writing_model = get_request_writing_model()
                model_candidates: list[str] = (
                    [writing_model, "Qwen2.5-3B-Instruct"]
                    if writing_model and tier in _PREMIUM_TIERS
                    else ["Qwen2.5-3B-Instruct"]
                )
                async with _allocate_orch_async(
                    cf_orch_url,
                    "vllm",
                    model_candidates=model_candidates,
                    ttl_s=300.0,
                    caller="peregrine-resume-matcher",
                ) as alloc:
                    # The allocated endpoint is addressed as an OpenAI-style
                    # server under <alloc.url>/v1.
                    orch_config = LLMConfig(
                        provider="openai",
                        model="__auto__",
                        api_key="any",
                        api_base=alloc.url.rstrip("/") + "/v1",
                    )
                    # Passing an explicit config makes the nested call skip
                    # this orchestrator branch.
                    return await complete(prompt, system_prompt, orch_config, max_tokens, temperature)
            except Exception as exc:
                # NOTE: this handler also catches a failure of the nested
                # complete() call above, not only allocation errors, so the
                # request is then retried with the default config.
                logging.warning("cf-orch allocation failed, falling back to default config: %s", exc)
        config = get_llm_config()

    model_name = get_model_name(config)

    messages = []
    if system_prompt:
        messages.append({"role": "system", "content": system_prompt})
    messages.append({"role": "user", "content": prompt})

    try:
        # Pass API key directly to avoid race conditions with global os.environ
        kwargs: dict[str, Any] = {
            "model": model_name,
            "messages": messages,
            "max_tokens": max_tokens,
            "api_key": config.api_key,
            "api_base": _normalize_api_base(config.provider, config.api_base),
            "timeout": LLM_TIMEOUT_COMPLETION,
        }
        if _supports_temperature(config.provider, model_name):
            kwargs["temperature"] = temperature
        reasoning_effort = _get_reasoning_effort(config.provider, model_name)
        if reasoning_effort:
            kwargs["reasoning_effort"] = reasoning_effort

        response = await litellm.acompletion(**kwargs)

        content = _extract_choice_text(response.choices[0])
        if not content:
            # Raised inside the try block, so it is re-wrapped by the handler
            # below into the generic client-facing error.
            raise ValueError("Empty response from LLM")
        return content
    except Exception as e:
        # Log the actual error server-side for debugging
        logging.error(f"LLM completion failed: {e}", extra={"model": model_name})
        raise ValueError(
            "LLM completion failed. Please check your API configuration and try again."
        ) from e
|
||||
|
||||
|
||||
def _supports_json_mode(provider: str, model: str) -> bool:
|
||||
"""Check if the model supports JSON mode."""
|
||||
# Models that support response_format={"type": "json_object"}
|
||||
json_mode_providers = ["openai", "anthropic", "gemini", "deepseek"]
|
||||
if provider in json_mode_providers:
|
||||
return True
|
||||
# LLM-004: OpenRouter models - use explicit allowlist instead of substring matching
|
||||
if provider == "openrouter":
|
||||
return model in OPENROUTER_JSON_CAPABLE_MODELS
|
||||
return False
|
||||
|
||||
|
||||
def _appears_truncated(data: dict) -> bool:
|
||||
"""LLM-001: Check if JSON data appears to be truncated.
|
||||
|
||||
Detects suspicious patterns indicating incomplete responses.
|
||||
"""
|
||||
if not isinstance(data, dict):
|
||||
return False
|
||||
|
||||
# Check for empty arrays that should typically have content
|
||||
suspicious_empty_arrays = ["workExperience", "education", "skills"]
|
||||
for key in suspicious_empty_arrays:
|
||||
if key in data and data[key] == []:
|
||||
# Log warning - these are rarely empty in real resumes
|
||||
logging.warning(
|
||||
"Possible truncation detected: '%s' is empty",
|
||||
key,
|
||||
)
|
||||
return True
|
||||
|
||||
# Check for missing critical sections
|
||||
required_top_level = ["personalInfo"]
|
||||
for key in required_top_level:
|
||||
if key not in data:
|
||||
logging.warning(
|
||||
"Possible truncation detected: missing required section '%s'",
|
||||
key,
|
||||
)
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
|
||||
def _get_retry_temperature(attempt: int, base_temp: float = 0.1) -> float:
|
||||
"""LLM-002: Get temperature for retry attempt - increases with each retry.
|
||||
|
||||
Higher temperature on retries gives the model more variation to produce
|
||||
different (hopefully valid) output.
|
||||
"""
|
||||
temperatures = [base_temp, 0.3, 0.5, 0.7]
|
||||
return temperatures[min(attempt, len(temperatures) - 1)]
|
||||
|
||||
|
||||
def _calculate_timeout(
    operation: str,
    max_tokens: int = 4096,
    provider: str = "openai",
) -> int:
    """LLM-005: Derive a request timeout from operation, size and provider.

    The base timeout for the operation ('health_check', 'completion' or
    'json'; anything else uses the completion timeout) is multiplied by a
    token factor (``max_tokens / 4096``, never below 1.0) and by a
    per-provider latency factor (1.0 for unknown providers). The product is
    truncated to an int.
    """
    operation_base = {
        "health_check": LLM_TIMEOUT_HEALTH_CHECK,
        "completion": LLM_TIMEOUT_COMPLETION,
        "json": LLM_TIMEOUT_JSON,
    }.get(operation, LLM_TIMEOUT_COMPLETION)

    # Requests at or below the 4096-token baseline are not scaled down.
    size_scale = max(1.0, max_tokens / 4096)

    latency_scale = {
        "openai": 1.0,
        "anthropic": 1.2,
        "openrouter": 1.5,  # more variable latency
        "ollama": 2.0,  # local models can be slower
    }.get(provider, 1.0)

    return int(operation_base * size_scale * latency_scale)
|
||||
|
||||
|
||||
def _extract_json(content: str, _depth: int = 0) -> str:
    """Extract the first JSON object substring from an LLM response.

    Markdown code fences are stripped first. If the remaining text starts
    with '{', it is scanned for the matching closing brace (ignoring braces
    inside string literals) and that span is returned. Otherwise the function
    retries from the first '{' found later in the text. The returned string is
    not parsed here; callers run it through ``json.loads``.

    LLM-001: Improved to detect and reject likely truncated JSON.
    LLM-007: Improved error messages for debugging.
    JSON-010: Added recursion depth and size limits.

    Args:
        content: Raw response text.
        _depth: Internal recursion counter; callers should not pass it.

    Returns:
        The substring from the opening '{' through its matching '}'.

    Raises:
        ValueError: If the recursion or size limit is exceeded, or no
            balanced JSON object can be located.
    """
    # JSON-010: Safety limits
    if _depth > MAX_JSON_EXTRACTION_RECURSION:
        raise ValueError(f"JSON extraction exceeded max recursion depth: {_depth}")
    # NOTE: len() counts characters, although the message says "bytes".
    if len(content) > MAX_JSON_CONTENT_SIZE:
        raise ValueError(f"Content too large for JSON extraction: {len(content)} bytes")

    # Kept only for the error message at the end.
    original = content

    # Remove markdown code blocks
    if "```json" in content:
        # Text between the first ```json fence and the next ``` fence.
        content = content.split("```json")[1].split("```")[0]
    elif "```" in content:
        parts = content.split("```")
        if len(parts) >= 2:
            content = parts[1]
            # Remove language identifier if present (e.g., "json\n{...")
            if content.startswith(("json", "JSON")):
                content = content[4:]

    content = content.strip()

    # If content starts with {, find the matching }
    if content.startswith("{"):
        depth = 0
        end_idx = -1
        in_string = False
        escape_next = False

        for i, char in enumerate(content):
            # The character after a backslash is skipped unconditionally.
            if escape_next:
                escape_next = False
                continue
            if char == "\\":
                escape_next = True
                continue
            # escape_next is always False here; the extra check is redundant.
            if char == '"' and not escape_next:
                in_string = not in_string
                continue
            # Braces inside string literals do not affect nesting.
            if in_string:
                continue
            if char == "{":
                depth += 1
            elif char == "}":
                depth -= 1
                if depth == 0:
                    end_idx = i
                    break

        # LLM-001: Check for unbalanced braces - loop ended without depth reaching 0
        if end_idx == -1 and depth != 0:
            logging.warning(
                "JSON extraction found unbalanced braces (depth=%d), possible truncation",
                depth,
            )

        if end_idx != -1:
            return content[: end_idx + 1]

    # Try to find JSON object in the content (only if not already at start)
    start_idx = content.find("{")
    if start_idx > 0:
        # Only recurse if { is found after position 0 to avoid infinite recursion
        return _extract_json(content[start_idx:], _depth + 1)

    # LLM-007: Log unrecognized format for debugging
    logging.error(
        "Could not extract JSON from response format. Content preview: %s",
        content[:200] if content else "<empty>",
    )
    raise ValueError(f"No JSON found in response: {original[:200]}")
|
||||
|
||||
|
||||
async def complete_json(
    prompt: str,
    system_prompt: str | None = None,
    config: LLMConfig | None = None,
    max_tokens: int = 4096,
    retries: int = 2,
) -> dict[str, Any]:
    """Make a completion request expecting a JSON response.

    Uses JSON mode when available, with retry logic for reliability. When no
    config is passed and the CF_ORCH_URL environment variable is set, an
    orchestrator allocation is attempted first; if that path raises, a warning
    is logged and the default configuration is used instead.

    Args:
        prompt: User message content.
        system_prompt: Optional system text; a JSON-only instruction is
            always appended to it.
        config: Explicit LLM configuration; bypasses the orchestrator path.
        max_tokens: Token limit forwarded to the provider.
        retries: Number of extra attempts after the first one.

    Returns:
        The value produced by ``json.loads`` on the extracted JSON text.

    Raises:
        ValueError: If JSON parsing still fails on the last attempt, or if no
            attempt was made at all (negative ``retries``).
        Exception: Any other error from the last attempt is re-raised as-is.
    """
    if config is None:
        cf_orch_url = os.environ.get("CF_ORCH_URL", "").strip()
        if cf_orch_url:
            try:
                async with _allocate_orch_async(
                    cf_orch_url,
                    "vllm",
                    model_candidates=["Qwen2.5-3B-Instruct"],
                    ttl_s=300.0,
                    caller="peregrine-resume-matcher",
                ) as alloc:
                    orch_config = LLMConfig(
                        provider="openai",
                        model="__auto__",
                        api_key="any",
                        api_base=alloc.url.rstrip("/") + "/v1",
                    )
                    # Passing an explicit config makes the nested call skip
                    # this orchestrator branch.
                    return await complete_json(prompt, system_prompt, orch_config, max_tokens, retries)
            except Exception as exc:
                # NOTE: this also catches failures of the nested call above,
                # not only allocation errors.
                logging.warning("cf-orch allocation failed, falling back to default config: %s", exc)
        config = get_llm_config()

    model_name = get_model_name(config)

    # Build messages
    json_system = (
        system_prompt or ""
    ) + "\n\nYou must respond with valid JSON only. No explanations, no markdown."
    messages = [
        {"role": "system", "content": json_system},
        {"role": "user", "content": prompt},
    ]

    # Check if we can use JSON mode
    use_json_mode = _supports_json_mode(config.provider, config.model)

    last_error = None
    for attempt in range(retries + 1):
        try:
            # Build request kwargs
            # Pass API key directly to avoid race conditions with global os.environ
            kwargs: dict[str, Any] = {
                "model": model_name,
                "messages": messages,
                "max_tokens": max_tokens,
                "api_key": config.api_key,
                "api_base": _normalize_api_base(config.provider, config.api_base),
                "timeout": _calculate_timeout("json", max_tokens, config.provider),
            }
            if _supports_temperature(config.provider, model_name):
                # LLM-002: Increase temperature on retry for variation
                kwargs["temperature"] = _get_retry_temperature(attempt)
            reasoning_effort = _get_reasoning_effort(config.provider, model_name)
            if reasoning_effort:
                kwargs["reasoning_effort"] = reasoning_effort

            # Add JSON mode if supported
            if use_json_mode:
                kwargs["response_format"] = {"type": "json_object"}

            response = await litellm.acompletion(**kwargs)
            content = _extract_choice_text(response.choices[0])

            if not content:
                raise ValueError("Empty response from LLM")

            logging.debug(f"LLM response (attempt {attempt + 1}): {content[:300]}")

            # Extract and parse JSON
            json_str = _extract_json(content)
            result = json.loads(json_str)

            # LLM-001: Check if parsed result appears truncated
            # The check is advisory only: the result is returned either way.
            if isinstance(result, dict) and _appears_truncated(result):
                logging.warning(
                    "Parsed JSON appears truncated, but proceeding with result"
                )

            return result

        except json.JSONDecodeError as e:
            last_error = e
            logging.warning(f"JSON parse failed (attempt {attempt + 1}): {e}")
            if attempt < retries:
                # Add hint to prompt for retry
                # The hint is rebuilt from the original prompt, so it does not
                # accumulate across attempts.
                messages[-1]["content"] = (
                    prompt
                    + "\n\nIMPORTANT: Output ONLY a valid JSON object. Start with { and end with }."
                )
                continue
            raise ValueError(f"Failed to parse JSON after {retries + 1} attempts: {e}")

        except Exception as e:
            # Covers provider errors, empty responses and extraction failures;
            # these are retried without changing the prompt.
            last_error = e
            logging.warning(f"LLM call failed (attempt {attempt + 1}): {e}")
            if attempt < retries:
                continue
            raise

    # Only reachable when the loop body never ran (retries < 0).
    raise ValueError(f"Failed after {retries + 1} attempts: {last_error}")
|
||||
|
|
@ -1,88 +0,0 @@
|
|||
"""FastAPI application entry point."""
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import sys
|
||||
from contextlib import asynccontextmanager
|
||||
|
||||
from fastapi import Depends, FastAPI
|
||||
|
||||
# Fix for Windows: Use ProactorEventLoop for subprocess support (Playwright)
|
||||
if sys.platform == "win32":
|
||||
asyncio.set_event_loop_policy(asyncio.WindowsProactorEventLoopPolicy())
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
|
||||
from app import __version__
|
||||
from app.cloud_session import session_middleware_dep
|
||||
from app.config import settings
|
||||
from app.database import db
|
||||
from app.pdf import close_pdf_renderer, init_pdf_renderer
|
||||
from app.routers import config_router, enrichment_router, health_router, jobs_router, resumes_router
|
||||
|
||||
|
||||
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Manage application startup and shutdown.

    Startup makes sure the data directory exists. The PDF renderer is not
    started here; it initializes lazily on first use. Shutdown closes the PDF
    renderer and the database independently, so a failure in one cleanup step
    is logged and does not prevent the other.
    """
    # --- startup ---
    settings.data_dir.mkdir(parents=True, exist_ok=True)

    yield

    # --- shutdown ---
    try:
        await close_pdf_renderer()
    except Exception as pdf_error:
        logger.error(f"Error closing PDF renderer: {pdf_error}")

    try:
        db.close()
    except Exception as db_error:
        logger.error(f"Error closing database: {db_error}")
|
||||
|
||||
|
||||
# Application instance. session_middleware_dep is registered as a global
# dependency, so it runs for every route of the app.
app = FastAPI(
    title="Resume Matcher API",
    description="AI-powered resume tailoring for job descriptions",
    version=__version__,
    lifespan=lifespan,
    dependencies=[Depends(session_middleware_dep)],
)

# CORS middleware - origins configurable via CORS_ORIGINS env var
# NOTE(review): credentials are allowed together with all methods and headers;
# confirm settings.cors_origins never contains a wildcard origin.
app.add_middleware(
    CORSMiddleware,
    allow_origins=settings.cors_origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Include routers - every router is mounted under the same /api/v1 prefix.
app.include_router(health_router, prefix="/api/v1")
app.include_router(config_router, prefix="/api/v1")
app.include_router(resumes_router, prefix="/api/v1")
app.include_router(jobs_router, prefix="/api/v1")
app.include_router(enrichment_router, prefix="/api/v1")
|
||||
|
||||
|
||||
@app.get("/")
async def root():
    """Return basic service metadata: name, version and docs location."""
    service_info = {
        "name": "Resume Matcher API",
        "version": __version__,
        "docs": "/docs",
    }
    return service_info
|
||||
|
||||
|
||||
# Direct-execution entry point: serve the app with uvicorn on the configured
# host and port.
if __name__ == "__main__":
    # Imported here so uvicorn is only needed when the module is run directly.
    import uvicorn

    uvicorn.run(
        "app.main:app",
        host=settings.host,
        port=settings.port,
        # NOTE(review): auto-reload is always on for this entry point;
        # presumably intended for development only - confirm.
        reload=True,
    )
|
||||
|
|
@ -14,6 +14,7 @@ Enhanced features:
|
|||
|
||||
import argparse
|
||||
import csv
|
||||
import json
|
||||
import os
|
||||
import random
|
||||
import re
|
||||
|
|
|
|||
|
|
@ -31,6 +31,7 @@ sys.path.insert(0, str(Path(__file__).parent.parent))
|
|||
|
||||
from scripts.classifier_adapters import (
|
||||
LABELS,
|
||||
LABEL_DESCRIPTIONS,
|
||||
ClassifierAdapter,
|
||||
GLiClassAdapter,
|
||||
RerankerAdapter,
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@ push updates the existing event rather than creating a duplicate.
|
|||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import uuid
|
||||
import yaml
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from pathlib import Path
|
||||
|
|
|
|||
|
|
@ -277,8 +277,7 @@ def _load_resume_and_keywords() -> tuple[dict, list[str]]:
|
|||
return resume, keywords
|
||||
|
||||
|
||||
def research_company(job: dict, use_scraper: bool = True, on_stage=None,
|
||||
config_path: "Path | None" = None) -> dict:
|
||||
def research_company(job: dict, use_scraper: bool = True, on_stage=None) -> dict:
|
||||
"""
|
||||
Generate a pre-interview research brief for a job.
|
||||
|
||||
|
|
@ -296,7 +295,7 @@ def research_company(job: dict, use_scraper: bool = True, on_stage=None,
|
|||
"""
|
||||
from scripts.llm_router import LLMRouter
|
||||
|
||||
router = LLMRouter(config_path=config_path) if config_path else LLMRouter()
|
||||
router = LLMRouter()
|
||||
research_order = router.config.get("research_fallback_order") or router.config["fallback_order"]
|
||||
company = job.get("company") or "the company"
|
||||
title = job.get("title") or "this role"
|
||||
|
|
|
|||
427
scripts/db.py
|
|
@ -130,32 +130,6 @@ CREATE TABLE IF NOT EXISTS digest_queue (
|
|||
)
|
||||
"""
|
||||
|
||||
CREATE_REFERENCES = """
|
||||
CREATE TABLE IF NOT EXISTS references_ (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
name TEXT NOT NULL,
|
||||
relationship TEXT,
|
||||
company TEXT,
|
||||
email TEXT,
|
||||
phone TEXT,
|
||||
notes TEXT,
|
||||
tags TEXT DEFAULT '[]',
|
||||
created_at TEXT DEFAULT (datetime('now')),
|
||||
updated_at TEXT DEFAULT (datetime('now'))
|
||||
);
|
||||
"""
|
||||
|
||||
CREATE_JOB_REFERENCES = """
|
||||
CREATE TABLE IF NOT EXISTS job_references (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
job_id INTEGER NOT NULL REFERENCES jobs(id) ON DELETE CASCADE,
|
||||
reference_id INTEGER NOT NULL REFERENCES references_(id) ON DELETE CASCADE,
|
||||
prep_email TEXT,
|
||||
rec_letter TEXT,
|
||||
UNIQUE(job_id, reference_id)
|
||||
);
|
||||
"""
|
||||
|
||||
_MIGRATIONS = [
|
||||
("cover_letter", "TEXT"),
|
||||
("applied_at", "TEXT"),
|
||||
|
|
@ -169,9 +143,6 @@ _MIGRATIONS = [
|
|||
("calendar_event_id", "TEXT"),
|
||||
("optimized_resume", "TEXT"), # ATS-rewritten resume text (paid tier)
|
||||
("ats_gap_report", "TEXT"), # JSON gap report (free tier)
|
||||
("date_posted", "TEXT"), # Original posting date from job board (shadow listing detection)
|
||||
("hired_feedback", "TEXT"), # JSON: optional post-hire "what helped" response
|
||||
("excluded_from_training", "INTEGER DEFAULT 0"), # opt-out of training export
|
||||
]
|
||||
|
||||
|
||||
|
|
@ -205,9 +176,6 @@ def _migrate_db(db_path: Path) -> None:
|
|||
conn.execute("ALTER TABLE background_tasks ADD COLUMN params TEXT")
|
||||
except sqlite3.OperationalError:
|
||||
pass # column already exists
|
||||
# Ensure references tables exist (CREATE IF NOT EXISTS is idempotent)
|
||||
conn.execute(CREATE_REFERENCES)
|
||||
conn.execute(CREATE_JOB_REFERENCES)
|
||||
conn.commit()
|
||||
conn.close()
|
||||
|
||||
|
|
@ -221,8 +189,6 @@ def init_db(db_path: Path = DEFAULT_DB) -> None:
|
|||
conn.execute(CREATE_BACKGROUND_TASKS)
|
||||
conn.execute(CREATE_SURVEY_RESPONSES)
|
||||
conn.execute(CREATE_DIGEST_QUEUE)
|
||||
conn.execute(CREATE_REFERENCES)
|
||||
conn.execute(CREATE_JOB_REFERENCES)
|
||||
conn.commit()
|
||||
conn.close()
|
||||
_migrate_db(db_path)
|
||||
|
|
@ -234,11 +200,10 @@ def insert_job(db_path: Path = DEFAULT_DB, job: dict = None) -> Optional[int]:
|
|||
return None
|
||||
conn = sqlite3.connect(db_path)
|
||||
try:
|
||||
status = job.get("status", "pending")
|
||||
cursor = conn.execute(
|
||||
"""INSERT INTO jobs
|
||||
(title, company, url, source, location, is_remote, salary, description, date_found, date_posted, status)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
|
||||
(title, company, url, source, location, is_remote, salary, description, date_found)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)""",
|
||||
(
|
||||
job.get("title", ""),
|
||||
job.get("company", ""),
|
||||
|
|
@ -249,8 +214,6 @@ def insert_job(db_path: Path = DEFAULT_DB, job: dict = None) -> Optional[int]:
|
|||
job.get("salary", ""),
|
||||
job.get("description", ""),
|
||||
job.get("date_found", ""),
|
||||
job.get("date_posted", "") or "",
|
||||
status,
|
||||
),
|
||||
)
|
||||
conn.commit()
|
||||
|
|
@ -382,96 +345,6 @@ def get_optimized_resume(db_path: Path = DEFAULT_DB, job_id: int = None) -> dict
|
|||
}
|
||||
|
||||
|
||||
def save_resume_draft(db_path: Path = DEFAULT_DB, job_id: int = None,
                      draft_json: str = "") -> None:
    """Persist a structured resume review draft (awaiting user approval).

    Args:
        db_path: SQLite database file.
        job_id: Row id in ``jobs``; nothing happens when it is None.
        draft_json: Serialized draft. An empty value is stored as NULL.
    """
    if job_id is None:
        return
    conn = sqlite3.connect(db_path)
    try:
        conn.execute(
            "UPDATE jobs SET resume_draft_json = ? WHERE id = ?",
            (draft_json or None, job_id),
        )
        conn.commit()
    finally:
        # Release the connection even if the UPDATE or commit raises.
        conn.close()
|
||||
|
||||
|
||||
def get_resume_draft(db_path: Path = DEFAULT_DB, job_id: int = None) -> dict | None:
    """Return the pending review draft, or None if no draft is waiting.

    Args:
        db_path: SQLite database file.
        job_id: Row id in ``jobs``.

    Returns:
        The decoded draft, or None when ``job_id`` is None, the job does not
        exist, no draft is stored, or the stored value cannot be decoded.
    """
    if job_id is None:
        return None
    import json

    conn = sqlite3.connect(db_path)
    try:
        conn.row_factory = sqlite3.Row
        row = conn.execute(
            "SELECT resume_draft_json FROM jobs WHERE id = ?", (job_id,)
        ).fetchone()
    finally:
        # Release the connection even if the query raises.
        conn.close()
    if not row or not row["resume_draft_json"]:
        return None
    try:
        return json.loads(row["resume_draft_json"])
    except Exception:
        # A corrupt draft is treated the same as no draft.
        return None
|
||||
|
||||
|
||||
def finalize_resume(db_path: Path = DEFAULT_DB, job_id: int = None,
                    final_text: str = "") -> None:
    """Save approved resume text, archive the previous version, and clear draft.

    The currently stored ``optimized_resume`` (if any) is appended to the
    job's archive with a minute-resolution local timestamp before being
    overwritten by ``final_text``.

    Args:
        db_path: SQLite database file.
        job_id: Row id in ``jobs``; nothing happens when it is None.
        final_text: Approved resume text to store.
    """
    if job_id is None:
        return
    import json
    from datetime import datetime

    conn = sqlite3.connect(db_path)
    try:
        conn.row_factory = sqlite3.Row
        row = conn.execute(
            "SELECT optimized_resume, resume_archive_json FROM jobs WHERE id = ?", (job_id,)
        ).fetchone()
        conn.row_factory = None

        # Archive current finalized version if present
        archive: list = []
        if row:
            if row["resume_archive_json"]:
                try:
                    archive = json.loads(row["resume_archive_json"])
                except Exception:
                    archive = []
                # A stored value that decodes to something other than a list
                # cannot be appended to; start a fresh archive instead.
                if not isinstance(archive, list):
                    archive = []
            if row["optimized_resume"]:
                archive.append({
                    "archived_at": datetime.now().isoformat()[:16],
                    "text": row["optimized_resume"],
                })

        conn.execute(
            "UPDATE jobs SET optimized_resume = ?, resume_draft_json = NULL, "
            "resume_archive_json = ? WHERE id = ?",
            (final_text, json.dumps(archive), job_id),
        )
        conn.commit()
    finally:
        # Release the connection even if a query or the commit raises.
        conn.close()
|
||||
|
||||
|
||||
def get_resume_archive(db_path: Path = DEFAULT_DB, job_id: int | None = None) -> list:
    """Return list of past finalized resume versions (newest archived first).

    Args:
        db_path: SQLite database file to read.
        job_id: Id of the row in ``jobs``; ``None`` yields an empty list.

    Returns:
        The entries stored in ``resume_archive_json`` in reverse storage order,
        or ``[]`` when the job is missing, has no archive, or the stored value
        is not a JSON list.
    """
    if job_id is None:
        return []
    import json

    conn = sqlite3.connect(db_path)
    try:
        conn.row_factory = sqlite3.Row
        row = conn.execute(
            "SELECT resume_archive_json FROM jobs WHERE id = ?", (job_id,)
        ).fetchone()
    finally:
        # Close even when the query raises so the handle is never leaked.
        conn.close()
    if not row or not row["resume_archive_json"]:
        return []
    try:
        entries = json.loads(row["resume_archive_json"])
    except (TypeError, ValueError):
        return []
    # Reversing a JSON object or string would silently return keys/characters,
    # so anything that is not a list is treated as "no archive".
    if not isinstance(entries, list):
        return []
    return entries[::-1]  # newest first
|
||||
|
||||
|
||||
_UPDATABLE_JOB_COLS = {
|
||||
"title", "company", "url", "source", "location", "is_remote",
|
||||
"salary", "description", "match_score", "keyword_gaps",
|
||||
|
|
@ -510,19 +383,6 @@ def mark_applied(db_path: Path = DEFAULT_DB, ids: list[int] = None) -> None:
|
|||
conn.close()
|
||||
|
||||
|
||||
def cancel_task(db_path: Path = DEFAULT_DB, task_id: int = 0) -> bool:
    """Cancel a single queued/running task by id.

    The task row is marked ``failed`` with the error text "Cancelled by user"
    and ``finished_at`` set to the current time. Tasks in any other status are
    left untouched.

    Returns:
        True if a row was updated, False otherwise.
    """
    conn = sqlite3.connect(db_path)
    try:
        count = conn.execute(
            "UPDATE background_tasks SET status='failed', error='Cancelled by user',"
            " finished_at=datetime('now') WHERE id=? AND status IN ('queued','running')",
            (task_id,),
        ).rowcount
        conn.commit()
    finally:
        # Release the handle even if the UPDATE or commit raises.
        conn.close()
    return count > 0
|
||||
|
||||
|
||||
def kill_stuck_tasks(db_path: Path = DEFAULT_DB) -> int:
|
||||
"""Mark all queued/running background tasks as failed. Returns count killed."""
|
||||
conn = sqlite3.connect(db_path)
|
||||
|
|
@ -958,286 +818,3 @@ def get_task_for_job(db_path: Path = DEFAULT_DB, task_type: str = "",
|
|||
).fetchone()
|
||||
conn.close()
|
||||
return dict(row) if row else None
|
||||
|
||||
|
||||
# ── Resume library helpers ────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def _resume_as_dict(row) -> dict:
|
||||
"""Convert a sqlite3.Row from the resumes table to a plain dict."""
|
||||
return {
|
||||
"id": row["id"],
|
||||
"name": row["name"],
|
||||
"source": row["source"],
|
||||
"job_id": row["job_id"],
|
||||
"text": row["text"],
|
||||
"struct_json": row["struct_json"],
|
||||
"word_count": row["word_count"],
|
||||
"is_default": row["is_default"],
|
||||
"created_at": row["created_at"],
|
||||
"updated_at": row["updated_at"],
|
||||
"synced_at": row["synced_at"] if "synced_at" in row.keys() else None,
|
||||
}
|
||||
|
||||
|
||||
def create_resume(
    db_path: Path = DEFAULT_DB,
    name: str = "",
    text: str = "",
    source: str = "manual",
    job_id: int | None = None,
    struct_json: str | None = None,
) -> dict:
    """Add a resume to the library and return the stored row as a dict.

    ``word_count`` is derived from ``text`` by whitespace splitting; an empty
    ``text`` is stored with a count of 0.
    """
    values = (
        name,
        source,
        job_id,
        text,
        struct_json,
        0 if not text else len(text.split()),
    )
    db = sqlite3.connect(db_path)
    db.row_factory = sqlite3.Row
    try:
        cursor = db.execute(
            """INSERT INTO resumes (name, source, job_id, text, struct_json, word_count)
               VALUES (?, ?, ?, ?, ?, ?)""",
            values,
        )
        db.commit()
        # Read the row back so columns not supplied by the INSERT are included.
        stored = db.execute(
            "SELECT * FROM resumes WHERE id=?", (cursor.lastrowid,)
        ).fetchone()
        return _resume_as_dict(stored)
    finally:
        db.close()
|
||||
|
||||
|
||||
def list_resumes(db_path: Path = DEFAULT_DB) -> list[dict]:
    """List every library resume, default entries first and then newest first."""
    db = sqlite3.connect(db_path)
    db.row_factory = sqlite3.Row
    try:
        cursor = db.execute(
            "SELECT * FROM resumes ORDER BY is_default DESC, created_at DESC"
        )
        return list(map(_resume_as_dict, cursor.fetchall()))
    finally:
        db.close()
|
||||
|
||||
|
||||
def get_resume(db_path: Path = DEFAULT_DB, resume_id: int = 0) -> dict | None:
    """Look up one resume by id; returns None when no row matches."""
    db = sqlite3.connect(db_path)
    db.row_factory = sqlite3.Row
    try:
        found = db.execute(
            "SELECT * FROM resumes WHERE id=?", (resume_id,)
        ).fetchone()
        if not found:
            return None
        return _resume_as_dict(found)
    finally:
        db.close()
|
||||
|
||||
|
||||
def update_resume(
    db_path: Path = DEFAULT_DB,
    resume_id: int = 0,
    name: str | None = None,
    text: str | None = None,
) -> dict | None:
    """Change a resume's name and/or text and return the refreshed row.

    Arguments left as ``None`` are not modified. Changing ``text`` also
    recomputes ``word_count``, and every applied change sets ``updated_at`` to
    the current time. Returns ``None`` when no resume has the given id.
    """
    # Collect the statements to run first, then apply them in order.
    pending: list[tuple[str, tuple]] = []
    if name is not None:
        pending.append((
            "UPDATE resumes SET name=?, updated_at=datetime('now') WHERE id=?",
            (name, resume_id),
        ))
    if text is not None:
        pending.append((
            "UPDATE resumes SET text=?, word_count=?, updated_at=datetime('now') WHERE id=?",
            (text, len(text.split()), resume_id),
        ))

    db = sqlite3.connect(db_path)
    db.row_factory = sqlite3.Row
    try:
        for statement, params in pending:
            db.execute(statement, params)
        db.commit()
        refreshed = db.execute(
            "SELECT * FROM resumes WHERE id=?", (resume_id,)
        ).fetchone()
        if not refreshed:
            return None
        return _resume_as_dict(refreshed)
    finally:
        db.close()
|
||||
|
||||
|
||||
def delete_resume(db_path: Path = DEFAULT_DB, resume_id: int = 0) -> None:
    """Remove the resume with the given id; nothing happens if no row matches."""
    db = sqlite3.connect(db_path)
    try:
        params = (resume_id,)
        db.execute("DELETE FROM resumes WHERE id=?", params)
        db.commit()
    finally:
        db.close()
|
||||
|
||||
|
||||
def set_default_resume(db_path: Path = DEFAULT_DB, resume_id: int = 0) -> None:
    """Make ``resume_id`` the default resume and unset the flag everywhere else.

    NOTE(review): the flag is cleared on all rows before it is set, so an id
    that matches no row leaves the library without any default.
    """
    steps = (
        ("UPDATE resumes SET is_default=0", ()),
        ("UPDATE resumes SET is_default=1 WHERE id=?", (resume_id,)),
    )
    db = sqlite3.connect(db_path)
    try:
        for statement, params in steps:
            db.execute(statement, params)
        # Both updates are committed together.
        db.commit()
    finally:
        db.close()
|
||||
|
||||
|
||||
def update_resume_synced_at(db_path: Path = DEFAULT_DB, resume_id: int = 0) -> None:
    """Stamp a library entry's ``synced_at`` with the current time.

    Per the original docstring this is used for the library→profile sync
    direction.
    """
    statement = "UPDATE resumes SET synced_at=datetime('now') WHERE id=?"
    db = sqlite3.connect(db_path)
    try:
        db.execute(statement, (resume_id,))
        db.commit()
    finally:
        db.close()
|
||||
|
||||
|
||||
def update_resume_content(
    db_path: Path = DEFAULT_DB,
    resume_id: int = 0,
    text: str = "",
    struct_json: str | None = None,
) -> None:
    """Overwrite a library entry's text and struct_json and refresh its timestamps.

    ``word_count`` is recomputed from ``text`` (0 for empty text) and both
    ``synced_at`` and ``updated_at`` are set to the current time.

    Called by the profile→library sync path (PUT /api/settings/resume).
    """
    params = (
        text,
        struct_json,
        0 if not text else len(text.split()),
        resume_id,
    )
    db = sqlite3.connect(db_path)
    try:
        db.execute(
            """UPDATE resumes
               SET text=?, struct_json=?, word_count=?,
                   synced_at=datetime('now'), updated_at=datetime('now')
               WHERE id=?""",
            params,
        )
        db.commit()
    finally:
        db.close()
|
||||
|
||||
|
||||
def get_job_resume(db_path: Path = DEFAULT_DB, job_id: int = 0) -> dict | None:
    """Resolve the resume to use for a job.

    The resume linked through ``jobs.resume_id`` wins; otherwise the library
    default (``is_default=1``) is returned; otherwise None.
    """
    # Tried in order; the first query that yields a row decides the result.
    lookups = (
        (
            """SELECT r.* FROM resumes r
               JOIN jobs j ON j.resume_id = r.id
               WHERE j.id=?""",
            (job_id,),
        ),
        ("SELECT * FROM resumes WHERE is_default=1 LIMIT 1", ()),
    )
    db = sqlite3.connect(db_path)
    db.row_factory = sqlite3.Row
    try:
        for statement, params in lookups:
            match = db.execute(statement, params).fetchone()
            if match:
                return _resume_as_dict(match)
        return None
    finally:
        db.close()
|
||||
|
||||
|
||||
def set_job_resume(db_path: Path = DEFAULT_DB, job_id: int = 0, resume_id: int = 0) -> None:
    """Link a library resume to a job by writing ``jobs.resume_id``.

    A linked resume takes precedence over the library default for that job.
    """
    params = (resume_id, job_id)
    db = sqlite3.connect(db_path)
    try:
        db.execute("UPDATE jobs SET resume_id=? WHERE id=?", params)
        db.commit()
    finally:
        db.close()
|
||||
|
||||
# ── Training export helpers ───────────────────────────────────────────────────
|
||||
|
||||
def _strip_greeting(text: str) -> str:
|
||||
"""Remove 'Dear X,' greeting line from cover letter text."""
|
||||
lines = text.splitlines()
|
||||
for i, line in enumerate(lines):
|
||||
stripped_line = line.strip()
|
||||
if stripped_line.lower().startswith("dear ") and stripped_line.endswith((",", ":")):
|
||||
rest = lines[i + 1:]
|
||||
while rest and not rest[0].strip():
|
||||
rest = rest[1:]
|
||||
result = "\n".join(rest).strip()
|
||||
return result if result else text.strip()
|
||||
return text.strip()
|
||||
|
||||
|
||||
def get_db_pairs(db_path: Path) -> list[dict]:
    """Return curation metadata for every qualifying job, excluded ones included.

    A job qualifies when its status is one of applied / phone_screen /
    interviewing / offer / hired and it has a non-empty cover letter. Results
    are ordered by ``applied_at`` descending. Rows flagged
    ``excluded_from_training`` are still returned (with ``excluded=True``) so
    the curation UI can restore them.
    """
    query = (
        "SELECT id, title, company, description, status, "
        " excluded_from_training "
        "FROM jobs "
        "WHERE status IN ('applied','phone_screen','interviewing','offer','hired') "
        " AND cover_letter IS NOT NULL AND cover_letter != '' "
        "ORDER BY applied_at DESC"
    )
    db = sqlite3.connect(db_path)
    db.row_factory = sqlite3.Row
    try:
        records = db.execute(query).fetchall()
    finally:
        db.close()

    pairs: list[dict] = []
    for rec in records:
        title_for_prompt = rec["title"] or "unknown"
        company_for_prompt = rec["company"] or "unknown"
        description = rec["description"] or ""
        pairs.append({
            "job_id": rec["id"],
            "title": rec["title"] or "",
            "company": rec["company"] or "",
            "status": rec["status"],
            "instruction": (
                f"Write a cover letter for the {title_for_prompt} "
                f"position at {company_for_prompt}."
            ),
            # Only the first 200 characters are returned as a preview.
            "input_preview": description[:200],
            "excluded": bool(rec["excluded_from_training"]),
        })
    return pairs
|
||||
|
||||
|
||||
def get_training_pairs(db_path: Path) -> list[dict]:
    """Build Alpaca-format training pairs from qualifying, non-excluded jobs.

    A job qualifies when its status is one of applied / phone_screen /
    interviewing / offer / hired, it has a non-empty cover letter, and
    ``excluded_from_training`` is 0. The cover letter's greeting line is
    removed from the ``output`` field. Results are ordered by ``applied_at``
    descending.

    Used by the JSONL export endpoint.
    """
    query = (
        "SELECT id, title, company, description, cover_letter "
        "FROM jobs "
        "WHERE status IN ('applied','phone_screen','interviewing','offer','hired') "
        " AND cover_letter IS NOT NULL AND cover_letter != '' "
        " AND excluded_from_training = 0 "
        "ORDER BY applied_at DESC"
    )
    db = sqlite3.connect(db_path)
    db.row_factory = sqlite3.Row
    try:
        records = db.execute(query).fetchall()
    finally:
        db.close()

    pairs: list[dict] = []
    for rec in records:
        title_for_prompt = rec["title"] or "unknown"
        company_for_prompt = rec["company"] or "unknown"
        pairs.append({
            "instruction": (
                f"Write a cover letter for the {title_for_prompt} "
                f"position at {company_for_prompt}."
            ),
            "input": rec["description"] or "",
            "output": _strip_greeting(rec["cover_letter"]),
            "source": "db",
            "job_id": rec["id"],
        })
    return pairs
|
||||
|
||||
|
||||
def set_training_exclusion(db_path: Path, job_id: int, excluded: bool) -> None:
    """Write the ``excluded_from_training`` flag of a job as 1 (excluded) or 0."""
    flag = 1 if excluded else 0
    db = sqlite3.connect(db_path)
    try:
        db.execute(
            "UPDATE jobs SET excluded_from_training = ? WHERE id = ?",
            (flag, job_id),
        )
        db.commit()
    finally:
        db.close()
|
||||
|
|
|
|||
|
|
@ -1,122 +0,0 @@
|
|||
"""
|
||||
db_migrate.py — Rails-style numbered SQL migration runner for Peregrine user DBs.
|
||||
|
||||
Migration files live in migrations/ (sibling to this script's parent directory),
|
||||
named NNN_description.sql (e.g. 001_baseline.sql). They are applied in sorted
|
||||
order and tracked in the schema_migrations table so each runs exactly once.
|
||||
|
||||
Usage:
|
||||
from scripts.db_migrate import migrate_db
|
||||
migrate_db(Path("/path/to/user.db"))
|
||||
"""
|
||||
|
||||
import logging
|
||||
import sqlite3
|
||||
from pathlib import Path
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
# Resolved at import time: peregrine repo root / migrations/
|
||||
_MIGRATIONS_DIR = Path(__file__).parent.parent / "migrations"
|
||||
|
||||
_CREATE_MIGRATIONS_TABLE = """
|
||||
CREATE TABLE IF NOT EXISTS schema_migrations (
|
||||
version TEXT PRIMARY KEY,
|
||||
applied_at TEXT NOT NULL DEFAULT (datetime('now'))
|
||||
)
|
||||
"""
|
||||
|
||||
|
||||
def migrate_db(db_path: Path) -> list[str]:
    """Apply any pending migrations to db_path. Returns list of applied versions.

    Every ``*.sql`` file in the migrations directory whose stem is not yet
    recorded in ``schema_migrations`` is executed in sorted filename order and
    then recorded. Statements are run one at a time so that "column/table
    already exists" errors can be treated as no-ops.

    Args:
        db_path: SQLite database file to migrate.

    Returns:
        Versions (file stems) applied by this call, in application order. Empty
        when the migrations directory is missing or nothing is pending.

    Raises:
        RuntimeError: A migration failed; the original exception is chained.
    """
    import re

    # Compiled once instead of on every ALTER TABLE statement.
    add_column_re = re.compile(
        r"ALTER\s+TABLE\s+(\w+)\s+ADD\s+COLUMN\s+(\w+)", re.IGNORECASE
    )
    applied: list[str] = []

    con = sqlite3.connect(db_path)
    try:
        con.execute(_CREATE_MIGRATIONS_TABLE)
        con.commit()

        if not _MIGRATIONS_DIR.is_dir():
            log.warning("migrations/ directory not found at %s — skipping", _MIGRATIONS_DIR)
            return applied

        migration_files = sorted(_MIGRATIONS_DIR.glob("*.sql"))
        if not migration_files:
            return applied

        already_applied = {
            row[0] for row in con.execute("SELECT version FROM schema_migrations")
        }

        for path in migration_files:
            version = path.stem  # e.g. "001_baseline"
            if version in already_applied:
                continue

            sql = path.read_text(encoding="utf-8")
            log.info("Applying migration %s to %s", version, db_path.name)
            try:
                # Remove full-line comments BEFORE splitting on ";". Otherwise a
                # comment such as "-- add column; see ticket" is cut in half and
                # its tail is glued onto the next statement, which then fails
                # with a syntax error.
                sql = "\n".join(
                    ln for ln in sql.splitlines()
                    if not ln.strip().startswith("--")
                )
                # Execute statements individually so that ALTER TABLE ADD COLUMN
                # errors caused by already-existing columns (pre-migration DBs
                # created from a newer schema) are treated as no-ops rather than
                # fatal failures.
                statements = [s.strip() for s in sql.split(";") if s.strip()]
                for stmt in statements:
                    # A trailing comment after a ";" ends up as the first line of
                    # the following chunk, so comment lines are filtered again
                    # per chunk. Checking startswith("--") on the raw chunk
                    # would skip entire multi-line statements whose first line
                    # is a comment.
                    stripped_lines = [
                        ln for ln in stmt.splitlines()
                        if not ln.strip().startswith("--")
                    ]
                    stmt = "\n".join(stripped_lines).strip()
                    if not stmt:
                        continue
                    # Pre-check: if this is ADD COLUMN and the column already exists, skip.
                    # This guards against schema_migrations being ahead of the actual schema
                    # (e.g. DB reset after migrations were recorded).
                    stmt_upper = stmt.upper()
                    if "ALTER TABLE" in stmt_upper and "ADD COLUMN" in stmt_upper:
                        # Extract table name and column name from the statement
                        m = add_column_re.match(stmt)
                        if m:
                            tbl, col = m.group(1), m.group(2)
                            # tbl is constrained to \w+ by the regex, so
                            # interpolating it into the PRAGMA is safe.
                            existing = {
                                row[1]
                                for row in con.execute(f"PRAGMA table_info({tbl})")
                            }
                            if col in existing:
                                log.info(
                                    "Migration %s: column %s.%s already exists, skipping",
                                    version, tbl, col,
                                )
                                continue
                    try:
                        con.execute(stmt)
                    except sqlite3.OperationalError as stmt_exc:
                        msg = str(stmt_exc).lower()
                        if "duplicate column name" in msg or "already exists" in msg:
                            log.info(
                                "Migration %s: statement already applied, skipping: %s",
                                version, stmt_exc,
                            )
                        else:
                            raise
                con.execute(
                    "INSERT INTO schema_migrations (version) VALUES (?)", (version,)
                )
                con.commit()
                applied.append(version)
                log.info("Migration %s applied successfully", version)
            except Exception as exc:
                con.rollback()
                log.error("Migration %s failed: %s", version, exc)
                raise RuntimeError(f"Migration {version} failed: {exc}") from exc
    finally:
        con.close()

    return applied
|
||||
|
|
@ -34,38 +34,11 @@ CUSTOM_SCRAPERS: dict[str, object] = {
|
|||
}
|
||||
|
||||
|
||||
def _normalize_profiles(raw: dict) -> dict:
|
||||
"""Normalize search_profiles.yaml to the canonical {profiles: [...]} format.
|
||||
|
||||
The onboarding wizard (pre-fix) wrote a flat `default: {...}` structure.
|
||||
Canonical format is `profiles: [{name, titles/job_titles, boards, ...}]`.
|
||||
This converts on load so both formats work without a migration.
|
||||
"""
|
||||
if "profiles" in raw:
|
||||
return raw
|
||||
# Wizard-written format: top-level keys are profile names (usually "default")
|
||||
profiles = []
|
||||
for name, body in raw.items():
|
||||
if not isinstance(body, dict):
|
||||
continue
|
||||
# job_boards: [{name, enabled}] → boards: [name] (enabled only)
|
||||
job_boards = body.pop("job_boards", None)
|
||||
if job_boards and "boards" not in body:
|
||||
body["boards"] = [b["name"] for b in job_boards if b.get("enabled", True)]
|
||||
# blocklist_* keys live in load_blocklist, not per-profile — drop them
|
||||
body.pop("blocklist_companies", None)
|
||||
body.pop("blocklist_industries", None)
|
||||
body.pop("blocklist_locations", None)
|
||||
profiles.append({"name": name, **body})
|
||||
return {"profiles": profiles}
|
||||
|
||||
|
||||
def load_config(config_dir: Path | None = None) -> tuple[dict, dict]:
|
||||
cfg = config_dir or CONFIG_DIR
|
||||
profiles_path = cfg / "search_profiles.yaml"
|
||||
notion_path = cfg / "notion.yaml"
|
||||
raw = yaml.safe_load(profiles_path.read_text()) or {}
|
||||
profiles = _normalize_profiles(raw)
|
||||
profiles = yaml.safe_load(profiles_path.read_text())
|
||||
notion_cfg = yaml.safe_load(notion_path.read_text()) if notion_path.exists() else {"field_map": {}, "token": None, "database_id": None}
|
||||
return profiles, notion_cfg
|
||||
|
||||
|
|
@ -239,43 +212,14 @@ def run_discovery(db_path: Path = DEFAULT_DB, notion_push: bool = False, config_
|
|||
_rp = profile.get("remote_preference", "both")
|
||||
_is_remote: bool | None = True if _rp == "remote" else (False if _rp == "onsite" else None)
|
||||
|
||||
# When filtering for remote-only, also drop hybrid roles at the description level.
|
||||
# Job boards (especially LinkedIn) tag hybrid listings as is_remote=True, so the
|
||||
# board-side filter alone is not reliable. We match specific work-arrangement
|
||||
# phrases to avoid false positives like "hybrid cloud" or "hybrid architecture".
|
||||
_HYBRID_PHRASES = [
|
||||
"hybrid role", "hybrid position", "hybrid work", "hybrid schedule",
|
||||
"hybrid model", "hybrid arrangement", "hybrid opportunity",
|
||||
"in-office/remote", "in office/remote", "remote/in-office",
|
||||
"remote/office", "office/remote",
|
||||
"days in office", "days per week in", "days onsite", "days on-site",
|
||||
"required to be in office", "required in office",
|
||||
]
|
||||
if _rp == "remote":
|
||||
exclude_kw = exclude_kw + _HYBRID_PHRASES
|
||||
|
||||
for location in profile["locations"]:
|
||||
|
||||
# ── JobSpy boards ──────────────────────────────────────────────────
|
||||
if boards:
|
||||
# Validate boards against the installed JobSpy Site enum.
|
||||
# One unsupported name in the list aborts the entire scrape_jobs() call.
|
||||
try:
|
||||
from jobspy import Site as _Site
|
||||
_valid = {s.value for s in _Site}
|
||||
_filtered = [b for b in boards if b in _valid]
|
||||
_dropped = [b for b in boards if b not in _valid]
|
||||
if _dropped:
|
||||
print(f" [jobspy] Skipping unsupported boards: {', '.join(_dropped)}")
|
||||
except ImportError:
|
||||
_filtered = boards # fallback: pass through unchanged
|
||||
if not _filtered:
|
||||
print(f" [jobspy] No valid boards for {location} — skipping")
|
||||
continue
|
||||
print(f" [jobspy] {location} — boards: {', '.join(_filtered)}")
|
||||
print(f" [jobspy] {location} — boards: {', '.join(boards)}")
|
||||
try:
|
||||
jobspy_kwargs: dict = dict(
|
||||
site_name=_filtered,
|
||||
site_name=boards,
|
||||
search_term=" OR ".join(f'"{t}"' for t in (profile.get("titles") or profile.get("job_titles", []))),
|
||||
location=location,
|
||||
results_wanted=results_per_board,
|
||||
|
|
@ -307,10 +251,6 @@ def run_discovery(db_path: Path = DEFAULT_DB, notion_push: bool = False, config_
|
|||
elif job_dict.get("salary_source") and str(job_dict["salary_source"]) not in ("nan", "None", ""):
|
||||
salary_str = str(job_dict["salary_source"])
|
||||
|
||||
_dp = job_dict.get("date_posted")
|
||||
date_posted_str = (
|
||||
_dp.isoformat() if hasattr(_dp, "isoformat") else str(_dp)
|
||||
) if _dp and str(_dp) not in ("nan", "None", "") else ""
|
||||
row = {
|
||||
"url": url,
|
||||
"title": _s(job_dict.get("title")),
|
||||
|
|
@ -320,7 +260,6 @@ def run_discovery(db_path: Path = DEFAULT_DB, notion_push: bool = False, config_
|
|||
"is_remote": bool(job_dict.get("is_remote", False)),
|
||||
"salary": salary_str,
|
||||
"description": _s(job_dict.get("description")),
|
||||
"date_posted": date_posted_str,
|
||||
"_exclude_kw": exclude_kw,
|
||||
}
|
||||
if _insert_if_new(row, _s(job_dict.get("site"))):
|
||||
|
|
|
|||
|
|
@ -323,6 +323,6 @@ if gguf_path and gguf_path.exists():
|
|||
else:
|
||||
print(f"\n{'='*60}")
|
||||
print(" Adapter saved (no GGUF produced).")
|
||||
print(" Re-run without --no-gguf to generate a GGUF for Ollama registration.")
|
||||
print(f" Re-run without --no-gguf to generate a GGUF for Ollama registration.")
|
||||
print(f" Adapter path: {adapter_path}")
|
||||
print(f"{'='*60}\n")
|
||||
|
|
|
|||
|
|
@ -16,8 +16,6 @@ import re
|
|||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import yaml
|
||||
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||
|
||||
from scripts.user_profile import UserProfile
|
||||
|
|
@ -42,57 +40,107 @@ def _build_system_context(profile=None) -> str:
|
|||
return " ".join(parts)
|
||||
|
||||
SYSTEM_CONTEXT = _build_system_context()
|
||||
_candidate = _profile.name if _profile else "the candidate"
|
||||
|
||||
|
||||
# ── Mission-alignment detection ───────────────────────────────────────────────
|
||||
# Domains and their keyword signals are loaded from config/mission_domains.yaml.
|
||||
# When a company/JD signals one of these preferred industries, the cover letter
|
||||
# prompt injects a hint so Para 3 can reflect genuine personal connection.
|
||||
# This does NOT disclose any personal disability or family information.
|
||||
|
||||
_MISSION_DOMAINS_PATH = Path(__file__).parent.parent / "config" / "mission_domains.yaml"
|
||||
|
||||
|
||||
def load_mission_domains(path: Path | None = None) -> dict[str, dict]:
|
||||
"""Load mission domain config from YAML. Returns dict keyed by domain name."""
|
||||
p = path or _MISSION_DOMAINS_PATH
|
||||
if not p.exists():
|
||||
return {}
|
||||
with p.open(encoding="utf-8") as fh:
|
||||
data = yaml.safe_load(fh)
|
||||
return data.get("domains", {}) if data else {}
|
||||
|
||||
|
||||
_MISSION_DOMAINS: dict[str, dict] = load_mission_domains()
|
||||
_MISSION_SIGNALS: dict[str, list[str]] = {
|
||||
domain: cfg.get("signals", []) for domain, cfg in _MISSION_DOMAINS.items()
|
||||
"music": [
|
||||
"music", "spotify", "tidal", "soundcloud", "bandcamp", "apple music",
|
||||
"distrokid", "cd baby", "landr", "beatport", "reverb", "vinyl",
|
||||
"streaming", "artist", "label", "live nation", "ticketmaster", "aeg",
|
||||
"songkick", "concert", "venue", "festival", "audio", "podcast",
|
||||
"studio", "record", "musician", "playlist",
|
||||
],
|
||||
"animal_welfare": [
|
||||
"animal", "shelter", "rescue", "humane society", "spca", "aspca",
|
||||
"veterinary", "vet ", "wildlife", "pet ", "adoption", "foster",
|
||||
"dog", "cat", "feline", "canine", "sanctuary", "zoo",
|
||||
],
|
||||
"education": [
|
||||
"education", "school", "learning", "student", "edtech", "classroom",
|
||||
"curriculum", "tutoring", "academic", "university", "kids", "children",
|
||||
"youth", "literacy", "khan academy", "duolingo", "chegg", "coursera",
|
||||
"instructure", "canvas lms", "clever", "district", "teacher",
|
||||
"k-12", "k12", "grade", "pedagogy",
|
||||
],
|
||||
"social_impact": [
|
||||
"nonprofit", "non-profit", "501(c)", "social impact", "mission-driven",
|
||||
"public benefit", "community", "underserved", "equity", "justice",
|
||||
"humanitarian", "advocacy", "charity", "foundation", "ngo",
|
||||
"social good", "civic", "public health", "mental health", "food security",
|
||||
"housing", "homelessness", "poverty", "workforce development",
|
||||
],
|
||||
# Health is listed last — it's a genuine but lower-priority connection than
|
||||
# music/animals/education/social_impact. detect_mission_alignment returns on first
|
||||
# match, so dict order = preference order.
|
||||
"health": [
|
||||
"patient", "patients", "healthcare", "health tech", "healthtech",
|
||||
"pharma", "pharmaceutical", "clinical", "medical",
|
||||
"hospital", "clinic", "therapy", "therapist",
|
||||
"rare disease", "life sciences", "life science",
|
||||
"treatment", "prescription", "biotech", "biopharma", "medtech",
|
||||
"behavioral health", "population health",
|
||||
"care management", "care coordination", "oncology", "specialty pharmacy",
|
||||
"provider network", "payer", "health plan", "benefits administration",
|
||||
"ehr", "emr", "fhir", "hipaa",
|
||||
],
|
||||
}
|
||||
|
||||
_candidate = _profile.name if _profile else "the candidate"
|
||||
|
||||
_MISSION_DEFAULTS: dict[str, str] = {
|
||||
"music": (
|
||||
f"This company is in the music industry — an industry {_candidate} finds genuinely "
|
||||
"compelling. Para 3 should warmly and specifically reflect this authentic alignment, "
|
||||
"not as a generic fan statement, but as an honest statement of where they'd love to "
|
||||
"apply their skills."
|
||||
),
|
||||
"animal_welfare": (
|
||||
f"This organization works in animal welfare/rescue — a mission {_candidate} finds "
|
||||
"genuinely meaningful. Para 3 should reflect this authentic connection warmly and "
|
||||
"specifically, tying their skills to this mission."
|
||||
),
|
||||
"education": (
|
||||
f"This company works in education or EdTech — a domain that resonates with "
|
||||
f"{_candidate}'s values. Para 3 should reflect this authentic connection specifically "
|
||||
"and warmly."
|
||||
),
|
||||
"social_impact": (
|
||||
f"This organization is mission-driven / social impact focused — exactly the kind of "
|
||||
f"cause {_candidate} cares deeply about. Para 3 should warmly reflect their genuine "
|
||||
"desire to apply their skills to work that makes a real difference in people's lives."
|
||||
),
|
||||
"health": (
|
||||
f"This company works in healthcare, life sciences, or patient care. "
|
||||
f"Do NOT write about {_candidate}'s passion for pharmaceuticals or healthcare as an "
|
||||
"industry. Instead, Para 3 should reflect genuine care for the PEOPLE these companies "
|
||||
"exist to serve — those navigating complex, often invisible, or unusual health journeys; "
|
||||
"patients facing rare or poorly understood conditions; individuals whose situations don't "
|
||||
"fit a clean category. The connection is to the humans behind the data, not the industry. "
|
||||
"If the user has provided a personal note, use that to anchor Para 3 specifically."
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
def _build_mission_notes(profile=None, candidate_name: str | None = None) -> dict[str, str]:
|
||||
"""Merge user's custom mission notes with YAML defaults.
|
||||
|
||||
For domains defined in mission_domains.yaml the default_note is used when
|
||||
the user has not provided a custom note in user.yaml mission_preferences.
|
||||
|
||||
For user-defined domains (keys in mission_preferences that are NOT in the
|
||||
YAML config), the custom note is used as-is; no signal detection applies.
|
||||
"""
|
||||
"""Merge user's custom mission notes with generic defaults."""
|
||||
p = profile or _profile
|
||||
name = candidate_name or (p.name if p else "the candidate")
|
||||
name = candidate_name or _candidate
|
||||
prefs = p.mission_preferences if p else {}
|
||||
notes: dict[str, str] = {}
|
||||
|
||||
for domain, cfg in _MISSION_DOMAINS.items():
|
||||
default_note = (cfg.get("default_note") or "").strip()
|
||||
custom = (prefs.get(domain) or "").strip()
|
||||
notes = {}
|
||||
for industry, default_note in _MISSION_DEFAULTS.items():
|
||||
custom = (prefs.get(industry) or "").strip()
|
||||
if custom:
|
||||
notes[domain] = (
|
||||
notes[industry] = (
|
||||
f"Mission alignment — {name} shared: \"{custom}\". "
|
||||
"Para 3 should warmly and specifically reflect this authentic connection."
|
||||
)
|
||||
else:
|
||||
notes[domain] = default_note
|
||||
|
||||
notes[industry] = default_note
|
||||
return notes
|
||||
|
||||
|
||||
|
|
@ -102,15 +150,12 @@ _MISSION_NOTES = _build_mission_notes()
|
|||
def detect_mission_alignment(
|
||||
company: str, description: str, mission_notes: dict | None = None
|
||||
) -> str | None:
|
||||
"""Return a mission hint string if company/JD matches a configured domain, else None.
|
||||
|
||||
Checks domains in YAML file order (dict order = match priority).
|
||||
"""
|
||||
"""Return a mission hint string if company/JD matches a preferred industry, else None."""
|
||||
notes = mission_notes if mission_notes is not None else _MISSION_NOTES
|
||||
text = f"{company} {description}".lower()
|
||||
for domain, signals in _MISSION_SIGNALS.items():
|
||||
for industry, signals in _MISSION_SIGNALS.items():
|
||||
if any(sig in text for sig in signals):
|
||||
return notes.get(domain)
|
||||
return notes[industry]
|
||||
return None
|
||||
|
||||
|
||||
|
|
@ -186,7 +231,7 @@ def build_prompt(
|
|||
)
|
||||
parts.append(f"{recruiter_note}\n")
|
||||
|
||||
parts.append("Now write a new cover letter for:")
|
||||
parts.append(f"Now write a new cover letter for:")
|
||||
parts.append(f" Role: {title}")
|
||||
parts.append(f" Company: {company}")
|
||||
if description:
|
||||
|
|
|
|||
|
|
@ -1,254 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Generate demo/seed.sql — committed seed INSERT statements for the demo DB.
|
||||
|
||||
Run whenever seed data needs to change:
|
||||
conda run -n cf python scripts/generate_demo_seed.py
|
||||
|
||||
Outputs pure INSERT SQL (no DDL). Schema migrations are handled by db_migrate.py
|
||||
at container startup. The seed SQL is loaded after migrations complete.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import date, timedelta
|
||||
from pathlib import Path
|
||||
|
||||
OUT_PATH = Path(__file__).parent.parent / "demo" / "seed.sql"
|
||||
|
||||
TODAY = date.today()
|
||||
|
||||
|
||||
def _dago(n: int) -> str:
    """Return the ISO-format date ``n`` days before TODAY."""
    offset = timedelta(days=n)
    return (TODAY - offset).isoformat()
|
||||
|
||||
|
||||
def _dfrom(n: int) -> str:
    """Return the ISO date string (``YYYY-MM-DD``) for *n* days after ``TODAY``."""
    offset = timedelta(days=n)
    return (TODAY + offset).isoformat()
|
||||
|
||||
|
||||
COVER_LETTER_SPOTIFY = """\
|
||||
Dear Hiring Manager,
|
||||
|
||||
I'm excited to apply for the UX Designer role at Spotify. With five years of
|
||||
experience designing for music discovery and cross-platform experiences, I've
|
||||
consistently shipped features that make complex audio content feel effortless to
|
||||
navigate. At my last role I led a redesign of the playlist creation flow that
|
||||
reduced drop-off by 31%.
|
||||
|
||||
Spotify's commitment to artist and listener discovery — and its recent push into
|
||||
audiobooks and podcast tooling — aligns directly with the kind of cross-format
|
||||
design challenges I'm most energised by.
|
||||
|
||||
I'd love to bring that focus to your product design team.
|
||||
|
||||
Warm regards,
|
||||
[Your name]
|
||||
"""
|
||||
|
||||
SQL_PARTS: list[str] = []
|
||||
|
||||
# ── Jobs ──────────────────────────────────────────────────────────────────────
|
||||
|
||||
# Columns: title, company, url, source, location, is_remote, salary,
|
||||
# match_score, status, date_found, date_posted, cover_letter,
|
||||
# applied_at, phone_screen_at, interviewing_at, offer_at, hired_at,
|
||||
# interview_date, rejection_stage, hired_feedback
|
||||
JOBS: list[tuple] = [
|
||||
# ---- Review queue (12 jobs — mix of pending + approved) ------------------
|
||||
("UX Designer",
|
||||
"Spotify", "https://www.linkedin.com/jobs/view/1000001",
|
||||
"linkedin", "Remote", 1, "$110k–$140k",
|
||||
94.0, "approved", _dago(1), _dago(3), COVER_LETTER_SPOTIFY,
|
||||
None, None, None, None, None, None, None, None),
|
||||
|
||||
("Product Designer",
|
||||
"Duolingo", "https://www.linkedin.com/jobs/view/1000002",
|
||||
"linkedin", "Pittsburgh, PA", 0, "$95k–$120k",
|
||||
87.0, "approved", _dago(2), _dago(5), "Draft in progress — cover letter generating…",
|
||||
None, None, None, None, None, None, None, None),
|
||||
|
||||
("UX Lead",
|
||||
"NPR", "https://www.indeed.com/viewjob?jk=1000003",
|
||||
"indeed", "Washington, DC", 1, "$120k–$150k",
|
||||
81.0, "approved", _dago(3), _dago(7), None,
|
||||
None, None, None, None, None, None, None, None),
|
||||
|
||||
# Ghost post — date_posted 34 days ago → shadow indicator
|
||||
("Senior UX Designer",
|
||||
"Mozilla", "https://www.linkedin.com/jobs/view/1000004",
|
||||
"linkedin", "Remote", 1, "$105k–$130k",
|
||||
81.0, "pending", _dago(2), _dago(34), None,
|
||||
None, None, None, None, None, None, None, None),
|
||||
|
||||
("Interaction Designer",
|
||||
"Figma", "https://www.indeed.com/viewjob?jk=1000005",
|
||||
"indeed", "San Francisco, CA", 1, "$115k–$145k",
|
||||
78.0, "pending", _dago(4), _dago(6), None,
|
||||
None, None, None, None, None, None, None, None),
|
||||
|
||||
("Product Designer II",
|
||||
"Notion", "https://www.linkedin.com/jobs/view/1000006",
|
||||
"linkedin", "Remote", 1, "$100k–$130k",
|
||||
76.0, "pending", _dago(5), _dago(8), None,
|
||||
None, None, None, None, None, None, None, None),
|
||||
|
||||
("UX Designer",
|
||||
"Stripe", "https://www.linkedin.com/jobs/view/1000007",
|
||||
"linkedin", "Remote", 1, "$120k–$150k",
|
||||
74.0, "pending", _dago(6), _dago(9), None,
|
||||
None, None, None, None, None, None, None, None),
|
||||
|
||||
("UI/UX Designer",
|
||||
"Canva", "https://www.indeed.com/viewjob?jk=1000008",
|
||||
"indeed", "Remote", 1, "$90k–$115k",
|
||||
72.0, "pending", _dago(7), _dago(10), None,
|
||||
None, None, None, None, None, None, None, None),
|
||||
|
||||
("Senior Product Designer",
|
||||
"Asana", "https://www.linkedin.com/jobs/view/1000009",
|
||||
"linkedin", "San Francisco, CA", 1, "$125k–$155k",
|
||||
69.0, "pending", _dago(8), _dago(11), None,
|
||||
None, None, None, None, None, None, None, None),
|
||||
|
||||
("UX Researcher",
|
||||
"Intercom", "https://www.indeed.com/viewjob?jk=1000010",
|
||||
"indeed", "Remote", 1, "$95k–$120k",
|
||||
67.0, "pending", _dago(9), _dago(12), None,
|
||||
None, None, None, None, None, None, None, None),
|
||||
|
||||
("Product Designer",
|
||||
"Linear", "https://www.linkedin.com/jobs/view/1000011",
|
||||
"linkedin", "Remote", 1, "$110k–$135k",
|
||||
65.0, "pending", _dago(10), _dago(13), None,
|
||||
None, None, None, None, None, None, None, None),
|
||||
|
||||
("UX Designer",
|
||||
"Loom", "https://www.indeed.com/viewjob?jk=1000012",
|
||||
"indeed", "Remote", 1, "$90k–$110k",
|
||||
62.0, "pending", _dago(11), _dago(14), None,
|
||||
None, None, None, None, None, None, None, None),
|
||||
|
||||
# ---- Pipeline jobs (applied → hired) ------------------------------------
|
||||
("Senior Product Designer",
|
||||
"Asana", "https://www.asana.com/jobs/1000013",
|
||||
"linkedin", "San Francisco, CA", 1, "$125k–$155k",
|
||||
91.0, "phone_screen", _dago(14), _dago(16), None,
|
||||
_dago(7), _dfrom(0), None, None, None,
|
||||
f"{_dfrom(0)}T14:00:00", None, None),
|
||||
|
||||
("Product Designer",
|
||||
"Notion", "https://www.notion.so/jobs/1000014",
|
||||
"indeed", "Remote", 1, "$100k–$130k",
|
||||
88.0, "interviewing", _dago(21), _dago(23), None,
|
||||
_dago(14), _dago(10), _dago(3), None, None,
|
||||
f"{_dfrom(7)}T10:00:00", None, None),
|
||||
|
||||
("Design Systems Designer",
|
||||
"Figma", "https://www.figma.com/jobs/1000015",
|
||||
"linkedin", "San Francisco, CA", 1, "$130k–$160k",
|
||||
96.0, "hired", _dago(45), _dago(47), None,
|
||||
_dago(38), _dago(32), _dago(25), _dago(14), _dago(7),
|
||||
None, None,
|
||||
'{"factors":["clear_scope","great_manager","mission_aligned"],"notes":"Excited about design systems work. Salary met expectations."}'),
|
||||
|
||||
("UX Designer",
|
||||
"Slack", "https://slack.com/jobs/1000016",
|
||||
"indeed", "Remote", 1, "$115k–$140k",
|
||||
79.0, "applied", _dago(28), _dago(30), None,
|
||||
_dago(18), None, None, None, None, None, None, None),
|
||||
]
|
||||
|
||||
|
||||
def _q(v: object) -> str:
|
||||
"""SQL-quote a Python value."""
|
||||
if v is None:
|
||||
return "NULL"
|
||||
return "'" + str(v).replace("'", "''") + "'"
|
||||
|
||||
|
||||
_JOB_COLS = (
|
||||
"title, company, url, source, location, is_remote, salary, "
|
||||
"match_score, status, date_found, date_posted, cover_letter, "
|
||||
"applied_at, phone_screen_at, interviewing_at, offer_at, hired_at, "
|
||||
"interview_date, rejection_stage, hired_feedback"
|
||||
)
|
||||
|
||||
# One INSERT per JOBS tuple; values are quoted positionally to line up with _JOB_COLS.
SQL_PARTS.append("-- jobs")
for job in JOBS:
    vals = ", ".join(map(_q, job))
    SQL_PARTS.append("INSERT INTO jobs (" + _JOB_COLS + ") VALUES (" + vals + ");")
|
||||
|
||||
# ── Contacts ──────────────────────────────────────────────────────────────────
|
||||
|
||||
# (job_id, direction, subject, from_addr, to_addr, received_at, stage_signal)
|
||||
CONTACTS: list[tuple] = [
|
||||
(1, "inbound", "Excited to connect — UX Designer role at Spotify",
|
||||
"jamie.chen@spotify.com", "you@example.com", _dago(3), "positive_response"),
|
||||
(1, "outbound", "Re: Excited to connect — UX Designer role at Spotify",
|
||||
"you@example.com", "jamie.chen@spotify.com", _dago(2), None),
|
||||
(13, "inbound", "Interview Confirmation — Senior Product Designer",
|
||||
"recruiting@asana.com", "you@example.com", _dago(2), "interview_scheduled"),
|
||||
(14, "inbound", "Your panel interview is confirmed for Apr 22",
|
||||
"recruiting@notion.so", "you@example.com", _dago(3), "interview_scheduled"),
|
||||
(14, "inbound", "Pre-interview prep resources",
|
||||
"marcus.webb@notion.so", "you@example.com", _dago(2), "positive_response"),
|
||||
(15, "inbound", "Figma Design Systems — Offer Letter",
|
||||
"offers@figma.com", "you@example.com", _dago(14), "offer_received"),
|
||||
(15, "outbound", "Re: Figma Design Systems — Offer Letter (acceptance)",
|
||||
"you@example.com", "offers@figma.com", _dago(10), None),
|
||||
(15, "inbound", "Welcome to Figma! Onboarding next steps",
|
||||
"onboarding@figma.com", "you@example.com", _dago(7), None),
|
||||
(16, "inbound", "Thanks for applying to Slack",
|
||||
"noreply@slack.com", "you@example.com", _dago(18), None),
|
||||
]
|
||||
|
||||
# One INSERT per CONTACTS tuple. job_id is emitted as a bare value; every
# other column goes through _q().
SQL_PARTS.append("\n-- job_contacts")
for c in CONTACTS:
    job_id, direction, subject, from_addr, to_addr, received_at, stage_signal = c
    quoted = ", ".join(
        _q(field)
        for field in (direction, subject, from_addr, to_addr, received_at, stage_signal)
    )
    SQL_PARTS.append(
        "INSERT INTO job_contacts "
        "(job_id, direction, subject, from_addr, to_addr, received_at, stage_signal) "
        f"VALUES ({job_id}, {quoted});"
    )
|
||||
|
||||
# ── References ────────────────────────────────────────────────────────────────
|
||||
|
||||
# (name, email, role, company, relationship, notes, tags, prep_email)
|
||||
REFERENCES: list[tuple] = [
|
||||
("Dr. Priya Nair", "priya.nair@example.com", "Director of Design", "Acme Corp",
|
||||
"former_manager",
|
||||
"Managed me for 3 years on the consumer app redesign. Enthusiastic reference.",
|
||||
'["manager","design"]',
|
||||
"Hi Priya,\n\nI hope you're doing well! I'm currently interviewing for a few senior UX roles "
|
||||
"and would be so grateful if you'd be willing to serve as a reference.\n\nThank you!\n[Your name]"),
|
||||
|
||||
("Sam Torres", "sam.torres@example.com", "Senior Product Designer", "Acme Corp",
|
||||
"former_colleague",
|
||||
"Worked together on design systems. Great at speaking to collaborative process.",
|
||||
'["colleague","design_systems"]', None),
|
||||
|
||||
("Jordan Kim", "jordan.kim@example.com", "VP of Product", "Streamline Inc",
|
||||
"former_manager",
|
||||
"Led the product team I was embedded in. Can speak to business impact of design work.",
|
||||
'["manager","product"]', None),
|
||||
]
|
||||
|
||||
# One INSERT per REFERENCES tuple. The unpacking is kept so a tuple with the
# wrong number of fields fails loudly here instead of producing bad SQL.
SQL_PARTS.append("\n-- references_")
for ref in REFERENCES:
    name, email, role, company, relationship, notes, tags, prep_email = ref
    quoted = ", ".join(
        _q(field)
        for field in (name, email, role, company, relationship, notes, tags, prep_email)
    )
    SQL_PARTS.append(
        "INSERT INTO references_ "
        "(name, email, role, company, relationship, notes, tags, prep_email) "
        f"VALUES ({quoted});"
    )
|
||||
|
||||
# ── Write output ──────────────────────────────────────────────────────────────
|
||||
|
||||
# Emit one statement per line and terminate the file with a trailing newline.
output = "\n".join(SQL_PARTS) + "\n"
OUT_PATH.write_text(output, encoding="utf-8")

summary = f"({len(JOBS)} jobs, {len(CONTACTS)} contacts, {len(REFERENCES)} references)"
print(f"Wrote {OUT_PATH} {summary}")
|
||||
|
|
@ -392,7 +392,6 @@ def _has_todo_keyword(subject: str) -> bool:
|
|||
|
||||
|
||||
_LINKEDIN_ALERT_SENDER = "jobalerts-noreply@linkedin.com"
|
||||
_INDEED_ALERT_SENDER = "jobalerts@indeed.com"
|
||||
|
||||
# Social-proof / nav lines to skip when parsing alert blocks
|
||||
_ALERT_SKIP_PHRASES = {
|
||||
|
|
@ -448,75 +447,6 @@ def parse_linkedin_alert(body: str) -> list[dict]:
|
|||
return jobs
|
||||
|
||||
|
||||
def parse_indeed_alert(body: str) -> list[dict]:
    """
    Parse the HTML body of an Indeed Job Alert email.

    Returns a list of dicts: {title, company, location, salary, url}.
    URL is canonicalised to https://www.indeed.com/viewjob?jk=<id>
    (tracking parameters stripped). Each ``jk`` id yields at most one job.

    Returns an empty list when BeautifulSoup (``bs4``) is not installed.
    """
    try:
        from bs4 import BeautifulSoup as _BS
    except ImportError:
        return []

    jobs: list[dict] = []
    soup = _BS(body, "html.parser")

    # Indeed uses several layouts across its email templates, so rather than
    # depending on one table structure we scan every <a> whose href carries a
    # jk=<id> parameter and read the remaining fields from its nearest
    # <td>/<tr>/<div> ancestor.
    seen_jks: set[str] = set()

    for anchor in soup.find_all("a", href=True):
        href: str = anchor["href"]
        jk_m = re.search(r"[?&]jk=([a-z0-9]+)", href, re.IGNORECASE)
        if not jk_m:
            continue
        jk = jk_m.group(1)
        if jk in seen_jks:
            continue

        title = anchor.get_text(separator=" ", strip=True)
        if not title or len(title) < 3:
            # Text-less anchors (logo/image links) must not claim the jk:
            # a later anchor for the same job may carry the real title.
            continue
        seen_jks.add(jk)

        # Walk up to find the container cell/row and extract company + location
        container = anchor.find_parent(["td", "tr", "div"])
        company = location = salary = ""
        if container:
            text_lines = [
                t.strip() for t in container.get_text(separator="\n").splitlines()
                if t.strip() and t.strip().lower() != title.lower()
            ]
            if text_lines:
                company = text_lines[0]
            if len(text_lines) > 1:
                location = text_lines[1]
            # salary line often contains "$" or "/yr"
            for line in text_lines[2:]:
                lowered = line.lower()
                if "$" in line or "/yr" in lowered or "/hour" in lowered:
                    salary = line
                    break

        jobs.append({
            "title": title,
            "company": company,
            "location": location,
            "salary": salary,
            "url": f"https://www.indeed.com/viewjob?jk={jk}",
        })

    return jobs
|
||||
|
||||
|
||||
def _scan_todo_label(conn: imaplib.IMAP4, cfg: dict, db_path: Path,
|
||||
active_jobs: list[dict],
|
||||
known_message_ids: set) -> int:
|
||||
|
|
@ -628,29 +558,20 @@ def _scan_unmatched_leads(conn: imaplib.IMAP4, cfg: dict,
|
|||
if mid in known_message_ids:
|
||||
continue
|
||||
|
||||
# ── Job alert digests — parse each card deterministically ───────
|
||||
from_lower = parsed["from_addr"].lower()
|
||||
alert_cards: list[dict] = []
|
||||
alert_source = ""
|
||||
if _LINKEDIN_ALERT_SENDER in from_lower:
|
||||
alert_cards = parse_linkedin_alert(parsed["body"])
|
||||
alert_source = "linkedin"
|
||||
elif _INDEED_ALERT_SENDER in from_lower:
|
||||
alert_cards = parse_indeed_alert(parsed["body"])
|
||||
alert_source = "indeed"
|
||||
|
||||
if alert_cards:
|
||||
for card in alert_cards:
|
||||
# ── LinkedIn Job Alert digest — parse each card individually ──────
|
||||
if _LINKEDIN_ALERT_SENDER in parsed["from_addr"].lower():
|
||||
cards = parse_linkedin_alert(parsed["body"])
|
||||
for card in cards:
|
||||
if card["url"] in existing_urls:
|
||||
continue
|
||||
job_id = insert_job(db_path, {
|
||||
"title": card["title"],
|
||||
"company": card["company"],
|
||||
"url": card["url"],
|
||||
"source": alert_source,
|
||||
"location": card.get("location", ""),
|
||||
"source": "linkedin",
|
||||
"location": card["location"],
|
||||
"is_remote": 0,
|
||||
"salary": card.get("salary", ""),
|
||||
"salary": "",
|
||||
"description": "",
|
||||
"date_found": datetime.now().isoformat()[:10],
|
||||
})
|
||||
|
|
@ -659,7 +580,7 @@ def _scan_unmatched_leads(conn: imaplib.IMAP4, cfg: dict,
|
|||
submit_task(db_path, "scrape_url", job_id)
|
||||
existing_urls.add(card["url"])
|
||||
new_leads += 1
|
||||
print(f"[imap] {alert_source} alert → {card['company']} — {card['title']}")
|
||||
print(f"[imap] LinkedIn alert → {card['company']} — {card['title']}")
|
||||
known_message_ids.add(mid)
|
||||
continue # skip normal LLM extraction path
|
||||
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
from __future__ import annotations
|
||||
from datetime import datetime
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from scripts.integrations.base import IntegrationBase
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -12,53 +12,10 @@ Usage:
|
|||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import math
|
||||
import re
|
||||
from datetime import datetime, timezone
|
||||
|
||||
_log = logging.getLogger(__name__)
|
||||
|
||||
# Max jobs passed to the reranker (avoids excessive inference time on large stacks)
|
||||
_RERANK_POOL = 50
|
||||
|
||||
|
||||
def _try_rerank(resume_text: str, jobs: list[dict]) -> list[dict]:
|
||||
"""Rerank jobs by cross-encoder relevance to resume text.
|
||||
|
||||
Returns jobs sorted best-first by the reranker. Falls back silently to the
|
||||
input order if the reranker package is unavailable or inference fails.
|
||||
"""
|
||||
if not jobs:
|
||||
return jobs
|
||||
try:
|
||||
from circuitforge_core.reranker import rerank
|
||||
except ImportError:
|
||||
return jobs
|
||||
try:
|
||||
descriptions = [j.get("description") or j.get("title", "") for j in jobs]
|
||||
results = rerank(resume_text, descriptions, top_n=len(jobs))
|
||||
# Map ranked candidates back to job dicts, handling duplicate descriptions
|
||||
idx_queue: dict[str, list[int]] = {}
|
||||
for i, d in enumerate(descriptions):
|
||||
idx_queue.setdefault(d, []).append(i)
|
||||
reranked: list[dict] = []
|
||||
used: set[int] = set()
|
||||
for r in results:
|
||||
for idx in idx_queue.get(r.candidate, []):
|
||||
if idx not in used:
|
||||
reranked.append(jobs[idx])
|
||||
used.add(idx)
|
||||
break
|
||||
# Safety: append anything the reranker didn't return
|
||||
for i, j in enumerate(jobs):
|
||||
if i not in used:
|
||||
reranked.append(j)
|
||||
return reranked
|
||||
except Exception:
|
||||
_log.warning("Reranker pass failed; using stack_score order.", exc_info=True)
|
||||
return jobs
|
||||
|
||||
|
||||
# ── TUNING ─────────────────────────────────────────────────────────────────────
|
||||
# Adjust these constants to change how jobs are ranked.
|
||||
|
|
@ -332,7 +289,6 @@ def rank_jobs(
|
|||
user_level: int = 3,
|
||||
limit: int = 10,
|
||||
min_score: float = 20.0,
|
||||
resume_text: str = "",
|
||||
) -> list[dict]:
|
||||
"""Score and rank pending jobs; return top-N above min_score.
|
||||
|
||||
|
|
@ -343,10 +299,6 @@ def rank_jobs(
|
|||
user_level: Seniority level 1–7 (use seniority_from_experience()).
|
||||
limit: Stack size; pass 0 to return all qualifying jobs.
|
||||
min_score: Minimum stack_score to include (0–100).
|
||||
resume_text: Plain-text resume for cross-encoder reranking pass.
|
||||
When provided, the top-_RERANK_POOL candidates are
|
||||
reranked by (resume, description) relevance before
|
||||
the limit is applied. Graceful no-op when empty.
|
||||
|
||||
Returns:
|
||||
Sorted list (best first) with 'stack_score' key added to each dict.
|
||||
|
|
@ -358,10 +310,4 @@ def rank_jobs(
|
|||
scored.append({**job, "stack_score": s})
|
||||
|
||||
scored.sort(key=lambda j: j["stack_score"], reverse=True)
|
||||
|
||||
if resume_text and scored:
|
||||
pool = scored[:_RERANK_POOL]
|
||||
pool = _try_rerank(resume_text, pool)
|
||||
scored = pool + scored[_RERANK_POOL:]
|
||||
|
||||
return scored[:limit] if limit > 0 else scored
|
||||
|
|
|
|||
|
|
@ -1,42 +0,0 @@
|
|||
# BSL 1.1 — see LICENSE-BSL
|
||||
"""LLM-assisted reply draft generation for inbound job contacts (BSL 1.1)."""
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
_SYSTEM = (
|
||||
"You are drafting a professional email reply on behalf of a job seeker. "
|
||||
"Be concise and professional. Do not fabricate facts. If you are uncertain "
|
||||
"about a detail, leave a [TODO: fill in] placeholder. "
|
||||
"Output the reply body only — no subject line, no salutation preamble."
|
||||
)
|
||||
|
||||
|
||||
def _build_prompt(subject: str, from_addr: str, body: str, user_name: str, target_role: str) -> str:
|
||||
return (
|
||||
f"ORIGINAL EMAIL:\n"
|
||||
f"Subject: {subject}\n"
|
||||
f"From: {from_addr}\n"
|
||||
f"Body:\n{body}\n\n"
|
||||
f"USER PROFILE CONTEXT:\n"
|
||||
f"Name: {user_name}\n"
|
||||
f"Target role: {target_role}\n\n"
|
||||
"Write a concise, professional reply to this email."
|
||||
)
|
||||
|
||||
|
||||
def generate_draft_reply(
    subject: str,
    from_addr: str,
    body: str,
    user_name: str,
    target_role: str,
    config_path: Optional[Path] = None,
) -> str:
    """Ask the LLM router for a reply to the given email and return the stripped text.

    ``config_path`` is forwarded unchanged to ``LLMRouter``.
    """
    # Function-scope import, as in the original.
    from scripts.llm_router import LLMRouter

    llm = LLMRouter(config_path=config_path)
    user_prompt = _build_prompt(subject, from_addr, body, user_name, target_role)
    reply = llm.complete(system=_SYSTEM, user=user_prompt)
    return reply.strip()
|
||||
|
|
@ -1,46 +1,19 @@
|
|||
"""
|
||||
LLM abstraction layer with priority fallback chain.
|
||||
Config lookup order:
|
||||
1. <repo>/config/llm.yaml — per-install local config
|
||||
2. ~/.config/circuitforge/llm.yaml — user-level config (circuitforge-core default)
|
||||
3. env-var auto-config (ANTHROPIC_API_KEY, OPENAI_API_KEY, OLLAMA_HOST, …)
|
||||
Reads config/llm.yaml. Tries backends in order; falls back on any error.
|
||||
"""
|
||||
from pathlib import Path
|
||||
|
||||
from circuitforge_core.llm import LLMRouter as _CoreLLMRouter
|
||||
|
||||
# Kept for backwards-compatibility — external callers that import CONFIG_PATH
|
||||
# from this module continue to work.
|
||||
CONFIG_PATH = Path(__file__).parent.parent / "config" / "llm.yaml"
|
||||
|
||||
|
||||
class LLMRouter(_CoreLLMRouter):
|
||||
"""Peregrine-specific LLMRouter — tri-level config path priority.
|
||||
"""Peregrine-specific LLMRouter — defaults to Peregrine's config/llm.yaml."""
|
||||
|
||||
When ``config_path`` is supplied (e.g. in tests) it is passed straight
|
||||
through to the core. When omitted, the lookup order is:
|
||||
1. <repo>/config/llm.yaml (per-install local config)
|
||||
2. ~/.config/circuitforge/llm.yaml (user-level, circuitforge-core default)
|
||||
3. env-var auto-config (ANTHROPIC_API_KEY, OPENAI_API_KEY, OLLAMA_HOST …)
|
||||
"""
|
||||
|
||||
def __init__(self, config_path: Path | None = None) -> None:
|
||||
if config_path is not None:
|
||||
# Explicit path supplied — use it directly (e.g. tests, CLI override).
|
||||
def __init__(self, config_path: Path = CONFIG_PATH):
|
||||
super().__init__(config_path)
|
||||
return
|
||||
|
||||
local = Path(__file__).parent.parent / "config" / "llm.yaml"
|
||||
user_level = Path.home() / ".config" / "circuitforge" / "llm.yaml"
|
||||
if local.exists():
|
||||
super().__init__(local)
|
||||
elif user_level.exists():
|
||||
super().__init__(user_level)
|
||||
else:
|
||||
# No yaml found — let circuitforge-core's env-var auto-config run.
|
||||
# The core default CONFIG_PATH (~/.config/circuitforge/llm.yaml)
|
||||
# won't exist either, so _auto_config_from_env() will be triggered.
|
||||
super().__init__()
|
||||
|
||||
|
||||
# Module-level singleton for convenience
|
||||
|
|
|
|||
|
|
@ -1,285 +0,0 @@
|
|||
"""
|
||||
DB helpers for the messaging feature.
|
||||
|
||||
Messages table: manual log entries and LLM drafts (one row per message).
|
||||
Message templates table: built-in seeds and user-created templates.
|
||||
|
||||
Conventions (match scripts/db.py):
|
||||
- All functions take db_path: Path as first argument.
|
||||
- sqlite3.connect(db_path), row_factory = sqlite3.Row
|
||||
- Return plain dicts (dict(row))
|
||||
- Always close connection in finally
|
||||
"""
|
||||
import sqlite3
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Internal helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _connect(db_path: Path) -> sqlite3.Connection:
|
||||
con = sqlite3.connect(db_path)
|
||||
con.row_factory = sqlite3.Row
|
||||
return con
|
||||
|
||||
|
||||
def _now_utc() -> str:
|
||||
"""Return current UTC time as ISO 8601 string."""
|
||||
return datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Messages
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def create_message(
    db_path: Path,
    *,
    job_id: Optional[int],
    job_contact_id: Optional[int],
    type: str,
    direction: str,
    subject: Optional[str],
    body: Optional[str],
    from_addr: Optional[str],
    to_addr: Optional[str],
    template_id: Optional[int],
    logged_at: Optional[str] = None,
) -> dict:
    """Insert one row into ``messages`` and return the stored row as a dict.

    ``logged_at`` falls back to the current UTC timestamp when it is omitted
    or empty.
    """
    insert_sql = """
        INSERT INTO messages
            (job_id, job_contact_id, type, direction, subject, body,
             from_addr, to_addr, logged_at, template_id)
        VALUES
            (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        """
    db = _connect(db_path)
    try:
        row_values = (
            job_id, job_contact_id, type, direction, subject, body,
            from_addr, to_addr, logged_at or _now_utc(), template_id,
        )
        cursor = db.execute(insert_sql, row_values)
        db.commit()
        # Re-read the row so the returned dict carries every stored column.
        stored = db.execute(
            "SELECT * FROM messages WHERE id = ?", (cursor.lastrowid,)
        ).fetchone()
        return dict(stored)
    finally:
        db.close()
|
||||
|
||||
|
||||
def list_messages(
    db_path: Path,
    *,
    job_id: Optional[int] = None,
    type: Optional[str] = None,
    direction: Optional[str] = None,
    limit: int = 100,
) -> list[dict]:
    """Return up to *limit* messages as dicts, newest ``logged_at`` first.

    Each of ``job_id``, ``type`` and ``direction`` adds an equality filter
    when it is not ``None``; the filters are combined with ``AND``.
    """
    candidates = (("job_id", job_id), ("type", type), ("direction", direction))
    active = [(column, value) for column, value in candidates if value is not None]

    where = ""
    if active:
        where = "WHERE " + " AND ".join(f"{column} = ?" for column, _ in active)
    # Filter values first, then the LIMIT placeholder.
    params = [value for _, value in active] + [limit]

    db = _connect(db_path)
    try:
        cursor = db.execute(
            f"SELECT * FROM messages {where} ORDER BY logged_at DESC LIMIT ?",
            params,
        )
        return [dict(record) for record in cursor.fetchall()]
    finally:
        db.close()
|
||||
|
||||
|
||||
def delete_message(db_path: Path, message_id: int) -> None:
    """Remove the message with id *message_id*.

    Raises:
        KeyError: no message with that id exists.
    """
    db = _connect(db_path)
    try:
        found = db.execute(
            "SELECT id FROM messages WHERE id = ?", (message_id,)
        ).fetchone()
        if found is None:
            raise KeyError(f"Message {message_id} not found")
        db.execute("DELETE FROM messages WHERE id = ?", (message_id,))
        db.commit()
    finally:
        db.close()
|
||||
|
||||
|
||||
def approve_message(db_path: Path, message_id: int) -> dict:
    """Stamp ``approved_at`` with the current UTC time and return the updated row.

    Raises:
        KeyError: no message with that id exists.
    """
    lookup = (message_id,)
    db = _connect(db_path)
    try:
        if db.execute("SELECT id FROM messages WHERE id = ?", lookup).fetchone() is None:
            raise KeyError(f"Message {message_id} not found")
        db.execute(
            "UPDATE messages SET approved_at = ? WHERE id = ?",
            (_now_utc(), message_id),
        )
        db.commit()
        refreshed = db.execute("SELECT * FROM messages WHERE id = ?", lookup).fetchone()
        return dict(refreshed)
    finally:
        db.close()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Templates
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def list_templates(db_path: Path) -> list[dict]:
    """Return every message template as a dict, built-ins first, then by title."""
    db = _connect(db_path)
    try:
        cursor = db.execute(
            "SELECT * FROM message_templates ORDER BY is_builtin DESC, title ASC"
        )
        return [dict(record) for record in cursor.fetchall()]
    finally:
        db.close()
|
||||
|
||||
|
||||
def create_template(
    db_path: Path,
    *,
    title: str,
    category: str = "custom",
    subject_template: Optional[str] = None,
    body_template: str,
) -> dict:
    """Store a new user-defined template (``is_builtin = 0``) and return its row as a dict."""
    insert_sql = """
        INSERT INTO message_templates
            (title, category, subject_template, body_template, is_builtin)
        VALUES
            (?, ?, ?, ?, 0)
        """
    db = _connect(db_path)
    try:
        cursor = db.execute(
            insert_sql, (title, category, subject_template, body_template)
        )
        db.commit()
        # Re-read the row so the returned dict carries every stored column.
        stored = db.execute(
            "SELECT * FROM message_templates WHERE id = ?", (cursor.lastrowid,)
        ).fetchone()
        return dict(stored)
    finally:
        db.close()
|
||||
|
||||
|
||||
def update_template(db_path: Path, template_id: int, **fields) -> dict:
    """
    Update allowed fields on a user-defined template and return the updated row.

    Accepted keyword fields: ``title``, ``category``, ``subject_template``,
    ``body_template``. With no fields the current row is returned unchanged
    (the built-in check is not applied in that case).

    Raises ValueError if any other field name is passed.
    Raises PermissionError if the template is a built-in (is_builtin=1).
    Raises KeyError if the template is not found.
    """
    if fields:
        # Reject unknown columns before touching the database; the names are
        # interpolated into the UPDATE statement below.
        permitted = {"title", "category", "subject_template", "body_template"}
        invalid = set(fields) - permitted
        if invalid:
            raise ValueError(f"Cannot update field(s): {invalid}")

    db = _connect(db_path)
    try:
        if not fields:
            # Nothing to update — just return current state
            current = db.execute(
                "SELECT * FROM message_templates WHERE id = ?", (template_id,)
            ).fetchone()
            if current is None:
                raise KeyError(f"Template {template_id} not found")
            return dict(current)

        guard = db.execute(
            "SELECT id, is_builtin FROM message_templates WHERE id = ?",
            (template_id,),
        ).fetchone()
        if guard is None:
            raise KeyError(f"Template {template_id} not found")
        if guard["is_builtin"]:
            raise PermissionError(
                f"Template {template_id} is a built-in and cannot be modified"
            )

        set_clause = ", ".join(f"{column} = ?" for column in fields)
        bound = [*fields.values(), _now_utc(), template_id]
        db.execute(
            f"UPDATE message_templates SET {set_clause}, updated_at = ? WHERE id = ?",
            bound,
        )
        db.commit()
        refreshed = db.execute(
            "SELECT * FROM message_templates WHERE id = ?", (template_id,)
        ).fetchone()
        return dict(refreshed)
    finally:
        db.close()
|
||||
|
||||
|
||||
def delete_template(db_path: Path, template_id: int) -> None:
    """
    Delete a user-defined template.

    Raises PermissionError if the template is a built-in (is_builtin=1).
    Raises KeyError if the template is not found.
    """
    db = _connect(db_path)
    try:
        guard = db.execute(
            "SELECT id, is_builtin FROM message_templates WHERE id = ?",
            (template_id,),
        ).fetchone()
        if guard is None:
            raise KeyError(f"Template {template_id} not found")
        if guard["is_builtin"]:
            raise PermissionError(
                f"Template {template_id} is a built-in and cannot be deleted"
            )
        db.execute("DELETE FROM message_templates WHERE id = ?", (template_id,))
        db.commit()
    finally:
        db.close()
|
||||
|
||||
|
||||
def update_message_body(db_path: Path, message_id: int, body: str) -> dict:
    """Replace the ``body`` of message *message_id* and return the updated row as a dict.

    Raises KeyError if the message is not found.
    """
    db = _connect(db_path)
    try:
        found = db.execute("SELECT id FROM messages WHERE id=?", (message_id,)).fetchone()
        if found is None:
            raise KeyError(f"message {message_id} not found")
        db.execute("UPDATE messages SET body=? WHERE id=?", (body, message_id))
        db.commit()
        refreshed = db.execute("SELECT * FROM messages WHERE id=?", (message_id,)).fetchone()
        return dict(refreshed)
    finally:
        db.close()
|
||||
|
|
@ -25,6 +25,7 @@ import argparse
|
|||
import shutil
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from textwrap import dedent
|
||||
|
||||
import yaml
|
||||
|
||||
|
|
|
|||
|
|
@ -348,14 +348,14 @@ def write_compose_override(ports: dict[str, dict]) -> None:
|
|||
for name, info in to_disable.items():
|
||||
lines += [
|
||||
f" {name}: # adopted — host service on :{info['resolved']}",
|
||||
" entrypoint: [\"/bin/sh\", \"-c\", \"sleep infinity\"]",
|
||||
" ports: []",
|
||||
" healthcheck:",
|
||||
" test: [\"CMD\", \"true\"]",
|
||||
" interval: 1s",
|
||||
" timeout: 1s",
|
||||
" start_period: 0s",
|
||||
" retries: 1",
|
||||
f" entrypoint: [\"/bin/sh\", \"-c\", \"sleep infinity\"]",
|
||||
f" ports: []",
|
||||
f" healthcheck:",
|
||||
f" test: [\"CMD\", \"true\"]",
|
||||
f" interval: 1s",
|
||||
f" timeout: 1s",
|
||||
f" start_period: 0s",
|
||||
f" retries: 1",
|
||||
]
|
||||
|
||||
OVERRIDE_YML.write_text("\n".join(lines) + "\n")
|
||||
|
|
@ -492,12 +492,6 @@ def main() -> None:
|
|||
# binds a harmless free port instead of conflicting with the external service.
|
||||
env_updates: dict[str, str] = {i["env_var"]: str(i["stub_port"]) for i in ports.values()}
|
||||
env_updates["RECOMMENDED_PROFILE"] = profile
|
||||
# When Ollama is adopted from the host process, write OLLAMA_HOST so
|
||||
# LLMRouter's env-var auto-config finds it without needing config/llm.yaml.
|
||||
ollama_info = ports.get("ollama")
|
||||
if ollama_info and ollama_info.get("external"):
|
||||
env_updates["OLLAMA_HOST"] = f"http://host.docker.internal:{ollama_info['resolved']}"
|
||||
|
||||
if offload_gb > 0:
|
||||
env_updates["CPU_OFFLOAD_GB"] = str(offload_gb)
|
||||
# GPU info for the app container (which lacks nvidia-smi access)
|
||||
|
|
|
|||
|
|
@ -19,6 +19,7 @@ from __future__ import annotations
|
|||
import json
|
||||
import logging
|
||||
import re
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
|
@ -69,12 +70,7 @@ def extract_jd_signals(description: str, resume_text: str = "") -> list[str]:
|
|||
# Extract JSON array from response (LLM may wrap it in markdown)
|
||||
match = re.search(r"\[.*\]", raw, re.DOTALL)
|
||||
if match:
|
||||
json_str = match.group(0)
|
||||
# LLMs occasionally emit invalid JSON escape sequences (e.g. \s, \d, \p)
|
||||
# that are valid regex but not valid JSON. Replace bare backslashes that
|
||||
# aren't followed by a recognised JSON escape character.
|
||||
json_str = re.sub(r'\\([^"\\/bfnrtu])', r'\1', json_str)
|
||||
llm_signals = json.loads(json_str)
|
||||
llm_signals = json.loads(match.group(0))
|
||||
llm_signals = [s.strip() for s in llm_signals if isinstance(s, str) and s.strip()]
|
||||
except Exception:
|
||||
log.warning("[resume_optimizer] LLM signal extraction failed", exc_info=True)
|
||||
|
|
@ -232,22 +228,6 @@ def rewrite_for_ats(
|
|||
from scripts.llm_router import LLMRouter
|
||||
router = LLMRouter()
|
||||
|
||||
# Rerank gaps by JD relevance so the most impactful terms are injected first.
|
||||
# Falls back silently to the incoming priority ordering on any error.
|
||||
jd_text = job.get("description", "")
|
||||
if jd_text and prioritized_gaps:
|
||||
try:
|
||||
from circuitforge_core.reranker import rerank as _rerank
|
||||
terms = [g["term"] for g in prioritized_gaps]
|
||||
results = _rerank(jd_text, terms, top_n=len(terms))
|
||||
term_rank = {r.candidate: r.rank for r in results}
|
||||
prioritized_gaps = sorted(
|
||||
prioritized_gaps,
|
||||
key=lambda g: term_rank.get(g["term"], len(prioritized_gaps)),
|
||||
)
|
||||
except Exception:
|
||||
pass # keep original priority ordering
|
||||
|
||||
# Group gaps by target section
|
||||
by_section: dict[str, list[str]] = {}
|
||||
for gap in prioritized_gaps:
|
||||
|
|
@ -277,8 +257,7 @@ def rewrite_for_ats(
|
|||
f"3. Only rephrase existing content — replace vague verbs/nouns with the "
|
||||
f" ATS-preferred equivalents listed above.\n"
|
||||
f"4. Keep the same number of bullet points in experience entries.\n"
|
||||
f"5. Do NOT use markdown formatting — no **, __, or * for bullets.\n"
|
||||
f"6. Return ONLY the rewritten section content, no labels or explanation."
|
||||
f"5. Return ONLY the rewritten section content, no labels or explanation."
|
||||
f"{voice_note}\n\n"
|
||||
f"Original {section} section:\n{original_content}"
|
||||
)
|
||||
|
|
@ -305,8 +284,7 @@ def _section_text_for_prompt(resume: dict[str, Any], section: str) -> str:
|
|||
for exp in resume.get("experience", []):
|
||||
lines.append(f"{exp['title']} at {exp['company']} ({exp['start_date']}–{exp['end_date']})")
|
||||
for b in exp.get("bullets", []):
|
||||
clean_b = re.sub(r"^[•\-–—*◦▪▸►\s]+", "", b).strip()
|
||||
lines.append(f" • {clean_b}")
|
||||
lines.append(f" • {b}")
|
||||
return "\n".join(lines) if lines else "(empty)"
|
||||
return "(unsupported section)"
|
||||
|
||||
|
|
@ -315,7 +293,7 @@ def _apply_section_rewrite(resume: dict[str, Any], section: str, rewritten: str)
|
|||
"""Return a new resume dict with the given section replaced by rewritten text."""
|
||||
updated = dict(resume)
|
||||
if section == "summary":
|
||||
updated["career_summary"] = _clean_summary_markup(rewritten)
|
||||
updated["career_summary"] = rewritten
|
||||
elif section == "skills":
|
||||
# LLM returns comma-separated or newline-separated skills
|
||||
skills = [s.strip() for s in re.split(r"[,\n•·]+", rewritten) if s.strip()]
|
||||
|
|
@ -323,23 +301,10 @@ def _apply_section_rewrite(resume: dict[str, Any], section: str, rewritten: str)
|
|||
elif section == "experience":
|
||||
# For experience, we keep the structured entries but replace the bullets.
|
||||
# The LLM rewrites the whole section as plain text; we re-parse the bullets.
|
||||
updated["experience"] = _reparse_experience_bullets(resume.get("experience", []), rewritten)
|
||||
updated["experience"] = _reparse_experience_bullets(resume["experience"], rewritten)
|
||||
return updated
|
||||
|
||||
|
||||
def _clean_summary_markup(text: str) -> str:
|
||||
"""Strip markdown/plain-text bullet markers from career summary lines.
|
||||
|
||||
LLMs sometimes format summary content with '* item' or '• item' markdown.
|
||||
This converts those lines to unmarked text so the summary renders cleanly.
|
||||
"""
|
||||
lines = []
|
||||
for line in text.splitlines():
|
||||
cleaned = re.sub(r"^[•*\-–—◦▪▸►]\s+", "", line.lstrip())
|
||||
lines.append(cleaned)
|
||||
return "\n".join(lines).strip()
|
||||
|
||||
|
||||
def _reparse_experience_bullets(
|
||||
original_entries: list[dict],
|
||||
rewritten_text: str,
|
||||
|
|
@ -369,9 +334,9 @@ def _reparse_experience_bullets(
|
|||
chunk = remaining
|
||||
|
||||
bullets = [
|
||||
re.sub(r"^([•\-–—*◦▪▸►]\s*)+", "", line.strip()).strip()
|
||||
re.sub(r"^[•\-–—*◦▪▸►]\s*", "", line).strip()
|
||||
for line in chunk.splitlines()
|
||||
if re.match(r"^\s*[•\-–—*◦▪▸►]", line)
|
||||
if re.match(r"^[•\-–—*◦▪▸►]\s*", line.strip())
|
||||
]
|
||||
new_entry = dict(entry)
|
||||
new_entry["bullets"] = bullets if bullets else entry["bullets"]
|
||||
|
|
@ -380,208 +345,6 @@ def _reparse_experience_bullets(
|
|||
return result
|
||||
|
||||
|
||||
# ── Gap framing ───────────────────────────────────────────────────────────────
|
||||
|
||||
def frame_skill_gaps(
    struct: dict[str, Any],
    gap_framings: list[dict],
    job: dict[str, Any],
    candidate_voice: str = "",
) -> dict[str, Any]:
    """Inject honest framing language for skills the candidate doesn't have directly.

    For each gap framing decision the user provided:
      - mode "adjacent": user has related experience → asks the LLM for one
        bridging bullet and appends it to the most relevant experience entry
      - mode "learning": actively developing the skill → appends a structured
        "Developing: X (context)" note to the skills list (no LLM call)
      - mode "skip" (or any framing without context): no change

    The user-supplied context text is the source of truth. The LLM's job is only
    to phrase it naturally in resume style — not to invent new claims.

    Args:
        struct:          Resume dict (already processed by apply_review_decisions).
        gap_framings:    List of dicts with keys:
                           skill   — the ATS term the candidate lacks
                           mode    — "adjacent" | "learning" | "skip"
                           context — candidate's own words describing their related background
        job:             Job dict; "title" and "company" are used in the prompt.
        candidate_voice: Free-text style note appended to the prompt when non-empty.

    Returns:
        New resume dict with framing language injected. The top-level dict and
        each experience entry are shallow copies; ``struct`` is not modified.
    """
    updated = dict(struct)
    updated["experience"] = [dict(e) for e in (struct.get("experience") or [])]

    adjacent_framings = [f for f in gap_framings if f.get("mode") == "adjacent" and f.get("context")]
    learning_framings = [f for f in gap_framings if f.get("mode") == "learning" and f.get("context")]

    # ── Adjacent experience: inject bridging sentence into most relevant entry ─
    if adjacent_framings:
        # Only the adjacent path talks to the LLM, so the router is imported and
        # built only when needed; learning/skip-only calls need no LLM config.
        from scripts.llm_router import LLMRouter
        router = LLMRouter()

        # Identical for every framing, so build it once.
        voice_note = (
            f'\n\nCandidate voice/style: "{candidate_voice}". Match this tone.'
        ) if candidate_voice else ""

        for framing in adjacent_framings:
            skill = framing["skill"]
            context = framing["context"]

            # Find the experience entry most likely to be relevant (simple keyword match)
            best_entry_idx = _find_most_relevant_entry(updated["experience"], skill)
            if best_entry_idx is None:
                continue

            entry = updated["experience"][best_entry_idx]
            bullets = list(entry.get("bullets") or [])

            prompt = (
                f"You are adding one honest framing sentence to a resume bullet list.\n\n"
                f"The candidate does not have direct experience with '{skill}', "
                f"but they have relevant background they described as:\n"
                f' "{context}"\n\n'
                f"Job context: {job.get('title', '')} at {job.get('company', '')}.\n\n"
                f"RULES:\n"
                f"1. Add exactly ONE new bullet point that bridges their background to '{skill}'.\n"
                f"2. Do NOT fabricate anything beyond what their context description says.\n"
                f"3. Use honest language: 'adjacent experience in', 'strong foundation applicable to', "
                f" 'directly transferable background in', etc.\n"
                f"4. Return ONLY the single new bullet text — no prefix, no explanation."
                f"{voice_note}\n\n"
                f"Existing bullets for context:\n"
                + "\n".join(f" • {b}" for b in bullets[:3])
            )

            try:
                new_bullet = router.complete(prompt).strip()
                # The LLM may prefix its answer with a bullet glyph despite rule 4.
                new_bullet = re.sub(r"^[•\-–—*◦▪▸►]\s*", "", new_bullet).strip()
                if new_bullet:
                    bullets.append(new_bullet)
                    new_entry = dict(entry)
                    new_entry["bullets"] = bullets
                    updated["experience"][best_entry_idx] = new_entry
            except Exception:
                # Best-effort: a failed LLM call leaves this entry unchanged.
                log.warning(
                    "[resume_optimizer] frame_skill_gaps adjacent failed for skill %r", skill,
                    exc_info=True,
                )

    # ── Learning framing: add structured note to skills list ──────────────────
    if learning_framings:
        skills = list(updated.get("skills") or [])
        for framing in learning_framings:
            skill = framing["skill"]
            context = framing["context"].strip()
            # Format: "Developing: Kubernetes (strong Docker/container orchestration background)"
            note = f"Developing: {skill} ({context})" if context else f"Developing: {skill}"
            if note not in skills:
                skills.append(note)
        updated["skills"] = skills

    return updated
|
||||
|
||||
|
||||
def _find_most_relevant_entry(
|
||||
experience: list[dict],
|
||||
skill: str,
|
||||
) -> int | None:
|
||||
"""Return the index of the experience entry most relevant to a skill term.
|
||||
|
||||
Uses simple keyword overlap between the skill and entry title/bullets.
|
||||
Falls back to the most recent (first) entry if no match found.
|
||||
"""
|
||||
if not experience:
|
||||
return None
|
||||
|
||||
skill_words = set(skill.lower().split())
|
||||
best_idx = 0
|
||||
best_score = -1
|
||||
|
||||
for i, entry in enumerate(experience):
|
||||
entry_text = (
|
||||
(entry.get("title") or "") + " " +
|
||||
" ".join(entry.get("bullets") or [])
|
||||
).lower()
|
||||
entry_words = set(entry_text.split())
|
||||
score = len(skill_words & entry_words)
|
||||
if score > best_score:
|
||||
best_score = score
|
||||
best_idx = i
|
||||
|
||||
return best_idx
|
||||
|
||||
|
||||
def apply_review_decisions(
    draft: dict[str, Any],
    decisions: dict[str, Any],
) -> dict[str, Any]:
    """Apply user section-level review decisions to the rewritten struct.

    Handles approved skills, summary accept/reject/edit, and per-entry
    experience accept/reject/edit. Does not call the LLM and does not modify
    ``draft``: the returned struct holds its own copies of the experience
    entries.

    Args:
        draft:     The review draft dict from build_review_diff (contains
                   "sections" and "rewritten_struct").
        decisions: Dict of per-section decisions from the review UI; a missing
                   or ``None`` section means "accept as proposed":
                     skills:     {"approved_additions": [...]}
                     summary:    {"accepted": bool, "edited_text": str (optional)}
                     experience: {"accepted_entries": [
                                     {"title", "company", "accepted",
                                      "edited_bullets" (optional list[str])}]}

    Returns:
        Updated resume struct ready for gap framing and final render.
    """
    struct = dict(draft.get("rewritten_struct") or {})
    # Copy each entry so the bullet overrides below never write through to the
    # caller's draft["rewritten_struct"].
    if struct.get("experience"):
        struct["experience"] = [dict(entry) for entry in struct["experience"]]
    sections = draft.get("sections") or []

    # ── Skills: keep original + only approved additions ────────────────────
    skills_decision = decisions.get("skills") or {}
    approved_additions = set(skills_decision.get("approved_additions") or [])
    for sec in sections:
        if sec.get("section") == "skills":
            original_kept = set(sec.get("kept") or [])
            struct["skills"] = sorted(original_kept | approved_additions)
            break

    # ── Summary: accept/reject + optional user-edited text ─────────────────
    summary_dec = decisions.get("summary") or {}
    if not summary_dec.get("accepted", True):
        # Rejected: restore the original text recorded in the diff section.
        for sec in sections:
            if sec.get("section") == "summary":
                struct["career_summary"] = sec.get("original", struct.get("career_summary", ""))
                break
    else:
        edited_text = summary_dec.get("edited_text")
        if edited_text is not None:
            struct["career_summary"] = edited_text.strip()

    # ── Experience: per-entry accept/reject + optional user-edited bullets ──
    experience_dec = decisions.get("experience") or {}
    exp_entry_map: dict[str, dict] = {
        f"{ed.get('title', '')}|{ed.get('company', '')}": ed
        for ed in (experience_dec.get("accepted_entries") or [])
    }
    for sec in sections:
        if sec.get("section") != "experience":
            continue
        for entry_diff in (sec.get("entries") or []):
            key = f"{entry_diff['title']}|{entry_diff['company']}"
            entry_dec = exp_entry_map.get(key, {})
            accepted = entry_dec.get("accepted", True)
            edited_bullets = entry_dec.get("edited_bullets")
            # Only the first struct entry with a matching title+company is touched.
            for exp_entry in (struct.get("experience") or []):
                if (exp_entry.get("title") == entry_diff["title"] and
                        exp_entry.get("company") == entry_diff["company"]):
                    if not accepted:
                        exp_entry["bullets"] = list(entry_diff["original_bullets"])
                    elif edited_bullets is not None:
                        exp_entry["bullets"] = [b for b in edited_bullets if b.strip()]
                    break

    return struct
|
||||
|
||||
|
||||
# ── Hallucination guard ───────────────────────────────────────────────────────
|
||||
|
||||
def hallucination_check(original: dict[str, Any], rewritten: dict[str, Any]) -> bool:
|
||||
|
|
@ -674,207 +437,3 @@ def render_resume_text(resume: dict[str, Any]) -> str:
|
|||
lines.append("")
|
||||
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
# ── Review diff builder ────────────────────────────────────────────────────────
|
||||
|
||||
def build_review_diff(
    original: dict[str, Any],
    rewritten: dict[str, Any],
) -> dict[str, Any]:
    """Compare an original and a rewritten resume section by section.

    Returns:
        {"sections": [...], "rewritten_struct": rewritten}

        ``sections`` contains, in this order and only when something changed:
          - skills:     {"section": "skills", "type": "skills_diff",
                         "added", "removed", "kept"} — sorted lists of
                         whitespace-stripped skill strings
          - summary:    {"section": "summary", "type": "text_diff",
                         "original", "proposed"} — emitted only when the
                         proposed summary is non-empty and differs
          - experience: {"section": "experience", "type": "bullets_diff",
                         "entries": [{title, company, original_bullets,
                         proposed_bullets}]} — entries are paired by position
                         and listed only when their bullets differ
    """
    def stripped_skills(resume: dict[str, Any]) -> set:
        return {skill.strip() for skill in (resume.get("skills") or [])}

    report: list = []

    # ── Skills diff ────────────────────────────────────────────────────────
    before_skills = stripped_skills(original)
    after_skills = stripped_skills(rewritten)
    gained = sorted(after_skills - before_skills)
    dropped = sorted(before_skills - after_skills)
    if gained or dropped:
        report.append({
            "section": "skills",
            "type": "skills_diff",
            "added": gained,
            "removed": dropped,
            "kept": sorted(before_skills & after_skills),
        })

    # ── Summary diff ───────────────────────────────────────────────────────
    before_summary = (original.get("career_summary") or "").strip()
    after_summary = (rewritten.get("career_summary") or "").strip()
    if after_summary and after_summary != before_summary:
        report.append({
            "section": "summary",
            "type": "text_diff",
            "original": before_summary,
            "proposed": after_summary,
        })

    # ── Experience diff ────────────────────────────────────────────────────
    paired = zip(original.get("experience") or [], rewritten.get("experience") or [])
    changed_entries = [
        {
            "title": old.get("title", ""),
            "company": old.get("company", ""),
            "original_bullets": old.get("bullets") or [],
            "proposed_bullets": new.get("bullets") or [],
        }
        for old, new in paired
        if (old.get("bullets") or []) != (new.get("bullets") or [])
    ]
    if changed_entries:
        report.append({
            "section": "experience",
            "type": "bullets_diff",
            "entries": changed_entries,
        })

    return {"sections": report, "rewritten_struct": rewritten}
|
||||
|
||||
|
||||
# ── PDF export ─────────────────────────────────────────────────────────────────
|
||||
|
||||
def export_pdf(resume: dict[str, Any], output_path: str) -> None:
    """Render a structured resume dict to a single-column PDF using reportlab.

    Sections are emitted in this order, each only when present: name, contact
    line, SUMMARY, EXPERIENCE, EDUCATION, SKILLS, ACHIEVEMENTS.

    All resume text is XML-escaped before it reaches ``Paragraph``, because
    reportlab parses paragraph text as XML-like markup: a literal ``<``, ``>``
    or ``&`` in a bullet (e.g. "R&D", "latency <5ms") would otherwise be
    misread as markup.

    Args:
        resume:      Structured resume dict (same format as resume_parser output).
        output_path: Path of the .pdf file to write.
    """
    from xml.sax.saxutils import escape

    from reportlab.lib.pagesizes import LETTER
    from reportlab.lib.units import inch
    from reportlab.lib.styles import ParagraphStyle
    from reportlab.lib.enums import TA_CENTER, TA_LEFT
    from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, HRFlowable
    from reportlab.lib import colors

    MARGIN = 0.75 * inch

    name_style = ParagraphStyle(
        "name", fontName="Helvetica-Bold", fontSize=16, leading=20,
        alignment=TA_CENTER, spaceAfter=2,
    )
    contact_style = ParagraphStyle(
        "contact", fontName="Helvetica", fontSize=9, leading=12,
        alignment=TA_CENTER, spaceAfter=6,
        textColor=colors.HexColor("#555555"),
    )
    section_style = ParagraphStyle(
        "section", fontName="Helvetica-Bold", fontSize=10, leading=14,
        spaceBefore=10, spaceAfter=2,
        textColor=colors.HexColor("#1a1a2e"),
    )
    body_style = ParagraphStyle(
        "body", fontName="Helvetica", fontSize=9, leading=13, alignment=TA_LEFT,
    )
    role_style = ParagraphStyle(
        "role", fontName="Helvetica-Bold", fontSize=9, leading=13,
    )
    meta_style = ParagraphStyle(
        "meta", fontName="Helvetica-Oblique", fontSize=8, leading=12,
        textColor=colors.HexColor("#555555"), spaceAfter=2,
    )
    bullet_style = ParagraphStyle(
        "bullet", fontName="Helvetica", fontSize=9, leading=13, leftIndent=12,
    )

    def hr():
        return HRFlowable(width="100%", thickness=0.5,
                          color=colors.HexColor("#cccccc"),
                          spaceAfter=4, spaceBefore=2)

    def text_para(text, style):
        # Escape &, < and > so user-supplied text is rendered literally.
        return Paragraph(escape(str(text)), style)

    story = []

    if resume.get("name"):
        story.append(text_para(resume["name"], name_style))

    contact_parts = [p for p in (
        resume.get("email", ""), resume.get("phone", ""),
        resume.get("location", ""), resume.get("linkedin", ""),
    ) if p]
    if contact_parts:
        story.append(text_para(" | ".join(contact_parts), contact_style))

    story.append(hr())

    summary = (resume.get("career_summary") or "").strip()
    if summary:
        story.append(Paragraph("SUMMARY", section_style))
        story.append(hr())
        story.append(text_para(summary, body_style))
        story.append(Spacer(1, 4))

    if resume.get("experience"):
        story.append(Paragraph("EXPERIENCE", section_style))
        story.append(hr())
        for exp in resume["experience"]:
            dates = f"{exp.get('start_date', '')}–{exp.get('end_date', '')}"
            story.append(text_para(
                f"{exp.get('title', '')} | {exp.get('company', '')}", role_style
            ))
            story.append(text_para(dates, meta_style))
            for bullet in (exp.get("bullets") or []):
                story.append(text_para(f"• {bullet}", bullet_style))
            story.append(Spacer(1, 4))

    if resume.get("education"):
        story.append(Paragraph("EDUCATION", section_style))
        story.append(hr())
        for edu in resume["education"]:
            degree = f"{edu.get('degree', '')} {edu.get('field', '')}".strip()
            story.append(text_para(
                f"{degree} | {edu.get('institution', '')} {edu.get('graduation_year', '')}".strip(),
                body_style,
            ))
        story.append(Spacer(1, 4))

    if resume.get("skills"):
        story.append(Paragraph("SKILLS", section_style))
        story.append(hr())
        story.append(text_para(", ".join(resume["skills"]), body_style))
        story.append(Spacer(1, 4))

    if resume.get("achievements"):
        story.append(Paragraph("ACHIEVEMENTS", section_style))
        story.append(hr())
        for a in resume["achievements"]:
            story.append(text_para(f"• {a}", bullet_style))

    doc = SimpleDocTemplate(
        output_path, pagesize=LETTER,
        leftMargin=MARGIN, rightMargin=MARGIN,
        topMargin=MARGIN, bottomMargin=MARGIN,
    )
    doc.build(story)
|
||||
|
|
|
|||
|
|
@ -9,9 +9,11 @@ Falls back to empty dict on unrecoverable errors — caller shows the form build
|
|||
from __future__ import annotations
|
||||
|
||||
import io
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
import zipfile
|
||||
from pathlib import Path
|
||||
from xml.etree import ElementTree as ET
|
||||
|
||||
import pdfplumber
|
||||
|
|
|
|||
|
|
@ -1,217 +0,0 @@
|
|||
"""
|
||||
Resume format transform — library ↔ profile.
|
||||
|
||||
Converts between:
|
||||
- Library format: struct_json produced by resume_parser.parse_resume()
|
||||
{name, email, phone, career_summary, experience[{title,company,start_date,end_date,location,bullets[]}],
|
||||
education[{institution,degree,field,start_date,end_date}], skills[], achievements[]}
|
||||
- Profile content format: ResumePayload content fields (plain_text_resume.yaml)
|
||||
{name, surname, email, phone, career_summary,
|
||||
experience[{title,company,period,location,industry,responsibilities,skills[]}],
|
||||
education[{institution,degree,field,start_date,end_date}],
|
||||
skills[], achievements[]}
|
||||
|
||||
Profile metadata fields (salary, work prefs, self-ID, PII) are never touched here.
|
||||
|
||||
License: MIT
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import date
|
||||
from typing import Any
|
||||
|
||||
|
||||
_CONTENT_FIELDS = frozenset({
|
||||
"name", "surname", "email", "phone", "career_summary",
|
||||
"experience", "skills", "education", "achievements",
|
||||
})
|
||||
|
||||
|
||||
def library_to_profile_content(struct_json: dict[str, Any]) -> dict[str, Any]:
    """Convert a library ``struct_json`` into ResumePayload content fields.

    Only content fields are produced; merging with existing profile metadata
    is left to the caller.

    Conversion notes:
      - "name" is cut at the first space into name / surname.
      - start_date and end_date are stripped and joined with " – " into
        "period"; a missing side leaves just the other one.
      - non-empty bullets are joined with newlines into "responsibilities".
      - experience[].industry is always "" and experience[].skills always [].
      - missing or ``None`` values become "" (or an empty list).
    """
    def text(value: Any) -> str:
        return value or ""

    first, _, rest = text(struct_json.get("name")).partition(" ")

    jobs = []
    for item in struct_json.get("experience") or []:
        ends = (text(item.get("start_date")).strip(), text(item.get("end_date")).strip())
        jobs.append({
            "title": text(item.get("title")),
            "company": text(item.get("company")),
            # Joining only the non-empty ends covers both/one/neither in one expression.
            "period": " \u2013 ".join(part for part in ends if part),
            "location": text(item.get("location")),
            "industry": "",  # not captured by parser
            "responsibilities": "\n".join(b for b in (item.get("bullets") or []) if b),
            "skills": [],
        })

    edu_keys = ("institution", "degree", "field", "start_date", "end_date")
    schools = [
        {key: text(item.get(key)) for key in edu_keys}
        for item in struct_json.get("education") or []
    ]

    return {
        "name": first,
        "surname": rest,
        "email": text(struct_json.get("email")),
        "phone": text(struct_json.get("phone")),
        "career_summary": text(struct_json.get("career_summary")),
        "experience": jobs,
        "skills": list(struct_json.get("skills") or []),
        "education": schools,
        "achievements": list(struct_json.get("achievements") or []),
    }
|
||||
|
||||
|
||||
def profile_to_library(payload: dict[str, Any]) -> tuple[str, dict[str, Any]]:
    """Transform ResumePayload content fields to (plain_text, struct_json).

    Inverse of library_to_profile_content. The plain_text is a best-effort
    reconstruction for display and re-parsing. struct_json is the structured
    representation (name, email, phone, career_summary, experience, skills,
    education, achievements).

    Conversion notes:
      - name and surname are joined with a single space.
      - "period" is split on the first en-dash (or, failing that, em-dash)
        into start_date / end_date; a period without either dash becomes
        start_date only.
      - "responsibilities" is split on newlines into non-empty, stripped bullets.
      - achievements are carried into struct_json but not into plain_text.

    Returns:
        ``(plain_text, struct_json)``.
    """
    name_parts = [payload.get("name") or "", payload.get("surname") or ""]
    full_name = " ".join(p for p in name_parts if p).strip()

    career_summary = (payload.get("career_summary") or "").strip()

    lines: list[str] = []
    if full_name:
        lines.append(full_name)
    email = payload.get("email") or ""
    phone = payload.get("phone") or ""
    if email:
        lines.append(email)
    if phone:
        lines.append(phone)

    if career_summary:
        lines += ["", "SUMMARY", career_summary]

    experience_structs = []
    for exp in payload.get("experience") or []:
        title = (exp.get("title") or "").strip()
        company = (exp.get("company") or "").strip()
        period = (exp.get("period") or "").strip()
        location = (exp.get("location") or "").strip()

        # Split period back to start_date / end_date.
        # Only the en-dash / em-dash separator is split on — never a plain
        # hyphen — so that ISO dates like "2023-01 – 2025-03" round-trip correctly.
        if "\u2013" in period:   # en-dash
            date_parts = [p.strip() for p in period.split("\u2013", 1)]
        elif "\u2014" in period:  # em-dash
            date_parts = [p.strip() for p in period.split("\u2014", 1)]
        else:
            date_parts = [period] if period else []
        start_date = date_parts[0] if date_parts else ""
        end_date = date_parts[1] if len(date_parts) > 1 else ""

        resp = (exp.get("responsibilities") or "").strip()
        bullets = [b.strip() for b in resp.split("\n") if b.strip()]

        if title or company:
            header = " | ".join(p for p in [title, company, period] if p)
            lines += ["", header]
        if location:
            lines.append(location)
        for b in bullets:
            lines.append(f"\u2022 {b}")

        experience_structs.append({
            "title": title,
            "company": company,
            "start_date": start_date,
            "end_date": end_date,
            "location": location,
            "bullets": bullets,
        })

    skills: list[str] = list(payload.get("skills") or [])
    if skills:
        lines += ["", "SKILLS", ", ".join(skills)]

    education_structs = []
    for edu in payload.get("education") or []:
        institution = (edu.get("institution") or "").strip()
        degree = (edu.get("degree") or "").strip()
        field = (edu.get("field") or "").strip()
        edu_start = (edu.get("start_date") or "").strip()
        edu_end = (edu.get("end_date") or "").strip()
        if institution or degree:
            label = " ".join(p for p in [degree, field] if p)
            # Join only the parts that exist, so an entry with no degree/field
            # prints as "MIT" rather than " — MIT".
            lines.append(" \u2014 ".join(p for p in [label, institution] if p))
        education_structs.append({
            "institution": institution,
            "degree": degree,
            "field": field,
            "start_date": edu_start,
            "end_date": edu_end,
        })

    achievements: list[str] = list(payload.get("achievements") or [])

    struct_json: dict[str, Any] = {
        "name": full_name,
        "email": email,
        "phone": phone,
        "career_summary": career_summary,
        "experience": experience_structs,
        "skills": skills,
        "education": education_structs,
        "achievements": achievements,
    }

    plain_text = "\n".join(lines).strip()
    return plain_text, struct_json
|
||||
|
||||
|
||||
def make_auto_backup_name(source_name: str) -> str:
    """Build the label for an automatic backup, stamped with today's ISO date.

    Example: "Auto-backup before Senior Engineer Resume — 2026-04-16"
    """
    stamp = date.today().isoformat()
    return "Auto-backup before " + source_name + " \u2014 " + stamp
|
||||
|
||||
|
||||
def blank_fields_on_import(struct_json: dict[str, Any]) -> list[str]:
    """List the content fields that end up blank after a library→profile import.

    Returns an empty list when there is no experience. Otherwise the list
    always contains "experience[].industry", followed by
    "experience[].location" when at least one entry has a missing or
    whitespace-only location.
    """
    entries = struct_json.get("experience")
    if not entries:
        return []

    # industry is always blank — parser never captures it
    warnings = ["experience[].industry"]

    # location may be blank for some entries; one blank entry is enough to warn
    for item in entries:
        if not (item.get("location") or "").strip():
            warnings.append("experience[].location")
            break
    return warnings
|
||||
|
|
@ -57,7 +57,7 @@ _TIMEOUT = 12
|
|||
|
||||
|
||||
def _detect_board(url: str) -> str:
|
||||
"""Return 'linkedin', 'indeed', 'glassdoor', 'jobgether', 'oracle_hcm', or 'generic'."""
|
||||
"""Return 'linkedin', 'indeed', 'glassdoor', or 'generic'."""
|
||||
url_lower = url.lower()
|
||||
if "linkedin.com" in url_lower:
|
||||
return "linkedin"
|
||||
|
|
@ -67,8 +67,6 @@ def _detect_board(url: str) -> str:
|
|||
return "glassdoor"
|
||||
if "jobgether.com" in url_lower:
|
||||
return "jobgether"
|
||||
if "oraclecloud.com" in url_lower and "hcmui" in url_lower:
|
||||
return "oracle_hcm"
|
||||
return "generic"
|
||||
|
||||
|
||||
|
|
@ -203,70 +201,6 @@ def _scrape_jobgether(url: str) -> dict:
|
|||
return {"company": company, "source": "jobgether"} if company else {}
|
||||
|
||||
|
||||
def _scrape_oracle_hcm(url: str) -> dict:
    """Pull job fields out of an Oracle HCM CandidateExperience page with Playwright.

    These portals are React SPAs, so the page has to be rendered in a real
    browser before anything can be read. The prospect token embedded in the
    URL path makes the posting publicly reachable, so no login is performed.

    Returns a dict holding the non-empty values among title, company,
    location and description, plus ``"source": "oracle_hcm"``. When Playwright
    is not importable the generic scraper is used instead; any error raised
    while driving the browser is printed and an empty dict is returned.
    """
    try:
        from playwright.sync_api import sync_playwright
    except ImportError:
        print("[scrape_url] Oracle HCM: Playwright not installed, falling back to generic")
        return _scrape_generic(url)

    # Runs inside the page: each field tries known HCM selectors in order and
    # takes the first one that yields non-empty text.
    extract_js = """() => {
        const sel = (s) => document.querySelector(s)?.textContent?.trim() || '';
        const selInner = (s) => document.querySelector(s)?.innerText?.trim() || '';

        // Title: try known HCM selectors then fall back to first h1
        const title = sel('[class*="requisition-title"]')
            || sel('[class*="JobTitle"]')
            || sel('.job-title')
            || sel('h1');

        // Company: page header logo alt text, meta, or site-name span
        const companyMeta = document.querySelector('meta[property="og:site_name"]')
            ?.getAttribute('content') || '';
        const company = sel('[class*="company-name"]')
            || sel('[class*="siteName"]')
            || sel('[class*="site-name"]')
            || companyMeta;

        // Location: job detail list items
        const location = sel('[class*="job-location"]')
            || sel('[data-testid*="location"]')
            || sel('[class*="location"]');

        // Description: main content div
        const description = selInner('[class*="job-description"]')
            || selInner('[class*="requisition-description"]')
            || selInner('[class*="JobDescription"]')
            || selInner('main article')
            || selInner('main');

        return { title, company, location, description };
    }"""

    try:
        with sync_playwright() as pw:
            chromium = pw.chromium.launch(headless=True)
            try:
                context = chromium.new_context(user_agent=_HEADERS["User-Agent"])
                tab = context.new_page()
                tab.goto(url, timeout=30_000)
                # Wait for the SPA to finish its network activity before reading the DOM
                tab.wait_for_load_state("networkidle", timeout=20_000)
                scraped = tab.evaluate(extract_js)
            finally:
                # Always release the browser, even when navigation or evaluation fails
                chromium.close()

        scraped["source"] = "oracle_hcm"
        # Drop fields the page did not provide so callers only see real values
        return {key: value for key, value in scraped.items() if value}

    except Exception as exc:
        print(f"[scrape_url] Oracle HCM Playwright error for {url}: {exc}")
        return {}
|
||||
|
||||
|
||||
def _parse_json_ld_or_og(html: str) -> dict:
|
||||
"""Extract job fields from JSON-LD structured data, then og: meta tags."""
|
||||
soup = BeautifulSoup(html, "html.parser")
|
||||
|
|
@ -344,8 +278,6 @@ def scrape_job_url(db_path: Path = DEFAULT_DB, job_id: int = None) -> dict:
|
|||
fields = _scrape_glassdoor(url)
|
||||
elif board == "jobgether":
|
||||
fields = _scrape_jobgether(url)
|
||||
elif board == "oracle_hcm":
|
||||
fields = _scrape_oracle_hcm(url)
|
||||
else:
|
||||
fields = _scrape_generic(url)
|
||||
except requests.RequestException as exc:
|
||||
|
|
|
|||
|
|
@ -1,85 +0,0 @@
|
|||
# MIT License — see LICENSE
|
||||
"""Survey assistant: prompt builders and LLM inference for culture-fit survey analysis.
|
||||
|
||||
Extracted from dev-api.py so task_runner can import this without importing the
|
||||
FastAPI application. Callable directly or via the survey_analyze background task.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
SURVEY_SYSTEM = (
|
||||
"You are a job application advisor helping a candidate answer a culture-fit survey. "
|
||||
"The candidate values collaborative teamwork, clear communication, growth, and impact. "
|
||||
"Choose answers that present them in the best professional light."
|
||||
)
|
||||
|
||||
|
||||
def build_text_prompt(text: str, mode: str) -> str:
    """Compose the LLM prompt for a survey that was pasted as plain text.

    ``mode == "quick"`` asks for one letter plus a one-sentence reason per
    question; any other mode asks for a per-option evaluation followed by a
    recommendation. The survey text is appended verbatim after the
    instructions.
    """
    if mode == "quick":
        instructions = (
            "Answer each survey question below. For each, give ONLY the letter of the best "
            "option and a single-sentence reason. Format exactly as:\n"
            "1. B — reason here\n2. A — reason here\n\n"
        )
    else:
        instructions = (
            "Analyze each survey question below. For each question:\n"
            "- Briefly evaluate each option (1 sentence each)\n"
            "- State your recommendation with reasoning\n\n"
        )
    return f"{instructions}Survey:\n{text}"
|
||||
|
||||
|
||||
def build_image_prompt(mode: str) -> str:
    """Compose the LLM prompt that accompanies a survey screenshot.

    ``mode == "quick"`` requests a terse letter-plus-reason answer per
    question; every other mode requests a per-option breakdown with a clear
    recommendation.
    """
    if mode != "quick":
        return (
            "This is a screenshot of a culture-fit survey. For each question, evaluate each option "
            "and recommend the best choice for a collaborative, growth-oriented candidate. "
            "Include a brief breakdown per option and a clear recommendation."
        )
    return (
        "This is a screenshot of a culture-fit survey. Read all questions and answer each "
        "with the letter of the best option for a collaborative, growth-oriented candidate. "
        "Format: '1. B — brief reason' on separate lines."
    )
|
||||
|
||||
|
||||
def run_survey_analyze(
    text: Optional[str],
    image_b64: Optional[str],
    mode: str,
    config_path: Optional[Path] = None,
) -> dict:
    """Send a culture-fit survey to the LLM router and return its answer.

    A truthy ``image_b64`` takes precedence: the screenshot is sent with the
    image prompt using the router's ``vision_fallback_order``. Otherwise the
    pasted ``text`` (empty string when missing) is sent with the survey
    system prompt using ``research_fallback_order``.

    Returns {"output": str, "source": "text_paste" | "screenshot"}.
    Nothing is caught here: a failing LLM call propagates to the caller.
    """
    # Imported lazily so this module can be loaded without the router's dependencies
    from scripts.llm_router import LLMRouter

    if config_path:
        router = LLMRouter(config_path=config_path)
    else:
        router = LLMRouter()

    if not image_b64:
        answer = router.complete(
            build_text_prompt(text or "", mode),
            system=SURVEY_SYSTEM,
            fallback_order=router.config.get("research_fallback_order"),
        )
        return {"output": answer, "source": "text_paste"}

    answer = router.complete(
        build_image_prompt(mode),
        images=[image_b64],
        fallback_order=router.config.get("vision_fallback_order"),
    )
    return {"output": answer, "source": "screenshot"}
|
||||
|
|
@ -16,61 +16,6 @@ from pathlib import Path
|
|||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _normalize_aihawk_resume(raw: dict) -> dict:
|
||||
"""Convert a plain_text_resume.yaml (AIHawk format) into the optimizer struct.
|
||||
|
||||
Handles two AIHawk variants:
|
||||
- Newer Peregrine wizard output: already uses bullets/start_date/end_date/career_summary
|
||||
- Older raw AIHawk format: uses responsibilities (str), period ("YYYY – Present")
|
||||
"""
|
||||
import re as _re
|
||||
|
||||
def _split_responsibilities(text: str) -> list[str]:
|
||||
lines = [ln.strip() for ln in text.strip().splitlines() if ln.strip()]
|
||||
return lines if lines else [text.strip()]
|
||||
|
||||
def _parse_period(period: str) -> tuple[str, str]:
|
||||
parts = _re.split(r"\s*[–—-]\s*", period, maxsplit=1)
|
||||
start = parts[0].strip() if parts else ""
|
||||
end = parts[1].strip() if len(parts) > 1 else "Present"
|
||||
return start, end
|
||||
|
||||
experience = []
|
||||
for entry in raw.get("experience", []):
|
||||
if "responsibilities" in entry:
|
||||
bullets = _split_responsibilities(entry["responsibilities"])
|
||||
else:
|
||||
bullets = entry.get("bullets", [])
|
||||
|
||||
if "period" in entry:
|
||||
start_date, end_date = _parse_period(entry["period"])
|
||||
else:
|
||||
start_date = entry.get("start_date", "")
|
||||
end_date = entry.get("end_date", "Present")
|
||||
|
||||
experience.append({
|
||||
"title": entry.get("title", ""),
|
||||
"company": entry.get("company", ""),
|
||||
"start_date": start_date,
|
||||
"end_date": end_date,
|
||||
"bullets": bullets,
|
||||
})
|
||||
|
||||
# career_summary may be a string or absent; assessment field is a legacy bool in some profiles
|
||||
career_summary = raw.get("career_summary", "")
|
||||
if not isinstance(career_summary, str):
|
||||
career_summary = ""
|
||||
|
||||
return {
|
||||
"career_summary": career_summary,
|
||||
"experience": experience,
|
||||
"education": raw.get("education", []),
|
||||
"skills": raw.get("skills", []),
|
||||
"achievements": raw.get("achievements", []),
|
||||
}
|
||||
|
||||
|
||||
from scripts.db import (
|
||||
DEFAULT_DB,
|
||||
insert_task,
|
||||
|
|
@ -251,12 +196,9 @@ def _run_task(db_path: Path, task_id: int, task_type: str, job_id: int,
|
|||
|
||||
elif task_type == "company_research":
|
||||
from scripts.company_research import research_company
|
||||
_cfg_dir = Path(db_path).parent / "config"
|
||||
_user_llm_cfg = _cfg_dir / "llm.yaml"
|
||||
result = research_company(
|
||||
job,
|
||||
on_stage=lambda s: update_task_stage(db_path, task_id, s),
|
||||
config_path=_user_llm_cfg if _user_llm_cfg.exists() else None,
|
||||
)
|
||||
save_research(db_path, job_id=job_id, **result)
|
||||
|
||||
|
|
@ -341,28 +283,17 @@ def _run_task(db_path: Path, task_id: int, task_type: str, job_id: int,
|
|||
prioritize_gaps,
|
||||
rewrite_for_ats,
|
||||
hallucination_check,
|
||||
render_resume_text,
|
||||
)
|
||||
from scripts.user_profile import load_user_profile
|
||||
|
||||
_user_yaml = Path(db_path).parent / "config" / "user.yaml"
|
||||
description = job.get("description", "")
|
||||
resume_path = load_user_profile(str(_user_yaml)).get("resume_path", "")
|
||||
resume_path = load_user_profile().get("resume_path", "")
|
||||
|
||||
# Parse the candidate's resume
|
||||
update_task_stage(db_path, task_id, "parsing resume")
|
||||
_plain_yaml = Path(db_path).parent / "config" / "plain_text_resume.yaml"
|
||||
if resume_path and Path(resume_path).exists():
|
||||
resume_text = Path(resume_path).read_text(errors="replace")
|
||||
resume_text = Path(resume_path).read_text(errors="replace") if resume_path else ""
|
||||
resume_struct, parse_err = structure_resume(resume_text)
|
||||
elif _plain_yaml.exists():
|
||||
import yaml as _yaml
|
||||
_raw = _yaml.safe_load(_plain_yaml.read_text(encoding="utf-8")) or {}
|
||||
resume_struct = _normalize_aihawk_resume(_raw)
|
||||
resume_text = resume_struct.get("career_summary", "")
|
||||
parse_err = ""
|
||||
else:
|
||||
resume_text = ""
|
||||
resume_struct, parse_err = structure_resume("")
|
||||
|
||||
# Extract keyword gaps and build gap report (free tier)
|
||||
update_task_stage(db_path, task_id, "extracting keyword gaps")
|
||||
|
|
@ -370,56 +301,21 @@ def _run_task(db_path: Path, task_id: int, task_type: str, job_id: int,
|
|||
prioritized = prioritize_gaps(gaps, resume_struct)
|
||||
gap_report = _json.dumps(prioritized, indent=2)
|
||||
|
||||
# Full rewrite (paid tier only) → enters awaiting_review, not completed
|
||||
# Full rewrite (paid tier only)
|
||||
rewritten_text = ""
|
||||
p = _json.loads(params or "{}")
|
||||
selected_gaps = p.get("selected_gaps", None)
|
||||
if selected_gaps is not None:
|
||||
selected_set = set(selected_gaps)
|
||||
prioritized = [g for g in prioritized if g.get("term") in selected_set]
|
||||
if p.get("full_rewrite", False):
|
||||
update_task_stage(db_path, task_id, "rewriting resume sections")
|
||||
candidate_voice = load_user_profile(str(_user_yaml)).get("candidate_voice", "")
|
||||
candidate_voice = load_user_profile().get("candidate_voice", "")
|
||||
rewritten = rewrite_for_ats(resume_struct, prioritized, job, candidate_voice)
|
||||
if hallucination_check(resume_struct, rewritten):
|
||||
from scripts.resume_optimizer import build_review_diff
|
||||
from scripts.db import save_resume_draft
|
||||
draft = build_review_diff(resume_struct, rewritten)
|
||||
# Attach gap report to draft for reference in the review UI
|
||||
draft["gap_report"] = prioritized
|
||||
save_resume_draft(db_path, job_id=job_id,
|
||||
draft_json=_json.dumps(draft))
|
||||
# Save gap report now; final text written after user review
|
||||
save_optimized_resume(db_path, job_id=job_id,
|
||||
text="", gap_report=gap_report)
|
||||
# Park task in awaiting_review — finalize endpoint resolves it
|
||||
update_task_status(db_path, task_id, "awaiting_review")
|
||||
return
|
||||
rewritten_text = render_resume_text(rewritten)
|
||||
else:
|
||||
log.warning("[task_runner] resume_optimize hallucination check failed for job %d", job_id)
|
||||
save_optimized_resume(db_path, job_id=job_id,
|
||||
text="", gap_report=gap_report)
|
||||
else:
|
||||
# Gap-only run (free tier): save report, no draft
|
||||
save_optimized_resume(db_path, job_id=job_id,
|
||||
text="", gap_report=gap_report)
|
||||
|
||||
elif task_type == "survey_analyze":
|
||||
import json as _json
|
||||
from scripts.survey_assistant import run_survey_analyze
|
||||
p = _json.loads(params or "{}")
|
||||
_cfg_path = Path(db_path).parent / "config" / "llm.yaml"
|
||||
update_task_stage(db_path, task_id, "analyzing survey")
|
||||
result = run_survey_analyze(
|
||||
text=p.get("text"),
|
||||
image_b64=p.get("image_b64"),
|
||||
mode=p.get("mode", "quick"),
|
||||
config_path=_cfg_path if _cfg_path.exists() else None,
|
||||
)
|
||||
update_task_status(
|
||||
db_path, task_id, "completed",
|
||||
error=_json.dumps(result),
|
||||
)
|
||||
return
|
||||
save_optimized_resume(db_path, job_id=job_id,
|
||||
text=rewritten_text,
|
||||
gap_report=gap_report)
|
||||
|
||||
elif task_type == "prepare_training":
|
||||
from scripts.prepare_training_data import build_records, write_jsonl, DEFAULT_OUTPUT
|
||||
|
|
|
|||
|
|
@ -15,13 +15,14 @@ Public API (unchanged — callers do not need to change):
|
|||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
import threading
|
||||
from pathlib import Path
|
||||
from typing import Callable, Optional
|
||||
|
||||
from circuitforge_core.tasks.scheduler import (
|
||||
LocalScheduler as _CoreTaskScheduler,
|
||||
TaskSpec, # noqa: F401 — re-exported as part of public API; tests import from here
|
||||
TaskSpec, # re-export unchanged
|
||||
TaskScheduler as _CoreTaskScheduler,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
|
@ -33,7 +34,6 @@ LLM_TASK_TYPES: frozenset[str] = frozenset({
|
|||
"company_research",
|
||||
"wizard_generate",
|
||||
"resume_optimize",
|
||||
"survey_analyze",
|
||||
})
|
||||
|
||||
# Conservative peak VRAM estimates (GB) per task type.
|
||||
|
|
@ -43,7 +43,6 @@ DEFAULT_VRAM_BUDGETS: dict[str, float] = {
|
|||
"company_research": 5.0, # llama3.1:8b or vllm model
|
||||
"wizard_generate": 2.5, # same model family as cover_letter
|
||||
"resume_optimize": 5.0, # section-by-section rewrite; same budget as research
|
||||
"survey_analyze": 2.5, # text: phi3:mini; visual: vision service (own VRAM pool)
|
||||
}
|
||||
|
||||
_DEFAULT_MAX_QUEUE_DEPTH = 500
|
||||
|
|
@ -95,6 +94,15 @@ class TaskScheduler(_CoreTaskScheduler):
|
|||
def __init__(self, db_path: Path, run_task_fn: Callable) -> None:
|
||||
budgets, max_depth = _load_config_overrides(db_path)
|
||||
|
||||
# Resolve VRAM using module-level _get_gpus so tests can monkeypatch it
|
||||
try:
|
||||
gpus = _get_gpus()
|
||||
available_vram: float = (
|
||||
sum(g["vram_total_gb"] for g in gpus) if gpus else 999.0
|
||||
)
|
||||
except Exception:
|
||||
available_vram = 999.0
|
||||
|
||||
# Warn under this module's logger for any task types with no VRAM budget
|
||||
# (mirrors the core warning but captures under scripts.task_scheduler
|
||||
# so existing tests using caplog.at_level(logger="scripts.task_scheduler") pass)
|
||||
|
|
@ -105,12 +113,19 @@ class TaskScheduler(_CoreTaskScheduler):
|
|||
"defaulting to 0.0 GB (unlimited concurrency for this type)", t
|
||||
)
|
||||
|
||||
coordinator_url = os.environ.get(
|
||||
"CF_ORCH_URL", "http://localhost:7700"
|
||||
).rstrip("/")
|
||||
|
||||
super().__init__(
|
||||
db_path=db_path,
|
||||
run_task_fn=run_task_fn,
|
||||
task_types=LLM_TASK_TYPES,
|
||||
vram_budgets=budgets,
|
||||
available_vram_gb=available_vram,
|
||||
max_queue_depth=max_depth,
|
||||
coordinator_url=coordinator_url,
|
||||
service_name="peregrine",
|
||||
)
|
||||
|
||||
def enqueue(
|
||||
|
|
|
|||
|
|
@ -29,7 +29,6 @@ _DEFAULTS = {
|
|||
"tier": "free",
|
||||
"dev_tier_override": None,
|
||||
"wizard_complete": False,
|
||||
"training_export_opt_in": False,
|
||||
"wizard_step": 0,
|
||||
"dismissed_banners": [],
|
||||
"ui_preference": "streamlit",
|
||||
|
|
@ -78,7 +77,6 @@ class UserProfile:
|
|||
self.tier: str = data.get("tier", "free")
|
||||
self.dev_tier_override: str | None = data.get("dev_tier_override") or None
|
||||
self.wizard_complete: bool = bool(data.get("wizard_complete", False))
|
||||
self.training_export_opt_in: bool = bool(data.get("training_export_opt_in", False))
|
||||
self.wizard_step: int = int(data.get("wizard_step", 0))
|
||||
self.dismissed_banners: list[str] = list(data.get("dismissed_banners", []))
|
||||
raw_pref = data.get("ui_preference", "streamlit")
|
||||
|
|
@ -106,7 +104,6 @@ class UserProfile:
|
|||
"tier": self.tier,
|
||||
"dev_tier_override": self.dev_tier_override,
|
||||
"wizard_complete": self.wizard_complete,
|
||||
"training_export_opt_in": self.training_export_opt_in,
|
||||
"wizard_step": self.wizard_step,
|
||||
"dismissed_banners": self.dismissed_banners,
|
||||
"ui_preference": self.ui_preference,
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
#!/usr/bin/env bash
|
||||
# install.sh — Peregrine dependency installer
|
||||
# setup.sh — Peregrine dependency installer
|
||||
# Installs Docker, Docker Compose v2, and (optionally) NVIDIA Container Toolkit.
|
||||
# Supports: Ubuntu/Debian, Fedora/RHEL/CentOS, Arch Linux, macOS (Homebrew).
|
||||
# Windows: not supported — use WSL2 with Ubuntu.
|
||||
|
|
@ -90,11 +90,15 @@ configure_git_safe_dir() {
|
|||
}
|
||||
|
||||
activate_git_hooks() {
|
||||
local repo_dir
|
||||
local repo_dir hooks_installer
|
||||
repo_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
if [[ -d "$repo_dir/.githooks" ]]; then
|
||||
hooks_installer="/Library/Development/CircuitForge/circuitforge-hooks/install.sh"
|
||||
if [[ -f "$hooks_installer" ]]; then
|
||||
bash "$hooks_installer" --quiet
|
||||
success "CircuitForge hooks activated (circuitforge-hooks)."
|
||||
elif [[ -d "$repo_dir/.githooks" ]]; then
|
||||
git -C "$repo_dir" config core.hooksPath .githooks
|
||||
success "Git hooks activated (.githooks/)."
|
||||
success "Git hooks activated (.githooks/) — circuitforge-hooks not found, using local fallback."
|
||||
fi
|
||||
}
|
||||
|
||||
|
|
@ -337,31 +341,6 @@ setup_env() {
|
|||
fi
|
||||
}
|
||||
|
||||
# ── License key (optional) ────────────────────────────────────────────────────
|
||||
capture_license_key() {
    # Optionally prompt for a CircuitForge license key and store it in .env.
    # Does nothing when stdin is not a terminal or when .env does not exist yet.
    [[ ! -t 0 ]] && return  # skip in non-interactive installs (curl | bash)

    # _key is local so the entered key does not linger in the script's global scope.
    local env_file _key=""
    local key_pattern='^CFG-[A-Z0-9]{4}-[A-Z0-9]{4}-[A-Z0-9]{4}-[A-Z0-9]{4}$'
    env_file="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/.env"
    [[ ! -f "$env_file" ]] && return  # setup_env() creates it; nothing to write into yet

    echo ""
    info "License key (optional)"
    echo -e " Peregrine works without a key for personal self-hosted use."
    echo -e " Paid-tier users: enter your ${YELLOW}CFG-XXXX-…${NC} key to unlock cloud LLM and integrations."
    echo ""
    # `|| true` keeps an EOF at the prompt (Ctrl-D) from aborting the installer.
    read -rp " CircuitForge license key [press Enter to skip]: " _key || true

    # Empty input means the user skipped the prompt.
    [[ -z "$_key" ]] && return 0

    # Matched in-shell rather than via `echo | grep`, so input that looks like
    # an echo option (e.g. "-n") is still validated as-is.
    if [[ "$_key" =~ $key_pattern ]]; then
        _update_env_key "$env_file" "CF_LICENSE_KEY" "$_key"
        _update_env_key "$env_file" "HEIMDALL_URL" "https://license.circuitforge.tech"
        success "License key saved — paid-tier features enabled."
    else
        warn "Key format looks wrong (expected CFG-XXXX-AAAA-BBBB-CCCC) — skipping."
        info "Add it manually to .env as CF_LICENSE_KEY= later."
    fi
}
|
||||
|
||||
# ── Model weights storage ───────────────────────────────────────────────────────
|
||||
_update_env_key() {
|
||||
# Portable in-place key=value update for .env files (Linux + macOS).
|
||||
|
|
@ -437,15 +416,8 @@ main() {
|
|||
fi
|
||||
install_ollama_macos
|
||||
setup_env
|
||||
capture_license_key
|
||||
configure_model_paths
|
||||
|
||||
# Read the actual port from .env so next-steps reflects any customisation
|
||||
local _script_dir _port
|
||||
_script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
_port="$(grep -E '^STREAMLIT_PORT=' "$_script_dir/.env" 2>/dev/null | cut -d= -f2-)"
|
||||
_port="${_port:-8502}"
|
||||
|
||||
echo ""
|
||||
success "All dependencies installed."
|
||||
echo ""
|
||||
|
|
@ -457,7 +429,7 @@ main() {
|
|||
else
|
||||
echo -e " ${YELLOW}./manage.sh start --profile cpu${NC} # local Ollama inference (CPU)"
|
||||
fi
|
||||
echo -e " 2. Open ${YELLOW}http://localhost:${_port}${NC} — the setup wizard will guide you"
|
||||
echo -e " 2. Open ${YELLOW}http://localhost:8501${NC} — the setup wizard will guide you"
|
||||
echo -e " (Tip: edit ${YELLOW}.env${NC} any time to adjust ports or model paths)"
|
||||
echo ""
|
||||
if groups "$USER" 2>/dev/null | grep -q docker; then
|
||||
|
|
@ -12,7 +12,7 @@ import pytest
|
|||
from dotenv import load_dotenv
|
||||
from playwright.sync_api import Page, BrowserContext
|
||||
|
||||
from tests.e2e.models import ErrorRecord, ModeConfig
|
||||
from tests.e2e.models import ErrorRecord, ModeConfig, diff_errors
|
||||
from tests.e2e.modes.demo import DEMO
|
||||
from tests.e2e.modes.cloud import CLOUD
|
||||
from tests.e2e.modes.local import LOCAL
|
||||
|
|
|
|||
|
|
@ -9,9 +9,9 @@ from __future__ import annotations
|
|||
import pytest
|
||||
|
||||
from tests.e2e.conftest import (
|
||||
wait_for_streamlit, screenshot_on_fail,
|
||||
wait_for_streamlit, get_page_errors, screenshot_on_fail,
|
||||
)
|
||||
from tests.e2e.models import diff_errors
|
||||
from tests.e2e.models import ModeConfig, diff_errors
|
||||
from tests.e2e.pages.home_page import HomePage
|
||||
from tests.e2e.pages.job_review_page import JobReviewPage
|
||||
from tests.e2e.pages.apply_page import ApplyPage
|
||||
|
|
|
|||
|
|
@ -7,7 +7,8 @@ Run: pytest tests/e2e/test_smoke.py --mode=demo
|
|||
from __future__ import annotations
|
||||
import pytest
|
||||
|
||||
from tests.e2e.conftest import wait_for_streamlit, screenshot_on_fail
|
||||
from tests.e2e.conftest import wait_for_streamlit, get_page_errors, get_console_errors, screenshot_on_fail
|
||||
from tests.e2e.models import ModeConfig
|
||||
from tests.e2e.pages.home_page import HomePage
|
||||
from tests.e2e.pages.job_review_page import JobReviewPage
|
||||
from tests.e2e.pages.apply_page import ApplyPage
|
||||
|
|
|
|||