From e6410498afeaad9d21d7e50e456acf7a0b308e2b Mon Sep 17 00:00:00 2001 From: pyr0ball Date: Wed, 25 Feb 2026 12:05:49 -0800 Subject: [PATCH] =?UTF-8?q?docs:=20mkdocs=20wiki=20=E2=80=94=20installatio?= =?UTF-8?q?n,=20user=20guide,=20developer=20guide,=20reference?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a full MkDocs documentation site under docs/ with Material theme. Getting Started: installation walkthrough, 7-step first-run wizard guide, Docker Compose profile reference with GPU memory guidance and preflight.py description. User Guide: job discovery (search profiles, custom boards, enrichment), job review (sorting, match scores, batch actions), apply workspace (cover letter gen, PDF export, mark applied), interviews (kanban stages, company research auto-trigger, survey assistant), email sync (IMAP, Gmail App Password, classification labels, stage auto-updates), integrations (all 13 drivers with tier requirements), settings (every tab documented). Developer Guide: contributing (dev env setup, code style, branch naming, PR checklist), architecture (ASCII layer diagram, design decisions), adding scrapers (full scrape() interface, registration, search profile config, test patterns), adding integrations (IntegrationBase full interface, auto- discovery, tier gating, test patterns), testing (patterns, fixtures, what not to test). Reference: tier system (full FEATURES table, can_use/tier_label API, dev override, adding gates), LLM router (backend types, complete() signature, fallback chains, vision routing, __auto__ resolution, adding backends), config files (every file with field-level docs and gitignore status). Also adds CONTRIBUTING.md at repo root pointing to the docs site. 
--- CONTRIBUTING.md | 13 + docs/developer-guide/adding-integrations.md | 249 ++++++++++++++ docs/developer-guide/adding-scrapers.md | 244 ++++++++++++++ docs/developer-guide/architecture.md | 168 ++++++++++ docs/developer-guide/contributing.md | 120 +++++++ docs/developer-guide/testing.md | 181 ++++++++++ docs/getting-started/docker-profiles.md | 118 +++++++ docs/getting-started/first-run-wizard.md | 165 +++++++++ docs/getting-started/installation.md | 134 ++++++++ docs/index.md | 65 ++++ docs/reference/config-files.md | 353 ++++++++++++++++++++ docs/reference/llm-router.md | 231 +++++++++++++ docs/reference/tier-system.md | 159 +++++++++ docs/user-guide/apply-workspace.md | 76 +++++ docs/user-guide/email-sync.md | 119 +++++++ docs/user-guide/integrations.md | 147 ++++++++ docs/user-guide/interviews.md | 96 ++++++ docs/user-guide/job-discovery.md | 123 +++++++ docs/user-guide/job-review.md | 70 ++++ docs/user-guide/settings.md | 152 +++++++++ mkdocs.yml | 67 ++++ 21 files changed, 3050 insertions(+) create mode 100644 CONTRIBUTING.md create mode 100644 docs/developer-guide/adding-integrations.md create mode 100644 docs/developer-guide/adding-scrapers.md create mode 100644 docs/developer-guide/architecture.md create mode 100644 docs/developer-guide/contributing.md create mode 100644 docs/developer-guide/testing.md create mode 100644 docs/getting-started/docker-profiles.md create mode 100644 docs/getting-started/first-run-wizard.md create mode 100644 docs/getting-started/installation.md create mode 100644 docs/index.md create mode 100644 docs/reference/config-files.md create mode 100644 docs/reference/llm-router.md create mode 100644 docs/reference/tier-system.md create mode 100644 docs/user-guide/apply-workspace.md create mode 100644 docs/user-guide/email-sync.md create mode 100644 docs/user-guide/integrations.md create mode 100644 docs/user-guide/interviews.md create mode 100644 docs/user-guide/job-discovery.md create mode 100644 docs/user-guide/job-review.md 
create mode 100644 docs/user-guide/settings.md create mode 100644 mkdocs.yml diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..8eb2a32 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,13 @@ +# Contributing to Peregrine + +See the full contributing guide in the documentation: +https://docs.circuitforge.io/peregrine/developer-guide/contributing/ + +## Quick start + +1. Fork the repo and create a feature branch (`feat/my-feature`) +2. Set up the dev environment: `conda env create -f environment.yml` +3. Run tests: `conda run -n job-seeker python -m pytest tests/ -v` +4. Open a pull request — all CI checks must pass + +See the docs for: adding custom scrapers, adding integrations, code style, and PR checklist. diff --git a/docs/developer-guide/adding-integrations.md b/docs/developer-guide/adding-integrations.md new file mode 100644 index 0000000..89181b4 --- /dev/null +++ b/docs/developer-guide/adding-integrations.md @@ -0,0 +1,249 @@ +# Adding an Integration + +Peregrine's integration system is auto-discovered — add a class and a config example, and it appears in the wizard and Settings automatically. No registration step is needed. 
+ +--- + +## Step 1 — Create the integration module + +Create `scripts/integrations/myservice.py`: + +```python +# scripts/integrations/myservice.py + +from scripts.integrations.base import IntegrationBase + + +class MyServiceIntegration(IntegrationBase): + name = "myservice" # must be unique; matches config filename + label = "My Service" # display name shown in the UI + tier = "free" # "free" | "paid" | "premium" + + def fields(self) -> list[dict]: + """Return form field definitions for the connection card in the wizard/Settings UI.""" + return [ + { + "key": "api_key", + "label": "API Key", + "type": "password", # "text" | "password" | "url" | "checkbox" + "placeholder": "sk-...", + "required": True, + "help": "Get your key at myservice.com/settings/api", + }, + { + "key": "workspace_id", + "label": "Workspace ID", + "type": "text", + "placeholder": "ws_abc123", + "required": True, + "help": "Found in your workspace URL", + }, + ] + + def connect(self, config: dict) -> bool: + """ + Store credentials in memory. Return True if all required fields are present. + Does NOT verify credentials — call test() for that. + """ + self._api_key = config.get("api_key", "").strip() + self._workspace_id = config.get("workspace_id", "").strip() + return bool(self._api_key and self._workspace_id) + + def test(self) -> bool: + """ + Verify the stored credentials actually work. + Returns True on success, False on any failure. + """ + try: + import requests + r = requests.get( + "https://api.myservice.com/v1/ping", + headers={"Authorization": f"Bearer {self._api_key}"}, + params={"workspace": self._workspace_id}, + timeout=5, + ) + return r.ok + except Exception: + return False + + def sync(self, jobs: list[dict]) -> int: + """ + Optional: push jobs to the external service. + Return the count of successfully synced jobs. + The default implementation in IntegrationBase returns 0 (no-op). + Only override this if your integration supports job syncing + (e.g. 
Notion, Airtable, Google Sheets). + """ + synced = 0 + for job in jobs: + try: + self._push_job(job) + synced += 1 + except Exception as e: + print(f"[myservice] sync error for job {job.get('id')}: {e}") + return synced + + def _push_job(self, job: dict) -> None: + import requests + requests.post( + "https://api.myservice.com/v1/records", + headers={"Authorization": f"Bearer {self._api_key}"}, + json={ + "workspace": self._workspace_id, + "title": job.get("title", ""), + "company": job.get("company", ""), + "status": job.get("status", "pending"), + "url": job.get("url", ""), + }, + timeout=10, + ).raise_for_status() +``` + +--- + +## Step 2 — Create the config example file + +Create `config/integrations/myservice.yaml.example`: + +```yaml +# config/integrations/myservice.yaml.example +# Copy to config/integrations/myservice.yaml and fill in your credentials. +# This file is gitignored — never commit the live credentials. +api_key: "" +workspace_id: "" +``` + +The live credentials file (`config/integrations/myservice.yaml`) is gitignored automatically via the `config/integrations/` entry in `.gitignore`. + +--- + +## Step 3 — Auto-discovery + +No registration step is needed. The integration registry (`scripts/integrations/__init__.py`) imports all `.py` files in the `integrations/` directory and discovers subclasses of `IntegrationBase` automatically. + +On next startup, `myservice` will appear in: +- The first-run wizard Step 7 (Integrations) +- **Settings → Integrations** with a connection card rendered from `fields()` + +--- + +## Step 4 — Tier-gate new features (optional) + +If you want to gate a specific action (not just the integration itself) behind a tier, add an entry to `app/wizard/tiers.py`: + +```python +FEATURES: dict[str, str] = { + # ...existing entries... 
+ "myservice_sync": "paid", # or "free" | "premium" +} +``` + +Then guard the action in the relevant UI page: + +```python +from app.wizard.tiers import can_use +from scripts.user_profile import UserProfile + +user = UserProfile() +if can_use(user.tier, "myservice_sync"): + # show the sync button +else: + st.info("MyService sync requires a Paid plan.") +``` + +--- + +## Step 5 — Write a test + +Create or add to `tests/test_integrations.py`: + +```python +# tests/test_integrations.py (add to existing file) + +import pytest +from unittest.mock import patch, MagicMock +from pathlib import Path +from scripts.integrations.myservice import MyServiceIntegration + + +def test_fields_returns_required_keys(): + integration = MyServiceIntegration() + fields = integration.fields() + assert len(fields) >= 1 + for field in fields: + assert "key" in field + assert "label" in field + assert "type" in field + assert "required" in field + + +def test_connect_returns_true_with_valid_config(): + integration = MyServiceIntegration() + result = integration.connect({"api_key": "sk-abc", "workspace_id": "ws-123"}) + assert result is True + + +def test_connect_returns_false_with_missing_required_field(): + integration = MyServiceIntegration() + result = integration.connect({"api_key": "", "workspace_id": "ws-123"}) + assert result is False + + +def test_test_returns_true_on_200(tmp_path): + integration = MyServiceIntegration() + integration.connect({"api_key": "sk-abc", "workspace_id": "ws-123"}) + + mock_resp = MagicMock() + mock_resp.ok = True + + with patch("scripts.integrations.myservice.requests.get", return_value=mock_resp): + assert integration.test() is True + + +def test_test_returns_false_on_error(tmp_path): + integration = MyServiceIntegration() + integration.connect({"api_key": "sk-abc", "workspace_id": "ws-123"}) + + with patch("scripts.integrations.myservice.requests.get", side_effect=Exception("timeout")): + assert integration.test() is False + + +def 
test_is_configured_reflects_file_presence(tmp_path): + config_dir = tmp_path / "config" + config_dir.mkdir() + (config_dir / "integrations").mkdir() + + assert MyServiceIntegration.is_configured(config_dir) is False + + (config_dir / "integrations" / "myservice.yaml").write_text("api_key: sk-abc\n") + assert MyServiceIntegration.is_configured(config_dir) is True +``` + +--- + +## IntegrationBase Reference + +All integrations inherit from `scripts/integrations/base.py`. Here is the full interface: + +| Method / attribute | Required | Description | +|-------------------|----------|-------------| +| `name: str` | Yes | Machine key — must be unique. Matches the YAML config filename. | +| `label: str` | Yes | Human-readable display name for the UI. | +| `tier: str` | Yes | Minimum tier: `"free"`, `"paid"`, or `"premium"`. | +| `fields() -> list[dict]` | Yes | Returns form field definitions. Each dict: `key`, `label`, `type`, `placeholder`, `required`, `help`. | +| `connect(config: dict) -> bool` | Yes | Stores credentials in memory. Returns `True` if required fields are present. Does NOT verify credentials. | +| `test() -> bool` | Yes | Makes a real network call to verify stored credentials. Returns `True` on success. | +| `sync(jobs: list[dict]) -> int` | No | Pushes jobs to the external service. Returns count synced. Default is a no-op returning 0. | +| `config_path(config_dir: Path) -> Path` | Inherited | Returns `config_dir / "integrations" / f"{name}.yaml"`. | +| `is_configured(config_dir: Path) -> bool` | Inherited | Returns `True` if the config YAML file exists. | +| `save_config(config: dict, config_dir: Path)` | Inherited | Writes config dict to the YAML file. Call after `test()` returns `True`. | +| `load_config(config_dir: Path) -> dict` | Inherited | Loads and returns the YAML config, or `{}` if not configured. 
| + +### Field type values + +| `type` value | UI widget rendered | +|-------------|-------------------| +| `"text"` | Plain text input | +| `"password"` | Password input (masked) | +| `"url"` | URL input | +| `"checkbox"` | Boolean checkbox | diff --git a/docs/developer-guide/adding-scrapers.md b/docs/developer-guide/adding-scrapers.md new file mode 100644 index 0000000..0aba019 --- /dev/null +++ b/docs/developer-guide/adding-scrapers.md @@ -0,0 +1,244 @@ +# Adding a Custom Job Board Scraper + +Peregrine supports pluggable custom job board scrapers. Standard boards use the JobSpy library. Custom scrapers handle boards with non-standard APIs, paywalls, or SSR-rendered pages. + +This guide walks through adding a new scraper from scratch. + +--- + +## Step 1 — Create the scraper module + +Create `scripts/custom_boards/myboard.py`. Every custom scraper must implement one function: + +```python +# scripts/custom_boards/myboard.py + +def scrape(profile: dict, db_path: str) -> list[dict]: + """ + Scrape job listings from MyBoard for the given search profile. + + Args: + profile: The active search profile dict from search_profiles.yaml. + Keys include: titles (list), locations (list), + hours_old (int), results_per_board (int). + db_path: Absolute path to staging.db. Use this if you need to + check for existing URLs before returning. + + Returns: + List of job dicts. Each dict must contain at minimum: + title (str) — job title + company (str) — company name + url (str) — canonical job URL (used as unique key) + source (str) — board identifier, e.g. "myboard" + location (str) — "Remote" or "City, State" + is_remote (bool) — True if remote + salary (str) — salary string or "" if unknown + description (str) — full job description text or "" if unavailable + date_found (str) — ISO 8601 datetime string, e.g. 
"2026-02-25T12:00:00" + """ + jobs = [] + + for title in profile.get("titles", []): + for location in profile.get("locations", []): + results = _fetch_from_myboard(title, location, profile) + jobs.extend(results) + + return jobs + + +def _fetch_from_myboard(title: str, location: str, profile: dict) -> list[dict]: + """Internal helper — call the board's API and transform results.""" + import requests + from datetime import datetime + + params = { + "q": title, + "l": location, + "limit": profile.get("results_per_board", 50), + } + + try: + resp = requests.get( + "https://api.myboard.com/jobs", + params=params, + timeout=15, + ) + resp.raise_for_status() + data = resp.json() + except Exception as e: + print(f"[myboard] fetch error: {e}") + return [] + + jobs = [] + for item in data.get("results", []): + jobs.append({ + "title": item.get("title", ""), + "company": item.get("company", ""), + "url": item.get("url", ""), + "source": "myboard", + "location": item.get("location", ""), + "is_remote": "remote" in item.get("location", "").lower(), + "salary": item.get("salary", ""), + "description": item.get("description", ""), + "date_found": datetime.utcnow().isoformat(), + }) + + return jobs +``` + +### Required fields + +| Field | Type | Notes | +|-------|------|-------| +| `title` | str | Job title | +| `company` | str | Company name | +| `url` | str | **Unique key** — must be stable and canonical | +| `source` | str | Short board identifier, e.g. `"myboard"` | +| `location` | str | `"Remote"` or `"City, ST"` | +| `is_remote` | bool | `True` if remote | +| `salary` | str | Salary string or `""` | +| `description` | str | Full description text or `""` | +| `date_found` | str | ISO 8601 UTC datetime | + +### Deduplication + +`discover.py` deduplicates by `url` before inserting into the database. If a job with the same URL already exists, it is silently skipped. You do not need to handle deduplication inside your scraper. 
+ +### Rate limiting + +Be a good citizen: +- Add a `time.sleep(0.5)` between paginated requests +- Respect `Retry-After` headers +- Do not scrape faster than a human browsing the site +- If the site provides an official API, prefer that over scraping HTML + +### Credentials + +If your scraper requires API keys or credentials: +- Create `config/myboard.yaml.example` as a template +- Create `config/myboard.yaml` (gitignored) for live credentials +- Read it in your scraper with `yaml.safe_load(open("config/myboard.yaml"))` +- Document the credential setup in comments at the top of your module + +--- + +## Step 2 — Register the scraper + +Open `scripts/discover.py` and add your scraper to the `CUSTOM_SCRAPERS` dict: + +```python +from scripts.custom_boards import adzuna, theladders, craigslist, myboard + +CUSTOM_SCRAPERS = { + "adzuna": adzuna.scrape, + "theladders": theladders.scrape, + "craigslist": craigslist.scrape, + "myboard": myboard.scrape, # add this line +} +``` + +--- + +## Step 3 — Activate in a search profile + +Open `config/search_profiles.yaml` and add `myboard` to `custom_boards` in any profile: + +```yaml +profiles: + - name: cs_leadership + boards: + - linkedin + - indeed + custom_boards: + - adzuna + - myboard # add this line + titles: + - Customer Success Manager + locations: + - Remote +``` + +--- + +## Step 4 — Write a test + +Create `tests/test_myboard.py`. 
Mock the HTTP call to avoid hitting the live API during tests: + +```python +# tests/test_myboard.py + +from unittest.mock import patch +from scripts.custom_boards.myboard import scrape + +MOCK_RESPONSE = { + "results": [ + { + "title": "Customer Success Manager", + "company": "Acme Corp", + "url": "https://myboard.com/jobs/12345", + "location": "Remote", + "salary": "$80,000 - $100,000", + "description": "We are looking for a CSM...", + } + ] +} + +def test_scrape_returns_correct_shape(): + profile = { + "titles": ["Customer Success Manager"], + "locations": ["Remote"], + "results_per_board": 10, + "hours_old": 240, + } + + with patch("scripts.custom_boards.myboard.requests.get") as mock_get: + mock_get.return_value.ok = True + mock_get.return_value.raise_for_status = lambda: None + mock_get.return_value.json.return_value = MOCK_RESPONSE + + jobs = scrape(profile, db_path="nonexistent.db") + + assert len(jobs) == 1 + job = jobs[0] + + # Required fields + assert "title" in job + assert "company" in job + assert "url" in job + assert "source" in job + assert "location" in job + assert "is_remote" in job + assert "salary" in job + assert "description" in job + assert "date_found" in job + + assert job["source"] == "myboard" + assert job["title"] == "Customer Success Manager" + assert job["url"] == "https://myboard.com/jobs/12345" + + +def test_scrape_handles_http_error_gracefully(): + profile = { + "titles": ["Customer Success Manager"], + "locations": ["Remote"], + "results_per_board": 10, + "hours_old": 240, + } + + with patch("scripts.custom_boards.myboard.requests.get") as mock_get: + mock_get.side_effect = Exception("Connection refused") + + jobs = scrape(profile, db_path="nonexistent.db") + + assert jobs == [] +``` + +--- + +## Existing Scrapers as Reference + +| Scraper | Notes | +|---------|-------| +| `scripts/custom_boards/adzuna.py` | REST API with `app_id` + `app_key` authentication | +| `scripts/custom_boards/theladders.py` | SSR scraper using 
`curl_cffi` to parse `__NEXT_DATA__` JSON embedded in the page | +| `scripts/custom_boards/craigslist.py` | RSS feed scraper | diff --git a/docs/developer-guide/architecture.md b/docs/developer-guide/architecture.md new file mode 100644 index 0000000..e6c1e22 --- /dev/null +++ b/docs/developer-guide/architecture.md @@ -0,0 +1,168 @@ +# Architecture + +This page describes Peregrine's system structure, layer boundaries, and key design decisions. + +--- + +## System Overview + +``` +┌─────────────────────────────────────────────────────────────┐ +│ Docker Compose │ +│ │ +│ ┌──────────┐ ┌──────────┐ ┌───────┐ ┌───────────────┐ │ +│ │ app │ │ ollama │ │ vllm │ │ vision │ │ +│ │ :8501 │ │ :11434 │ │ :8000 │ │ :8002 │ │ +│ │Streamlit │ │ Local LLM│ │ vLLM │ │ Moondream2 │ │ +│ └────┬─────┘ └──────────┘ └───────┘ └───────────────┘ │ +│ │ │ +│ ┌────┴───────┐ ┌─────────────┐ │ +│ │ searxng │ │ staging.db │ │ +│ │ :8888 │ │ (SQLite) │ │ +│ └────────────┘ └─────────────┘ │ +└─────────────────────────────────────────────────────────────┘ + +┌─────────────────────────────────────────────────────────────┐ +│ Streamlit App Layer │ +│ │ +│ app/app.py (entry point, navigation, sidebar task badge) │ +│ │ +│ app/pages/ │ +│ 0_Setup.py First-run wizard (gates everything) │ +│ 1_Job_Review.py Approve / reject queue │ +│ 2_Settings.py All user configuration │ +│ 4_Apply.py Cover letter gen + PDF export │ +│ 5_Interviews.py Kanban: phone_screen → hired │ +│ 6_Interview_Prep.py Research brief + practice Q&A │ +│ 7_Survey.py Culture-fit survey assistant │ +│ │ +│ app/wizard/ │ +│ step_hardware.py ... 
step_integrations.py │ +│ tiers.py Feature gate definitions │ +└─────────────────────────────────────────────────────────────┘ + +┌─────────────────────────────────────────────────────────────┐ +│ Scripts Layer │ +│ (framework-independent — could be called by FastAPI) │ +│ │ +│ discover.py JobSpy + custom board orchestration │ +│ match.py Resume keyword scoring │ +│ db.py All SQLite helpers (single source) │ +│ llm_router.py LLM fallback chain │ +│ generate_cover_letter.py Cover letter generation │ +│ company_research.py Pre-interview research brief │ +│ task_runner.py Background daemon thread executor │ +│ imap_sync.py IMAP email fetch + classify │ +│ sync.py Push to external integrations │ +│ user_profile.py UserProfile wrapper for user.yaml │ +│ preflight.py Port + resource check │ +│ │ +│ custom_boards/ Per-board scrapers │ +│ integrations/ Per-service integration drivers │ +│ vision_service/ FastAPI Moondream2 inference server │ +└─────────────────────────────────────────────────────────────┘ + +┌─────────────────────────────────────────────────────────────┐ +│ Config Layer │ +│ │ +│ config/user.yaml Personal data + wizard state │ +│ config/llm.yaml LLM backends + fallback chains │ +│ config/search_profiles.yaml Job search configuration │ +│ config/resume_keywords.yaml Scoring keywords │ +│ config/blocklist.yaml Excluded companies/domains │ +│ config/email.yaml IMAP credentials │ +│ config/integrations/ Per-integration credentials │ +└─────────────────────────────────────────────────────────────┘ + +┌─────────────────────────────────────────────────────────────┐ +│ Database Layer │ +│ │ +│ staging.db (SQLite, local, gitignored) │ +│ │ +│ jobs Core pipeline — all job data │ +│ job_contacts Email thread log per job │ +│ company_research LLM-generated research briefs │ +│ background_tasks Async task queue state │ +│ survey_responses Culture-fit survey Q&A pairs │ +└─────────────────────────────────────────────────────────────┘ +``` + +--- + +## Layer Boundaries + 
+### App layer (app/) + +The Streamlit UI layer. Its only responsibilities are: + +- Reading from `scripts/db.py` helpers +- Calling `scripts/` functions directly or via `task_runner.submit_task()` +- Rendering results to the browser + +The app layer does not contain business logic. Database queries, LLM calls, and integrations all live in `scripts/`. + +### Scripts layer (scripts/) + +This is the stable public API of Peregrine. Scripts are designed to be framework-independent — they do not import Streamlit and can be called from a CLI, FastAPI endpoint, or background thread without modification. + +All personal data access goes through `scripts/user_profile.py` (`UserProfile` class). Scripts never read `config/user.yaml` directly. + +All database access goes through `scripts/db.py`. No script does raw SQLite outside of `db.py`. + +### Config layer (config/) + +Plain YAML files. Gitignored files contain secrets; `.example` files are committed as templates. + +--- + +## Background Tasks + +`scripts/task_runner.py` provides a simple background thread executor for long-running LLM tasks. + +```python +from scripts.task_runner import submit_task + +# Queue a cover letter generation task +submit_task(db_path, task_type="cover_letter", job_id=42) + +# Queue a company research task +submit_task(db_path, task_type="company_research", job_id=42) +``` + +Tasks are recorded in the `background_tasks` table with statuses: `queued → running → completed / failed`. + +**Dedup rule:** Only one `queued` or `running` task per `(task_type, job_id)` pair is allowed at a time. Submitting a duplicate is a silent no-op. + +**On startup:** `app/app.py` resets any `running` or `queued` rows to `failed` to clear tasks that were interrupted by a server restart. + +**Sidebar indicator:** `app/app.py` polls the `background_tasks` table every 3 seconds via a Streamlit fragment and displays a badge in the sidebar. 
+ +--- + +## LLM Router + +`scripts/llm_router.py` provides a single `complete()` call that tries backends in priority order and falls back transparently. See [LLM Router](../reference/llm-router.md) for full documentation. + +--- + +## Key Design Decisions + +### scripts/ is framework-independent + +The scripts layer was deliberately kept free of Streamlit imports. This means the full pipeline can be migrated to a FastAPI or Celery backend without rewriting business logic. + +### All personal data via UserProfile + +`scripts/user_profile.py` is the single source of truth for all user data. This makes it easy to swap the storage backend (e.g. from YAML to a database) without touching every script. + +### SQLite as staging layer + +`staging.db` acts as the staging layer between discovery and external integrations. This lets discovery, matching, and the UI all run independently without network dependencies. External integrations (Notion, Airtable, etc.) are push-only and optional. + +### Tier system in app/wizard/tiers.py + +`FEATURES` is a single dict that maps feature key → minimum tier. `can_use(tier, feature)` is the single gating function. New features are added to `FEATURES` in one place. + +### Vision service is a separate process + +Moondream2 requires `torch` and `transformers`, which are incompatible with the lightweight main conda environment. The vision service runs as a separate FastAPI process in a separate conda environment (`job-seeker-vision`), keeping the main env free of GPU dependencies. diff --git a/docs/developer-guide/contributing.md b/docs/developer-guide/contributing.md new file mode 100644 index 0000000..d160182 --- /dev/null +++ b/docs/developer-guide/contributing.md @@ -0,0 +1,120 @@ +# Contributing + +Thank you for your interest in contributing to Peregrine. This guide covers the development environment, code standards, test requirements, and pull request process. + +!!! note "License" + Peregrine uses a dual licence. 
The discovery pipeline (`scripts/discover.py`, `scripts/match.py`, `scripts/db.py`, `scripts/custom_boards/`) is MIT. All AI features, the UI, and everything else is BSL 1.1. + Do not add `Co-Authored-By:` trailers or AI-attribution notices to commits — this is a commercial repository. + +--- + +## Fork and Clone + +```bash +git clone https://git.circuitforge.io/circuitforge/peregrine +cd peregrine +``` + +Create a feature branch from `main`: + +```bash +git checkout -b feat/my-feature +``` + +--- + +## Dev Environment Setup + +Peregrine's Python dependencies are managed with conda. The same `job-seeker` environment is used for both the legacy personal app and Peregrine. + +```bash +# Create the environment from the lockfile +conda env create -f environment.yml + +# Activate +conda activate job-seeker +``` + +Alternatively, install from `requirements.txt` into an existing Python 3.12 environment: + +```bash +pip install -r requirements.txt +``` + +!!! warning "Keep the env lightweight" + Do not add `torch`, `sentence-transformers`, `bitsandbytes`, `transformers`, or any other CUDA/GPU package to the main environment. These live in separate conda environments (`job-seeker-vision` for the vision service, `ogma` for fine-tuning). Adding them to the main env causes out-of-memory failures during test runs. + +--- + +## Running Tests + +```bash +conda run -n job-seeker python -m pytest tests/ -v +``` + +Or with the direct binary (avoids runaway process spawning): + +```bash +/path/to/miniconda3/envs/job-seeker/bin/pytest tests/ -v +``` + +The `pytest.ini` file scopes collection to the `tests/` directory only — do not widen this. + +All tests must pass before submitting a PR. See [Testing](testing.md) for patterns and conventions. 
+ +--- + +## Code Style + +- **PEP 8** for all Python code — use `flake8` or `ruff` to check +- **Type hints preferred** on function signatures — not required but strongly encouraged +- **Docstrings** on all public functions and classes +- **No print statements** in library code (`scripts/`); use Python's `logging` module or return status in the return value. `print` is acceptable in one-off scripts and `discover.py`-style entry points. + +--- + +## Branch Naming + +| Prefix | Use for | +|--------|---------| +| `feat/` | New features | +| `fix/` | Bug fixes | +| `docs/` | Documentation only | +| `refactor/` | Code reorganisation without behaviour change | +| `test/` | Test additions or corrections | +| `chore/` | Dependency updates, CI, tooling | + +Example: `feat/add-greenhouse-scraper`, `fix/email-imap-timeout`, `docs/add-integration-guide` + +--- + +## PR Checklist + +Before opening a pull request: + +- [ ] All tests pass: `conda run -n job-seeker python -m pytest tests/ -v` +- [ ] New behaviour is covered by at least one test +- [ ] No new dependencies added to `environment.yml` or `requirements.txt` without a clear justification in the PR description +- [ ] Documentation updated if the PR changes user-visible behaviour (update the relevant page in `docs/`) +- [ ] Config file changes are reflected in the `.example` file +- [ ] No secrets, tokens, or personal data in any committed file +- [ ] Gitignored files (`config/*.yaml`, `staging.db`, `aihawk/`, `.env`) are not committed + +--- + +## What NOT to Do + +- Do not commit `config/user.yaml`, `config/notion.yaml`, `config/email.yaml`, `config/adzuna.yaml`, or any `config/integrations/*.yaml` — all are gitignored +- Do not commit `staging.db` +- Do not add `torch`, `bitsandbytes`, `transformers`, or `sentence-transformers` to the main environment +- Do not add `Co-Authored-By:` or AI-attribution lines to commit messages +- Do not force-push to `main` + +--- + +## Getting Help + +Open an issue on the repository 
with the `question` label. Include: +- Your OS and Docker version +- The `inference_profile` from your `config/user.yaml` +- Relevant log output from `make logs` diff --git a/docs/developer-guide/testing.md b/docs/developer-guide/testing.md new file mode 100644 index 0000000..18a66f7 --- /dev/null +++ b/docs/developer-guide/testing.md @@ -0,0 +1,181 @@ +# Testing + +Peregrine has a test suite covering the core scripts layer, LLM router, integrations, wizard steps, and database helpers. + +--- + +## Running the Test Suite + +```bash +conda run -n job-seeker python -m pytest tests/ -v +``` + +Or using the direct binary (recommended to avoid runaway process spawning): + +```bash +/path/to/miniconda3/envs/job-seeker/bin/pytest tests/ -v +``` + +`pytest.ini` scopes test collection to `tests/` only: + +```ini +[pytest] +testpaths = tests +``` + +Do not widen this — the `aihawk/` subtree has its own test files that pull in GPU dependencies. + +--- + +## What Is Covered + +The suite currently has approximately 219 tests covering: + +| Module | What is tested | +|--------|---------------| +| `scripts/db.py` | CRUD helpers, status transitions, dedup logic | +| `scripts/llm_router.py` | Fallback chain, backend selection, vision routing, error handling | +| `scripts/match.py` | Keyword scoring, gap calculation | +| `scripts/imap_sync.py` | Email parsing, classification label mapping | +| `scripts/company_research.py` | Prompt construction, output parsing | +| `scripts/generate_cover_letter.py` | Mission alignment detection, prompt injection | +| `scripts/task_runner.py` | Task submission, dedup, status transitions | +| `scripts/user_profile.py` | Accessor methods, defaults, YAML round-trip | +| `scripts/integrations/` | Base class contract, per-driver `fields()` and `connect()` | +| `app/wizard/tiers.py` | `can_use()`, `tier_label()`, edge cases | +| `scripts/custom_boards/` | Scraper return shape, HTTP error handling | + +--- + +## Test Structure + +Tests live in `tests/`. 
File naming mirrors the module being tested: + +``` +tests/ + test_db.py + test_llm_router.py + test_match.py + test_imap_sync.py + test_company_research.py + test_cover_letter.py + test_task_runner.py + test_user_profile.py + test_integrations.py + test_tiers.py + test_adzuna.py + test_theladders.py +``` + +--- + +## Key Patterns + +### tmp_path for YAML files + +Use pytest's built-in `tmp_path` fixture for any test that reads or writes YAML config files: + +```python +def test_user_profile_reads_name(tmp_path): + config = tmp_path / "user.yaml" + config.write_text("name: Alice\nemail: alice@example.com\n") + + from scripts.user_profile import UserProfile + profile = UserProfile(config_path=config) + assert profile.name == "Alice" +``` + +### Mocking LLM calls + +Never make real LLM calls in tests. Patch `LLMRouter.complete`: + +```python +from unittest.mock import patch + +def test_cover_letter_calls_llm(tmp_path): + with patch("scripts.generate_cover_letter.LLMRouter") as MockRouter: + MockRouter.return_value.complete.return_value = "Dear Hiring Manager,\n..." 
+ from scripts.generate_cover_letter import generate + result = generate(job={...}, user_profile={...}) + + assert "Dear Hiring Manager" in result + MockRouter.return_value.complete.assert_called_once() +``` + +### Mocking HTTP in scraper tests + +```python +from unittest.mock import patch + +def test_adzuna_returns_jobs(): + with patch("scripts.custom_boards.adzuna.requests.get") as mock_get: + mock_get.return_value.ok = True + mock_get.return_value.raise_for_status = lambda: None + mock_get.return_value.json.return_value = {"results": [...]} + + from scripts.custom_boards.adzuna import scrape + jobs = scrape(profile={...}, db_path="nonexistent.db") + + assert len(jobs) > 0 +``` + +### Temporary-file SQLite for DB tests + +```python +import sqlite3, tempfile, os + +def test_insert_job(): + with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as f: + db_path = f.name + try: + from scripts.db import init_db, insert_job + init_db(db_path) + insert_job(db_path, title="CSM", company="Acme", url="https://example.com/1", ...) + # assert... + finally: + os.unlink(db_path) +``` + +--- + +## What NOT to Test + +- **Streamlit widget rendering** — Streamlit has no headless test support. Do not try to test `st.button()` or `st.text_input()` calls. Test the underlying script functions instead. +- **Real network calls** — always mock HTTP and LLM clients +- **Real GPU inference** — mock the vision service and LLM router + +--- + +## Adding Tests for New Code + +### New scraper + +Create `tests/test_myboard.py`. Required test cases: +1. Happy path: mock HTTP returns valid data → correct job dict shape +2. HTTP error: mock raises `Exception` → function returns `[]` (does not raise) +3. Empty results: API returns `{"results": []}` → function returns `[]` + +### New integration + +Add to `tests/test_integrations.py`. Required test cases: +1. `fields()` returns list of dicts with required keys +2.
`connect()` returns `True` with valid config, `False` with missing required field +3. `test()` returns `True` with mocked successful HTTP, `False` with exception +4. `is_configured()` reflects file presence in `tmp_path` + +### New wizard step + +Add to `tests/test_wizard_steps.py`. Test the step's pure-logic functions (validation, data extraction). Do not test the Streamlit rendering. + +### New tier feature gate + +Add to `tests/test_tiers.py`: + +```python +from app.wizard.tiers import can_use + +def test_my_new_feature_requires_paid(): + assert can_use("free", "my_new_feature") is False + assert can_use("paid", "my_new_feature") is True + assert can_use("premium", "my_new_feature") is True +``` diff --git a/docs/getting-started/docker-profiles.md b/docs/getting-started/docker-profiles.md new file mode 100644 index 0000000..347c9a6 --- /dev/null +++ b/docs/getting-started/docker-profiles.md @@ -0,0 +1,118 @@ +# Docker Profiles + +Peregrine uses Docker Compose profiles to start only the services your hardware can support. Choose a profile with `make start PROFILE=<profile>`. + +--- + +## Profile Reference + +| Profile | Services started | Use case | +|---------|----------------|----------| +| `remote` | `app`, `searxng` | No GPU. LLM calls go to an external API (Anthropic, OpenAI-compatible). | +| `cpu` | `app`, `ollama`, `searxng` | No GPU. Runs local models on CPU — functional but slow. | +| `single-gpu` | `app`, `ollama`, `vision`, `searxng` | One NVIDIA GPU. Covers cover letters, research, and vision (survey screenshots). | +| `dual-gpu` | `app`, `ollama`, `vllm`, `vision`, `searxng` | Two NVIDIA GPUs. GPU 0 = Ollama (cover letters), GPU 1 = vLLM (research).
| + +--- + +## Service Descriptions + +| Service | Image / Source | Port | Purpose | +|---------|---------------|------|---------| +| `app` | `Dockerfile` (Streamlit) | 8501 | The main Peregrine UI | +| `ollama` | `ollama/ollama` | 11434 | Local model inference — cover letters and general tasks | +| `vllm` | `vllm/vllm-openai` | 8000 | High-throughput local inference — research tasks | +| `vision` | `scripts/vision_service/` | 8002 | Moondream2 — survey screenshot analysis | +| `searxng` | `searxng/searxng` | 8888 | Private meta-search engine — company research web scraping | + +--- + +## Choosing a Profile + +### remote + +Use `remote` if: +- You have no NVIDIA GPU +- You plan to use Anthropic Claude or another API-hosted model exclusively +- You want the fastest startup (only two containers) + +You must configure at least one external LLM backend in **Settings → LLM Backends**. + +### cpu + +Use `cpu` if: +- You have no GPU but want to run models locally (e.g. for privacy) +- Acceptable for light use; cover letter generation may take several minutes per request + +Pull a model after the container starts: + +```bash +docker exec -it peregrine-ollama-1 ollama pull llama3.1:8b +``` + +### single-gpu + +Use `single-gpu` if: +- You have one NVIDIA GPU with at least 8 GB VRAM +- Recommended for most single-user installs +- The vision service (Moondream2) starts on the same GPU using 4-bit quantisation (~1.5 GB VRAM) + +### dual-gpu + +Use `dual-gpu` if: +- You have two or more NVIDIA GPUs +- GPU 0 handles Ollama (cover letters, quick tasks) +- GPU 1 handles vLLM (research, long-context tasks) +- The vision service shares GPU 0 with Ollama + +--- + +## GPU Memory Guidance + +| GPU VRAM | Recommended profile | Notes | +|----------|-------------------|-------| +| < 4 GB | `cpu` | GPU too small for practical model loading | +| 4–8 GB | `single-gpu` | Run smaller models (3B–8B parameters) | +| 8–16 GB | `single-gpu` | Run 8B–13B models comfortably | +| 16–24 GB | 
`single-gpu` | Run 13B–34B models | +| 24 GB+ | `single-gpu` or `dual-gpu` | 70B models with quantisation | + +--- + +## How preflight.py Works + +`make start` calls `scripts/preflight.py` before launching Docker. Preflight does the following: + +1. **Port conflict detection** — checks whether `STREAMLIT_PORT`, `OLLAMA_PORT`, `VLLM_PORT`, `SEARXNG_PORT`, and `VISION_PORT` are already in use. Reports any conflicts and suggests alternatives. + +2. **GPU enumeration** — queries `nvidia-smi` for GPU count and VRAM per card. + +3. **RAM check** — reads `/proc/meminfo` (Linux) or `vm_stat` (macOS) to determine available system RAM. + +4. **KV cache offload** — if GPU VRAM is less than 10 GB, preflight calculates `CPU_OFFLOAD_GB` (the amount of KV cache to spill to system RAM) and writes it to `.env`. The vLLM container picks this up via `--cpu-offload-gb`. + +5. **Profile recommendation** — writes `RECOMMENDED_PROFILE` to `.env`. This is informational; `make start` uses the `PROFILE` variable you specify (defaulting to `remote`). + +You can run preflight independently: + +```bash +make preflight +# or +python scripts/preflight.py +``` + +--- + +## Customising Ports + +Edit `.env` before running `make start`: + +```bash +STREAMLIT_PORT=8501 +OLLAMA_PORT=11434 +VLLM_PORT=8000 +SEARXNG_PORT=8888 +VISION_PORT=8002 +``` + +All containers read from `.env` via the `env_file` directive in `compose.yml`. diff --git a/docs/getting-started/first-run-wizard.md b/docs/getting-started/first-run-wizard.md new file mode 100644 index 0000000..aaa413c --- /dev/null +++ b/docs/getting-started/first-run-wizard.md @@ -0,0 +1,165 @@ +# First-Run Wizard + +When you open Peregrine for the first time, the setup wizard launches automatically. It walks through seven steps and saves your progress after each one — if your browser closes or the server restarts, it resumes where you left off. 
+ +--- + +## Step 1 — Hardware + +Peregrine detects NVIDIA GPUs using `nvidia-smi` and reports: + +- Number of GPUs found +- VRAM per GPU +- Available system RAM + +Based on this, it recommends a Docker Compose profile: + +| Recommendation | Condition | +|---------------|-----------| +| `remote` | No GPU detected | +| `cpu` | GPU detected but VRAM < 4 GB | +| `single-gpu` | One GPU with VRAM >= 4 GB | +| `dual-gpu` | Two or more GPUs | + +You can override the recommendation and select any profile manually. The selection is written to `config/user.yaml` as `inference_profile`. + +--- + +## Step 2 — Tier + +Select your Peregrine tier: + +| Tier | Description | +|------|-------------| +| **Free** | Job discovery, matching, and basic pipeline — no LLM features | +| **Paid** | Adds cover letters, company research, email sync, integrations, and all AI features | +| **Premium** | Adds fine-tuning and multi-user support | + +Your tier is written to `config/user.yaml` as `tier`. + +**Dev tier override** — for local testing without a paid licence, set `dev_tier_override: premium` in `config/user.yaml`. This is for development use only and has no effect on production deployments. + +See [Tier System](../reference/tier-system.md) for the full feature gate table. + +--- + +## Step 3 — Identity + +Enter your personal details. These are stored locally in `config/user.yaml` and used to personalise cover letters and research briefs. + +| Field | Description | +|-------|-------------| +| Name | Your full name | +| Email | Primary contact email | +| Phone | Contact phone number | +| LinkedIn | LinkedIn profile URL | +| Career summary | 2–4 sentence professional summary — used in cover letters and interview prep | + +**LLM-assisted writing (Paid):** If you have a paid tier, the wizard offers to generate your career summary from a few bullet points using your configured LLM backend. 
+ +--- + +## Step 4 — Resume + +Two paths are available: + +### Upload PDF or DOCX + +Upload your existing resume. The LLM parses it and extracts: +- Work experience (employer, title, dates, bullets) +- Education +- Skills +- Certifications + +The extracted data is stored in `config/user.yaml` and used when generating cover letters. + +### Guided form builder + +Fill in each section manually using structured form fields. Useful if you do not have a digital resume file ready, or if the parser misses something important. + +Both paths produce the same data structure. You can mix them — upload first, then edit the result in the form. + +--- + +## Step 5 — Inference + +Configure which LLM backends Peregrine uses. Backends are tried in priority order; if the first fails, Peregrine falls back to the next. + +Available backend types: + +| Type | Examples | Notes | +|------|---------|-------| +| `openai_compat` | Ollama, vLLM, Claude Code wrapper, Copilot wrapper | Any OpenAI-compatible API | +| `anthropic` | Claude via Anthropic API | Requires `ANTHROPIC_API_KEY` env var | +| `vision_service` | Moondream2 local service | Used for survey screenshot analysis only | + +For each backend you want to enable: + +1. Enter the base URL (e.g. `http://localhost:11434/v1` for Ollama) +2. Enter an API key if required (Anthropic, OpenAI) +3. Click **Test** — Peregrine pings the `/health` endpoint and attempts a short completion + +The full backend configuration is written to `config/llm.yaml`. You can edit it directly later via **Settings → LLM Backends**. + +!!! tip "Recommended minimum" + Enable at least Ollama with a general-purpose model (e.g. `llama3.1:8b`) for research tasks, and either Ollama or Anthropic for cover letter generation. The wizard will not block you if no backend is configured, but most features will not work. + +--- + +## Step 6 — Search + +Define what jobs to look for. Search configuration is written to `config/search_profiles.yaml`. 
+ +| Field | Description | +|-------|-------------| +| Profile name | A label for this search profile (e.g. `cs_leadership`) | +| Job titles | List of titles to search for (e.g. `Customer Success Manager`, `TAM`) | +| Locations | City/region strings or `Remote` | +| Boards | Standard boards: `linkedin`, `indeed`, `glassdoor`, `zip_recruiter`, `google` | +| Custom boards | Additional scrapers: `adzuna`, `theladders`, `craigslist` | +| Exclude keywords | Jobs containing these words in the title are dropped | +| Results per board | Max jobs to fetch per board per run | +| Hours old | Only fetch jobs posted within this many hours | + +You can create multiple profiles (e.g. one for remote roles, one for a target industry). Run them all from the Home page or run a specific one. + +--- + +## Step 7 — Integrations + +Connect optional external services. All integrations are optional — skip this step if you want to use Peregrine without external accounts. + +Available integrations: + +**Job tracking (Paid):** Notion, Airtable, Google Sheets + +**Document storage (Free):** Google Drive, Dropbox, OneDrive, MEGA, Nextcloud + +**Calendar (Paid):** Google Calendar, Apple Calendar (CalDAV) + +**Notifications (Paid for Slack; Free for Discord and Home Assistant):** Slack, Discord, Home Assistant + +Each integration has a connection card with the required credentials. Click **Test** to verify the connection before saving. Credentials are written to `config/integrations/<name>.yaml` (gitignored). + +See [Integrations](../user-guide/integrations.md) for per-service details. + +--- + +## Crash Recovery + +The wizard saves your progress to `config/user.yaml` after each step is completed (`wizard_step` field). If anything goes wrong: + +- Restart Peregrine and navigate to http://localhost:8501 +- The wizard resumes at the last completed step + +--- + +## Re-entering the Wizard + +To go through the wizard again (e.g. to change your search profile or swap LLM backends): + +1.
Open **Settings** +2. Go to the **Developer** tab +3. Click **Reset wizard** + +This sets `wizard_complete: false` and `wizard_step: 0` in `config/user.yaml`. Your previously entered data is preserved as defaults. diff --git a/docs/getting-started/installation.md b/docs/getting-started/installation.md new file mode 100644 index 0000000..bb106b7 --- /dev/null +++ b/docs/getting-started/installation.md @@ -0,0 +1,134 @@ +# Installation + +This page walks through a full Peregrine installation from scratch. + +--- + +## Prerequisites + +- **Git** — to clone the repository +- **Internet connection** — `setup.sh` downloads Docker and other dependencies +- **Operating system**: Ubuntu/Debian, Fedora/RHEL, Arch Linux, or macOS (with Docker Desktop) + +!!! warning "Windows" + Windows is not supported. Use [WSL2 with Ubuntu](https://docs.microsoft.com/windows/wsl/install) instead. + +--- + +## Step 1 — Clone the repository + +```bash +git clone https://git.circuitforge.io/circuitforge/peregrine +cd peregrine +``` + +--- + +## Step 2 — Run setup.sh + +```bash +bash setup.sh +``` + +`setup.sh` performs the following automatically: + +1. **Detects your platform** (Ubuntu/Debian, Fedora/RHEL, Arch, macOS) +2. **Installs Git** if not already present +3. **Installs Docker Engine** and the Docker Compose v2 plugin via the official Docker repositories +4. **Adds your user to the `docker` group** so you do not need `sudo` for docker commands (Linux only — log out and back in after this) +5. **Detects NVIDIA GPUs** — if `nvidia-smi` is present and working, installs the NVIDIA Container Toolkit and configures Docker to use it +6. **Creates `.env` from `.env.example`** — edit `.env` to customise ports and model storage paths before starting + +!!! note "macOS" + `setup.sh` installs Docker Desktop via Homebrew (`brew install --cask docker`) then exits. Open Docker Desktop, start it, then re-run the script. + +!!! 
note "GPU requirement" + For GPU support, `nvidia-smi` must return output before you run `setup.sh`. Install your NVIDIA driver first. The Container Toolkit installation will fail silently if the driver is not present. + +--- + +## Step 3 — (Optional) Edit .env + +The `.env` file controls ports and volume mount paths. The defaults work for most single-user installs: + +```bash +# Default ports +STREAMLIT_PORT=8501 +OLLAMA_PORT=11434 +VLLM_PORT=8000 +SEARXNG_PORT=8888 +VISION_PORT=8002 +``` + +Change `STREAMLIT_PORT` if 8501 is taken on your machine. + +--- + +## Step 4 — Start Peregrine + +Choose a profile based on your hardware: + +```bash +make start # remote — no GPU, use API-only LLMs +make start PROFILE=cpu # cpu — local models on CPU (slow) +make start PROFILE=single-gpu # single-gpu — one NVIDIA GPU +make start PROFILE=dual-gpu # dual-gpu — GPU 0 = Ollama, GPU 1 = vLLM +``` + +`make start` runs `preflight.py` first, which checks for port conflicts and writes GPU/RAM recommendations back to `.env`. Then it calls `docker compose --profile <profile> up -d`. + +--- + +## Step 5 — Open the UI + +Navigate to **http://localhost:8501** (or whatever `STREAMLIT_PORT` you set). + +The first-run wizard launches automatically. See [First-Run Wizard](first-run-wizard.md) for a step-by-step guide through all seven steps. + +--- + +## Supported Platforms + +| Platform | Tested | Notes | +|----------|--------|-------| +| Ubuntu 22.04 / 24.04 | Yes | Primary target | +| Debian 12 | Yes | | +| Fedora 39/40 | Yes | | +| RHEL / Rocky / AlmaLinux | Yes | | +| Arch Linux / Manjaro | Yes | | +| macOS (Apple Silicon) | Yes | Docker Desktop required; no GPU support | +| macOS (Intel) | Yes | Docker Desktop required; no GPU support | +| Windows | No | Use WSL2 with Ubuntu | + +--- + +## GPU Support + +Only NVIDIA GPUs are supported. AMD ROCm is not currently supported.
+ +Requirements: +- NVIDIA driver installed and `nvidia-smi` working before running `setup.sh` +- CUDA 12.x recommended (CUDA 11.x may work but is untested) +- Minimum 8 GB VRAM for `single-gpu` profile with default models +- For `dual-gpu`: GPU 0 is assigned to Ollama, GPU 1 to vLLM + +If your GPU has less than 10 GB VRAM, `preflight.py` will calculate a `CPU_OFFLOAD_GB` value and write it to `.env`. The vLLM container picks this up via `--cpu-offload-gb` to overflow KV cache to system RAM. + +--- + +## Stopping Peregrine + +```bash +make stop # stop all containers +make restart # stop then start again (runs preflight first) +``` + +--- + +## Reinstalling / Clean State + +```bash +make clean # removes containers, images, and data volumes (destructive) +``` + +You will be prompted to type `yes` to confirm. diff --git a/docs/index.md b/docs/index.md new file mode 100644 index 0000000..73d4fc8 --- /dev/null +++ b/docs/index.md @@ -0,0 +1,65 @@ +# Peregrine + +**AI-powered job search pipeline — by [Circuit Forge LLC](https://circuitforge.io)** + +Peregrine automates the full job search lifecycle: discovery, matching, cover letter generation, application tracking, and interview preparation. It is privacy-first and local-first — your data never leaves your machine unless you configure an external integration. + +--- + +## Quick Start + +```bash +# 1. Clone and install dependencies +git clone https://git.circuitforge.io/circuitforge/peregrine +cd peregrine +bash setup.sh + +# 2. Start Peregrine +make start # no GPU, API-only +make start PROFILE=single-gpu # one NVIDIA GPU +make start PROFILE=dual-gpu # dual GPU (Ollama + vLLM) + +# 3. Open the UI +# http://localhost:8501 +``` + +The first-run wizard guides you through hardware detection, tier selection, identity, resume, LLM configuration, search profiles, and integrations. See [Installation](getting-started/installation.md) for the full walkthrough. 
+ +--- + +## Feature Overview + +| Feature | Free | Paid | Premium | +|---------|------|------|---------| +| Job discovery (JobSpy + custom boards) | Yes | Yes | Yes | +| Resume keyword matching | Yes | Yes | Yes | +| Cover letter generation | - | Yes | Yes | +| Company research briefs | - | Yes | Yes | +| Interview prep & practice Q&A | - | Yes | Yes | +| Email sync & auto-classification | - | Yes | Yes | +| Survey assistant (culture-fit Q&A) | - | Yes | Yes | +| Integration connectors (Notion, Airtable, etc.) | Partial | Yes | Yes | +| Calendar sync (Google, Apple) | - | Yes | Yes | +| Cover letter model fine-tuning | - | - | Yes | +| Multi-user support | - | - | Yes | + +See [Tier System](reference/tier-system.md) for the full feature gate table. + +--- + +## Documentation Sections + +- **[Getting Started](getting-started/installation.md)** — Install, configure, and launch Peregrine +- **[User Guide](user-guide/job-discovery.md)** — How to use every feature in the UI +- **[Developer Guide](developer-guide/contributing.md)** — Add scrapers, integrations, and contribute code +- **[Reference](reference/tier-system.md)** — Tier system, LLM router, and config file schemas + +--- + +## License + +Core discovery pipeline: [MIT](https://git.circuitforge.io/circuitforge/peregrine/src/branch/main/LICENSE-MIT) + +AI features (cover letter generation, company research, interview prep, UI): [BSL 1.1](https://git.circuitforge.io/circuitforge/peregrine/src/branch/main/LICENSE-BSL) + +© 2026 Circuit Forge LLC diff --git a/docs/reference/config-files.md b/docs/reference/config-files.md new file mode 100644 index 0000000..26bf4f2 --- /dev/null +++ b/docs/reference/config-files.md @@ -0,0 +1,353 @@ +# Config Files + +All Peregrine configuration lives in the `config/` directory. Gitignored files contain secrets or personal data; `.example` files are committed as templates. 
+ +--- + +## Gitignore Status + +| File | Gitignored | Notes | +|------|-----------|-------| +| `config/user.yaml` | Yes | Personal data + wizard state | +| `config/llm.yaml` | No | LLM backends (no secrets by default) | +| `config/search_profiles.yaml` | No | Search configuration (no secrets) | +| `config/resume_keywords.yaml` | No | Scoring keywords (no secrets) | +| `config/blocklist.yaml` | No | Excluded companies (no secrets) | +| `config/email.yaml` | Yes | IMAP credentials | +| `config/notion.yaml` | Yes | Notion token | +| `config/adzuna.yaml` | Yes | Adzuna API credentials | +| `config/craigslist.yaml` | Yes | Craigslist target cities | +| `config/integrations/*.yaml` | Yes | All integration credentials | +| `.env` | Yes | Docker port and path overrides | + +--- + +## config/user.yaml + +The primary personal data file. Created by the first-run wizard. + +```yaml +# Identity +name: "Your Name" +email: "you@example.com" +phone: "555-000-0000" +linkedin: "linkedin.com/in/yourprofile" +career_summary: > + Experienced professional with X years in [field]. 
+ +# Privacy +nda_companies: [] # company names to redact from research briefs + +# Mission alignment +mission_preferences: + music: "" # personal note injected into cover letter para 3 + animal_welfare: "" + education: "" + +# Research brief options (personal decision-making only) +candidate_accessibility_focus: false # adds ADA/WCAG/ERG section +candidate_lgbtq_focus: false # adds LGBTQIA+ inclusion section + +# Tier +tier: free # free | paid | premium +dev_tier_override: null # overrides tier locally for testing + +# Wizard state +wizard_complete: false +wizard_step: 0 +dismissed_banners: [] + +# Storage paths +docs_dir: "~/Documents/JobSearch" +ollama_models_dir: "~/models/ollama" +vllm_models_dir: "~/models/vllm" + +# Inference +inference_profile: "remote" # remote | cpu | single-gpu | dual-gpu + +# Service connection settings +services: + streamlit_port: 8501 + ollama_host: localhost + ollama_port: 11434 + ollama_ssl: false + ollama_ssl_verify: true + vllm_host: localhost + vllm_port: 8000 + vllm_ssl: false + vllm_ssl_verify: true + searxng_host: localhost + searxng_port: 8888 + searxng_ssl: false + searxng_ssl_verify: true +``` + +All personal data access in `scripts/` goes through `scripts/user_profile.py` (`UserProfile` class) — never read this file directly in scripts. + +--- + +## config/llm.yaml + +LLM backend definitions and fallback chains. Not gitignored (contains no secrets by default — API keys come from environment variables). 
+ +```yaml +backends: + ollama: + type: openai_compat + base_url: http://localhost:11434/v1 + api_key: ollama # placeholder; Ollama ignores the key + model: llama3.1:8b + enabled: true + supports_images: false + + ollama_research: + type: openai_compat + base_url: http://localhost:11434/v1 + api_key: ollama + model: llama3.1:8b # can be a different model for research + enabled: true + supports_images: false + + vllm: + type: openai_compat + base_url: http://localhost:8000/v1 + api_key: "" + model: __auto__ # auto-detect first loaded model + enabled: true + supports_images: false + + claude_code: + type: openai_compat + base_url: http://localhost:3009/v1 + api_key: any + model: claude-code-terminal + enabled: false + supports_images: true + + github_copilot: + type: openai_compat + base_url: http://localhost:3010/v1 + api_key: any + model: gpt-4o + enabled: false + supports_images: false + + anthropic: + type: anthropic + api_key_env: ANTHROPIC_API_KEY # name of environment variable + model: claude-sonnet-4-6 + enabled: false + supports_images: true + + vision_service: + type: vision_service + base_url: http://localhost:8002 + enabled: true + supports_images: true + +fallback_order: + - ollama + - claude_code + - vllm + - github_copilot + - anthropic + +research_fallback_order: + - claude_code + - vllm + - ollama_research + - github_copilot + - anthropic + +vision_fallback_order: + - vision_service + - claude_code + - anthropic +``` + +See [LLM Router](llm-router.md) for full documentation. + +--- + +## config/search_profiles.yaml + +Defines what jobs to search for. Multiple profiles can coexist. 
+ +```yaml +profiles: + - name: cs_leadership # unique profile identifier + titles: + - Customer Success Manager + - Director of Customer Success + locations: + - Remote + - San Francisco Bay Area, CA + boards: + - linkedin + - indeed + - glassdoor + - zip_recruiter + - google + custom_boards: + - adzuna + - theladders + - craigslist + exclude_keywords: # job titles containing these are dropped + - sales + - account executive + - SDR + results_per_board: 75 + hours_old: 240 # only fetch jobs posted in last N hours + mission_tags: # optional: links to mission_preferences + - music +``` + +--- + +## config/resume_keywords.yaml + +Keywords extracted from your resume, used for match scoring. Managed via **Settings → Skills**. + +```yaml +keywords: + - Customer Success + - Churn reduction + - Salesforce + - SQL + - Stakeholder management + - QBR + - onboarding +``` + +--- + +## config/blocklist.yaml + +Companies or domains to exclude from discovery results entirely. + +```yaml +blocked_companies: + - "Pyramid Scheme Inc" + - "Sketchy Startup" + +blocked_domains: + - "mlm-company.com" +``` + +--- + +## config/email.yaml + +IMAP email sync credentials. Gitignored. See [Email Sync](../user-guide/email-sync.md) for setup. + +```yaml +host: imap.gmail.com +port: 993 +use_ssl: true +username: your.email@gmail.com +password: xxxx-xxxx-xxxx-xxxx # Gmail App Password (16 chars, no spaces) +sent_folder: "" # leave blank to auto-detect +lookback_days: 90 +todo_label: "" # optional: Gmail label to monitor +``` + +--- + +## config/notion.yaml + +Notion integration credentials. Gitignored. + +```yaml +token: "secret_..." # Notion integration token +database_id: "1bd75cff-..." 
# database ID from the URL + +# Notion property names → Peregrine field names +field_map: + title: "Salary" # Notion title property (unusual — it's the page title) + status: "Status of Application" + company: "Company" + url: "Role Link" + source: "Job Source" # multi_select type + location: "Location" + applied_at: "Date Applied" +``` + +Field names in Notion are non-obvious. Always read them from `field_map` rather than guessing. + +--- + +## config/adzuna.yaml + +Adzuna Jobs API credentials. Gitignored. + +```yaml +app_id: "12345678" +app_key: "abcdefgh1234567890abcdefgh123456" +country: "us" # two-letter country code +``` + +Get credentials at [developer.adzuna.com](https://developer.adzuna.com/). + +--- + +## config/craigslist.yaml + +Target city slugs for the Craigslist scraper. Gitignored. + +```yaml +cities: + - sfbay + - nyc + - seattle + - chicago +``` + +Find slugs at `https://www.craigslist.org/about/sites`. + +--- + +## config/integrations/ + +One YAML file per integration, created when you test and save credentials in the wizard or Settings. All files in this directory are gitignored. + +``` +config/integrations/ + notion.yaml + airtable.yaml + google_sheets.yaml + google_drive.yaml + dropbox.yaml + onedrive.yaml + mega.yaml + nextcloud.yaml + google_calendar.yaml + apple_calendar.yaml + slack.yaml + discord.yaml + home_assistant.yaml +``` + +Each file contains only the fields defined by that integration's `fields()` method. Example for Discord: + +```yaml +webhook_url: "https://discord.com/api/webhooks/..." +``` + +--- + +## .env + +Docker port and path overrides. Created from `.env.example` by `setup.sh`. Gitignored. 
+ +```bash +# Ports (change if defaults conflict with existing services) +STREAMLIT_PORT=8501 +OLLAMA_PORT=11434 +VLLM_PORT=8000 +SEARXNG_PORT=8888 +VISION_PORT=8002 + +# GPU settings (written by preflight.py) +RECOMMENDED_PROFILE=single-gpu +CPU_OFFLOAD_GB=0 # KV cache RAM offload for low-VRAM GPUs +``` diff --git a/docs/reference/llm-router.md b/docs/reference/llm-router.md new file mode 100644 index 0000000..e44050e --- /dev/null +++ b/docs/reference/llm-router.md @@ -0,0 +1,231 @@ +# LLM Router + +`scripts/llm_router.py` provides a unified LLM interface with automatic fallback. All LLM calls in Peregrine go through `LLMRouter.complete()`. + +--- + +## How It Works + +`LLMRouter` reads `config/llm.yaml` on instantiation. When `complete()` is called: + +1. It iterates through the active fallback order +2. For each backend, it checks: + - Is the backend `enabled`? + - Is it reachable (health check ping)? + - Does it support the request type (text-only vs. vision)? +3. On the first backend that succeeds, it returns the completion +4. On any error (network, model error, timeout), it logs the failure and tries the next backend +5. If all backends are exhausted, it raises `RuntimeError("All LLM backends exhausted")` + +``` +fallback_order: [ollama, claude_code, vllm, github_copilot, anthropic] + ↓ try + ↓ unreachable? → skip + ↓ disabled? → skip + ↓ error? → next + → return completion +``` + +--- + +## Backend Types + +### `openai_compat` + +Any backend that speaks the OpenAI Chat Completions API. This includes: +- Ollama (`http://localhost:11434/v1`) +- vLLM (`http://localhost:8000/v1`) +- Claude Code wrapper (`http://localhost:3009/v1`) +- GitHub Copilot wrapper (`http://localhost:3010/v1`) + +Health check: `GET {base_url}/health` (strips `/v1` suffix) + +### `anthropic` + +Calls the Anthropic Python SDK directly. Reads the API key from the environment variable named in `api_key_env`. 
+ +Health check: skips health check; proceeds if `api_key_env` is set in the environment. + +### `vision_service` + +The local Moondream2 inference service. Only used when `images` is provided to `complete()`. + +Health check: `GET {base_url}/health` + +Request: `POST {base_url}/analyze` with `{"prompt": ..., "image_base64": ...}` + +--- + +## `complete()` Signature + +```python +def complete( + prompt: str, + system: str | None = None, + model_override: str | None = None, + fallback_order: list[str] | None = None, + images: list[str] | None = None, +) -> str: +``` + +| Parameter | Description | +|-----------|-------------| +| `prompt` | The user message | +| `system` | Optional system prompt (passed as the `system` role) | +| `model_override` | Overrides the configured model for `openai_compat` backends (e.g. pass a research-specific Ollama model) | +| `fallback_order` | Override the fallback chain for this call only (e.g. `config["research_fallback_order"]`) | +| `images` | Optional list of base64-encoded PNG/JPG strings. When provided, backends without `supports_images: true` are skipped automatically. 
| + +--- + +## Fallback Chains + +Three named chains are defined in `config/llm.yaml`: + +| Config key | Used for | +|-----------|---------| +| `fallback_order` | Cover letter generation and general tasks | +| `research_fallback_order` | Company research briefs | +| `vision_fallback_order` | Survey screenshot analysis (requires `images`) | + +Pass a chain explicitly: + +```python +router = LLMRouter() + +# Use the research chain +result = router.complete( + prompt=research_prompt, + system=system_prompt, + fallback_order=router.config["research_fallback_order"], +) + +# Use the vision chain with an image +result = router.complete( + prompt="Describe what you see in this survey", + fallback_order=router.config["vision_fallback_order"], + images=[base64_image_string], +) +``` + +--- + +## Vision Routing + +When `images` is provided: + +- Backends with `supports_images: false` are skipped +- `vision_service` backends are tried (POST to `/analyze`) +- `openai_compat` backends with `supports_images: true` receive images as multipart content in the user message +- `anthropic` backends with `supports_images: true` receive images as base64 content blocks + +When `images` is NOT provided: + +- `vision_service` backends are skipped entirely + +--- + +## `__auto__` Model Resolution + +vLLM can serve different models depending on what is loaded. Set `model: __auto__` in `config/llm.yaml` for the vLLM backend: + +```yaml +vllm: + type: openai_compat + base_url: http://localhost:8000/v1 + model: __auto__ +``` + +`LLMRouter` calls `client.models.list()` and uses the first model returned. This avoids hard-coding a model name that may change when you swap the loaded model. + +--- + +## Adding a Backend + +1. Add an entry to `config/llm.yaml`: + +```yaml +backends: + my_backend: + type: openai_compat # or "anthropic" | "vision_service" + base_url: http://localhost:9000/v1 + api_key: my-key + model: my-model-name + enabled: true + supports_images: false +``` + +2. 
Add it to one or more fallback chains: + +```yaml +fallback_order: + - ollama + - my_backend # add here + - claude_code + - anthropic +``` + +3. No code changes are needed — the router reads the config at startup. + +--- + +## Module-Level Convenience Function + +A module-level singleton is provided for simple one-off calls: + +```python +from scripts.llm_router import complete + +result = complete("Write a brief summary of this company.", system="You are a research assistant.") +``` + +This uses the default `fallback_order` from `config/llm.yaml`. For per-task chain overrides, instantiate `LLMRouter` directly. + +--- + +## Config Reference + +```yaml +# config/llm.yaml + +backends: + ollama: + type: openai_compat + base_url: http://localhost:11434/v1 + api_key: ollama + model: llama3.1:8b + enabled: true + supports_images: false + + anthropic: + type: anthropic + api_key_env: ANTHROPIC_API_KEY # env var name (not the key itself) + model: claude-sonnet-4-6 + enabled: false + supports_images: true + + vision_service: + type: vision_service + base_url: http://localhost:8002 + enabled: true + supports_images: true + +fallback_order: + - ollama + - claude_code + - vllm + - github_copilot + - anthropic + +research_fallback_order: + - claude_code + - vllm + - ollama_research + - github_copilot + - anthropic + +vision_fallback_order: + - vision_service + - claude_code + - anthropic +``` diff --git a/docs/reference/tier-system.md b/docs/reference/tier-system.md new file mode 100644 index 0000000..6cc406a --- /dev/null +++ b/docs/reference/tier-system.md @@ -0,0 +1,159 @@ +# Tier System + +Peregrine uses a three-tier feature gate system defined in `app/wizard/tiers.py`. 
+ +--- + +## Tiers + +``` +free < paid < premium +``` + +| Tier | Description | +|------|-------------| +| `free` | Core discovery pipeline, resume matching, and basic UI — no LLM features | +| `paid` | All AI features: cover letters, research, email, integrations, calendar, notifications | +| `premium` | Adds fine-tuning and multi-user support | + +--- + +## Feature Gate Table + +Features listed here require a minimum tier. Features not in this table are available to all tiers (free by default). + +### Wizard LLM generation + +| Feature key | Minimum tier | Description | +|-------------|-------------|-------------| +| `llm_career_summary` | paid | LLM-assisted career summary generation in the wizard | +| `llm_expand_bullets` | paid | LLM expansion of resume bullet points | +| `llm_suggest_skills` | paid | LLM skill suggestions from resume content | +| `llm_voice_guidelines` | premium | LLM writing voice and tone guidelines | +| `llm_job_titles` | paid | LLM-suggested job title variations for search | +| `llm_keywords_blocklist` | paid | LLM-suggested blocklist keywords | +| `llm_mission_notes` | paid | LLM-generated mission alignment notes | + +### App features + +| Feature key | Minimum tier | Description | +|-------------|-------------|-------------| +| `company_research` | paid | Auto-generated company research briefs pre-interview | +| `interview_prep` | paid | Live reference sheet and practice Q&A during calls | +| `email_classifier` | paid | IMAP email sync with LLM classification | +| `survey_assistant` | paid | Culture-fit survey Q&A helper (text + screenshot) | +| `model_fine_tuning` | premium | Cover letter model fine-tuning on personal writing | +| `shared_cover_writer_model` | paid | Access to shared fine-tuned cover letter model | +| `multi_user` | premium | Multiple user profiles on one instance | + +### Integrations (paid) + +| Feature key | Minimum tier | Description | +|-------------|-------------|-------------| +| `notion_sync` | paid | Sync jobs 
to Notion database | +| `google_sheets_sync` | paid | Sync jobs to Google Sheets | +| `airtable_sync` | paid | Sync jobs to Airtable | +| `google_calendar_sync` | paid | Create interview events in Google Calendar | +| `apple_calendar_sync` | paid | Create interview events in Apple Calendar (CalDAV) | +| `slack_notifications` | paid | Pipeline event notifications via Slack | + +### Free integrations (not gated) + +The following integrations are free for all tiers and are not in the `FEATURES` dict: + +- `google_drive_sync` — upload documents to Google Drive +- `dropbox_sync` — upload documents to Dropbox +- `onedrive_sync` — upload documents to OneDrive +- `mega_sync` — upload documents to MEGA +- `nextcloud_sync` — upload documents to Nextcloud +- `discord_notifications` — pipeline notifications via Discord webhook +- `home_assistant` — pipeline events to Home Assistant REST API + +--- + +## API Reference + +### `can_use(tier, feature) -> bool` + +Returns `True` if the given tier has access to the feature. + +```python +from app.wizard.tiers import can_use + +can_use("free", "company_research") # False +can_use("paid", "company_research") # True +can_use("premium", "company_research") # True + +can_use("free", "unknown_feature") # True — ungated features return True +can_use("invalid", "company_research") # False — invalid tier string +``` + +### `tier_label(feature) -> str` + +Returns a display badge string for locked features, or `""` if the feature is free or unknown. 
+ +```python +from app.wizard.tiers import tier_label + +tier_label("company_research") # "🔒 Paid" +tier_label("model_fine_tuning") # "⭐ Premium" +tier_label("job_discovery") # "" (ungated) +``` + +--- + +## Dev Tier Override + +For local development and testing without a paid licence, set `dev_tier_override` in `config/user.yaml`: + +```yaml +tier: free +dev_tier_override: premium # overrides tier locally for testing +``` + +`UserProfile.tier` returns `dev_tier_override` when set, falling back to `tier` otherwise. + +!!! warning + `dev_tier_override` is for local development only. It has no effect on production deployments that validate licences server-side. + +--- + +## Adding a New Feature Gate + +1. Add the feature to `FEATURES` in `app/wizard/tiers.py`: + +```python +FEATURES: dict[str, str] = { + # ...existing entries... + "my_new_feature": "paid", # or "free" | "premium" +} +``` + +2. Guard the feature in the UI: + +```python +from app.wizard.tiers import can_use, tier_label +from scripts.user_profile import UserProfile + +user = UserProfile() +if can_use(user.tier, "my_new_feature"): + # show the feature + pass +else: + st.info(f"My New Feature requires a {tier_label('my_new_feature').replace('🔒 ', '').replace('⭐ ', '')} plan.") +``` + +3. Add a test in `tests/test_tiers.py`: + +```python +def test_my_new_feature_requires_paid(): + assert can_use("free", "my_new_feature") is False + assert can_use("paid", "my_new_feature") is True + assert can_use("premium", "my_new_feature") is True +``` + +--- + +## Future: Ultra Tier + +An `ultra` tier is reserved for future use (e.g. enterprise SLA, dedicated inference). The tier ordering in `TIERS = ["free", "paid", "premium"]` can be extended without breaking `can_use()`, since it uses `list.index()` for comparison. 
diff --git a/docs/user-guide/apply-workspace.md b/docs/user-guide/apply-workspace.md
new file mode 100644
index 0000000..899b637
--- /dev/null
+++ b/docs/user-guide/apply-workspace.md
@@ -0,0 +1,76 @@
+# Apply Workspace
+
+The Apply Workspace is where you generate cover letters, export application documents, and record that you have applied to a job.
+
+---
+
+## Accessing the Workspace
+
+Navigate to page **4 — Apply** in the sidebar. The workspace lists all jobs with status `approved`, sorted by date approved.
+
+---
+
+## Cover Letter Generation
+
+Click **Generate Cover Letter** on any job card. Generation runs as a background task, so you can continue navigating the UI while it completes.
+
+### What the generator uses
+
+- Your **career summary** and **resume data** from `config/user.yaml`
+- The **job title** and **job description**
+- **Company name** — used to detect mission-aligned industries
+- **Mission alignment notes** from `config/user.yaml` (e.g. a personal note about why you care about music-industry companies)
+
+### Fallback chain
+
+Cover letters use the cover letter fallback order from `config/llm.yaml`. By default: `ollama → claude_code → vllm → github_copilot → anthropic`. See [LLM Router](../reference/llm-router.md) for details.
+
+### Mission alignment
+
+If the company or job description matches one of your configured mission industries (music, animal welfare, education), the generator injects a personalised paragraph 3 hint into the prompt. This produces a cover letter that reflects authentic alignment rather than generic enthusiasm.
+
+---
+
+## Editing the Cover Letter
+
+After generation, the cover letter appears in an editable text area. Edit freely — changes are saved locally and do not trigger a re-generation.
+
+Click **Save** to write the updated text back to the database.
+
+---
+
+## PDF Export
+
+Click **Export PDF** to generate a formatted PDF of the cover letter. 
The PDF is saved to your `docs_dir` (configured in `config/user.yaml`, default: `~/Documents/JobSearch`). + +The filename format is: `{Company}_{Title}_{Date}_CoverLetter.pdf` + +--- + +## Marking Applied + +Once you have submitted your application externally, click **Mark Applied**. This: + +- Sets the job status to `applied` +- Records `applied_at` timestamp +- Moves the job out of the Apply Workspace and into the Interviews kanban (in `applied` pre-stage) + +--- + +## Rejecting a Listing + +Changed your mind about a job you approved? Click **Reject Listing** to set it to `rejected` status. This removes it from the workspace without affecting your cover letter draft (the text remains in the database). + +--- + +## Cover Letter Background Task Status + +The sidebar shows a live indicator (updated every 3 seconds) of running and queued background tasks. If a cover letter generation is in progress you will see it there. + +A task can have these statuses: +- **queued** — waiting to start +- **running** — actively generating +- **completed** — finished; reload the page to see the result +- **failed** — generation failed; check the logs + +Only one queued or running task per job is allowed at a time. Clicking **Generate Cover Letter** on a job that already has a task in progress is a no-op. diff --git a/docs/user-guide/email-sync.md b/docs/user-guide/email-sync.md new file mode 100644 index 0000000..8da0c1e --- /dev/null +++ b/docs/user-guide/email-sync.md @@ -0,0 +1,119 @@ +# Email Sync + +Peregrine monitors your inbox for job-related emails and automatically updates job stages when it detects interview requests, rejections, offers, and survey links. + +--- + +## Configuration + +Email sync is configured in `config/email.yaml` (gitignored). 
Copy the example template to get started: + +```bash +cp config/email.yaml.example config/email.yaml +``` + +Then fill in your credentials: + +```yaml +host: imap.gmail.com +port: 993 +use_ssl: true +username: your.email@gmail.com +password: xxxx-xxxx-xxxx-xxxx # see Gmail App Password below +sent_folder: "" # leave blank to auto-detect +lookback_days: 90 # how many days back to scan +todo_label: "" # optional Gmail label to monitor +``` + +You can also configure email sync via **Settings → Email** in the UI. + +--- + +## Gmail Setup + +Gmail requires an **App Password** instead of your regular account password. Your regular password will not work. + +1. Enable **2-Step Verification** on your Google Account at [myaccount.google.com/security](https://myaccount.google.com/security) +2. Go to [myaccount.google.com/apppasswords](https://myaccount.google.com/apppasswords) +3. Create a new app password — name it "Peregrine" or similar +4. Copy the 16-character code (no spaces) and paste it as `password` in `config/email.yaml` +5. Enable IMAP in Gmail: **Settings → See all settings → Forwarding and POP/IMAP → Enable IMAP** + +--- + +## Outlook / Office 365 + +```yaml +host: outlook.office365.com +port: 993 +use_ssl: true +username: your.email@company.com +password: your-password # or App Password if MFA is enabled +``` + +--- + +## Gmail Label Monitoring (Optional) + +If you use a Gmail label to flag action-needed job emails (e.g. "TO DO JOBS"), set: + +```yaml +todo_label: "TO DO JOBS" +``` + +Emails in this label are matched to pipeline jobs by company name, then filtered by action keywords in the subject line (e.g. "interview", "next steps", "offer"). 
+ +--- + +## Email Classification Labels + +The email classifier assigns one of six labels to each relevant email: + +| Label | Meaning | +|-------|---------| +| `interview_request` | Recruiter or hiring manager requesting a call or interview | +| `rejection` | Automated or personal rejection | +| `offer` | Job offer letter or verbal offer notification | +| `follow_up` | Candidate or recruiter follow-up with no stage change | +| `survey_received` | Link or request to complete a culture-fit or skills assessment | +| `other` | Job-related but does not fit any category above | + +Classification is performed by your configured LLM backend. The classifier uses the email subject and body as input. + +!!! note "Tier requirement" + Email classification is a Paid feature. + +--- + +## Stage Auto-Updates + +When a classified email is matched to a job in your pipeline, Peregrine updates the job stage automatically: + +| Classification | Stage action | +|---------------|-------------| +| `interview_request` | Moves `applied` → `phone_screen` | +| `rejection` | Moves job → `rejected` (captures `rejection_stage`) | +| `offer` | Flags job for review; moves toward `offer` stage | +| `survey_received` | Moves job → `survey` pre-stage | + +Emails are matched to jobs by comparing the sender domain and company name in the email body against company names in your pipeline. + +--- + +## Running Email Sync + +### From the UI + +Click **Sync Emails** on the Home page. This runs as a background task — you can navigate away while it processes. + +### Non-blocking background sync + +Email sync runs in a daemon thread via `scripts/task_runner.py` and does not block the UI. The sidebar background task indicator shows sync progress. + +--- + +## Email Thread Log + +All matched emails are stored in the `job_contacts` table (one row per email thread per job). You can view the thread log for any job from the Job Review detail view or the Interviews kanban card. 
+
+Columns stored: `direction` (inbound/outbound), `subject`, `from`, `to`, `body`, `received_at`.
diff --git a/docs/user-guide/integrations.md b/docs/user-guide/integrations.md
new file mode 100644
index 0000000..a45bf5c
--- /dev/null
+++ b/docs/user-guide/integrations.md
@@ -0,0 +1,147 @@
+# Integrations
+
+Peregrine supports 13 optional integration connectors for job tracking, document storage, calendar sync, and notifications. Configure them in **Settings → Integrations** or during the first-run wizard (Step 7).
+
+All integration credentials are stored in per-integration files at `config/integrations/<name>.yaml` (gitignored — never committed).
+
+---
+
+## Job Tracking
+
+### Notion
+
+**Tier:** Paid
+
+Syncs approved and applied jobs to a Notion database. Peregrine creates or updates a Notion page per job with status, salary, company, URL, and cover letter text.
+
+Required credentials: Notion integration token and database ID.
+
+Configure in `config/integrations/notion.yaml`.
+
+### Airtable
+
+**Tier:** Paid
+
+Syncs the job pipeline to an Airtable base. Each job maps to a row in your configured table.
+
+Required credentials: Airtable personal access token, base ID, and table name.
+
+### Google Sheets
+
+**Tier:** Paid
+
+Appends job data to a Google Sheet. Useful for sharing pipeline data or building custom dashboards.
+
+Required credentials: Google service account JSON key file, spreadsheet ID, and sheet name.
+
+---
+
+## Document Storage
+
+### Google Drive
+
+**Tier:** Free
+
+Uploads generated cover letters and exported PDFs to a Google Drive folder automatically when you export from the Apply Workspace.
+
+Required credentials: Google service account JSON key file and target folder ID.
+
+### Dropbox
+
+**Tier:** Free
+
+Uploads cover letters and PDFs to a Dropbox folder.
+
+Required credentials: Dropbox access token and target folder path.
+
+### OneDrive
+
+**Tier:** Free
+
+Uploads cover letters and PDFs to a OneDrive folder via the Microsoft Graph API. 
+ +Required credentials: Microsoft OAuth client ID, client secret, tenant ID, and target folder path. + +### MEGA + +**Tier:** Free + +Uploads documents to MEGA cloud storage. + +Required credentials: MEGA account email and password, target folder path. + +### Nextcloud + +**Tier:** Free + +Uploads documents to a self-hosted Nextcloud instance via WebDAV. + +Required credentials: Nextcloud server URL, username, password, and target folder path. + +--- + +## Calendar + +### Google Calendar + +**Tier:** Paid + +Creates calendar events for scheduled interviews. When you set an `interview_date` on a job in the kanban, Peregrine creates a Google Calendar event with a reminder. + +Required credentials: Google service account JSON key file and calendar ID. + +### Apple Calendar (CalDAV) + +**Tier:** Paid + +Creates calendar events on an Apple Calendar or any CalDAV-compatible server. + +Required credentials: CalDAV server URL, username, and password. For iCloud, use an app-specific password. + +--- + +## Notifications + +### Slack + +**Tier:** Paid + +Sends notifications to a Slack channel for key pipeline events: new high-match jobs discovered, stage changes, and research completion. + +Required credentials: Slack incoming webhook URL. + +### Discord + +**Tier:** Free + +Sends notifications to a Discord channel via a webhook. Same events as Slack. + +Required credentials: Discord webhook URL. + +### Home Assistant + +**Tier:** Free + +Sends pipeline events to Home Assistant via the REST API. Useful for smart home dashboards or custom automation triggers. + +Required credentials: Home Assistant base URL and long-lived access token. 
+ +--- + +## Integration Status + +The Settings → Integrations tab shows the connection status of each integration: + +| Status | Meaning | +|--------|---------| +| Connected | Credentials file exists and last test passed | +| Not configured | No credentials file found | +| Error | Credentials file exists but last test failed | + +Click **Test** to re-verify the connection at any time. + +--- + +## Adding a Custom Integration + +See [Adding an Integration](../developer-guide/adding-integrations.md) in the developer guide. diff --git a/docs/user-guide/interviews.md b/docs/user-guide/interviews.md new file mode 100644 index 0000000..58512fe --- /dev/null +++ b/docs/user-guide/interviews.md @@ -0,0 +1,96 @@ +# Interviews + +The Interviews page is a kanban board that tracks your progress through the interview pipeline after you have applied to a job. + +--- + +## Kanban Stages + +Jobs move left to right through the pipeline: + +``` +applied → phone_screen → interviewing → offer → hired + ↓ + (any stage) → rejected +``` + +| Stage | Description | +|-------|-------------| +| `applied` | Pre-kanban holding area — job applied to but no response yet | +| `phone_screen` | Initial recruiter/HR screen scheduled or completed | +| `interviewing` | Active interview loop (first-round, technical, panel, etc.) | +| `offer` | Offer received; evaluating | +| `hired` | Offer accepted | +| `rejected` | Declined or ghosted at any stage (captures `rejection_stage`) | + +--- + +## Moving Jobs Between Stages + +Drag a job card to the target column, or use the stage-advance button on each card. Moving a job to `phone_screen` triggers an automatic company research task (see below). + +--- + +## Company Research (Auto-trigger) + +When a job moves to `phone_screen`, Peregrine automatically queues a **company research** background task (`scripts/company_research.py`). The research brief is generated in three phases: + +1. 
**SearXNG web scrape** — queries the SearXNG meta-search engine (running locally on port 8888) for company information from public sources +2. **SearXNG news snippets** — fetches recent news about the company +3. **LLM synthesis** — combines the scraped content into a structured brief + +The brief includes: +- Company overview (mission, size, funding stage) +- CEO / leadership summary +- Talking points tailored to your role +- Optional: Inclusion and Accessibility section (ADA signals, WCAG, ERGs) +- Optional: LGBTQIA+ inclusion section (non-discrimination policies, culture signals) + +Both optional sections are controlled by `candidate_accessibility_focus` and `candidate_lgbtq_focus` booleans in `config/user.yaml`. They are for personal decision-making only and are never included in applications. + +--- + +## Interview Prep Page + +Navigate to page **6 — Interview Prep** for a job in the `phone_screen` or `interviewing` stage. This page provides: + +- The full company research brief (generated automatically when the job moved to `phone_screen`) +- A live reference sheet you can keep open during a call +- **Practice Q&A** — a back-and-forth interview simulation powered by your LLM backend + +!!! note "Tier requirement" + Interview prep is a Paid feature. See [Tier System](../reference/tier-system.md). + +--- + +## Survey Assistant + +When a job moves to the `survey` stage (via the "Survey" button on an applied job), the Survey Assistant page (page 7) becomes active for that job. It helps you complete culture-fit surveys by: + +- Accepting pasted survey text +- Accepting screenshot uploads (analysed by the Moondream2 vision service) +- Generating suggested answers via your configured LLM backend + +After completing the survey, move the job to `phone_screen` to continue the pipeline. + +!!! note "Tier requirement" + Survey assistant is a Paid feature. 
+ +--- + +## Rejection Tracking + +When you reject a job from the kanban (at any stage), Peregrine captures the `rejection_stage` — the stage at which the rejection occurred. This data is available for pipeline analytics. + +--- + +## Email-Driven Stage Updates + +If email sync is configured (see [Email Sync](email-sync.md)), Peregrine can automatically advance jobs based on incoming email: + +| Email classification | Stage action | +|---------------------|-------------| +| `interview_request` | Moves job toward `phone_screen` if still `applied` | +| `rejection` | Moves job to `rejected` (captures `rejection_stage`) | +| `offer` | Flags job for review; moves toward `offer` | +| `survey_received` | Moves job to `survey` stage | diff --git a/docs/user-guide/job-discovery.md b/docs/user-guide/job-discovery.md new file mode 100644 index 0000000..1a6fd89 --- /dev/null +++ b/docs/user-guide/job-discovery.md @@ -0,0 +1,123 @@ +# Job Discovery + +Peregrine discovers new job listings by running search profiles against multiple job boards simultaneously. Results are deduplicated by URL and stored in the local SQLite database (`staging.db`). + +--- + +## How Discovery Works + +1. **Search profiles** in `config/search_profiles.yaml` define what to search for +2. The Home page **Run Discovery** button triggers `scripts/discover.py` +3. `discover.py` calls each configured board (standard + custom) for each active profile +4. Results are inserted into the `jobs` table with status `pending` +5. Jobs with URLs already in the database are silently skipped (URL is the unique key) +6. After insertion, `scripts/match.py` runs keyword scoring on all new jobs + +--- + +## Search Profiles + +Profiles are defined in `config/search_profiles.yaml`. You can have multiple profiles running simultaneously. 
+ +### Profile fields + +```yaml +profiles: + - name: cs_leadership # unique identifier + titles: + - Customer Success Manager + - Director of Customer Success + locations: + - Remote + - San Francisco Bay Area, CA + boards: + - linkedin + - indeed + - glassdoor + - zip_recruiter + - google + custom_boards: + - adzuna + - theladders + - craigslist + exclude_keywords: # titles containing these words are dropped + - sales + - account executive + - SDR + results_per_board: 75 # max jobs per board per run + hours_old: 240 # only fetch jobs posted in last N hours + mission_tags: # optional — triggers mission-alignment cover letter hints + - music +``` + +### Adding a new profile + +Open `config/search_profiles.yaml` and add an entry under `profiles:`. The next discovery run picks it up automatically — no restart required. + +### Mission tags + +`mission_tags` links a profile to industries you care about. When cover letters are generated for jobs from a mission-tagged profile, the LLM prompt includes a personal alignment note (configured in `config/user.yaml` under `mission_preferences`). Supported tags: `music`, `animal_welfare`, `education`. + +--- + +## Standard Job Boards + +These boards are powered by the [JobSpy](https://github.com/Bunsly/JobSpy) library: + +| Board key | Source | +|-----------|--------| +| `linkedin` | LinkedIn Jobs | +| `indeed` | Indeed | +| `glassdoor` | Glassdoor | +| `zip_recruiter` | ZipRecruiter | +| `google` | Google Jobs | + +--- + +## Custom Job Board Scrapers + +Custom scrapers are in `scripts/custom_boards/`. They are registered in `discover.py` and activated per-profile via the `custom_boards` list. 
+ +| Key | Source | Notes | +|-----|--------|-------| +| `adzuna` | [Adzuna Jobs API](https://developer.adzuna.com/) | Requires `config/adzuna.yaml` with `app_id` and `app_key` | +| `theladders` | The Ladders | SSR scraper via `curl_cffi`; no credentials needed | +| `craigslist` | Craigslist | Requires `config/craigslist.yaml` with target city slugs | + +To add your own scraper, see [Adding a Scraper](../developer-guide/adding-scrapers.md). + +--- + +## Running Discovery + +### From the UI + +1. Open the **Home** page +2. Click **Run Discovery** +3. Peregrine runs all active search profiles in sequence +4. A progress bar shows board-by-board status +5. A summary shows how many new jobs were inserted vs. already known + +### From the command line + +```bash +conda run -n job-seeker python scripts/discover.py +``` + +--- + +## Filling Missing Descriptions + +Some boards (particularly Glassdoor) return only a short description snippet. Click **Fill Missing Descriptions** on the Home page to trigger the `enrich_descriptions` background task. + +The enricher visits each job URL and attempts to extract the full description from the page HTML. This runs as a background task so you can continue using the UI. + +You can also enrich a specific job from the Job Review page by clicking the refresh icon next to its description. + +--- + +## Keyword Matching + +After discovery, `scripts/match.py` scores each new job by comparing the job description against your resume keywords (from `config/resume_keywords.yaml`). The score is stored as `match_score` (0–100). Gaps are stored as `keyword_gaps` (comma-separated missing keywords). + +Both fields appear in the Job Review queue and can be used to sort and prioritise jobs. 
diff --git a/docs/user-guide/job-review.md b/docs/user-guide/job-review.md new file mode 100644 index 0000000..f58bcdb --- /dev/null +++ b/docs/user-guide/job-review.md @@ -0,0 +1,70 @@ +# Job Review + +The Job Review page is where you approve or reject newly discovered jobs before they enter the application pipeline. + +--- + +## The Pending Queue + +All jobs with status `pending` appear in the review queue. Jobs with email leads (matching email threads already in the `job_contacts` table) are sorted to the top of the queue automatically. + +--- + +## Sorting Options + +Use the sort control at the top of the page to order the queue: + +| Sort option | Description | +|-------------|-------------| +| **Match score (high to low)** | Jobs with the strongest keyword match appear first | +| **Match score (low to high)** | Useful for finding niche roles that scored low but are still interesting | +| **Date found (newest)** | Most recently discovered jobs first | +| **Date found (oldest)** | Oldest jobs first (useful for clearing a backlog) | +| **Company (A-Z)** | Alphabetical by company name | + +--- + +## Match Score and Keyword Gaps + +Each job card shows: + +- **Match score** (0–100) — percentage of your resume keywords found in the job description +- **Keyword gaps** — specific keywords from your profile that the job description is missing + +A high match score does not guarantee a good fit; use it as a signal to prioritise your review, not as a final filter. + +--- + +## Reviewing Jobs + +For each job in the queue you can: + +- **Approve** — moves the job to `approved` status, making it available in the Apply Workspace +- **Reject** — moves the job to `rejected` status and removes it from the queue +- **Skip** — leaves the job in `pending` for a later review session + +### Batch actions + +Use the checkboxes to select multiple jobs at once, then click **Approve selected** or **Reject selected** to process them in bulk. 
+ +--- + +## Job Detail View + +Click a job title to expand the full detail view, which shows: + +- Full job description +- Company name and location +- Source board and original URL +- Salary (if available) +- Remote/on-site status +- Match score and keyword gaps +- Any email threads already linked to this job + +--- + +## After Approval + +Approved jobs appear in the **Apply Workspace** (page 4). From there you can generate a cover letter, export a PDF, and mark the job as applied. + +If you decide not to apply after approving, you can reject the listing from within the Apply Workspace without losing your cover letter draft. diff --git a/docs/user-guide/settings.md b/docs/user-guide/settings.md new file mode 100644 index 0000000..23ab8eb --- /dev/null +++ b/docs/user-guide/settings.md @@ -0,0 +1,152 @@ +# Settings + +The Settings page is accessible from the sidebar. It contains all configuration for Peregrine, organised into tabs. + +--- + +## My Profile + +Personal information used in cover letters, research briefs, and interview prep. + +| Field | Description | +|-------|-------------| +| Name | Your full name | +| Email | Contact email address | +| Phone | Contact phone number | +| LinkedIn | LinkedIn profile URL | +| Career summary | 2–4 sentence professional summary | +| NDA companies | Companies you cannot mention in research briefs (previous employers under NDA) | +| Docs directory | Where PDFs and exported documents are saved (default: `~/Documents/JobSearch`) | + +### Mission Preferences + +Optional notes about industries you genuinely care about. When the cover letter generator detects alignment with one of these industries, it injects your note into paragraph 3 of the cover letter. 
+ +| Field | Tag | Example | +|-------|-----|---------| +| Music industry note | `music` | "I've played in bands for 15 years and care deeply about how artists get paid" | +| Animal welfare note | `animal_welfare` | "I volunteer at my local shelter every weekend" | +| Education note | `education` | "I tutored underserved kids and care deeply about literacy" | + +Leave a field blank to use a generic default when alignment is detected. + +### Research Brief Preferences + +Controls optional sections in company research briefs. Both are for personal decision-making only and are never included in applications. + +| Setting | Section added | +|---------|--------------| +| Candidate accessibility focus | Disability inclusion and accessibility signals (ADA, ERGs, WCAG) | +| Candidate LGBTQIA+ focus | LGBTQIA+ inclusion signals (ERGs, non-discrimination policies, culture) | + +--- + +## Search + +Manage search profiles. Equivalent to editing `config/search_profiles.yaml` directly, but with a form UI. + +- Add, edit, and delete profiles +- Configure titles, locations, boards, custom boards, exclude keywords, and mission tags +- Changes are saved to `config/search_profiles.yaml` + +--- + +## LLM Backends + +Configure which LLM backends Peregrine uses and in what order. 
+ +| Setting | Description | +|---------|-------------| +| Enabled toggle | Whether a backend is considered in the fallback chain | +| Base URL | API endpoint (for `openai_compat` backends) | +| Model | Model name or `__auto__` (vLLM auto-detects the loaded model) | +| API key | API key if required | +| Test button | Sends a short ping to verify the backend is reachable | + +### Fallback chains + +Three independent fallback chains are configured: + +| Chain | Used for | +|-------|---------| +| `fallback_order` | Cover letter generation and general tasks | +| `research_fallback_order` | Company research briefs | +| `vision_fallback_order` | Survey screenshot analysis | + +--- + +## Notion + +Configure Notion integration credentials. Requires: +- Notion integration token (from [notion.so/my-integrations](https://www.notion.so/my-integrations)) +- Database ID (from the Notion database URL) + +The field map controls which Notion properties correspond to which Peregrine fields. Edit `config/notion.yaml` directly for advanced field mapping. + +--- + +## Services + +Connection settings for local services: + +| Service | Default host:port | +|---------|-----------------| +| Ollama | localhost:11434 | +| vLLM | localhost:8000 | +| SearXNG | localhost:8888 | + +Each service has SSL and SSL-verify toggles for reverse-proxy setups. + +--- + +## Resume Profile + +Edit your parsed resume data (work experience, education, skills, certifications). This is the same data extracted during the first-run wizard Resume step. + +Changes here affect all future cover letter generations. + +--- + +## Email + +Configure IMAP email sync. See [Email Sync](email-sync.md) for full setup instructions. + +--- + +## Skills + +Manage your `config/resume_keywords.yaml` — the list of skills and keywords used for match scoring. + +Add or remove keywords. Higher-weighted keywords count more toward the match score. + +--- + +## Integrations + +Connection cards for all 13 integrations. 
See [Integrations](integrations.md) for per-service details. + +--- + +## Fine-Tune + +**Tier: Premium** + +Tools for fine-tuning a cover letter model on your personal writing style. + +- Export cover letter training data as JSONL +- Configure training parameters (rank, epochs, learning rate) +- Start a fine-tuning run (requires `ogma` conda environment with Unsloth) +- Register the output model with Ollama + +--- + +## Developer + +Developer and debugging tools. + +| Option | Description | +|--------|-------------| +| Reset wizard | Sets `wizard_complete: false` and `wizard_step: 0`; resumes at step 1 on next page load | +| Dev tier override | Set `dev_tier_override` to `paid` or `premium` to test tier-gated features locally | +| Clear stuck tasks | Manually sets any `running` or `queued` background tasks to `failed` (also runs on app startup) | +| View raw config | Shows the current `config/user.yaml` contents | diff --git a/mkdocs.yml b/mkdocs.yml new file mode 100644 index 0000000..b908b75 --- /dev/null +++ b/mkdocs.yml @@ -0,0 +1,67 @@ +site_name: Peregrine +site_description: AI-powered job search pipeline +site_author: Circuit Forge LLC +site_url: https://docs.circuitforge.io/peregrine +repo_url: https://git.circuitforge.io/circuitforge/peregrine +repo_name: circuitforge/peregrine + +theme: + name: material + palette: + - scheme: default + primary: indigo + accent: indigo + toggle: + icon: material/brightness-7 + name: Switch to dark mode + - scheme: slate + primary: indigo + accent: indigo + toggle: + icon: material/brightness-4 + name: Switch to light mode + features: + - navigation.tabs + - navigation.sections + - navigation.expand + - navigation.top + - search.suggest + - search.highlight + - content.code.copy + +markdown_extensions: + - admonition + - pymdownx.details + - pymdownx.superfences + - pymdownx.highlight: + anchor_linenums: true + - pymdownx.tabbed: + alternate_style: true + - tables + - toc: + permalink: true + +nav: + - Home: index.md + - 
Getting Started: + - Installation: getting-started/installation.md + - First-Run Wizard: getting-started/first-run-wizard.md + - Docker Profiles: getting-started/docker-profiles.md + - User Guide: + - Job Discovery: user-guide/job-discovery.md + - Job Review: user-guide/job-review.md + - Apply Workspace: user-guide/apply-workspace.md + - Interviews: user-guide/interviews.md + - Email Sync: user-guide/email-sync.md + - Integrations: user-guide/integrations.md + - Settings: user-guide/settings.md + - Developer Guide: + - Contributing: developer-guide/contributing.md + - Architecture: developer-guide/architecture.md + - Adding a Scraper: developer-guide/adding-scrapers.md + - Adding an Integration: developer-guide/adding-integrations.md + - Testing: developer-guide/testing.md + - Reference: + - Tier System: reference/tier-system.md + - LLM Router: reference/llm-router.md + - Config Files: reference/config-files.md