Compare commits
No commits in common. "bc5f771e43a82e85577bc9aaa42b66c3cc3d6497" and "11f6334f28319767d41072cf2593680e68eb6650" have entirely different histories.
bc5f771e43
...
11f6334f28
52 changed files with 220 additions and 2256 deletions
|
|
@ -1,30 +0,0 @@
|
|||
---
|
||||
name: Bug report
|
||||
about: Something isn't working correctly
|
||||
labels: bug
|
||||
---
|
||||
|
||||
## Describe the bug
|
||||
|
||||
<!-- A clear description of what went wrong. -->
|
||||
|
||||
## Steps to reproduce
|
||||
|
||||
1.
|
||||
2.
|
||||
3.
|
||||
|
||||
## Expected behaviour
|
||||
|
||||
## Actual behaviour
|
||||
|
||||
<!-- Paste relevant log output below (redact any API keys or personal info): -->
|
||||
|
||||
```
|
||||
|
||||
```
|
||||
|
||||
## Environment
|
||||
|
||||
- Peregrine version: <!-- output of `./manage.sh status` or git tag -->
|
||||
- OS:
|
||||
- Runtime: Docker / conda-direct
|
||||
- GPU profile: remote / cpu / single-gpu / dual-gpu
|
||||
|
|
@ -1,26 +0,0 @@
|
|||
---
|
||||
name: Feature request
|
||||
about: Suggest an improvement or new capability
|
||||
labels: enhancement
|
||||
---
|
||||
|
||||
## Problem statement
|
||||
|
||||
<!-- What are you trying to do that's currently hard or impossible? -->
|
||||
|
||||
## Proposed solution
|
||||
|
||||
## Alternatives considered
|
||||
|
||||
## Which tier would this belong to?
|
||||
|
||||
- [ ] Free
|
||||
- [ ] Paid
|
||||
- [ ] Premium
|
||||
- [ ] Ultra (human-in-the-loop)
|
||||
- [ ] Not sure
|
||||
|
||||
## Would you be willing to contribute a PR?
|
||||
|
||||
- [ ] Yes
|
||||
- [ ] No
|
||||
|
|
@ -1,32 +0,0 @@
|
|||
#!/usr/bin/env bash
# .githooks/commit-msg — enforces conventional commit format
#
# Git runs this hook with one argument: the path of the file that holds the
# proposed commit message. Only the first line (the subject) is validated.
#
# Accepted subjects:
#   type: description
#   type(scope): description
#   type!: description / type(scope)!: description   (breaking-change marker)
# Subjects that git writes itself (Merge ..., Revert "...") and autosquash
# subjects (fixup! / squash! / amend!) are accepted as-is, so that
# `git merge`, `git revert` and `git rebase --autosquash` keep working.
#
# Exit status: 0 = accept the commit, 1 = reject it.
set -euo pipefail

RED='\033[0;31m'; YELLOW='\033[1;33m'; NC='\033[0m'

VALID_TYPES="feat|fix|docs|chore|test|refactor|perf|ci|build"

# "${1:-}" instead of "$1": under `set -u` a missing argument would otherwise
# abort with an opaque "unbound variable" error.
MSG_FILE="${1:-}"
if [[ -z "$MSG_FILE" || ! -r "$MSG_FILE" ]]; then
    printf '%bCommit rejected:%b commit-msg hook was not given a readable message file.\n' "$RED" "$NC" >&2
    exit 1
fi

MSG=$(head -n 1 "$MSG_FILE")

# Treat a subject made only of whitespace (spaces, tabs, ...) as empty.
if [[ -z "${MSG//[[:space:]]/}" ]]; then
    printf '%bCommit rejected:%b Commit message is empty.\n' "$RED" "$NC"
    exit 1
fi

# Subjects generated by git itself or by --fixup/--squash are not hand-written
# and must not be forced into the conventional format.
AUTO_SUBJECT_RE='^(Merge |Revert "|(fixup|squash|amend)! )'
if [[ "$MSG" =~ $AUTO_SUBJECT_RE ]]; then
    exit 0
fi

# type, optional (scope), optional "!", then ": " and a non-empty description.
SUBJECT_RE="^(${VALID_TYPES})(\(.+\))?!?: .+"
if [[ ! "$MSG" =~ $SUBJECT_RE ]]; then
    printf '%bCommit rejected:%b Message does not follow conventional commit format.\n' "$RED" "$NC"
    echo ""
    printf '  Required: %btype: description%b or %btype(scope): description%b\n' "$YELLOW" "$NC" "$YELLOW" "$NC"
    printf '  Valid types: %b%s%b\n' "$YELLOW" "$VALID_TYPES" "$NC"
    echo ""
    # %s prints the user's text verbatim; `echo -e` would interpret any
    # backslash sequences it contains (e.g. "\c" truncates the output).
    printf '  Your message: %b%s%b\n' "$YELLOW" "$MSG" "$NC"
    echo ""
    echo "  Examples:"
    printf '    %bfeat: add cover letter refinement%b\n' "$YELLOW" "$NC"
    printf '    %bfix(wizard): handle missing user.yaml gracefully%b\n' "$YELLOW" "$NC"
    printf '    %bdocs: update tier system reference%b\n' "$YELLOW" "$NC"
    exit 1
fi

exit 0
|
||||
|
|
@ -1,76 +0,0 @@
|
|||
#!/usr/bin/env bash
# .githooks/pre-commit — blocks sensitive files and API key patterns
#
# Inspects every file staged for addition or modification and rejects the
# commit when:
#   1. the path is listed in BLOCKED_PATHS (exact match),
#   2. the path matches one of BLOCKED_PATTERNS (grep -E), or
#   3. a line added by the staged diff matches one of KEY_REGEXES.
#
# Staged deletions are ignored on purpose: removing an accidentally tracked
# secret from the index must stay possible.
#
# Exit status: 0 = allow the commit, 1 = block it.
set -euo pipefail

RED='\033[0;31m'; YELLOW='\033[1;33m'; NC='\033[0m'

BLOCKED_PATHS=(
    "config/user.yaml"
    "config/server.yaml"
    "config/llm.yaml"
    "config/notion.yaml"
    "config/adzuna.yaml"
    "config/label_tool.yaml"
    ".env"
)

BLOCKED_PATTERNS=(
    "data/.*\.db$"
    "data/.*\.jsonl$"
    "demo/data/.*\.db$"
)

# POSIX bracket expressions have no backslash escapes: "\-_" would be the
# range '\'..'_' (hyphen NOT included) and "\x27" would be the literal
# characters \, x, 2, 7. The hyphen therefore goes last in the class and the
# quote characters are written literally.
KEY_REGEXES=(
    'sk-[A-Za-z0-9]{20,}'
    'Bearer [A-Za-z0-9_-]{20,}'
    "api_key:[[:space:]]*[\"']?[A-Za-z0-9_-]{16,}"
)

ERRORS=0

# Get list of staged files (added/copied/modified/renamed/type-changed only).
# Fall back to the empty tree when HEAD does not exist yet (initial commit).
EMPTY_TREE="4b825dc642cb6eb9a060e54bf8d69288fbee4904"
mapfile -t staged_files < <(
    git diff-index --cached --name-only --diff-filter=ACMRT HEAD 2>/dev/null || \
    git diff-index --cached --name-only --diff-filter=ACMRT "$EMPTY_TREE"
)

# Nothing to inspect. Also avoids expanding an empty array under `set -u`,
# which is an error on bash < 4.4.
if [[ ${#staged_files[@]} -eq 0 ]]; then
    exit 0
fi

for file in "${staged_files[@]}"; do
    # Exact path blocklist
    for blocked in "${BLOCKED_PATHS[@]}"; do
        if [[ "$file" == "$blocked" ]]; then
            printf '%bBLOCKED:%b %s is in the sensitive file blocklist.\n' "$RED" "$NC" "$file"
            printf '  Use: %bgit restore --staged %s%b\n' "$YELLOW" "$file" "$NC"
            ERRORS=$((ERRORS + 1))
        fi
    done

    # Pattern blocklist
    for pattern in "${BLOCKED_PATTERNS[@]}"; do
        if grep -qE -e "$pattern" <<<"$file"; then
            printf '%bBLOCKED:%b %s matches sensitive path pattern (%s).\n' "$RED" "$NC" "$file" "$pattern"
            printf '  Add to .gitignore or: %bgit restore --staged %s%b\n' "$YELLOW" "$file" "$NC"
            ERRORS=$((ERRORS + 1))
        fi
    done

    # Content scan for key patterns: only lines ADDED by the staged diff.
    # --no-color/--no-ext-diff keep the '+' prefix parseable regardless of the
    # user's git configuration.
    staged_content=$(git diff --cached --no-color --no-ext-diff -- "$file" 2>/dev/null \
        | grep '^+' | grep -v '^+++' || true)
    for regex in "${KEY_REGEXES[@]}"; do
        # Here-string instead of `echo ... | grep -q`: with `pipefail`, grep -q
        # exiting early can SIGPIPE the writer on large diffs, making the
        # pipeline "fail" and the match go unreported.
        if grep -qE -e "$regex" <<<"$staged_content"; then
            printf '%bBLOCKED:%b %s appears to contain an API key or token.\n' "$RED" "$NC" "$file"
            printf '  Pattern matched: %b%s%b\n' "$YELLOW" "$regex" "$NC"
            printf '  Review with: %bgit diff --cached -- %s%b\n' "$YELLOW" "$file" "$NC"
            printf '  Use: %bgit restore --staged %s%b\n' "$YELLOW" "$file" "$NC"
            ERRORS=$((ERRORS + 1))
            break
        fi
    done
done

if [[ $ERRORS -gt 0 ]]; then
    echo ""
    printf '%bCommit blocked.%b Fix the issues above and try again.\n' "$RED" "$NC"
    exit 1
fi
exit 0
|
||||
30
.github/ISSUE_TEMPLATE/bug_report.md
vendored
30
.github/ISSUE_TEMPLATE/bug_report.md
vendored
|
|
@ -1,30 +0,0 @@
|
|||
---
|
||||
name: Bug report
|
||||
about: Something isn't working correctly
|
||||
labels: bug
|
||||
---
|
||||
|
||||
## Describe the bug
|
||||
|
||||
<!-- A clear description of what went wrong. -->
|
||||
|
||||
## Steps to reproduce
|
||||
|
||||
1.
|
||||
2.
|
||||
3.
|
||||
|
||||
## Expected behaviour
|
||||
|
||||
## Actual behaviour
|
||||
|
||||
<!-- Paste relevant log output below (redact any API keys or personal info): -->
|
||||
|
||||
```
|
||||
|
||||
```
|
||||
|
||||
## Environment
|
||||
|
||||
- Peregrine version: <!-- output of `./manage.sh status` or git tag -->
|
||||
- OS:
|
||||
- Runtime: Docker / conda-direct
|
||||
- GPU profile: remote / cpu / single-gpu / dual-gpu
|
||||
5
.github/ISSUE_TEMPLATE/config.yml
vendored
5
.github/ISSUE_TEMPLATE/config.yml
vendored
|
|
@ -1,5 +0,0 @@
|
|||
blank_issues_enabled: false
|
||||
contact_links:
|
||||
- name: Security vulnerability
|
||||
url: mailto:security@circuitforge.tech
|
||||
about: Do not open a public issue for security vulnerabilities. Email us instead.
|
||||
26
.github/ISSUE_TEMPLATE/feature_request.md
vendored
26
.github/ISSUE_TEMPLATE/feature_request.md
vendored
|
|
@ -1,26 +0,0 @@
|
|||
---
|
||||
name: Feature request
|
||||
about: Suggest an improvement or new capability
|
||||
labels: enhancement
|
||||
---
|
||||
|
||||
## Problem statement
|
||||
|
||||
<!-- What are you trying to do that's currently hard or impossible? -->
|
||||
|
||||
## Proposed solution
|
||||
|
||||
## Alternatives considered
|
||||
|
||||
## Which tier would this belong to?
|
||||
|
||||
- [ ] Free
|
||||
- [ ] Paid
|
||||
- [ ] Premium
|
||||
- [ ] Ultra (human-in-the-loop)
|
||||
- [ ] Not sure
|
||||
|
||||
## Would you be willing to contribute a PR?
|
||||
|
||||
- [ ] Yes
|
||||
- [ ] No
|
||||
27
.github/pull_request_template.md
vendored
27
.github/pull_request_template.md
vendored
|
|
@ -1,27 +0,0 @@
|
|||
## Summary
|
||||
|
||||
<!-- What does this PR do? -->
|
||||
|
||||
## Related issue(s)
|
||||
|
||||
Closes #
|
||||
|
||||
## Type of change
|
||||
|
||||
- [ ] feat — new feature
|
||||
- [ ] fix — bug fix
|
||||
- [ ] docs — documentation only
|
||||
- [ ] chore — tooling, deps, refactor
|
||||
- [ ] test — test coverage
|
||||
|
||||
## Testing
|
||||
|
||||
<!-- What did you run to verify this works? -->
|
||||
|
||||
```bash
|
||||
pytest tests/ -v
|
||||
```
|
||||
|
||||
## CLA
|
||||
|
||||
- [ ] I agree that my contribution is licensed under the project's [BSL 1.1](./LICENSE-BSL) terms.
|
||||
26
.github/workflows/ci.yml
vendored
26
.github/workflows/ci.yml
vendored
|
|
@ -1,26 +0,0 @@
|
|||
name: CI
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [main]
|
||||
pull_request:
|
||||
branches: [main]
|
||||
|
||||
jobs:
|
||||
test:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.11"
|
||||
cache: pip
|
||||
|
||||
- name: Install dependencies
|
||||
run: pip install -r requirements.txt
|
||||
|
||||
- name: Run tests
|
||||
run: pytest tests/ -v --tb=short
|
||||
6
.gitignore
vendored
6
.gitignore
vendored
|
|
@ -37,10 +37,4 @@ config/user.yaml.working
|
|||
CLAUDE.md
|
||||
|
||||
data/email_score.jsonl
|
||||
data/email_label_queue.jsonl
|
||||
data/email_compare_sample.jsonl
|
||||
|
||||
config/label_tool.yaml
|
||||
config/server.yaml
|
||||
|
||||
demo/data/*.db
|
||||
|
|
|
|||
|
|
@ -1,83 +1,13 @@
|
|||
# Contributing to Peregrine
|
||||
|
||||
Thanks for your interest. Peregrine is developed primarily at
|
||||
[git.opensourcesolarpunk.com](https://git.opensourcesolarpunk.com/pyr0ball/peregrine).
|
||||
GitHub and Codeberg are push mirrors — issues and PRs are welcome on either platform.
|
||||
See the full contributing guide in the documentation:
|
||||
https://docs.circuitforge.io/peregrine/developer-guide/contributing/
|
||||
|
||||
---
|
||||
## Quick start
|
||||
|
||||
## License
|
||||
1. Fork the repo and create a feature branch (`feat/my-feature`)
|
||||
2. Set up the dev environment: `conda env create -f environment.yml`
|
||||
3. Run tests: `conda run -n job-seeker python -m pytest tests/ -v`
|
||||
4. Open a pull request — all CI checks must pass
|
||||
|
||||
Peregrine is licensed under **[BSL 1.1](./LICENSE-BSL)** — Business Source License.
|
||||
|
||||
What this means for you:
|
||||
|
||||
| Use case | Allowed? |
|
||||
|----------|----------|
|
||||
| Personal self-hosting, non-commercial | ✅ Free |
|
||||
| Contributing code, fixing bugs, writing docs | ✅ Free |
|
||||
| Commercial SaaS / hosted service | 🔒 Requires a paid license |
|
||||
| After 4 years from each release date | ✅ Converts to MIT |
|
||||
|
||||
**By submitting a pull request you agree that your contribution is licensed under the
|
||||
project's BSL 1.1 terms.** The PR template includes this as a checkbox.
|
||||
|
||||
---
|
||||
|
||||
## Dev Setup
|
||||
|
||||
See [`docs/getting-started/installation.md`](docs/getting-started/installation.md) for
|
||||
full instructions.
|
||||
|
||||
**Quick start (Docker — recommended):**
|
||||
|
||||
```bash
|
||||
git clone https://git.opensourcesolarpunk.com/pyr0ball/peregrine.git
|
||||
cd peregrine
|
||||
./setup.sh # installs deps, activates git hooks
|
||||
./manage.sh start
|
||||
```
|
||||
|
||||
**Conda (no Docker):**
|
||||
|
||||
```bash
|
||||
conda run -n job-seeker pip install -r requirements.txt
|
||||
streamlit run app/app.py
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Commit Format
|
||||
|
||||
Hooks enforce [Conventional Commits](https://www.conventionalcommits.org/):
|
||||
|
||||
```
|
||||
type: short description
|
||||
type(scope): short description
|
||||
```
|
||||
|
||||
Valid types: `feat` `fix` `docs` `chore` `test` `refactor` `perf` `ci` `build`
|
||||
|
||||
The hook will tell you exactly what went wrong if your message is rejected.
|
||||
|
||||
---
|
||||
|
||||
## Pull Request Process
|
||||
|
||||
1. Fork and branch from `main`
|
||||
2. Write tests first (we use `pytest`)
|
||||
3. Run `pytest tests/ -v` — all tests must pass
|
||||
4. Open a PR on GitHub or Codeberg
|
||||
5. PRs are reviewed and cherry-picked to Forgejo (the canonical repo) — you don't need a Forgejo account
|
||||
|
||||
---
|
||||
|
||||
## Reporting Issues
|
||||
|
||||
Use the issue templates:
|
||||
|
||||
- **Bug** — steps to reproduce, version, OS, Docker or conda, logs
|
||||
- **Feature** — problem statement, proposed solution, which tier it belongs to
|
||||
|
||||
**Security issues:** Do **not** open a public issue. Email `security@circuitforge.tech`.
|
||||
See [SECURITY.md](./SECURITY.md).
|
||||
See the docs for: adding custom scrapers, adding integrations, code style, and PR checklist.
|
||||
|
|
|
|||
37
README.md
37
README.md
|
|
@ -1,10 +1,5 @@
|
|||
# Peregrine
|
||||
|
||||
> **Primary development** happens at [git.opensourcesolarpunk.com](https://git.opensourcesolarpunk.com/pyr0ball/peregrine) — GitHub and Codeberg are push mirrors. Issues and PRs are welcome on either platform.
|
||||
|
||||
[License: BSL 1.1](./LICENSE-BSL)
|
||||
[CI](https://github.com/CircuitForge/peregrine/actions/workflows/ci.yml)
|
||||
|
||||
**AI-powered job search pipeline — by [Circuit Forge LLC](https://circuitforge.io)**
|
||||
|
||||
> *"Don't be evil, for real and forever."*
|
||||
|
|
@ -28,9 +23,9 @@ cd peregrine
|
|||
|
||||
```bash
|
||||
./manage.sh start # remote profile (API-only, no GPU)
|
||||
./manage.sh start --profile cpu # local Ollama (CPU, or Metal GPU on Apple Silicon — see below)
|
||||
./manage.sh start --profile single-gpu # Ollama + Vision on GPU 0 (NVIDIA only)
|
||||
./manage.sh start --profile dual-gpu # Ollama + Vision + vLLM (GPU 0 + 1) (NVIDIA only)
|
||||
./manage.sh start --profile cpu # local Ollama on CPU
|
||||
./manage.sh start --profile single-gpu # Ollama + Vision on GPU 0
|
||||
./manage.sh start --profile dual-gpu # Ollama + Vision + vLLM (GPU 0 + 1)
|
||||
```
|
||||
|
||||
Or use `make` directly:
|
||||
|
|
@ -42,7 +37,7 @@ make start PROFILE=single-gpu
|
|||
|
||||
**3.** Open http://localhost:8501 — the setup wizard guides you through the rest.
|
||||
|
||||
> **macOS / Apple Silicon:** Docker Desktop must be running. For Metal GPU-accelerated inference, install Ollama natively before starting — `setup.sh` will prompt you to do this. See [Apple Silicon GPU](#apple-silicon-gpu) below.
|
||||
> **macOS:** Docker Desktop must be running before starting.
|
||||
> **Windows:** Not supported — use WSL2 with Ubuntu.
|
||||
|
||||
### Installing to `/opt` or other system directories
|
||||
|
|
@ -78,25 +73,9 @@ After `./manage.sh setup`, log out and back in for docker group membership to ta
|
|||
| Profile | Services started | Use case |
|
||||
|---------|-----------------|----------|
|
||||
| `remote` | app + searxng | No GPU; LLM calls go to Anthropic / OpenAI |
|
||||
| `cpu` | app + ollama + searxng | No GPU; local models on CPU. On Apple Silicon, use with native Ollama for Metal acceleration — see below. |
|
||||
| `single-gpu` | app + ollama + vision + searxng | One **NVIDIA** GPU: cover letters, research, vision |
|
||||
| `dual-gpu` | app + ollama + vllm + vision + searxng | Two **NVIDIA** GPUs: GPU 0 = Ollama, GPU 1 = vLLM |
|
||||
|
||||
### Apple Silicon GPU
|
||||
|
||||
Docker Desktop on macOS runs in a Linux VM — it cannot access the Apple GPU. Metal-accelerated inference requires Ollama to run **natively** on the host.
|
||||
|
||||
`setup.sh` handles this automatically: it offers to install Ollama via Homebrew, starts it as a background service, and explains what happens next. If Ollama is running on port 11434 when you start Peregrine, preflight detects it, stubs out the Docker Ollama container, and routes inference through the native process — which uses Metal automatically.
|
||||
|
||||
To do it manually:
|
||||
|
||||
```bash
|
||||
brew install ollama
|
||||
brew services start ollama # starts at login, uses Metal GPU
|
||||
./manage.sh start --profile cpu # preflight adopts native Ollama; Docker container is skipped
|
||||
```
|
||||
|
||||
The `cpu` profile label is a slight misnomer in this context — Ollama will be running on the GPU. `single-gpu` and `dual-gpu` profiles are NVIDIA-specific and not applicable on Mac.
|
||||
| `cpu` | app + ollama + searxng | No GPU; local models on CPU (slow) |
|
||||
| `single-gpu` | app + ollama + vision + searxng | One GPU: cover letters, research, vision |
|
||||
| `dual-gpu` | app + ollama + vllm + vision + searxng | GPU 0 = Ollama, GPU 1 = vLLM |
|
||||
|
||||
---
|
||||
|
||||
|
|
@ -104,7 +83,7 @@ The `cpu` profile label is a slight misnomer in this context — Ollama will be
|
|||
|
||||
On first launch the setup wizard walks through seven steps:
|
||||
|
||||
1. **Hardware** — detects NVIDIA GPUs (Linux) or Apple Silicon GPU (macOS) and recommends a profile
|
||||
1. **Hardware** — detects NVIDIA GPUs and recommends a profile
|
||||
2. **Tier** — choose free, paid, or premium (or use `dev_tier_override` for local testing)
|
||||
3. **Identity** — name, email, phone, LinkedIn, career summary
|
||||
4. **Resume** — upload a PDF/DOCX for LLM parsing, or use the guided form builder
|
||||
|
|
|
|||
26
SECURITY.md
26
SECURITY.md
|
|
@ -1,26 +0,0 @@
|
|||
# Security Policy
|
||||
|
||||
## Reporting a Vulnerability
|
||||
|
||||
**Do not open a GitHub or Codeberg issue for security vulnerabilities.**
|
||||
|
||||
Email: `security@circuitforge.tech`
|
||||
|
||||
Include:
|
||||
- A description of the vulnerability
|
||||
- Steps to reproduce
|
||||
- Potential impact
|
||||
- Any suggested fix (optional)
|
||||
|
||||
**Response target:** 72 hours for acknowledgement, 14 days for triage.
|
||||
|
||||
We follow responsible disclosure — we will coordinate a fix and release before any
|
||||
public disclosure and will credit you in the release notes unless you prefer to remain
|
||||
anonymous.
|
||||
|
||||
## Supported Versions
|
||||
|
||||
| Version | Supported |
|
||||
|---------|-----------|
|
||||
| Latest release | ✅ |
|
||||
| Older releases | ❌ — please upgrade |
|
||||
12
app/app.py
12
app/app.py
|
|
@ -8,7 +8,6 @@ Run: streamlit run app/app.py
|
|||
bash scripts/manage-ui.sh start
|
||||
"""
|
||||
import logging
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
|
@ -17,8 +16,6 @@ sys.path.insert(0, str(Path(__file__).parent.parent))
|
|||
|
||||
logging.basicConfig(level=logging.WARNING, format="%(name)s %(levelname)s: %(message)s")
|
||||
|
||||
IS_DEMO = os.environ.get("DEMO_MODE", "").lower() in ("1", "true", "yes")
|
||||
|
||||
import streamlit as st
|
||||
from scripts.db import DEFAULT_DB, init_db, get_active_tasks
|
||||
import sqlite3
|
||||
|
|
@ -79,7 +76,7 @@ except Exception:
|
|||
from scripts.user_profile import UserProfile as _UserProfile
|
||||
_USER_YAML = Path(__file__).parent.parent / "config" / "user.yaml"
|
||||
|
||||
_show_wizard = not IS_DEMO and (
|
||||
_show_wizard = (
|
||||
not _UserProfile.exists(_USER_YAML)
|
||||
or not _UserProfile(_USER_YAML).wizard_complete
|
||||
)
|
||||
|
|
@ -154,13 +151,6 @@ def _get_version() -> str:
|
|||
return "dev"
|
||||
|
||||
with st.sidebar:
|
||||
if IS_DEMO:
|
||||
st.info(
|
||||
"**Public demo** — read-only sample data. "
|
||||
"AI features and data saves are disabled.\n\n"
|
||||
"[Get your own instance →](https://circuitforge.tech/software/peregrine)",
|
||||
icon="🔒",
|
||||
)
|
||||
_task_indicator()
|
||||
st.divider()
|
||||
st.caption(f"Peregrine {_get_version()}")
|
||||
|
|
|
|||
|
|
@ -105,11 +105,10 @@ def _generation_widget(section: str, label: str, tier: str,
|
|||
Call this inside a step to add LLM generation support.
|
||||
The caller decides whether to auto-populate a field with the result.
|
||||
"""
|
||||
from app.wizard.tiers import can_use, tier_label as tl, has_configured_llm
|
||||
from app.wizard.tiers import can_use, tier_label as tl
|
||||
|
||||
_has_byok = has_configured_llm()
|
||||
if not can_use(tier, feature_key, has_byok=_has_byok):
|
||||
st.caption(f"{tl(feature_key, has_byok=_has_byok)} {label}")
|
||||
if not can_use(tier, feature_key):
|
||||
st.caption(f"{tl(feature_key)} {label}")
|
||||
return None
|
||||
|
||||
col_btn, col_fb = st.columns([2, 5])
|
||||
|
|
|
|||
|
|
@ -81,8 +81,6 @@ Return ONLY valid JSON in this exact format:
|
|||
_show_finetune = bool(_profile and _profile.inference_profile in ("single-gpu", "dual-gpu"))
|
||||
|
||||
USER_CFG = CONFIG_DIR / "user.yaml"
|
||||
SERVER_CFG = CONFIG_DIR / "server.yaml"
|
||||
SERVER_CFG_EXAMPLE = CONFIG_DIR / "server.yaml.example"
|
||||
|
||||
_dev_mode = _os.getenv("DEV_MODE", "").lower() in ("true", "1", "yes")
|
||||
_u_for_dev = yaml.safe_load(USER_CFG.read_text()) or {} if USER_CFG.exists() else {}
|
||||
|
|
@ -90,20 +88,19 @@ _show_dev_tab = _dev_mode or bool(_u_for_dev.get("dev_tier_override"))
|
|||
|
||||
_tab_names = [
|
||||
"👤 My Profile", "📝 Resume Profile", "🔎 Search",
|
||||
"⚙️ System", "🎯 Fine-Tune", "🔑 License", "💾 Data"
|
||||
"⚙️ System", "🎯 Fine-Tune", "🔑 License"
|
||||
]
|
||||
if _show_dev_tab:
|
||||
_tab_names.append("🛠️ Developer")
|
||||
_all_tabs = st.tabs(_tab_names)
|
||||
tab_profile, tab_resume, tab_search, tab_system, tab_finetune, tab_license, tab_data = _all_tabs[:7]
|
||||
tab_profile, tab_resume, tab_search, tab_system, tab_finetune, tab_license = _all_tabs[:6]
|
||||
|
||||
# ── Inline LLM generate buttons ───────────────────────────────────────────────
|
||||
# Unlocked when user has a configured LLM backend (BYOK) OR a paid tier.
|
||||
# Paid-tier feature: ✨ Generate buttons sit directly below each injectable field.
|
||||
# Writes into session state keyed to the widget's `key=` param, then reruns.
|
||||
from app.wizard.tiers import can_use as _cu, has_configured_llm as _has_llm
|
||||
_byok = _has_llm()
|
||||
from app.wizard.tiers import can_use as _cu
|
||||
_gen_panel_active = bool(_profile) and _cu(
|
||||
_profile.effective_tier if _profile else "free", "llm_career_summary", has_byok=_byok
|
||||
_profile.effective_tier if _profile else "free", "llm_career_summary"
|
||||
)
|
||||
|
||||
# Seed session state for LLM-injectable text fields on first load
|
||||
|
|
@ -252,7 +249,7 @@ with tab_profile:
|
|||
st.rerun()
|
||||
|
||||
if not _can_generate:
|
||||
st.caption("✨ AI generation requires a paid tier or a configured LLM backend (BYOK).")
|
||||
st.caption("✨ AI generation requires a paid tier.")
|
||||
|
||||
_mission_updated = {
|
||||
r["key"]: r["value"]
|
||||
|
|
@ -876,55 +873,6 @@ with tab_system:
|
|||
|
||||
st.divider()
|
||||
|
||||
# ── Deployment / Server ───────────────────────────────────────────────────
|
||||
with st.expander("🖥️ Deployment / Server", expanded=False):
|
||||
st.caption(
|
||||
"Settings that affect how Peregrine is served. "
|
||||
"Changes require a restart (`./manage.sh restart`) to take effect."
|
||||
)
|
||||
|
||||
_srv = _yaml_up.safe_load(SERVER_CFG.read_text()) if SERVER_CFG.exists() else {}
|
||||
_srv_example = _yaml_up.safe_load(SERVER_CFG_EXAMPLE.read_text()) if SERVER_CFG_EXAMPLE.exists() else {}
|
||||
_srv_defaults = {**_srv_example, **_srv}
|
||||
|
||||
_active_base_url = _os.environ.get("STREAMLIT_SERVER_BASE_URL_PATH", "")
|
||||
if _active_base_url:
|
||||
st.info(f"**Active base URL path:** `/{_active_base_url}` (set via environment)")
|
||||
else:
|
||||
st.info("**Active base URL path:** *(none — serving at root `/`)*")
|
||||
|
||||
s_base_url = st.text_input(
|
||||
"Base URL path",
|
||||
value=_srv_defaults.get("base_url_path", ""),
|
||||
placeholder="e.g. peregrine",
|
||||
help=(
|
||||
"URL prefix when serving behind a reverse proxy at a sub-path. "
|
||||
"Leave empty for direct access. "
|
||||
"Maps to STREAMLIT_BASE_URL_PATH in .env.\n\n"
|
||||
"Docs: https://docs.streamlit.io/develop/api-reference/configuration/config.toml#server.baseUrlPath"
|
||||
),
|
||||
)
|
||||
s_server_port = st.number_input(
|
||||
"Container port",
|
||||
value=int(_srv_defaults.get("server_port", 8501)),
|
||||
min_value=1024, max_value=65535, step=1,
|
||||
help="Port Streamlit listens on inside the container. The host port is set via STREAMLIT_PORT in .env.",
|
||||
)
|
||||
|
||||
if st.button("💾 Save Deployment Settings", key="save_server"):
|
||||
_new_srv = {"base_url_path": s_base_url.strip(), "server_port": int(s_server_port)}
|
||||
save_yaml(SERVER_CFG, _new_srv)
|
||||
# Mirror base_url_path into .env so compose picks it up on next restart
|
||||
_env_path = Path(__file__).parent.parent.parent / ".env"
|
||||
if _env_path.exists():
|
||||
_env_lines = [l for l in _env_path.read_text().splitlines()
|
||||
if not l.startswith("STREAMLIT_BASE_URL_PATH=")]
|
||||
_env_lines.append(f"STREAMLIT_BASE_URL_PATH={s_base_url.strip()}")
|
||||
_env_path.write_text("\n".join(_env_lines) + "\n")
|
||||
st.success("Deployment settings saved. Run `./manage.sh restart` to apply.")
|
||||
|
||||
st.divider()
|
||||
|
||||
# ── LLM Backends ─────────────────────────────────────────────────────────
|
||||
with st.expander("🤖 LLM Backends", expanded=False):
|
||||
import requests as _req
|
||||
|
|
@ -1390,99 +1338,6 @@ with tab_license:
|
|||
except Exception as _e:
|
||||
st.error(f"Activation failed: {_e}")
|
||||
|
||||
# ── Data tab — Backup / Restore / Teleport ────────────────────────────────────
|
||||
with tab_data:
|
||||
st.subheader("💾 Backup / Restore / Teleport")
|
||||
st.caption(
|
||||
"Export all your personal configs and job data as a portable zip. "
|
||||
"Use to migrate between machines, back up before testing, or transfer to a new Docker volume."
|
||||
)
|
||||
|
||||
from scripts.backup import create_backup, list_backup_contents, restore_backup as _do_restore
|
||||
|
||||
_base_dir = Path(__file__).parent.parent.parent
|
||||
|
||||
# ── Backup ────────────────────────────────────────────────────────────────
|
||||
st.markdown("### 📦 Create Backup")
|
||||
_incl_db = st.checkbox("Include staging.db (job data)", value=True, key="backup_incl_db")
|
||||
if st.button("Create Backup", key="backup_create"):
|
||||
with st.spinner("Creating backup…"):
|
||||
try:
|
||||
_zip_bytes = create_backup(_base_dir, include_db=_incl_db)
|
||||
_info = list_backup_contents(_zip_bytes)
|
||||
from datetime import datetime as _dt
|
||||
_ts = _dt.now().strftime("%Y%m%d-%H%M%S")
|
||||
_fname = f"peregrine-backup-{_ts}.zip"
|
||||
st.success(
|
||||
f"Backup ready — {len(_info['files'])} files, "
|
||||
f"{_info['total_bytes'] / 1024:.0f} KB uncompressed"
|
||||
)
|
||||
st.download_button(
|
||||
label="⬇️ Download backup zip",
|
||||
data=_zip_bytes,
|
||||
file_name=_fname,
|
||||
mime="application/zip",
|
||||
key="backup_download",
|
||||
)
|
||||
with st.expander("Files included"):
|
||||
for _fn in _info["files"]:
|
||||
_sz = _info["sizes"].get(_fn, 0)
|
||||
st.caption(f"`{_fn}` — {_sz:,} bytes")
|
||||
except Exception as _e:
|
||||
st.error(f"Backup failed: {_e}")
|
||||
|
||||
st.divider()
|
||||
|
||||
# ── Restore ───────────────────────────────────────────────────────────────
|
||||
st.markdown("### 📂 Restore from Backup")
|
||||
st.warning(
|
||||
"Restoring overwrites existing config files and (optionally) staging.db. "
|
||||
"Create a fresh backup first if you want to preserve current settings.",
|
||||
icon="⚠️",
|
||||
)
|
||||
_restore_file = st.file_uploader(
|
||||
"Upload backup zip", type=["zip"], key="restore_upload",
|
||||
help="Select a peregrine-backup-*.zip created by this tool."
|
||||
)
|
||||
_restore_db = st.checkbox("Restore staging.db (job data)", value=True, key="restore_incl_db")
|
||||
_restore_overwrite = st.checkbox("Overwrite existing files", value=True, key="restore_overwrite")
|
||||
|
||||
if _restore_file and st.button("Restore", type="primary", key="restore_go"):
|
||||
with st.spinner("Restoring…"):
|
||||
try:
|
||||
_zip_bytes = _restore_file.read()
|
||||
_result = _do_restore(
|
||||
_zip_bytes, _base_dir,
|
||||
include_db=_restore_db,
|
||||
overwrite=_restore_overwrite,
|
||||
)
|
||||
st.success(f"Restored {len(_result['restored'])} files.")
|
||||
with st.expander("Details"):
|
||||
for _fn in _result["restored"]:
|
||||
st.caption(f"✓ `{_fn}`")
|
||||
for _fn in _result["skipped"]:
|
||||
st.caption(f"— `{_fn}` (skipped)")
|
||||
st.info("Restart the app for changes to take effect.", icon="ℹ️")
|
||||
except Exception as _e:
|
||||
st.error(f"Restore failed: {_e}")
|
||||
|
||||
st.divider()
|
||||
|
||||
# ── Teleport ──────────────────────────────────────────────────────────────
|
||||
st.markdown("### 🚀 Teleport to Another Machine")
|
||||
st.markdown("""
|
||||
**How to move Peregrine to a new machine or Docker volume:**
|
||||
|
||||
1. **Here (source):** click **Create Backup** above and download the zip.
|
||||
2. **On the target machine:** clone the repo and run `./manage.sh start`.
|
||||
3. **In the target Peregrine UI:** go to Settings → 💾 Data → Restore from Backup and upload the zip.
|
||||
4. Restart the target app: `./manage.sh restart`.
|
||||
|
||||
The zip contains all gitignored configs (email credentials, Notion token, LLM settings, resume YAML)
|
||||
and optionally your staging database (all discovered/applied jobs, contacts, cover letters).
|
||||
""")
|
||||
|
||||
|
||||
# ── Developer tab ─────────────────────────────────────────────────────────────
|
||||
if _show_dev_tab:
|
||||
with _all_tabs[-1]:
|
||||
|
|
@ -1554,61 +1409,3 @@ if _show_dev_tab:
|
|||
st.error(f"Invalid token ({resp.status_code})")
|
||||
except Exception as e:
|
||||
st.error(f"Error: {e}")
|
||||
|
||||
st.divider()
|
||||
st.markdown("**📊 Export Classifier Training Data**")
|
||||
st.caption(
|
||||
"Exports inbound emails from `job_contacts` (labeled by the IMAP sync classifier) "
|
||||
"to `data/email_score.jsonl` for use with `scripts/benchmark_classifier.py --score`. "
|
||||
"⚠️ Labels are generated by llama3.1:8b — review before using as ground truth."
|
||||
)
|
||||
_db_candidates = [
|
||||
Path(__file__).parent.parent.parent / "data" / "staging.db",
|
||||
Path(__file__).parent.parent.parent / "staging.db",
|
||||
]
|
||||
_db_path = next((p for p in _db_candidates if p.exists()), None)
|
||||
_score_out = Path(__file__).parent.parent.parent / "data" / "email_score.jsonl"
|
||||
|
||||
if _db_path is None:
|
||||
st.warning("No `staging.db` found — run discovery first to create the database.")
|
||||
else:
|
||||
st.caption(f"Database: `{_db_path.name}` · Output: `data/email_score.jsonl`")
|
||||
if st.button("📤 Export DB labels → email_score.jsonl", key="dev_export_db"):
|
||||
import sqlite3 as _sqlite3
|
||||
from scripts.benchmark_classifier import LABELS as _BC_LABELS
|
||||
_conn = _sqlite3.connect(_db_path)
|
||||
_cur = _conn.cursor()
|
||||
_cur.execute("""
|
||||
SELECT subject, body, stage_signal
|
||||
FROM job_contacts
|
||||
WHERE stage_signal IS NOT NULL
|
||||
AND stage_signal != ''
|
||||
AND direction = 'inbound'
|
||||
ORDER BY received_at
|
||||
""")
|
||||
_rows = _cur.fetchall()
|
||||
_conn.close()
|
||||
|
||||
if not _rows:
|
||||
st.warning("No labeled emails in `job_contacts`. Run IMAP sync first.")
|
||||
else:
|
||||
_score_out.parent.mkdir(parents=True, exist_ok=True)
|
||||
_written, _skipped = 0, 0
|
||||
_label_counts: dict = {}
|
||||
with _score_out.open("w") as _f:
|
||||
for _subj, _body, _label in _rows:
|
||||
if _label not in _BC_LABELS:
|
||||
_skipped += 1
|
||||
continue
|
||||
import json as _json_dev
|
||||
_f.write(_json_dev.dumps({
|
||||
"subject": _subj or "",
|
||||
"body": (_body or "")[:800],
|
||||
"label": _label,
|
||||
}) + "\n")
|
||||
_written += 1
|
||||
_label_counts[_label] = _label_counts.get(_label, 0) + 1
|
||||
st.success(f"Exported **{_written}** emails → `data/email_score.jsonl` ({_skipped} skipped — unknown labels)")
|
||||
st.caption("Label distribution:")
|
||||
for _lbl, _cnt in sorted(_label_counts.items(), key=lambda x: -x[1]):
|
||||
st.caption(f" `{_lbl}`: {_cnt}")
|
||||
|
|
|
|||
|
|
@ -4,54 +4,33 @@ Tier definitions and feature gates for Peregrine.
|
|||
Tiers: free < paid < premium
|
||||
FEATURES maps feature key → minimum tier required.
|
||||
Features not in FEATURES are available to all tiers (free).
|
||||
|
||||
BYOK policy
|
||||
-----------
|
||||
Features in BYOK_UNLOCKABLE are gated only because CircuitForge would otherwise
|
||||
be providing the LLM compute. When a user has any configured LLM backend (local
|
||||
ollama/vllm or their own API key), those features unlock regardless of tier.
|
||||
Pass has_byok=has_configured_llm() to can_use() at call sites.
|
||||
|
||||
Features that stay gated even with BYOK:
|
||||
- Integrations (Notion sync, calendars, etc.) — infrastructure we run
|
||||
- llm_keywords_blocklist — orchestration pipeline over background keyword data
|
||||
- email_classifier — training pipeline, not a single LLM call
|
||||
- shared_cover_writer_model — our fine-tuned model weights
|
||||
- model_fine_tuning — GPU infrastructure
|
||||
- multi_user — account infrastructure
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
TIERS = ["free", "paid", "premium"]
|
||||
|
||||
# Maps feature key → minimum tier string required.
|
||||
# Features absent from this dict are free (available to all).
|
||||
FEATURES: dict[str, str] = {
|
||||
# Wizard LLM generation — BYOK-unlockable (pure LLM calls)
|
||||
# Wizard LLM generation
|
||||
"llm_career_summary": "paid",
|
||||
"llm_expand_bullets": "paid",
|
||||
"llm_suggest_skills": "paid",
|
||||
"llm_voice_guidelines": "premium",
|
||||
"llm_job_titles": "paid",
|
||||
"llm_keywords_blocklist": "paid",
|
||||
"llm_mission_notes": "paid",
|
||||
|
||||
# Orchestration — stays gated (background data pipeline, not just an LLM call)
|
||||
"llm_keywords_blocklist": "paid",
|
||||
|
||||
# App features — BYOK-unlockable (pure LLM calls over job/profile data)
|
||||
# App features
|
||||
"company_research": "paid",
|
||||
"interview_prep": "paid",
|
||||
"survey_assistant": "paid",
|
||||
|
||||
# Orchestration / infrastructure — stays gated
|
||||
"email_classifier": "paid",
|
||||
"survey_assistant": "paid",
|
||||
"model_fine_tuning": "premium",
|
||||
"shared_cover_writer_model": "paid",
|
||||
"multi_user": "premium",
|
||||
|
||||
# Integrations — stays gated (infrastructure CircuitForge operates)
|
||||
# Integrations (paid)
|
||||
"notion_sync": "paid",
|
||||
"google_sheets_sync": "paid",
|
||||
"airtable_sync": "paid",
|
||||
|
|
@ -60,71 +39,28 @@ FEATURES: dict[str, str] = {
|
|||
"slack_notifications": "paid",
|
||||
}
|
||||
|
||||
# Features that unlock when the user supplies any LLM backend (local or BYOK).
|
||||
# These are pure LLM-call features — the only reason they're behind a tier is
|
||||
# because CircuitForge would otherwise be providing the compute.
|
||||
BYOK_UNLOCKABLE: frozenset[str] = frozenset({
|
||||
"llm_career_summary",
|
||||
"llm_expand_bullets",
|
||||
"llm_suggest_skills",
|
||||
"llm_voice_guidelines",
|
||||
"llm_job_titles",
|
||||
"llm_mission_notes",
|
||||
"company_research",
|
||||
"interview_prep",
|
||||
"survey_assistant",
|
||||
})
|
||||
|
||||
# Free integrations (not in FEATURES):
|
||||
# google_drive_sync, dropbox_sync, onedrive_sync, mega_sync,
|
||||
# nextcloud_sync, discord_notifications, home_assistant
|
||||
|
||||
_LLM_CFG = Path(__file__).parent.parent.parent / "config" / "llm.yaml"
|
||||
|
||||
|
||||
def has_configured_llm(config_path: Path | None = None) -> bool:
    """Return True if at least one non-vision LLM backend is enabled in llm.yaml.

    Local backends (ollama, vllm) count — the policy is "you're providing the
    compute", whether that's your own hardware or your own API key.

    Args:
        config_path: YAML file to inspect; falls back to the module-level
            ``_LLM_CFG`` path when omitted.

    Returns:
        True when the ``backends`` mapping holds at least one entry whose
        ``enabled`` flag is truthy (a missing flag counts as enabled) and
        whose ``type`` is not ``"vision_service"``. False when the file is
        missing, unreadable, malformed, or contains no such backend.
    """
    import yaml

    path = config_path or _LLM_CFG
    try:
        # Decode as UTF-8 explicitly so parsing does not depend on the
        # platform's locale encoding.
        with open(path, encoding="utf-8") as f:
            cfg = yaml.safe_load(f) or {}
    except (OSError, ValueError, yaml.YAMLError):
        # Best-effort probe: an absent or broken config simply means
        # "no usable backend" — never an error for the caller.
        return False

    backends = cfg.get("backends") if isinstance(cfg, dict) else None
    if not isinstance(backends, dict):
        return False

    # Skip malformed (non-mapping) entries instead of letting a single bad
    # entry hide every valid backend.
    return any(
        b.get("enabled", True) and b.get("type") != "vision_service"
        for b in backends.values()
        if isinstance(b, dict)
    )
|
||||
|
||||
|
||||
def can_use(tier: str, feature: str, has_byok: bool = False) -> bool:
|
||||
def can_use(tier: str, feature: str) -> bool:
|
||||
"""Return True if the given tier has access to the feature.
|
||||
|
||||
has_byok: pass has_configured_llm() to unlock BYOK_UNLOCKABLE features
|
||||
for users who supply their own LLM backend regardless of tier.
|
||||
|
||||
Returns True for unknown features (not gated).
|
||||
Returns False for unknown/invalid tier strings.
|
||||
"""
|
||||
required = FEATURES.get(feature)
|
||||
if required is None:
|
||||
return True # not gated — available to all
|
||||
if has_byok and feature in BYOK_UNLOCKABLE:
|
||||
return True
|
||||
try:
|
||||
return TIERS.index(tier) >= TIERS.index(required)
|
||||
except ValueError:
|
||||
return False # invalid tier string
|
||||
|
||||
|
||||
def tier_label(feature: str, has_byok: bool = False) -> str:
|
||||
"""Return a display label for a locked feature, or '' if free/unlocked."""
|
||||
if has_byok and feature in BYOK_UNLOCKABLE:
|
||||
return ""
|
||||
def tier_label(feature: str) -> str:
|
||||
"""Return a display label for a locked feature, or '' if free/unknown."""
|
||||
required = FEATURES.get(feature)
|
||||
if required is None:
|
||||
return ""
|
||||
|
|
|
|||
|
|
@ -1,52 +0,0 @@
|
|||
# compose.menagerie.yml — Public demo stack for menagerie.circuitforge.tech/peregrine
|
||||
#
|
||||
# Runs a fully isolated, neutered Peregrine instance:
|
||||
# - DEMO_MODE=true: blocks all LLM inference in llm_router.py
|
||||
# - demo/config/: pre-seeded demo user profile, all backends disabled
|
||||
# - demo/data/: isolated SQLite DB (no personal job data)
|
||||
# - No personal documents mounted
|
||||
# - Port 8504 (separate from the personal instance on 8502)
|
||||
#
|
||||
# Usage:
|
||||
# docker compose -f compose.menagerie.yml --project-name peregrine-demo up -d
|
||||
# docker compose -f compose.menagerie.yml --project-name peregrine-demo down
|
||||
#
|
||||
# Caddy menagerie.circuitforge.tech/peregrine* → host port 8504
|
||||
|
||||
services:
|
||||
|
||||
app:
|
||||
build: .
|
||||
ports:
|
||||
- "8504:8501"
|
||||
volumes:
|
||||
- ./demo/config:/app/config
|
||||
- ./demo/data:/app/data
|
||||
# No /docs mount — demo has no personal documents
|
||||
environment:
|
||||
- DEMO_MODE=true
|
||||
- STAGING_DB=/app/data/staging.db
|
||||
- DOCS_DIR=/tmp/demo-docs
|
||||
- STREAMLIT_SERVER_BASE_URL_PATH=peregrine
|
||||
- PYTHONUNBUFFERED=1
|
||||
- PYTHONLOGGING=WARNING
|
||||
# No API keys — inference is blocked by DEMO_MODE before any key is needed
|
||||
depends_on:
|
||||
searxng:
|
||||
condition: service_healthy
|
||||
extra_hosts:
|
||||
- "host.docker.internal:host-gateway"
|
||||
restart: unless-stopped
|
||||
|
||||
searxng:
|
||||
image: searxng/searxng:latest
|
||||
volumes:
|
||||
- ./docker/searxng:/etc/searxng:ro
|
||||
healthcheck:
|
||||
test: ["CMD", "wget", "-q", "--spider", "http://localhost:8080/"]
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 3
|
||||
restart: unless-stopped
|
||||
# No host port published — internal only; demo app uses it for job description enrichment
|
||||
# (non-AI scraping is allowed; only LLM inference is blocked)
|
||||
|
|
@ -19,7 +19,6 @@ services:
|
|||
- PEREGRINE_GPU_COUNT=${PEREGRINE_GPU_COUNT:-0}
|
||||
- PEREGRINE_GPU_NAMES=${PEREGRINE_GPU_NAMES:-}
|
||||
- RECOMMENDED_PROFILE=${RECOMMENDED_PROFILE:-remote}
|
||||
- STREAMLIT_SERVER_BASE_URL_PATH=${STREAMLIT_BASE_URL_PATH:-}
|
||||
- PYTHONUNBUFFERED=1
|
||||
- PYTHONLOGGING=WARNING
|
||||
depends_on:
|
||||
|
|
|
|||
|
|
@ -61,6 +61,6 @@ vision_fallback_order:
|
|||
- vision_service
|
||||
- claude_code
|
||||
- anthropic
|
||||
# Note: 'ollama' (alex-cover-writer) intentionally excluded — research
|
||||
# Note: 'ollama' (meghan-cover-writer) intentionally excluded — research
|
||||
# must never use the fine-tuned writer model, and this also avoids evicting
|
||||
# the writer from GPU memory while a cover letter task is in flight.
|
||||
|
|
|
|||
|
|
@ -1,14 +0,0 @@
|
|||
# config/server.yaml — Peregrine deployment / server settings
|
||||
# Copy to config/server.yaml and edit. Gitignored — do not commit.
|
||||
# Changes require restarting Peregrine to take effect (./manage.sh restart).
|
||||
|
||||
# base_url_path: URL prefix when serving Peregrine behind a reverse proxy.
|
||||
# Leave empty ("") for direct access (http://localhost:8502).
|
||||
# Set to "peregrine" when proxied at https://example.com/peregrine.
|
||||
# Maps to STREAMLIT_BASE_URL_PATH in .env → STREAMLIT_SERVER_BASE_URL_PATH
|
||||
# in the container. See: https://docs.streamlit.io/develop/api-reference/configuration/config.toml#server
|
||||
base_url_path: ""
|
||||
|
||||
# server_port: Port Streamlit listens on inside the container (usually 8501).
|
||||
# The external/host port is set via STREAMLIT_PORT in .env.
|
||||
server_port: 8501
|
||||
|
|
@ -20,14 +20,6 @@ mission_preferences:
|
|||
music: "" # e.g. "I've played in bands for 15 years and care deeply about how artists get paid"
|
||||
animal_welfare: "" # e.g. "I volunteer at my local shelter every weekend"
|
||||
education: "" # e.g. "I tutored underserved kids for 3 years and care deeply about literacy"
|
||||
social_impact: "" # e.g. "I want my work to reach people who need help most"
|
||||
health: "" # e.g. "I care about people navigating rare or poorly-understood health conditions"
|
||||
# Note: if left empty, Para 3 defaults to focusing on the people the company
|
||||
# serves — not the industry. Fill in for a more personal connection.
|
||||
|
||||
# Optional: how you write and communicate. Used to shape cover letter voice.
|
||||
# e.g. "Warm and direct. Cares about people first. Finds rare and complex situations fascinating."
|
||||
candidate_voice: ""
|
||||
|
||||
# Set to true to include optional identity-related sections in research briefs.
|
||||
# Both are for your personal decision-making only — never included in applications.
|
||||
|
|
|
|||
|
|
@ -1,8 +1,8 @@
|
|||
{"subject": "Interview Invitation — Senior Engineer", "body": "Hi Alex, we'd love to schedule a 30-min phone screen. Are you available Thursday at 2pm? Please reply to confirm.", "label": "interview_scheduled"}
|
||||
{"subject": "Interview Invitation — Senior Engineer", "body": "Hi Meghan, we'd love to schedule a 30-min phone screen. Are you available Thursday at 2pm? Please reply to confirm.", "label": "interview_scheduled"}
|
||||
{"subject": "Your application to Acme Corp", "body": "Thank you for your interest in the Senior Engineer role. After careful consideration, we have decided to move forward with other candidates whose experience more closely matches our current needs.", "label": "rejected"}
|
||||
{"subject": "Offer Letter — Product Manager at Initech", "body": "Dear Alex, we are thrilled to extend an offer of employment for the Product Manager position. Please find the attached offer letter outlining compensation and start date.", "label": "offer_received"}
|
||||
{"subject": "Quick question about your background", "body": "Hi Alex, I came across your profile and would love to connect. We have a few roles that seem like a great match. Would you be open to a brief chat this week?", "label": "positive_response"}
|
||||
{"subject": "Company Culture Survey — Acme Corp", "body": "Alex, as part of our evaluation process, we invite all candidates to complete our culture fit assessment. The survey takes approximately 15 minutes. Please click the link below.", "label": "survey_received"}
|
||||
{"subject": "Offer Letter — Product Manager at Initech", "body": "Dear Meghan, we are thrilled to extend an offer of employment for the Product Manager position. Please find the attached offer letter outlining compensation and start date.", "label": "offer_received"}
|
||||
{"subject": "Quick question about your background", "body": "Hi Meghan, I came across your profile and would love to connect. We have a few roles that seem like a great match. Would you be open to a brief chat this week?", "label": "positive_response"}
|
||||
{"subject": "Company Culture Survey — Acme Corp", "body": "Meghan, as part of our evaluation process, we invite all candidates to complete our culture fit assessment. The survey takes approximately 15 minutes. Please click the link below.", "label": "survey_received"}
|
||||
{"subject": "Application Received — DataCo", "body": "Thank you for submitting your application for the Data Engineer role at DataCo. We have received your materials and will be in touch if your qualifications match our needs.", "label": "neutral"}
|
||||
{"subject": "Following up on your application", "body": "Hi Alex, I wanted to follow up on your recent application. Your background looks interesting and we'd like to learn more. Can we set up a quick call?", "label": "positive_response"}
|
||||
{"subject": "We're moving forward with other candidates", "body": "Dear Alex, thank you for taking the time to interview with us. After thoughtful consideration, we have decided not to move forward with your candidacy at this time.", "label": "rejected"}
|
||||
{"subject": "Following up on your application", "body": "Hi Meghan, I wanted to follow up on your recent application. Your background looks interesting and we'd like to learn more. Can we set up a quick call?", "label": "positive_response"}
|
||||
{"subject": "We're moving forward with other candidates", "body": "Dear Meghan, thank you for taking the time to interview with us. After thoughtful consideration, we have decided not to move forward with your candidacy at this time.", "label": "rejected"}
|
||||
|
|
|
|||
|
|
@ -1,68 +0,0 @@
|
|||
# Demo LLM config — all backends disabled.
|
||||
# DEMO_MODE=true in the environment blocks the router before any backend is tried,
|
||||
# so these values are never actually used. Kept for schema completeness.
|
||||
backends:
|
||||
anthropic:
|
||||
api_key_env: ANTHROPIC_API_KEY
|
||||
enabled: false
|
||||
model: claude-sonnet-4-6
|
||||
supports_images: true
|
||||
type: anthropic
|
||||
claude_code:
|
||||
api_key: any
|
||||
base_url: http://localhost:3009/v1
|
||||
enabled: false
|
||||
model: claude-code-terminal
|
||||
supports_images: true
|
||||
type: openai_compat
|
||||
github_copilot:
|
||||
api_key: any
|
||||
base_url: http://localhost:3010/v1
|
||||
enabled: false
|
||||
model: gpt-4o
|
||||
supports_images: false
|
||||
type: openai_compat
|
||||
ollama:
|
||||
api_key: ollama
|
||||
base_url: http://localhost:11434/v1
|
||||
enabled: false
|
||||
model: llama3.2:3b
|
||||
supports_images: false
|
||||
type: openai_compat
|
||||
ollama_research:
|
||||
api_key: ollama
|
||||
base_url: http://localhost:11434/v1
|
||||
enabled: false
|
||||
model: llama3.2:3b
|
||||
supports_images: false
|
||||
type: openai_compat
|
||||
vision_service:
|
||||
base_url: http://localhost:8002
|
||||
enabled: false
|
||||
supports_images: true
|
||||
type: vision_service
|
||||
vllm:
|
||||
api_key: ''
|
||||
base_url: http://localhost:8000/v1
|
||||
enabled: false
|
||||
model: __auto__
|
||||
supports_images: false
|
||||
type: openai_compat
|
||||
vllm_research:
|
||||
api_key: ''
|
||||
base_url: http://localhost:8000/v1
|
||||
enabled: false
|
||||
model: __auto__
|
||||
supports_images: false
|
||||
type: openai_compat
|
||||
fallback_order:
|
||||
- ollama
|
||||
- vllm
|
||||
- anthropic
|
||||
research_fallback_order:
|
||||
- vllm_research
|
||||
- ollama_research
|
||||
- anthropic
|
||||
vision_fallback_order:
|
||||
- vision_service
|
||||
- anthropic
|
||||
|
|
@ -1,44 +0,0 @@
|
|||
candidate_accessibility_focus: false
|
||||
candidate_lgbtq_focus: false
|
||||
candidate_voice: Clear, direct, and human. Focuses on impact over jargon.
|
||||
career_summary: 'Experienced software engineer with a background in full-stack development,
|
||||
cloud infrastructure, and data pipelines. Passionate about building tools that help
|
||||
people navigate complex systems.
|
||||
|
||||
'
|
||||
dev_tier_override: null
|
||||
dismissed_banners:
|
||||
- connect_cloud
|
||||
- setup_email
|
||||
docs_dir: /docs
|
||||
email: demo@circuitforge.tech
|
||||
inference_profile: remote
|
||||
linkedin: ''
|
||||
mission_preferences:
|
||||
animal_welfare: ''
|
||||
education: ''
|
||||
health: ''
|
||||
music: ''
|
||||
social_impact: Want my work to reach people who need it most.
|
||||
name: Demo User
|
||||
nda_companies: []
|
||||
ollama_models_dir: ~/models/ollama
|
||||
phone: ''
|
||||
services:
|
||||
ollama_host: localhost
|
||||
ollama_port: 11434
|
||||
ollama_ssl: false
|
||||
ollama_ssl_verify: true
|
||||
searxng_host: searxng
|
||||
searxng_port: 8080
|
||||
searxng_ssl: false
|
||||
searxng_ssl_verify: true
|
||||
streamlit_port: 8501
|
||||
vllm_host: localhost
|
||||
vllm_port: 8000
|
||||
vllm_ssl: false
|
||||
vllm_ssl_verify: true
|
||||
tier: free
|
||||
vllm_models_dir: ~/models/vllm
|
||||
wizard_complete: true
|
||||
wizard_step: 0
|
||||
|
|
@ -1,7 +1,7 @@
|
|||
# Job Seeker Platform — Design Document
|
||||
**Date:** 2026-02-20
|
||||
**Status:** Approved
|
||||
**Candidate:** Alex Rivera
|
||||
**Candidate:** Meghan McCann
|
||||
|
||||
---
|
||||
|
||||
|
|
@ -52,7 +52,7 @@ JobSpy (LinkedIn / Indeed / Glassdoor / ZipRecruiter)
|
|||
Notion DB (daily review — decide what to pursue)
|
||||
└─▶ match.py <notion-page-url>
|
||||
├─ fetch job description from listing URL
|
||||
├─ run Resume Matcher vs. /Library/Documents/JobSearch/Alex_Rivera_Resume_02-19-2025.pdf
|
||||
├─ run Resume Matcher vs. /Library/Documents/JobSearch/Meghan_McCann_Resume_02-19-2025.pdf
|
||||
└─▶ write Match Score + Keyword Gaps back to Notion page
|
||||
|
||||
AIHawk (when ready to apply)
|
||||
|
|
|
|||
|
|
@ -2,9 +2,9 @@
|
|||
|
||||
> **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task.
|
||||
|
||||
**Goal:** Stand up a job discovery pipeline (JobSpy → Notion) with LLM routing, resume matching, and automated LinkedIn application support for Alex Rivera.
|
||||
**Goal:** Stand up a job discovery pipeline (JobSpy → Notion) with LLM routing, resume matching, and automated LinkedIn application support for Meghan McCann.
|
||||
|
||||
**Architecture:** JobSpy scrapes listings from multiple boards and pushes deduplicated results into a Notion database. A local LLM router with 5-backend fallback chain powers AIHawk's application answer generation. Resume Matcher scores each listing against Alex's resume and writes keyword gaps back to Notion.
|
||||
**Architecture:** JobSpy scrapes listings from multiple boards and pushes deduplicated results into a Notion database. A local LLM router with 5-backend fallback chain powers AIHawk's application answer generation. Resume Matcher scores each listing against Meghan's resume and writes keyword gaps back to Notion.
|
||||
|
||||
**Tech Stack:** Python 3.12, conda env `job-seeker`, `python-jobspy`, `notion-client`, `openai` SDK, `anthropic` SDK, `pyyaml`, `pandas`, Resume-Matcher (cloned), Auto_Jobs_Applier_AIHawk (cloned), pytest, pytest-mock
|
||||
|
||||
|
|
@ -194,7 +194,7 @@ This task creates the Notion DB that all scripts write to. Do it once manually.
|
|||
|
||||
**Step 1: Open Notion and create a new database**
|
||||
|
||||
Create a full-page database called **"Alex's Job Search"** in whatever Notion workspace you use for tracking.
|
||||
Create a full-page database called **"Meghan's Job Search"** in whatever Notion workspace you use for tracking.
|
||||
|
||||
**Step 2: Add the required properties**
|
||||
|
||||
|
|
@ -256,7 +256,7 @@ print('Connected to:', db['title'][0]['plain_text'])
|
|||
"
|
||||
```
|
||||
|
||||
Expected: `Connected to: Alex's Job Search`
|
||||
Expected: `Connected to: Meghan's Job Search`
|
||||
|
||||
---
|
||||
|
||||
|
|
@ -752,7 +752,7 @@ Stop it with Ctrl+C — we'll run it on-demand.
|
|||
|
||||
The ATS-clean resume to use with Resume Matcher:
|
||||
```
|
||||
/Library/Documents/JobSearch/Alex_Rivera_Resume_02-19-2025.pdf
|
||||
/Library/Documents/JobSearch/Meghan_McCann_Resume_02-19-2025.pdf
|
||||
```
|
||||
|
||||
---
|
||||
|
|
@ -824,7 +824,7 @@ Expected: `ImportError` — `scripts.match` doesn't exist.
|
|||
```python
|
||||
# scripts/match.py
|
||||
"""
|
||||
Resume Matcher integration: score a Notion job listing against Alex's resume.
|
||||
Resume Matcher integration: score a Notion job listing against Meghan's resume.
|
||||
Writes Match Score and Keyword Gaps back to the Notion page.
|
||||
|
||||
Usage:
|
||||
|
|
@ -840,7 +840,7 @@ from bs4 import BeautifulSoup
|
|||
from notion_client import Client
|
||||
|
||||
CONFIG_DIR = Path(__file__).parent.parent / "config"
|
||||
RESUME_PATH = Path("/Library/Documents/JobSearch/Alex_Rivera_Resume_02-19-2025.pdf")
|
||||
RESUME_PATH = Path("/Library/Documents/JobSearch/Meghan_McCann_Resume_02-19-2025.pdf")
|
||||
|
||||
|
||||
def load_notion() -> tuple[Client, str]:
|
||||
|
|
@ -999,7 +999,7 @@ cp /devl/job-seeker/aihawk/data_folder/plain_text_resume.yaml \
|
|||
/devl/job-seeker/aihawk/data_folder/plain_text_resume.yaml.bak
|
||||
```
|
||||
|
||||
Edit `/devl/job-seeker/aihawk/data_folder/plain_text_resume.yaml` with Alex's info.
|
||||
Edit `/devl/job-seeker/aihawk/data_folder/plain_text_resume.yaml` with Meghan's info.
|
||||
Key fields to fill:
|
||||
- `personal_information`: name, email, phone, linkedin, github (leave blank), location
|
||||
- `work_experience`: pull from the SVG content already extracted
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@
|
|||
|
||||
## Overview
|
||||
|
||||
A Streamlit multi-page web UI that gives Alex (and her partner) a friendly interface to review scraped job listings, curate them before they hit Notion, edit search/LLM/Notion settings, and fill out her AIHawk application profile. Designed to be usable by anyone — no technical knowledge required.
|
||||
A Streamlit multi-page web UI that gives Meghan (and her partner) a friendly interface to review scraped job listings, curate them before they hit Notion, edit search/LLM/Notion settings, and fill out her AIHawk application profile. Designed to be usable by anyone — no technical knowledge required.
|
||||
|
||||
---
|
||||
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@
|
|||
|
||||
> **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task.
|
||||
|
||||
**Goal:** Build a Streamlit web UI with SQLite staging so Alex can review scraped jobs, approve/batch-sync to Notion, edit settings, and complete her AIHawk profile.
|
||||
**Goal:** Build a Streamlit web UI with SQLite staging so Meghan can review scraped jobs, approve/batch-sync to Notion, edit settings, and complete her AIHawk profile.
|
||||
|
||||
**Architecture:** `discover.py` writes to a local SQLite `staging.db` instead of Notion directly. Streamlit pages read/write SQLite for job review, YAML files for settings and resume. A new `sync.py` pushes approved jobs to Notion on demand.
|
||||
|
||||
|
|
@ -788,7 +788,7 @@ sys.path.insert(0, str(Path(__file__).parent.parent))
|
|||
from scripts.db import DEFAULT_DB, init_db, get_job_counts
|
||||
|
||||
st.set_page_config(
|
||||
page_title="Alex's Job Search",
|
||||
page_title="Meghan's Job Search",
|
||||
page_icon="🔍",
|
||||
layout="wide",
|
||||
)
|
||||
|
|
@ -796,7 +796,7 @@ st.set_page_config(
|
|||
init_db(DEFAULT_DB)
|
||||
counts = get_job_counts(DEFAULT_DB)
|
||||
|
||||
st.title("🔍 Alex's Job Search")
|
||||
st.title("🔍 Meghan's Job Search")
|
||||
st.caption("Discover → Review → Sync to Notion")
|
||||
|
||||
st.divider()
|
||||
|
|
@ -877,7 +877,7 @@ st.info("Coming soon — Task 7")
|
|||
```bash
|
||||
conda run -n job-seeker streamlit run /devl/job-seeker/app/Home.py --server.headless true &
|
||||
sleep 4
|
||||
curl -s http://localhost:8501 | grep -q "Alex" && echo "OK" || echo "FAIL"
|
||||
curl -s http://localhost:8501 | grep -q "Meghan" && echo "OK" || echo "FAIL"
|
||||
kill %1
|
||||
```
|
||||
|
||||
|
|
@ -1215,7 +1215,7 @@ git commit -m "feat: add Settings page with search, LLM, and Notion tabs"
|
|||
```python
|
||||
# app/pages/3_Resume_Editor.py
|
||||
"""
|
||||
Resume Editor — form-based editor for Alex's AIHawk profile YAML.
|
||||
Resume Editor — form-based editor for Meghan's AIHawk profile YAML.
|
||||
FILL_IN fields highlighted in amber.
|
||||
"""
|
||||
import sys
|
||||
|
|
@ -1227,7 +1227,7 @@ import yaml
|
|||
|
||||
st.set_page_config(page_title="Resume Editor", page_icon="📝", layout="wide")
|
||||
st.title("📝 Resume Editor")
|
||||
st.caption("Edit Alex's application profile used by AIHawk for LinkedIn Easy Apply.")
|
||||
st.caption("Edit Meghan's application profile used by AIHawk for LinkedIn Easy Apply.")
|
||||
|
||||
RESUME_PATH = Path(__file__).parent.parent.parent / "aihawk" / "data_folder" / "plain_text_resume.yaml"
|
||||
|
||||
|
|
|
|||
|
|
@ -208,7 +208,7 @@ def test_classify_stage_signal_interview(tmp_path):
|
|||
mock_router.complete.return_value = "interview_scheduled"
|
||||
result = classify_stage_signal(
|
||||
"Let's schedule a call",
|
||||
"Hi Alex, we'd love to book a 30-min phone screen with you.",
|
||||
"Hi Meghan, we'd love to book a 30-min phone screen with you.",
|
||||
)
|
||||
assert result == "interview_scheduled"
|
||||
|
||||
|
|
@ -362,11 +362,11 @@ def test_sync_job_emails_classifies_inbound(tmp_path):
|
|||
|
||||
fake_msg_bytes = (
|
||||
b"From: recruiter@acme.com\r\n"
|
||||
b"To: alex@example.com\r\n"
|
||||
b"To: meghan@example.com\r\n"
|
||||
b"Subject: Interview Invitation\r\n"
|
||||
b"Message-ID: <unique-001@acme.com>\r\n"
|
||||
b"\r\n"
|
||||
b"Hi Alex, we'd like to schedule a phone screen."
|
||||
b"Hi Meghan, we'd like to schedule a phone screen."
|
||||
)
|
||||
|
||||
conn_mock = MagicMock()
|
||||
|
|
@ -465,7 +465,7 @@ def test_extract_lead_info_returns_company_and_title():
|
|||
from scripts.imap_sync import extract_lead_info
|
||||
with patch("scripts.imap_sync._CLASSIFIER_ROUTER") as mock_router:
|
||||
mock_router.complete.return_value = '{"company": "Wiz", "title": "Senior TAM"}'
|
||||
result = extract_lead_info("Senior TAM at Wiz", "Hi Alex, we have a role…", "recruiter@wiz.com")
|
||||
result = extract_lead_info("Senior TAM at Wiz", "Hi Meghan, we have a role…", "recruiter@wiz.com")
|
||||
assert result == ("Wiz", "Senior TAM")
|
||||
|
||||
|
||||
|
|
@ -945,7 +945,7 @@ def test_get_email_leads(tmp_path):
|
|||
insert_job(db_path, {
|
||||
"title": "TAM", "company": "Wiz", "url": "email://wiz.com/abc123",
|
||||
"source": "email", "location": "", "is_remote": 0,
|
||||
"salary": "", "description": "Hi Alex…", "date_found": "2026-02-21",
|
||||
"salary": "", "description": "Hi Meghan…", "date_found": "2026-02-21",
|
||||
})
|
||||
leads = get_email_leads(db_path)
|
||||
assert len(leads) == 1
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@
|
|||
## Problem
|
||||
|
||||
The current `company_research.py` produces shallow output:
|
||||
- Resume context is a hardcoded 2-sentence blurb — talking points aren't grounded in Alex's actual experience
|
||||
- Resume context is a hardcoded 2-sentence blurb — talking points aren't grounded in Meghan's actual experience
|
||||
- Search coverage is limited: CEO, HQ, LinkedIn, one generic news query
|
||||
- Output has 4 sections; new data categories (tech stack, funding, culture, competitors) have nowhere to go
|
||||
- No skills/keyword config to drive experience matching against the JD
|
||||
|
|
@ -91,10 +91,10 @@ keywords:
|
|||
## Job Description
|
||||
{JD text, up to 2500 chars}
|
||||
|
||||
## Alex's Matched Experience
|
||||
## Meghan's Matched Experience
|
||||
[Top 2 scored experience entries — full detail]
|
||||
|
||||
Also in Alex's background: [remaining entries as one-liners]
|
||||
Also in Meghan's background: [remaining entries as one-liners]
|
||||
|
||||
## Matched Skills & Keywords
|
||||
Skills matching this JD: {matched_keywords joined}
|
||||
|
|
@ -132,7 +132,7 @@ Skills matching this JD: {matched_keywords joined}
|
|||
| `## Funding & Market Position` | Stage, investors, recent rounds, competitor landscape |
|
||||
| `## Recent Developments` | News, launches, pivots, exec moves |
|
||||
| `## Red Flags & Watch-outs` | Culture issues, layoffs, exec departures, financial stress |
|
||||
| `## Talking Points for Alex` | 5 role-matched, resume-grounded, UpGuard-aware talking points ready to speak aloud |
|
||||
| `## Talking Points for Meghan` | 5 role-matched, resume-grounded, UpGuard-aware talking points ready to speak aloud |
|
||||
|
||||
The talking-points prompt instructs the LLM to: cite the specific matched experience by name, reference matched skills, apply the UpGuard NDA rule, and frame each point as a ready-to-speak sentence.
|
||||
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@
|
|||
|
||||
> **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task.
|
||||
|
||||
**Goal:** Expand company research to gather richer web data (funding, tech stack, competitors, culture/Glassdoor, news), match Alex's resume experience against the JD, and produce a 7-section brief with role-grounded talking points.
|
||||
**Goal:** Expand company research to gather richer web data (funding, tech stack, competitors, culture/Glassdoor, news), match Meghan's resume experience against the JD, and produce a 7-section brief with role-grounded talking points.
|
||||
|
||||
**Architecture:** Parallel SearXNG JSON queries (6 types) feed a structured context block alongside tiered resume experience (top-2 scored full, rest condensed) from `config/resume_keywords.yaml`. Single LLM call produces 7 output sections stored in expanded DB columns.
|
||||
|
||||
|
|
@ -197,7 +197,7 @@ cp config/resume_keywords.yaml config/resume_keywords.yaml.example
|
|||
|
||||
**Step 3: Add to `.gitignore` if personal, or commit both**
|
||||
|
||||
`resume_keywords.yaml` contains Alex's personal keywords — commit both (no secrets).
|
||||
`resume_keywords.yaml` contains Meghan's personal keywords — commit both (no secrets).
|
||||
|
||||
**Step 4: Commit**
|
||||
|
||||
|
|
@ -275,7 +275,7 @@ def test_build_resume_context_top2_full_rest_condensed():
|
|||
assert "Lead Technical Account Manager" in ctx
|
||||
assert "Managed enterprise security accounts" in ctx
|
||||
# Condensed for rest
|
||||
assert "Also in Alex" in ctx
|
||||
assert "Also in Meghan" in ctx
|
||||
assert "Generic Co" in ctx
|
||||
# UpGuard NDA note present
|
||||
assert "NDA" in ctx or "enterprise security vendor" in ctx
|
||||
|
|
@ -297,7 +297,7 @@ Add to `scripts/company_research.py`, after the `_parse_sections` function:
|
|||
_RESUME_YAML = Path(__file__).parent.parent / "aihawk" / "data_folder" / "plain_text_resume.yaml"
|
||||
_KEYWORDS_YAML = Path(__file__).parent.parent / "config" / "resume_keywords.yaml"
|
||||
|
||||
# Companies where Alex has an NDA — reference engagement but not specifics
|
||||
# Companies where Meghan has an NDA — reference engagement but not specifics
|
||||
# unless the role is a strong security/compliance match (score >= 3 on JD).
|
||||
_NDA_COMPANIES = {"upguard"}
|
||||
|
||||
|
|
@ -353,14 +353,14 @@ def _build_resume_context(resume: dict, keywords: list[str], jd: str) -> str:
|
|||
bullets.extend(resp.values())
|
||||
return "\n".join(f" - {b}" for b in bullets)
|
||||
|
||||
lines = ["## Alex's Matched Experience"]
|
||||
lines = ["## Meghan's Matched Experience"]
|
||||
for exp in top2:
|
||||
lines.append(f"\n**{_exp_label(exp)}** (match score: {exp['score']})")
|
||||
lines.append(_exp_bullets(exp))
|
||||
|
||||
if rest:
|
||||
condensed = ", ".join(_exp_label(e) for e in rest)
|
||||
lines.append(f"\nAlso in Alex's background: {condensed}")
|
||||
lines.append(f"\nAlso in Meghan's background: {condensed}")
|
||||
|
||||
return "\n".join(lines)
|
||||
|
||||
|
|
@ -554,7 +554,7 @@ At the top of `research_company()`, after `jd_excerpt`, add:
|
|||
Replace the existing `prompt = f"""..."""` block with:
|
||||
|
||||
```python
|
||||
prompt = f"""You are preparing Alex Rivera for a job interview.
|
||||
prompt = f"""You are preparing Meghan McCann for a job interview.
|
||||
|
||||
Role: **{title}** at **{company}**
|
||||
|
||||
|
|
@ -591,9 +591,9 @@ Draw on the live snippets above; if none available, note what is publicly known.
|
|||
Culture issues, layoffs, exec departures, financial stress, or Glassdoor concerns worth knowing before the call.
|
||||
If nothing notable, write "No significant red flags identified."
|
||||
|
||||
## Talking Points for Alex
|
||||
## Talking Points for Meghan
|
||||
Five specific talking points for the phone screen. Each must:
|
||||
- Reference a concrete experience from Alex's matched background by name
|
||||
- Reference a concrete experience from Meghan's matched background by name
|
||||
(UpGuard NDA rule: say "enterprise security vendor" unless role has clear security focus)
|
||||
- Connect to a specific signal from the JD or company context above
|
||||
- Be 1–2 sentences, ready to speak aloud
|
||||
|
|
@ -615,7 +615,7 @@ Replace the existing return block:
|
|||
"ceo_brief": sections.get("Leadership & Culture", ""),
|
||||
"tech_brief": sections.get("Tech Stack & Product", ""),
|
||||
"funding_brief": sections.get("Funding & Market Position", ""),
|
||||
"talking_points": sections.get("Talking Points for Alex", ""),
|
||||
"talking_points": sections.get("Talking Points for Meghan", ""),
|
||||
# Recent Developments and Red Flags stored in raw_output; rendered from there
|
||||
# (avoids adding more columns right now — can migrate later if needed)
|
||||
}
|
||||
|
|
@ -632,7 +632,7 @@ Wait — `Recent Developments` and `Red Flags` aren't in the return dict above.
|
|||
"funding_brief": sections.get("Funding & Market Position", ""),
|
||||
"competitors_brief": sections.get("Funding & Market Position", ""), # same section
|
||||
"red_flags": sections.get("Red Flags & Watch-outs", ""),
|
||||
"talking_points": sections.get("Talking Points for Alex", ""),
|
||||
"talking_points": sections.get("Talking Points for Meghan", ""),
|
||||
}
|
||||
```
|
||||
|
||||
|
|
@ -769,7 +769,7 @@ Append at the end of the file:
|
|||
with tab_skills:
|
||||
st.subheader("🏷️ Skills & Keywords")
|
||||
st.caption(
|
||||
"These are matched against job descriptions to select Alex's most relevant "
|
||||
"These are matched against job descriptions to select Meghan's most relevant "
|
||||
"experience and highlight keyword overlap in the research brief."
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -486,7 +486,7 @@ backends:
|
|||
api_key: ollama
|
||||
base_url: http://localhost:11434/v1
|
||||
enabled: true
|
||||
model: alex-cover-writer:latest
|
||||
model: meghan-cover-writer:latest
|
||||
type: openai_compat
|
||||
supports_images: false
|
||||
ollama_research:
|
||||
|
|
@ -524,7 +524,7 @@ vision_fallback_order:
|
|||
- vision_service
|
||||
- claude_code
|
||||
- anthropic
|
||||
# Note: 'ollama' (alex-cover-writer) intentionally excluded — research
|
||||
# Note: 'ollama' (meghan-cover-writer) intentionally excluded — research
|
||||
# must never use the fine-tuned writer model.
|
||||
```
|
||||
|
||||
|
|
|
|||
|
|
@ -50,7 +50,7 @@ location_map:
|
|||
|
||||
**Step 2: Create `config/craigslist.yaml`** (personal config — gitignored)
|
||||
|
||||
Copy `.example` as-is (Alex targets sfbay + remote, so this default is correct).
|
||||
Copy `.example` as-is (Meghan targets sfbay + remote, so this default is correct).
|
||||
|
||||
**Step 3: Add to `.gitignore`**
|
||||
|
||||
|
|
|
|||
|
|
@ -43,20 +43,20 @@ Everything that must be extracted into `config/user.yaml` via a `UserProfile` cl
|
|||
|
||||
| File | Hardcoded value | Generalized as |
|
||||
|------|----------------|----------------|
|
||||
| `company_research.py` | `"Alex Rivera"` in prompts | `profile.name` |
|
||||
| `company_research.py` | `"Meghan McCann"` in prompts | `profile.name` |
|
||||
| `company_research.py` | `_NDA_COMPANIES = {"upguard"}` | `profile.nda_companies` |
|
||||
| `company_research.py` | `_SCRAPER_DIR = Path("/Library/...")` | bundled in Docker image |
|
||||
| `generate_cover_letter.py` | `SYSTEM_CONTEXT` with Alex's bio | `profile.career_summary` |
|
||||
| `generate_cover_letter.py` | `SYSTEM_CONTEXT` with Meghan's bio | `profile.career_summary` |
|
||||
| `generate_cover_letter.py` | `LETTERS_DIR = Path("/Library/...")` | `profile.docs_dir` |
|
||||
| `4_Apply.py` | contact block (name/email/phone) | `profile.*` |
|
||||
| `4_Apply.py` | `DOCS_DIR = Path("/Library/...")` | `profile.docs_dir` |
|
||||
| `5_Interviews.py` | email assistant persona "Alex Rivera is a Customer Success..." | `profile.name + profile.career_summary` |
|
||||
| `6_Interview_Prep.py` | `"Alex"` in interviewer prompts | `profile.name` |
|
||||
| `5_Interviews.py` | email assistant persona "Meghan McCann is a Customer Success..." | `profile.name + profile.career_summary` |
|
||||
| `6_Interview_Prep.py` | `"Meghan"` in interviewer prompts | `profile.name` |
|
||||
| `7_Survey.py` | `_SURVEY_SYSTEM` — "The candidate values collaborative teamwork..." | `profile.career_summary` or survey persona field |
|
||||
| `scripts/vision_service/main.py` | `model_id = "vikhyatk/moondream2"`, `revision = "2025-01-09"` | `config/llm.yaml` vision_service block |
|
||||
| `match.py` | `RESUME_PATH = Path("/Library/...Alex_Rivera_Resume...")` | configurable in Settings |
|
||||
| `Home.py` | `"Alex's Job Search"` | `f"{profile.name}'s Job Search"` |
|
||||
| `finetune_local.py` | all `/Library/` paths + `"alex-cover-writer"` | `profile.*` |
|
||||
| `match.py` | `RESUME_PATH = Path("/Library/...Meghan_McCann_Resume...")` | configurable in Settings |
|
||||
| `Home.py` | `"Meghan's Job Search"` | `f"{profile.name}'s Job Search"` |
|
||||
| `finetune_local.py` | all `/Library/` paths + `"meghan-cover-writer"` | `profile.*` |
|
||||
| `2_Settings.py` | `PFP_DIR`, host service paths (manage-services.sh etc.) | removed / compose-driven |
|
||||
| `config/llm.yaml` | hard-coded `base_url` values | auto-generated from `user.yaml` |
|
||||
|
||||
|
|
|
|||
|
|
@ -141,7 +141,7 @@ auto-generates `config/llm.yaml` base URLs from service config, redirects to Hom
|
|||
|
||||
### New: My Profile tab
|
||||
Editable form for all `user.yaml` fields post-setup. Saving regenerates `config/llm.yaml`
|
||||
base URLs automatically. Replaces scattered "Alex's" references in existing tab captions.
|
||||
base URLs automatically. Replaces scattered "Meghan's" references in existing tab captions.
|
||||
|
||||
### Updated: Services tab
|
||||
- Reads port/host from `profile.services.*` instead of hard-coded values
|
||||
|
|
@ -173,21 +173,21 @@ personal data is currently hard-coded.
|
|||
|
||||
| Location | Current | Generalized |
|
||||
|---|---|---|
|
||||
| `company_research.py` prompts | `"Alex Rivera"` | `profile.name` |
|
||||
| `company_research.py` prompts | `"Meghan McCann"` | `profile.name` |
|
||||
| `company_research.py` | `_NDA_COMPANIES = {"upguard"}` | `profile.nda_companies` |
|
||||
| `company_research.py` | `_SCRAPER_DIR = Path("/Library/...")` | bundled in container |
|
||||
| `generate_cover_letter.py` | `SYSTEM_CONTEXT` with Alex's bio | `profile.career_summary` |
|
||||
| `generate_cover_letter.py` | `SYSTEM_CONTEXT` with Meghan's bio | `profile.career_summary` |
|
||||
| `generate_cover_letter.py` | `LETTERS_DIR = Path("/Library/...")` | `profile.docs_dir` |
|
||||
| `generate_cover_letter.py` | `_MISSION_SIGNALS` / `_MISSION_NOTES` (hardcoded) | `profile.mission_industries` list; First-Run Wizard step |
|
||||
| `4_Apply.py` | contact block with name/email/phone | `profile.*` |
|
||||
| `4_Apply.py` | `DOCS_DIR = Path("/Library/...")` | `profile.docs_dir` |
|
||||
| `5_Interviews.py` email assistant | `"Alex Rivera is a Customer Success..."` | `profile.name + profile.career_summary` |
|
||||
| `6_Interview_Prep.py` | `"Alex"` in interviewer prompts | `profile.name` |
|
||||
| `5_Interviews.py` email assistant | `"Meghan McCann is a Customer Success..."` | `profile.name + profile.career_summary` |
|
||||
| `6_Interview_Prep.py` | `"Meghan"` in interviewer prompts | `profile.name` |
|
||||
| `7_Survey.py` `_SURVEY_SYSTEM` | "The candidate values collaborative teamwork, clear communication, growth, and impact." | `profile.career_summary` or user-editable survey persona field |
|
||||
| `scripts/vision_service/main.py` | `model_id = "vikhyatk/moondream2"`, `revision = "2025-01-09"` | configurable in `config/llm.yaml` vision_service block |
|
||||
| `match.py` | `RESUME_PATH = Path("/Library/...Alex_Rivera_Resume...")` | configurable in Settings |
|
||||
| `Home.py` | `"Alex's Job Search"` | `f"{profile.name}'s Job Search"` |
|
||||
| `finetune_local.py` | all `/Library/` paths + `"alex-cover-writer"` | `profile.*` |
|
||||
| `match.py` | `RESUME_PATH = Path("/Library/...Meghan_McCann_Resume...")` | configurable in Settings |
|
||||
| `Home.py` | `"Meghan's Job Search"` | `f"{profile.name}'s Job Search"` |
|
||||
| `finetune_local.py` | all `/Library/` paths + `"meghan-cover-writer"` | `profile.*` |
|
||||
| `2_Settings.py` | `PFP_DIR`, hard-coded service paths | removed / compose-driven |
|
||||
| `config/llm.yaml` | hard-coded `base_url` values | auto-generated from `user.yaml` |
|
||||
| `config/search_profiles.yaml` | `mission_tags` on profiles (implicit) | `profile.mission_industries` drives profile generation in wizard |
|
||||
|
|
|
|||
|
|
@ -388,7 +388,7 @@ def _searxng_running(searxng_url: str = "http://localhost:8888") -> bool:
|
|||
return False
|
||||
```
|
||||
|
||||
Replace all `"Alex Rivera"` / `"Alex's"` / `_NDA_COMPANIES` references:
|
||||
Replace all `"Meghan McCann"` / `"Meghan's"` / `_NDA_COMPANIES` references:
|
||||
```python
|
||||
# At top of research_company():
|
||||
from scripts.user_profile import UserProfile
|
||||
|
|
@ -404,13 +404,13 @@ def _company_label(exp: dict) -> str:
|
|||
return _profile.nda_label(company, score)
|
||||
return company
|
||||
|
||||
# Replace "## Alex's Matched Experience":
|
||||
# Replace "## Meghan's Matched Experience":
|
||||
lines = [f"## {_profile.name if _profile else 'Candidate'}'s Matched Experience"]
|
||||
|
||||
# In research_company() prompt, replace "Alex Rivera":
|
||||
# In research_company() prompt, replace "Meghan McCann":
|
||||
name = _profile.name if _profile else "the candidate"
|
||||
summary = _profile.career_summary if _profile else ""
|
||||
# Replace "You are preparing Alex Rivera for a job interview." with:
|
||||
# Replace "You are preparing Meghan McCann for a job interview." with:
|
||||
prompt = f"""You are preparing {name} for a job interview.\n{summary}\n..."""
|
||||
```
|
||||
|
||||
|
|
@ -419,7 +419,7 @@ prompt = f"""You are preparing {name} for a job interview.\n{summary}\n..."""
|
|||
Replace:
|
||||
```python
|
||||
LETTERS_DIR = Path("/Library/Documents/JobSearch")
|
||||
SYSTEM_CONTEXT = """You are writing cover letters for Alex Rivera..."""
|
||||
SYSTEM_CONTEXT = """You are writing cover letters for Meghan McCann..."""
|
||||
```
|
||||
|
||||
With:
|
||||
|
|
@ -517,9 +517,9 @@ _name = _profile.name if _profile else "Job Seeker"
|
|||
|
||||
Replace:
|
||||
```python
|
||||
st.title("🔍 Alex's Job Search")
|
||||
st.title("🔍 Meghan's Job Search")
|
||||
# and:
|
||||
st.caption(f"Run TF-IDF match scoring against Alex's resume...")
|
||||
st.caption(f"Run TF-IDF match scoring against Meghan's resume...")
|
||||
```
|
||||
With:
|
||||
```python
|
||||
|
|
@ -534,9 +534,9 @@ Replace:
|
|||
```python
|
||||
DOCS_DIR = Path("/Library/Documents/JobSearch")
|
||||
# and the contact paragraph:
|
||||
Paragraph("ALEX RIVERA", name_style)
|
||||
Paragraph("alex@example.com · (555) 867-5309 · ...", contact_style)
|
||||
Paragraph("Warm regards,<br/><br/>Alex Rivera", body_style)
|
||||
Paragraph("MEGHAN McCANN", name_style)
|
||||
Paragraph("meghan.m.mccann@gmail.com · (510) 764-3155 · ...", contact_style)
|
||||
Paragraph("Warm regards,<br/><br/>Meghan McCann", body_style)
|
||||
```
|
||||
With:
|
||||
```python
|
||||
|
|
@ -560,12 +560,12 @@ Replace hard-coded persona strings with:
|
|||
_persona = (
|
||||
f"{_name} is a {_profile.career_summary[:120] if _profile and _profile.career_summary else 'professional'}"
|
||||
)
|
||||
# Replace all occurrences of "Alex Rivera is a Customer Success..." with _persona
|
||||
# Replace all occurrences of "Meghan McCann is a Customer Success..." with _persona
|
||||
```
|
||||
|
||||
**Step 5: 6_Interview_Prep.py — interviewer and Q&A prompts**
|
||||
|
||||
Replace all occurrences of `"Alex"` in f-strings with `_name`.
|
||||
Replace all occurrences of `"Meghan"` in f-strings with `_name`.
|
||||
|
||||
**Step 6: 2_Settings.py — Services tab**
|
||||
|
||||
|
|
@ -588,7 +588,7 @@ Replace the SearXNG entry to use Docker Compose instead of a host path:
|
|||
},
|
||||
```
|
||||
|
||||
Replace all caption strings containing "Alex's" with `f"{_name}'s"`.
|
||||
Replace all caption strings containing "Meghan's" with `f"{_name}'s"`.
|
||||
|
||||
**Step 7: Commit**
|
||||
|
||||
|
|
|
|||
|
|
@ -95,14 +95,14 @@ data/email_compare_sample.jsonl
|
|||
Create `data/email_score.jsonl.example` with fake-but-realistic emails:
|
||||
|
||||
```jsonl
|
||||
{"subject": "Interview Invitation — Senior Engineer", "body": "Hi Alex, we'd love to schedule a 30-min phone screen. Are you available Thursday at 2pm? Please reply to confirm.", "label": "interview_scheduled"}
|
||||
{"subject": "Interview Invitation — Senior Engineer", "body": "Hi Meghan, we'd love to schedule a 30-min phone screen. Are you available Thursday at 2pm? Please reply to confirm.", "label": "interview_scheduled"}
|
||||
{"subject": "Your application to Acme Corp", "body": "Thank you for your interest in the Senior Engineer role. After careful consideration, we have decided to move forward with other candidates whose experience more closely matches our current needs.", "label": "rejected"}
|
||||
{"subject": "Offer Letter — Product Manager at Initech", "body": "Dear Alex, we are thrilled to extend an offer of employment for the Product Manager position. Please find the attached offer letter outlining compensation and start date.", "label": "offer_received"}
|
||||
{"subject": "Quick question about your background", "body": "Hi Alex, I came across your profile and would love to connect. We have a few roles that seem like a great match. Would you be open to a brief chat this week?", "label": "positive_response"}
|
||||
{"subject": "Company Culture Survey — Acme Corp", "body": "Alex, as part of our evaluation process, we invite all candidates to complete our culture fit assessment. The survey takes approximately 15 minutes. Please click the link below.", "label": "survey_received"}
|
||||
{"subject": "Offer Letter — Product Manager at Initech", "body": "Dear Meghan, we are thrilled to extend an offer of employment for the Product Manager position. Please find the attached offer letter outlining compensation and start date.", "label": "offer_received"}
|
||||
{"subject": "Quick question about your background", "body": "Hi Meghan, I came across your profile and would love to connect. We have a few roles that seem like a great match. Would you be open to a brief chat this week?", "label": "positive_response"}
|
||||
{"subject": "Company Culture Survey — Acme Corp", "body": "Meghan, as part of our evaluation process, we invite all candidates to complete our culture fit assessment. The survey takes approximately 15 minutes. Please click the link below.", "label": "survey_received"}
|
||||
{"subject": "Application Received — DataCo", "body": "Thank you for submitting your application for the Data Engineer role at DataCo. We have received your materials and will be in touch if your qualifications match our needs.", "label": "neutral"}
|
||||
{"subject": "Following up on your application", "body": "Hi Alex, I wanted to follow up on your recent application. Your background looks interesting and we'd like to learn more. Can we set up a quick call?", "label": "positive_response"}
|
||||
{"subject": "We're moving forward with other candidates", "body": "Dear Alex, thank you for taking the time to interview with us. After thoughtful consideration, we have decided not to move forward with your candidacy at this time.", "label": "rejected"}
|
||||
{"subject": "Following up on your application", "body": "Hi Meghan, I wanted to follow up on your recent application. Your background looks interesting and we'd like to learn more. Can we set up a quick call?", "label": "positive_response"}
|
||||
{"subject": "We're moving forward with other candidates", "body": "Dear Meghan, thank you for taking the time to interview with us. After thoughtful consideration, we have decided not to move forward with your candidacy at this time.", "label": "rejected"}
|
||||
```
|
||||
|
||||
**Step 3: Commit**
|
||||
|
|
@ -493,7 +493,7 @@ def test_gliclass_adapter_returns_highest_score():
|
|||
return_value=mock_pipeline_instance):
|
||||
adapter = GLiClassAdapter("test-gli", "some/model")
|
||||
adapter.load()
|
||||
result = adapter.classify("Offer letter enclosed", "Dear Alex, we are pleased to offer...")
|
||||
result = adapter.classify("Offer letter enclosed", "Dear Meghan, we are pleased to offer...")
|
||||
|
||||
assert result == "offer_received"
|
||||
```
|
||||
|
|
|
|||
|
|
@ -1,229 +0,0 @@
|
|||
# Public Mirror Strategy — Design
|
||||
|
||||
**Date:** 2026-03-02
|
||||
**Scope:** Peregrine (initial); pattern applies to all future CircuitForge products
|
||||
**Status:** Approved — ready for implementation planning
|
||||
|
||||
---
|
||||
|
||||
## Summary
|
||||
|
||||
Publish Peregrine to GitHub and Codeberg as push-mirrored community hubs. Full BSL 1.1
|
||||
codebase, no MIT carve-outs. Git hooks enforcing safety + commit format committed to the
|
||||
repo so every clone gets them automatically. Issue templates and a CONTRIBUTING.md make
|
||||
the project approachable for external contributors. FossHub added when a Windows installer
|
||||
exists.
|
||||
|
||||
---
|
||||
|
||||
## License
|
||||
|
||||
**Whole repo: BSL 1.1.** No MIT exception — including `scrapers/`. The original rationale
|
||||
for making scrapers MIT (community maintenance) is equally served by BSL 1.1: contributors
|
||||
can fix broken scrapers, submit PRs, and run the tool at home for free. Making scrapers MIT
|
||||
would allow competitors to lift CircuitForge-authored scraper code into a competing commercial product
|
||||
without a license, which is not in CircuitForge's interest.
|
||||
|
||||
The `LICENSE` file at repo root covers the full codebase. No `LICENSE-MIT` file needed.
|
||||
CONTRIBUTING.md explains what BSL means practically for contributors.
|
||||
|
||||
BSL converts to MIT after 4 years per the standard BSL 1.1 terms.
|
||||
|
||||
---
|
||||
|
||||
## Mirror Sync
|
||||
|
||||
Forgejo has built-in **push mirror** support (Settings → Mirror → Push mirrors). Every push
|
||||
to the primary Forgejo repo auto-replicates within seconds — no CI/CD overhead, no cron job.
|
||||
|
||||
Two mirrors:
|
||||
- `github.com/CircuitForge/peregrine`
|
||||
- `codeberg.org/CircuitForge/peregrine`
|
||||
|
||||
Both under the `CircuitForge` org (consistent branding; not the personal `pyr0ball` account).
|
||||
GitHub and Codeberg orgs to be created if not already present.
|
||||
|
||||
---
|
||||
|
||||
## README Canonical-Source Banner
|
||||
|
||||
A prominent notice near the top of the README:
|
||||
|
||||
```
|
||||
> **Primary development** happens at [git.opensourcesolarpunk.com](https://git.opensourcesolarpunk.com/pyr0ball/peregrine).
|
||||
> GitHub and Codeberg are push mirrors. Issues and PRs are welcome on either mirror.
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## CONTRIBUTING.md
|
||||
|
||||
Sections:
|
||||
|
||||
1. **License** — BSL 1.1 overview. What it means: self-hosting for personal non-commercial
|
||||
use is free; commercial SaaS use requires a paid license; converts to MIT after 4 years.
|
||||
Link to full `LICENSE`.
|
||||
|
||||
2. **CLA** — One-sentence acknowledgment in bold:
|
||||
*"By submitting a pull request you agree that your contribution is licensed under the
|
||||
project's BSL 1.1 terms."* No separate CLA file or signature process — the PR template
|
||||
repeats this as a checkbox.
|
||||
|
||||
3. **Dev setup** — Docker path (recommended) and conda path, pointing to
|
||||
`docs/getting-started/installation.md`.
|
||||
|
||||
4. **PR process** — GitHub and Codeberg PRs are reviewed and cherry-picked to Forgejo; Forgejo
|
||||
is the canonical merge target. Contributors do not need a Forgejo account.
|
||||
|
||||
5. **Commit format** — `type: description` (or `type(scope): description`). Valid types:
|
||||
`feat fix docs chore test refactor perf ci build`. Hooks enforce this — if your commit is
|
||||
rejected, the hook message tells you exactly why.
|
||||
|
||||
6. **Issue guidance** — link to templates; note that security issues go to
|
||||
`security@circuitforge.tech`, not GitHub Issues.
|
||||
|
||||
---
|
||||
|
||||
## Git Hooks (`.githooks/`)
|
||||
|
||||
Committed to the repo. Activated by `setup.sh` via:
|
||||
|
||||
```sh
|
||||
git config core.hooksPath .githooks
|
||||
```
|
||||
|
||||
`setup.sh` already runs on first clone; hook activation is added there so no contributor
|
||||
has to think about it.
|
||||
|
||||
### `pre-commit`
|
||||
|
||||
Blocks the commit if any staged file matches:
|
||||
|
||||
**Path blocklist** (exact paths and glob patterns):
|
||||
- `config/user.yaml`
|
||||
- `config/server.yaml`
|
||||
- `config/llm.yaml`
|
||||
- `config/notion.yaml`
|
||||
- `config/adzuna.yaml`
|
||||
- `config/label_tool.yaml`
|
||||
- `.env`
|
||||
- `demo/data/*.db`
|
||||
- `data/*.db`
|
||||
- `data/*.jsonl`
|
||||
|
||||
**Content scan** (regex on staged diff):
|
||||
- `sk-[A-Za-z0-9]{20,}` — OpenAI-style keys
|
||||
- `Bearer [A-Za-z0-9\-_]{20,}` — generic bearer tokens
|
||||
- `api_key:\s*["\']?[A-Za-z0-9\-_]{16,}` — YAML key fields with values
|
||||
|
||||
On match: prints the offending file/pattern, aborts with a clear message and hint to use
|
||||
`git restore --staged <file>` or add to `.gitignore`.
|
||||
|
||||
### `commit-msg`
|
||||
|
||||
Reads `$1` (the commit message temp file). Rejects if:
|
||||
- Message is empty or whitespace-only
|
||||
- First line does not match `^(feat|fix|docs|chore|test|refactor|perf|ci|build)(\(.+\))?: .+`
|
||||
|
||||
On rejection: prints the required format and lists valid types. Does not touch the message
|
||||
(no auto-rewriting).
|
||||
|
||||
---
|
||||
|
||||
## Issue Templates
|
||||
|
||||
Location: `.github/ISSUE_TEMPLATE/` (GitHub) and `.gitea/ISSUE_TEMPLATE/` (Codeberg/Forgejo).
|
||||
|
||||
### Bug Report (`bug_report.md`)
|
||||
|
||||
Fields:
|
||||
- Peregrine version (output of `./manage.sh status`)
|
||||
- OS and runtime (Docker / conda-direct)
|
||||
- Steps to reproduce
|
||||
- Expected behaviour
|
||||
- Actual behaviour (with log snippets)
|
||||
- Relevant config (redact keys)
|
||||
|
||||
### Feature Request (`feature_request.md`)
|
||||
|
||||
Fields:
|
||||
- Problem statement ("I want to do X but currently...")
|
||||
- Proposed solution
|
||||
- Alternatives considered
|
||||
- Which tier this might belong to (free / paid / premium / ultra)
|
||||
- Willingness to contribute a PR
|
||||
|
||||
### PR Template (`.github/pull_request_template.md`)
|
||||
|
||||
Fields:
|
||||
- Summary of changes
|
||||
- Related issue(s)
|
||||
- Type of change (feat / fix / docs / ...)
|
||||
- Testing done
|
||||
- **CLA checkbox:** `[ ] I agree my contribution is licensed under the project's BSL 1.1 terms.`
|
||||
|
||||
### Security (`SECURITY.md`)
|
||||
|
||||
Single page: do not open a GitHub Issue for security vulnerabilities. Email
|
||||
`security@circuitforge.tech`. Response target: 72 hours.
|
||||
|
||||
---
|
||||
|
||||
## GitHub-Specific Extras
|
||||
|
||||
**CI (GitHub Actions)** — `.github/workflows/ci.yml`:
|
||||
- Trigger: push and PR to `main`
|
||||
- Steps: checkout → set up Python 3.11 → install deps from `requirements.txt` →
|
||||
`pytest tests/ -v`
|
||||
- Free for public repos; gives contributors a green checkmark without needing local conda
|
||||
|
||||
**Repo topics:** `job-search`, `ai-assistant`, `privacy`, `streamlit`, `python`,
|
||||
`open-core`, `neurodivergent`, `accessibility`, `bsl`
|
||||
|
||||
**Releases:** Mirror Forgejo tags. Release notes auto-generated from conventional commit
|
||||
subjects grouped by type.
|
||||
|
||||
---
|
||||
|
||||
## FossHub (Future — Windows RC prerequisite)
|
||||
|
||||
When a signed Windows installer (`.msi` or `.exe`) is ready:
|
||||
|
||||
1. Submit via FossHub publisher portal (`https://www.fosshub.com/contribute.html`)
|
||||
2. Requirements: stable versioned release, no bundled software, no adware
|
||||
3. FossHub gives a trusted, antivirus-clean download URL — important for an app running on
|
||||
users' personal machines
|
||||
4. Link FossHub download from README and from `circuitforge.tech` downloads section
|
||||
|
||||
No action needed until Windows RC exists.
|
||||
|
||||
---
|
||||
|
||||
## File Map
|
||||
|
||||
```
|
||||
peregrine/
|
||||
├── .githooks/
|
||||
│ ├── pre-commit # sensitive file + key pattern blocker
|
||||
│ └── commit-msg # conventional commit format enforcer
|
||||
├── .github/
|
||||
│ ├── workflows/
|
||||
│ │ └── ci.yml # pytest on push/PR
|
||||
│ ├── ISSUE_TEMPLATE/
|
||||
│ │ ├── bug_report.md
|
||||
│ │ └── feature_request.md
|
||||
│ └── pull_request_template.md
|
||||
├── .gitea/
|
||||
│ └── ISSUE_TEMPLATE/ # mirrors .github/ISSUE_TEMPLATE/ for Forgejo/Codeberg
|
||||
├── CONTRIBUTING.md
|
||||
└── SECURITY.md
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Out of Scope
|
||||
|
||||
- Forgejo mirror configuration (done via Forgejo web UI, not committed to repo)
|
||||
- GitHub/Codeberg org creation (manual one-time step)
|
||||
- Windows installer build pipeline (separate future effort)
|
||||
- `circuitforge-core` extraction (deferred until second product)
|
||||
|
|
@ -12,53 +12,41 @@ free < paid < premium
|
|||
|
||||
| Tier | Description |
|
||||
|------|-------------|
|
||||
| `free` | Core discovery pipeline, resume matching, basic UI. AI features unlock with BYOK. |
|
||||
| `paid` | Managed cloud LLM (no key required), integrations, calendar, notifications |
|
||||
| `free` | Core discovery pipeline, resume matching, and basic UI — no LLM features |
|
||||
| `paid` | All AI features: cover letters, research, email, integrations, calendar, notifications |
|
||||
| `premium` | Adds fine-tuning and multi-user support |
|
||||
|
||||
---
|
||||
|
||||
## BYOK — Bring Your Own Key
|
||||
|
||||
If you configure any LLM backend in `config/llm.yaml` — local (ollama, vllm) **or** an external API key (Anthropic, OpenAI, etc.) — **all pure LLM-call features unlock automatically**, regardless of your subscription tier.
|
||||
|
||||
The paid tier gives you access to CircuitForge's managed cloud inference. It does not gate your ability to use AI when you're providing the compute yourself.
|
||||
|
||||
Features that unlock with BYOK are listed in `BYOK_UNLOCKABLE` in `tiers.py`. Features that depend on CircuitForge-operated infrastructure (integrations, email classifier training, fine-tuned models) remain tier-gated.
|
||||
|
||||
---
|
||||
|
||||
## Feature Gate Table
|
||||
|
||||
Features listed here require a minimum tier. Features not in this table are available to all tiers (free by default).
|
||||
|
||||
### Wizard LLM generation
|
||||
|
||||
| Feature key | Minimum tier | BYOK unlocks? | Description |
|
||||
|-------------|-------------|---------------|-------------|
|
||||
| `llm_career_summary` | paid | ✅ yes | LLM-assisted career summary generation in the wizard |
|
||||
| `llm_expand_bullets` | paid | ✅ yes | LLM expansion of resume bullet points |
|
||||
| `llm_suggest_skills` | paid | ✅ yes | LLM skill suggestions from resume content |
|
||||
| `llm_voice_guidelines` | premium | ✅ yes | LLM writing voice and tone guidelines |
|
||||
| `llm_job_titles` | paid | ✅ yes | LLM-suggested job title variations for search |
|
||||
| `llm_mission_notes` | paid | ✅ yes | LLM-generated mission alignment notes |
|
||||
| `llm_keywords_blocklist` | paid | ❌ no | Orchestration pipeline over background keyword data |
|
||||
| Feature key | Minimum tier | Description |
|
||||
|-------------|-------------|-------------|
|
||||
| `llm_career_summary` | paid | LLM-assisted career summary generation in the wizard |
|
||||
| `llm_expand_bullets` | paid | LLM expansion of resume bullet points |
|
||||
| `llm_suggest_skills` | paid | LLM skill suggestions from resume content |
|
||||
| `llm_voice_guidelines` | premium | LLM writing voice and tone guidelines |
|
||||
| `llm_job_titles` | paid | LLM-suggested job title variations for search |
|
||||
| `llm_keywords_blocklist` | paid | LLM-suggested blocklist keywords |
|
||||
| `llm_mission_notes` | paid | LLM-generated mission alignment notes |
|
||||
|
||||
### App features
|
||||
|
||||
| Feature key | Minimum tier | BYOK unlocks? | Description |
|
||||
|-------------|-------------|---------------|-------------|
|
||||
| `company_research` | paid | ✅ yes | Auto-generated company research briefs pre-interview |
|
||||
| `interview_prep` | paid | ✅ yes | Live reference sheet and practice Q&A during calls |
|
||||
| `survey_assistant` | paid | ✅ yes | Culture-fit survey Q&A helper (text + screenshot) |
|
||||
| `email_classifier` | paid | ❌ no | IMAP email sync with LLM classification (training pipeline) |
|
||||
| `model_fine_tuning` | premium | ❌ no | Cover letter model fine-tuning on personal writing |
|
||||
| `shared_cover_writer_model` | paid | ❌ no | Access to shared fine-tuned cover letter model (CF infra) |
|
||||
| `multi_user` | premium | ❌ no | Multiple user profiles on one instance |
|
||||
| Feature key | Minimum tier | Description |
|
||||
|-------------|-------------|-------------|
|
||||
| `company_research` | paid | Auto-generated company research briefs pre-interview |
|
||||
| `interview_prep` | paid | Live reference sheet and practice Q&A during calls |
|
||||
| `email_classifier` | paid | IMAP email sync with LLM classification |
|
||||
| `survey_assistant` | paid | Culture-fit survey Q&A helper (text + screenshot) |
|
||||
| `model_fine_tuning` | premium | Cover letter model fine-tuning on personal writing |
|
||||
| `shared_cover_writer_model` | paid | Access to shared fine-tuned cover letter model |
|
||||
| `multi_user` | premium | Multiple user profiles on one instance |
|
||||
|
||||
### Integrations
|
||||
|
||||
Integrations depend on CircuitForge-operated infrastructure and are **not** BYOK-unlockable.
|
||||
### Integrations (paid)
|
||||
|
||||
| Feature key | Minimum tier | Description |
|
||||
|-------------|-------------|-------------|
|
||||
|
|
@ -85,46 +73,31 @@ The following integrations are free for all tiers and are not in the `FEATURES`
|
|||
|
||||
## API Reference
|
||||
|
||||
### `can_use(tier, feature, has_byok=False) -> bool`
|
||||
### `can_use(tier, feature) -> bool`
|
||||
|
||||
Returns `True` if the given tier has access to the feature. Pass `has_byok=has_configured_llm()` to apply BYOK unlock logic.
|
||||
Returns `True` if the given tier has access to the feature.
|
||||
|
||||
```python
|
||||
from app.wizard.tiers import can_use, has_configured_llm
|
||||
from app.wizard.tiers import can_use
|
||||
|
||||
byok = has_configured_llm()
|
||||
can_use("free", "company_research") # False
|
||||
can_use("paid", "company_research") # True
|
||||
can_use("premium", "company_research") # True
|
||||
|
||||
can_use("free", "company_research") # False — no LLM configured
|
||||
can_use("free", "company_research", has_byok=True) # True — BYOK unlocks it
|
||||
can_use("paid", "company_research") # True
|
||||
|
||||
can_use("free", "notion_sync", has_byok=True) # False — integration, not BYOK-unlockable
|
||||
can_use("free", "unknown_feature") # True — ungated features return True
|
||||
can_use("invalid", "company_research") # False — invalid tier string
|
||||
can_use("free", "unknown_feature") # True — ungated features return True
|
||||
can_use("invalid", "company_research") # False — invalid tier string
|
||||
```
|
||||
|
||||
### `has_configured_llm(config_path=None) -> bool`
|
||||
### `tier_label(feature) -> str`
|
||||
|
||||
Returns `True` if at least one non-vision LLM backend is enabled in `config/llm.yaml`. Local backends (ollama, vllm) and external API keys both count.
|
||||
|
||||
```python
|
||||
from app.wizard.tiers import has_configured_llm
|
||||
|
||||
has_configured_llm() # True if any backend is enabled and not vision_service
|
||||
```
|
||||
|
||||
### `tier_label(feature, has_byok=False) -> str`
|
||||
|
||||
Returns a display badge string for locked features, or `""` if the feature is free, unlocked, or BYOK-accessible.
|
||||
Returns a display badge string for locked features, or `""` if the feature is free or unknown.
|
||||
|
||||
```python
|
||||
from app.wizard.tiers import tier_label
|
||||
|
||||
tier_label("company_research") # "🔒 Paid"
|
||||
tier_label("company_research", has_byok=True) # "" (BYOK unlocks, no label shown)
|
||||
tier_label("model_fine_tuning") # "⭐ Premium"
|
||||
tier_label("notion_sync", has_byok=True) # "🔒 Paid" (BYOK doesn't unlock integrations)
|
||||
tier_label("job_discovery") # "" (ungated)
|
||||
tier_label("company_research") # "🔒 Paid"
|
||||
tier_label("model_fine_tuning") # "⭐ Premium"
|
||||
tier_label("job_discovery") # "" (ungated)
|
||||
```
|
||||
|
||||
---
|
||||
|
|
@ -147,42 +120,36 @@ dev_tier_override: premium # overrides tier locally for testing
|
|||
|
||||
## Adding a New Feature Gate
|
||||
|
||||
1. Add the feature to `FEATURES` in `app/wizard/tiers.py`. If it's a pure LLM call that should unlock with BYOK, also add it to `BYOK_UNLOCKABLE`:
|
||||
1. Add the feature to `FEATURES` in `app/wizard/tiers.py`:
|
||||
|
||||
```python
|
||||
FEATURES: dict[str, str] = {
|
||||
# ...existing entries...
|
||||
"my_new_llm_feature": "paid",
|
||||
"my_new_feature": "paid", # or "free" | "premium"
|
||||
}
|
||||
|
||||
BYOK_UNLOCKABLE: frozenset[str] = frozenset({
|
||||
# ...existing entries...
|
||||
"my_new_llm_feature", # add here if it's a pure LLM call
|
||||
})
|
||||
```
|
||||
|
||||
2. Guard the feature in the UI, passing `has_byok`:
|
||||
2. Guard the feature in the UI:
|
||||
|
||||
```python
|
||||
from app.wizard.tiers import can_use, tier_label, has_configured_llm
|
||||
from app.wizard.tiers import can_use, tier_label
|
||||
from scripts.user_profile import UserProfile
|
||||
|
||||
_byok = has_configured_llm()
|
||||
if can_use(user.tier, "my_new_llm_feature", has_byok=_byok):
|
||||
user = UserProfile()
|
||||
if can_use(user.tier, "my_new_feature"):
|
||||
# show the feature
|
||||
pass
|
||||
else:
|
||||
st.info(f"Requires a paid plan or a configured LLM backend.")
|
||||
st.info(f"My New Feature requires a {tier_label('my_new_feature').replace('🔒 ', '').replace('⭐ ', '')} plan.")
|
||||
```
|
||||
|
||||
3. Add tests in `tests/test_wizard_tiers.py` covering both the tier gate and BYOK unlock:
|
||||
3. Add a test in `tests/test_tiers.py`:
|
||||
|
||||
```python
|
||||
def test_my_new_feature_requires_paid_without_byok():
|
||||
assert can_use("free", "my_new_llm_feature") is False
|
||||
assert can_use("paid", "my_new_llm_feature") is True
|
||||
|
||||
def test_my_new_feature_byok_unlocks():
|
||||
assert can_use("free", "my_new_llm_feature", has_byok=True) is True
|
||||
def test_my_new_feature_requires_paid():
|
||||
assert can_use("free", "my_new_feature") is False
|
||||
assert can_use("paid", "my_new_feature") is True
|
||||
assert can_use("premium", "my_new_feature") is True
|
||||
```
|
||||
|
||||
---
|
||||
|
|
|
|||
|
|
@ -26,9 +26,6 @@ LABELS: list[str] = [
|
|||
"positive_response",
|
||||
"survey_received",
|
||||
"neutral",
|
||||
"event_rescheduled",
|
||||
"unrelated",
|
||||
"digest",
|
||||
]
|
||||
|
||||
# Natural-language descriptions used by the RerankerAdapter.
|
||||
|
|
@ -38,10 +35,7 @@ LABEL_DESCRIPTIONS: dict[str, str] = {
|
|||
"rejected": "application rejected or not moving forward with candidacy",
|
||||
"positive_response": "positive recruiter interest or request to connect",
|
||||
"survey_received": "invitation to complete a culture-fit survey or assessment",
|
||||
"neutral": "automated ATS confirmation such as application received",
|
||||
"event_rescheduled": "an interview or scheduled event moved to a new time",
|
||||
"unrelated": "non-job-search email unrelated to any application or recruiter",
|
||||
"digest": "job digest or multi-listing email with multiple job postings",
|
||||
"neutral": "automated ATS confirmation or unrelated email",
|
||||
}
|
||||
|
||||
# Lazy import shims — allow tests to patch without requiring the libs installed.
|
||||
|
|
@ -141,23 +135,23 @@ class ClassifierAdapter(abc.ABC):
|
|||
class ZeroShotAdapter(ClassifierAdapter):
|
||||
"""Wraps any transformers zero-shot-classification pipeline.
|
||||
|
||||
load() calls pipeline("zero-shot-classification", model=..., device=...) to get
|
||||
an inference callable, stored as self._pipeline. classify() then calls
|
||||
self._pipeline(text, LABELS, multi_label=False). In tests, patch
|
||||
'scripts.classifier_adapters.pipeline' with a MagicMock whose .return_value is
|
||||
itself a MagicMock(return_value={...}) to simulate both the factory call and the
|
||||
inference call.
|
||||
Design note: the module-level ``pipeline`` shim is resolved once in load()
|
||||
and stored as ``self._pipeline``. classify() calls ``self._pipeline`` directly
|
||||
with (text, candidate_labels, multi_label=False). This makes the adapter
|
||||
patchable in tests via ``patch('scripts.classifier_adapters.pipeline', mock)``:
|
||||
``mock`` is stored in ``self._pipeline`` and called with the text during
|
||||
classify(), so ``mock.call_args`` captures the arguments.
|
||||
|
||||
two_pass: if True, classify() runs a second pass restricted to the top-2 labels
|
||||
from the first pass, forcing a binary choice. This typically improves confidence
|
||||
without the accuracy cost of a second run over the full label set.
|
||||
For real transformers use, ``pipeline`` is the factory function and the call
|
||||
in classify() initialises the pipeline on first use (lazy loading without
|
||||
pre-caching a model object). Subclasses that need a pre-warmed model object
|
||||
should override load() to call the factory and store the result.
|
||||
"""
|
||||
|
||||
def __init__(self, name: str, model_id: str, two_pass: bool = False) -> None:
|
||||
def __init__(self, name: str, model_id: str) -> None:
|
||||
self._name = name
|
||||
self._model_id = model_id
|
||||
self._pipeline: Any = None
|
||||
self._two_pass = two_pass
|
||||
|
||||
@property
|
||||
def name(self) -> str:
|
||||
|
|
@ -172,9 +166,9 @@ class ZeroShotAdapter(ClassifierAdapter):
|
|||
_pipe_fn = _mod.pipeline
|
||||
if _pipe_fn is None:
|
||||
raise ImportError("transformers not installed — run: pip install transformers")
|
||||
device = 0 if _cuda_available() else -1
|
||||
# Instantiate the pipeline once; classify() calls the resulting object on each text.
|
||||
self._pipeline = _pipe_fn("zero-shot-classification", model=self._model_id, device=device)
|
||||
# Store the pipeline factory/callable so that test patches are honoured.
|
||||
# classify() will call self._pipeline(text, labels, multi_label=False).
|
||||
self._pipeline = _pipe_fn
|
||||
|
||||
def unload(self) -> None:
|
||||
self._pipeline = None
|
||||
|
|
@ -184,9 +178,6 @@ class ZeroShotAdapter(ClassifierAdapter):
|
|||
self.load()
|
||||
text = f"Subject: {subject}\n\n{body[:600]}"
|
||||
result = self._pipeline(text, LABELS, multi_label=False)
|
||||
if self._two_pass and len(result["labels"]) >= 2:
|
||||
top2 = result["labels"][:2]
|
||||
result = self._pipeline(text, top2, multi_label=False)
|
||||
return result["labels"][0]
|
||||
|
||||
|
||||
|
|
@ -258,6 +249,6 @@ class RerankerAdapter(ClassifierAdapter):
|
|||
if self._reranker is None:
|
||||
self.load()
|
||||
text = f"Subject: {subject}\n\n{body[:600]}"
|
||||
pairs = [[text, LABEL_DESCRIPTIONS.get(label, label.replace("_", " "))] for label in LABELS]
|
||||
pairs = [[text, LABEL_DESCRIPTIONS[label]] for label in LABELS]
|
||||
scores: list[float] = self._reranker.compute_score(pairs, normalize=True)
|
||||
return LABELS[scores.index(max(scores))]
|
||||
|
|
|
|||
|
|
@ -73,20 +73,6 @@ _MISSION_SIGNALS: dict[str, list[str]] = {
|
|||
"social good", "civic", "public health", "mental health", "food security",
|
||||
"housing", "homelessness", "poverty", "workforce development",
|
||||
],
|
||||
# Health is listed last — it's a genuine but lower-priority connection than
|
||||
# music/animals/education/social_impact. detect_mission_alignment returns on first
|
||||
# match, so dict order = preference order.
|
||||
"health": [
|
||||
"patient", "patients", "healthcare", "health tech", "healthtech",
|
||||
"pharma", "pharmaceutical", "clinical", "medical",
|
||||
"hospital", "clinic", "therapy", "therapist",
|
||||
"rare disease", "life sciences", "life science",
|
||||
"treatment", "prescription", "biotech", "biopharma", "medtech",
|
||||
"behavioral health", "population health",
|
||||
"care management", "care coordination", "oncology", "specialty pharmacy",
|
||||
"provider network", "payer", "health plan", "benefits administration",
|
||||
"ehr", "emr", "fhir", "hipaa",
|
||||
],
|
||||
}
|
||||
|
||||
_candidate = _profile.name if _profile else "the candidate"
|
||||
|
|
@ -113,15 +99,6 @@ _MISSION_DEFAULTS: dict[str, str] = {
|
|||
f"cause {_candidate} cares deeply about. Para 3 should warmly reflect their genuine "
|
||||
"desire to apply their skills to work that makes a real difference in people's lives."
|
||||
),
|
||||
"health": (
|
||||
f"This company works in healthcare, life sciences, or patient care. "
|
||||
f"Do NOT write about {_candidate}'s passion for pharmaceuticals or healthcare as an "
|
||||
"industry. Instead, Para 3 should reflect genuine care for the PEOPLE these companies "
|
||||
"exist to serve — those navigating complex, often invisible, or unusual health journeys; "
|
||||
"patients facing rare or poorly understood conditions; individuals whose situations don't "
|
||||
"fit a clean category. The connection is to the humans behind the data, not the industry. "
|
||||
"If the user has provided a personal note, use that to anchor Para 3 specifically."
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -212,24 +189,6 @@ def build_prompt(
|
|||
return "\n".join(parts)
|
||||
|
||||
|
||||
def _trim_to_letter_end(text: str) -> str:
    """Remove repetitive hallucinated content after the first complete sign-off.

    Fine-tuned models sometimes loop after completing the letter. This cuts at
    the first closing + candidate name so only the intended letter is saved.

    Args:
        text: Raw model output.

    Returns:
        ``text`` truncated immediately after the first sign-off phrase that is
        followed, on a later line, by the candidate's first name (or by any
        single word when no first name is available), with surrounding
        whitespace stripped. When no sign-off is found, the stripped input is
        returned unchanged.
    """
    # A profile whose name is empty or whitespace-only splits into an empty
    # list; indexing [0] on it would raise IndexError. Treat that case the
    # same as "no profile" and fall back to the generic ``\w+`` pattern.
    name_parts = (_profile.name or "").split() if _profile else []
    candidate_first = name_parts[0] if name_parts else ""
    pattern = (
        r'(?:Warm regards|Sincerely|Best regards|Kind regards|Thank you)[,.]?\s*\n+\s*'
        + (re.escape(candidate_first) if candidate_first else r'\w+')
        + r'\b'
    )
    m = re.search(pattern, text, re.IGNORECASE)
    if m:
        # Keep everything up to and including the name after the sign-off.
        return text[:m.end()].strip()
    return text.strip()
|
||||
|
||||
|
||||
def generate(
|
||||
title: str,
|
||||
company: str,
|
||||
|
|
@ -268,10 +227,8 @@ def generate(
|
|||
if feedback:
|
||||
print("[cover-letter] Refinement mode: feedback provided", file=sys.stderr)
|
||||
|
||||
# max_tokens=1200 caps generation at ~900 words — enough for any cover letter
|
||||
# and prevents fine-tuned models from looping into repetitive garbage output.
|
||||
result = _router.complete(prompt, max_tokens=1200)
|
||||
return _trim_to_letter_end(result)
|
||||
result = _router.complete(prompt)
|
||||
return result.strip()
|
||||
|
||||
|
||||
def main() -> None:
|
||||
|
|
|
|||
|
|
@ -49,11 +49,6 @@ class LLMRouter:
|
|||
are only tried when images is provided.
|
||||
Raises RuntimeError if all backends are exhausted.
|
||||
"""
|
||||
if os.environ.get("DEMO_MODE", "").lower() in ("1", "true", "yes"):
|
||||
raise RuntimeError(
|
||||
"AI inference is disabled in the public demo. "
|
||||
"Run your own instance to use AI features."
|
||||
)
|
||||
order = fallback_order if fallback_order is not None else self.config["fallback_order"]
|
||||
for name in order:
|
||||
backend = self.config["backends"][name]
|
||||
|
|
|
|||
71
setup.sh
71
setup.sh
|
|
@ -89,15 +89,6 @@ configure_git_safe_dir() {
|
|||
fi
|
||||
}
|
||||
|
||||
# Point this checkout's core.hooksPath at the bundled .githooks/ directory.
# Does nothing when no .githooks/ directory sits next to this script.
activate_git_hooks() {
    local script_root hooks_dir
    script_root="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
    hooks_dir="$script_root/.githooks"

    # Guard: nothing to activate without a hooks directory.
    if [[ ! -d "$hooks_dir" ]]; then
        return 0
    fi

    git -C "$script_root" config core.hooksPath .githooks
    success "Git hooks activated (.githooks/)."
}
|
||||
|
||||
# ── Git ────────────────────────────────────────────────────────────────────────
|
||||
install_git() {
|
||||
if cmd_exists git; then success "git already installed: $(git --version)"; return; fi
|
||||
|
|
@ -137,10 +128,8 @@ check_podman() {
|
|||
esac
|
||||
success "podman-compose installed."
|
||||
fi
|
||||
if [[ "$OS" != "Darwin" ]]; then
|
||||
warn "GPU profiles (single-gpu, dual-gpu) require CDI setup:"
|
||||
warn " sudo nvidia-ctk cdi generate --output=/etc/cdi/nvidia.yaml"
|
||||
fi
|
||||
warn "GPU profiles (single-gpu, dual-gpu) require CDI setup:"
|
||||
warn " sudo nvidia-ctk cdi generate --output=/etc/cdi/nvidia.yaml"
|
||||
return 0
|
||||
}
|
||||
|
||||
|
|
@ -276,54 +265,6 @@ install_nvidia_toolkit() {
|
|||
success "NVIDIA Container Toolkit installed."
|
||||
}
|
||||
|
||||
# ── Ollama (macOS native) ──────────────────────────────────────────────────────
|
||||
# On macOS, Docker Desktop runs in a Linux VM that cannot access the Apple GPU.
|
||||
# Ollama must run natively on the host to use Metal GPU acceleration.
|
||||
# When it's running on :11434, preflight automatically adopts it and stubs out
|
||||
# the Docker Ollama container so there's no conflict.
|
||||
# Offer to install and start a native Ollama on macOS hosts.
#
# Flow:
#   1. Return immediately on any OS other than Darwin.
#   2. If `ollama` is already on PATH, report whether it appears to be running
#      (process named "ollama" or a launchd entry matching com.ollama) and return.
#   3. If stdin is not a terminal, print manual instructions and return.
#   4. Otherwise prompt; on yes, install and start via Homebrew when available.
install_ollama_macos() {
    if [[ "$OS" != "Darwin" ]]; then
        return
    fi

    echo ""
    info "Ollama (native macOS — enables Apple Silicon Metal GPU acceleration)"
    echo -e " Docker cannot pass through the Apple GPU. For GPU-accelerated inference,"
    echo -e " Ollama must run natively on the host."
    echo ""

    # Already installed: only report status, never reinstall.
    if cmd_exists ollama; then
        success "Ollama already installed: $(ollama --version 2>/dev/null | head -1 || echo 'unknown version')"
        if ! { pgrep -x ollama &>/dev/null || launchctl print gui/"$(id -u)" 2>/dev/null | grep -q com.ollama; }; then
            warn "Ollama is installed but not running."
            warn "Start it with: brew services start ollama (or: ollama serve)"
        else
            success "Ollama service is running — preflight will adopt it automatically."
        fi
        return
    fi

    # No terminal on stdin (e.g. curl | bash): a prompt cannot be answered.
    if [[ ! -t 0 ]]; then
        warn "Non-interactive — skipping Ollama install."
        warn "Install manually: brew install ollama && brew services start ollama"
        return
    fi

    read -rp " Install Ollama natively for Metal GPU support? [Y/n]: " yn
    yn="${yn:-Y}"   # bare Enter counts as yes

    # User declined.
    if [[ ! "$yn" =~ ^[Yy] ]]; then
        info "Skipped. The 'cpu' profile will use Docker Ollama on CPU instead."
        return
    fi

    # User accepted but Homebrew is unavailable: point at the manual download.
    if ! cmd_exists brew; then
        warn "Homebrew not found."
        warn "Install Ollama manually from https://ollama.com/download/mac then start it."
        return
    fi

    brew install ollama
    brew services start ollama
    success "Ollama installed and started."
    success "Preflight will adopt it on next run — no Docker Ollama container will start."
}
|
||||
|
||||
# ── Environment setup ──────────────────────────────────────────────────────────
|
||||
# Note: Ollama runs as a Docker container — the compose.yml ollama service
|
||||
# handles model download automatically on first start (see docker/ollama/entrypoint.sh).
|
||||
|
|
@ -402,7 +343,6 @@ main() {
|
|||
install_build_tools
|
||||
install_git
|
||||
configure_git_safe_dir
|
||||
activate_git_hooks
|
||||
# Podman takes precedence if already installed; otherwise install Docker
|
||||
if ! check_podman; then
|
||||
install_docker
|
||||
|
|
@ -410,7 +350,6 @@ main() {
|
|||
check_compose
|
||||
install_nvidia_toolkit
|
||||
fi
|
||||
install_ollama_macos
|
||||
setup_env
|
||||
configure_model_paths
|
||||
|
||||
|
|
@ -420,11 +359,7 @@ main() {
|
|||
echo -e " ${GREEN}Next steps:${NC}"
|
||||
echo -e " 1. Start Peregrine:"
|
||||
echo -e " ${YELLOW}./manage.sh start${NC} # remote/API-only (no local GPU)"
|
||||
if [[ "$OS" == "Darwin" ]] && cmd_exists ollama; then
|
||||
echo -e " ${YELLOW}./manage.sh start --profile cpu${NC} # local Ollama inference (Metal GPU via native Ollama)"
|
||||
else
|
||||
echo -e " ${YELLOW}./manage.sh start --profile cpu${NC} # local Ollama inference (CPU)"
|
||||
fi
|
||||
echo -e " ${YELLOW}./manage.sh start --profile cpu${NC} # local Ollama inference (CPU)"
|
||||
echo -e " 2. Open ${YELLOW}http://localhost:8501${NC} — the setup wizard will guide you"
|
||||
echo -e " (Tip: edit ${YELLOW}.env${NC} any time to adjust ports or model paths)"
|
||||
echo ""
|
||||
|
|
|
|||
|
|
@ -2,14 +2,11 @@
|
|||
import pytest
|
||||
|
||||
|
||||
def test_labels_constant_has_nine_items():
|
||||
def test_labels_constant_has_six_items():
|
||||
from scripts.classifier_adapters import LABELS
|
||||
assert len(LABELS) == 9
|
||||
assert len(LABELS) == 6
|
||||
assert "interview_scheduled" in LABELS
|
||||
assert "neutral" in LABELS
|
||||
assert "event_rescheduled" in LABELS
|
||||
assert "unrelated" in LABELS
|
||||
assert "digest" in LABELS
|
||||
|
||||
|
||||
def test_compute_metrics_perfect_predictions():
|
||||
|
|
@ -60,23 +57,20 @@ def test_zeroshot_adapter_classify_mocked():
|
|||
from unittest.mock import MagicMock, patch
|
||||
from scripts.classifier_adapters import ZeroShotAdapter
|
||||
|
||||
# Two-level mock: factory call returns pipeline instance; instance call returns inference result.
|
||||
mock_pipe_factory = MagicMock()
|
||||
mock_pipe_factory.return_value = MagicMock(return_value={
|
||||
mock_pipeline = MagicMock()
|
||||
mock_pipeline.return_value = {
|
||||
"labels": ["rejected", "neutral", "interview_scheduled"],
|
||||
"scores": [0.85, 0.10, 0.05],
|
||||
})
|
||||
}
|
||||
|
||||
with patch("scripts.classifier_adapters.pipeline", mock_pipe_factory):
|
||||
with patch("scripts.classifier_adapters.pipeline", mock_pipeline):
|
||||
adapter = ZeroShotAdapter("test-zs", "some/model")
|
||||
adapter.load()
|
||||
result = adapter.classify("We went with another candidate", "Thank you for applying.")
|
||||
|
||||
assert result == "rejected"
|
||||
# Factory was called with the correct task type
|
||||
assert mock_pipe_factory.call_args[0][0] == "zero-shot-classification"
|
||||
# Pipeline instance was called with the email text
|
||||
assert "We went with another candidate" in mock_pipe_factory.return_value.call_args[0][0]
|
||||
call_args = mock_pipeline.call_args
|
||||
assert "We went with another candidate" in call_args[0][0]
|
||||
|
||||
|
||||
def test_zeroshot_adapter_unload_clears_pipeline():
|
||||
|
|
@ -148,7 +142,7 @@ def test_gliclass_adapter_returns_highest_score():
|
|||
return_value=mock_pipeline_instance):
|
||||
adapter = GLiClassAdapter("test-gli", "some/model")
|
||||
adapter.load()
|
||||
result = adapter.classify("Offer letter enclosed", "Dear Alex, we are pleased to offer...")
|
||||
result = adapter.classify("Offer letter enclosed", "Dear Meghan, we are pleased to offer...")
|
||||
|
||||
assert result == "offer_received"
|
||||
|
||||
|
|
|
|||
|
|
@ -33,7 +33,7 @@ def test_strip_greeting():
|
|||
"""strip_greeting removes the 'Dear X,' line and returns the body."""
|
||||
from scripts.prepare_training_data import strip_greeting
|
||||
|
||||
text = "Dear Hiring Team,\n\nI'm delighted to apply for the CSM role.\n\nBest regards,\nAlex"
|
||||
text = "Dear Hiring Team,\n\nI'm delighted to apply for the CSM role.\n\nBest regards,\nMeghan"
|
||||
result = strip_greeting(text)
|
||||
assert result.startswith("I'm delighted")
|
||||
assert "Dear" not in result
|
||||
|
|
@ -48,7 +48,7 @@ def test_build_records_from_tmp_corpus(tmp_path):
|
|||
"Dear Acme Hiring Team,\n\n"
|
||||
"I'm delighted to apply for the Director of Customer Success position at Acme Corp. "
|
||||
"With six years of experience, I bring strong skills.\n\n"
|
||||
"Best regards,\nAlex Rivera"
|
||||
"Best regards,\nMeghan McCann"
|
||||
)
|
||||
|
||||
records = build_records(tmp_path)
|
||||
|
|
@ -107,11 +107,11 @@ def test_generate_calls_llm_router():
|
|||
{"company": "Acme", "text": "I'm delighted to apply for the CSM role at Acme."},
|
||||
]
|
||||
mock_router = MagicMock()
|
||||
mock_router.complete.return_value = "Dear Hiring Team,\n\nI'm delighted to apply.\n\nWarm regards,\nAlex Rivera"
|
||||
mock_router.complete.return_value = "Dear Hiring Team,\n\nI'm delighted to apply.\n\nWarm regards,\nMeghan McCann"
|
||||
|
||||
with patch("scripts.generate_cover_letter.load_corpus", return_value=fake_corpus):
|
||||
result = generate("Customer Success Manager", "TestCo", "Looking for a CSM",
|
||||
_router=mock_router)
|
||||
|
||||
mock_router.complete.assert_called_once()
|
||||
assert "Alex Rivera" in result
|
||||
assert "Meghan McCann" in result
|
||||
|
|
|
|||
|
|
@ -408,7 +408,7 @@ def test_get_email_leads(tmp_path):
|
|||
insert_job(db_path, {
|
||||
"title": "TAM", "company": "Wiz", "url": "email://wiz.com/abc123",
|
||||
"source": "email", "location": "", "is_remote": 0,
|
||||
"salary": "", "description": "Hi Alex…", "date_found": "2026-02-21",
|
||||
"salary": "", "description": "Hi Meghan…", "date_found": "2026-02-21",
|
||||
})
|
||||
leads = get_email_leads(db_path)
|
||||
assert len(leads) == 1
|
||||
|
|
|
|||
|
|
@ -1,77 +0,0 @@
|
|||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
HOOK=".githooks/pre-commit"
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
REPO_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
|
||||
cd "$REPO_DIR"
|
||||
|
||||
pass() { echo " PASS: $1"; }
|
||||
fail() { echo " FAIL: $1"; exit 1; }
|
||||
|
||||
# Helper: run hook against a fake staged file list
|
||||
# Run the pre-commit hook ($HOOK) against a fake staged file list.
#
# A temporary `git` shim is placed first on PATH. It answers the hook's
# `diff-index` query with the given file name and its `diff --cached` query
# with the given content; every other git invocation is forwarded to the
# real git binary.
#
# Arguments:
#   $1  path to report as staged
#   $2  (optional) text to report as the staged diff content
# Returns:
#   the hook's exit status
run_hook_with() {
    local staged_file="$1"
    local staged_content="${2:-}"
    local tmpdir real_git status=0

    # Resolve the real git BEFORE the shim directory goes on PATH. The shim is
    # itself named `git`, and `command git` only bypasses shell functions and
    # aliases, not PATH lookup, so a fallback written as `command git` would
    # re-invoke the shim endlessly.
    real_git="$(command -v git)" || { echo "run_hook_with: git not found on PATH" >&2; exit 1; }

    tmpdir=$(mktemp -d)

    # Create shim that reports our file as staged. The heredoc is unquoted on
    # purpose: $staged_file, $staged_content and $real_git expand now, while
    # the escaped \$* / \$@ are left for the shim to expand at run time.
    cat > "$tmpdir/git" <<SHIM
#!/usr/bin/env bash
if [[ "\$*" == *"diff-index"* ]]; then
    echo "$staged_file"
elif [[ "\$*" == *"diff"*"--cached"* ]]; then
    echo "$staged_content"
else
    exec "$real_git" "\$@"
fi
SHIM
    chmod +x "$tmpdir/git"

    # Capture the status via `||` so the temp dir is still removed if this
    # helper is ever called outside an &&/|| list while `set -e` is active.
    PATH="$tmpdir:$PATH" bash "$HOOK" 2>&1 || status=$?
    rm -rf "$tmpdir"
    return $status
}
|
||||
|
||||
echo "Test 1: blocks config/user.yaml"
|
||||
run_hook_with "config/user.yaml" && fail "should have blocked" || pass "blocked user.yaml"
|
||||
|
||||
echo "Test 2: blocks .env"
|
||||
run_hook_with ".env" && fail "should have blocked" || pass "blocked .env"
|
||||
|
||||
echo "Test 3: blocks content with OpenAI key pattern"
|
||||
run_hook_with "app/app.py" "+sk-abcdefghijklmnopqrstuvwxyz123456" && \
|
||||
fail "should have blocked key pattern" || pass "blocked key pattern"
|
||||
|
||||
echo "Test 4: allows safe file"
|
||||
run_hook_with "app/app.py" "import streamlit" && pass "allowed safe file" || \
|
||||
fail "should have allowed safe file"
|
||||
|
||||
echo "All pre-commit hook tests passed."
|
||||
|
||||
# ── commit-msg hook tests ────────────────────────────────────────────────────
|
||||
COMMIT_HOOK=".githooks/commit-msg"
|
||||
tmpfile=$(mktemp)
|
||||
|
||||
echo "Test 5: accepts valid feat message"
|
||||
echo "feat: add thing" > "$tmpfile"
|
||||
bash "$COMMIT_HOOK" "$tmpfile" && pass "accepted feat" || fail "rejected valid feat"
|
||||
|
||||
echo "Test 6: accepts valid fix with scope"
|
||||
echo "fix(auth): handle token expiry" > "$tmpfile"
|
||||
bash "$COMMIT_HOOK" "$tmpfile" && pass "accepted fix(scope)" || fail "rejected valid fix(scope)"
|
||||
|
||||
echo "Test 7: rejects empty message"
|
||||
echo "" > "$tmpfile"
|
||||
bash "$COMMIT_HOOK" "$tmpfile" && fail "should reject empty" || pass "rejected empty"
|
||||
|
||||
echo "Test 8: rejects non-conventional message"
|
||||
echo "updated the thing" > "$tmpfile"
|
||||
bash "$COMMIT_HOOK" "$tmpfile" && fail "should reject non-conventional" || pass "rejected non-conventional"
|
||||
|
||||
echo "Test 9: rejects invalid type"
|
||||
echo "yolo: ship it" > "$tmpfile"
|
||||
bash "$COMMIT_HOOK" "$tmpfile" && fail "should reject invalid type" || pass "rejected invalid type"
|
||||
|
||||
rm -f "$tmpfile"
|
||||
echo "All commit-msg hook tests passed."
|
||||
|
|
@ -10,7 +10,7 @@ def test_classify_stage_signal_interview():
|
|||
mock_router.complete.return_value = "interview_scheduled"
|
||||
result = classify_stage_signal(
|
||||
"Let's schedule a call",
|
||||
"Hi Alex, we'd love to book a 30-min phone screen with you.",
|
||||
"Hi Meghan, we'd love to book a 30-min phone screen with you.",
|
||||
)
|
||||
assert result == "interview_scheduled"
|
||||
|
||||
|
|
@ -72,7 +72,7 @@ def test_extract_lead_info_returns_company_and_title():
|
|||
from scripts.imap_sync import extract_lead_info
|
||||
with patch("scripts.imap_sync._CLASSIFIER_ROUTER") as mock_router:
|
||||
mock_router.complete.return_value = '{"company": "Wiz", "title": "Senior TAM"}'
|
||||
result = extract_lead_info("Senior TAM at Wiz", "Hi Alex, we have a role…", "recruiter@wiz.com")
|
||||
result = extract_lead_info("Senior TAM at Wiz", "Hi Meghan, we have a role…", "recruiter@wiz.com")
|
||||
assert result == ("Wiz", "Senior TAM")
|
||||
|
||||
|
||||
|
|
@ -120,11 +120,11 @@ def test_sync_job_emails_classifies_inbound(tmp_path):
|
|||
|
||||
fake_msg_bytes = (
|
||||
b"From: recruiter@acme.com\r\n"
|
||||
b"To: alex@example.com\r\n"
|
||||
b"To: meghan@example.com\r\n"
|
||||
b"Subject: Interview Invitation\r\n"
|
||||
b"Message-ID: <unique-001@acme.com>\r\n"
|
||||
b"\r\n"
|
||||
b"Hi Alex, we'd like to schedule a phone screen."
|
||||
b"Hi Meghan, we'd like to schedule a phone screen."
|
||||
)
|
||||
|
||||
conn_mock = MagicMock()
|
||||
|
|
@ -227,7 +227,7 @@ View job: https://www.linkedin.com/comm/jobs/view/9999002/?trackingId=def
|
|||
_ALERT_EMAIL = {
|
||||
"message_id": "<alert-001@linkedin.com>",
|
||||
"from_addr": "jobalerts-noreply@linkedin.com",
|
||||
"to_addr": "alex@example.com",
|
||||
"to_addr": "meghan@example.com",
|
||||
"subject": "2 new jobs for customer success manager",
|
||||
"body": _ALERT_BODY,
|
||||
"date": "2026-02-24 12:00:00",
|
||||
|
|
@ -366,7 +366,7 @@ def test_ats_subject_phrase_not_matched_in_body_only():
|
|||
"""ATS confirm phrase in body alone does NOT trigger — subject-only check."""
|
||||
from scripts.imap_sync import _has_rejection_or_ats_signal
|
||||
# "thank you for applying" is an ATS subject phrase; must NOT be caught in body only
|
||||
body = "Hi Alex, thank you for applying to our Senior TAM role. We'd love to chat."
|
||||
body = "Hi Meghan, thank you for applying to our Senior TAM role. We'd love to chat."
|
||||
assert _has_rejection_or_ats_signal("Interview Invitation", body) is False
|
||||
|
||||
|
||||
|
|
@ -391,7 +391,7 @@ def test_rejection_uppercase_lowercased():
|
|||
def test_rejection_phrase_in_quoted_thread_beyond_limit_not_blocked():
|
||||
"""Rejection phrase beyond 1500-char body window does not block the email."""
|
||||
from scripts.imap_sync import _has_rejection_or_ats_signal
|
||||
clean_intro = "Hi Alex, we'd love to schedule a call with you. " * 30 # ~1500 chars
|
||||
clean_intro = "Hi Meghan, we'd love to schedule a call with you. " * 30 # ~1500 chars
|
||||
quoted_footer = "\n\nOn Mon, Jan 1 wrote:\n> Unfortunately we went with another candidate."
|
||||
body = clean_intro + quoted_footer
|
||||
# The phrase lands after the 1500-char cutoff — should NOT be blocked
|
||||
|
|
@ -519,7 +519,7 @@ def test_parse_message_no_message_id_returns_none():
|
|||
b"From: recruiter@acme.com\r\n"
|
||||
b"Subject: Interview Invitation\r\n"
|
||||
b"\r\n"
|
||||
b"Hi Alex!"
|
||||
b"Hi Meghan!"
|
||||
)
|
||||
conn = MagicMock()
|
||||
conn.fetch.return_value = ("OK", [(b"1 (RFC822 {40})", raw)])
|
||||
|
|
@ -563,7 +563,7 @@ def test_extract_lead_info_returns_none_on_llm_error():
|
|||
from scripts.imap_sync import extract_lead_info
|
||||
with patch("scripts.imap_sync._CLASSIFIER_ROUTER") as mock_router:
|
||||
mock_router.complete.side_effect = RuntimeError("timeout")
|
||||
result = extract_lead_info("Senior TAM at Wiz", "Hi Alex…", "r@wiz.com")
|
||||
result = extract_lead_info("Senior TAM at Wiz", "Hi Meghan…", "r@wiz.com")
|
||||
assert result == (None, None)
|
||||
|
||||
|
||||
|
|
@ -572,9 +572,9 @@ def test_extract_lead_info_returns_none_on_llm_error():
|
|||
_PLAIN_RECRUIT_EMAIL = {
|
||||
"message_id": "<recruit-001@acme.com>",
|
||||
"from_addr": "recruiter@acme.com",
|
||||
"to_addr": "alex@example.com",
|
||||
"to_addr": "meghan@example.com",
|
||||
"subject": "Interview Opportunity at Acme",
|
||||
"body": "Hi Alex, we have an exciting opportunity for you.",
|
||||
"body": "Hi Meghan, we have an exciting opportunity for you.",
|
||||
"date": "2026-02-25 10:00:00",
|
||||
}
|
||||
|
||||
|
|
@ -776,9 +776,9 @@ def test_scan_todo_label_email_matches_company_and_keyword(tmp_path):
|
|||
todo_email = {
|
||||
"message_id": "<todo-001@acme.com>",
|
||||
"from_addr": "recruiter@acme.com",
|
||||
"to_addr": "alex@example.com",
|
||||
"to_addr": "meghan@example.com",
|
||||
"subject": "Interview scheduled with Acme",
|
||||
"body": "Hi Alex, your interview is confirmed.",
|
||||
"body": "Hi Meghan, your interview is confirmed.",
|
||||
"date": "2026-02-25 10:00:00",
|
||||
}
|
||||
|
||||
|
|
@ -807,7 +807,7 @@ def test_scan_todo_label_no_action_keyword_skipped(tmp_path):
|
|||
no_keyword_email = {
|
||||
"message_id": "<todo-002@acme.com>",
|
||||
"from_addr": "noreply@acme.com",
|
||||
"to_addr": "alex@example.com",
|
||||
"to_addr": "meghan@example.com",
|
||||
"subject": "Acme newsletter",
|
||||
"body": "Company updates this week.",
|
||||
"date": "2026-02-25 10:00:00",
|
||||
|
|
@ -834,9 +834,9 @@ def test_scan_todo_label_no_company_match_skipped(tmp_path):
|
|||
unrelated_email = {
|
||||
"message_id": "<todo-003@other.com>",
|
||||
"from_addr": "recruiter@other.com",
|
||||
"to_addr": "alex@example.com",
|
||||
"to_addr": "meghan@example.com",
|
||||
"subject": "Interview scheduled with OtherCo",
|
||||
"body": "Hi Alex, interview with OtherCo confirmed.",
|
||||
"body": "Hi Meghan, interview with OtherCo confirmed.",
|
||||
"date": "2026-02-25 10:00:00",
|
||||
}
|
||||
|
||||
|
|
@ -861,9 +861,9 @@ def test_scan_todo_label_duplicate_message_id_not_reinserted(tmp_path):
|
|||
todo_email = {
|
||||
"message_id": "<already-seen@acme.com>",
|
||||
"from_addr": "recruiter@acme.com",
|
||||
"to_addr": "alex@example.com",
|
||||
"to_addr": "meghan@example.com",
|
||||
"subject": "Interview scheduled with Acme",
|
||||
"body": "Hi Alex.",
|
||||
"body": "Hi Meghan.",
|
||||
"date": "2026-02-25 10:00:00",
|
||||
}
|
||||
|
||||
|
|
@ -891,7 +891,7 @@ def test_scan_todo_label_stage_signal_set_for_non_neutral(tmp_path):
|
|||
todo_email = {
|
||||
"message_id": "<signal-001@acme.com>",
|
||||
"from_addr": "recruiter@acme.com",
|
||||
"to_addr": "alex@example.com",
|
||||
"to_addr": "meghan@example.com",
|
||||
"subject": "Interview scheduled with Acme",
|
||||
"body": "Your phone screen is confirmed.",
|
||||
"date": "2026-02-25 10:00:00",
|
||||
|
|
@ -924,7 +924,7 @@ def test_scan_todo_label_body_fallback_matches(tmp_path):
|
|||
body_only_email = {
|
||||
"message_id": "<body-fallback@noreply.greenhouse.io>",
|
||||
"from_addr": "noreply@greenhouse.io",
|
||||
"to_addr": "alex@example.com",
|
||||
"to_addr": "meghan@example.com",
|
||||
"subject": "Interview scheduled",
|
||||
"body": "Your interview with Acme has been confirmed for tomorrow.",
|
||||
"date": "2026-02-25 10:00:00",
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@ import sys
|
|||
from pathlib import Path
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||
|
||||
from app.wizard.tiers import can_use, tier_label, TIERS, FEATURES, BYOK_UNLOCKABLE
|
||||
from app.wizard.tiers import can_use, tier_label, TIERS, FEATURES
|
||||
|
||||
|
||||
def test_tiers_list():
|
||||
|
|
@ -67,48 +67,3 @@ def test_free_integrations_are_accessible():
|
|||
def test_paid_integrations_gated():
    """notion_sync is refused on the free tier and granted on the paid tier."""
    expected_access = (("free", False), ("paid", True))
    for tier, allowed in expected_access:
        assert can_use(tier, "notion_sync") is allowed
|
||||
|
||||
|
||||
# ── BYOK tests ────────────────────────────────────────────────────────────────
|
||||
|
||||
def test_byok_unlocks_llm_features_for_free_tier():
    """Each BYOK_UNLOCKABLE feature is usable on the free tier when has_byok=True."""
    for name in BYOK_UNLOCKABLE:
        granted = can_use("free", name, has_byok=True)
        assert granted is True, f"{name} should be accessible with BYOK on free tier"
|
||||
|
||||
|
||||
def test_byok_does_not_unlock_integrations():
    """Integration features remain locked on the free tier even with has_byok=True."""
    # Integrations stay gated even with BYOK — they depend on CF infrastructure
    integrations = ["notion_sync", "google_sheets_sync", "slack_notifications"]
    for name in integrations:
        granted = can_use("free", name, has_byok=True)
        assert granted is False, f"{name} should stay gated even with BYOK"
|
||||
|
||||
|
||||
def test_byok_does_not_unlock_orchestration_features():
    """Pipeline-backed features remain locked on the free tier even with has_byok=True."""
    # These features depend on background pipelines, not just an LLM call
    orchestration = ["llm_keywords_blocklist", "email_classifier", "model_fine_tuning"]
    for name in orchestration:
        granted = can_use("free", name, has_byok=True)
        assert granted is False, f"{name} should stay gated even with BYOK"
|
||||
|
||||
|
||||
def test_tier_label_hidden_when_byok_unlocks():
    """tier_label returns an empty string for BYOK_UNLOCKABLE features when has_byok=True."""
    for name in BYOK_UNLOCKABLE:
        shown = tier_label(name, has_byok=True)
        assert shown == "", f"{name} should show no lock label when BYOK is active"
|
||||
|
||||
|
||||
def test_tier_label_still_shows_for_non_unlockable_with_byok():
    """Features outside the BYOK set keep a non-empty tier label with has_byok=True."""
    for gated in ("notion_sync", "email_classifier"):
        assert tier_label(gated, has_byok=True) != ""
|
||||
|
||||
|
||||
def test_byok_false_preserves_original_gating():
    """Passing has_byok=False explicitly leaves the tier gating of company_research as is."""
    # has_byok=False (default) must not change existing behaviour
    for tier, allowed in (("free", False), ("paid", True)):
        assert can_use(tier, "company_research", has_byok=False) is allowed
|
||||
|
|
|
|||
|
|
@ -1,658 +0,0 @@
|
|||
"""Email Label Tool — card-stack UI for building classifier benchmark data.
|
||||
|
||||
Philosophy: Scrape → Store → Process
|
||||
Fetch (IMAP, wide search, multi-account) → data/email_label_queue.jsonl
|
||||
Label (card stack) → data/email_score.jsonl
|
||||
|
||||
Run:
|
||||
conda run -n job-seeker streamlit run tools/label_tool.py --server.port 8503
|
||||
|
||||
Config: config/label_tool.yaml (gitignored — see config/label_tool.yaml.example)
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import email as _email_lib
|
||||
import hashlib
|
||||
import html as _html
|
||||
import imaplib
|
||||
import json
|
||||
import re
|
||||
import sys
|
||||
from datetime import datetime, timedelta
|
||||
from email.header import decode_header as _raw_decode
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import streamlit as st
|
||||
import yaml
|
||||
|
||||
# ── Path setup ─────────────────────────────────────────────────────────────
|
||||
_ROOT = Path(__file__).parent.parent
|
||||
sys.path.insert(0, str(_ROOT))
|
||||
|
||||
_QUEUE_FILE = _ROOT / "data" / "email_label_queue.jsonl"
|
||||
_SCORE_FILE = _ROOT / "data" / "email_score.jsonl"
|
||||
_CFG_FILE = _ROOT / "config" / "label_tool.yaml"
|
||||
|
||||
# ── Labels ─────────────────────────────────────────────────────────────────
|
||||
LABELS = [
|
||||
"interview_scheduled",
|
||||
"offer_received",
|
||||
"rejected",
|
||||
"positive_response",
|
||||
"survey_received",
|
||||
"neutral",
|
||||
"event_rescheduled",
|
||||
"unrelated",
|
||||
"digest",
|
||||
]
|
||||
|
||||
_LABEL_META: dict[str, dict] = {
|
||||
"interview_scheduled": {"emoji": "🗓️", "color": "#4CAF50", "key": "1"},
|
||||
"offer_received": {"emoji": "🎉", "color": "#2196F3", "key": "2"},
|
||||
"rejected": {"emoji": "❌", "color": "#F44336", "key": "3"},
|
||||
"positive_response": {"emoji": "👍", "color": "#FF9800", "key": "4"},
|
||||
"survey_received": {"emoji": "📋", "color": "#9C27B0", "key": "5"},
|
||||
"neutral": {"emoji": "⬜", "color": "#607D8B", "key": "6"},
|
||||
"event_rescheduled": {"emoji": "🔄", "color": "#FF5722", "key": "7"},
|
||||
"unrelated": {"emoji": "🗑️", "color": "#757575", "key": "8"},
|
||||
"digest": {"emoji": "📰", "color": "#00BCD4", "key": "9"},
|
||||
}
|
||||
|
||||
# ── HTML sanitiser ───────────────────────────────────────────────────────────
|
||||
# Valid chars per XML 1.0 §2.2 (same set HTML5 innerHTML enforces):
|
||||
# #x9 | #xA | #xD | [#x20–#xD7FF] | [#xE000–#xFFFD] | [#x10000–#x10FFFF]
|
||||
# Anything outside this range causes InvalidCharacterError in the browser.
|
||||
_INVALID_XML_CHARS = re.compile(
    r"[^\x09\x0A\x0D\x20-\uD7FF\uE000-\uFFFD\U00010000-\U0010FFFF]"
)


def _to_html(text: str, newlines_to_br: bool = False) -> str:
    """Make *text* safe to embed in an HTML fragment.

    Characters matched by ``_INVALID_XML_CHARS`` are dropped first, then the
    remainder is HTML-escaped.  With ``newlines_to_br=True`` every ``\\n`` in
    the escaped text becomes a ``<br>`` tag.  Falsy input yields ``""``.
    """
    if not text:
        return ""
    safe = _html.escape(_INVALID_XML_CHARS.sub("", text))
    return safe.replace("\n", "<br>") if newlines_to_br else safe
|
||||
|
||||
|
||||
# ── Wide IMAP search terms (cast a net across all 9 categories) ─────────────
|
||||
_WIDE_TERMS = [
|
||||
# interview_scheduled
|
||||
"interview", "phone screen", "video call", "zoom link", "schedule a call",
|
||||
# offer_received
|
||||
"offer letter", "job offer", "offer of employment", "pleased to offer",
|
||||
# rejected
|
||||
"unfortunately", "not moving forward", "other candidates", "regret to inform",
|
||||
"no longer", "decided not to", "decided to go with",
|
||||
# positive_response
|
||||
"opportunity", "interested in your background", "reached out", "great fit",
|
||||
"exciting role", "love to connect",
|
||||
# survey_received
|
||||
"assessment", "questionnaire", "culture fit", "culture-fit", "online assessment",
|
||||
# neutral / ATS confirms
|
||||
"application received", "thank you for applying", "application confirmation",
|
||||
"you applied", "your application for",
|
||||
# event_rescheduled
|
||||
"reschedule", "rescheduled", "new time", "moved to", "postponed", "new date",
|
||||
# digest
|
||||
"job digest", "jobs you may like", "recommended jobs", "jobs for you",
|
||||
"new jobs", "job alert",
|
||||
# general recruitment
|
||||
"application", "recruiter", "recruiting", "hiring", "candidate",
|
||||
]
|
||||
|
||||
|
||||
# ── IMAP helpers ────────────────────────────────────────────────────────────
|
||||
|
||||
def _decode_str(value: str | None) -> str:
    """Decode a (possibly RFC 2047 encoded) header value into plain text.

    Args:
        value: Raw header value, or ``None`` / ``""`` when the header is absent.

    Returns:
        The decoded fragments joined with single spaces and stripped of
        surrounding whitespace; ``""`` for falsy input.  Bytes that cannot be
        decoded are replaced rather than raising.
    """
    if not value:
        return ""
    out = []
    for part, enc in _raw_decode(value):
        if isinstance(part, bytes):
            try:
                out.append(part.decode(enc or "utf-8", errors="replace"))
            except LookupError:
                # The header names a charset Python has no codec for (e.g.
                # "unknown-8bit").  Fall back to UTF-8 instead of failing, so
                # one odd header does not make the caller discard the email.
                out.append(part.decode("utf-8", errors="replace"))
        else:
            out.append(str(part))
    return " ".join(out).strip()
|
||||
|
||||
|
||||
def _extract_body(msg: Any) -> str:
    """Return the decoded text body of an email message, or ``""``.

    For a multipart message the ``text/plain`` parts are tried in walk order
    and the first one that decodes is returned.  A non-multipart message is
    decoded directly, whatever its content type.  Decoding uses the declared
    charset (UTF-8 when none is declared) with undecodable bytes replaced;
    any failure while decoding a candidate is ignored.
    """
    if msg.is_multipart():
        candidates = (
            part for part in msg.walk()
            if part.get_content_type() == "text/plain"
        )
    else:
        candidates = (msg,)

    for candidate in candidates:
        try:
            encoding = candidate.get_content_charset() or "utf-8"
            raw = candidate.get_payload(decode=True)
            return raw.decode(encoding, errors="replace")
        except Exception:
            # Best effort: move on to the next candidate (if any).
            continue
    return ""
|
||||
|
||||
|
||||
def _fetch_account(cfg: dict, days: int, limit: int, known_keys: set[str],
                   progress_cb=None) -> list[dict]:
    """Fetch emails from one IMAP account using wide recruitment search terms.

    Args:
        cfg: Account settings.  ``username`` and ``password`` are required;
            ``host`` (default ``imap.gmail.com``), ``port`` (default 993),
            ``use_ssl`` (default True) and ``name`` (default: the username)
            are optional.
        days: Only messages dated within the last *days* days are searched.
        limit: Maximum number of new entries to return.
        known_keys: ``_entry_key`` values of entries that already exist.
            Mutated in place: the key of every returned entry is added.
        progress_cb: Optional ``callable(fraction, message)`` invoked before
            each message download.

    Returns:
        New entries as dicts with ``subject``, ``body`` (first 800
        characters), ``from_addr``, ``date`` and ``account``.

    Raises:
        KeyError: ``cfg`` lacks ``username`` or ``password``.
        Errors raised while connecting, logging in or selecting INBOX
        propagate to the caller; per-search and per-message failures are
        skipped (best effort).
    """
    since = (datetime.now() - timedelta(days=days)).strftime("%d-%b-%Y")
    host = cfg.get("host", "imap.gmail.com")
    port = int(cfg.get("port", 993))
    use_ssl = cfg.get("use_ssl", True)
    username = cfg["username"]
    password = cfg["password"]
    name = cfg.get("name", username)

    conn = (imaplib.IMAP4_SSL if use_ssl else imaplib.IMAP4)(host, port)
    emails: list[dict] = []
    try:
        conn.login(username, password)

        # dict used as an insertion-ordered set of message ids
        seen_uids: dict[bytes, None] = {}
        conn.select("INBOX", readonly=True)
        for term in _WIDE_TERMS:
            try:
                _, data = conn.search(None, f'(SUBJECT "{term}" SINCE "{since}")')
                for uid in (data[0] or b"").split():
                    seen_uids[uid] = None
            except Exception:
                # A failing search term must not abort the remaining terms.
                pass

        uids = list(seen_uids.keys())[:limit * 3]  # overfetch; filter after dedup
        for i, uid in enumerate(uids):
            if len(emails) >= limit:
                break
            if progress_cb:
                progress_cb(i / len(uids), f"{name}: {len(emails)} fetched…")
            try:
                _, raw_data = conn.fetch(uid, "(RFC822)")
                if not raw_data or not raw_data[0]:
                    continue
                msg = _email_lib.message_from_bytes(raw_data[0][1])
                subj = _decode_str(msg.get("Subject", ""))
                from_addr = _decode_str(msg.get("From", ""))
                date = _decode_str(msg.get("Date", ""))
                body = _extract_body(msg)[:800]
                entry = {
                    "subject": subj,
                    "body": body,
                    "from_addr": from_addr,
                    "date": date,
                    "account": name,
                }
                key = _entry_key(entry)
                if key not in known_keys:
                    known_keys.add(key)
                    emails.append(entry)
            except Exception:
                # Skip messages that cannot be downloaded or parsed.
                pass
    finally:
        # Always close the session, including when login/select raised,
        # so a failed fetch does not leave the connection open.
        try:
            conn.logout()
        except Exception:
            pass
    return emails
|
||||
|
||||
|
||||
# ── Queue / score file helpers ───────────────────────────────────────────────
|
||||
|
||||
def _entry_key(e: dict) -> str:
    """Return the dedup key of an email entry.

    The key is the MD5 hex digest of the entry's subject followed by the
    first 100 characters of its body (a missing or ``None`` body counts as
    empty).
    """
    subject = e.get("subject", "")
    body_head = (e.get("body") or "")[:100]
    digest = hashlib.md5((subject + body_head).encode())
    return digest.hexdigest()
|
||||
|
||||
|
||||
def _load_jsonl(path: Path) -> list[dict]:
    """Read a JSON-lines file and return its rows.

    Returns ``[]`` when *path* does not exist.  Blank lines and lines that
    are not valid JSON are skipped.  The file is read as UTF-8 (matching the
    writers below); undecodable bytes are replaced instead of raising.
    """
    if not path.exists():
        return []
    rows = []
    with path.open(encoding="utf-8", errors="replace") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            try:
                rows.append(json.loads(line))
            except ValueError:
                # json.JSONDecodeError is a ValueError: skip the corrupt line.
                pass
    return rows


def _save_jsonl(path: Path, rows: list[dict]) -> None:
    """Overwrite *path* with *rows*, one JSON document per line.

    Parent directories are created as needed.  Rows are written with
    ``ensure_ascii=False``, so the file is opened explicitly as UTF-8 rather
    than in the locale's default encoding.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    with path.open("w", encoding="utf-8") as f:
        for row in rows:
            f.write(json.dumps(row, ensure_ascii=False) + "\n")


def _append_jsonl(path: Path, row: dict) -> None:
    """Append *row* to *path* as a single JSON line (UTF-8).

    Parent directories are created as needed.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    with path.open("a", encoding="utf-8") as f:
        f.write(json.dumps(row, ensure_ascii=False) + "\n")
|
||||
|
||||
|
||||
# ── Config ──────────────────────────────────────────────────────────────────
|
||||
|
||||
def _load_config() -> list[dict]:
    """Return the ``accounts`` list from the label-tool YAML config.

    Returns ``[]`` when the config file does not exist, is empty, does not
    contain a mapping at the top level, or has a missing/empty ``accounts``
    key, so callers can always iterate the result.
    """
    if not _CFG_FILE.exists():
        return []
    cfg = yaml.safe_load(_CFG_FILE.read_text(encoding="utf-8"))
    if not isinstance(cfg, dict):
        # Empty file (None) or a top-level list/scalar: nothing usable.
        return []
    # "accounts:" with no value parses to None; normalise it to [].
    return cfg.get("accounts") or []
|
||||
|
||||
|
||||
# ── Page setup ──────────────────────────────────────────────────────────────
|
||||
|
||||
st.set_page_config(
|
||||
page_title="Email Labeler",
|
||||
page_icon="📬",
|
||||
layout="wide",
|
||||
)
|
||||
|
||||
st.markdown("""
|
||||
<style>
|
||||
/* Card stack */
|
||||
.email-card {
|
||||
border: 1px solid rgba(128,128,128,0.25);
|
||||
border-radius: 14px;
|
||||
padding: 28px 32px;
|
||||
box-shadow: 0 6px 24px rgba(0,0,0,0.18);
|
||||
margin-bottom: 4px;
|
||||
position: relative;
|
||||
}
|
||||
.card-stack-hint {
|
||||
height: 10px;
|
||||
border-radius: 0 0 12px 12px;
|
||||
border: 1px solid rgba(128,128,128,0.15);
|
||||
margin: 0 16px;
|
||||
box-shadow: 0 4px 12px rgba(0,0,0,0.10);
|
||||
}
|
||||
.card-stack-hint2 {
|
||||
height: 8px;
|
||||
border-radius: 0 0 10px 10px;
|
||||
border: 1px solid rgba(128,128,128,0.08);
|
||||
margin: 0 32px;
|
||||
}
|
||||
/* Subject line */
|
||||
.card-subject { font-size: 1.3rem; font-weight: 700; margin-bottom: 6px; }
|
||||
.card-meta { font-size: 0.82rem; opacity: 0.6; margin-bottom: 16px; }
|
||||
.card-body { font-size: 0.92rem; opacity: 0.85; white-space: pre-wrap; line-height: 1.5; }
|
||||
/* Bucket buttons */
|
||||
div[data-testid="stButton"] > button.bucket-btn {
|
||||
height: 70px;
|
||||
font-size: 1.05rem;
|
||||
font-weight: 600;
|
||||
border-radius: 12px;
|
||||
}
|
||||
</style>
|
||||
""", unsafe_allow_html=True)
|
||||
|
||||
st.title("📬 Email Label Tool")
|
||||
st.caption("Scrape → Store → Process | card-stack edition")
|
||||
|
||||
# ── Session state init ───────────────────────────────────────────────────────
|
||||
|
||||
if "queue" not in st.session_state:
|
||||
st.session_state.queue: list[dict] = _load_jsonl(_QUEUE_FILE)
|
||||
|
||||
if "labeled" not in st.session_state:
|
||||
st.session_state.labeled: list[dict] = _load_jsonl(_SCORE_FILE)
|
||||
st.session_state.labeled_keys: set[str] = {
|
||||
_entry_key(r) for r in st.session_state.labeled
|
||||
}
|
||||
|
||||
if "idx" not in st.session_state:
|
||||
# Start past already-labeled entries in the queue
|
||||
labeled_keys = st.session_state.labeled_keys
|
||||
for i, entry in enumerate(st.session_state.queue):
|
||||
if _entry_key(entry) not in labeled_keys:
|
||||
st.session_state.idx = i
|
||||
break
|
||||
else:
|
||||
st.session_state.idx = len(st.session_state.queue)
|
||||
|
||||
if "history" not in st.session_state:
|
||||
st.session_state.history: list[tuple[int, str]] = [] # (queue_idx, label)
|
||||
|
||||
|
||||
# ── Sidebar stats ────────────────────────────────────────────────────────────
|
||||
|
||||
with st.sidebar:
|
||||
labeled = st.session_state.labeled
|
||||
queue = st.session_state.queue
|
||||
unlabeled = [e for e in queue if _entry_key(e) not in st.session_state.labeled_keys]
|
||||
|
||||
st.metric("✅ Labeled", len(labeled))
|
||||
st.metric("📥 Queue", len(unlabeled))
|
||||
|
||||
if labeled:
|
||||
st.caption("**Label distribution**")
|
||||
counts = {lbl: 0 for lbl in LABELS}
|
||||
for r in labeled:
|
||||
counts[r.get("label", "")] = counts.get(r.get("label", ""), 0) + 1
|
||||
for lbl in LABELS:
|
||||
m = _LABEL_META[lbl]
|
||||
st.caption(f"{m['emoji']} {lbl}: **{counts[lbl]}**")
|
||||
|
||||
|
||||
# ── Tabs ─────────────────────────────────────────────────────────────────────
|
||||
|
||||
tab_label, tab_fetch, tab_stats = st.tabs(["🃏 Label", "📥 Fetch", "📊 Stats"])
|
||||
|
||||
|
||||
# ══════════════════════════════════════════════════════════════════════════════
|
||||
# FETCH TAB
|
||||
# ══════════════════════════════════════════════════════════════════════════════
|
||||
|
||||
with tab_fetch:
|
||||
accounts = _load_config()
|
||||
|
||||
if not accounts:
|
||||
st.warning(
|
||||
f"No accounts configured. Copy `config/label_tool.yaml.example` → "
|
||||
f"`config/label_tool.yaml` and add your IMAP accounts.",
|
||||
icon="⚠️",
|
||||
)
|
||||
else:
|
||||
st.markdown(f"**{len(accounts)} account(s) configured:**")
|
||||
for acc in accounts:
|
||||
st.caption(f"• {acc.get('name', acc.get('username'))} ({acc.get('host')})")
|
||||
|
||||
col_days, col_limit = st.columns(2)
|
||||
days = col_days.number_input("Days back", min_value=7, max_value=730, value=180)
|
||||
limit = col_limit.number_input("Max emails per account", min_value=10, max_value=1000, value=150)
|
||||
|
||||
all_accs = [a.get("name", a.get("username")) for a in accounts]
|
||||
selected = st.multiselect("Accounts to fetch", all_accs, default=all_accs)
|
||||
|
||||
if st.button("📥 Fetch from IMAP", disabled=not accounts or not selected, type="primary"):
|
||||
existing_keys = {_entry_key(e) for e in st.session_state.queue}
|
||||
existing_keys.update(st.session_state.labeled_keys)
|
||||
|
||||
fetched_all: list[dict] = []
|
||||
status = st.status("Fetching…", expanded=True)
|
||||
_live = status.empty()
|
||||
|
||||
for acc in accounts:
|
||||
name = acc.get("name", acc.get("username"))
|
||||
if name not in selected:
|
||||
continue
|
||||
status.write(f"Connecting to **{name}**…")
|
||||
try:
|
||||
emails = _fetch_account(
|
||||
acc, days=int(days), limit=int(limit),
|
||||
known_keys=existing_keys,
|
||||
progress_cb=lambda p, msg: _live.markdown(f"⏳ {msg}"),
|
||||
)
|
||||
_live.empty()
|
||||
fetched_all.extend(emails)
|
||||
status.write(f"✓ {name}: {len(emails)} new emails")
|
||||
except Exception as e:
|
||||
_live.empty()
|
||||
status.write(f"✗ {name}: {e}")
|
||||
|
||||
if fetched_all:
|
||||
_save_jsonl(_QUEUE_FILE, st.session_state.queue + fetched_all)
|
||||
st.session_state.queue = _load_jsonl(_QUEUE_FILE)
|
||||
# Reset idx to first unlabeled
|
||||
labeled_keys = st.session_state.labeled_keys
|
||||
for i, entry in enumerate(st.session_state.queue):
|
||||
if _entry_key(entry) not in labeled_keys:
|
||||
st.session_state.idx = i
|
||||
break
|
||||
status.update(label=f"Done — {len(fetched_all)} new emails added to queue", state="complete")
|
||||
else:
|
||||
status.update(label="No new emails found (all already in queue or score file)", state="complete")
|
||||
|
||||
|
||||
# ══════════════════════════════════════════════════════════════════════════════
|
||||
# LABEL TAB
|
||||
# ══════════════════════════════════════════════════════════════════════════════
|
||||
|
||||
with tab_label:
|
||||
queue = st.session_state.queue
|
||||
labeled_keys = st.session_state.labeled_keys
|
||||
idx = st.session_state.idx
|
||||
|
||||
# Advance idx past already-labeled entries
|
||||
while idx < len(queue) and _entry_key(queue[idx]) in labeled_keys:
|
||||
idx += 1
|
||||
st.session_state.idx = idx
|
||||
|
||||
unlabeled = [e for e in queue if _entry_key(e) not in labeled_keys]
|
||||
total_in_queue = len(queue)
|
||||
n_labeled = len(st.session_state.labeled)
|
||||
|
||||
if not queue:
|
||||
st.info("Queue is empty — go to **Fetch** to pull emails from IMAP.", icon="📥")
|
||||
elif not unlabeled:
|
||||
st.success(
|
||||
f"🎉 All {n_labeled} emails labeled! Go to **Stats** to review and export.",
|
||||
icon="✅",
|
||||
)
|
||||
else:
|
||||
# Progress
|
||||
labeled_in_queue = total_in_queue - len(unlabeled)
|
||||
progress_pct = labeled_in_queue / total_in_queue if total_in_queue else 0
|
||||
st.progress(progress_pct, text=f"{labeled_in_queue} / {total_in_queue} labeled in queue")
|
||||
|
||||
# Current email
|
||||
entry = queue[idx]
|
||||
|
||||
# Card HTML
|
||||
subj = entry.get("subject", "(no subject)") or "(no subject)"
|
||||
from_ = entry.get("from_addr", "") or ""
|
||||
date_ = entry.get("date", "") or ""
|
||||
acct = entry.get("account", "") or ""
|
||||
body = (entry.get("body") or "").strip()
|
||||
|
||||
st.markdown(
|
||||
f"""<div class="email-card">
|
||||
<div class="card-meta">{_to_html(from_)} · {_to_html(date_[:16])} · <em>{_to_html(acct)}</em></div>
|
||||
<div class="card-subject">{_to_html(subj)}</div>
|
||||
<div class="card-body">{_to_html(body[:500], newlines_to_br=True)}</div>
|
||||
</div>""",
|
||||
unsafe_allow_html=True,
|
||||
)
|
||||
if len(body) > 500:
|
||||
with st.expander("Show full body"):
|
||||
st.text(body)
|
||||
|
||||
# Stack hint (visual depth)
|
||||
st.markdown('<div class="card-stack-hint"></div>', unsafe_allow_html=True)
|
||||
st.markdown('<div class="card-stack-hint2"></div>', unsafe_allow_html=True)
|
||||
|
||||
st.markdown("") # spacer
|
||||
|
||||
# ── Bucket buttons ────────────────────────────────────────────────
|
||||
def _do_label(label: str) -> None:
|
||||
row = {"subject": entry.get("subject", ""), "body": body[:600], "label": label}
|
||||
st.session_state.labeled.append(row)
|
||||
st.session_state.labeled_keys.add(_entry_key(entry))
|
||||
_append_jsonl(_SCORE_FILE, row)
|
||||
st.session_state.history.append((idx, label))
|
||||
# Advance
|
||||
next_idx = idx + 1
|
||||
while next_idx < len(queue) and _entry_key(queue[next_idx]) in labeled_keys:
|
||||
next_idx += 1
|
||||
st.session_state.idx = next_idx
|
||||
|
||||
# Pre-compute per-label counts once
|
||||
_counts: dict[str, int] = {}
|
||||
for _r in st.session_state.labeled:
|
||||
_lbl_r = _r.get("label", "")
|
||||
_counts[_lbl_r] = _counts.get(_lbl_r, 0) + 1
|
||||
|
||||
row1_cols = st.columns(3)
|
||||
row2_cols = st.columns(3)
|
||||
row3_cols = st.columns(3)
|
||||
bucket_pairs = [
|
||||
(row1_cols[0], "interview_scheduled"),
|
||||
(row1_cols[1], "offer_received"),
|
||||
(row1_cols[2], "rejected"),
|
||||
(row2_cols[0], "positive_response"),
|
||||
(row2_cols[1], "survey_received"),
|
||||
(row2_cols[2], "neutral"),
|
||||
(row3_cols[0], "event_rescheduled"),
|
||||
(row3_cols[1], "unrelated"),
|
||||
(row3_cols[2], "digest"),
|
||||
]
|
||||
for col, lbl in bucket_pairs:
|
||||
m = _LABEL_META[lbl]
|
||||
cnt = _counts.get(lbl, 0)
|
||||
label_display = f"{m['emoji']} **{lbl}** [{cnt}]\n`{m['key']}`"
|
||||
if col.button(label_display, key=f"lbl_{lbl}", use_container_width=True):
|
||||
_do_label(lbl)
|
||||
st.rerun()
|
||||
|
||||
# ── Wildcard label ─────────────────────────────────────────────────
|
||||
if "show_custom" not in st.session_state:
|
||||
st.session_state.show_custom = False
|
||||
|
||||
other_col, _ = st.columns([1, 2])
|
||||
if other_col.button("🏷️ Other… `0`", key="lbl_other_toggle", use_container_width=True):
|
||||
st.session_state.show_custom = not st.session_state.show_custom
|
||||
st.rerun()
|
||||
|
||||
if st.session_state.get("show_custom"):
|
||||
custom_cols = st.columns([3, 1])
|
||||
custom_val = custom_cols[0].text_input(
|
||||
"Custom label:", key="custom_label_text",
|
||||
placeholder="e.g. linkedin_outreach",
|
||||
label_visibility="collapsed",
|
||||
)
|
||||
if custom_cols[1].button(
|
||||
"✓ Apply", key="apply_custom", type="primary",
|
||||
disabled=not (custom_val or "").strip(),
|
||||
):
|
||||
_do_label(custom_val.strip().lower().replace(" ", "_"))
|
||||
st.session_state.show_custom = False
|
||||
st.rerun()
|
||||
|
||||
# ── Navigation ────────────────────────────────────────────────────
|
||||
st.markdown("")
|
||||
nav_cols = st.columns([2, 1, 1, 1])
|
||||
|
||||
remaining = len(unlabeled) - 1
|
||||
nav_cols[0].caption(f"**{remaining}** remaining · Keys: 1–9 = label, 0 = other, S = skip, U = undo")
|
||||
|
||||
if nav_cols[1].button("↩ Undo", disabled=not st.session_state.history, use_container_width=True):
|
||||
prev_idx, prev_label = st.session_state.history.pop()
|
||||
# Remove the last labeled entry
|
||||
if st.session_state.labeled:
|
||||
removed = st.session_state.labeled.pop()
|
||||
st.session_state.labeled_keys.discard(_entry_key(removed))
|
||||
_save_jsonl(_SCORE_FILE, st.session_state.labeled)
|
||||
st.session_state.idx = prev_idx
|
||||
st.rerun()
|
||||
|
||||
if nav_cols[2].button("→ Skip", use_container_width=True):
|
||||
next_idx = idx + 1
|
||||
while next_idx < len(queue) and _entry_key(queue[next_idx]) in labeled_keys:
|
||||
next_idx += 1
|
||||
st.session_state.idx = next_idx
|
||||
st.rerun()
|
||||
|
||||
if nav_cols[3].button("🗑️ Discard", use_container_width=True):
|
||||
# Remove from queue entirely — not written to score file
|
||||
st.session_state.queue = [e for e in queue if _entry_key(e) != _entry_key(entry)]
|
||||
_save_jsonl(_QUEUE_FILE, st.session_state.queue)
|
||||
next_idx = min(idx, len(st.session_state.queue) - 1)
|
||||
while next_idx < len(st.session_state.queue) and _entry_key(st.session_state.queue[next_idx]) in labeled_keys:
|
||||
next_idx += 1
|
||||
st.session_state.idx = max(next_idx, 0)
|
||||
st.rerun()
|
||||
|
||||
# Keyboard shortcut capture (JS → hidden button click)
|
||||
st.components.v1.html(
|
||||
"""<script>
|
||||
document.addEventListener('keydown', function(e) {
|
||||
if (e.target.tagName === 'INPUT' || e.target.tagName === 'TEXTAREA') return;
|
||||
const keyToLabel = {
|
||||
'1':'interview_scheduled','2':'offer_received','3':'rejected',
|
||||
'4':'positive_response','5':'survey_received','6':'neutral',
|
||||
'7':'event_rescheduled','8':'unrelated','9':'digest'
|
||||
};
|
||||
const label = keyToLabel[e.key];
|
||||
if (label) {
|
||||
const btns = window.parent.document.querySelectorAll('button');
|
||||
for (const btn of btns) {
|
||||
if (btn.innerText.toLowerCase().includes(label.replace('_',' '))) {
|
||||
btn.click(); break;
|
||||
}
|
||||
}
|
||||
} else if (e.key === '0') {
|
||||
const btns = window.parent.document.querySelectorAll('button');
|
||||
for (const btn of btns) {
|
||||
if (btn.innerText.includes('Other')) { btn.click(); break; }
|
||||
}
|
||||
} else if (e.key.toLowerCase() === 's') {
|
||||
const btns = window.parent.document.querySelectorAll('button');
|
||||
for (const btn of btns) {
|
||||
if (btn.innerText.includes('Skip')) { btn.click(); break; }
|
||||
}
|
||||
} else if (e.key.toLowerCase() === 'u') {
|
||||
const btns = window.parent.document.querySelectorAll('button');
|
||||
for (const btn of btns) {
|
||||
if (btn.innerText.includes('Undo')) { btn.click(); break; }
|
||||
}
|
||||
}
|
||||
});
|
||||
</script>""",
|
||||
height=0,
|
||||
)
|
||||
|
||||
|
||||
# ══════════════════════════════════════════════════════════════════════════════
|
||||
# STATS TAB
|
||||
# ══════════════════════════════════════════════════════════════════════════════
|
||||
|
||||
with tab_stats:
|
||||
labeled = st.session_state.labeled
|
||||
|
||||
if not labeled:
|
||||
st.info("No labeled emails yet.")
|
||||
else:
|
||||
counts: dict[str, int] = {}
|
||||
for r in labeled:
|
||||
lbl = r.get("label", "")
|
||||
if lbl:
|
||||
counts[lbl] = counts.get(lbl, 0) + 1
|
||||
|
||||
st.markdown(f"**{len(labeled)} labeled emails total**")
|
||||
|
||||
# Show known labels first, then any custom labels
|
||||
all_display_labels = list(LABELS) + [l for l in counts if l not in LABELS]
|
||||
max_count = max(counts.values()) if counts else 1
|
||||
for lbl in all_display_labels:
|
||||
if lbl not in counts:
|
||||
continue
|
||||
m = _LABEL_META.get(lbl)
|
||||
emoji = m["emoji"] if m else "🏷️"
|
||||
col_name, col_bar, col_n = st.columns([3, 5, 1])
|
||||
col_name.markdown(f"{emoji} {lbl}")
|
||||
col_bar.progress(counts[lbl] / max_count)
|
||||
col_n.markdown(f"**{counts[lbl]}**")
|
||||
|
||||
st.divider()
|
||||
|
||||
# Export hint
|
||||
st.caption(
|
||||
f"Score file: `{_SCORE_FILE.relative_to(_ROOT)}` "
|
||||
f"({_SCORE_FILE.stat().st_size if _SCORE_FILE.exists() else 0:,} bytes)"
|
||||
)
|
||||
if st.button("🔄 Re-sync from disk"):
|
||||
st.session_state.labeled = _load_jsonl(_SCORE_FILE)
|
||||
st.session_state.labeled_keys = {_entry_key(r) for r in st.session_state.labeled}
|
||||
st.rerun()
|
||||
|
||||
if _SCORE_FILE.exists():
|
||||
st.download_button(
|
||||
"⬇️ Download email_score.jsonl",
|
||||
data=_SCORE_FILE.read_bytes(),
|
||||
file_name="email_score.jsonl",
|
||||
mime="application/jsonlines",
|
||||
)
|
||||
Loading…
Reference in a new issue