peregrine/scripts/task_runner.py
pyr0ball 8e36863a49
Some checks failed
CI / Backend (Python) (push) Failing after 2m15s
CI / Frontend (Vue) (push) Failing after 21s
Mirror / mirror (push) Failing after 9s
feat: Interview prep Q&A, cf-orch hardware profile, a11y fixes, dark theme
Backend
- dev-api.py: Q&A suggest endpoint, Log Contact, cf-orch node detection in wizard
  hardware step, canonical search_profiles format (profiles:[...]), connections
  settings endpoints, Resume Library endpoints
- db_migrate.py: migrations 002/003/004 — ATS columns, resume review, final
  resume struct
- discover.py: _normalize_profiles() for legacy wizard YAML format compat
- resume_optimizer.py: section-by-section resume parsing + scoring
- task_runner.py: Q&A and contact-log task types
- company_research.py: accessibility brief column wiring
- generate_cover_letter.py: restore _candidate module-level binding

Frontend
- InterviewPrepView.vue: Q&A chat tab, Log Contact form, MarkdownView rendering
- InterviewCard.vue: new reusable card component for interviews kanban
- InterviewsView.vue: rejected analytics section with stage breakdown chips
- ResumeProfileView.vue: sync with new resume store shape
- SearchPrefsView.vue: cf-orch toggle, profile format migration
- SystemSettingsView.vue: connections settings wiring
- ConnectionsSettingsView.vue: new view for integration connections
- MarkdownView.vue: new component for safe markdown rendering
- ApplyWorkspace.vue: a11y — h1→h2 demotion, aria-expanded on Q&A toggle,
  confirmation dialog on Reject action (#98 #99 #100)
- peregrine.css: explicit [data-theme="dark"] token block for light-OS users (#101),
  :focus-visible outline (#97)
- wizard.css: cf-orch hardware step styles
- WizardHardwareStep.vue: cf-orch node display, profile selection with orch option
- WizardLayout.vue: hardware step wiring

Infra
- compose.yml / compose.cloud.yml: cf-orch agent sidecar, llm.cloud.yaml mount
- Dockerfile.cfcore: cf-core editable install in image build
- HANDOFF-xanderland.md: Podman/systemd setup guide for beta tester
- podman-standalone.sh: standalone Podman run script

Tests
- test_dev_api_settings.py: remove stale worktree path bootstrap (credential_store
  now in main repo); fix job_boards fixture to use non-empty list
- test_wizard_api.py: update profiles assertion to superset check (cf-orch added);
  update step6 assertion to canonical profiles[].titles format
2026-04-14 17:01:18 -07:00

426 lines
18 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# scripts/task_runner.py
"""
Background task runner for LLM generation tasks.
Submitting a task inserts a row in background_tasks and spawns a daemon thread.
The thread calls the appropriate generator, writes results to existing tables,
and marks the task completed or failed.
Deduplication: only one queued/running task per (task_type, job_id) is allowed.
Different task types for the same job run concurrently (e.g. cover letter + research).
"""
import logging
import sqlite3
import threading
from pathlib import Path
log = logging.getLogger(__name__)
def _normalize_aihawk_resume(raw: dict) -> dict:
"""Convert a plain_text_resume.yaml (AIHawk format) into the optimizer struct.
Handles two AIHawk variants:
- Newer Peregrine wizard output: already uses bullets/start_date/end_date/career_summary
- Older raw AIHawk format: uses responsibilities (str), period ("YYYY Present")
"""
import re as _re
def _split_responsibilities(text: str) -> list[str]:
lines = [ln.strip() for ln in text.strip().splitlines() if ln.strip()]
return lines if lines else [text.strip()]
def _parse_period(period: str) -> tuple[str, str]:
parts = _re.split(r"\s*[–—-]\s*", period, maxsplit=1)
start = parts[0].strip() if parts else ""
end = parts[1].strip() if len(parts) > 1 else "Present"
return start, end
experience = []
for entry in raw.get("experience", []):
if "responsibilities" in entry:
bullets = _split_responsibilities(entry["responsibilities"])
else:
bullets = entry.get("bullets", [])
if "period" in entry:
start_date, end_date = _parse_period(entry["period"])
else:
start_date = entry.get("start_date", "")
end_date = entry.get("end_date", "Present")
experience.append({
"title": entry.get("title", ""),
"company": entry.get("company", ""),
"start_date": start_date,
"end_date": end_date,
"bullets": bullets,
})
# career_summary may be a string or absent; assessment field is a legacy bool in some profiles
career_summary = raw.get("career_summary", "")
if not isinstance(career_summary, str):
career_summary = ""
return {
"career_summary": career_summary,
"experience": experience,
"education": raw.get("education", []),
"skills": raw.get("skills", []),
"achievements": raw.get("achievements", []),
}
from scripts.db import (
DEFAULT_DB,
insert_task,
update_task_status,
update_task_stage,
update_cover_letter,
save_research,
save_optimized_resume,
)
def submit_task(db_path: Path = DEFAULT_DB, task_type: str = "",
                job_id: int = None,
                params: str | None = None) -> tuple[int, bool]:
    """Submit a background task.

    LLM task types (cover_letter, company_research, wizard_generate) are routed
    through the TaskScheduler for VRAM-aware batch scheduling.
    All other types spawn a free daemon thread as before.

    Returns (task_id, True) if a new task was queued.
    Returns (existing_id, False) if an identical task is already in-flight.
    """
    task_id, is_new = insert_task(db_path, task_type, job_id or 0, params=params)
    if not is_new:
        # Dedup hit: an identical (task_type, job_id) task is already queued/running.
        return task_id, is_new

    # Imported lazily to avoid a circular import at module load time.
    from scripts.task_scheduler import get_scheduler, LLM_TASK_TYPES

    if task_type in LLM_TASK_TYPES:
        accepted = get_scheduler(db_path, run_task_fn=_run_task).enqueue(
            task_id, task_type, job_id or 0, params
        )
        if not accepted:
            # Scheduler refused the task; surface the reason on the task row.
            update_task_status(
                db_path, task_id, "failed", error="Queue depth limit reached"
            )
    else:
        worker = threading.Thread(
            target=_run_task,
            args=(db_path, task_id, task_type, job_id or 0, params),
            daemon=True,
        )
        worker.start()
    return task_id, is_new
_WIZARD_PROMPTS: dict[str, str] = {
"career_summary": (
"Based on the following resume text, write a concise 2-4 sentence professional "
"career summary in first person. Focus on years of experience, key skills, and "
"what makes this person distinctive. Return only the summary text, no labels.\n\n"
"Resume:\n{resume_text}"
),
"expand_bullets": (
"Rewrite these rough responsibility notes as polished STAR-format bullet points "
"(Situation/Task, Action, Result). Each bullet should start with a strong action verb. "
"Return a JSON array of bullet strings only.\n\nNotes:\n{bullet_notes}"
),
"suggest_skills": (
"Based on these work experience descriptions, suggest additional skills to add to "
"a resume. Return a JSON array of skill strings only — no explanations.\n\n"
"Experience:\n{experience_text}"
),
"voice_guidelines": (
"Analyze the writing style and tone of this resume and cover letter corpus. "
"Return 3-5 concise guidelines for maintaining this person's authentic voice in "
"future cover letters (e.g. 'Uses direct, confident statements'). "
"Return a JSON array of guideline strings.\n\nContent:\n{content}"
),
"job_titles": (
"Given these job titles and resume, suggest 5-8 additional job title variations "
"this person should search for. Return a JSON array of title strings only.\n\n"
"Current titles: {current_titles}\nResume summary: {resume_text}"
),
"keywords": (
"Based on this resume and target job titles, suggest important keywords and phrases "
"to include in job applications. Return a JSON array of keyword strings.\n\n"
"Titles: {titles}\nResume: {resume_text}"
),
"blocklist": (
"Based on this resume and job search context, suggest companies, industries, or "
"keywords to blocklist (avoid in job search results). "
"Return a JSON array of strings.\n\nContext: {resume_text}"
),
"mission_notes": (
"Based on this resume, write a short personal note (1-2 sentences) about why this "
"person might genuinely care about each of these industries: music, animal_welfare, education. "
"Return a JSON object with those three industry keys and note values. "
"If the resume shows no clear connection to an industry, set its value to empty string.\n\n"
"Resume: {resume_text}"
),
}
def _run_wizard_generate(section: str, input_data: dict) -> str:
"""Run LLM generation for a wizard section. Returns result string.
Raises ValueError for unknown sections.
Raises any LLM exception on failure.
"""
template = _WIZARD_PROMPTS.get(section)
if template is None:
raise ValueError(f"Unknown wizard_generate section: {section!r}")
# Format the prompt, substituting available keys; unknown placeholders become empty string
import re as _re
def _safe_format(tmpl: str, kwargs: dict) -> str:
"""Format template substituting available keys; leaves missing keys as empty string."""
def replacer(m):
key = m.group(1)
return str(kwargs.get(key, ""))
return _re.sub(r"\{(\w+)\}", replacer, tmpl)
prompt = _safe_format(template, {k: str(v) for k, v in input_data.items()})
# Append iterative refinement context if provided
previous_result = input_data.get("previous_result", "")
feedback = input_data.get("feedback", "")
if previous_result:
prompt += f"\n\n---\nPrevious output:\n{previous_result}"
if feedback:
prompt += f"\n\nUser feedback / requested changes:\n{feedback}\n\nPlease revise accordingly."
from scripts.llm_router import LLMRouter
return LLMRouter().complete(prompt)
def _run_task(db_path: Path, task_id: int, task_type: str, job_id: int,
              params: str | None = None) -> None:
    """Thread body: dispatch on task_type, run the generator, persist the result.

    Branches that compose their own completion message call update_task_status
    and return early; branches that only write results fall through to the
    shared "completed" update at the bottom of the try block.
    """
    # job_id == 0 means a global task (e.g. discovery) with no associated job row.
    job: dict = {}
    if job_id:
        conn = sqlite3.connect(db_path)
        conn.row_factory = sqlite3.Row
        row = conn.execute("SELECT * FROM jobs WHERE id=?", (job_id,)).fetchone()
        conn.close()
        if row is None:
            update_task_status(db_path, task_id, "failed", error=f"Job {job_id} not found")
            return
        job = dict(row)

    update_task_status(db_path, task_id, "running")
    try:
        if task_type == "discovery":
            import os as _os
            # Public demo instances must not hit external job boards.
            if _os.environ.get("DEMO_MODE", "").lower() in ("1", "true", "yes"):
                update_task_status(
                    db_path, task_id, "failed",
                    error="Discovery is disabled in the public demo. Run your own instance to use this feature.",
                )
                return
            from scripts.discover import run_discovery
            from pathlib import Path as _Path
            new_count = run_discovery(db_path, config_dir=_Path(db_path).parent / "config")
            n = new_count or 0
            update_task_status(
                db_path, task_id, "completed",
                error=f"{n} new listing{'s' if n != 1 else ''} added",
            )
            return

        elif task_type == "cover_letter":
            import json as _json
            p = _json.loads(params or "{}")
            from scripts.generate_cover_letter import generate
            _cfg_dir = Path(db_path).parent / "config"
            _user_llm_cfg = _cfg_dir / "llm.yaml"
            _user_yaml = _cfg_dir / "user.yaml"
            result = generate(
                job.get("title", ""),
                job.get("company", ""),
                job.get("description", ""),
                previous_result=p.get("previous_result", ""),
                feedback=p.get("feedback", ""),
                is_jobgether=job.get("source") == "jobgether",
                config_path=_user_llm_cfg,
                user_yaml_path=_user_yaml,
            )
            update_cover_letter(db_path, job_id, result)
            # Falls through to the shared "completed" status update below.

        elif task_type == "company_research":
            from scripts.company_research import research_company
            _cfg_dir = Path(db_path).parent / "config"
            _user_llm_cfg = _cfg_dir / "llm.yaml"
            result = research_company(
                job,
                on_stage=lambda s: update_task_stage(db_path, task_id, s),
                config_path=_user_llm_cfg if _user_llm_cfg.exists() else None,
            )
            save_research(db_path, job_id=job_id, **result)
            # Falls through to the shared "completed" status update below.

        elif task_type == "enrich_descriptions":
            from scripts.enrich_descriptions import enrich_all_descriptions
            r = enrich_all_descriptions(db_path)
            errs = len(r.get("errors", []))
            msg = (
                f"{r['succeeded']} description(s) fetched, {r['failed']} failed"
                + (f", {errs} error(s)" if errs else "")
            )
            update_task_status(db_path, task_id, "completed", error=msg)
            return

        elif task_type == "scrape_url":
            from scripts.scrape_url import scrape_job_url
            fields = scrape_job_url(db_path, job_id)
            title = fields.get("title") or job.get("url", "?")
            company = fields.get("company", "")
            msg = f"{title}" + (f" @ {company}" if company else "")
            update_task_status(db_path, task_id, "completed", error=msg)
            # Auto-enrich company/salary for Craigslist jobs: re-read the row
            # because scrape_job_url may have just written these columns.
            conn = sqlite3.connect(db_path)
            conn.row_factory = sqlite3.Row
            job_row = conn.execute(
                "SELECT source, company FROM jobs WHERE id=?", (job_id,)
            ).fetchone()
            conn.close()
            if job_row and job_row["source"] == "craigslist" and not job_row["company"]:
                submit_task(db_path, "enrich_craigslist", job_id)
            return

        elif task_type == "enrich_craigslist":
            from scripts.enrich_descriptions import enrich_craigslist_fields
            extracted = enrich_craigslist_fields(db_path, job_id)
            company = extracted.get("company", "")
            msg = f"company={company}" if company else "no company found"
            update_task_status(db_path, task_id, "completed", error=msg)
            return

        elif task_type == "email_sync":
            try:
                from scripts.imap_sync import sync_all
                result = sync_all(db_path,
                                  on_stage=lambda s: update_task_stage(db_path, task_id, s))
                leads = result.get("new_leads", 0)
                todo = result.get("todo_attached", 0)
                errs = len(result.get("errors", []))
                msg = (
                    f"{result['synced']} jobs updated, "
                    f"+{result['inbound']} in, +{result['outbound']} out"
                    + (f", {leads} new lead(s)" if leads else "")
                    + (f", {todo} todo attached" if todo else "")
                    + (f", {errs} error(s)" if errs else "")
                )
                update_task_status(db_path, task_id, "completed", error=msg)
                return
            except FileNotFoundError:
                # Raised when no email credentials/config file exists yet.
                update_task_status(db_path, task_id, "failed",
                                   error="Email not configured — go to Settings → Email")
                return

        elif task_type == "wizard_generate":
            import json as _json
            p = _json.loads(params or "{}")
            section = p.get("section", "")
            input_data = p.get("input", {})
            if not section:
                raise ValueError("wizard_generate: 'section' key is required in params")
            result = _run_wizard_generate(section, input_data)
            # The result payload rides in the task's error column as JSON.
            update_task_status(
                db_path, task_id, "completed",
                error=_json.dumps({"section": section, "result": result}),
            )
            return

        elif task_type == "resume_optimize":
            import json as _json
            from scripts.resume_parser import structure_resume
            from scripts.resume_optimizer import (
                extract_jd_signals,
                prioritize_gaps,
                rewrite_for_ats,
                hallucination_check,
                render_resume_text,
            )
            from scripts.user_profile import load_user_profile
            _user_yaml = Path(db_path).parent / "config" / "user.yaml"
            description = job.get("description", "")
            resume_path = load_user_profile(str(_user_yaml)).get("resume_path", "")

            # Parse the candidate's resume: prefer the configured resume file,
            # fall back to the wizard's plain_text_resume.yaml, else an empty parse.
            update_task_stage(db_path, task_id, "parsing resume")
            _plain_yaml = Path(db_path).parent / "config" / "plain_text_resume.yaml"
            if resume_path and Path(resume_path).exists():
                resume_text = Path(resume_path).read_text(errors="replace")
                resume_struct, parse_err = structure_resume(resume_text)
            elif _plain_yaml.exists():
                import yaml as _yaml
                _raw = _yaml.safe_load(_plain_yaml.read_text(encoding="utf-8")) or {}
                resume_struct = _normalize_aihawk_resume(_raw)
                resume_text = resume_struct.get("career_summary", "")
                parse_err = ""
            else:
                resume_text = ""
                resume_struct, parse_err = structure_resume("")

            # Extract keyword gaps and build gap report (free tier)
            update_task_stage(db_path, task_id, "extracting keyword gaps")
            gaps = extract_jd_signals(description, resume_text)
            prioritized = prioritize_gaps(gaps, resume_struct)
            gap_report = _json.dumps(prioritized, indent=2)

            p = _json.loads(params or "{}")
            selected_gaps = p.get("selected_gaps", None)
            if selected_gaps is not None:
                # Caller narrowed the rewrite to a subset of gap terms.
                selected_set = set(selected_gaps)
                prioritized = [g for g in prioritized if g.get("term") in selected_set]

            # Full rewrite (paid tier only) → enters awaiting_review, not completed
            if p.get("full_rewrite", False):
                update_task_stage(db_path, task_id, "rewriting resume sections")
                candidate_voice = load_user_profile(str(_user_yaml)).get("candidate_voice", "")
                rewritten = rewrite_for_ats(resume_struct, prioritized, job, candidate_voice)
                if hallucination_check(resume_struct, rewritten):
                    from scripts.resume_optimizer import build_review_diff
                    from scripts.db import save_resume_draft
                    draft = build_review_diff(resume_struct, rewritten)
                    # Attach gap report to draft for reference in the review UI
                    draft["gap_report"] = prioritized
                    save_resume_draft(db_path, job_id=job_id,
                                      draft_json=_json.dumps(draft))
                    # Save gap report now; final text written after user review
                    save_optimized_resume(db_path, job_id=job_id,
                                          text="", gap_report=gap_report)
                    # Park task in awaiting_review — finalize endpoint resolves it
                    update_task_status(db_path, task_id, "awaiting_review")
                    return
                else:
                    # Rewrite rejected: keep only the gap report and finish normally.
                    log.warning("[task_runner] resume_optimize hallucination check failed for job %d", job_id)
                    save_optimized_resume(db_path, job_id=job_id,
                                          text="", gap_report=gap_report)
            else:
                # Gap-only run (free tier): save report, no draft
                save_optimized_resume(db_path, job_id=job_id,
                                      text="", gap_report=gap_report)
            # Falls through to the shared "completed" status update below.

        elif task_type == "prepare_training":
            from scripts.prepare_training_data import build_records, write_jsonl, DEFAULT_OUTPUT
            records = build_records()
            write_jsonl(records, DEFAULT_OUTPUT)
            n = len(records)
            update_task_status(
                db_path, task_id, "completed",
                error=f"{n} training pair{'s' if n != 1 else ''} extracted",
            )
            return

        else:
            raise ValueError(f"Unknown task_type: {task_type!r}")

        update_task_status(db_path, task_id, "completed")
    except BaseException as exc:
        # BaseException catches SystemExit (from companyScraper sys.exit calls)
        # in addition to regular exceptions.
        update_task_status(db_path, task_id, "failed", error=str(exc))