feat(#43): numbered SQL migration runner (Rails-style)
Some checks failed
CI / test (push) Failing after 19s

- migrations/001_baseline.sql: full schema baseline (all tables/cols)
- scripts/db_migrate.py: apply sorted *.sql files, track in schema_migrations
- Wired into FastAPI startup and Streamlit app.py startup
- Replaces ad-hoc digest_queue CREATE in _startup()
- 6 tests covering apply, idempotency, partial apply, failure rollback
- docs/developer-guide/contributing.md: migration authoring guide
This commit is contained in:
pyr0ball 2026-04-04 22:17:42 -07:00
parent 065c02feb7
commit 64554dbef1
6 changed files with 340 additions and 14 deletions

View file

@ -26,6 +26,7 @@ IS_DEMO = os.environ.get("DEMO_MODE", "").lower() in ("1", "true", "yes")
import streamlit as st import streamlit as st
from scripts.db import DEFAULT_DB, init_db, get_active_tasks from scripts.db import DEFAULT_DB, init_db, get_active_tasks
from scripts.db_migrate import migrate_db
from app.feedback import inject_feedback_button from app.feedback import inject_feedback_button
from app.cloud_session import resolve_session, get_db_path, get_config_dir, get_cloud_tier from app.cloud_session import resolve_session, get_db_path, get_config_dir, get_cloud_tier
import sqlite3 import sqlite3
@ -41,6 +42,7 @@ st.set_page_config(
resolve_session("peregrine") resolve_session("peregrine")
init_db(get_db_path()) init_db(get_db_path())
migrate_db(Path(get_db_path()))
# Demo tier — initialize once per session (cookie persistence handled client-side) # Demo tier — initialize once per session (cookie persistence handled client-side)
if IS_DEMO and "simulated_tier" not in st.session_state: if IS_DEMO and "simulated_tier" not in st.session_state:

View file

@ -35,6 +35,7 @@ if str(PEREGRINE_ROOT) not in sys.path:
from circuitforge_core.config.settings import load_env as _load_env # noqa: E402 from circuitforge_core.config.settings import load_env as _load_env # noqa: E402
from scripts.credential_store import get_credential, set_credential, delete_credential # noqa: E402 from scripts.credential_store import get_credential, set_credential, delete_credential # noqa: E402
from scripts.db_migrate import migrate_db # noqa: E402
DB_PATH = os.environ.get("STAGING_DB", "/devl/job-seeker/staging.db") DB_PATH = os.environ.get("STAGING_DB", "/devl/job-seeker/staging.db")
@ -132,23 +133,11 @@ def _strip_html(text: str | None) -> str | None:
@app.on_event("startup") @app.on_event("startup")
def _startup(): def _startup():
"""Load .env then ensure digest_queue table exists.""" """Load .env then run pending SQLite migrations."""
# Load .env before any runtime env reads — safe because startup doesn't run # Load .env before any runtime env reads — safe because startup doesn't run
# when dev_api is imported by tests (only when uvicorn actually starts). # when dev_api is imported by tests (only when uvicorn actually starts).
_load_env(PEREGRINE_ROOT / ".env") _load_env(PEREGRINE_ROOT / ".env")
db = _get_db() migrate_db(Path(DB_PATH))
try:
db.execute("""
CREATE TABLE IF NOT EXISTS digest_queue (
id INTEGER PRIMARY KEY,
job_contact_id INTEGER NOT NULL REFERENCES job_contacts(id),
created_at TEXT DEFAULT (datetime('now')),
UNIQUE(job_contact_id)
)
""")
db.commit()
finally:
db.close()
# ── Link extraction helpers ─────────────────────────────────────────────── # ── Link extraction helpers ───────────────────────────────────────────────

View file

@ -102,6 +102,23 @@ Before opening a pull request:
--- ---
## Database Migrations
Peregrine uses a numbered SQL migration system (Rails-style). Each migration is a `.sql` file in the `migrations/` directory at the repo root, named `NNN_description.sql` (e.g. `002_add_foo_column.sql`). Applied migrations are tracked in a `schema_migrations` table in each user database.
### Adding a migration
1. Create `migrations/NNN_description.sql` where `NNN` is the next sequential number (zero-padded to 3 digits).
2. Write standard SQL — `CREATE TABLE IF NOT EXISTS`, `ALTER TABLE ADD COLUMN`, etc. Keep each migration idempotent where possible.
3. Do **not** modify `scripts/db.py`'s legacy `_MIGRATIONS` lists — those are superseded and will be removed once all active databases have been bootstrapped by the migration runner.
4. The runner (`scripts/db_migrate.py`) applies pending migrations at startup automatically (both FastAPI and Streamlit paths call `migrate_db(db_path)`).
### Rollbacks
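The runner has no down-migrations, and not every SQLite statement can run inside a transaction (for example `VACUUM`), so a migration cannot be relied on to undo itself. Write forward-only migrations. If you need to undo a schema change, add a new migration that reverses it.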
---
## What NOT to Do ## What NOT to Do
- Do not commit `config/user.yaml`, `config/notion.yaml`, `config/email.yaml`, `config/adzuna.yaml`, or any `config/integrations/*.yaml` — all are gitignored - Do not commit `config/user.yaml`, `config/notion.yaml`, `config/email.yaml`, `config/adzuna.yaml`, or any `config/integrations/*.yaml` — all are gitignored

View file

@ -0,0 +1,97 @@
-- Migration 001: Baseline schema
-- Captures the full schema as of v0.8.5 (all columns including those added via ALTER TABLE)
-- NOTE: every statement below uses CREATE TABLE IF NOT EXISTS, so on a database
-- where the table already exists the statement is a no-op. It does NOT add any
-- column listed here that the existing table is missing.

-- jobs: url is UNIQUE; status defaults to 'pending'.
CREATE TABLE IF NOT EXISTS jobs (
id INTEGER PRIMARY KEY AUTOINCREMENT,
title TEXT,
company TEXT,
url TEXT UNIQUE,
source TEXT,
location TEXT,
is_remote INTEGER DEFAULT 0,
salary TEXT,
description TEXT,
match_score REAL,
keyword_gaps TEXT,
date_found TEXT,
status TEXT DEFAULT 'pending',
notion_page_id TEXT,
cover_letter TEXT,
applied_at TEXT,
interview_date TEXT,
rejection_stage TEXT,
phone_screen_at TEXT,
interviewing_at TEXT,
offer_at TEXT,
hired_at TEXT,
survey_at TEXT,
calendar_event_id TEXT,
optimized_resume TEXT,
ats_gap_report TEXT
);
-- job_contacts: job_id carries no REFERENCES clause, so any link to jobs.id is
-- not enforced by the schema. NOTE(review): presumably intentional; confirm.
CREATE TABLE IF NOT EXISTS job_contacts (
id INTEGER PRIMARY KEY AUTOINCREMENT,
job_id INTEGER,
direction TEXT,
subject TEXT,
from_addr TEXT,
to_addr TEXT,
body TEXT,
received_at TEXT,
is_response_needed INTEGER DEFAULT 0,
responded_at TEXT,
message_id TEXT,
stage_signal TEXT,
suggestion_dismissed INTEGER DEFAULT 0
);
-- company_research: job_id is UNIQUE, i.e. at most one row per job_id value.
CREATE TABLE IF NOT EXISTS company_research (
id INTEGER PRIMARY KEY AUTOINCREMENT,
job_id INTEGER UNIQUE,
generated_at TEXT,
company_brief TEXT,
ceo_brief TEXT,
talking_points TEXT,
raw_output TEXT,
tech_brief TEXT,
funding_brief TEXT,
competitors_brief TEXT,
red_flags TEXT,
scrape_used INTEGER DEFAULT 0,
accessibility_brief TEXT
);
-- background_tasks: status defaults to 'pending'; timestamps are plain TEXT
-- with no DEFAULT, so the writer must supply them.
CREATE TABLE IF NOT EXISTS background_tasks (
id INTEGER PRIMARY KEY AUTOINCREMENT,
task_type TEXT,
job_id INTEGER,
params TEXT,
status TEXT DEFAULT 'pending',
error TEXT,
created_at TEXT,
started_at TEXT,
finished_at TEXT,
stage TEXT,
updated_at TEXT
);
-- survey_responses: no UNIQUE constraint, so several rows may share a job_id.
CREATE TABLE IF NOT EXISTS survey_responses (
id INTEGER PRIMARY KEY AUTOINCREMENT,
job_id INTEGER,
survey_name TEXT,
received_at TEXT,
source TEXT,
raw_input TEXT,
image_path TEXT,
mode TEXT,
llm_output TEXT,
reported_score REAL,
created_at TEXT
);
-- digest_queue: at most one queued entry per job_contacts row.
-- Constraints mirror the ad-hoc CREATE this migration replaces in _startup():
-- job_contact_id is required and references job_contacts(id), and created_at
-- is filled in by SQLite when the INSERT omits it.
CREATE TABLE IF NOT EXISTS digest_queue (
    id             INTEGER PRIMARY KEY AUTOINCREMENT,
    job_contact_id INTEGER NOT NULL UNIQUE REFERENCES job_contacts(id),
    created_at     TEXT DEFAULT (datetime('now'))
);

73
scripts/db_migrate.py Normal file
View file

@ -0,0 +1,73 @@
"""
db_migrate.py Rails-style numbered SQL migration runner for Peregrine user DBs.
Migration files live in migrations/ (sibling to this script's parent directory),
named NNN_description.sql (e.g. 001_baseline.sql). They are applied in sorted
order and tracked in the schema_migrations table so each runs exactly once.
Usage:
from scripts.db_migrate import migrate_db
migrate_db(Path("/path/to/user.db"))
"""
import logging
import sqlite3
from pathlib import Path
log = logging.getLogger(__name__)

# Resolved at import time: peregrine repo root / migrations/
_MIGRATIONS_DIR = Path(__file__).parent.parent / "migrations"

# Tracking table: one row per applied migration, keyed by the file stem.
_CREATE_MIGRATIONS_TABLE = """
CREATE TABLE IF NOT EXISTS schema_migrations (
version TEXT PRIMARY KEY,
applied_at TEXT NOT NULL DEFAULT (datetime('now'))
)
"""


def migrate_db(db_path: Path, migrations_dir: "Path | None" = None) -> list[str]:
    """Apply any pending migrations to db_path and return the applied versions.

    Every ``*.sql`` file in the migrations directory is considered, in sorted
    filename order. A file whose stem is already present in the
    ``schema_migrations`` table is skipped. Each pending file is executed
    inside a single explicit transaction together with its bookkeeping row,
    so a migration is either fully applied and recorded, or not applied at all.

    Migration files must not contain their own ``BEGIN``/``COMMIT``: the
    runner opens the transaction, and a nested ``BEGIN`` fails loudly.

    Args:
        db_path: SQLite database file to migrate (created if missing).
        migrations_dir: Directory holding the ``*.sql`` files. Defaults to the
            module-level ``_MIGRATIONS_DIR``.

    Returns:
        The versions (file stems) applied by this call, in application order.
        Empty when nothing was pending or the directory does not exist.

    Raises:
        RuntimeError: If a migration fails. Its changes are rolled back and
            the original exception is chained as the cause. Migrations
            applied earlier in the same call stay committed.
    """
    db_path = Path(db_path)  # tolerate callers that pass a plain string
    # Read the default at call time so code that patches _MIGRATIONS_DIR keeps working.
    source_dir = _MIGRATIONS_DIR if migrations_dir is None else Path(migrations_dir)

    applied: list[str] = []
    con = sqlite3.connect(db_path)
    try:
        con.execute(_CREATE_MIGRATIONS_TABLE)
        con.commit()

        if not source_dir.is_dir():
            log.warning("migrations/ directory not found at %s — skipping", source_dir)
            return applied

        already_applied = {
            row[0] for row in con.execute("SELECT version FROM schema_migrations")
        }

        for path in sorted(source_dir.glob("*.sql")):
            version = path.stem  # e.g. "001_baseline"
            if version in already_applied:
                continue

            sql = path.read_text(encoding="utf-8")
            log.info("Applying migration %s to %s", version, db_path.name)
            try:
                # executescript() runs in autocommit mode unless the script
                # opens a transaction itself. Without the explicit BEGIN every
                # statement would be committed as it runs and the rollback
                # below could not undo a half-applied migration.
                con.executescript(f"BEGIN;\n{sql}")
                con.execute(
                    "INSERT INTO schema_migrations (version) VALUES (?)", (version,)
                )
                con.commit()
            except Exception as exc:
                con.rollback()
                log.error("Migration %s failed: %s", version, exc)
                raise RuntimeError(f"Migration {version} failed: {exc}") from exc

            applied.append(version)
            log.info("Migration %s applied successfully", version)
    finally:
        con.close()
    return applied

148
tests/test_db_migrate.py Normal file
View file

@ -0,0 +1,148 @@
"""Tests for scripts/db_migrate.py — numbered SQL migration runner."""
import sqlite3
import textwrap
from pathlib import Path
import pytest
from scripts.db_migrate import migrate_db
# ── helpers ───────────────────────────────────────────────────────────────────
def _applied(db_path: Path) -> list[str]:
    """Return the versions stored in schema_migrations, in ascending order."""
    conn = sqlite3.connect(db_path)
    try:
        cursor = conn.execute("SELECT version FROM schema_migrations ORDER BY version")
        return [record[0] for record in cursor.fetchall()]
    finally:
        conn.close()
def _tables(db_path: Path) -> set[str]:
    """Return the names of all tables in the database, excluding sqlite_* internals."""
    query = "SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%'"
    conn = sqlite3.connect(db_path)
    try:
        names = set()
        for record in conn.execute(query).fetchall():
            names.add(record[0])
        return names
    finally:
        conn.close()
# ── tests ──────────────────────────────────────────────────────────────────────
def test_creates_schema_migrations_table(tmp_path):
    """An empty migrations directory still yields the schema_migrations table."""
    import scripts.db_migrate as runner

    empty_dir = tmp_path / "migrations"
    empty_dir.mkdir()
    target_db = tmp_path / "test.db"

    # Point the runner at the empty directory; restore the real one afterwards.
    saved_dir = runner._MIGRATIONS_DIR
    runner._MIGRATIONS_DIR = empty_dir
    try:
        migrate_db(target_db)
        assert "schema_migrations" in _tables(target_db)
    finally:
        runner._MIGRATIONS_DIR = saved_dir
def test_applies_migration_file(tmp_path):
    """A single pending .sql file is executed and its version is recorded."""
    import scripts.db_migrate as runner

    target_db = tmp_path / "test.db"
    sql_dir = tmp_path / "migrations"
    sql_dir.mkdir()
    script = sql_dir / "001_test.sql"
    script.write_text(
        "CREATE TABLE IF NOT EXISTS widgets (id INTEGER PRIMARY KEY, name TEXT);"
    )

    # Swap in the temporary migrations directory for the duration of the test.
    saved_dir = runner._MIGRATIONS_DIR
    runner._MIGRATIONS_DIR = sql_dir
    try:
        newly_applied = migrate_db(target_db)
        assert newly_applied == ["001_test"]
        assert "widgets" in _tables(target_db)
        assert _applied(target_db) == ["001_test"]
    finally:
        runner._MIGRATIONS_DIR = saved_dir
def test_idempotent_second_run(tmp_path):
    """A second migrate_db call applies nothing and leaves the record unchanged."""
    import scripts.db_migrate as runner

    target_db = tmp_path / "test.db"
    sql_dir = tmp_path / "migrations"
    sql_dir.mkdir()
    script = sql_dir / "001_test.sql"
    script.write_text(
        "CREATE TABLE IF NOT EXISTS widgets (id INTEGER PRIMARY KEY, name TEXT);"
    )

    saved_dir = runner._MIGRATIONS_DIR
    runner._MIGRATIONS_DIR = sql_dir
    try:
        migrate_db(target_db)  # first run applies 001_test
        second_run = migrate_db(target_db)
        assert second_run == []
        assert _applied(target_db) == ["001_test"]
    finally:
        runner._MIGRATIONS_DIR = saved_dir
def test_applies_only_new_migrations(tmp_path):
    """Only files added since the previous run are applied on the next run."""
    import scripts.db_migrate as runner

    target_db = tmp_path / "test.db"
    sql_dir = tmp_path / "migrations"
    sql_dir.mkdir()
    first_script = sql_dir / "001_first.sql"
    first_script.write_text(
        "CREATE TABLE IF NOT EXISTS first_table (id INTEGER PRIMARY KEY);"
    )

    saved_dir = runner._MIGRATIONS_DIR
    runner._MIGRATIONS_DIR = sql_dir
    try:
        migrate_db(target_db)

        # Drop a second migration in after the first has been recorded.
        second_script = sql_dir / "002_second.sql"
        second_script.write_text(
            "CREATE TABLE IF NOT EXISTS second_table (id INTEGER PRIMARY KEY);"
        )

        newly_applied = migrate_db(target_db)
        assert newly_applied == ["002_second"]
        assert set(_applied(target_db)) == {"001_first", "002_second"}
        assert "second_table" in _tables(target_db)
    finally:
        runner._MIGRATIONS_DIR = saved_dir
def test_migration_failure_raises(tmp_path):
    """An unparseable migration surfaces as RuntimeError and is not recorded."""
    import scripts.db_migrate as runner

    target_db = tmp_path / "test.db"
    sql_dir = tmp_path / "migrations"
    sql_dir.mkdir()
    broken_script = sql_dir / "001_bad.sql"
    broken_script.write_text("THIS IS NOT VALID SQL !!!")

    saved_dir = runner._MIGRATIONS_DIR
    runner._MIGRATIONS_DIR = sql_dir
    try:
        # The error message must name the failing migration.
        with pytest.raises(RuntimeError, match="001_bad"):
            migrate_db(target_db)
        assert _applied(target_db) == []
    finally:
        runner._MIGRATIONS_DIR = saved_dir
def test_baseline_migration_runs(tmp_path):
    """The repository's own 001_baseline.sql applies to a brand-new database."""
    fresh_db = tmp_path / "test.db"

    # No patching here: this exercises the real migrations/ directory.
    versions = migrate_db(fresh_db)
    assert "001_baseline" in versions

    required_tables = {
        "jobs",
        "job_contacts",
        "company_research",
        "background_tasks",
        "survey_responses",
        "digest_queue",
        "schema_migrations",
    }
    assert required_tables.issubset(_tables(fresh_db))