From 64554dbef1a72feca8ecd9e3ab08d0f06c2c0651 Mon Sep 17 00:00:00 2001 From: pyr0ball Date: Sat, 4 Apr 2026 22:17:42 -0700 Subject: [PATCH] feat(#43): numbered SQL migration runner (Rails-style) - migrations/001_baseline.sql: full schema baseline (all tables/cols) - scripts/db_migrate.py: apply sorted *.sql files, track in schema_migrations - Wired into FastAPI startup and Streamlit app.py startup - Replaces ad-hoc digest_queue CREATE in _startup() - 6 tests covering apply, idempotency, partial apply, failure rollback - docs/developer-guide/contributing.md: migration authoring guide --- app/app.py | 2 + dev-api.py | 17 +-- docs/developer-guide/contributing.md | 17 +++ migrations/001_baseline.sql | 97 ++++++++++++++++++ scripts/db_migrate.py | 73 +++++++++++++ tests/test_db_migrate.py | 148 +++++++++++++++++++++++++++ 6 files changed, 340 insertions(+), 14 deletions(-) create mode 100644 migrations/001_baseline.sql create mode 100644 scripts/db_migrate.py create mode 100644 tests/test_db_migrate.py diff --git a/app/app.py b/app/app.py index c61fe4c..efa6e51 100644 --- a/app/app.py +++ b/app/app.py @@ -26,6 +26,7 @@ IS_DEMO = os.environ.get("DEMO_MODE", "").lower() in ("1", "true", "yes") import streamlit as st from scripts.db import DEFAULT_DB, init_db, get_active_tasks +from scripts.db_migrate import migrate_db from app.feedback import inject_feedback_button from app.cloud_session import resolve_session, get_db_path, get_config_dir, get_cloud_tier import sqlite3 @@ -41,6 +42,7 @@ st.set_page_config( resolve_session("peregrine") init_db(get_db_path()) +migrate_db(Path(get_db_path())) # Demo tier — initialize once per session (cookie persistence handled client-side) if IS_DEMO and "simulated_tier" not in st.session_state: diff --git a/dev-api.py b/dev-api.py index 7fc57e4..02ff9bb 100644 --- a/dev-api.py +++ b/dev-api.py @@ -35,6 +35,7 @@ if str(PEREGRINE_ROOT) not in sys.path: from circuitforge_core.config.settings import load_env as _load_env # noqa: E402 from 
scripts.credential_store import get_credential, set_credential, delete_credential # noqa: E402 +from scripts.db_migrate import migrate_db # noqa: E402 DB_PATH = os.environ.get("STAGING_DB", "/devl/job-seeker/staging.db") @@ -132,23 +133,11 @@ def _strip_html(text: str | None) -> str | None: @app.on_event("startup") def _startup(): - """Load .env then ensure digest_queue table exists.""" + """Load .env then run pending SQLite migrations.""" # Load .env before any runtime env reads — safe because startup doesn't run # when dev_api is imported by tests (only when uvicorn actually starts). _load_env(PEREGRINE_ROOT / ".env") - db = _get_db() - try: - db.execute(""" - CREATE TABLE IF NOT EXISTS digest_queue ( - id INTEGER PRIMARY KEY, - job_contact_id INTEGER NOT NULL REFERENCES job_contacts(id), - created_at TEXT DEFAULT (datetime('now')), - UNIQUE(job_contact_id) - ) - """) - db.commit() - finally: - db.close() + migrate_db(Path(DB_PATH)) # ── Link extraction helpers ─────────────────────────────────────────────── diff --git a/docs/developer-guide/contributing.md b/docs/developer-guide/contributing.md index d160182..e4d6261 100644 --- a/docs/developer-guide/contributing.md +++ b/docs/developer-guide/contributing.md @@ -102,6 +102,23 @@ Before opening a pull request: --- +## Database Migrations + +Peregrine uses a numbered SQL migration system (Rails-style). Each migration is a `.sql` file in the `migrations/` directory at the repo root, named `NNN_description.sql` (e.g. `002_add_foo_column.sql`). Applied migrations are tracked in a `schema_migrations` table in each user database. + +### Adding a migration + +1. Create `migrations/NNN_description.sql` where `NNN` is the next sequential number (zero-padded to 3 digits). +2. Write standard SQL — `CREATE TABLE IF NOT EXISTS`, `ALTER TABLE ADD COLUMN`, etc. Keep each migration idempotent where possible. +3. 
Do **not** modify `scripts/db.py`'s legacy `_MIGRATIONS` lists — those are superseded and will be removed once all active databases have been bootstrapped by the migration runner. +4. The runner (`scripts/db_migrate.py`) applies pending migrations at startup automatically (both FastAPI and Streamlit paths call `migrate_db(db_path)`). + +### Rollbacks + +SQLite does not support transactional DDL for all statement types. Write forward-only migrations. If you need to undo a schema change, add a new migration that reverses it. + +--- + ## What NOT to Do - Do not commit `config/user.yaml`, `config/notion.yaml`, `config/email.yaml`, `config/adzuna.yaml`, or any `config/integrations/*.yaml` — all are gitignored diff --git a/migrations/001_baseline.sql b/migrations/001_baseline.sql new file mode 100644 index 0000000..36e3526 --- /dev/null +++ b/migrations/001_baseline.sql @@ -0,0 +1,97 @@ +-- Migration 001: Baseline schema +-- Captures the full schema as of v0.8.5 (all columns including those added via ALTER TABLE) + +CREATE TABLE IF NOT EXISTS jobs ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + title TEXT, + company TEXT, + url TEXT UNIQUE, + source TEXT, + location TEXT, + is_remote INTEGER DEFAULT 0, + salary TEXT, + description TEXT, + match_score REAL, + keyword_gaps TEXT, + date_found TEXT, + status TEXT DEFAULT 'pending', + notion_page_id TEXT, + cover_letter TEXT, + applied_at TEXT, + interview_date TEXT, + rejection_stage TEXT, + phone_screen_at TEXT, + interviewing_at TEXT, + offer_at TEXT, + hired_at TEXT, + survey_at TEXT, + calendar_event_id TEXT, + optimized_resume TEXT, + ats_gap_report TEXT +); + +CREATE TABLE IF NOT EXISTS job_contacts ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + job_id INTEGER, + direction TEXT, + subject TEXT, + from_addr TEXT, + to_addr TEXT, + body TEXT, + received_at TEXT, + is_response_needed INTEGER DEFAULT 0, + responded_at TEXT, + message_id TEXT, + stage_signal TEXT, + suggestion_dismissed INTEGER DEFAULT 0 +); + +CREATE TABLE IF NOT 
EXISTS company_research ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + job_id INTEGER UNIQUE, + generated_at TEXT, + company_brief TEXT, + ceo_brief TEXT, + talking_points TEXT, + raw_output TEXT, + tech_brief TEXT, + funding_brief TEXT, + competitors_brief TEXT, + red_flags TEXT, + scrape_used INTEGER DEFAULT 0, + accessibility_brief TEXT +); + +CREATE TABLE IF NOT EXISTS background_tasks ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + task_type TEXT, + job_id INTEGER, + params TEXT, + status TEXT DEFAULT 'pending', + error TEXT, + created_at TEXT, + started_at TEXT, + finished_at TEXT, + stage TEXT, + updated_at TEXT +); + +CREATE TABLE IF NOT EXISTS survey_responses ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + job_id INTEGER, + survey_name TEXT, + received_at TEXT, + source TEXT, + raw_input TEXT, + image_path TEXT, + mode TEXT, + llm_output TEXT, + reported_score REAL, + created_at TEXT +); + +CREATE TABLE IF NOT EXISTS digest_queue ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + job_contact_id INTEGER UNIQUE, + created_at TEXT +); diff --git a/scripts/db_migrate.py b/scripts/db_migrate.py new file mode 100644 index 0000000..bbb407f --- /dev/null +++ b/scripts/db_migrate.py @@ -0,0 +1,73 @@ +""" +db_migrate.py — Rails-style numbered SQL migration runner for Peregrine user DBs. + +Migration files live in migrations/ (sibling to this script's parent directory), +named NNN_description.sql (e.g. 001_baseline.sql). They are applied in sorted +order and tracked in the schema_migrations table so each runs exactly once. 
+ +Usage: + from scripts.db_migrate import migrate_db + migrate_db(Path("/path/to/user.db")) +""" + +import logging +import sqlite3 +from pathlib import Path + +log = logging.getLogger(__name__) + +# Resolved at import time: peregrine repo root / migrations/ +_MIGRATIONS_DIR = Path(__file__).parent.parent / "migrations" + +_CREATE_MIGRATIONS_TABLE = """ +CREATE TABLE IF NOT EXISTS schema_migrations ( + version TEXT PRIMARY KEY, + applied_at TEXT NOT NULL DEFAULT (datetime('now')) +) +""" + + +def migrate_db(db_path: Path) -> list[str]: + """Apply any pending migrations to db_path. Returns list of applied versions.""" + applied: list[str] = [] + + con = sqlite3.connect(db_path) + try: + con.execute(_CREATE_MIGRATIONS_TABLE) + con.commit() + + if not _MIGRATIONS_DIR.is_dir(): + log.warning("migrations/ directory not found at %s — skipping", _MIGRATIONS_DIR) + return applied + + migration_files = sorted(_MIGRATIONS_DIR.glob("*.sql")) + if not migration_files: + return applied + + already_applied = { + row[0] for row in con.execute("SELECT version FROM schema_migrations") + } + + for path in migration_files: + version = path.stem # e.g. 
"001_baseline" + if version in already_applied: + continue + + sql = path.read_text(encoding="utf-8") + log.info("Applying migration %s to %s", version, db_path.name) + try: + con.executescript(sql) + con.execute( + "INSERT INTO schema_migrations (version) VALUES (?)", (version,) + ) + con.commit() + applied.append(version) + log.info("Migration %s applied successfully", version) + except Exception as exc: + con.rollback() + log.error("Migration %s failed: %s", version, exc) + raise RuntimeError(f"Migration {version} failed: {exc}") from exc + finally: + con.close() + + return applied diff --git a/tests/test_db_migrate.py b/tests/test_db_migrate.py new file mode 100644 index 0000000..8da4a24 --- /dev/null +++ b/tests/test_db_migrate.py @@ -0,0 +1,148 @@ +"""Tests for scripts/db_migrate.py — numbered SQL migration runner.""" + +import sqlite3 +import textwrap +from pathlib import Path + +import pytest + +from scripts.db_migrate import migrate_db + + +# ── helpers ─────────────────────────────────────────────────────────────────── + +def _applied(db_path: Path) -> list[str]: + con = sqlite3.connect(db_path) + try: + rows = con.execute("SELECT version FROM schema_migrations ORDER BY version").fetchall() + return [r[0] for r in rows] + finally: + con.close() + + +def _tables(db_path: Path) -> set[str]: + con = sqlite3.connect(db_path) + try: + rows = con.execute( + "SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%'" + ).fetchall() + return {r[0] for r in rows} + finally: + con.close() + + +# ── tests ────────────────────────────────────────────────────────────────────── + +def test_creates_schema_migrations_table(tmp_path): + """Running against an empty DB creates the tracking table.""" + db = tmp_path / "test.db" + (tmp_path / "migrations").mkdir() # empty migrations dir + # Patch the module-level _MIGRATIONS_DIR + import scripts.db_migrate as m + orig = m._MIGRATIONS_DIR + m._MIGRATIONS_DIR = tmp_path / "migrations" + try: + 
migrate_db(db) + assert "schema_migrations" in _tables(db) + finally: + m._MIGRATIONS_DIR = orig + + +def test_applies_migration_file(tmp_path): + """A .sql file in migrations/ is applied and recorded.""" + db = tmp_path / "test.db" + mdir = tmp_path / "migrations" + mdir.mkdir() + (mdir / "001_test.sql").write_text( + "CREATE TABLE IF NOT EXISTS widgets (id INTEGER PRIMARY KEY, name TEXT);" + ) + + import scripts.db_migrate as m + orig = m._MIGRATIONS_DIR + m._MIGRATIONS_DIR = mdir + try: + applied = migrate_db(db) + assert applied == ["001_test"] + assert "widgets" in _tables(db) + assert _applied(db) == ["001_test"] + finally: + m._MIGRATIONS_DIR = orig + + +def test_idempotent_second_run(tmp_path): + """Running migrate_db twice does not re-apply migrations.""" + db = tmp_path / "test.db" + mdir = tmp_path / "migrations" + mdir.mkdir() + (mdir / "001_test.sql").write_text( + "CREATE TABLE IF NOT EXISTS widgets (id INTEGER PRIMARY KEY, name TEXT);" + ) + + import scripts.db_migrate as m + orig = m._MIGRATIONS_DIR + m._MIGRATIONS_DIR = mdir + try: + migrate_db(db) + applied = migrate_db(db) # second run + assert applied == [] + assert _applied(db) == ["001_test"] + finally: + m._MIGRATIONS_DIR = orig + + +def test_applies_only_new_migrations(tmp_path): + """Migrations already in schema_migrations are skipped; only new ones run.""" + db = tmp_path / "test.db" + mdir = tmp_path / "migrations" + mdir.mkdir() + (mdir / "001_first.sql").write_text( + "CREATE TABLE IF NOT EXISTS first_table (id INTEGER PRIMARY KEY);" + ) + + import scripts.db_migrate as m + orig = m._MIGRATIONS_DIR + m._MIGRATIONS_DIR = mdir + try: + migrate_db(db) + + # Add a second migration + (mdir / "002_second.sql").write_text( + "CREATE TABLE IF NOT EXISTS second_table (id INTEGER PRIMARY KEY);" + ) + applied = migrate_db(db) + assert applied == ["002_second"] + assert set(_applied(db)) == {"001_first", "002_second"} + assert "second_table" in _tables(db) + finally: + m._MIGRATIONS_DIR = orig + + 
def test_migration_failure_raises(tmp_path):
    """Invalid SQL in a migration surfaces as RuntimeError and records no version."""
    import scripts.db_migrate as runner

    migrations = tmp_path / "migrations"
    migrations.mkdir()
    (migrations / "001_bad.sql").write_text("THIS IS NOT VALID SQL !!!")
    database = tmp_path / "test.db"

    # Point the runner at the temporary directory, restoring it afterwards.
    saved_dir = runner._MIGRATIONS_DIR
    runner._MIGRATIONS_DIR = migrations
    try:
        with pytest.raises(RuntimeError, match="001_bad"):
            migrate_db(database)
        assert _applied(database) == []
    finally:
        runner._MIGRATIONS_DIR = saved_dir


def test_baseline_migration_runs(tmp_path):
    """The repository's own 001_baseline.sql applies to an empty database."""
    database = tmp_path / "test.db"

    versions = migrate_db(database)

    assert "001_baseline" in versions
    required = {
        "jobs",
        "job_contacts",
        "company_research",
        "background_tasks",
        "survey_responses",
        "digest_queue",
        "schema_migrations",
    }
    assert required.issubset(_tables(database))