feat(#43): numbered SQL migration runner (Rails-style)
Some checks failed
CI / test (push) Failing after 19s
Some checks failed
CI / test (push) Failing after 19s
- migrations/001_baseline.sql: full schema baseline (all tables/cols) - scripts/db_migrate.py: apply sorted *.sql files, track in schema_migrations - Wired into FastAPI startup and Streamlit app.py startup - Replaces ad-hoc digest_queue CREATE in _startup() - 6 tests covering apply, idempotency, partial apply, failure rollback - docs/developer-guide/contributing.md: migration authoring guide
This commit is contained in:
parent
065c02feb7
commit
64554dbef1
6 changed files with 340 additions and 14 deletions
|
|
@ -26,6 +26,7 @@ IS_DEMO = os.environ.get("DEMO_MODE", "").lower() in ("1", "true", "yes")
|
|||
|
||||
import streamlit as st
|
||||
from scripts.db import DEFAULT_DB, init_db, get_active_tasks
|
||||
from scripts.db_migrate import migrate_db
|
||||
from app.feedback import inject_feedback_button
|
||||
from app.cloud_session import resolve_session, get_db_path, get_config_dir, get_cloud_tier
|
||||
import sqlite3
|
||||
|
|
@ -41,6 +42,7 @@ st.set_page_config(
|
|||
|
||||
resolve_session("peregrine")
|
||||
init_db(get_db_path())
|
||||
migrate_db(Path(get_db_path()))
|
||||
|
||||
# Demo tier — initialize once per session (cookie persistence handled client-side)
|
||||
if IS_DEMO and "simulated_tier" not in st.session_state:
|
||||
|
|
|
|||
17
dev-api.py
17
dev-api.py
|
|
@ -35,6 +35,7 @@ if str(PEREGRINE_ROOT) not in sys.path:
|
|||
|
||||
from circuitforge_core.config.settings import load_env as _load_env # noqa: E402
|
||||
from scripts.credential_store import get_credential, set_credential, delete_credential # noqa: E402
|
||||
from scripts.db_migrate import migrate_db # noqa: E402
|
||||
|
||||
DB_PATH = os.environ.get("STAGING_DB", "/devl/job-seeker/staging.db")
|
||||
|
||||
|
|
@ -132,23 +133,11 @@ def _strip_html(text: str | None) -> str | None:
|
|||
|
||||
@app.on_event("startup")
|
||||
def _startup():
|
||||
"""Load .env then ensure digest_queue table exists."""
|
||||
"""Load .env then run pending SQLite migrations."""
|
||||
# Load .env before any runtime env reads — safe because startup doesn't run
|
||||
# when dev_api is imported by tests (only when uvicorn actually starts).
|
||||
_load_env(PEREGRINE_ROOT / ".env")
|
||||
db = _get_db()
|
||||
try:
|
||||
db.execute("""
|
||||
CREATE TABLE IF NOT EXISTS digest_queue (
|
||||
id INTEGER PRIMARY KEY,
|
||||
job_contact_id INTEGER NOT NULL REFERENCES job_contacts(id),
|
||||
created_at TEXT DEFAULT (datetime('now')),
|
||||
UNIQUE(job_contact_id)
|
||||
)
|
||||
""")
|
||||
db.commit()
|
||||
finally:
|
||||
db.close()
|
||||
migrate_db(Path(DB_PATH))
|
||||
|
||||
|
||||
# ── Link extraction helpers ───────────────────────────────────────────────
|
||||
|
|
|
|||
|
|
@ -102,6 +102,23 @@ Before opening a pull request:
|
|||
|
||||
---
|
||||
|
||||
## Database Migrations
|
||||
|
||||
Peregrine uses a numbered SQL migration system (Rails-style). Each migration is a `.sql` file in the `migrations/` directory at the repo root, named `NNN_description.sql` (e.g. `002_add_foo_column.sql`). Applied migrations are tracked in a `schema_migrations` table in each user database.
|
||||
|
||||
### Adding a migration
|
||||
|
||||
1. Create `migrations/NNN_description.sql` where `NNN` is the next sequential number (zero-padded to 3 digits).
|
||||
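2. Write standard SQL — `CREATE TABLE IF NOT EXISTS`, `ALTER TABLE ADD COLUMN`, etc. Keep each migration idempotent where possible. Note that SQLite has no `ADD COLUMN IF NOT EXISTS`, so `ALTER TABLE ... ADD COLUMN` fails if the column already exists; for such statements the `schema_migrations` table is what keeps the migration from running twice.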
|
||||
3. Do **not** modify `scripts/db.py`'s legacy `_MIGRATIONS` lists — those are superseded and will be removed once all active databases have been bootstrapped by the migration runner.
|
||||
4. The runner (`scripts/db_migrate.py`) applies pending migrations at startup automatically (both FastAPI and Streamlit paths call `migrate_db(db_path)`).
|
||||
|
||||
### Rollbacks
|
||||
|
||||
SQLite does not support transactional DDL for all statement types. Write forward-only migrations. If you need to undo a schema change, add a new migration that reverses it.
|
||||
|
||||
---
|
||||
|
||||
## What NOT to Do
|
||||
|
||||
- Do not commit `config/user.yaml`, `config/notion.yaml`, `config/email.yaml`, `config/adzuna.yaml`, or any `config/integrations/*.yaml` — all are gitignored
|
||||
|
|
|
|||
97
migrations/001_baseline.sql
Normal file
97
migrations/001_baseline.sql
Normal file
|
|
@ -0,0 +1,97 @@
|
|||
-- Migration 001: Baseline schema
|
||||
-- Captures the full schema as of v0.8.5 (all columns including those added via ALTER TABLE)
|
||||
|
||||
-- jobs: url is UNIQUE; is_remote defaults to 0 and status to 'pending'.
-- IF NOT EXISTS makes this a no-op on databases that already have the table.
CREATE TABLE IF NOT EXISTS jobs (
    id                 INTEGER PRIMARY KEY AUTOINCREMENT,
    title              TEXT,
    company            TEXT,
    url                TEXT UNIQUE,
    source             TEXT,
    location           TEXT,
    is_remote          INTEGER DEFAULT 0,
    salary             TEXT,
    description        TEXT,
    match_score        REAL,
    keyword_gaps       TEXT,
    date_found         TEXT,
    status             TEXT DEFAULT 'pending',
    notion_page_id     TEXT,
    cover_letter       TEXT,
    applied_at         TEXT,
    interview_date     TEXT,
    rejection_stage    TEXT,
    phone_screen_at    TEXT,
    interviewing_at    TEXT,
    offer_at           TEXT,
    hired_at           TEXT,
    survey_at          TEXT,
    calendar_event_id  TEXT,
    optimized_resume   TEXT,
    ats_gap_report     TEXT
);
|
||||
|
||||
-- job_contacts: the two INTEGER flag columns (is_response_needed,
-- suggestion_dismissed) default to 0. No constraints are declared on job_id.
CREATE TABLE IF NOT EXISTS job_contacts (
    id                    INTEGER PRIMARY KEY AUTOINCREMENT,
    job_id                INTEGER,
    direction             TEXT,
    subject               TEXT,
    from_addr             TEXT,
    to_addr               TEXT,
    body                  TEXT,
    received_at           TEXT,
    is_response_needed    INTEGER DEFAULT 0,
    responded_at          TEXT,
    message_id            TEXT,
    stage_signal          TEXT,
    suggestion_dismissed  INTEGER DEFAULT 0
);
|
||||
|
||||
-- company_research: job_id is UNIQUE, so at most one row per job_id value;
-- scrape_used defaults to 0.
CREATE TABLE IF NOT EXISTS company_research (
    id                   INTEGER PRIMARY KEY AUTOINCREMENT,
    job_id               INTEGER UNIQUE,
    generated_at         TEXT,
    company_brief        TEXT,
    ceo_brief            TEXT,
    talking_points       TEXT,
    raw_output           TEXT,
    tech_brief           TEXT,
    funding_brief        TEXT,
    competitors_brief    TEXT,
    red_flags            TEXT,
    scrape_used          INTEGER DEFAULT 0,
    accessibility_brief  TEXT
);
|
||||
|
||||
-- background_tasks: status defaults to 'pending'; all timestamp columns are
-- plain TEXT with no default.
CREATE TABLE IF NOT EXISTS background_tasks (
    id           INTEGER PRIMARY KEY AUTOINCREMENT,
    task_type    TEXT,
    job_id       INTEGER,
    params       TEXT,
    status       TEXT DEFAULT 'pending',
    error        TEXT,
    created_at   TEXT,
    started_at   TEXT,
    finished_at  TEXT,
    stage        TEXT,
    updated_at   TEXT
);
|
||||
|
||||
-- survey_responses: no defaults or uniqueness constraints beyond the
-- autoincrementing primary key.
CREATE TABLE IF NOT EXISTS survey_responses (
    id              INTEGER PRIMARY KEY AUTOINCREMENT,
    job_id          INTEGER,
    survey_name     TEXT,
    received_at     TEXT,
    source          TEXT,
    raw_input       TEXT,
    image_path      TEXT,
    mode            TEXT,
    llm_output      TEXT,
    reported_score  REAL,
    created_at      TEXT
);
|
||||
|
||||
-- digest_queue: at most one queue entry per job_contacts row.
-- Keeps the constraints of the startup-time CREATE TABLE this migration
-- replaces: job_contact_id is NOT NULL, UNIQUE and references
-- job_contacts(id), and created_at defaults to the insertion time so callers
-- that insert only job_contact_id still get a timestamp.
-- IF NOT EXISTS makes this a no-op on databases that already have the table.
CREATE TABLE IF NOT EXISTS digest_queue (
    id              INTEGER PRIMARY KEY AUTOINCREMENT,
    job_contact_id  INTEGER NOT NULL UNIQUE REFERENCES job_contacts(id),
    created_at      TEXT DEFAULT (datetime('now'))
);
|
||||
73
scripts/db_migrate.py
Normal file
73
scripts/db_migrate.py
Normal file
|
|
@ -0,0 +1,73 @@
|
|||
"""
|
||||
db_migrate.py — Rails-style numbered SQL migration runner for Peregrine user DBs.
|
||||
|
||||
Migration files live in migrations/ (sibling to this script's parent directory),
|
||||
named NNN_description.sql (e.g. 001_baseline.sql). They are applied in sorted
|
||||
order and tracked in the schema_migrations table so each runs exactly once.
|
||||
|
||||
Usage:
|
||||
from scripts.db_migrate import migrate_db
|
||||
migrate_db(Path("/path/to/user.db"))
|
||||
"""
|
||||
|
||||
import logging
|
||||
import sqlite3
|
||||
from pathlib import Path
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
# Resolved at import time: peregrine repo root / migrations/
|
||||
_MIGRATIONS_DIR = Path(__file__).parent.parent / "migrations"
|
||||
|
||||
_CREATE_MIGRATIONS_TABLE = """
|
||||
CREATE TABLE IF NOT EXISTS schema_migrations (
|
||||
version TEXT PRIMARY KEY,
|
||||
applied_at TEXT NOT NULL DEFAULT (datetime('now'))
|
||||
)
|
||||
"""
|
||||
|
||||
|
||||
def migrate_db(db_path: Path) -> list[str]:
|
||||
"""Apply any pending migrations to db_path. Returns list of applied versions."""
|
||||
applied: list[str] = []
|
||||
|
||||
con = sqlite3.connect(db_path)
|
||||
try:
|
||||
con.execute(_CREATE_MIGRATIONS_TABLE)
|
||||
con.commit()
|
||||
|
||||
if not _MIGRATIONS_DIR.is_dir():
|
||||
log.warning("migrations/ directory not found at %s — skipping", _MIGRATIONS_DIR)
|
||||
return applied
|
||||
|
||||
migration_files = sorted(_MIGRATIONS_DIR.glob("*.sql"))
|
||||
if not migration_files:
|
||||
return applied
|
||||
|
||||
already_applied = {
|
||||
row[0] for row in con.execute("SELECT version FROM schema_migrations")
|
||||
}
|
||||
|
||||
for path in migration_files:
|
||||
version = path.stem # e.g. "001_baseline"
|
||||
if version in already_applied:
|
||||
continue
|
||||
|
||||
sql = path.read_text(encoding="utf-8")
|
||||
log.info("Applying migration %s to %s", version, db_path.name)
|
||||
try:
|
||||
con.executescript(sql)
|
||||
con.execute(
|
||||
"INSERT INTO schema_migrations (version) VALUES (?)", (version,)
|
||||
)
|
||||
con.commit()
|
||||
applied.append(version)
|
||||
log.info("Migration %s applied successfully", version)
|
||||
except Exception as exc:
|
||||
con.rollback()
|
||||
log.error("Migration %s failed: %s", version, exc)
|
||||
raise RuntimeError(f"Migration {version} failed: {exc}") from exc
|
||||
finally:
|
||||
con.close()
|
||||
|
||||
return applied
|
||||
148
tests/test_db_migrate.py
Normal file
148
tests/test_db_migrate.py
Normal file
|
|
@ -0,0 +1,148 @@
|
|||
"""Tests for scripts/db_migrate.py — numbered SQL migration runner."""
|
||||
|
||||
import sqlite3
|
||||
import textwrap
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from scripts.db_migrate import migrate_db
|
||||
|
||||
|
||||
# ── helpers ───────────────────────────────────────────────────────────────────
|
||||
|
||||
def _applied(db_path: Path) -> list[str]:
|
||||
con = sqlite3.connect(db_path)
|
||||
try:
|
||||
rows = con.execute("SELECT version FROM schema_migrations ORDER BY version").fetchall()
|
||||
return [r[0] for r in rows]
|
||||
finally:
|
||||
con.close()
|
||||
|
||||
|
||||
def _tables(db_path: Path) -> set[str]:
|
||||
con = sqlite3.connect(db_path)
|
||||
try:
|
||||
rows = con.execute(
|
||||
"SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%'"
|
||||
).fetchall()
|
||||
return {r[0] for r in rows}
|
||||
finally:
|
||||
con.close()
|
||||
|
||||
|
||||
# ── tests ──────────────────────────────────────────────────────────────────────
|
||||
|
||||
def test_creates_schema_migrations_table(tmp_path, monkeypatch):
    """Running against an empty DB creates the tracking table."""
    import scripts.db_migrate as m

    db = tmp_path / "test.db"
    mdir = tmp_path / "migrations"
    mdir.mkdir()  # empty migrations dir

    # monkeypatch restores the module-level _MIGRATIONS_DIR when the test
    # ends, pass or fail, replacing the manual save/try/finally dance.
    monkeypatch.setattr(m, "_MIGRATIONS_DIR", mdir)

    migrate_db(db)
    assert "schema_migrations" in _tables(db)
|
||||
|
||||
|
||||
def test_applies_migration_file(tmp_path, monkeypatch):
    """A .sql file in migrations/ is applied and recorded."""
    import scripts.db_migrate as m

    db = tmp_path / "test.db"
    mdir = tmp_path / "migrations"
    mdir.mkdir()
    (mdir / "001_test.sql").write_text(
        "CREATE TABLE IF NOT EXISTS widgets (id INTEGER PRIMARY KEY, name TEXT);"
    )

    # Point the runner at the temporary directory; undone automatically.
    monkeypatch.setattr(m, "_MIGRATIONS_DIR", mdir)

    applied = migrate_db(db)
    assert applied == ["001_test"]
    assert "widgets" in _tables(db)
    assert _applied(db) == ["001_test"]
|
||||
|
||||
|
||||
def test_idempotent_second_run(tmp_path, monkeypatch):
    """Running migrate_db twice does not re-apply migrations."""
    import scripts.db_migrate as m

    db = tmp_path / "test.db"
    mdir = tmp_path / "migrations"
    mdir.mkdir()
    (mdir / "001_test.sql").write_text(
        "CREATE TABLE IF NOT EXISTS widgets (id INTEGER PRIMARY KEY, name TEXT);"
    )

    # Point the runner at the temporary directory; undone automatically.
    monkeypatch.setattr(m, "_MIGRATIONS_DIR", mdir)

    migrate_db(db)
    applied = migrate_db(db)  # second run
    assert applied == []
    assert _applied(db) == ["001_test"]
|
||||
|
||||
|
||||
def test_applies_only_new_migrations(tmp_path, monkeypatch):
    """Migrations already in schema_migrations are skipped; only new ones run."""
    import scripts.db_migrate as m

    db = tmp_path / "test.db"
    mdir = tmp_path / "migrations"
    mdir.mkdir()
    (mdir / "001_first.sql").write_text(
        "CREATE TABLE IF NOT EXISTS first_table (id INTEGER PRIMARY KEY);"
    )

    # Point the runner at the temporary directory; undone automatically.
    monkeypatch.setattr(m, "_MIGRATIONS_DIR", mdir)

    migrate_db(db)

    # Add a second migration after the first has already been recorded.
    (mdir / "002_second.sql").write_text(
        "CREATE TABLE IF NOT EXISTS second_table (id INTEGER PRIMARY KEY);"
    )
    applied = migrate_db(db)
    assert applied == ["002_second"]
    assert set(_applied(db)) == {"001_first", "002_second"}
    assert "second_table" in _tables(db)
|
||||
|
||||
|
||||
def test_migration_failure_raises(tmp_path, monkeypatch):
    """A bad migration raises RuntimeError and does not record the version."""
    import scripts.db_migrate as m

    db = tmp_path / "test.db"
    mdir = tmp_path / "migrations"
    mdir.mkdir()
    (mdir / "001_bad.sql").write_text("THIS IS NOT VALID SQL !!!")

    # Point the runner at the temporary directory; undone automatically.
    monkeypatch.setattr(m, "_MIGRATIONS_DIR", mdir)

    with pytest.raises(RuntimeError, match="001_bad"):
        migrate_db(db)
    assert _applied(db) == []
|
||||
|
||||
|
||||
def test_baseline_migration_runs(tmp_path):
    """The real 001_baseline.sql applies cleanly to a fresh database."""
    fresh_db = tmp_path / "test.db"

    # No patching here: this exercises the repository's real migrations dir.
    versions = migrate_db(fresh_db)
    assert "001_baseline" in versions

    required = {
        "jobs",
        "job_contacts",
        "company_research",
        "background_tasks",
        "survey_responses",
        "digest_queue",
        "schema_migrations",
    }
    assert required.issubset(_tables(fresh_db))
|
||||
Loading…
Reference in a new issue