feat(#43): numbered SQL migration runner (Rails-style)
Some checks failed
CI / test (push) Failing after 19s
Some checks failed
CI / test (push) Failing after 19s
- migrations/001_baseline.sql: full schema baseline (all tables/cols) - scripts/db_migrate.py: apply sorted *.sql files, track in schema_migrations - Wired into FastAPI startup and Streamlit app.py startup - Replaces ad-hoc digest_queue CREATE in _startup() - 6 tests covering apply, idempotency, partial apply, failure rollback - docs/developer-guide/contributing.md: migration authoring guide
This commit is contained in:
parent
065c02feb7
commit
64554dbef1
6 changed files with 340 additions and 14 deletions
|
|
@ -26,6 +26,7 @@ IS_DEMO = os.environ.get("DEMO_MODE", "").lower() in ("1", "true", "yes")
|
||||||
|
|
||||||
import streamlit as st
|
import streamlit as st
|
||||||
from scripts.db import DEFAULT_DB, init_db, get_active_tasks
|
from scripts.db import DEFAULT_DB, init_db, get_active_tasks
|
||||||
|
from scripts.db_migrate import migrate_db
|
||||||
from app.feedback import inject_feedback_button
|
from app.feedback import inject_feedback_button
|
||||||
from app.cloud_session import resolve_session, get_db_path, get_config_dir, get_cloud_tier
|
from app.cloud_session import resolve_session, get_db_path, get_config_dir, get_cloud_tier
|
||||||
import sqlite3
|
import sqlite3
|
||||||
|
|
@ -41,6 +42,7 @@ st.set_page_config(
|
||||||
|
|
||||||
resolve_session("peregrine")
|
resolve_session("peregrine")
|
||||||
init_db(get_db_path())
|
init_db(get_db_path())
|
||||||
|
migrate_db(Path(get_db_path()))
|
||||||
|
|
||||||
# Demo tier — initialize once per session (cookie persistence handled client-side)
|
# Demo tier — initialize once per session (cookie persistence handled client-side)
|
||||||
if IS_DEMO and "simulated_tier" not in st.session_state:
|
if IS_DEMO and "simulated_tier" not in st.session_state:
|
||||||
|
|
|
||||||
17
dev-api.py
17
dev-api.py
|
|
@ -35,6 +35,7 @@ if str(PEREGRINE_ROOT) not in sys.path:
|
||||||
|
|
||||||
from circuitforge_core.config.settings import load_env as _load_env # noqa: E402
|
from circuitforge_core.config.settings import load_env as _load_env # noqa: E402
|
||||||
from scripts.credential_store import get_credential, set_credential, delete_credential # noqa: E402
|
from scripts.credential_store import get_credential, set_credential, delete_credential # noqa: E402
|
||||||
|
from scripts.db_migrate import migrate_db # noqa: E402
|
||||||
|
|
||||||
DB_PATH = os.environ.get("STAGING_DB", "/devl/job-seeker/staging.db")
|
DB_PATH = os.environ.get("STAGING_DB", "/devl/job-seeker/staging.db")
|
||||||
|
|
||||||
|
|
@ -132,23 +133,11 @@ def _strip_html(text: str | None) -> str | None:
|
||||||
|
|
||||||
@app.on_event("startup")
|
@app.on_event("startup")
|
||||||
def _startup():
|
def _startup():
|
||||||
"""Load .env then ensure digest_queue table exists."""
|
"""Load .env then run pending SQLite migrations."""
|
||||||
# Load .env before any runtime env reads — safe because startup doesn't run
|
# Load .env before any runtime env reads — safe because startup doesn't run
|
||||||
# when dev_api is imported by tests (only when uvicorn actually starts).
|
# when dev_api is imported by tests (only when uvicorn actually starts).
|
||||||
_load_env(PEREGRINE_ROOT / ".env")
|
_load_env(PEREGRINE_ROOT / ".env")
|
||||||
db = _get_db()
|
migrate_db(Path(DB_PATH))
|
||||||
try:
|
|
||||||
db.execute("""
|
|
||||||
CREATE TABLE IF NOT EXISTS digest_queue (
|
|
||||||
id INTEGER PRIMARY KEY,
|
|
||||||
job_contact_id INTEGER NOT NULL REFERENCES job_contacts(id),
|
|
||||||
created_at TEXT DEFAULT (datetime('now')),
|
|
||||||
UNIQUE(job_contact_id)
|
|
||||||
)
|
|
||||||
""")
|
|
||||||
db.commit()
|
|
||||||
finally:
|
|
||||||
db.close()
|
|
||||||
|
|
||||||
|
|
||||||
# ── Link extraction helpers ───────────────────────────────────────────────
|
# ── Link extraction helpers ───────────────────────────────────────────────
|
||||||
|
|
|
||||||
|
|
@ -102,6 +102,23 @@ Before opening a pull request:
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
|
## Database Migrations
|
||||||
|
|
||||||
|
Peregrine uses a numbered SQL migration system (Rails-style). Each migration is a `.sql` file in the `migrations/` directory at the repo root, named `NNN_description.sql` (e.g. `002_add_foo_column.sql`). Applied migrations are tracked in a `schema_migrations` table in each user database.
|
||||||
|
|
||||||
|
### Adding a migration
|
||||||
|
|
||||||
|
1. Create `migrations/NNN_description.sql` where `NNN` is the next sequential number (zero-padded to 3 digits).
|
||||||
|
2. Write standard SQL — `CREATE TABLE IF NOT EXISTS`, `ALTER TABLE ADD COLUMN`, etc. Keep each migration idempotent where possible.
|
||||||
|
3. Do **not** modify `scripts/db.py`'s legacy `_MIGRATIONS` lists — those are superseded and will be removed once all active databases have been bootstrapped by the migration runner.
|
||||||
|
4. The runner (`scripts/db_migrate.py`) applies pending migrations at startup automatically (both FastAPI and Streamlit paths call `migrate_db(db_path)`).
|
||||||
|
|
||||||
|
### Rollbacks
|
||||||
|
|
||||||
|
SQLite's DDL is transactional, but the runner applies each file via Python's `executescript()`, which commits any open transaction first — so a failing migration can leave its earlier statements applied. Write forward-only, idempotent migrations (`CREATE TABLE IF NOT EXISTS`, etc.). If you need to undo a schema change, add a new migration that reverses it.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
## What NOT to Do
|
## What NOT to Do
|
||||||
|
|
||||||
- Do not commit `config/user.yaml`, `config/notion.yaml`, `config/email.yaml`, `config/adzuna.yaml`, or any `config/integrations/*.yaml` — all are gitignored
|
- Do not commit `config/user.yaml`, `config/notion.yaml`, `config/email.yaml`, `config/adzuna.yaml`, or any `config/integrations/*.yaml` — all are gitignored
|
||||||
|
|
|
||||||
97
migrations/001_baseline.sql
Normal file
97
migrations/001_baseline.sql
Normal file
|
|
@ -0,0 +1,97 @@
|
||||||
|
-- Migration 001: Baseline schema
-- Captures the full schema as of v0.8.5 (all columns including those added via ALTER TABLE)

CREATE TABLE IF NOT EXISTS jobs (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    title TEXT,
    company TEXT,
    url TEXT UNIQUE,
    source TEXT,
    location TEXT,
    is_remote INTEGER DEFAULT 0,
    salary TEXT,
    description TEXT,
    match_score REAL,
    keyword_gaps TEXT,
    date_found TEXT,
    status TEXT DEFAULT 'pending',
    notion_page_id TEXT,
    cover_letter TEXT,
    applied_at TEXT,
    interview_date TEXT,
    rejection_stage TEXT,
    phone_screen_at TEXT,
    interviewing_at TEXT,
    offer_at TEXT,
    hired_at TEXT,
    survey_at TEXT,
    calendar_event_id TEXT,
    optimized_resume TEXT,
    ats_gap_report TEXT
);

CREATE TABLE IF NOT EXISTS job_contacts (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    job_id INTEGER,
    direction TEXT,
    subject TEXT,
    from_addr TEXT,
    to_addr TEXT,
    body TEXT,
    received_at TEXT,
    is_response_needed INTEGER DEFAULT 0,
    responded_at TEXT,
    message_id TEXT,
    stage_signal TEXT,
    suggestion_dismissed INTEGER DEFAULT 0
);

CREATE TABLE IF NOT EXISTS company_research (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    job_id INTEGER UNIQUE,
    generated_at TEXT,
    company_brief TEXT,
    ceo_brief TEXT,
    talking_points TEXT,
    raw_output TEXT,
    tech_brief TEXT,
    funding_brief TEXT,
    competitors_brief TEXT,
    red_flags TEXT,
    scrape_used INTEGER DEFAULT 0,
    accessibility_brief TEXT
);

CREATE TABLE IF NOT EXISTS background_tasks (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    task_type TEXT,
    job_id INTEGER,
    params TEXT,
    status TEXT DEFAULT 'pending',
    error TEXT,
    created_at TEXT,
    started_at TEXT,
    finished_at TEXT,
    stage TEXT,
    updated_at TEXT
);

CREATE TABLE IF NOT EXISTS survey_responses (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    job_id INTEGER,
    survey_name TEXT,
    received_at TEXT,
    source TEXT,
    raw_input TEXT,
    image_path TEXT,
    mode TEXT,
    llm_output TEXT,
    reported_score REAL,
    created_at TEXT
);

-- Keep digest_queue identical to the legacy ad-hoc CREATE this migration replaces
-- (NOT NULL + FK on job_contact_id, datetime('now') default on created_at), so
-- databases bootstrapped by the runner match databases created before it existed.
CREATE TABLE IF NOT EXISTS digest_queue (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    job_contact_id INTEGER NOT NULL UNIQUE REFERENCES job_contacts(id),
    created_at TEXT DEFAULT (datetime('now'))
);
|
||||||
73
scripts/db_migrate.py
Normal file
73
scripts/db_migrate.py
Normal file
|
|
@ -0,0 +1,73 @@
|
||||||
|
"""
|
||||||
|
db_migrate.py — Rails-style numbered SQL migration runner for Peregrine user DBs.
|
||||||
|
|
||||||
|
Migration files live in migrations/ (sibling to this script's parent directory),
|
||||||
|
named NNN_description.sql (e.g. 001_baseline.sql). They are applied in sorted
|
||||||
|
order and tracked in the schema_migrations table so each runs exactly once.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
from scripts.db_migrate import migrate_db
|
||||||
|
migrate_db(Path("/path/to/user.db"))
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import sqlite3
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# Resolved at import time: peregrine repo root / migrations/
|
||||||
|
_MIGRATIONS_DIR = Path(__file__).parent.parent / "migrations"
|
||||||
|
|
||||||
|
_CREATE_MIGRATIONS_TABLE = """
|
||||||
|
CREATE TABLE IF NOT EXISTS schema_migrations (
|
||||||
|
version TEXT PRIMARY KEY,
|
||||||
|
applied_at TEXT NOT NULL DEFAULT (datetime('now'))
|
||||||
|
)
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
def migrate_db(db_path: Path) -> list[str]:
|
||||||
|
"""Apply any pending migrations to db_path. Returns list of applied versions."""
|
||||||
|
applied: list[str] = []
|
||||||
|
|
||||||
|
con = sqlite3.connect(db_path)
|
||||||
|
try:
|
||||||
|
con.execute(_CREATE_MIGRATIONS_TABLE)
|
||||||
|
con.commit()
|
||||||
|
|
||||||
|
if not _MIGRATIONS_DIR.is_dir():
|
||||||
|
log.warning("migrations/ directory not found at %s — skipping", _MIGRATIONS_DIR)
|
||||||
|
return applied
|
||||||
|
|
||||||
|
migration_files = sorted(_MIGRATIONS_DIR.glob("*.sql"))
|
||||||
|
if not migration_files:
|
||||||
|
return applied
|
||||||
|
|
||||||
|
already_applied = {
|
||||||
|
row[0] for row in con.execute("SELECT version FROM schema_migrations")
|
||||||
|
}
|
||||||
|
|
||||||
|
for path in migration_files:
|
||||||
|
version = path.stem # e.g. "001_baseline"
|
||||||
|
if version in already_applied:
|
||||||
|
continue
|
||||||
|
|
||||||
|
sql = path.read_text(encoding="utf-8")
|
||||||
|
log.info("Applying migration %s to %s", version, db_path.name)
|
||||||
|
try:
|
||||||
|
con.executescript(sql)
|
||||||
|
con.execute(
|
||||||
|
"INSERT INTO schema_migrations (version) VALUES (?)", (version,)
|
||||||
|
)
|
||||||
|
con.commit()
|
||||||
|
applied.append(version)
|
||||||
|
log.info("Migration %s applied successfully", version)
|
||||||
|
except Exception as exc:
|
||||||
|
con.rollback()
|
||||||
|
log.error("Migration %s failed: %s", version, exc)
|
||||||
|
raise RuntimeError(f"Migration {version} failed: {exc}") from exc
|
||||||
|
finally:
|
||||||
|
con.close()
|
||||||
|
|
||||||
|
return applied
|
||||||
148
tests/test_db_migrate.py
Normal file
148
tests/test_db_migrate.py
Normal file
|
|
@ -0,0 +1,148 @@
|
||||||
|
"""Tests for scripts/db_migrate.py — numbered SQL migration runner."""
|
||||||
|
|
||||||
|
import sqlite3
|
||||||
|
import textwrap
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from scripts.db_migrate import migrate_db
|
||||||
|
|
||||||
|
|
||||||
|
# ── helpers ───────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def _applied(db_path: Path) -> list[str]:
|
||||||
|
con = sqlite3.connect(db_path)
|
||||||
|
try:
|
||||||
|
rows = con.execute("SELECT version FROM schema_migrations ORDER BY version").fetchall()
|
||||||
|
return [r[0] for r in rows]
|
||||||
|
finally:
|
||||||
|
con.close()
|
||||||
|
|
||||||
|
|
||||||
|
def _tables(db_path: Path) -> set[str]:
|
||||||
|
con = sqlite3.connect(db_path)
|
||||||
|
try:
|
||||||
|
rows = con.execute(
|
||||||
|
"SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%'"
|
||||||
|
).fetchall()
|
||||||
|
return {r[0] for r in rows}
|
||||||
|
finally:
|
||||||
|
con.close()
|
||||||
|
|
||||||
|
|
||||||
|
# ── tests ──────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def test_creates_schema_migrations_table(tmp_path, monkeypatch):
    """Running against an empty DB creates the tracking table.

    Uses pytest's monkeypatch fixture instead of manual save/patch/restore of
    the module-level _MIGRATIONS_DIR — it restores the attribute automatically
    even if an assertion fails mid-test.
    """
    db = tmp_path / "test.db"
    (tmp_path / "migrations").mkdir()  # empty migrations dir
    import scripts.db_migrate as m

    monkeypatch.setattr(m, "_MIGRATIONS_DIR", tmp_path / "migrations")
    migrate_db(db)
    assert "schema_migrations" in _tables(db)
|
||||||
|
|
||||||
|
|
||||||
|
def test_applies_migration_file(tmp_path, monkeypatch):
    """A .sql file in migrations/ is applied and recorded.

    monkeypatch replaces the manual try/finally patching of _MIGRATIONS_DIR.
    """
    db = tmp_path / "test.db"
    mdir = tmp_path / "migrations"
    mdir.mkdir()
    (mdir / "001_test.sql").write_text(
        "CREATE TABLE IF NOT EXISTS widgets (id INTEGER PRIMARY KEY, name TEXT);"
    )

    import scripts.db_migrate as m

    monkeypatch.setattr(m, "_MIGRATIONS_DIR", mdir)
    applied = migrate_db(db)
    assert applied == ["001_test"]
    assert "widgets" in _tables(db)
    assert _applied(db) == ["001_test"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_idempotent_second_run(tmp_path, monkeypatch):
    """Running migrate_db twice does not re-apply migrations.

    monkeypatch replaces the manual try/finally patching of _MIGRATIONS_DIR.
    """
    db = tmp_path / "test.db"
    mdir = tmp_path / "migrations"
    mdir.mkdir()
    (mdir / "001_test.sql").write_text(
        "CREATE TABLE IF NOT EXISTS widgets (id INTEGER PRIMARY KEY, name TEXT);"
    )

    import scripts.db_migrate as m

    monkeypatch.setattr(m, "_MIGRATIONS_DIR", mdir)
    migrate_db(db)
    applied = migrate_db(db)  # second run
    assert applied == []
    assert _applied(db) == ["001_test"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_applies_only_new_migrations(tmp_path, monkeypatch):
    """Migrations already in schema_migrations are skipped; only new ones run.

    monkeypatch replaces the manual try/finally patching of _MIGRATIONS_DIR.
    """
    db = tmp_path / "test.db"
    mdir = tmp_path / "migrations"
    mdir.mkdir()
    (mdir / "001_first.sql").write_text(
        "CREATE TABLE IF NOT EXISTS first_table (id INTEGER PRIMARY KEY);"
    )

    import scripts.db_migrate as m

    monkeypatch.setattr(m, "_MIGRATIONS_DIR", mdir)
    migrate_db(db)

    # Add a second migration
    (mdir / "002_second.sql").write_text(
        "CREATE TABLE IF NOT EXISTS second_table (id INTEGER PRIMARY KEY);"
    )
    applied = migrate_db(db)
    assert applied == ["002_second"]
    assert set(_applied(db)) == {"001_first", "002_second"}
    assert "second_table" in _tables(db)
|
||||||
|
|
||||||
|
|
||||||
|
def test_migration_failure_raises(tmp_path, monkeypatch):
    """A bad migration raises RuntimeError and does not record the version.

    monkeypatch replaces the manual try/finally patching of _MIGRATIONS_DIR.
    """
    db = tmp_path / "test.db"
    mdir = tmp_path / "migrations"
    mdir.mkdir()
    (mdir / "001_bad.sql").write_text("THIS IS NOT VALID SQL !!!")

    import scripts.db_migrate as m

    monkeypatch.setattr(m, "_MIGRATIONS_DIR", mdir)
    with pytest.raises(RuntimeError, match="001_bad"):
        migrate_db(db)
    assert _applied(db) == []
|
||||||
|
|
||||||
|
|
||||||
|
def test_baseline_migration_runs(tmp_path):
    """The real 001_baseline.sql applies cleanly to a fresh database."""
    db = tmp_path / "test.db"
    versions = migrate_db(db)
    assert "001_baseline" in versions
    required = {
        "jobs",
        "job_contacts",
        "company_research",
        "background_tasks",
        "survey_responses",
        "digest_queue",
        "schema_migrations",
    }
    assert required <= _tables(db)
|
||||||
Loading…
Reference in a new issue