feat: add database schema and migration runner
This commit is contained in:
parent
3c9598c443
commit
9797e76931
9 changed files with 161 additions and 0 deletions
0
app/__init__.py
Normal file
0
app/__init__.py
Normal file
0
app/api/__init__.py
Normal file
0
app/api/__init__.py
Normal file
32
app/config.py
Normal file
32
app/config.py
Normal file
|
|
@ -0,0 +1,32 @@
|
|||
"""Configuration from environment variables — no file parsing required for basic use."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
# Base directory that both database files live under; override it with
# the PAGEPIPER_DATA_DIR environment variable.
DATA_DIR = Path(os.getenv("PAGEPIPER_DATA_DIR", "data"))

# Database locations, exposed as plain strings rather than Path objects.
DB_PATH = str(DATA_DIR.joinpath("pagepiper.db"))
VEC_DB_PATH = str(DATA_DIR.joinpath("pagepiper_vecs.db"))

# Directory named by PAGEPIPER_WATCH_DIR (falls back to "books").
WATCH_DIR = Path(os.getenv("PAGEPIPER_WATCH_DIR", "books"))
|
||||
|
||||
|
||||
def get_llm_config() -> dict | None:
    """Assemble the LLMRouter configuration from ``PAGEPIPER_*`` environment variables.

    Returns:
        ``None`` when ``PAGEPIPER_OLLAMA_URL`` is unset or blank. Otherwise a
        dict describing a single ``"ollama"`` backend whose ``base_url`` is
        the configured URL with trailing slashes removed and ``/v1`` appended.
    """
    env = os.environ
    ollama_url = env.get("PAGEPIPER_OLLAMA_URL", "").strip()
    if ollama_url == "":
        return None

    chat_model = env.get("PAGEPIPER_CHAT_MODEL", "mistral:7b")
    embed_model = env.get("PAGEPIPER_EMBED_MODEL", "nomic-embed-text")

    ollama_backend = {
        "type": "openai_compat",
        "base_url": f"{ollama_url.rstrip('/')}/v1",
        "model": chat_model,
        "embedding_model": embed_model,
        "supports_images": False,
    }
    return {
        "fallback_order": ["ollama"],
        "backends": {"ollama": ollama_backend},
    }
|
||||
0
app/services/__init__.py
Normal file
0
app/services/__init__.py
Normal file
29
migrations/001_initial_schema.sql
Normal file
29
migrations/001_initial_schema.sql
Normal file
|
|
@ -0,0 +1,29 @@
|
|||
-- pagepiper initial schema
|
||||
-- Run via: conda run -n cf python scripts/db_migrate.py
|
||||
|
||||
-- documents: one row per file_path (UNIQUE).
CREATE TABLE IF NOT EXISTS documents (
    -- 32 lowercase hex characters generated from 16 random bytes
    id          TEXT    PRIMARY KEY DEFAULT (lower(hex(randomblob(16)))),
    title       TEXT    NOT NULL,
    file_path   TEXT    NOT NULL UNIQUE,
    -- status: pending | processing | ready | error
    -- (documented only; no CHECK constraint enforces these values)
    status      TEXT    NOT NULL DEFAULT 'pending',
    task_id     TEXT,
    page_count  INTEGER,
    error_msg   TEXT,
    created_at  TEXT    NOT NULL DEFAULT (datetime('now')),
    -- Only defaulted at insert; nothing in this migration refreshes it on UPDATE.
    updated_at  TEXT    NOT NULL DEFAULT (datetime('now'))
);
|
||||
|
||||
-- page_chunks: text rows tied to a documents row by doc_id.
CREATE TABLE IF NOT EXISTS page_chunks (
    -- 32 lowercase hex characters generated from 16 random bytes
    id           TEXT    PRIMARY KEY DEFAULT (lower(hex(randomblob(16)))),
    -- Rows are removed with their parent document. SQLite applies the cascade
    -- only on connections that have run `PRAGMA foreign_keys = ON`.
    doc_id       TEXT    NOT NULL REFERENCES documents(id) ON DELETE CASCADE,
    page_number  INTEGER NOT NULL,
    text         TEXT    NOT NULL DEFAULT '',
    -- source: text_layer | ocr
    -- (documented only; no CHECK constraint enforces these values)
    source       TEXT    NOT NULL,
    word_count   INTEGER NOT NULL DEFAULT 0,
    created_at   TEXT    NOT NULL DEFAULT (datetime('now'))
);
|
||||
|
||||
-- Lookup indexes for page_chunks.
-- NOTE(review): idx_page_chunks_doc_id indexes the leading column of
-- idx_page_chunks_doc_page, so it is probably redundant. Confirm before
-- dropping it in a later migration.
CREATE INDEX IF NOT EXISTS idx_page_chunks_doc_id
    ON page_chunks(doc_id);

CREATE INDEX IF NOT EXISTS idx_page_chunks_doc_page
    ON page_chunks(doc_id, page_number);
|
||||
0
scripts/__init__.py
Normal file
0
scripts/__init__.py
Normal file
30
scripts/db_migrate.py
Normal file
30
scripts/db_migrate.py
Normal file
|
|
@ -0,0 +1,30 @@
|
|||
"""Apply all pending migrations to the pagepiper SQLite database."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import sqlite3
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||
|
||||
from app.config import DB_PATH
|
||||
|
||||
|
||||
def migrate(db_path: str = DB_PATH) -> None:
    """Run every ``migrations/*.sql`` script against the SQLite database at *db_path*.

    Scripts run in sorted filename order. No record of applied scripts is
    kept, so every call re-executes all of them; each script therefore has to
    be safe to re-run (for example ``CREATE TABLE IF NOT EXISTS``).

    Args:
        db_path: Path of the SQLite database file. The file and its parent
            directory are created when missing.

    Raises:
        sqlite3.Error: If opening the database or executing a script fails.
        OSError: If the parent directory cannot be created or a script
            cannot be read.
    """
    migrations_dir = Path(__file__).parent.parent / "migrations"

    # sqlite3.connect() creates the database file but not its directory, so a
    # checkout without the data directory would otherwise fail to open.
    Path(db_path).parent.mkdir(parents=True, exist_ok=True)

    conn = sqlite3.connect(db_path)
    try:
        conn.execute("PRAGMA foreign_keys = ON")
        conn.execute("PRAGMA journal_mode = WAL")

        for sql_file in sorted(migrations_dir.glob("*.sql")):
            print(f"Applying {sql_file.name}...")
            # Explicit encoding: the platform default is locale-dependent.
            conn.executescript(sql_file.read_text(encoding="utf-8"))

        conn.commit()
    finally:
        # Release the connection even when a script fails part-way through.
        conn.close()

    print(f"Migrations applied to {db_path}")


if __name__ == "__main__":
    migrate()
|
||||
0
tests/__init__.py
Normal file
0
tests/__init__.py
Normal file
70
tests/test_db_migrate.py
Normal file
70
tests/test_db_migrate.py
Normal file
|
|
@ -0,0 +1,70 @@
|
|||
"""Tests for migrations/001_initial_schema.sql via scripts/db_migrate.py."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import sqlite3
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
def test_migration_creates_documents_table(tmp_path):
    """The initial schema creates both the ``documents`` and ``page_chunks`` tables."""
    # Resolve the schema relative to this file so the test does not depend on
    # the directory pytest is launched from.
    schema_file = (
        Path(__file__).resolve().parent.parent / "migrations" / "001_initial_schema.sql"
    )
    schema = schema_file.read_text(encoding="utf-8")

    conn = sqlite3.connect(str(tmp_path / "test.db"))
    try:
        conn.executescript(schema)
        conn.commit()
        tables = {
            row[0]
            for row in conn.execute(
                "SELECT name FROM sqlite_master WHERE type='table'"
            ).fetchall()
        }
    finally:
        # Close explicitly; an unclosed connection keeps the temp file open.
        conn.close()

    assert "documents" in tables
    assert "page_chunks" in tables
|
||||
|
||||
|
||||
def test_documents_table_has_required_columns(tmp_path):
    """``documents`` exposes every column listed in the required set below."""
    # Resolve the schema relative to this file so the test does not depend on
    # the directory pytest is launched from.
    schema_file = (
        Path(__file__).resolve().parent.parent / "migrations" / "001_initial_schema.sql"
    )
    schema = schema_file.read_text(encoding="utf-8")

    conn = sqlite3.connect(str(tmp_path / "test.db"))
    try:
        conn.executescript(schema)
        # PRAGMA table_info yields one row per column; index 1 is the name.
        cols = {
            row[1] for row in conn.execute("PRAGMA table_info(documents)").fetchall()
        }
    finally:
        # Close explicitly; an unclosed connection keeps the temp file open.
        conn.close()

    required = {
        "id",
        "title",
        "file_path",
        "status",
        "task_id",
        "page_count",
        "created_at",
    }
    assert required <= cols
|
||||
|
||||
|
||||
def test_page_chunks_foreign_key_cascades(tmp_path):
    """Deleting a document removes its ``page_chunks`` rows via ON DELETE CASCADE."""
    # Resolve the schema relative to this file so the test does not depend on
    # the directory pytest is launched from.
    schema_file = (
        Path(__file__).resolve().parent.parent / "migrations" / "001_initial_schema.sql"
    )
    schema = schema_file.read_text(encoding="utf-8")
    count_chunks = "SELECT COUNT(*) FROM page_chunks WHERE doc_id='d1'"

    conn = sqlite3.connect(str(tmp_path / "test.db"))
    try:
        conn.executescript(schema)
        # Foreign keys are enforced per connection and are off by default.
        conn.execute("PRAGMA foreign_keys = ON")

        conn.execute(
            "INSERT INTO documents(id, title, file_path, status) VALUES ('d1','Book','path.pdf','ready')"
        )
        conn.execute(
            "INSERT INTO page_chunks(doc_id, page_number, text, source, word_count) VALUES ('d1',1,'text','text_layer',1)"
        )
        conn.commit()

        # Confirm the chunk exists first so the final check cannot pass vacuously.
        before = conn.execute(count_chunks).fetchone()[0]

        conn.execute("DELETE FROM documents WHERE id='d1'")
        conn.commit()

        after = conn.execute(count_chunks).fetchone()[0]
    finally:
        # Close explicitly; an unclosed connection keeps the temp file open.
        conn.close()

    assert before == 1
    assert after == 0
|
||||
Loading…
Reference in a new issue