"""CLI: ingest a corpus directory into the Turnstone SQLite database.""" from __future__ import annotations import logging import sys from pathlib import Path logging.basicConfig(level=logging.INFO, format="%(levelname)s %(message)s") # Allow running from repo root sys.path.insert(0, str(Path(__file__).parent.parent)) from app.ingest.pipeline import ingest if __name__ == "__main__": corpus_dir = Path(sys.argv[1]) if len(sys.argv) > 1 else Path("corpus/raw") db_path = Path(sys.argv[2]) if len(sys.argv) > 2 else Path("data/turnstone.db") pattern_file = Path("patterns/default.yaml") db_path.parent.mkdir(parents=True, exist_ok=True) print(f"Ingesting {corpus_dir} → {db_path}") stats = ingest(corpus_dir, db_path, pattern_file) total = sum(stats.values()) for fname, count in sorted(stats.items()): print(f" {fname}: {count:,}") print(f" TOTAL: {total:,} entries")