"""CLI: ingest a log file or corpus directory into the Turnstone SQLite database.

Usage:
    ingest_corpus.py <file_or_dir> [db_path]

``db_path`` defaults to ``data/turnstone.db`` (relative to the current working
directory). Its parent directory is created if it does not exist.
"""

from __future__ import annotations

import logging
import sys
from pathlib import Path

logging.basicConfig(level=logging.INFO, format="%(levelname)s %(message)s")

# Put the directory two levels above this file on the import path so that the
# ``app`` package resolves when the script is run directly (presumably this is
# the project root -- confirm against the repository layout).
sys.path.insert(0, str(Path(__file__).parent.parent))

# Must come after the sys.path tweak above, hence not grouped with the imports.
from app.ingest.pipeline import ingest, ingest_file  # noqa: E402

_DEFAULT_DB_PATH = Path("data/turnstone.db")


def main(argv: list[str] | None = None) -> int:
    """Run the ingest CLI and return a process exit code.

    Args:
        argv: Command-line arguments without the program name. When ``None``,
            ``sys.argv[1:]`` is used.

    Returns:
        ``0`` after a completed ingest; ``1`` when the target argument is
        missing or names neither an existing file nor a directory.
    """
    args = sys.argv[1:] if argv is None else argv
    if not args:
        # The first positional argument is required; only db_path is optional.
        print("Usage: ingest_corpus.py <file_or_dir> [db_path]", file=sys.stderr)
        return 1

    target = Path(args[0])
    db_path = Path(args[1]) if len(args) > 1 else _DEFAULT_DB_PATH

    # Validate the target before touching the filesystem, so a bad invocation
    # does not leave an empty database directory behind.
    if target.is_file():
        run_ingest = ingest_file
    elif target.is_dir():
        run_ingest = ingest
    else:
        print(f"Error: {target} is not a file or directory", file=sys.stderr)
        return 1

    db_path.parent.mkdir(parents=True, exist_ok=True)
    print(f"Ingesting {target} → {db_path}")

    # NOTE(review): stats is used as a mapping of name -> integer count
    # (``.items()``, ``.values()``, ``:,`` formatting); confirm against
    # app.ingest.pipeline.
    stats = run_ingest(target, db_path)

    total = sum(stats.values())
    for fname, count in sorted(stats.items()):
        print(f" {fname}: {count:,}")
    print(f" TOTAL: {total:,} entries")
    return 0


if __name__ == "__main__":
    sys.exit(main())