pagepiper/app/config.py
pyr0ball 8eef52a054 feat: per-user database isolation for cloud instances (closes #4)
Implements Option A from the issue design: each cloud user gets their own
data directory (DATA_DIR/users/{user_id}/) with separate pagepiper.db,
pagepiper_vecs.db, uploads/, and books/. Local mode is unchanged.

Key changes:
- app/startup.py: extract apply_migrations, reembed_docs,
  check_and_rebuild_vec_schema out of main.py (no circular imports)
- app/config.py: add LOCAL_USER_ID constant and user_data_dir() helper
- app/cloud_session.py: extract resolve_authenticated_user(); require_paid_tier
  now returns user_id (str) instead of None
- app/deps.py: add UserCtx dataclass (db_path, vec_db_path, data_dir,
  watch_dir, bm25) + get_user_ctx dependency; per-user startup guard runs
  migrations + vec schema check once per process per user
- app/main.py: _bm25 singleton -> _bm25_map dict keyed by user_id;
  add _get_bm25_for(); lifespan only runs startup checks in local mode
- app/api/library.py, search.py, chat.py: thread UserCtx through all
  endpoints; remove module-level _mark_bm25_dirty injection pattern
- tests/conftest.py: override get_user_ctx in addition to get_db so all
  endpoints get a consistent test UserCtx
2026-05-13 16:31:51 -07:00

63 lines
1.9 KiB
Python

"""Configuration from environment variables — no file parsing required for basic use."""
from __future__ import annotations
import os
from pathlib import Path
# Root directory for persisted data, taken from PAGEPIPER_DATA_DIR
# (default "data"). It is created eagerly at import time.
DATA_DIR = Path(os.getenv("PAGEPIPER_DATA_DIR", "data"))
DATA_DIR.mkdir(exist_ok=True, parents=True)

# Database file locations under DATA_DIR, exposed as plain strings.
DB_PATH = str(DATA_DIR.joinpath("pagepiper.db"))
VEC_DB_PATH = str(DATA_DIR.joinpath("pagepiper_vecs.db"))

# Directory from PAGEPIPER_WATCH_DIR (default "books"); not created here.
WATCH_DIR = Path(os.getenv("PAGEPIPER_WATCH_DIR", "books"))

# Embedding dimension count from PAGEPIPER_EMBED_DIMS (default 1024).
# A non-integer value raises ValueError at import time.
VEC_DIMENSIONS = int(os.getenv("PAGEPIPER_EMBED_DIMS", "1024"))

# Fixed user id string; presumably identifies the single user in local
# (non-cloud) mode — confirm against callers.
LOCAL_USER_ID = "__local__"
def user_data_dir(user_id: str) -> Path:
    """Return (and create) the per-user data directory under DATA_DIR/users/.

    ``user_id`` must be a single, plain path component. Values that would
    resolve outside ``DATA_DIR/users/<user_id>`` are rejected so one user's
    directory can never alias the shared root or another location:
    an empty id would map to ``DATA_DIR/users`` itself, ``".."`` to
    ``DATA_DIR``, and an absolute path or one containing a separator would
    escape the tree entirely.

    Args:
        user_id: Identifier used verbatim as the directory name.

    Returns:
        The directory ``DATA_DIR / "users" / user_id``; it exists on return.

    Raises:
        ValueError: If ``user_id`` is empty, is ``"."``/``".."``, or is not a
            single path component on the current platform.
    """
    # Path(...).name equals the input only for a single component: anything
    # with a separator, trailing slash, root or drive prefix (and ".")
    # differs. "" and ".." survive that comparison, so check them explicitly.
    if not user_id or user_id == ".." or Path(user_id).name != user_id:
        raise ValueError(f"invalid user_id for data directory: {user_id!r}")

    target = DATA_DIR / "users" / user_id
    target.mkdir(parents=True, exist_ok=True)
    return target
def get_llm_config() -> dict | None:
    """Build the LLMRouter config from environment variables.

    Every variable is stripped of surrounding whitespace, and a blank value
    is treated exactly like an unset one, so an empty variable never
    overrides a default with an empty string.

    Variables read:
        PAGEPIPER_OLLAMA_URL: Endpoint URL. Trailing slashes are removed and
            ``/v1`` is appended unless the URL already ends with it.
        CF_ORCH_URL: When set, a ``cf_orch`` section is added to the backend.
            CF_ORCH_URL alone is sufficient — the coordinator resolves the
            service URL at allocation time, so PAGEPIPER_OLLAMA_URL becomes
            optional (``base_url`` is then the empty string).
        PAGEPIPER_CHAT_MODEL: Chat model name (default ``mistral:7b``).
        PAGEPIPER_EMBED_MODEL: Embedding model name
            (default ``nomic-embed-text``).
        PAGEPIPER_ORCH_SERVICE: Service name placed in the ``cf_orch``
            section (default ``ollama``).

    Returns:
        None only when neither PAGEPIPER_OLLAMA_URL nor CF_ORCH_URL is set;
        otherwise a dict with ``fallback_order`` and ``backends`` keys
        describing a single ``ollama`` backend.
    """

    def _env(name: str, default: str = "") -> str:
        # Blank or whitespace-only values fall back to the default.
        return os.environ.get(name, "").strip() or default

    url = _env("PAGEPIPER_OLLAMA_URL")
    orch_url = _env("CF_ORCH_URL")
    if not (url or orch_url):
        return None

    chat_model = _env("PAGEPIPER_CHAT_MODEL", "mistral:7b")

    # Normalise the endpoint so it ends in exactly one "/v1" suffix; stays
    # empty when only the orchestrator URL was provided.
    base_url = ""
    if url:
        clean = url.rstrip("/")
        base_url = clean if clean.endswith("/v1") else clean + "/v1"

    backend: dict = {
        "type": "openai_compat",
        "base_url": base_url,
        "model": chat_model,
        "embedding_model": _env("PAGEPIPER_EMBED_MODEL", "nomic-embed-text"),
        "supports_images": False,
    }

    if orch_url:
        backend["cf_orch"] = {
            "service": _env("PAGEPIPER_ORCH_SERVICE", "ollama"),
            "model_candidates": [chat_model],
            "ttl_s": 3600,
        }

    return {
        "fallback_order": ["ollama"],
        "backends": {"ollama": backend},
    }