Implements Option A from the issue design: each cloud user gets their own
data directory (DATA_DIR/users/{user_id}/) with separate pagepiper.db,
pagepiper_vecs.db, uploads/, and books/. Local mode is unchanged.
Key changes:
- app/startup.py: extract apply_migrations, reembed_docs,
  check_and_rebuild_vec_schema out of main.py (no circular imports)
- app/config.py: add LOCAL_USER_ID constant and user_data_dir() helper
- app/cloud_session.py: extract resolve_authenticated_user(); require_paid_tier
  now returns user_id (str) instead of None
- app/deps.py: add UserCtx dataclass (db_path, vec_db_path, data_dir,
  watch_dir, bm25) + get_user_ctx dependency; per-user startup guard runs
  migrations + vec schema check once per process per user
- app/main.py: _bm25 singleton -> _bm25_map dict keyed by user_id;
  add _get_bm25_for(); lifespan only runs startup checks in local mode
- app/api/library.py, search.py, chat.py: thread UserCtx through all
  endpoints; remove module-level _mark_bm25_dirty injection pattern
- tests/conftest.py: override get_user_ctx in addition to get_db so all
  endpoints get a consistent test UserCtx
63 lines
1.9 KiB
Python
63 lines
1.9 KiB
Python
"""Configuration from environment variables — no file parsing required for basic use."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import os
|
|
from pathlib import Path
|
|
|
|
# Root directory for persistent data; PAGEPIPER_DATA_DIR overrides the
# relative default "data".
DATA_DIR = Path(os.getenv("PAGEPIPER_DATA_DIR", "data"))
# Make sure the directory exists as soon as this module is imported.
DATA_DIR.mkdir(parents=True, exist_ok=True)

# Database file locations inside DATA_DIR, exposed as plain strings.
DB_PATH = os.fspath(DATA_DIR / "pagepiper.db")
VEC_DB_PATH = os.fspath(DATA_DIR / "pagepiper_vecs.db")

# Directory taken from PAGEPIPER_WATCH_DIR (default "books"); it is not
# created here.
WATCH_DIR = Path(os.getenv("PAGEPIPER_WATCH_DIR", "books"))

# Integer read from PAGEPIPER_EMBED_DIMS (default 1024); a non-numeric value
# raises ValueError at import time.
VEC_DIMENSIONS = int(os.getenv("PAGEPIPER_EMBED_DIMS", "1024"))

# User id constant for local mode.
# NOTE(review): presumably used where no authenticated cloud user exists —
# confirm against callers.
LOCAL_USER_ID = "__local__"
|
|
|
|
|
|
def user_data_dir(user_id: str) -> Path:
    """Return (and create) the per-user data directory under DATA_DIR/users/.

    ``user_id`` is used verbatim as a single directory name, so it must be a
    plain path component.

    Args:
        user_id: Identifier of the user whose directory is requested.

    Returns:
        Path to ``DATA_DIR/users/<user_id>``; the directory exists on return.

    Raises:
        ValueError: If ``user_id`` is empty, is ``"."`` or ``".."``, or
            contains a path separator or NUL character.
    """
    # Reject ids that would not map to a private subdirectory: an empty id or
    # "." resolves to the shared DATA_DIR/users directory, ".." and embedded
    # separators climb out of it, and an absolute path makes pathlib discard
    # DATA_DIR entirely when joined.
    if user_id in ("", ".", "..") or any(ch in user_id for ch in "/\\\0"):
        raise ValueError(f"invalid user_id for data directory: {user_id!r}")

    d = DATA_DIR / "users" / user_id
    d.mkdir(parents=True, exist_ok=True)
    return d
|
|
|
|
|
|
def get_llm_config() -> dict | None:
|
|
"""Build LLMRouter config from env vars.
|
|
|
|
Returns None only when neither PAGEPIPER_OLLAMA_URL nor CF_ORCH_URL is set.
|
|
CF_ORCH_URL alone is sufficient — the coordinator resolves the service URL at
|
|
allocation time so PAGEPIPER_OLLAMA_URL becomes optional.
|
|
"""
|
|
url = os.environ.get("PAGEPIPER_OLLAMA_URL", "").strip()
|
|
orch_url = os.environ.get("CF_ORCH_URL", "").strip()
|
|
|
|
if not url and not orch_url:
|
|
return None
|
|
|
|
chat_model = os.environ.get("PAGEPIPER_CHAT_MODEL", "mistral:7b")
|
|
|
|
_base_url = ""
|
|
if url:
|
|
_clean = url.rstrip("/")
|
|
_base_url = _clean if _clean.endswith("/v1") else _clean + "/v1"
|
|
|
|
backend: dict = {
|
|
"type": "openai_compat",
|
|
"base_url": _base_url,
|
|
"model": chat_model,
|
|
"embedding_model": os.environ.get("PAGEPIPER_EMBED_MODEL", "nomic-embed-text"),
|
|
"supports_images": False,
|
|
}
|
|
|
|
if orch_url:
|
|
backend["cf_orch"] = {
|
|
"service": os.environ.get("PAGEPIPER_ORCH_SERVICE", "ollama"),
|
|
"model_candidates": [chat_model],
|
|
"ttl_s": 3600,
|
|
}
|
|
|
|
return {
|
|
"fallback_order": ["ollama"],
|
|
"backends": {"ollama": backend},
|
|
}
|