avocet/app/api.py

347 lines
12 KiB
Python

"""Avocet — FastAPI REST layer.
JSONL read/write helpers and FastAPI app instance.
Endpoints and static file serving are added in subsequent tasks.
"""
from __future__ import annotations
import hashlib
import json
import yaml
from pathlib import Path
from datetime import datetime, timezone
from fastapi import FastAPI, HTTPException, Query
from pydantic import BaseModel
_ROOT = Path(__file__).parent.parent
_DATA_DIR: Path = _ROOT / "data" # overridable in tests via set_data_dir()
_CONFIG_DIR: Path | None = None # None = use real path
def set_data_dir(path: Path) -> None:
"""Override data directory — used by tests."""
global _DATA_DIR
_DATA_DIR = path
def set_config_dir(path: Path | None) -> None:
"""Override config directory — used by tests."""
global _CONFIG_DIR
_CONFIG_DIR = path
def _config_file() -> Path:
if _CONFIG_DIR is not None:
return _CONFIG_DIR / "label_tool.yaml"
return _ROOT / "config" / "label_tool.yaml"
def reset_last_action() -> None:
"""Reset undo state — used by tests."""
global _last_action
_last_action = None
def _queue_file() -> Path:
return _DATA_DIR / "email_label_queue.jsonl"
def _score_file() -> Path:
return _DATA_DIR / "email_score.jsonl"
def _discarded_file() -> Path:
return _DATA_DIR / "discarded.jsonl"
def _read_jsonl(path: Path) -> list[dict]:
if not path.exists():
return []
lines = path.read_text(encoding="utf-8").splitlines()
return [json.loads(l) for l in lines if l.strip()]
def _write_jsonl(path: Path, records: list[dict]) -> None:
path.parent.mkdir(parents=True, exist_ok=True)
text = "\n".join(json.dumps(r, ensure_ascii=False) for r in records)
path.write_text(text + "\n" if records else "", encoding="utf-8")
def _append_jsonl(path: Path, record: dict) -> None:
path.parent.mkdir(parents=True, exist_ok=True)
with path.open("a", encoding="utf-8") as f:
f.write(json.dumps(record, ensure_ascii=False) + "\n")
def _item_id(item: dict) -> str:
"""Stable content-hash ID — matches label_tool.py _entry_key dedup logic."""
key = (item.get("subject", "") + (item.get("body", "") or "")[:100])
return hashlib.md5(key.encode("utf-8", errors="replace")).hexdigest()
def _normalize(item: dict) -> dict:
"""Normalize JSONL item to the Vue frontend schema.
label_tool.py stores: subject, body, from_addr, date, account (no id).
The Vue app expects: id, subject, body, from, date, source.
Both old (from_addr/account) and new (from/source) field names are handled.
"""
return {
"id": item.get("id") or _item_id(item),
"subject": item.get("subject", ""),
"body": item.get("body", ""),
"from": item.get("from") or item.get("from_addr", ""),
"date": item.get("date", ""),
"source": item.get("source") or item.get("account", ""),
}
app = FastAPI(title="Avocet API")
# In-memory last-action store (single user, local tool — in-memory is fine)
_last_action: dict | None = None
@app.get("/api/queue")
def get_queue(limit: int = Query(default=10, ge=1, le=50)):
items = _read_jsonl(_queue_file())
return {"items": [_normalize(x) for x in items[:limit]], "total": len(items)}
class LabelRequest(BaseModel):
id: str
label: str
@app.post("/api/label")
def post_label(req: LabelRequest):
global _last_action
items = _read_jsonl(_queue_file())
match = next((x for x in items if _normalize(x)["id"] == req.id), None)
if not match:
raise HTTPException(404, f"Item {req.id!r} not found in queue")
record = {**match, "label": req.label,
"labeled_at": datetime.now(timezone.utc).isoformat()}
_append_jsonl(_score_file(), record)
_write_jsonl(_queue_file(), [x for x in items if _normalize(x)["id"] != req.id])
_last_action = {"type": "label", "item": match, "label": req.label}
return {"ok": True}
class SkipRequest(BaseModel):
id: str
@app.post("/api/skip")
def post_skip(req: SkipRequest):
global _last_action
items = _read_jsonl(_queue_file())
match = next((x for x in items if _normalize(x)["id"] == req.id), None)
if not match:
raise HTTPException(404, f"Item {req.id!r} not found in queue")
reordered = [x for x in items if _normalize(x)["id"] != req.id] + [match]
_write_jsonl(_queue_file(), reordered)
_last_action = {"type": "skip", "item": match}
return {"ok": True}
class DiscardRequest(BaseModel):
id: str
@app.post("/api/discard")
def post_discard(req: DiscardRequest):
global _last_action
items = _read_jsonl(_queue_file())
match = next((x for x in items if _normalize(x)["id"] == req.id), None)
if not match:
raise HTTPException(404, f"Item {req.id!r} not found in queue")
record = {**match, "label": "__discarded__",
"discarded_at": datetime.now(timezone.utc).isoformat()}
_append_jsonl(_discarded_file(), record)
_write_jsonl(_queue_file(), [x for x in items if _normalize(x)["id"] != req.id])
_last_action = {"type": "discard", "item": match}
return {"ok": True}
@app.delete("/api/label/undo")
def delete_undo():
global _last_action
if not _last_action:
raise HTTPException(404, "No action to undo")
action = _last_action
item = action["item"] # always the original clean queue item
# Perform file operations FIRST — only clear _last_action on success
if action["type"] == "label":
records = _read_jsonl(_score_file())
if not records:
raise HTTPException(409, "Score file is empty — cannot undo label")
_write_jsonl(_score_file(), records[:-1])
items = _read_jsonl(_queue_file())
_write_jsonl(_queue_file(), [item] + items)
elif action["type"] == "discard":
records = _read_jsonl(_discarded_file())
if not records:
raise HTTPException(409, "Discarded file is empty — cannot undo discard")
_write_jsonl(_discarded_file(), records[:-1])
items = _read_jsonl(_queue_file())
_write_jsonl(_queue_file(), [item] + items)
elif action["type"] == "skip":
items = _read_jsonl(_queue_file())
item_id = _normalize(item)["id"]
items = [item] + [x for x in items if _normalize(x)["id"] != item_id]
_write_jsonl(_queue_file(), items)
# Clear AFTER all file operations succeed
_last_action = None
return {"undone": {"type": action["type"], "item": _normalize(item)}}
# Label metadata — 10 labels matching label_tool.py
_LABEL_META = [
{"name": "interview_scheduled", "emoji": "\U0001f4c5", "color": "#4CAF50", "key": "1"},
{"name": "offer_received", "emoji": "\U0001f389", "color": "#2196F3", "key": "2"},
{"name": "rejected", "emoji": "\u274c", "color": "#F44336", "key": "3"},
{"name": "positive_response", "emoji": "\U0001f44d", "color": "#FF9800", "key": "4"},
{"name": "survey_received", "emoji": "\U0001f4cb", "color": "#9C27B0", "key": "5"},
{"name": "neutral", "emoji": "\u2b1c", "color": "#607D8B", "key": "6"},
{"name": "event_rescheduled", "emoji": "\U0001f504", "color": "#FF5722", "key": "7"},
{"name": "digest", "emoji": "\U0001f4f0", "color": "#00BCD4", "key": "8"},
{"name": "new_lead", "emoji": "\U0001f91d", "color": "#009688", "key": "9"},
{"name": "hired", "emoji": "\U0001f38a", "color": "#FFC107", "key": "h"},
]
@app.get("/api/config/labels")
def get_labels():
return _LABEL_META
@app.get("/api/config")
def get_config():
f = _config_file()
if not f.exists():
return {"accounts": [], "max_per_account": 500}
raw = yaml.safe_load(f.read_text(encoding="utf-8")) or {}
return {"accounts": raw.get("accounts", []), "max_per_account": raw.get("max_per_account", 500)}
class ConfigPayload(BaseModel):
accounts: list[dict]
max_per_account: int = 500
@app.post("/api/config")
def post_config(payload: ConfigPayload):
f = _config_file()
f.parent.mkdir(parents=True, exist_ok=True)
tmp = f.with_suffix(".tmp")
tmp.write_text(yaml.dump(payload.model_dump(), allow_unicode=True, sort_keys=False),
encoding="utf-8")
tmp.rename(f)
return {"ok": True}
@app.get("/api/stats")
def get_stats():
records = _read_jsonl(_score_file())
counts: dict[str, int] = {}
for r in records:
lbl = r.get("label", "")
if lbl:
counts[lbl] = counts.get(lbl, 0) + 1
return {
"total": len(records),
"counts": counts,
"score_file_bytes": _score_file().stat().st_size if _score_file().exists() else 0,
}
@app.get("/api/stats/download")
def download_stats():
from fastapi.responses import FileResponse
if not _score_file().exists():
raise HTTPException(404, "No score file")
return FileResponse(
str(_score_file()),
filename="email_score.jsonl",
media_type="application/jsonlines",
headers={"Content-Disposition": 'attachment; filename="email_score.jsonl"'},
)
class AccountTestRequest(BaseModel):
account: dict
@app.post("/api/accounts/test")
def test_account(req: AccountTestRequest):
from app.imap_fetch import test_connection
ok, message, count = test_connection(req.account)
return {"ok": ok, "message": message, "count": count}
from fastapi.responses import StreamingResponse
@app.get("/api/fetch/stream")
def fetch_stream(
accounts: str = Query(default=""),
days_back: int = Query(default=90, ge=1, le=365),
limit: int = Query(default=150, ge=1, le=1000),
mode: str = Query(default="wide"),
):
from app.imap_fetch import fetch_account_stream
selected_names = {n.strip() for n in accounts.split(",") if n.strip()}
config = get_config() # reuse existing endpoint logic
selected = [a for a in config["accounts"] if a.get("name") in selected_names]
def generate():
known_keys = {_item_id(x) for x in _read_jsonl(_queue_file())}
total_added = 0
for acc in selected:
try:
batch_emails: list[dict] = []
for event in fetch_account_stream(acc, days_back, limit, known_keys):
if event["type"] == "done":
batch_emails = event.pop("emails", [])
total_added += event["added"]
yield f"data: {json.dumps(event)}\n\n"
# Write new emails to queue after each account
if batch_emails:
existing = _read_jsonl(_queue_file())
_write_jsonl(_queue_file(), existing + batch_emails)
except Exception as exc:
error_event = {"type": "error", "account": acc.get("name", "?"),
"message": str(exc)}
yield f"data: {json.dumps(error_event)}\n\n"
queue_size = len(_read_jsonl(_queue_file()))
complete = {"type": "complete", "total_added": total_added, "queue_size": queue_size}
yield f"data: {json.dumps(complete)}\n\n"
return StreamingResponse(generate(), media_type="text/event-stream",
headers={"Cache-Control": "no-cache", "X-Accel-Buffering": "no"})
# Static SPA — MUST be last (catches all unmatched paths)
_DIST = _ROOT / "web" / "dist"
if _DIST.exists():
from fastapi.responses import FileResponse
from fastapi.staticfiles import StaticFiles
# Serve index.html with no-cache so browsers always fetch fresh HTML after rebuilds.
# Hashed assets (/assets/index-abc123.js) can be cached forever — they change names
# when content changes (standard Vite cache-busting strategy).
_NO_CACHE = {"Cache-Control": "no-cache, no-store, must-revalidate", "Pragma": "no-cache"}
@app.get("/")
def get_spa_root():
return FileResponse(str(_DIST / "index.html"), headers=_NO_CACHE)
app.mount("/", StaticFiles(directory=str(_DIST), html=True), name="spa")