From 167d7351e3c8c76ba7b1424f3c820e44eef5c967 Mon Sep 17 00:00:00 2001 From: pyr0ball Date: Fri, 1 May 2026 18:48:14 -0700 Subject: [PATCH] feat: extract label queue API into app/data/label.py --- app/api.py | 183 +------------------------------- app/data/__init__.py | 0 app/data/label.py | 222 +++++++++++++++++++++++++++++++++++++++ tests/test_api.py | 8 ++ tests/test_data_label.py | 219 ++++++++++++++++++++++++++++++++++++++ 5 files changed, 454 insertions(+), 178 deletions(-) create mode 100644 app/data/__init__.py create mode 100644 app/data/label.py create mode 100644 tests/test_data_label.py diff --git a/app/api.py b/app/api.py index bb09886..3303bf2 100644 --- a/app/api.py +++ b/app/api.py @@ -142,6 +142,9 @@ def _normalize(item: dict) -> dict: app = FastAPI(title="Avocet API") +from app.data.label import router as label_router +app.include_router(label_router, prefix="/api") + from app.sft import router as sft_router app.include_router(sft_router, prefix="/api/sft") @@ -162,183 +165,6 @@ app.include_router(style_router, prefix="/api/style") _last_action: dict | None = None -@app.get("/api/queue") -def get_queue(limit: int = Query(default=10, ge=1, le=50)): - items = _read_jsonl(_queue_file()) - return {"items": [_normalize(x) for x in items[:limit]], "total": len(items)} - - -class LabelRequest(BaseModel): - id: str - label: str - - -@app.post("/api/label") -def post_label(req: LabelRequest): - global _last_action - items = _read_jsonl(_queue_file()) - match = next((x for x in items if _normalize(x)["id"] == req.id), None) - if not match: - raise HTTPException(404, f"Item {req.id!r} not found in queue") - record = {**match, "label": req.label, - "labeled_at": datetime.now(timezone.utc).isoformat()} - _append_jsonl(_score_file(), record) - _write_jsonl(_queue_file(), [x for x in items if _normalize(x)["id"] != req.id]) - _last_action = {"type": "label", "item": match, "label": req.label} - return {"ok": True} - - -class SkipRequest(BaseModel): - id: str - - -@app.post("/api/skip") -def post_skip(req: SkipRequest): - global _last_action - items = _read_jsonl(_queue_file()) - match = next((x for x in items if _normalize(x)["id"] == req.id), None) - if not match: - raise HTTPException(404, f"Item {req.id!r} not found in queue") - reordered = [x for x in items if _normalize(x)["id"] != req.id] + [match] - _write_jsonl(_queue_file(), reordered) - _last_action = {"type": "skip", "item": match} - return {"ok": True} - - -class DiscardRequest(BaseModel): - id: str - - -@app.post("/api/discard") -def post_discard(req: DiscardRequest): - global _last_action - items = _read_jsonl(_queue_file()) - match = next((x for x in items if _normalize(x)["id"] == req.id), None) - if not match: - raise HTTPException(404, f"Item {req.id!r} not found in queue") - record = {**match, "label": "__discarded__", - "discarded_at": datetime.now(timezone.utc).isoformat()} - _append_jsonl(_discarded_file(), record) - _write_jsonl(_queue_file(), [x for x in items if _normalize(x)["id"] != req.id]) - _last_action = {"type": "discard", "item": match} - return {"ok": True} - - -@app.delete("/api/label/undo") -def delete_undo(): - global _last_action - if not _last_action: - raise HTTPException(404, "No action to undo") - action = _last_action - item = action["item"] # always the original clean queue item - - # Perform file operations FIRST — only clear _last_action on success - if action["type"] == "label": - records = _read_jsonl(_score_file()) - if not records: - raise HTTPException(409, "Score file is empty — cannot undo label") - _write_jsonl(_score_file(), records[:-1]) - items = _read_jsonl(_queue_file()) - _write_jsonl(_queue_file(), [item] + items) - elif action["type"] == "discard": - records = _read_jsonl(_discarded_file()) - if not records: - raise HTTPException(409, "Discarded file is empty — cannot undo discard") - _write_jsonl(_discarded_file(), records[:-1]) - items = _read_jsonl(_queue_file()) - _write_jsonl(_queue_file(), [item] + items) - elif action["type"] == "skip": - items = _read_jsonl(_queue_file()) - item_id = _normalize(item)["id"] - items = [item] + [x for x in items if _normalize(x)["id"] != item_id] - _write_jsonl(_queue_file(), items) - - # Clear AFTER all file operations succeed - _last_action = None - return {"undone": {"type": action["type"], "item": _normalize(item)}} - - -# Label metadata — 10 labels matching label_tool.py -_LABEL_META = [ - {"name": "interview_scheduled", "emoji": "\U0001f4c5", "color": "#4CAF50", "key": "1"}, - {"name": "offer_received", "emoji": "\U0001f389", "color": "#2196F3", "key": "2"}, - {"name": "rejected", "emoji": "\u274c", "color": "#F44336", "key": "3"}, - {"name": "positive_response", "emoji": "\U0001f44d", "color": "#FF9800", "key": "4"}, - {"name": "survey_received", "emoji": "\U0001f4cb", "color": "#9C27B0", "key": "5"}, - {"name": "neutral", "emoji": "\u2b1c", "color": "#607D8B", "key": "6"}, - {"name": "event_rescheduled", "emoji": "\U0001f504", "color": "#FF5722", "key": "7"}, - {"name": "digest", "emoji": "\U0001f4f0", "color": "#00BCD4", "key": "8"}, - {"name": "new_lead", "emoji": "\U0001f91d", "color": "#009688", "key": "9"}, - {"name": "hired", "emoji": "\U0001f38a", "color": "#FFC107", "key": "h"}, -] - - -@app.get("/api/config/labels") -def get_labels(): - return _LABEL_META - - -@app.get("/api/config") -def get_config(): - f = _config_file() - if not f.exists(): - return {"accounts": [], "max_per_account": 500} - raw = yaml.safe_load(f.read_text(encoding="utf-8")) or {} - return {"accounts": raw.get("accounts", []), "max_per_account": raw.get("max_per_account", 500)} - - -class ConfigPayload(BaseModel): - accounts: list[dict] - max_per_account: int = 500 - - -@app.post("/api/config") -def post_config(payload: ConfigPayload): - f = _config_file() - f.parent.mkdir(parents=True, exist_ok=True) - tmp = f.with_suffix(".tmp") - tmp.write_text(yaml.dump(payload.model_dump(), allow_unicode=True, sort_keys=False), - encoding="utf-8") - tmp.rename(f) - return {"ok": True} - - -@app.get("/api/stats") -def get_stats(): - records = _read_jsonl(_score_file()) - counts: dict[str, int] = {} - for r in records: - lbl = r.get("label", "") - if lbl: - counts[lbl] = counts.get(lbl, 0) + 1 - benchmark_results: dict = {} - benchmark_path = _DATA_DIR / "benchmark_results.json" - if benchmark_path.exists(): - try: - benchmark_results = json.loads(benchmark_path.read_text(encoding="utf-8")) - except Exception: - pass - return { - "total": len(records), - "counts": counts, - "score_file_bytes": _score_file().stat().st_size if _score_file().exists() else 0, - "benchmark_results": benchmark_results, - } - - -@app.get("/api/stats/download") -def download_stats(): - from fastapi.responses import FileResponse - if not _score_file().exists(): - raise HTTPException(404, "No score file") - return FileResponse( - str(_score_file()), - filename="email_score.jsonl", - media_type="application/jsonlines", - headers={"Content-Disposition": 'attachment; filename="email_score.jsonl"'}, - ) - - class AccountTestRequest(BaseModel): account: dict @@ -575,9 +401,10 @@ def fetch_stream( mode: str = Query(default="wide"), ): from app.imap_fetch import fetch_account_stream + from app.data.label import get_config selected_names = {n.strip() for n in accounts.split(",") if n.strip()} - config = get_config() # reuse existing endpoint logic + config = get_config() # reuse label module config logic selected = [a for a in config["accounts"] if a.get("name") in selected_names] def generate(): diff --git a/app/data/__init__.py b/app/data/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/app/data/label.py b/app/data/label.py new file mode 100644 index 0000000..cb140ad --- /dev/null +++ b/app/data/label.py @@ -0,0 +1,222 @@ +"""Avocet -- label queue API. + +All label/skip/discard/undo/stats/config endpoints. +Extracted from app/api.py as part of the v2 domain split. +""" +from __future__ import annotations + +import hashlib +import json +import yaml +from datetime import datetime, timezone +from pathlib import Path + +from fastapi import APIRouter, HTTPException, Query +from fastapi.responses import FileResponse +from pydantic import BaseModel + +from app.utils import append_jsonl, read_jsonl, write_jsonl + +_ROOT = Path(__file__).parent.parent.parent +_DATA_DIR: Path = _ROOT / "data" +_CONFIG_DIR: Path | None = None +_last_action: dict | None = None + +router = APIRouter() + + +def set_data_dir(path: Path) -> None: + global _DATA_DIR + _DATA_DIR = path + +def set_config_dir(path: Path | None) -> None: + global _CONFIG_DIR + _CONFIG_DIR = path + +def reset_last_action() -> None: + global _last_action + _last_action = None + +def _config_file() -> Path: + if _CONFIG_DIR is not None: + return _CONFIG_DIR / "label_tool.yaml" + return _ROOT / "config" / "label_tool.yaml" + +def _queue_file() -> Path: + return _DATA_DIR / "email_label_queue.jsonl" + +def _score_file() -> Path: + return _DATA_DIR / "email_score.jsonl" + +def _discarded_file() -> Path: + return _DATA_DIR / "discarded.jsonl" + +def _item_id(item: dict) -> str: + key = (item.get("subject", "") + (item.get("body", "") or "")[:100]) + return hashlib.md5(key.encode("utf-8", errors="replace")).hexdigest() + +def _normalize(item: dict) -> dict: + return { + "id": item.get("id") or _item_id(item), + "subject": item.get("subject", ""), + "body": item.get("body", ""), + "from": item.get("from") or item.get("from_addr", ""), + "date": item.get("date", ""), + "source": item.get("source") or item.get("account", ""), + } + +_LABEL_META = [ + {"name": "interview_scheduled", "emoji": "\U0001f4c5", "color": "#4CAF50", "key": "1"}, + {"name": "offer_received", "emoji": "\U0001f389", "color": "#2196F3", "key": "2"}, + {"name": "rejected", "emoji": "❌", "color": "#F44336", "key": "3"}, + {"name": "positive_response", "emoji": "\U0001f44d", "color": "#FF9800", "key": "4"}, + {"name": "survey_received", "emoji": "\U0001f4cb", "color": "#9C27B0", "key": "5"}, + {"name": "neutral", "emoji": "⬜", "color": "#607D8B", "key": "6"}, + {"name": "event_rescheduled", "emoji": "\U0001f504", "color": "#FF5722", "key": "7"}, + {"name": "digest", "emoji": "\U0001f4f0", "color": "#00BCD4", "key": "8"}, + {"name": "new_lead", "emoji": "\U0001f91d", "color": "#009688", "key": "9"}, + {"name": "hired", "emoji": "\U0001f38a", "color": "#FFC107", "key": "h"}, +] + +@router.get("/queue") +def get_queue(limit: int = Query(default=10, ge=1, le=50)): + items = read_jsonl(_queue_file()) + return {"items": [_normalize(x) for x in items[:limit]], "total": len(items)} + +class LabelRequest(BaseModel): + id: str + label: str + +@router.post("/label") +def post_label(req: LabelRequest): + global _last_action + items = read_jsonl(_queue_file()) + match = next((x for x in items if _normalize(x)["id"] == req.id), None) + if not match: + raise HTTPException(404, f"Item {req.id!r} not found in queue") + record = {**match, "label": req.label, + "labeled_at": datetime.now(timezone.utc).isoformat()} + append_jsonl(_score_file(), record) + write_jsonl(_queue_file(), [x for x in items if _normalize(x)["id"] != req.id]) + _last_action = {"type": "label", "item": match, "label": req.label} + return {"ok": True} + +class SkipRequest(BaseModel): + id: str + +@router.post("/skip") +def post_skip(req: SkipRequest): + global _last_action + items = read_jsonl(_queue_file()) + match = next((x for x in items if _normalize(x)["id"] == req.id), None) + if not match: + raise HTTPException(404, f"Item {req.id!r} not found in queue") + reordered = [x for x in items if _normalize(x)["id"] != req.id] + [match] + write_jsonl(_queue_file(), reordered) + _last_action = {"type": "skip", "item": match} + return {"ok": True} + +class DiscardRequest(BaseModel): + id: str + +@router.post("/discard") +def post_discard(req: DiscardRequest): + global _last_action + items = read_jsonl(_queue_file()) + match = next((x for x in items if _normalize(x)["id"] == req.id), None) + if not match: + raise HTTPException(404, f"Item {req.id!r} not found in queue") + record = {**match, "label": "__discarded__", + "discarded_at": datetime.now(timezone.utc).isoformat()} + append_jsonl(_discarded_file(), record) + write_jsonl(_queue_file(), [x for x in items if _normalize(x)["id"] != req.id]) + _last_action = {"type": "discard", "item": match} + return {"ok": True} + +@router.delete("/label/undo") +def delete_undo(): + global _last_action + if not _last_action: + raise HTTPException(404, "No action to undo") + action = _last_action + item = action["item"] + if action["type"] == "label": + records = read_jsonl(_score_file()) + if not records: + raise HTTPException(409, "Score file is empty -- cannot undo label") + write_jsonl(_score_file(), records[:-1]) + items = read_jsonl(_queue_file()) + write_jsonl(_queue_file(), [item] + items) + elif action["type"] == "discard": + records = read_jsonl(_discarded_file()) + if not records: + raise HTTPException(409, "Discarded file is empty -- cannot undo discard") + write_jsonl(_discarded_file(), records[:-1]) + items = read_jsonl(_queue_file()) + write_jsonl(_queue_file(), [item] + items) + elif action["type"] == "skip": + items = read_jsonl(_queue_file()) + item_id = _normalize(item)["id"] + items = [item] + [x for x in items if _normalize(x)["id"] != item_id] + write_jsonl(_queue_file(), items) + _last_action = None + return {"undone": {"type": action["type"], "item": _normalize(item)}} + +@router.get("/config/labels") +def get_labels(): + return _LABEL_META + +@router.get("/config") +def get_config(): + f = _config_file() + if not f.exists(): + return {"accounts": [], "max_per_account": 500} + raw = yaml.safe_load(f.read_text(encoding="utf-8")) or {} + return {"accounts": raw.get("accounts", []), "max_per_account": raw.get("max_per_account", 500)} + +class ConfigPayload(BaseModel): + accounts: list[dict] + max_per_account: int = 500 + +@router.post("/config") +def post_config(payload: ConfigPayload): + f = _config_file() + f.parent.mkdir(parents=True, exist_ok=True) + tmp = f.with_suffix(".tmp") + tmp.write_text(yaml.dump(payload.model_dump(), allow_unicode=True, sort_keys=False), + encoding="utf-8") + tmp.rename(f) + return {"ok": True} + +@router.get("/stats") +def get_stats(): + records = read_jsonl(_score_file()) + counts: dict[str, int] = {} + for r in records: + lbl = r.get("label", "") + if lbl: + counts[lbl] = counts.get(lbl, 0) + 1 + benchmark_results: dict = {} + benchmark_path = _DATA_DIR / "benchmark_results.json" + if benchmark_path.exists(): + try: + benchmark_results = json.loads(benchmark_path.read_text(encoding="utf-8")) + except Exception: + pass + return { + "total": len(records), + "counts": counts, + "score_file_bytes": _score_file().stat().st_size if _score_file().exists() else 0, + "benchmark_results": benchmark_results, + } + +@router.get("/stats/download") +def download_stats(): + if not _score_file().exists(): + raise HTTPException(404, "No score file") + return FileResponse( + str(_score_file()), + filename="email_score.jsonl", + media_type="application/jsonlines", + headers={"Content-Disposition": 'attachment; filename="email_score.jsonl"'}, + ) diff --git a/tests/test_api.py b/tests/test_api.py index 2360b02..101a701 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -7,10 +7,15 @@ from app import api as api_module # noqa: F401 @pytest.fixture(autouse=True) def reset_globals(tmp_path): from app import api + from app.data import label as label_module api.set_data_dir(tmp_path) api.reset_last_action() + label_module.set_data_dir(tmp_path) + label_module.set_config_dir(tmp_path) + label_module.reset_last_action() yield api.reset_last_action() + label_module.reset_last_action() def test_import(): @@ -160,9 +165,12 @@ def test_config_labels_returns_metadata(client): def config_dir(tmp_path): """Give the API a writable config directory.""" from app import api as api_module + from app.data import label as label_module api_module.set_config_dir(tmp_path) + label_module.set_config_dir(tmp_path) yield tmp_path api_module.set_config_dir(None) # reset to default + label_module.set_config_dir(None) @pytest.fixture diff --git a/tests/test_data_label.py b/tests/test_data_label.py new file mode 100644 index 0000000..09d44d5 --- /dev/null +++ b/tests/test_data_label.py @@ -0,0 +1,219 @@ +"""Tests for app/data/label.py""" +import json +import pytest +import yaml +from fastapi.testclient import TestClient + + +@pytest.fixture(autouse=True) +def reset_globals(tmp_path): + from app.data import label as label_module + label_module.set_data_dir(tmp_path) + label_module.set_config_dir(tmp_path) + label_module.reset_last_action() + yield + label_module.reset_last_action() + + +@pytest.fixture +def client(): + from app.api import app + return TestClient(app) + + +@pytest.fixture +def queue_with_items(tmp_path): + from app.data import label as label_module + items = [ + {"id": f"id{i}", "subject": f"Subject {i}", "body": f"Body {i}", + "from": "test@example.com", "date": "2026-03-01", "source": "imap:test"} + for i in range(3) + ] + (label_module._DATA_DIR / "email_label_queue.jsonl").write_text( + "\n".join(json.dumps(x) for x in items) + "\n") + return items + + +def test_queue_returns_items(client, queue_with_items): + r = client.get("/api/queue?limit=2") + assert r.status_code == 200 + data = r.json() + assert len(data["items"]) == 2 + assert data["total"] == 3 + + +def test_queue_empty_when_no_file(client): + r = client.get("/api/queue") + assert r.status_code == 200 + assert r.json() == {"items": [], "total": 0} + + +def test_label_appends_to_score(client, queue_with_items): + from app.data import label as label_module + r = client.post("/api/label", json={"id": "id0", "label": "interview_scheduled"}) + assert r.status_code == 200 + records = label_module.read_jsonl(label_module._score_file()) + assert len(records) == 1 + assert records[0]["id"] == "id0" + assert records[0]["label"] == "interview_scheduled" + assert "labeled_at" in records[0] + + +def test_label_removes_from_queue(client, queue_with_items): + from app.data import label as label_module + client.post("/api/label", json={"id": "id0", "label": "rejected"}) + queue = label_module.read_jsonl(label_module._queue_file()) + assert not any(x["id"] == "id0" for x in queue) + + +def test_label_unknown_id_returns_404(client, queue_with_items): + r = client.post("/api/label", json={"id": "unknown", "label": "neutral"}) + assert r.status_code == 404 + + +def test_skip_moves_to_back(client, queue_with_items): + from app.data import label as label_module + r = client.post("/api/skip", json={"id": "id0"}) + assert r.status_code == 200 + queue = label_module.read_jsonl(label_module._queue_file()) + assert queue[-1]["id"] == "id0" + assert queue[0]["id"] == "id1" + + +def test_skip_unknown_id_returns_404(client, queue_with_items): + r = client.post("/api/skip", json={"id": "nope"}) + assert r.status_code == 404 + + +def test_discard_writes_to_discarded_file(client, queue_with_items): + from app.data import label as label_module + r = client.post("/api/discard", json={"id": "id1"}) + assert r.status_code == 200 + discarded = label_module.read_jsonl(label_module._discarded_file()) + assert len(discarded) == 1 + assert discarded[0]["id"] == "id1" + assert discarded[0]["label"] == "__discarded__" + + +def test_discard_removes_from_queue(client, queue_with_items): + from app.data import label as label_module + client.post("/api/discard", json={"id": "id1"}) + queue = label_module.read_jsonl(label_module._queue_file()) + assert not any(x["id"] == "id1" for x in queue) + + +def test_undo_label_removes_from_score(client, queue_with_items): + from app.data import label as label_module + client.post("/api/label", json={"id": "id0", "label": "neutral"}) + r = client.delete("/api/label/undo") + assert r.status_code == 200 + assert r.json()["undone"]["type"] == "label" + assert label_module.read_jsonl(label_module._score_file()) == [] + queue = label_module.read_jsonl(label_module._queue_file()) + assert queue[0]["id"] == "id0" + + +def test_undo_discard_removes_from_discarded(client, queue_with_items): + from app.data import label as label_module + client.post("/api/discard", json={"id": "id0"}) + r = client.delete("/api/label/undo") + assert r.status_code == 200 + assert label_module.read_jsonl(label_module._discarded_file()) == [] + + +def test_undo_skip_restores_to_front(client, queue_with_items): + from app.data import label as label_module + client.post("/api/skip", json={"id": "id0"}) + r = client.delete("/api/label/undo") + assert r.status_code == 200 + queue = label_module.read_jsonl(label_module._queue_file()) + assert queue[0]["id"] == "id0" + + +def test_undo_with_no_action_returns_404(client): + r = client.delete("/api/label/undo") + assert r.status_code == 404 + + +def test_config_labels_returns_10_labels(client): + r = client.get("/api/config/labels") + assert r.status_code == 200 + labels = r.json() + assert len(labels) == 10 + assert labels[0]["key"] == "1" + for lbl in labels: + assert "emoji" in lbl and "color" in lbl and "name" in lbl + + +def test_get_config_returns_empty_when_no_file(client): + r = client.get("/api/config") + assert r.status_code == 200 + data = r.json() + assert data["accounts"] == [] + assert data["max_per_account"] == 500 + + +def test_post_config_writes_yaml(client, tmp_path): + from app.data import label as label_module + label_module.set_config_dir(tmp_path) + payload = {"accounts": [{"name": "Test", "host": "imap.test.com", "port": 993, + "use_ssl": True, "username": "u@t.com", "password": "pw", + "folder": "INBOX", "days_back": 30}], "max_per_account": 200} + r = client.post("/api/config", json=payload) + assert r.status_code == 200 + assert r.json()["ok"] is True + saved = yaml.safe_load((tmp_path / "label_tool.yaml").read_text()) + assert saved["max_per_account"] == 200 + assert saved["accounts"][0]["name"] == "Test" + + +def test_get_config_round_trips(client, tmp_path): + from app.data import label as label_module + label_module.set_config_dir(tmp_path) + payload = {"accounts": [{"name": "R", "host": "h", "port": 993, "use_ssl": True, + "username": "u", "password": "p", "folder": "INBOX", + "days_back": 90}], "max_per_account": 300} + client.post("/api/config", json=payload) + r = client.get("/api/config") + data = r.json() + assert data["max_per_account"] == 300 + assert data["accounts"][0]["name"] == "R" + + +def test_stats_returns_counts(client, tmp_path): + from app.data import label as label_module + label_module.set_data_dir(tmp_path) + score_path = tmp_path / "email_score.jsonl" + records = [{"id": "a", "label": "interview_scheduled"}, + {"id": "b", "label": "interview_scheduled"}, + {"id": "c", "label": "rejected"}] + score_path.write_text("\n".join(json.dumps(r) for r in records) + "\n") + r = client.get("/api/stats") + assert r.status_code == 200 + data = r.json() + assert data["total"] == 3 + assert data["counts"]["interview_scheduled"] == 2 + assert data["counts"]["rejected"] == 1 + + +def test_stats_empty_when_no_file(client): + r = client.get("/api/stats") + assert r.status_code == 200 + data = r.json() + assert data["total"] == 0 + assert data["counts"] == {} + assert data["score_file_bytes"] == 0 + + +def test_stats_download_returns_file(client, tmp_path): + from app.data import label as label_module + label_module.set_data_dir(tmp_path) + (tmp_path / "email_score.jsonl").write_text(json.dumps({"id": "a", "label": "neutral"}) + "\n") + r = client.get("/api/stats/download") + assert r.status_code == 200 + assert "jsonlines" in r.headers.get("content-type", "") + + +def test_stats_download_404_when_no_file(client): + r = client.get("/api/stats/download") + assert r.status_code == 404