feat(scheduler): add durability — re-queue surviving LLM tasks on startup

This commit is contained in:
pyr0ball 2026-03-15 04:24:11 -07:00
parent 1d9020c99a
commit dfd2f0214e
2 changed files with 76 additions and 0 deletions

View file

@@ -91,6 +91,9 @@ class TaskScheduler:
except Exception: except Exception:
self._available_vram = 999.0 self._available_vram = 999.0
# Durability: reload surviving 'queued' LLM tasks from prior run
self._load_queued_tasks()
def enqueue(self, task_id: int, task_type: str, job_id: int, def enqueue(self, task_id: int, task_type: str, job_id: int,
params: Optional[str]) -> None: params: Optional[str]) -> None:
"""Add an LLM task to the scheduler queue. """Add an LLM task to the scheduler queue.
@@ -186,6 +189,26 @@ class TaskScheduler:
self._reserved_vram -= self._budgets.get(task_type, 0.0) self._reserved_vram -= self._budgets.get(task_type, 0.0)
self._wake.set() self._wake.set()
def _load_queued_tasks(self) -> None:
    """Reload surviving 'queued' LLM tasks from SQLite into the in-memory deques.

    Called exactly once from ``__init__`` so that LLM tasks enqueued by a
    prior process run are not lost when the scheduler restarts (durability).
    Only task types in ``LLM_TASK_TYPES`` are reloaded; rows are appended in
    ``created_at`` order so FIFO ordering from the prior run is preserved.
    """
    llm_types = sorted(LLM_TASK_TYPES)  # sorted for deterministic SQL params in logs
    placeholders = ",".join("?" * len(llm_types))
    conn = sqlite3.connect(self._db_path)
    try:
        # Placeholders interpolated into the SQL are only "?" markers; the
        # actual values are bound as parameters, so this is injection-safe.
        rows = conn.execute(
            f"SELECT id, task_type, job_id, params FROM background_tasks"
            f" WHERE status='queued' AND task_type IN ({placeholders})"
            f" ORDER BY created_at ASC",
            llm_types,
        ).fetchall()
    finally:
        # Always release the connection, even if the query raises
        # (e.g. missing table on a fresh/corrupt database).
        conn.close()
    for row_id, task_type, job_id, params in rows:
        q = self._queues.setdefault(task_type, deque())
        q.append(TaskSpec(row_id, job_id, params))
    if rows:
        logger.info("Scheduler: resumed %d queued task(s) from prior run", len(rows))
# ── Singleton ─────────────────────────────────────────────────────────────────

View file

@@ -371,3 +371,56 @@ def test_reset_scheduler_cleans_up(tmp_db):
# After reset, get_scheduler creates a fresh instance # After reset, get_scheduler creates a fresh instance
s2 = get_scheduler(tmp_db, _noop_run_task) s2 = get_scheduler(tmp_db, _noop_run_task)
assert s2 is not s assert s2 is not s
def test_durability_loads_queued_llm_tasks_on_startup(tmp_db):
    """At construction, the scheduler picks up queued LLM rows left by a prior run."""
    from scripts.db import insert_task

    # Simulate rows that survived a previous process.
    cover_id, _ = insert_task(tmp_db, "cover_letter", 1)
    research_id, _ = insert_task(tmp_db, "company_research", 2)

    scheduler = TaskScheduler(tmp_db, _noop_run_task)

    # Each type's deque holds exactly the one pre-inserted task.
    assert [t.id for t in scheduler._queues.get("cover_letter", [])] == [cover_id]
    assert [t.id for t in scheduler._queues.get("company_research", [])] == [research_id]
def test_durability_excludes_non_llm_queued_tasks(tmp_db):
    """Queued rows whose task_type is not an LLM type stay out of the deques."""
    from scripts.db import insert_task

    non_llm_types = ("discovery", "email_sync")
    for task_type in non_llm_types:
        insert_task(tmp_db, task_type, 0)

    scheduler = TaskScheduler(tmp_db, _noop_run_task)

    # Missing key or an empty deque both count as "not loaded".
    for task_type in non_llm_types:
        assert not scheduler._queues.get(task_type)
def test_durability_preserves_fifo_order(tmp_db):
    """Rows are loaded into the deque in ascending created_at (FIFO) order."""
    conn = sqlite3.connect(tmp_db)
    insert_sql = (
        "INSERT INTO background_tasks (task_type, job_id, params, status, created_at)"
        " VALUES (?,?,?,?,?)"
    )
    # The later timestamp is inserted FIRST to prove ordering comes from
    # created_at, not from rowid / insertion order.
    conn.executemany(insert_sql, [
        ("cover_letter", 1, None, "queued", "2026-01-01 10:00:00"),
        ("cover_letter", 2, None, "queued", "2026-01-01 09:00:00"),
    ])
    conn.commit()
    expected_ids = [row[0] for row in conn.execute(
        "SELECT id FROM background_tasks ORDER BY created_at ASC"
    )]
    conn.close()

    scheduler = TaskScheduler(tmp_db, _noop_run_task)
    assert [spec.id for spec in scheduler._queues["cover_letter"]] == expected_ids