fix: guard extract_digest_links db.close(), remove domain-in-path false positive, add hint assertion

This commit is contained in:
pyr0ball 2026-03-20 07:04:24 -07:00
parent aee8038be9
commit 4873201242
2 changed files with 15 additions and 9 deletions

View file

@@ -100,7 +100,7 @@ def _score_url(url: str) -> int:
hostname = (parsed.hostname or '').lower() hostname = (parsed.hostname or '').lower()
path = parsed.path.lower() path = parsed.path.lower()
for domain in _JOB_DOMAINS: for domain in _JOB_DOMAINS:
if domain in hostname or domain in path: if domain in hostname:
return 2 return 2
for seg in _JOB_PATH_SEGMENTS: for seg in _JOB_PATH_SEGMENTS:
if f'/{seg}/' in path or path.startswith(f'/{seg}'): if f'/{seg}/' in path or path.startswith(f'/{seg}'):
@@ -563,14 +563,16 @@ def add_to_digest_queue(body: DigestQueueBody):
@app.post("/api/digest-queue/{digest_id}/extract-links") @app.post("/api/digest-queue/{digest_id}/extract-links")
def extract_digest_links(digest_id: int): def extract_digest_links(digest_id: int):
db = _get_db() db = _get_db()
row = db.execute( try:
"""SELECT jc.body row = db.execute(
FROM digest_queue dq """SELECT jc.body
JOIN job_contacts jc ON jc.id = dq.job_contact_id FROM digest_queue dq
WHERE dq.id = ?""", JOIN job_contacts jc ON jc.id = dq.job_contact_id
(digest_id,), WHERE dq.id = ?""",
).fetchone() (digest_id,),
db.close() ).fetchone()
finally:
db.close()
if not row: if not row:
raise HTTPException(404, "Digest entry not found") raise HTTPException(404, "Digest entry not found")
return {"links": _extract_links(row["body"] or "")} return {"links": _extract_links(row["body"] or "")}

View file

@@ -138,6 +138,10 @@ def test_digest_extract_links(client, tmp_db):
assert len(lever_links) == 1 assert len(lever_links) == 1
assert lever_links[0]["score"] == 2 assert lever_links[0]["score"] == 2
# Each link must have a hint key (may be empty string for links at start of body)
for link in links:
assert "hint" in link
def test_digest_extract_links_filters_trackers(client, tmp_db): def test_digest_extract_links_filters_trackers(client, tmp_db):
entry_id = _add_digest_entry(tmp_db) entry_id = _add_digest_entry(tmp_db)