fix: guard extract_digest_links db.close(), remove domain-in-path false positive, add hint assertion

This commit is contained in:
pyr0ball 2026-03-20 07:04:24 -07:00
parent 182ab789df
commit 5bb3674fea
2 changed files with 15 additions and 9 deletions

View file

@@ -100,7 +100,7 @@ def _score_url(url: str) -> int:
hostname = (parsed.hostname or '').lower()
path = parsed.path.lower()
for domain in _JOB_DOMAINS:
- if domain in hostname or domain in path:
+ if domain in hostname:
return 2
for seg in _JOB_PATH_SEGMENTS:
if f'/{seg}/' in path or path.startswith(f'/{seg}'):
@@ -563,6 +563,7 @@ def add_to_digest_queue(body: DigestQueueBody):
@app.post("/api/digest-queue/{digest_id}/extract-links")
def extract_digest_links(digest_id: int):
db = _get_db()
try:
row = db.execute(
"""SELECT jc.body
FROM digest_queue dq
@@ -570,6 +571,7 @@ def extract_digest_links(digest_id: int):
WHERE dq.id = ?""",
(digest_id,),
).fetchone()
finally:
db.close()
if not row:
raise HTTPException(404, "Digest entry not found")

View file

@@ -138,6 +138,10 @@ def test_digest_extract_links(client, tmp_db):
assert len(lever_links) == 1
assert lever_links[0]["score"] == 2
# Each link must have a hint key (may be empty string for links at start of body)
for link in links:
assert "hint" in link
def test_digest_extract_links_filters_trackers(client, tmp_db):
entry_id = _add_digest_entry(tmp_db)