fix: guard extract_digest_links db.close(), remove domain-in-path false positive, add hint assertion
This commit is contained in:
parent
182ab789df
commit
5bb3674fea
2 changed files with 15 additions and 9 deletions
20
dev-api.py
20
dev-api.py
|
|
@@ -100,7 +100,7 @@ def _score_url(url: str) -> int:
|
||||||
hostname = (parsed.hostname or '').lower()
|
hostname = (parsed.hostname or '').lower()
|
||||||
path = parsed.path.lower()
|
path = parsed.path.lower()
|
||||||
for domain in _JOB_DOMAINS:
|
for domain in _JOB_DOMAINS:
|
||||||
if domain in hostname or domain in path:
|
if domain in hostname:
|
||||||
return 2
|
return 2
|
||||||
for seg in _JOB_PATH_SEGMENTS:
|
for seg in _JOB_PATH_SEGMENTS:
|
||||||
if f'/{seg}/' in path or path.startswith(f'/{seg}'):
|
if f'/{seg}/' in path or path.startswith(f'/{seg}'):
|
||||||
|
|
@@ -563,14 +563,16 @@ def add_to_digest_queue(body: DigestQueueBody):
|
||||||
@app.post("/api/digest-queue/{digest_id}/extract-links")
|
@app.post("/api/digest-queue/{digest_id}/extract-links")
|
||||||
def extract_digest_links(digest_id: int):
|
def extract_digest_links(digest_id: int):
|
||||||
db = _get_db()
|
db = _get_db()
|
||||||
row = db.execute(
|
try:
|
||||||
"""SELECT jc.body
|
row = db.execute(
|
||||||
FROM digest_queue dq
|
"""SELECT jc.body
|
||||||
JOIN job_contacts jc ON jc.id = dq.job_contact_id
|
FROM digest_queue dq
|
||||||
WHERE dq.id = ?""",
|
JOIN job_contacts jc ON jc.id = dq.job_contact_id
|
||||||
(digest_id,),
|
WHERE dq.id = ?""",
|
||||||
).fetchone()
|
(digest_id,),
|
||||||
db.close()
|
).fetchone()
|
||||||
|
finally:
|
||||||
|
db.close()
|
||||||
if not row:
|
if not row:
|
||||||
raise HTTPException(404, "Digest entry not found")
|
raise HTTPException(404, "Digest entry not found")
|
||||||
return {"links": _extract_links(row["body"] or "")}
|
return {"links": _extract_links(row["body"] or "")}
|
||||||
|
|
|
||||||
|
|
@@ -138,6 +138,10 @@ def test_digest_extract_links(client, tmp_db):
|
||||||
assert len(lever_links) == 1
|
assert len(lever_links) == 1
|
||||||
assert lever_links[0]["score"] == 2
|
assert lever_links[0]["score"] == 2
|
||||||
|
|
||||||
|
# Each link must have a hint key (may be empty string for links at start of body)
|
||||||
|
for link in links:
|
||||||
|
assert "hint" in link
|
||||||
|
|
||||||
|
|
||||||
def test_digest_extract_links_filters_trackers(client, tmp_db):
|
def test_digest_extract_links_filters_trackers(client, tmp_db):
|
||||||
entry_id = _add_digest_entry(tmp_db)
|
entry_id = _add_digest_entry(tmp_db)
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue