magpie/app/services/lemmy/discovery.py
Alan Weinstock 35c6e5f7bc feat(discovery): add Lemmy community search, fix dead request, add platform field
- Add app/services/lemmy/discovery.py: searches 5 major Lemmy instances,
  deduplicates by actor_id (AP canonical URL), skips NSFW communities,
  uses community@instance naming convention matching existing Lemmy client
- Update POST /subs/discover: accepts platforms[] param (default both),
  fans out to Reddit + Lemmy search, merges and sorts by subscribers
- Add platform field to all discovery result dicts (Reddit and Lemmy)
- Fix: remove dead _get() call left in search_subs() during earlier refactor
- Frontend: show platform badge on each discovery row, correct hyperlink
  format for Lemmy (https://{instance}/c/{community}), pass r.platform
  to upsertRules on import so Lemmy subs land in the lemmy platform slot
2026-06-13 22:23:31 -07:00

100 lines
3.3 KiB
Python

"""
Lemmy community discovery.
Searches major Lemmy instances for communities matching a keyword and
returns candidates for user review. Nothing is stored until the user
imports via PUT /subs/{community@instance}.
"""
from __future__ import annotations

import logging
from typing import Any
from urllib.parse import urlsplit

import httpx

from app.services.lemmy.client import KNOWN_INSTANCES, _DEFAULT_USER_AGENT
logger = logging.getLogger(__name__)

# Hosts queried by search_lemmy(), one request each, in list order
# (largest instance first, per the original author's ordering).
_SEARCH_INSTANCES = [
    "lemmy.world",
    "lemmy.ml",
    "beehaw.org",
    "sh.itjust.works",
    "reddthat.com",
]

# Timeout handed to httpx for every per-instance search request (seconds).
_TIMEOUT = 10.0
def _home_instance(actor_id: str, fallback: str) -> str:
    """Return the host part of an actor URL, or *fallback* when it has none."""
    try:
        host = urlsplit(actor_id).netloc
    except ValueError:
        # urlsplit() rejects malformed hosts (e.g. unbalanced IPv6 brackets).
        host = ""
    return host or fallback


def _fetch_community_views(instance: str, keyword: str) -> list[Any]:
    """Run one community search against *instance* and return its raw result rows."""
    r = httpx.get(
        f"https://{instance}/api/v3/search",
        params={
            "q": keyword,
            "type_": "Communities",
            "limit": 20,
            "sort": "TopAll",
        },
        headers={"User-Agent": _DEFAULT_USER_AGENT},
        timeout=_TIMEOUT,
        follow_redirects=True,
    )
    if r.status_code != 200:
        logger.debug("Lemmy search on %s returned %d", instance, r.status_code)
        return []
    # `or []` also covers an explicit JSON null for the key.
    return r.json().get("communities") or []


def _build_candidate(
    comm: dict[str, Any],
    counts: dict[str, Any],
    actor_id: str,
    instance: str,
    tracked: set[str],
) -> dict[str, Any] | None:
    """Shape one community into a discovery row, or None if it must be skipped."""
    if comm.get("nsfw"):
        return None
    name = comm.get("name") or ""
    if not name:
        # Without a name the key would be "@instance", which cannot be imported.
        return None
    sub_key = f"{name}@{_home_instance(actor_id, instance)}"
    description = (comm.get("description") or "").strip()
    return {
        "sub": sub_key,
        "title": comm.get("title") or name,
        # A null count must not reach the sort key: None does not compare with int.
        "subscribers": counts.get("subscribers") or 0,
        "description": description[:280],
        "promo_allowed": None,  # Lemmy has no standardized promo rules
        "flair_required": False,
        "available_flairs": [],
        "rule_warning": False,
        "notes": None,
        "already_tracked": sub_key.lower() in tracked,
        "platform": "lemmy",
    }


def search_lemmy(
    keyword: str,
    limit: int = 15,
    known_subs: set[str] | None = None,
) -> list[dict[str, Any]]:
    """
    Search multiple Lemmy instances for communities matching keyword.

    Every host in _SEARCH_INSTANCES is queried in turn. Communities are
    deduplicated by actor_id (canonical AP URL) so federated communities
    don't appear twice; the first instance that reports a community wins.
    NSFW communities and communities without a name are skipped.

    Args:
        keyword: Search text sent to each instance.
        limit: Maximum number of candidates to return. Zero or a negative
            value returns an empty list without making any request.
        known_subs: Keys ("community@instance") the caller already tracks;
            compared case-insensitively to set "already_tracked".

    Returns:
        Candidate dicts sorted by subscriber count, highest first, truncated
        to *limit*. A failing instance is logged and skipped, never raised.
    """
    if limit <= 0:
        return []

    # Lower-case once so the membership test does not depend on caller casing.
    tracked = {s.lower() for s in known_subs} if known_subs else set()
    seen_actor_ids: set[str] = set()
    results: list[dict[str, Any]] = []

    for instance in _SEARCH_INSTANCES:
        try:
            for cv in _fetch_community_views(instance, keyword):
                if not isinstance(cv, dict):
                    continue
                # `or {}` guards against explicit nulls, not just missing keys.
                comm = cv.get("community") or {}
                actor_id = comm.get("actor_id") or ""
                if not actor_id or actor_id in seen_actor_ids:
                    continue
                seen_actor_ids.add(actor_id)
                candidate = _build_candidate(
                    comm, cv.get("counts") or {}, actor_id, instance, tracked
                )
                if candidate is not None:
                    results.append(candidate)
        except Exception:
            # Best-effort fan-out: one unreachable or misbehaving instance
            # must not prevent results from the others being returned.
            logger.warning("Error searching Lemmy instance %s", instance, exc_info=True)

    results.sort(key=lambda c: c["subscribers"], reverse=True)
    return results[:limit]