- Add app/services/lemmy/discovery.py: searches 5 major Lemmy instances, deduplicates by actor_id (AP canonical URL), skips NSFW communities, uses community@instance naming convention matching existing Lemmy client - Update POST /subs/discover: accepts platforms[] param (default both), fans out to Reddit + Lemmy search, merges and sorts by subscribers - Add platform field to all discovery result dicts (Reddit and Lemmy) - Fix: remove dead _get() call left in search_subs() during earlier refactor - Frontend: show platform badge on each discovery row, correct hyperlink format for Lemmy (https://{instance}/c/{community}), pass r.platform to upsertRules on import so Lemmy subs land in the lemmy platform slot
100 lines
3.3 KiB
Python
100 lines
3.3 KiB
Python
"""
Lemmy community discovery.

Searches major Lemmy instances for communities matching a keyword and
returns candidates for user review. Nothing is stored until the user
imports via PUT /subs/{community@instance}.
"""
|
|
from __future__ import annotations
|
|
|
|
import logging
|
|
from typing import Any
|
|
|
|
import httpx
|
|
|
|
from app.services.lemmy.client import KNOWN_INSTANCES, _DEFAULT_USER_AGENT
|
|
|
|
# Module-level logger: non-200 search responses are logged at DEBUG and
# per-instance failures at WARNING (with traceback).
logger = logging.getLogger(__name__)
|
|
|
|
# Instances to fan search across (ordered by size)
|
|
_SEARCH_INSTANCES = ["lemmy.world", "lemmy.ml", "beehaw.org", "sh.itjust.works", "reddthat.com"]
|
|
|
|
# Timeout, in seconds, handed to each per-instance httpx.get search request.
# Instances are queried one after another, so a slow instance delays the
# whole search.
_TIMEOUT: float = 10.0
|
|
|
|
|
|
def _fetch_communities(instance: str, keyword: str) -> list[dict[str, Any]]:
    """Return the community views that *instance* reports for *keyword*.

    A non-200 response is logged at DEBUG level and yields an empty list.
    Transport and JSON-decoding errors are not caught here; they propagate
    to the caller.
    """
    response = httpx.get(
        f"https://{instance}/api/v3/search",
        params={
            "q": keyword,
            "type_": "Communities",
            "limit": 20,
            "sort": "TopAll",
        },
        headers={"User-Agent": _DEFAULT_USER_AGENT},
        timeout=_TIMEOUT,
        follow_redirects=True,
    )
    if response.status_code != 200:
        logger.debug("Lemmy search on %s returned %d", instance, response.status_code)
        return []
    # "communities" may be absent or null; treat both as "no matches".
    return response.json().get("communities") or []


def _home_instance(actor_id: str, fallback: str) -> str:
    """Return the host part of *actor_id*, or *fallback* if it has none.

    For "https://lemmy.world/c/selfhosted" the host is the third
    "/"-separated segment, i.e. "lemmy.world".
    """
    segments = actor_id.split("/")
    if len(segments) > 2 and segments[2]:
        return segments[2]
    return fallback


def search_lemmy(
    keyword: str,
    limit: int = 15,
    known_subs: set[str] | None = None,
) -> list[dict[str, Any]]:
    """
    Search multiple Lemmy instances for communities matching keyword.

    Deduplicates by actor_id (canonical AP URL) so federated communities
    don't appear twice. Returns candidates sorted by subscriber count.

    Args:
        keyword: Search term sent as the ``q`` parameter to every instance
            in ``_SEARCH_INSTANCES``.
        limit: Maximum number of candidates to return. Zero or a negative
            value returns an empty list without contacting any instance.
        known_subs: Keys of communities that are already tracked. Each
            candidate's ``community@instance`` key is lowercased before the
            membership test, so entries are presumably stored lowercase --
            verify against the caller.

    Returns:
        At most ``limit`` candidate dicts, highest subscriber count first.
        NSFW communities and entries without an actor_id or name are
        skipped. An instance that fails is logged and skipped, so the
        result may be partial or empty.
    """
    # A negative limit would make ``results[:limit]`` drop items from the end
    # instead of returning nothing, so reject non-positive limits up front.
    if limit <= 0:
        return []

    seen_actor_ids: set[str] = set()
    results: list[dict[str, Any]] = []

    for instance in _SEARCH_INSTANCES:
        try:
            for view in _fetch_communities(instance, keyword):
                # Fields may be present but null; normalise so one odd entry
                # doesn't abort the rest of this instance's results.
                comm = view.get("community") or {}
                counts = view.get("counts") or {}

                actor_id: str = comm.get("actor_id") or ""
                if not actor_id or actor_id in seen_actor_ids:
                    continue
                seen_actor_ids.add(actor_id)

                name = comm.get("name") or ""
                # A nameless entry would yield the unusable key "@instance".
                if comm.get("nsfw") or not name:
                    continue

                sub_key = f"{name}@{_home_instance(actor_id, instance)}"
                description = (comm.get("description") or "").strip()

                results.append({
                    "sub": sub_key,
                    "title": comm.get("title") or name,
                    # Coerce null to 0: the sort below runs outside the
                    # try/except and cannot compare None with int.
                    "subscribers": counts.get("subscribers") or 0,
                    "description": description[:280],
                    "promo_allowed": None,  # Lemmy has no standardized promo rules
                    "flair_required": False,
                    "available_flairs": [],
                    "rule_warning": False,
                    "notes": None,
                    "already_tracked": bool(known_subs) and sub_key.lower() in known_subs,
                    "platform": "lemmy",
                })
        except Exception:
            # Best-effort fan-out: one unreachable or misbehaving instance
            # must not prevent results from the others being returned.
            logger.warning("Error searching Lemmy instance %s", instance, exc_info=True)

    results.sort(key=lambda candidate: candidate["subscribers"], reverse=True)
    return results[:limit]
|