def _run(store: Store):
    """Search every requested platform and return one merged candidate list.

    This is the worker closure defined inside ``discover_subs``; it reads the
    request model ``body`` from the enclosing scope (keyword, limit,
    platforms).

    ``body.limit`` is handed to each platform search separately, so the
    merged list can contain up to ``limit`` entries per platform.

    Args:
        store: Store used to look up already-tracked communities so each
            candidate can be flagged via ``known_subs``.

    Returns:
        Candidate dicts from all requested platforms, sorted by subscriber
        count in descending order.
    """
    # Normalise the names so "Reddit" or " lemmy" still select a back end
    # instead of silently producing an empty result.
    requested = {
        str(name).strip().lower()
        for name in (body.platforms or ["reddit", "lemmy"])
    }
    merged: list[dict] = []

    if "reddit" in requested:
        from app.services.reddit.discovery import search_and_analyze

        tracked_reddit = {
            rule["sub"].lower() for rule in store.list_sub_rules("reddit")
        }
        try:
            from app.services.reddit.client import RedditClient

            cookies = RedditClient().cookies
        except Exception:
            # Best effort: if no client can be built, search without cookies.
            cookies = None
        merged.extend(
            search_and_analyze(
                keyword=body.keyword,
                limit=body.limit,
                cookies=cookies,
                known_subs=tracked_reddit,
            )
        )

    if "lemmy" in requested:
        from app.services.lemmy.discovery import search_lemmy

        tracked_lemmy = {
            rule["sub"].lower() for rule in store.list_sub_rules("lemmy")
        }
        merged.extend(
            search_lemmy(
                keyword=body.keyword,
                limit=body.limit,
                known_subs=tracked_lemmy,
            )
        )

    # Merge and sort by subscribers descending. `or 0` also covers a key that
    # is present but null, which `.get("subscribers", 0)` would pass through
    # as None and make the comparison raise TypeError.
    merged.sort(key=lambda item: item.get("subscribers") or 0, reverse=True)
    return merged
# Bounds for the per-instance page size. The floor keeps the original request
# size of 20; the ceiling of 50 is presumably the Lemmy server-side maximum
# for list endpoints -- TODO confirm against the targeted Lemmy version.
_MIN_PAGE_SIZE = 20
_MAX_PAGE_SIZE = 50


def _home_instance(actor_id: str, fallback: str) -> str:
    """Return the host of an actor URL, or *fallback* when it has none."""
    from urllib.parse import urlsplit

    try:
        return urlsplit(actor_id).netloc or fallback
    except ValueError:
        # urlsplit rejects some malformed authorities (e.g. bad IPv6 literal).
        return fallback


def _candidate(
    view: dict[str, Any],
    home_instance: str,
    tracked: set[str],
) -> dict[str, Any] | None:
    """Map one search hit to a discovery candidate, or None to skip it."""
    # `or {}` / `or 0` guard against JSON nulls, which `.get(key, default)`
    # would hand back unchanged.
    community = view.get("community") or {}
    if community.get("nsfw"):
        return None

    name = community.get("name") or ""
    if not name:
        # Without a name the key would be "@host", which cannot be imported.
        return None

    counts = view.get("counts") or {}
    sub_key = f"{name}@{home_instance}"
    description = (community.get("description") or "").strip()

    return {
        "sub": sub_key,
        "title": community.get("title") or name,
        "subscribers": counts.get("subscribers") or 0,
        "description": description[:280],
        "promo_allowed": None,  # Lemmy has no standardized promo rules
        "flair_required": False,
        "available_flairs": [],
        "rule_warning": False,
        "notes": None,
        "already_tracked": sub_key.lower() in tracked,
        "platform": "lemmy",
    }


def search_lemmy(
    keyword: str,
    limit: int = 15,
    known_subs: set[str] | None = None,
) -> list[dict[str, Any]]:
    """
    Search multiple Lemmy instances for communities matching keyword.

    Deduplicates by actor_id (canonical AP URL) so federated communities
    don't appear twice. NSFW and nameless communities are skipped.

    Args:
        keyword: Search term sent to every instance in ``_SEARCH_INSTANCES``.
        limit: Maximum number of candidates to return. Values <= 0 return an
            empty list without touching the network.
        known_subs: Lower-cased ``name@instance`` keys that are already
            tracked; used only to set ``already_tracked``.

    Returns:
        Candidate dicts sorted by subscriber count (descending), at most
        ``limit`` of them. Failures of individual instances are logged and
        skipped, so the function does not raise for network errors.
    """
    if limit <= 0:
        # A negative slice bound would drop items from the end instead of
        # returning nothing, and there is no point querying for zero results.
        return []

    tracked = known_subs if known_subs is not None else set()
    # Ask each instance for at least as many hits as the caller wants.
    page_size = min(max(limit, _MIN_PAGE_SIZE), _MAX_PAGE_SIZE)

    seen_actor_ids: set[str] = set()
    results: list[dict[str, Any]] = []

    for instance in _SEARCH_INSTANCES:
        try:
            response = httpx.get(
                f"https://{instance}/api/v3/search",
                params={
                    "q": keyword,
                    "type_": "Communities",
                    "limit": page_size,
                    "sort": "TopAll",
                },
                headers={"User-Agent": _DEFAULT_USER_AGENT},
                timeout=_TIMEOUT,
                follow_redirects=True,
            )
            if response.status_code != 200:
                logger.debug(
                    "Lemmy search on %s returned %d", instance, response.status_code
                )
                continue

            views = response.json().get("communities") or []

            for view in views:
                community = view.get("community") or {}
                actor_id: str = community.get("actor_id") or ""
                if not actor_id or actor_id in seen_actor_ids:
                    continue
                seen_actor_ids.add(actor_id)

                # Derive home instance from actor_id
                # (e.g. https://lemmy.world/c/selfhosted -> lemmy.world).
                candidate = _candidate(
                    view, _home_instance(actor_id, instance), tracked
                )
                if candidate is not None:
                    results.append(candidate)

        except Exception:
            # Deliberate best effort: one unreachable or misbehaving instance
            # must not sink the whole search.
            logger.warning("Error searching Lemmy instance %s", instance, exc_info=True)

    results.sort(key=lambda item: item["subscribers"], reverse=True)
    return results[:limit]
""" try: - search_r = _get( - f"{_BASE}/subreddits/search.json", - cookies=cookies, - ) - # httpx doesn't support params kwarg above since we're using _get; rebuild r = httpx.get( f"{_BASE}/subreddits/search.json", params={"q": keyword, "limit": min(limit, 50), "sort": "relevance"}, @@ -201,6 +197,7 @@ def search_subs( "rule_warning": False, "notes": None, "already_tracked": (sub_name.lower() in known_subs) if known_subs is not None else False, + "platform": "reddit", }) # Sort by subscribers descending diff --git a/frontend/src/components/SubRulesView.vue b/frontend/src/components/SubRulesView.vue index e289423..fb9bd96 100644 --- a/frontend/src/components/SubRulesView.vue +++ b/frontend/src/components/SubRulesView.vue @@ -144,8 +144,13 @@ >
- r/{{ r.sub }} + {{ r.platform === 'lemmy' ? r.sub : `r/${r.sub}` }} {{ formatSubs(r.subscribers) }} + {{ r.platform }} tracked
{{ r.description }}
@@ -282,7 +287,7 @@ async function importSub(r: SubDiscoveryResult) { notes: r.notes, } try { - const saved = await api.subs.upsertRules(r.sub, payload, 'reddit') + const saved = await api.subs.upsertRules(r.sub, payload, r.platform) const idx = rules.value.findIndex(x => x.sub === r.sub) if (idx !== -1) { rules.value = [...rules.value.slice(0, idx), saved, ...rules.value.slice(idx + 1)] diff --git a/frontend/src/services/api.ts b/frontend/src/services/api.ts index 2a32767..f8d0574 100644 --- a/frontend/src/services/api.ts +++ b/frontend/src/services/api.ts @@ -109,6 +109,7 @@ export interface SubDiscoveryResult { rule_warning: boolean notes: string | null already_tracked: boolean + platform: string } export interface SubRulesUpsert { @@ -286,8 +287,8 @@ export const api = { upsertRules: (sub: string, data: SubRulesUpsert, platform = 'reddit') => http.put(`/subs/${sub}`, data, { params: { platform } }).then(r => r.data), - discover: (keyword: string, limit = 15) => - http.post('/subs/discover', { keyword, limit }).then(r => r.data), + discover: (keyword: string, limit = 15, platforms = ['reddit', 'lemmy']) => + http.post('/subs/discover', { keyword, limit, platforms }).then(r => r.data), }, posts: {