feat(discovery): add Lemmy community search, fix dead request, add platform field

- Add app/services/lemmy/discovery.py: searches 5 major Lemmy instances,
  deduplicates by actor_id (AP canonical URL), skips NSFW communities,
  uses community@instance naming convention matching existing Lemmy client
- Update POST /subs/discover: accepts a platforms[] param (defaults to both
  Reddit and Lemmy), fans out to Reddit + Lemmy search, then merges the
  results and sorts them by subscriber count
- Add platform field to all discovery result dicts (Reddit and Lemmy)
- Fix: remove dead _get() call left in search_subs() during earlier refactor
- Frontend: show a platform badge on each discovery row, use the correct
  hyperlink format for Lemmy (https://{instance}/c/{community}), and pass
  r.platform to upsertRules on import so Lemmy subs land in the lemmy
  platform slot
This commit is contained in:
Alan Weinstock 2026-06-13 22:23:31 -07:00
parent f39f36e258
commit 35c6e5f7bc
5 changed files with 143 additions and 22 deletions

View file

@ -14,6 +14,7 @@ router = APIRouter(prefix="/subs", tags=["subs"])
class DiscoverBody(BaseModel):
    """Request body consumed by ``discover_subs``."""

    # Search term forwarded unchanged to each platform's search function.
    keyword: str
    # Result limit forwarded unchanged to each platform's search function.
    limit: int = 15
    # Platforms to search; both Reddit and Lemmy are searched by default.
    platforms: list[str] = ["reddit", "lemmy"]
def _in_thread(fn):
@ -70,18 +71,35 @@ async def discover_subs(body: DiscoverBody):
from app.services.reddit.discovery import search_and_analyze
def _run(store: Store):
# Collect already-tracked sub names so the UI can flag them
existing = {r["sub"].lower() for r in store.list_sub_rules("reddit")}
platforms = set(body.platforms or ["reddit", "lemmy"])
results: list[dict] = []
if "reddit" in platforms:
from app.services.reddit.discovery import search_and_analyze
existing_reddit = {r["sub"].lower() for r in store.list_sub_rules("reddit")}
try:
from app.services.reddit.client import RedditClient
cookies = RedditClient().cookies
except Exception:
cookies = None
return search_and_analyze(
results.extend(search_and_analyze(
keyword=body.keyword,
limit=body.limit,
cookies=cookies,
known_subs=existing,
)
known_subs=existing_reddit,
))
if "lemmy" in platforms:
from app.services.lemmy.discovery import search_lemmy
existing_lemmy = {r["sub"].lower() for r in store.list_sub_rules("lemmy")}
results.extend(search_lemmy(
keyword=body.keyword,
limit=body.limit,
known_subs=existing_lemmy,
))
# Merge and sort by subscribers descending
results.sort(key=lambda x: x.get("subscribers", 0), reverse=True)
return results
return await asyncio.to_thread(_in_thread, _run)

View file

@ -0,0 +1,100 @@
"""
Lemmy community discovery.
Searches major Lemmy instances for communities matching a keyword and
returns candidates for user review. Nothing is stored until the user
imports via PUT /subs/{community@instance}.
"""
from __future__ import annotations
import logging
from typing import Any
import httpx
from app.services.lemmy.client import KNOWN_INSTANCES, _DEFAULT_USER_AGENT
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Instances to fan search across (ordered by size)
_SEARCH_INSTANCES = ["lemmy.world", "lemmy.ml", "beehaw.org", "sh.itjust.works", "reddthat.com"]
# Timeout handed to every httpx request made against a search instance.
_TIMEOUT = 10.0
def _as_dict(value: Any) -> dict[str, Any]:
    """Return *value* when it is a dict, otherwise an empty dict."""
    return value if isinstance(value, dict) else {}


def search_lemmy(
    keyword: str,
    limit: int = 15,
    known_subs: set[str] | None = None,
) -> list[dict[str, Any]]:
    """
    Search multiple Lemmy instances for communities matching keyword.

    Every instance in ``_SEARCH_INSTANCES`` is queried in order. Results are
    deduplicated by ``actor_id`` (canonical AP URL) so a federated community
    reported by several instances appears only once; the first instance to
    report it wins. NSFW communities are skipped.

    Args:
        keyword: Search term sent as the ``q`` query parameter.
        limit: Maximum number of candidates to return. A non-positive value
            returns an empty list without contacting any instance.
        known_subs: Lower-cased ``name@instance`` keys that are already
            tracked; used only to set ``already_tracked`` on each candidate.

    Returns:
        Candidate dicts sorted by subscriber count (descending), at most
        ``limit`` of them. Each carries ``platform == "lemmy"`` and the same
        keys as the Reddit discovery results.

    The search is best-effort: an instance that fails, answers with a non-200
    status, or returns an unexpected payload is logged and skipped.
    """
    if limit <= 0:
        # Nothing can be returned, so avoid the network round-trips (and the
        # surprising "drop the last N" slice a negative limit would cause).
        return []

    tracked = known_subs or frozenset()
    # Loop-invariant request pieces, built once.
    params = {
        "q": keyword,
        "type_": "Communities",
        "limit": 20,
        "sort": "TopAll",
    }
    headers = {"User-Agent": _DEFAULT_USER_AGENT}

    seen_actor_ids: set[str] = set()
    results: list[dict[str, Any]] = []

    for instance in _SEARCH_INSTANCES:
        # Only the network call and JSON decoding are guarded here, so that a
        # single odd community entry cannot discard the rest of the instance.
        try:
            r = httpx.get(
                f"https://{instance}/api/v3/search",
                params=params,
                headers=headers,
                timeout=_TIMEOUT,
                follow_redirects=True,
            )
            if r.status_code != 200:
                logger.debug("Lemmy search on %s returned %d", instance, r.status_code)
                continue
            payload = r.json()
        except Exception:
            logger.warning("Error searching Lemmy instance %s", instance, exc_info=True)
            continue

        communities = _as_dict(payload).get("communities")
        if not isinstance(communities, list):
            continue

        for cv in communities:
            entry = _as_dict(cv)
            comm = _as_dict(entry.get("community"))
            counts = _as_dict(entry.get("counts"))

            actor_id = comm.get("actor_id")
            if not isinstance(actor_id, str) or not actor_id:
                continue
            if actor_id in seen_actor_ids:
                continue
            seen_actor_ids.add(actor_id)

            if comm.get("nsfw"):
                continue

            name = comm.get("name")
            if not isinstance(name, str) or not name:
                # Without a name the "name@instance" key would be unusable.
                continue

            # Derive home instance from actor_id (e.g. https://lemmy.world/c/selfhosted);
            # fall back to the queried instance when the host part is missing.
            parts = actor_id.split("/")
            home_instance = parts[2] if len(parts) > 2 and parts[2] else instance
            sub_key = f"{name}@{home_instance}"

            subscribers = counts.get("subscribers")
            if not isinstance(subscribers, int):
                # Keep the sort key numeric even if the count is null/absent.
                subscribers = 0

            description = comm.get("description")
            description = description.strip() if isinstance(description, str) else ""

            results.append({
                "sub": sub_key,
                "title": comm.get("title") or name,
                "subscribers": subscribers,
                "description": description[:280],
                "promo_allowed": None,  # Lemmy has no standardized promo rules
                "flair_required": False,
                "available_flairs": [],
                "rule_warning": False,
                "notes": None,
                "already_tracked": sub_key.lower() in tracked,
                "platform": "lemmy",
            })

    results.sort(key=lambda x: x["subscribers"], reverse=True)
    return results[:limit]

View file

@ -136,6 +136,7 @@ def analyze_sub(
"rule_warning": False,
"notes": notes,
"already_tracked": (sub.lower() in known_subs) if known_subs is not None else False,
"platform": "reddit",
}
except Exception:
logger.exception("Error analyzing r/%s", sub)
@ -154,11 +155,6 @@ def search_subs(
Returns a list of analysis dicts sorted by subscriber count (desc).
"""
try:
search_r = _get(
f"{_BASE}/subreddits/search.json",
cookies=cookies,
)
# httpx doesn't support params kwarg above since we're using _get; rebuild
r = httpx.get(
f"{_BASE}/subreddits/search.json",
params={"q": keyword, "limit": min(limit, 50), "sort": "relevance"},
@ -201,6 +197,7 @@ def search_subs(
"rule_warning": False,
"notes": None,
"already_tracked": (sub_name.lower() in known_subs) if known_subs is not None else False,
"platform": "reddit",
})
# Sort by subscribers descending

View file

@ -144,8 +144,13 @@
>
<div class="discover-row-main">
<div class="discover-row-name">
<a :href="`https://reddit.com/r/${r.sub}`" target="_blank" class="sub-link">r/{{ r.sub }}</a>
<a
:href="r.platform === 'lemmy' ? `https://${r.sub.split('@')[1]}/c/${r.sub.split('@')[0]}` : `https://reddit.com/r/${r.sub}`"
target="_blank"
class="sub-link"
>{{ r.platform === 'lemmy' ? r.sub : `r/${r.sub}` }}</a>
<span class="sub-size">{{ formatSubs(r.subscribers) }}</span>
<span class="badge badge-muted">{{ r.platform }}</span>
<span v-if="r.already_tracked" class="badge badge-muted">tracked</span>
</div>
<div v-if="r.description" class="discover-row-desc">{{ r.description }}</div>
@ -282,7 +287,7 @@ async function importSub(r: SubDiscoveryResult) {
notes: r.notes,
}
try {
const saved = await api.subs.upsertRules(r.sub, payload, 'reddit')
const saved = await api.subs.upsertRules(r.sub, payload, r.platform)
const idx = rules.value.findIndex(x => x.sub === r.sub)
if (idx !== -1) {
rules.value = [...rules.value.slice(0, idx), saved, ...rules.value.slice(idx + 1)]

View file

@ -109,6 +109,7 @@ export interface SubDiscoveryResult {
rule_warning: boolean
notes: string | null
already_tracked: boolean
platform: string
}
export interface SubRulesUpsert {
@ -286,8 +287,8 @@ export const api = {
upsertRules: (sub: string, data: SubRulesUpsert, platform = 'reddit') =>
http.put<SubRules>(`/subs/${sub}`, data, { params: { platform } }).then(r => r.data),
discover: (keyword: string, limit = 15) =>
http.post<SubDiscoveryResult[]>('/subs/discover', { keyword, limit }).then(r => r.data),
discover: (keyword: string, limit = 15, platforms = ['reddit', 'lemmy']) =>
http.post<SubDiscoveryResult[]>('/subs/discover', { keyword, limit, platforms }).then(r => r.data),
},
posts: {