magpie/app/db/migrations/011_signals.sql
pyr0ball a6ea0b9c58 feat(#7,#10): signal crawler -- Reddit + Lemmy community monitoring
Implements the full signal detection pipeline:

Backend:
- app/services/lemmy/client.py: async Lemmy API v3 client, community@instance
  addressing, integer cursor dedup, normalised post dicts
- app/services/scraper.py: platform-agnostic scraper; Reddit (.json API,
  fullname cursor) + Lemmy (integer ID cursor); keyword/regex/all match modes,
  min_score gate, NormalizedPost shape, upsert dedup via UNIQUE post_id
- app/api/endpoints/signals.py: CRUD for signal_rules + signals queue;
  POST /signals/scrape manual trigger; scrape-state viewer
- migrations 010-012: signal_rules, signals, signal_scrape_state tables
- scheduler: interval job every 30 min (scraper_enabled=True in config)
- Fixed migration collision: 007_signal_rules.sql → 010, 008 → 011, 009 → 012

Frontend:
- SignalsView.vue: signal feed with status filter (new/saved/dismissed),
  keyword chips, score/comment counts, save/dismiss actions, rules editor panel
- api.ts: SignalRule, Signal types + signalRules/signals API methods
- Nav: Signals as default landing route (replaces /campaigns default)

Closes #7 (signal extraction), closes #10 (Lemmy JSON crawler)
2026-04-22 11:00:14 -07:00

32 lines
1.6 KiB
SQL

-- signals: one row per piece of content surfaced by signal monitoring.
-- A post is identified by the pair (platform, post_id); the UNIQUE constraint
-- at the bottom of the table rejects a second row for the same pair.
CREATE TABLE IF NOT EXISTS signals (
    id               INTEGER PRIMARY KEY AUTOINCREMENT,
    platform         TEXT    NOT NULL DEFAULT 'reddit',
    sub              TEXT    NOT NULL,
    -- ID as issued by the source platform (for reddit: t3_xxxxx)
    post_id          TEXT    NOT NULL,
    title            TEXT    NOT NULL,
    -- roughly the first 500 characters of the post body
    body_snippet     TEXT,
    score            INTEGER,
    comment_count    INTEGER,
    author           TEXT,
    url              TEXT,
    -- timestamp of the original post
    posted_at        TEXT,
    -- filled in with datetime('now') when the insert omits it
    surfaced_at      TEXT    NOT NULL DEFAULT (datetime('now')),
    -- matched terms, stored as a JSON array in a TEXT column
    matched_keywords TEXT    NOT NULL DEFAULT '[]',
    -- documented values: new | saved | dismissed
    -- (no CHECK constraint here enforces them; writers must validate)
    status           TEXT    NOT NULL DEFAULT 'new',
    notes            TEXT,
    -- keeps the same post from being stored twice across rule runs
    UNIQUE (platform, post_id)
);
-- signal_rule_matches: many-to-many link recording which rules matched each
-- signal. The composite primary key stores every (signal_id, rule_id) pair at
-- most once, and both foreign keys are declared ON DELETE CASCADE.
-- signal_rules is not created in this file; it must already exist.
-- NOTE(review): this looks like SQLite, where foreign-key actions only run
-- when PRAGMA foreign_keys is ON for the connection -- confirm the app sets it.
CREATE TABLE IF NOT EXISTS signal_rule_matches (
    signal_id  INTEGER NOT NULL
        REFERENCES signals (id) ON DELETE CASCADE,
    rule_id    INTEGER NOT NULL
        REFERENCES signal_rules (id) ON DELETE CASCADE,
    -- filled in with datetime('now') when the insert omits it
    matched_at TEXT    NOT NULL DEFAULT (datetime('now')),
    PRIMARY KEY (signal_id, rule_id)
);
-- Secondary indexes. Each statement is a no-op when the index already exists.

-- signals, by status
CREATE INDEX IF NOT EXISTS idx_signals_status
    ON signals (status);

-- signals, by platform and then sub
CREATE INDEX IF NOT EXISTS idx_signals_platform_sub
    ON signals (platform, sub);

-- signals, by surfaced_at in descending order
CREATE INDEX IF NOT EXISTS idx_signals_surfaced_at
    ON signals (surfaced_at DESC);

-- signal_rule_matches, by rule_id; the table's primary key leads with
-- signal_id, so rule_id gets an index of its own
CREATE INDEX IF NOT EXISTS idx_signal_rule_matches_rule
    ON signal_rule_matches (rule_id);