Backend:
- Session.last_subscriber_left_at: monotonic timestamp set when last SSE subscriber
leaves, cleared when a new one arrives
- Session.subscriber_count(): replaces len(_subscribers) access from outside the model
- session_store._reaper_loop(): kills sessions with no subscribers for >SESSION_IDLE_TTL_S
(default 90s); runs every TTL/2 seconds via asyncio.create_task at startup
- session_store._reaper_loop_once(): single-cycle variant for deterministic tests
- app/main.py lifespan: starts reaper on startup, cancels it cleanly on shutdown
- config.py: SESSION_IDLE_TTL_S setting (90s default, overridable per-env)
Frontend:
- useWakeLock.ts: Screen Wake Lock API wrapper; acquires on connect, releases on
disconnect; degrades silently when unsupported (battery saver, iOS Safari)
- useToneStream.ts: visibilitychange handler — on hidden: closes EventSource without
ending backend session (grace window stays open); on visible: GET /session/{id}
liveness check, reconnects SSE + re-acquires wake lock if alive, sets expired=true
and calls store.reset() if reaped
- ComposeBar.vue: surfaces expired state with calm 'Session timed out' notice
(not an error — expected behaviour on long screen-off)
Tests:
- test_reaper.py: 7 tests covering subscriber idle tracking, reaper eligibility
(kills idle, spares active subscriber, spares within-TTL)
356 lines · 12 KiB · Python
# app/services/session_store.py — session lifecycle and classifier management
|
|
from __future__ import annotations
|
|
|
|
import asyncio
|
|
import logging
|
|
import time
|
|
|
|
from app.config import settings
|
|
from app.models.session import Session
|
|
from app.models.tone_event import ToneEvent
|
|
from app.services.annotator import annotate
|
|
|
|
logger = logging.getLogger(__name__)


# Module-level singleton store — one per process
# session_id → Session for every live session (created in create_session,
# removed in end_session or by the idle reaper).
_sessions: dict[str, Session] = {}
# session_id → classifier background task; cancelled when the session ends.
_tasks: dict[str, asyncio.Task] = {}
# Handle to the idle-session reaper loop; managed by start_reaper()/stop_reaper().
_reaper_task: asyncio.Task | None = None


# Audio accumulation buffer per session.
# We accumulate 100ms PCM chunks until we have CLASSIFY_WINDOW_MS of audio,
# then fire a single /classify call. Real emotion models need ≥500ms context.
_CLASSIFY_WINDOW_MS = 1000  # ms of audio per classify call; wav2vec2 needs ≥1s context
_CHUNK_MS = 100  # AudioWorklet sends 1600 samples @ 16kHz = 100ms
_CHUNKS_PER_WINDOW = _CLASSIFY_WINDOW_MS // _CHUNK_MS  # 10 chunks
# session_id → raw PCM chunks awaiting the next classify window
# (filled and flushed by forward_audio_chunk).
_audio_buffers: dict[str, list[bytes]] = {}
|
|
|
|
|
|
async def create_session(elcor: bool = False) -> Session:
|
|
"""Create a new session and start its ContextClassifier background task.
|
|
|
|
If CF_ORCH_URL is configured, requests a managed cf-voice instance before
|
|
starting the classifier. Falls back to in-process mock if allocation fails.
|
|
"""
|
|
session = Session(elcor=elcor)
|
|
_sessions[session.session_id] = session
|
|
await _allocate_voice(session)
|
|
task = asyncio.create_task(
|
|
_run_classifier(session),
|
|
name=f"classifier-{session.session_id}",
|
|
)
|
|
_tasks[session.session_id] = task
|
|
session.state = "running"
|
|
logger.info(
|
|
"Session %s started (voice=%s)",
|
|
session.session_id,
|
|
session.cf_voice_url or "in-process",
|
|
)
|
|
return session
|
|
|
|
|
|
def get_session(session_id: str) -> Session | None:
    """Look up a session by id; return None when unknown."""
    try:
        return _sessions[session_id]
    except KeyError:
        return None
|
|
|
|
|
|
def active_session_count() -> int:
    """Return the number of currently running sessions."""
    running = [s for s in _sessions.values() if s.state == "running"]
    return len(running)
|
|
|
|
|
|
async def end_session(session_id: str) -> bool:
|
|
"""Stop and remove a session. Returns True if it existed."""
|
|
session = _sessions.pop(session_id, None)
|
|
if session is None:
|
|
return False
|
|
session.state = "stopped"
|
|
task = _tasks.pop(session_id, None)
|
|
if task and not task.done():
|
|
task.cancel()
|
|
_audio_buffers.pop(session_id, None)
|
|
await _release_voice(session)
|
|
logger.info("Session %s ended", session_id)
|
|
return True
|
|
|
|
|
|
async def _allocate_voice(session: Session) -> None:
    """Request a managed cf-voice instance from cf-orch. No-op if CF_ORCH_URL is unset."""
    # A statically configured voice URL wins over orchestrated allocation.
    if settings.cf_voice_url:
        session.cf_voice_url = settings.cf_voice_url
        return
    if not settings.cf_orch_url:
        return

    import httpx

    allocate_url = settings.cf_orch_url.rstrip("/") + "/api/services/cf-voice/allocate"
    try:
        async with httpx.AsyncClient(timeout=30.0) as client:
            resp = await client.post(
                allocate_url, json={"caller": "linnet", "ttl_s": 3600.0}
            )
            resp.raise_for_status()
            payload = resp.json()
        session.cf_voice_url = payload["url"]
        session.cf_voice_allocation_id = payload["allocation_id"]
        logger.info(
            "cf-orch allocated cf-voice for session %s: %s (alloc=%s)",
            session.session_id, payload["url"], payload["allocation_id"],
        )
    except Exception as exc:
        # Allocation is best-effort: any failure falls back to the in-process path.
        logger.warning(
            "cf-orch allocation failed for session %s: %s — falling back to in-process",
            session.session_id, exc,
        )
|
|
|
|
|
|
async def _release_voice(session: Session) -> None:
    """Release a cf-voice allocation back to cf-orch. No-op if not orch-managed."""
    if not (session.cf_voice_allocation_id and settings.cf_orch_url):
        return

    import httpx

    alloc_url = "{}/api/services/cf-voice/allocations/{}".format(
        settings.cf_orch_url.rstrip("/"),
        session.cf_voice_allocation_id,
    )
    try:
        async with httpx.AsyncClient(timeout=5.0) as client:
            await client.delete(alloc_url)
        logger.info("Released cf-voice allocation for session %s", session.session_id)
    except Exception as exc:
        # Release failures only leak a remote allocation; never block teardown.
        logger.warning(
            "cf-orch release failed for session %s: %s", session.session_id, exc
        )
|
|
|
|
|
|
async def _run_classifier(session: Session) -> None:
    """
    Background task: stream VoiceFrames and broadcast ToneEvents.

    Two modes:
    - Sidecar (cf-orch allocated or CF_VOICE_URL set): holds the session open
      while audio chunks are forwarded to cf-voice via forward_audio_chunk().
    - In-process (no sidecar): runs ContextClassifier.from_env() directly.
      Used for local dev and mock mode when CF_ORCH_URL is unset or allocation failed.
    """
    runner = (
        _run_classifier_sidecar if session.cf_voice_url else _run_classifier_inprocess
    )
    await runner(session)
|
|
|
|
|
|
async def _run_classifier_inprocess(session: Session) -> None:
|
|
"""In-process path: ContextClassifier.stream() — used when CF_VOICE_URL is unset."""
|
|
from cf_voice.context import ContextClassifier
|
|
|
|
classifier = ContextClassifier.from_env()
|
|
try:
|
|
async for frame in classifier.stream():
|
|
if session.state == "stopped":
|
|
break
|
|
event = annotate(frame, session_id=session.session_id, elcor=session.elcor)
|
|
if event is not None:
|
|
session.broadcast(event)
|
|
except asyncio.CancelledError:
|
|
pass
|
|
finally:
|
|
await classifier.stop()
|
|
session.state = "stopped"
|
|
logger.info("Classifier stopped for session %s", session.session_id)
|
|
|
|
|
|
async def _run_classifier_sidecar(session: Session) -> None:
|
|
"""
|
|
Sidecar path: wait for audio chunks forwarded via forward_audio_chunk().
|
|
|
|
The sidecar does not self-generate frames — audio arrives from the browser
|
|
WebSocket and is sent to cf-voice /classify. This task holds the session
|
|
open and handles cleanup.
|
|
"""
|
|
try:
|
|
while session.state == "running":
|
|
await asyncio.sleep(1.0)
|
|
except asyncio.CancelledError:
|
|
pass
|
|
finally:
|
|
session.state = "stopped"
|
|
logger.info("Sidecar session ended for session %s", session.session_id)
|
|
|
|
|
|
async def forward_audio_chunk(
    session: Session,
    audio_b64: str,
    timestamp: float,
) -> None:
    """
    Accumulate PCM chunks into _CLASSIFY_WINDOW_MS (1s) windows, then forward
    to cf-voice /classify.

    The AudioWorklet sends 100ms chunks; we buffer _CHUNKS_PER_WINDOW (10) of
    them before firing a classify call. This gives wav2vec2 a full second of
    context and keeps the classify rate at 1/s instead of 10/s.

    Args:
        session: the owning Session; must have cf_voice_url set for this to act.
        audio_b64: base64-encoded raw PCM chunk from the browser.
        timestamp: client timestamp of the latest chunk, forwarded as the
            window's timestamp.

    No-op when no cf-voice sidecar is allocated (in-process path handles its own input).
    Network failures are logged and swallowed — a dropped window degrades
    quality but must never kill the session.
    """
    voice_url = session.cf_voice_url
    if not voice_url:
        return

    import base64 as _b64

    import httpx

    # Decode incoming chunk and append to per-session buffer
    raw = _b64.b64decode(audio_b64)
    buf = _audio_buffers.setdefault(session.session_id, [])
    buf.append(raw)

    if len(buf) < _CHUNKS_PER_WINDOW:
        return  # not enough audio yet — wait for more chunks

    # Flush: concatenate window, reset buffer
    window_bytes = b"".join(buf)
    buf.clear()
    window_b64 = _b64.b64encode(window_bytes).decode()

    url = voice_url.rstrip("/") + "/classify"
    payload = {
        "audio_chunk": window_b64,
        "timestamp": timestamp,
        "elcor": session.elcor,
        "session_id": session.session_id,
    }
    try:
        async with httpx.AsyncClient(timeout=5.0) as client:
            resp = await client.post(url, json=payload)
            resp.raise_for_status()
            data = resp.json()
    except Exception as exc:
        logger.warning("cf-voice sidecar call failed for session %s: %s", session.session_id, exc)
        return

    _broadcast_classify_events(session, data.get("events", []))


def _broadcast_classify_events(session: Session, events: list[dict]) -> None:
    """Translate cf-voice /classify response events into typed broadcasts.

    Each event dict carries an "event_type" discriminator; unknown types are
    silently ignored. Tone events additionally pass through annotate(), which
    may suppress them by returning None.
    """
    from app.models.queue_event import QueueEvent
    from app.models.speaker_event import SpeakerEvent
    from app.models.transcript_event import TranscriptEvent
    from cf_voice.models import VoiceFrame

    for ev in events:
        etype = ev.get("event_type")

        if etype == "tone":
            frame = VoiceFrame(
                label=ev["label"],
                confidence=ev["confidence"],
                speaker_id=ev.get("speaker_id", ""),
                shift_magnitude=ev.get("shift_magnitude", 0.0),
                timestamp=ev["timestamp"],
            )
            tone = annotate(frame, session_id=session.session_id, elcor=session.elcor)
            if tone is not None:
                session.broadcast(tone)

        elif etype == "speaker":
            speaker = SpeakerEvent(
                session_id=session.session_id,
                label=ev["label"],
                confidence=ev.get("confidence", 1.0),
                timestamp=ev["timestamp"],
            )
            session.broadcast(speaker)

        elif etype == "transcript":
            transcript = TranscriptEvent(
                session_id=session.session_id,
                text=ev["label"],
                speaker_id=ev.get("speaker_id", "speaker_a"),
                timestamp=ev["timestamp"],
            )
            session.broadcast(transcript)

        elif etype in ("queue", "environ"):
            queue_ev = QueueEvent(
                session_id=session.session_id,
                event_type=etype,
                label=ev["label"],
                confidence=ev.get("confidence", 1.0),
                timestamp=ev["timestamp"],
            )
            session.broadcast(queue_ev)
|
|
|
|
|
|
# ── Idle session reaper ───────────────────────────────────────────────────────
|
|
|
|
async def _reaper_loop() -> None:
|
|
"""
|
|
Periodically kill sessions with no active SSE subscribers.
|
|
|
|
A session becomes eligible for reaping when:
|
|
- Its last SSE subscriber disconnected more than SESSION_IDLE_TTL_S seconds ago
|
|
- It has not been explicitly ended (state != "stopped")
|
|
|
|
This covers the common mobile pattern: screen locks → browser suspends tab →
|
|
EventSource closes → SSE subscriber count drops to zero. If the user doesn't
|
|
return within the TTL window, the session is cleaned up automatically.
|
|
|
|
The reaper runs every REAP_INTERVAL_S seconds (half the TTL, so the worst-case
|
|
overshoot is TTL + REAP_INTERVAL_S).
|
|
"""
|
|
ttl = settings.session_idle_ttl_s
|
|
interval = max(15, ttl // 2)
|
|
logger.info("Session reaper started (TTL=%ds, check every %ds)", ttl, interval)
|
|
while True:
|
|
await asyncio.sleep(interval)
|
|
now = time.monotonic()
|
|
to_reap = [
|
|
sid
|
|
for sid, session in list(_sessions.items())
|
|
if (
|
|
session.state == "running"
|
|
and session.subscriber_count() == 0
|
|
and session.last_subscriber_left_at is not None
|
|
and (now - session.last_subscriber_left_at) > ttl
|
|
)
|
|
]
|
|
for sid in to_reap:
|
|
logger.info(
|
|
"Reaping idle session %s (no subscribers for >%ds)", sid, ttl
|
|
)
|
|
await end_session(sid)
|
|
|
|
|
|
async def _reaper_loop_once() -> None:
|
|
"""Single reaper pass — used by tests to avoid sleeping."""
|
|
ttl = settings.session_idle_ttl_s
|
|
now = time.monotonic()
|
|
to_reap = [
|
|
sid
|
|
for sid, session in list(_sessions.items())
|
|
if (
|
|
session.state == "running"
|
|
and session.subscriber_count() == 0
|
|
and session.last_subscriber_left_at is not None
|
|
and (now - session.last_subscriber_left_at) > ttl
|
|
)
|
|
]
|
|
for sid in to_reap:
|
|
logger.info("Reaping idle session %s (no subscribers for >%ds)", sid, ttl)
|
|
await end_session(sid)
|
|
|
|
|
|
def start_reaper() -> None:
|
|
"""Start the idle session reaper background task. Called from app lifespan."""
|
|
global _reaper_task
|
|
if _reaper_task is None or _reaper_task.done():
|
|
_reaper_task = asyncio.create_task(_reaper_loop(), name="session-reaper")
|
|
|
|
|
|
async def stop_reaper() -> None:
|
|
"""Cancel the reaper task cleanly. Called from app lifespan shutdown."""
|
|
global _reaper_task
|
|
if _reaper_task and not _reaper_task.done():
|
|
_reaper_task.cancel()
|
|
try:
|
|
await _reaper_task
|
|
except asyncio.CancelledError:
|
|
pass
|
|
_reaper_task = None
|