New modules shipped (from Linnet integration): - acoustic.py: AST (MIT/ast-finetuned-audioset-10-10-0.4593) replaces YAMNet stub; 527 AudioSet classes mapped to queue/speaker/environ/scene labels; _LABEL_MAP includes hold_music, ringback, DTMF, background_shift, AMD signal chain - accent.py: facebook/mms-lid-126 language ID → regional accent labels (en_gb, en_us, en_au, fr, es, de, zh, …); lazy-loaded, gated by CF_VOICE_ACCENT - privacy.py: compound privacy risk scorer — public_env, background_voices, nature scene, accent signals; returns 0–3 score without storing any audio - prosody.py: openSMILE-backed prosody extractor (sarcasm_risk, flat_f0_score, speech_rate, pitch_range); mock mode returns neutral values - dimensional.py: audeering/wav2vec2-large-robust-12-ft-emotion-msp-dim valence/arousal/dominance scorer; gated by CF_VOICE_DIMENSIONAL - trajectory.py: rolling buffer for arousal/valence deltas, trend detection (escalating/suppressed/stable), coherence scoring, suppression/reframe flags - telephony.py: TelephonyBackend Protocol + MockTelephonyBackend + SignalWireBackend + FreeSWITCHBackend; CallSession dataclass; make_telephony() factory - app.py: FastAPI service (port 8007) — /health + /classify; accepts base64 PCM chunks, returns full AudioEventOut including dimensional/prosody/accent fields - prefs.py: voice preference helpers (elcor_mode, confidence_threshold, whisper_model, elcor_prior_frames); cf-core and env-var fallback Tests: fix stale tests (YAMNetAcousticBackend → ASTAcousticBackend, scene field added to AcousticResult, speaker_at gap now resolves dominant speaker not UNKNOWN, make_io real path returns MicVoiceIO when sounddevice installed). 78 tests passing. Closes #2, #3.
500 lines
17 KiB
Python
500 lines
17 KiB
Python
# cf_voice/telephony.py — outbound telephony abstraction
|
|
#
|
|
# Protocol + mock backend: MIT licensed.
|
|
# SignalWireBackend, FreeSWITCHBackend: BSL 1.1 (real telephony, cloud credentials).
|
|
#
|
|
# Consumers (Osprey, Harrier, Ibis, Kestrel) depend only on TelephonyBackend
|
|
# and CallSession — both MIT. The concrete backends are selected by make_telephony()
|
|
# based on the tier and available credentials.
|
|
#
|
|
# Requires optional extras for real backends:
|
|
# pip install cf-voice[signalwire] — SignalWire (paid tier, CF-provisioned)
|
|
# pip install cf-voice[freeswitch] — FreeSWITCH ESL (free tier, self-hosted)
|
|
from __future__ import annotations
|
|
|
|
import asyncio
|
|
import logging
|
|
import os
|
|
from dataclasses import dataclass, field
|
|
from typing import Literal, Protocol, runtime_checkable
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
# Closed set of call lifecycle states exposed to consumers. Backends in this
# module translate their native status strings into one of these values.
CallState = Literal[
    "dialing", "ringing", "in_progress",
    "hold", "bridged",
    "completed", "failed", "no_answer", "busy",
]
|
|
|
|
|
|
@dataclass
class CallSession:
    """
    Record describing one outbound call, live or finished.

    call_sid    Identifier assigned by the backend. SignalWire hands back a
                Twilio-compatible SID string; FreeSWITCH hands back the UUID.
    to / from_  Destination and caller numbers the call was placed with.
    state       Current lifecycle state. Backends update it as the call
                progresses; consumers should poll backend.get_state() or
                subscribe to webhook events.
    duration_s  Call duration in seconds (0.0 by default).
    amd_result  Answering machine detection verdict, see below.
    error       Optional error description; None by default.
    """

    call_sid: str
    to: str
    from_: str
    state: CallState = "dialing"
    duration_s: float = 0.0
    # One of "human" | "machine" | "unknown". Stays "unknown" until the
    # backend has resolved answering machine detection.
    amd_result: str = "unknown"
    error: str | None = None
|
|
|
|
|
|
@runtime_checkable
class TelephonyBackend(Protocol):
    """
    Structural interface every telephony backend implements.

    Every method is a coroutine and must be safe to call from an asyncio
    event loop. Implementations are responsible for pushing long-running
    network operations onto a thread pool; callers do not have to.

    Field names are stable as of cf-voice v0.1.0.
    """

    async def dial(
        self,
        to: str,
        from_: str,
        webhook_url: str,
        *,
        amd: bool = False,
    ) -> CallSession:
        """
        Place an outbound call.

        to / from_    E.164 numbers ("+15551234567").
        webhook_url   URL the backend will POST call events to
                      (SignalWire/TwiML style).
        amd           When True, ask for answering machine detection. The
                      verdict is written to CallSession.amd_result once the
                      backend has resolved it.

        Returns a CallSession with state="dialing".
        """
        ...

    async def send_dtmf(self, call_sid: str, digits: str) -> None:
        """
        Play DTMF (dual-tone multi-frequency) tones into a live call.

        digits   String made of 0-9, *, #, A-D; one tone per character.
                 Where the backend supports pauses, 'w' stands for 0.5s and
                 'W' for 1s.
        """
        ...

    async def bridge(self, call_sid: str, target: str) -> None:
        """
        Connect the active call to a second E.164 number or SIP URI.

        This is how the user gets handed to a human agent once Osprey has
        navigated the IVR. The original call leg stays connected.
        """
        ...

    async def hangup(self, call_sid: str) -> None:
        """End the call. Idempotent: calling it on an ended call is safe."""
        ...

    async def announce(
        self,
        call_sid: str,
        text: str,
        voice: str = "default",
    ) -> None:
        """
        Speak synthesised text into the call.

        Exists to satisfy the adaptive service identification requirement
        (osprey#21): Osprey states its identity before navigating an IVR so
        the other party can consent to automated interaction.

        voice   Backend-specific voice identifier; "default" selects the
                backend's default TTS voice.
        """
        ...

    async def get_state(self, call_sid: str) -> CallState:
        """Ask the backend for the call's current state."""
        ...
|
|
|
|
|
|
# ── Mock backend (MIT) ────────────────────────────────────────────────────────
|
|
|
|
|
|
class MockTelephonyBackend:
    """
    Synthetic telephony backend for development and CI.

    No real calls are placed. Operations log to cf_voice.telephony and update
    in-memory CallSession objects. AMD resolves to "human" after a simulated
    delay.

    Usage:
        backend = MockTelephonyBackend()
        session = await backend.dial("+15551234567", "+18005550000", "https://...")
        await backend.send_dtmf(session.call_sid, "1")
        await backend.hangup(session.call_sid)
    """

    def __init__(self, amd_delay_s: float = 0.5) -> None:
        """
        amd_delay_s   Seconds to wait, after the simulated answer, before
                      amd_result flips to "human" (only when dial(amd=True)).
        """
        self._sessions: dict[str, CallSession] = {}
        self._amd_delay_s = amd_delay_s
        self._call_counter = 0
        # Strong references to in-flight progress tasks. The event loop only
        # holds weak references to tasks, so a task nobody references can be
        # garbage-collected before it finishes.
        self._tasks: set[asyncio.Task[None]] = set()

    def _next_sid(self) -> str:
        """Return the next sequential fake SID ("mock_sid_0001", ...)."""
        self._call_counter += 1
        return f"mock_sid_{self._call_counter:04d}"

    async def dial(
        self,
        to: str,
        from_: str,
        webhook_url: str,
        *,
        amd: bool = False,
    ) -> CallSession:
        """
        Register a fake call and schedule its simulated progress.

        The returned session starts in state "ringing". A background task
        moves it to "in_progress" after 50 ms and, when amd is True, sets
        amd_result to "human" after a further amd_delay_s seconds.
        webhook_url is accepted for interface compatibility and not used.

        Must be called with a running event loop (it schedules a task).
        """
        sid = self._next_sid()
        session = CallSession(call_sid=sid, to=to, from_=from_, state="ringing")
        self._sessions[sid] = session
        logger.debug("MockTelephony: dial %s → %s (sid=%s)", from_, to, sid)

        async def _progress() -> None:
            await asyncio.sleep(0.05)
            # Only "answer" a call that is still ringing. If hangup() or
            # bridge() ran during the ring window, their state must not be
            # overwritten (a completed call would otherwise come back alive).
            if session.state == "ringing":
                session.state = "in_progress"
            if amd:
                await asyncio.sleep(self._amd_delay_s)
                session.amd_result = "human"
                logger.debug("MockTelephony: AMD resolved human (sid=%s)", sid)

        task = asyncio.create_task(_progress())
        self._tasks.add(task)
        task.add_done_callback(self._tasks.discard)
        return session

    async def send_dtmf(self, call_sid: str, digits: str) -> None:
        """Log the digits. Raises KeyError for an unknown call_sid."""
        self._sessions[call_sid]  # KeyError if unknown — intentional
        logger.debug("MockTelephony: DTMF %r (sid=%s)", digits, call_sid)

    async def bridge(self, call_sid: str, target: str) -> None:
        """Mark the call "bridged". Raises KeyError for an unknown call_sid."""
        session = self._sessions[call_sid]
        session.state = "bridged"
        logger.debug("MockTelephony: bridge → %s (sid=%s)", target, call_sid)

    async def hangup(self, call_sid: str) -> None:
        """Mark the call "completed"; unknown call_sids are ignored."""
        session = self._sessions.get(call_sid)
        if session:
            session.state = "completed"
        logger.debug("MockTelephony: hangup (sid=%s)", call_sid)

    async def announce(
        self,
        call_sid: str,
        text: str,
        voice: str = "default",
    ) -> None:
        """Log the announcement. Raises KeyError for an unknown call_sid."""
        self._sessions[call_sid]  # KeyError if unknown — intentional
        logger.debug(
            "MockTelephony: announce voice=%s text=%r (sid=%s)", voice, text, call_sid
        )

    async def get_state(self, call_sid: str) -> CallState:
        """Return the stored state. Raises KeyError for an unknown call_sid."""
        return self._sessions[call_sid].state
|
|
|
|
|
|
# ── SignalWire backend (BSL 1.1) ──────────────────────────────────────────────
|
|
|
|
|
|
class SignalWireBackend:
    """
    SignalWire outbound telephony (Twilio-compatible REST API).

    BSL 1.1 — requires paid tier or self-hosted CF SignalWire project.

    Credentials sourced from environment:
        CF_SW_PROJECT_ID   — SignalWire project ID
        CF_SW_AUTH_TOKEN   — SignalWire auth token
        CF_SW_SPACE_URL    — space URL, e.g. "yourspace.signalwire.com"

    Requires: pip install cf-voice[signalwire]

    The SDK client is blocking, so every REST call is run in a worker thread
    via asyncio.to_thread(). All caller-supplied values that end up inside
    TwiML are XML-escaped first.
    """

    # SignalWire call status → CallState. Any status not listed here is
    # reported as "failed" by get_state().
    _SW_STATE_MAP: dict[str, CallState] = {
        "queued": "dialing",
        "ringing": "ringing",
        "in-progress": "in_progress",
        "completed": "completed",
        "failed": "failed",
        "busy": "busy",
        "no-answer": "no_answer",
    }

    def __init__(
        self,
        project_id: str | None = None,
        auth_token: str | None = None,
        space_url: str | None = None,
    ) -> None:
        """
        Build the SignalWire REST client.

        Each argument falls back to its CF_SW_* environment variable when
        omitted or empty.

        Raises:
            ImportError: the SignalWire SDK is not installed.
            KeyError: a credential is neither passed nor present in the
                environment.
        """
        try:
            from signalwire.rest import Client as SWClient  # type: ignore[import]
        except ImportError as exc:
            raise ImportError(
                "SignalWire SDK is required for SignalWireBackend. "
                "Install with: pip install cf-voice[signalwire]"
            ) from exc

        self._project_id = project_id or os.environ["CF_SW_PROJECT_ID"]
        self._auth_token = auth_token or os.environ["CF_SW_AUTH_TOKEN"]
        self._space_url = space_url or os.environ["CF_SW_SPACE_URL"]
        self._client = SWClient(
            self._project_id,
            self._auth_token,
            signalwire_space_url=self._space_url,
        )
        # No event loop is captured here: the constructor may run before any
        # loop exists, and calling asyncio.get_event_loop() without a running
        # loop is deprecated. The coroutines below use the running loop.

    # ── TwiML builders (pure functions) ──────────────────────────────────

    @staticmethod
    def _dtmf_twiml(digits: str) -> str:
        """Return TwiML that plays *digits*, with the attribute XML-quoted."""
        from xml.sax.saxutils import quoteattr

        return f"<Response><Play digits={quoteattr(digits)}/></Response>"

    @staticmethod
    def _bridge_twiml(target: str) -> str:
        """
        Return TwiML that dials *target*.

        SIP URIs ("sip:" / "sips:") are wrapped in <Sip>; anything else is
        treated as a phone number and wrapped in <Number>.
        """
        from xml.sax.saxutils import escape

        noun = "Sip" if target.lower().startswith(("sip:", "sips:")) else "Number"
        return f"<Response><Dial><{noun}>{escape(target)}</{noun}></Dial></Response>"

    @staticmethod
    def _say_twiml(text: str, voice: str) -> str:
        """
        Return TwiML that speaks *text*.

        voice="default" is the interface-level sentinel for "backend default
        voice", so the voice attribute is omitted instead of being sent as a
        literal voice name.
        """
        from xml.sax.saxutils import escape, quoteattr

        voice_attr = "" if voice == "default" else f" voice={quoteattr(voice)}"
        return f"<Response><Say{voice_attr}>{escape(text)}</Say></Response>"

    # ── Blocking SDK calls (run inside worker threads) ───────────────────

    def _create_call(self, **call_kwargs):
        """Blocking: create a call through the REST client."""
        return self._client.calls.create(**call_kwargs)

    def _update_call(self, call_sid: str, **fields) -> None:
        """Blocking: update a call resource with the given fields."""
        self._client.calls(call_sid).update(**fields)

    def _fetch_call(self, call_sid: str):
        """Blocking: fetch a call resource."""
        return self._client.calls(call_sid).fetch()

    # ── TelephonyBackend interface ───────────────────────────────────────

    async def dial(
        self,
        to: str,
        from_: str,
        webhook_url: str,
        *,
        amd: bool = False,
    ) -> CallSession:
        """
        Create an outbound call.

        webhook_url is passed both as the call's instruction URL and as its
        status callback. With amd=True, asynchronous machine detection is
        requested. Returns a CallSession in state "dialing" carrying the SID
        reported by SignalWire.
        """
        call_kwargs: dict = dict(
            to=to,
            from_=from_,
            url=webhook_url,
            status_callback=webhook_url,
        )
        if amd:
            call_kwargs["machine_detection"] = "Enable"
            call_kwargs["async_amd"] = True

        call = await asyncio.to_thread(self._create_call, **call_kwargs)
        return CallSession(
            call_sid=call.sid,
            to=to,
            from_=from_,
            state="dialing",
        )

    async def send_dtmf(self, call_sid: str, digits: str) -> None:
        """Play DTMF digits by updating the call with <Play digits=...> TwiML."""
        await asyncio.to_thread(
            self._update_call, call_sid, twiml=self._dtmf_twiml(digits)
        )

    async def bridge(self, call_sid: str, target: str) -> None:
        """Bridge the call to *target* by updating it with <Dial> TwiML."""
        await asyncio.to_thread(
            self._update_call, call_sid, twiml=self._bridge_twiml(target)
        )

    async def hangup(self, call_sid: str) -> None:
        """End the call by setting its status to "completed"."""
        await asyncio.to_thread(self._update_call, call_sid, status="completed")

    async def announce(
        self,
        call_sid: str,
        text: str,
        voice: str = "alice",
    ) -> None:
        """
        Speak *text* into the call by updating it with <Say> TwiML.

        voice   TwiML voice name; "default" leaves the choice to SignalWire.
        """
        await asyncio.to_thread(
            self._update_call, call_sid, twiml=self._say_twiml(text, voice)
        )

    async def get_state(self, call_sid: str) -> CallState:
        """Fetch the call and map its status to a CallState ("failed" if unmapped)."""
        call = await asyncio.to_thread(self._fetch_call, call_sid)
        return self._SW_STATE_MAP.get(call.status, "failed")
|
|
|
|
|
|
# ── FreeSWITCH backend (BSL 1.1) ─────────────────────────────────────────────
|
|
|
|
|
|
class FreeSWITCHBackend:
    """
    Self-hosted FreeSWITCH outbound telephony via ESL (event socket layer).

    BSL 1.1 — requires free tier + user-provisioned FreeSWITCH + VoIP.ms SIP trunk.

    Credentials sourced from environment:
        CF_ESL_HOST      — FreeSWITCH ESL host (default: 127.0.0.1)
        CF_ESL_PORT      — FreeSWITCH ESL port (default: 8021)
        CF_ESL_PASSWORD  — FreeSWITCH ESL password

    Requires: pip install cf-voice[freeswitch]

    Each operation opens a short-lived ESL connection inside a worker thread
    and closes it again when the command has been answered.

    Note: FreeSWITCH AMD (mod_vad + custom heuristic or Whisper pipe) is not
    yet implemented. The amd parameter is accepted but amd_result stays "unknown".
    """

    # Value returned by "uuid_getvar <uuid> call_state" → CallState. Anything
    # not listed here (including error replies) is reported as "failed".
    _FS_STATE_MAP: dict[str, CallState] = {
        "CS_INIT": "dialing",
        "CS_ROUTING": "ringing",
        "CS_EXECUTE": "in_progress",
        "CS_HANGUP": "completed",
        "CS_DESTROY": "completed",
    }

    def __init__(
        self,
        host: str | None = None,
        port: int | None = None,
        password: str | None = None,
    ) -> None:
        """
        Store ESL connection settings; no connection is opened here.

        Each argument falls back to its CF_ESL_* environment variable when
        omitted or empty (host and port also have built-in defaults).

        Raises:
            ImportError: the ESL bindings are not installed.
            KeyError: no password passed and CF_ESL_PASSWORD is unset.
        """
        try:
            import ESL  # type: ignore[import]
        except ImportError as exc:
            raise ImportError(
                "FreeSWITCH ESL bindings are required for FreeSWITCHBackend. "
                "Install with: pip install cf-voice[freeswitch]"
            ) from exc

        self._host = host or os.environ.get("CF_ESL_HOST", "127.0.0.1")
        self._port = int(port or os.environ.get("CF_ESL_PORT", 8021))
        self._password = password or os.environ["CF_ESL_PASSWORD"]
        self._esl = ESL

    def _connect(self):
        """Open an ESL connection; raise RuntimeError if it does not connect."""
        conn = self._esl.ESLconnection(self._host, str(self._port), self._password)
        if not conn.connected():
            raise RuntimeError(
                f"Could not connect to FreeSWITCH ESL at {self._host}:{self._port}"
            )
        return conn

    def _api(self, command: str, args: str) -> str:
        """
        Blocking: run one ESL API command and return the stripped reply body.

        Returns "" when the server gives no reply or an empty body. The
        connection is always closed afterwards so sockets do not pile up.
        """
        conn = self._connect()
        try:
            reply = conn.api(command, args)
            body = reply.getBody() if reply is not None else None
            return (body or "").strip()
        finally:
            conn.disconnect()

    async def _command(
        self, command: str, args: str, *, warn_on_error: bool = True
    ) -> str:
        """Run an ESL command in a worker thread; optionally log "-ERR" replies."""
        body = await asyncio.to_thread(self._api, command, args)
        if warn_on_error and body.startswith("-ERR"):
            logger.warning("FreeSWITCH %s failed: %s", command, body)
        return body

    @staticmethod
    def _originate_args(to: str, from_: str) -> str:
        """
        Build the argument string for the "originate" API command.

        The command verb itself is NOT included: conn.api() takes the verb as
        its first argument, so repeating it here would send
        "originate originate ...".
        """
        return (
            f"{{origination_caller_id_number={from_},"
            f"origination_caller_id_name=CircuitForge}}"
            f"sofia/gateway/voipms/{to.lstrip('+')} &park()"
        )

    @staticmethod
    def _say_args(call_sid: str, text: str) -> str:
        """
        Build the argument string for the "uuid_broadcast" say command.

        Whitespace runs in *text* (including line breaks) are collapsed to
        single spaces, because ESL commands are newline-framed and embedded
        line breaks would cut the command short.
        """
        spoken = " ".join(text.split())
        return f"{call_sid} say::en CHAT SPOKEN {spoken}"

    async def dial(
        self,
        to: str,
        from_: str,
        webhook_url: str,
        *,
        amd: bool = False,
    ) -> CallSession:
        """
        Originate a call through the "voipms" sofia gateway and park it.

        webhook_url and amd are accepted for interface compatibility and are
        not used by this backend.

        Raises:
            RuntimeError: FreeSWITCH did not answer with "+OK <uuid>".
        """
        body = await asyncio.to_thread(
            self._api, "originate", self._originate_args(to, from_)
        )
        # FreeSWITCH returns "+OK <uuid>" on success
        if not body.startswith("+OK"):
            raise RuntimeError(f"FreeSWITCH originate failed: {body}")
        channel_uuid = body.removeprefix("+OK").strip()
        return CallSession(call_sid=channel_uuid, to=to, from_=from_, state="dialing")

    async def send_dtmf(self, call_sid: str, digits: str) -> None:
        """Send DTMF digits to the channel with uuid_send_dtmf."""
        await self._command("uuid_send_dtmf", f"{call_sid} {digits}")

    async def bridge(self, call_sid: str, target: str) -> None:
        """Issue uuid_bridge with the "voipms" gateway dial string for *target*."""
        await self._command(
            "uuid_bridge",
            f"{call_sid} sofia/gateway/voipms/{target.lstrip('+')}",
        )

    async def hangup(self, call_sid: str) -> None:
        """Kill the channel with uuid_kill; error replies are ignored."""
        # Not logged as a warning: hanging up an already-ended call is
        # expected to be harmless.
        await self._command("uuid_kill", call_sid, warn_on_error=False)

    async def announce(
        self,
        call_sid: str,
        text: str,
        voice: str = "default",
    ) -> None:
        """
        Speak *text* into the call via uuid_broadcast.

        voice is accepted for interface compatibility and is not used.
        """
        # FreeSWITCH TTS via mod_tts_commandline or Piper pipe
        await self._command("uuid_broadcast", self._say_args(call_sid, text))

    async def get_state(self, call_sid: str) -> CallState:
        """Read the channel's call_state variable and map it to a CallState."""
        raw = await self._command(
            "uuid_getvar", f"{call_sid} call_state", warn_on_error=False
        )
        return self._FS_STATE_MAP.get(raw, "failed")
|
|
|
|
|
|
# ── Factory ───────────────────────────────────────────────────────────────────
|
|
|
|
|
|
def make_telephony(
    mock: bool | None = None,
    backend: str | None = None,
) -> MockTelephonyBackend | SignalWireBackend | FreeSWITCHBackend:
    """
    Factory: return a TelephonyBackend appropriate for the current environment.

    Resolution order:
    1. mock=True or CF_VOICE_MOCK=1           → MockTelephonyBackend
    2. backend="signalwire" or CF_SW_PROJECT_ID present → SignalWireBackend
    3. backend="freeswitch" or CF_ESL_PASSWORD present  → FreeSWITCHBackend
    4. Raises RuntimeError — no usable backend configured

    An explicit mock=False disables mock mode even when CF_VOICE_MOCK=1.
    The backend name is matched case-insensitively, ignoring surrounding
    whitespace. A name that is neither "signalwire" nor "freeswitch" raises
    RuntimeError naming the offending value.

    In production, backend selection is driven by the tier system:
        Free tier  → FreeSWITCHBackend (BYOK VoIP)
        Paid tier  → SignalWireBackend (CF-provisioned)
    """
    use_mock = mock if mock is not None else os.environ.get("CF_VOICE_MOCK", "") == "1"
    if use_mock:
        return MockTelephonyBackend()

    # An explicit name wins; otherwise infer the backend from whichever
    # credentials are present, preferring SignalWire.
    if backend:
        chosen = backend.strip().lower()
    elif os.environ.get("CF_SW_PROJECT_ID"):
        chosen = "signalwire"
    elif os.environ.get("CF_ESL_PASSWORD"):
        chosen = "freeswitch"
    else:
        chosen = None

    if chosen == "signalwire":
        return SignalWireBackend()

    if chosen == "freeswitch":
        return FreeSWITCHBackend()

    if chosen is not None:
        # A backend was requested but is not one we know: say so, instead of
        # claiming that nothing was configured.
        raise RuntimeError(
            f"Unknown telephony backend {backend!r}. "
            "Expected 'signalwire' or 'freeswitch'."
        )

    raise RuntimeError(
        "No telephony backend configured. "
        "Set CF_VOICE_MOCK=1 for mock mode, or provide SignalWire / FreeSWITCH credentials."
    )
|