"""Earnings-call transcripts via Financial Modeling Prep (§4.1, §12 — decision: FMP). Audio isn't reliably fetchable for large-caps (no uniform feed; ~30–90d replay expiry breaks backfill), so FMP's transcript API is the backbone and EDGAR filings remain the durable core. FMP also exposes an earnings *calendar* to trigger ingestion on the day a call drops. Endpoint paths/params are marked TODO(contract): confirm against the FMP 'stable' docs for the account tier at integration. Needs config.fmp_api_key. """ from __future__ import annotations import hashlib import sqlite3 from pathlib import Path from typing import Any import requests FMP_BASE = "https://financialmodelingprep.com/stable" class FMPClient: def __init__(self, api_key: str, *, base: str = FMP_BASE, timeout: int = 30) -> None: if not api_key: raise ValueError("FMP_API_KEY is required for earnings-call transcripts") self.api_key = api_key self.base = base self.timeout = timeout self.s = requests.Session() def _get(self, path: str, **params: Any) -> Any: params["apikey"] = self.api_key r = self.s.get(f"{self.base}/{path}", params=params, timeout=self.timeout) r.raise_for_status() return r.json() # Confirmed against FMP 'stable' 2026-06-07 (v3 is legacy/403). Note singular "earning". def transcript_dates(self, symbol: str) -> Any: """List available transcripts: [{quarter, fiscalYear, date}, ...].""" return self._get("earning-call-transcript-dates", symbol=symbol) def transcript(self, symbol: str, *, year: int, quarter: int) -> Any: """One transcript: [{symbol, period, year, date, content}]. Use the `date` field as the document date — FMP's year/quarter labels are fiscal and can be offset from the call date.""" return self._get("earning-call-transcript", symbol=symbol, year=year, quarter=quarter) def earnings_calendar(self, *, from_date: str, to_date: str) -> Any: """Earnings calendar (ingestion trigger): [{symbol, date, epsActual, ...}, ...].""" return self._get("earnings-calendar", **{"from": from_date, "to": to_date}) def ingest_transcript( conn: sqlite3.Connection, *, source_id: str, symbol: str, year: int, quarter: int, content: str, date: str | None, data_dir: Path, prompt_version: str = "extract-v0", ) -> tuple[bool, bool]: """Store one transcript (content written to disk → transcript_path) and enqueue an 'extract' job. Idempotent. Returns (new_document, new_job).""" from ..backfill import queue external_id = f"{symbol}-{year}Q{quarter}" doc_id = f"earnings:{external_id}" tdir = Path(data_dir) / "transcripts" tdir.mkdir(parents=True, exist_ok=True) tpath = tdir / f"{external_id}.txt" tpath.write_text(content) content_hash = hashlib.sha256(content.encode()).hexdigest() cur = conn.execute( """INSERT OR IGNORE INTO documents (doc_id, source_id, kind, external_id, title, date, transcript_path, content_hash, processed_at) VALUES (?,?,?,?,?,?,?,?, datetime('now'))""", (doc_id, source_id, "earnings_call", external_id, f"{symbol} {year} Q{quarter} call", date, str(tpath), content_hash), ) conn.commit() if not cur.rowcount: return (False, False) # earnings-call Q&A is the highest-yield text source (§4.1) → priority 40, ahead of filings (50). h = hashlib.sha256(f"{doc_id}|{prompt_version}".encode()).hexdigest() new_job = queue.enqueue(conn, job_type="extract", target_id=doc_id, input_hash=h, parent_doc_id=doc_id, priority=40) is not None return (True, new_job) def ingest_for_ticker( conn: sqlite3.Connection, fmp: FMPClient, *, source_id: str, symbol: str, data_dir: Path, since: str | None = None, until: str | None = None, limit: int = 8, ) -> tuple[int, int]: """Enumerate available transcripts via the dates index, fetch those in [since, until], and ingest. Uses each transcript's own `date` (FMP fiscal labels are offset). Returns (docs, jobs).""" dates = fmp.transcript_dates(symbol) picked = [] for d in dates if isinstance(dates, list) else []: dt = d.get("date") if since and dt and dt < since: continue if until and dt and dt > until: continue picked.append(d) n_docs = n_jobs = 0 for d in picked[:limit]: tr = fmp.transcript(symbol, year=d["fiscalYear"], quarter=d["quarter"]) item = (tr[0] if isinstance(tr, list) and tr else tr) or {} content = item.get("content") or "" if not content: continue nd, nj = ingest_transcript( conn, source_id=source_id, symbol=symbol, year=d["fiscalYear"], quarter=d["quarter"], content=content, date=item.get("date") or d.get("date"), data_dir=data_dir, ) n_docs += int(nd) n_jobs += int(nj) return n_docs, n_jobs