"""Corpus-management web UI (FastAPI). Pages: / dashboard — corpus + pipeline counts at a glance /corpus full source selection (companies + podcasts) + "add source" form /corpus/add POST handler (manual urlencoded parse → no python-multipart dependency) /source/{id} per-source detail: documents + extracted claims (inspect the signal) """ from __future__ import annotations import html import re import sqlite3 from urllib.parse import parse_qs from fastapi import FastAPI, Request from fastapi.responses import HTMLResponse, RedirectResponse from ..config import load_config from ..store import db _CSS = """ body{font:14px/1.5 -apple-system,Segoe UI,Roboto,sans-serif;margin:0;background:#0f1115;color:#e6e6e6} header{background:#161a22;padding:12px 20px;border-bottom:1px solid #2a2f3a} header a{color:#7aa2f7;text-decoration:none;margin-right:18px;font-weight:600} main{padding:20px;max-width:1100px;margin:0 auto} h1{font-size:20px}h2{font-size:16px;margin-top:28px;color:#9aa5b1} table{border-collapse:collapse;width:100%;margin:10px 0} th,td{text-align:left;padding:6px 10px;border-bottom:1px solid #232833;font-size:13px} th{color:#9aa5b1;font-weight:600} tr:hover td{background:#161a22} .tag{display:inline-block;padding:1px 7px;border-radius:10px;background:#232833;font-size:11px;color:#aab} .cards{display:flex;gap:14px;flex-wrap:wrap} .card{background:#161a22;border:1px solid #2a2f3a;border-radius:8px;padding:14px 18px;min-width:130px} .card .n{font-size:24px;font-weight:700;color:#7aa2f7}.card .l{color:#9aa5b1;font-size:12px} form{background:#161a22;border:1px solid #2a2f3a;border-radius:8px;padding:16px;margin:14px 0} label{display:block;margin:8px 0 2px;color:#9aa5b1;font-size:12px} input,select{background:#0f1115;border:1px solid #2a2f3a;color:#e6e6e6;border-radius:5px;padding:6px 8px;width:240px} button{background:#7aa2f7;color:#0f1115;border:0;border-radius:6px;padding:8px 16px;font-weight:700;cursor:pointer;margin-top:12px} a{color:#7aa2f7}.muted{color:#6b7280;font-size:12px} """ _CLUSTERS = ["macro", "ai_tech", "energy", "bitcoin", "vc_consensus", "generalist"] _KINDS = ["podcast", "youtube", "filing", "earnings_call"] _ROLES = ["none", "CB", "IND", "DX"] def _page(title: str, body: str) -> HTMLResponse: nav = ('
DashboardCorpus' 'Ten31 Signal Engine
') doc = f"{html.escape(title)}" \ f"{nav}
{body}
" return HTMLResponse(doc) def _slug(s: str) -> str: return re.sub(r"[^a-z0-9]+", "-", s.lower()).strip("-")[:40] or "src" def create_app() -> FastAPI: cfg = load_config() app = FastAPI(title="Ten31 Signal Engine") def conn() -> sqlite3.Connection: c = db.connect(cfg.db_path) db.init_db(c) return c @app.get("/", response_class=HTMLResponse) def dashboard() -> HTMLResponse: c = conn() def scalar(q, *a): r = c.execute(q, a).fetchone() return r[0] if r else 0 cards = { "Sources": scalar("SELECT COUNT(*) FROM sources"), "Documents": scalar("SELECT COUNT(*) FROM documents"), "Claims": scalar("SELECT COUNT(*) FROM claims"), "Embedded": scalar("SELECT COUNT(*) FROM claims WHERE qdrant_point_id IS NOT NULL"), "Convictions": scalar("SELECT COUNT(*) FROM conviction_log"), "Ledger": scalar("SELECT COUNT(*) FROM ledger"), } cards_html = "".join(f'
{v}
{k}
' for k, v in cards.items()) # breakdowns def rows(q): return "".join(f"{html.escape(str(a))}{b}" for a, b in c.execute(q)) claims_by_type = rows("SELECT claim_type, COUNT(*) FROM claims GROUP BY claim_type ORDER BY 2 DESC") claims_by_seam = rows("SELECT thesis_seam, COUNT(*) FROM claims GROUP BY thesis_seam ORDER BY 2 DESC") queue = rows("SELECT job_type||' / '||state, COUNT(*) FROM backfill_jobs GROUP BY 1 ORDER BY 1") c.close() body = f"""

Dashboard

{cards_html}

Claims by type

{claims_by_type or ''}
typen
none yet

Claims by thesis seam

{claims_by_seam or ''}
seamn
none yet

Backfill queue

{queue or ''}
type / staten
empty
""" return _page("Dashboard", body) @app.get("/corpus", response_class=HTMLResponse) def corpus() -> HTMLResponse: c = conn() srcs = c.execute(""" SELECT s.*, (SELECT COUNT(*) FROM documents d WHERE d.source_id=s.source_id) docs, (SELECT COUNT(*) FROM claims cl WHERE cl.source_id=s.source_id) claims FROM sources s ORDER BY s.kind, s.source_id""").fetchall() c.close() def row(s): extra = s["ticker"] or s["backtest_2022_2023"] or "" return (f"{html.escape(s['name'])}" f"{s['kind']}{s['source_cluster'] or ''}" f"{s['role'] or ''}{html.escape(str(extra))}" f"{s['docs']}{s['claims']}") table = "".join(row(s) for s in srcs) opt = lambda xs: "".join(f"" for x in xs) form = f"""
Add to corpus
""" body = f"""

Corpus ({len(srcs)} sources)

{form} {table}
namekindclusterroleticker / backtestdocsclaims
""" return _page("Corpus", body) @app.post("/corpus/add") async def corpus_add(request: Request): raw = (await request.body()).decode() f = {k: v[0].strip() for k, v in parse_qs(raw).items() if v and v[0].strip()} name = f.get("name") if not name: return RedirectResponse("/corpus", status_code=303) kind = f.get("kind", "podcast") ticker = f.get("ticker") sid = f"co-{ticker.lower()}" if ticker else f"{'pod' if kind in ('podcast','youtube') else kind}-{_slug(name)}" c = conn() c.execute("""INSERT OR IGNORE INTO sources (source_id, name, kind, source_cluster, role, ticker, rss_url, channel_url) VALUES (?,?,?,?,?,?,?,?)""", (sid, name, kind, f.get("cluster"), f.get("role", "none"), ticker.upper() if ticker else None, f.get("rss_url"), f.get("channel_url"))) c.commit() c.close() return RedirectResponse("/corpus", status_code=303) @app.get("/source/{source_id}", response_class=HTMLResponse) def source_detail(source_id: str) -> HTMLResponse: c = conn() s = c.execute("SELECT * FROM sources WHERE source_id=?", (source_id,)).fetchone() if not s: c.close() return _page("Not found", "

Source not found

") claims = c.execute("""SELECT proposition, claim_type, time_horizon, thesis_seam, topic_canonical, engages_consensus, date FROM claims WHERE source_id=? ORDER BY date DESC LIMIT 200""", (source_id,)).fetchall() c.close() def crow(cl): star = " ⚔" if cl["engages_consensus"] else "" return (f"{cl['date'] or ''}{cl['claim_type']}" f"{cl['thesis_seam']}{html.escape(cl['topic_canonical'] or '')}" f"{html.escape(cl['proposition'])}{star}") rows = "".join(crow(cl) for cl in claims) or 'no claims extracted yet' meta = f"{s['kind']} cluster={s['source_cluster'] or '-'} role={s['role'] or '-'}" if s["ticker"]: meta += f" ticker={s['ticker']}" if s["backtest_2022_2023"]: meta += f" · backtest={s['backtest_2022_2023']}" body = f"""

{html.escape(s['name'])}

{meta}

{html.escape(s['notes'] or '')}

Claims ({len(claims)}) ⚔ = engages consensus

{rows}
datetypeseamtopicproposition
""" return _page(s["name"], body) return app