"""Corpus-management web UI (FastAPI).
Pages:
/ — dashboard: corpus + pipeline counts at a glance
/corpus — full source selection (companies + podcasts) + "add source" form
/corpus/add — POST handler (manual urlencoded parse → no python-multipart dependency)
/source/{source_id} — per-source detail: documents + extracted claims (inspect the signal)
"""
from __future__ import annotations
import html
import re
import sqlite3
from urllib.parse import parse_qs
from fastapi import FastAPI, Request
from fastapi.responses import HTMLResponse, RedirectResponse
from ..config import load_config
from ..store import db
# Inline stylesheet for the UI: dark background (#0f1115) with light text, plus
# rules for the header nav, tables, .tag pills, .cards/.card counters and forms.
# NOTE(review): no reference to _CSS is visible in this view of the file;
# presumably _page embeds it in the page head. Confirm against the full source.
_CSS = """
body{font:14px/1.5 -apple-system,Segoe UI,Roboto,sans-serif;margin:0;background:#0f1115;color:#e6e6e6}
header{background:#161a22;padding:12px 20px;border-bottom:1px solid #2a2f3a}
header a{color:#7aa2f7;text-decoration:none;margin-right:18px;font-weight:600}
main{padding:20px;max-width:1100px;margin:0 auto}
h1{font-size:20px}h2{font-size:16px;margin-top:28px;color:#9aa5b1}
table{border-collapse:collapse;width:100%;margin:10px 0}
th,td{text-align:left;padding:6px 10px;border-bottom:1px solid #232833;font-size:13px}
th{color:#9aa5b1;font-weight:600}
tr:hover td{background:#161a22}
.tag{display:inline-block;padding:1px 7px;border-radius:10px;background:#232833;font-size:11px;color:#aab}
.cards{display:flex;gap:14px;flex-wrap:wrap}
.card{background:#161a22;border:1px solid #2a2f3a;border-radius:8px;padding:14px 18px;min-width:130px}
.card .n{font-size:24px;font-weight:700;color:#7aa2f7}.card .l{color:#9aa5b1;font-size:12px}
form{background:#161a22;border:1px solid #2a2f3a;border-radius:8px;padding:16px;margin:14px 0}
label{display:block;margin:8px 0 2px;color:#9aa5b1;font-size:12px}
input,select{background:#0f1115;border:1px solid #2a2f3a;color:#e6e6e6;border-radius:5px;padding:6px 8px;width:240px}
button{background:#7aa2f7;color:#0f1115;border:0;border-radius:6px;padding:8px 16px;font-weight:700;cursor:pointer;margin-top:12px}
a{color:#7aa2f7}.muted{color:#6b7280;font-size:12px}
"""
# Fixed vocabularies for three source attributes: cluster, kind and role.
# NOTE(review): nothing in the visible code reads these lists; presumably they
# populate the dropdowns of the add-source form on /corpus (whose markup is not
# visible here). corpus_add does not validate submitted values against them.
_CLUSTERS = ["macro", "ai_tech", "energy", "bitcoin", "vc_consensus", "generalist"]
_KINDS = ["podcast", "youtube", "filing", "earnings_call"]
_ROLES = ["none", "CB", "IND", "DX"]
# Build the full page for one view and return it as an HTMLResponse.
#   title: page title; it is passed through html.escape before interpolation.
#   body:  page content; it is interpolated verbatim after nav, so callers are
#          responsible for escaping any untrusted text they place in it.
# NOTE(review): the string literals below look as if their HTML markup was
# stripped in this copy of the file (the nav text has no links and the doc
# f-string is split across lines). Restore them from version control before
# changing this function.
def _page(title: str, body: str) -> HTMLResponse:
nav = ('DashboardCorpus'
'Ten31 Signal Engine')
doc = f"
{html.escape(title)}" \
f"{nav}{body}"
return HTMLResponse(doc)
def _slug(s: str) -> str:
return re.sub(r"[^a-z0-9]+", "-", s.lower()).strip("-")[:40] or "src"
def create_app() -> FastAPI:
cfg = load_config()
app = FastAPI(title="Ten31 Signal Engine")
def conn() -> sqlite3.Connection:
    """Open a connection to the configured database and run ``db.init_db`` on it.

    Returns:
        A fresh connection obtained from ``db.connect(cfg.db_path)``. The
        caller owns it and is responsible for closing it.

    Raises:
        Whatever ``db.init_db`` raises; in that case the connection is closed
        before the exception propagates.
    """
    c = db.connect(cfg.db_path)
    try:
        db.init_db(c)
    except Exception:
        # The caller never receives the handle on failure, so release it here
        # instead of leaking it.
        c.close()
        raise
    return c
# GET / : render the dashboard page.
# Opens a connection via conn(), collects six COUNT(*) totals for the cards,
# builds three grouped breakdowns, closes the connection and returns the result
# of _page with the title Dashboard.
# NOTE(review): the HTML markup inside the f-strings of this function appears
# to have been stripped in this copy; only the interpolated expressions remain.
# NOTE(review): the connection is closed only on the success path; an exception
# from any query skips c.close().
@app.get("/", response_class=HTMLResponse)
def dashboard() -> HTMLResponse:
c = conn()
# scalar: run q with positional parameters a and return the first column of
# the first row, or 0 when the query yields no row.
def scalar(q, *a):
r = c.execute(q, a).fetchone()
return r[0] if r else 0
# Card label -> count. Embedded counts claims whose qdrant_point_id is set.
cards = {
"Sources": scalar("SELECT COUNT(*) FROM sources"),
"Documents": scalar("SELECT COUNT(*) FROM documents"),
"Claims": scalar("SELECT COUNT(*) FROM claims"),
"Embedded": scalar("SELECT COUNT(*) FROM claims WHERE qdrant_point_id IS NOT NULL"),
"Convictions": scalar("SELECT COUNT(*) FROM conviction_log"),
"Ledger": scalar("SELECT COUNT(*) FROM ledger"),
}
cards_html = "".join(f'
{v}
{k}
'
for k, v in cards.items())
# Breakdowns. rows renders each two-column result row of q: the first column
# is converted with str() and HTML-escaped, the second is interpolated as-is.
# It returns an empty string when q yields no rows, which is what triggers
# the fallback text in the body template below.
def rows(q):
return "".join(f"
{html.escape(str(a))}
{b}
" for a, b in c.execute(q))
claims_by_type = rows("SELECT claim_type, COUNT(*) FROM claims GROUP BY claim_type ORDER BY 2 DESC")
claims_by_seam = rows("SELECT thesis_seam, COUNT(*) FROM claims GROUP BY thesis_seam ORDER BY 2 DESC")
queue = rows("SELECT job_type||' / '||state, COUNT(*) FROM backfill_jobs GROUP BY 1 ORDER BY 1")
# All queries have been consumed into strings by now, so the connection can go.
c.close()
body = f"""
Dashboard
{cards_html}
Claims by type
type
n
{claims_by_type or '
none yet
'}
Claims by thesis seam
seam
n
{claims_by_seam or '
none yet
'}
Backfill queue
type / state
n
{queue or '
empty
'}
"""
return _page("Dashboard", body)
# GET /corpus : render the source list together with the add-source form.
# Loads every row of sources along with two per-source counts (documents and
# claims, computed by correlated subqueries), ordered by kind then source_id,
# closes the connection, and returns the result of _page with the title Corpus.
# NOTE(review): the HTML markup inside the f-strings of this function appears
# to have been stripped in this copy; row, opt and form currently hold empty or
# near-empty literals, so their real output cannot be read from here.
# NOTE(review): rows are indexed by column name, so conn() presumably returns a
# connection with a name-addressable row factory; confirm in the store module.
@app.get("/corpus", response_class=HTMLResponse)
def corpus() -> HTMLResponse:
c = conn()
srcs = c.execute("""
SELECT s.*,
(SELECT COUNT(*) FROM documents d WHERE d.source_id=s.source_id) docs,
(SELECT COUNT(*) FROM claims cl WHERE cl.source_id=s.source_id) claims
FROM sources s ORDER BY s.kind, s.source_id""").fetchall()
c.close()
# row: markup for one source. extra is the ticker when set, otherwise the
# backtest_2022_2023 value, otherwise an empty string.
def row(s):
extra = s["ticker"] or s["backtest_2022_2023"] or ""
return (f"
")
table = "".join(row(s) for s in srcs)
# opt: joins one rendered fragment per element of xs into a single string.
opt = lambda xs: "".join(f"" for x in xs)
form = f""""""
body = f"""
Corpus ({len(srcs)} sources)
{form}
name
kind
cluster
role
ticker / backtest
docs
claims
{table}
"""
return _page("Corpus", body)
@app.post("/corpus/add")
async def corpus_add(request: Request):
    """Handle the "add source" form and redirect back to ``/corpus``.

    The urlencoded request body is parsed by hand with ``parse_qs``. Only the
    first value of each field is used, it is whitespace-stripped, and fields
    that end up blank are dropped. A submission without a ``name`` is ignored.

    The source id is ``co-<ticker lowercased>`` when a ticker is supplied.
    Otherwise it is ``<prefix>-<slug of name>``, where the prefix is ``pod``
    for the ``podcast`` and ``youtube`` kinds and the kind itself for anything
    else. The row is written with ``INSERT OR IGNORE``, so an existing
    ``source_id`` is left untouched.

    Returns:
        A 303 redirect to ``/corpus`` in every non-error case.
    """
    raw = (await request.body()).decode()
    f = {k: v[0].strip() for k, v in parse_qs(raw).items() if v and v[0].strip()}
    name = f.get("name")
    if not name:
        return RedirectResponse("/corpus", status_code=303)

    kind = f.get("kind", "podcast")
    ticker = f.get("ticker")
    if ticker:
        sid = f"co-{ticker.lower()}"
    else:
        prefix = "pod" if kind in ("podcast", "youtube") else kind
        sid = f"{prefix}-{_slug(name)}"

    c = conn()
    try:
        c.execute(
            """INSERT OR IGNORE INTO sources
            (source_id, name, kind, source_cluster, role, ticker, rss_url, channel_url)
            VALUES (?,?,?,?,?,?,?,?)""",
            (
                sid,
                name,
                kind,
                f.get("cluster"),
                f.get("role", "none"),
                ticker.upper() if ticker else None,
                f.get("rss_url"),
                f.get("channel_url"),
            ),
        )
        c.commit()
    finally:
        # Release the connection even when the insert or commit raises.
        c.close()
    return RedirectResponse("/corpus", status_code=303)
@app.get("/source/{source_id}", response_class=HTMLResponse)
def source_detail(source_id: str) -> HTMLResponse:
c = conn()
s = c.execute("SELECT * FROM sources WHERE source_id=?", (source_id,)).fetchone()
if not s:
c.close()
return _page("Not found", "
Source not found
")
claims = c.execute("""SELECT proposition, claim_type, time_horizon, thesis_seam, topic_canonical,
engages_consensus, date FROM claims WHERE source_id=?
ORDER BY date DESC LIMIT 200""", (source_id,)).fetchall()
c.close()
def crow(cl):
star = " ⚔" if cl["engages_consensus"] else ""
return (f"
{cl['date'] or ''}
{cl['claim_type']}
"
f"
{cl['thesis_seam']}
{html.escape(cl['topic_canonical'] or '')}
"
f"
{html.escape(cl['proposition'])}{star}
")
rows = "".join(crow(cl) for cl in claims) or '
no claims extracted yet
'
meta = f"{s['kind']} cluster={s['source_cluster'] or '-'} role={s['role'] or '-'}"
if s["ticker"]:
meta += f" ticker={s['ticker']}"
if s["backtest_2022_2023"]:
meta += f" · backtest={s['backtest_2022_2023']}"
body = f"""