180 lines
9.4 KiB
Python
180 lines
9.4 KiB
Python
"""Corpus-management web UI (FastAPI).
|
|
|
|
Pages:
|
|
/ dashboard — corpus + pipeline counts at a glance
|
|
/corpus full source selection (companies + podcasts) + "add source" form
|
|
/corpus/add POST handler (manual urlencoded parse → no python-multipart dependency)
|
|
/source/{id} per-source detail: documents + extracted claims (inspect the signal)
|
|
"""
|
|
from __future__ import annotations

import html
import re
import sqlite3
from contextlib import closing
from urllib.parse import parse_qs, quote

from fastapi import FastAPI, Request
from fastapi.responses import HTMLResponse, RedirectResponse

from ..config import load_config
from ..store import db
|
|
|
|
_CSS = """
|
|
body{font:14px/1.5 -apple-system,Segoe UI,Roboto,sans-serif;margin:0;background:#0f1115;color:#e6e6e6}
|
|
header{background:#161a22;padding:12px 20px;border-bottom:1px solid #2a2f3a}
|
|
header a{color:#7aa2f7;text-decoration:none;margin-right:18px;font-weight:600}
|
|
main{padding:20px;max-width:1100px;margin:0 auto}
|
|
h1{font-size:20px}h2{font-size:16px;margin-top:28px;color:#9aa5b1}
|
|
table{border-collapse:collapse;width:100%;margin:10px 0}
|
|
th,td{text-align:left;padding:6px 10px;border-bottom:1px solid #232833;font-size:13px}
|
|
th{color:#9aa5b1;font-weight:600}
|
|
tr:hover td{background:#161a22}
|
|
.tag{display:inline-block;padding:1px 7px;border-radius:10px;background:#232833;font-size:11px;color:#aab}
|
|
.cards{display:flex;gap:14px;flex-wrap:wrap}
|
|
.card{background:#161a22;border:1px solid #2a2f3a;border-radius:8px;padding:14px 18px;min-width:130px}
|
|
.card .n{font-size:24px;font-weight:700;color:#7aa2f7}.card .l{color:#9aa5b1;font-size:12px}
|
|
form{background:#161a22;border:1px solid #2a2f3a;border-radius:8px;padding:16px;margin:14px 0}
|
|
label{display:block;margin:8px 0 2px;color:#9aa5b1;font-size:12px}
|
|
input,select{background:#0f1115;border:1px solid #2a2f3a;color:#e6e6e6;border-radius:5px;padding:6px 8px;width:240px}
|
|
button{background:#7aa2f7;color:#0f1115;border:0;border-radius:6px;padding:8px 16px;font-weight:700;cursor:pointer;margin-top:12px}
|
|
a{color:#7aa2f7}.muted{color:#6b7280;font-size:12px}
|
|
"""
|
|
|
|
_CLUSTERS = ["macro", "ai_tech", "energy", "bitcoin", "vc_consensus", "generalist"]
|
|
_KINDS = ["podcast", "youtube", "filing", "earnings_call"]
|
|
_ROLES = ["none", "CB", "IND", "DX"]
|
|
|
|
|
|
def _page(title: str, body: str) -> HTMLResponse:
    """Wrap *body* in the shared HTML shell and return it as an ``HTMLResponse``.

    ``title`` is HTML-escaped before it is placed in ``<title>``. ``body`` is
    inserted as-is, so callers are responsible for passing safe markup.
    """
    header = (
        '<header><a href="/">Dashboard</a><a href="/corpus">Corpus</a>'
        '<span class="muted">Ten31 Signal Engine</span></header>'
    )
    safe_title = html.escape(title)
    pieces = [
        "<!doctype html><html><head><meta charset=utf-8>",
        f"<title>{safe_title}</title>",
        f"<style>{_CSS}</style></head>",
        f"<body>{header}<main>{body}</main></body></html>",
    ]
    return HTMLResponse("".join(pieces))
|
|
|
|
|
|
def _slug(s: str) -> str:
|
|
return re.sub(r"[^a-z0-9]+", "-", s.lower()).strip("-")[:40] or "src"
|
|
|
|
|
|
def create_app() -> FastAPI:
    """Build the corpus-management FastAPI application.

    Configuration is loaded once via ``load_config()``. Four routes are
    registered:

    * ``GET /`` - dashboard with table counts and breakdowns
    * ``GET /corpus`` - list of sources plus the "add source" form
    * ``POST /corpus/add`` - handler for that form (urlencoded body)
    * ``GET /source/{source_id}`` - one source and its most recent claims

    Every value read from the database is HTML-escaped before it is rendered,
    because ``/corpus/add`` stores submitted fields without validating them.

    Returns:
        The configured ``FastAPI`` instance.
    """
    cfg = load_config()
    app = FastAPI(title="Ten31 Signal Engine")

    def conn() -> sqlite3.Connection:
        """Open a connection to ``cfg.db_path`` and run ``db.init_db`` on it."""
        c = db.connect(cfg.db_path)
        db.init_db(c)
        return c

    def esc(value: object) -> str:
        """Coerce *value* to ``str`` and HTML-escape it (quotes included)."""
        return html.escape(str(value))

    @app.get("/", response_class=HTMLResponse)
    def dashboard() -> HTMLResponse:
        """Render headline counts plus claim and backfill-queue breakdowns."""
        # closing() releases the connection even when one of the queries raises.
        with closing(conn()) as c:

            def scalar(q: str, *a: object):
                """Return the first column of the first row of *q*, or 0."""
                r = c.execute(q, a).fetchone()
                return r[0] if r else 0

            def rows(q: str) -> str:
                """Render each two-column result row of *q* as a ``<tr>``."""
                return "".join(
                    f"<tr><td>{esc(a)}</td><td>{esc(b)}</td></tr>"
                    for a, b in c.execute(q)
                )

            cards = {
                "Sources": scalar("SELECT COUNT(*) FROM sources"),
                "Documents": scalar("SELECT COUNT(*) FROM documents"),
                "Claims": scalar("SELECT COUNT(*) FROM claims"),
                "Embedded": scalar("SELECT COUNT(*) FROM claims WHERE qdrant_point_id IS NOT NULL"),
                "Convictions": scalar("SELECT COUNT(*) FROM conviction_log"),
                "Ledger": scalar("SELECT COUNT(*) FROM ledger"),
            }
            # breakdowns
            claims_by_type = rows("SELECT claim_type, COUNT(*) FROM claims GROUP BY claim_type ORDER BY 2 DESC")
            claims_by_seam = rows("SELECT thesis_seam, COUNT(*) FROM claims GROUP BY thesis_seam ORDER BY 2 DESC")
            queue = rows("SELECT job_type||' / '||state, COUNT(*) FROM backfill_jobs GROUP BY 1 ORDER BY 1")

        cards_html = "".join(
            f'<div class="card"><div class="n">{v}</div><div class="l">{k}</div></div>'
            for k, v in cards.items()
        )
        # Placeholder rows shown when a breakdown query returns nothing.
        none_yet = "<tr><td class=muted colspan=2>none yet</td></tr>"
        empty = "<tr><td class=muted colspan=2>empty</td></tr>"
        body = f"""<h1>Dashboard</h1><div class="cards">{cards_html}</div>
<h2>Claims by type</h2><table><tr><th>type</th><th>n</th></tr>{claims_by_type or none_yet}</table>
<h2>Claims by thesis seam</h2><table><tr><th>seam</th><th>n</th></tr>{claims_by_seam or none_yet}</table>
<h2>Backfill queue</h2><table><tr><th>type / state</th><th>n</th></tr>{queue or empty}</table>"""
        return _page("Dashboard", body)

    @app.get("/corpus", response_class=HTMLResponse)
    def corpus() -> HTMLResponse:
        """Render every source with its document/claim counts and the add form."""
        with closing(conn()) as c:
            srcs = c.execute("""
                SELECT s.*,
                  (SELECT COUNT(*) FROM documents d WHERE d.source_id=s.source_id) docs,
                  (SELECT COUNT(*) FROM claims cl WHERE cl.source_id=s.source_id) claims
                FROM sources s ORDER BY s.kind, s.source_id""").fetchall()

        def row(s) -> str:
            """Render one source as a table row linking to its detail page."""
            extra = s["ticker"] or s["backtest_2022_2023"] or ""
            # Percent-encode the id for use as a URL path segment, then escape
            # the whole URL for the single-quoted HTML attribute.
            href = esc("/source/" + quote(str(s["source_id"]), safe=""))
            return (
                f"<tr><td><a href='{href}'>{esc(s['name'])}</a></td>"
                f"<td><span class=tag>{esc(s['kind'])}</span></td><td>{esc(s['source_cluster'] or '')}</td>"
                f"<td>{esc(s['role'] or '')}</td><td>{esc(extra)}</td>"
                f"<td>{esc(s['docs'])}</td><td>{esc(s['claims'])}</td></tr>"
            )

        def options(values) -> str:
            """Render *values* as a run of ``<option>`` elements."""
            return "".join(f"<option>{esc(x)}</option>" for x in values)

        table = "".join(row(s) for s in srcs)
        form = f"""<form method=post action="/corpus/add">
<strong>Add to corpus</strong>
<label>Name</label><input name=name required placeholder="NVIDIA / Odd Lots">
<label>Kind</label><select name=kind>{options(_KINDS)}</select>
<label>Cluster</label><select name=cluster>{options(_CLUSTERS)}</select>
<label>Role</label><select name=role>{options(_ROLES)}</select>
<label>Ticker (companies)</label><input name=ticker placeholder="NVDA">
<label>RSS URL (podcasts)</label><input name=rss_url placeholder="https://...">
<label>YouTube channel</label><input name=channel_url placeholder="https://youtube.com/@...">
<button type=submit>Add source</button>
</form>"""
        body = f"""<h1>Corpus ({len(srcs)} sources)</h1>{form}
<table><tr><th>name</th><th>kind</th><th>cluster</th><th>role</th><th>ticker / backtest</th><th>docs</th><th>claims</th></tr>{table}</table>"""
        return _page("Corpus", body)

    @app.post("/corpus/add")
    async def corpus_add(request: Request):
        """Insert a source from the urlencoded form body, then redirect to /corpus.

        The body is parsed by hand with ``parse_qs``. Blank fields are dropped;
        a missing ``name`` skips the insert. An existing ``source_id`` is left
        untouched (``INSERT OR IGNORE``).
        """
        # Undecodable bytes are replaced rather than raised, so a malformed
        # body is handled by the normal field checks instead of a 500.
        raw = (await request.body()).decode("utf-8", errors="replace")
        fields = {k: v[0].strip() for k, v in parse_qs(raw).items() if v and v[0].strip()}
        name = fields.get("name")
        if not name:
            return RedirectResponse("/corpus", status_code=303)
        kind = fields.get("kind", "podcast")
        ticker = fields.get("ticker")
        # A ticker takes precedence for the id; otherwise podcast and youtube
        # share the "pod-" prefix and other kinds use the kind itself.
        if ticker:
            sid = f"co-{ticker.lower()}"
        else:
            prefix = "pod" if kind in ("podcast", "youtube") else kind
            sid = f"{prefix}-{_slug(name)}"
        with closing(conn()) as c:
            c.execute(
                """INSERT OR IGNORE INTO sources
                   (source_id, name, kind, source_cluster, role, ticker, rss_url, channel_url)
                   VALUES (?,?,?,?,?,?,?,?)""",
                (
                    sid,
                    name,
                    kind,
                    fields.get("cluster"),
                    fields.get("role", "none"),
                    ticker.upper() if ticker else None,
                    fields.get("rss_url"),
                    fields.get("channel_url"),
                ),
            )
            c.commit()
        return RedirectResponse("/corpus", status_code=303)

    @app.get("/source/{source_id}", response_class=HTMLResponse)
    def source_detail(source_id: str) -> HTMLResponse:
        """Render one source's metadata and up to 200 of its most recent claims.

        Responds with a 404 "Source not found" page when *source_id* is unknown.
        """
        with closing(conn()) as c:
            s = c.execute("SELECT * FROM sources WHERE source_id=?", (source_id,)).fetchone()
            if not s:
                missing = _page("Not found", "<h1>Source not found</h1>")
                # Report the miss to clients instead of answering 200 OK.
                missing.status_code = 404
                return missing
            claims = c.execute(
                """SELECT proposition, claim_type, time_horizon, thesis_seam, topic_canonical,
                          engages_consensus, date FROM claims WHERE source_id=?
                   ORDER BY date DESC LIMIT 200""",
                (source_id,),
            ).fetchall()

        def crow(cl) -> str:
            """Render one claim as a table row; consensus-engaging claims get a marker."""
            star = " ⚔" if cl["engages_consensus"] else ""
            return (
                f"<tr><td>{esc(cl['date'] or '')}</td><td><span class=tag>{esc(cl['claim_type'])}</span></td>"
                f"<td>{esc(cl['thesis_seam'])}</td><td>{esc(cl['topic_canonical'] or '')}</td>"
                f"<td>{esc(cl['proposition'])}{star}</td></tr>"
            )

        rows = "".join(crow(cl) for cl in claims) or "<tr><td class=muted colspan=5>no claims extracted yet</td></tr>"
        meta = (
            f"<span class=tag>{esc(s['kind'])}</span> cluster={esc(s['source_cluster'] or '-')}"
            f" role={esc(s['role'] or '-')}"
        )
        if s["ticker"]:
            meta += f" ticker={esc(s['ticker'])}"
        if s["backtest_2022_2023"]:
            meta += f" · backtest={esc(s['backtest_2022_2023'])}"
        body = f"""<h1>{esc(s['name'])}</h1><p>{meta}</p>
<p class=muted>{esc(s['notes'] or '')}</p>
<h2>Claims ({len(claims)}) <span class=muted>⚔ = engages consensus</span></h2>
<table><tr><th>date</th><th>type</th><th>seam</th><th>topic</th><th>proposition</th></tr>{rows}</table>"""
        return _page(s["name"], body)

    return app
|