Initial commit: Ten31 Signal Engine (ingest, scoring brain, corpus seeds)

2026-06-15 09:24:29 -05:00
commit a6aec77506
77 changed files with 6263 additions and 0 deletions
@@ -0,0 +1,6 @@
+"""The scoring brain (build blueprint).
+
+Stats/geometry NOMINATE candidates; the frontier model only judges/expands a pre-filtered shortlist
+(§5.1). Every count that feeds a score routes through the independence primitive (EISC), never a raw
+source count (§4.5). Every scorer reads `visible_claims` (as-of filtered), never `claims` directly.
+"""
@@ -0,0 +1,43 @@
+"""As-of harness (§6.6 look-ahead guard).
+
+Every scorer reads the `visible_claims` TEMP VIEW, never `claims` directly: at nomination time only
+claims dated <= as_of are visible, so the backtest can't reward noticing what already happened. The
+view also resolves merged canonical topics (topics.status='merged') to a stable `topic_id`.
+"""
+from __future__ import annotations
+
+import sqlite3
+
+
+class Scorer:
+    """Context manager that binds a run to an as_of date and exposes `visible_claims`.
+
+    On entry it (re)creates the `visible_claims` TEMP VIEW: rows of `claims` (joined to
+    `documents`) whose date is non-NULL and <= as_of, plus a `topic_id` column that maps a merged
+    topic (topics.status='merged') to its `merged_into` target. On exit the view is dropped.
+
+    mode='backtest' enforces strict as-of discipline; 'forward' is the live pilot. This class only
+    stores the mode; the view definition is the same for both.
+
+    as_of must be a strict ISO date (YYYY-MM-DD). Views can't take bound parameters, so the value
+    is inlined into the DDL — which is why it is validated in __init__ rather than trusted.
+
+    Raises:
+        ValueError: if as_of is not a zero-padded YYYY-MM-DD calendar date.
+    """
+
+    def __init__(self, conn: sqlite3.Connection, as_of: str, *, mode: str = "backtest") -> None:
+        from datetime import date  # local import: the module header only brings in sqlite3
+
+        # Round-tripping through date guarantees the exact 10-character form. That keeps quotes and
+        # other SQL out of the DDL and keeps the view's string comparison on `c.date` meaningful
+        # (an unpadded "2023-3-1" would compare incorrectly).
+        try:
+            well_formed = date.fromisoformat(as_of).isoformat() == as_of
+        except (TypeError, ValueError):
+            well_formed = False
+        if not well_formed:
+            raise ValueError(f"as_of must be an ISO date (YYYY-MM-DD), got {as_of!r}")
+        self.conn = conn
+        self.as_of = as_of
+        self.mode = mode
+
+    def __enter__(self) -> "Scorer":
+        # NOTE: sqlite3's executescript() commits any pending transaction before running.
+        self.conn.executescript(
+            f"""
+            DROP VIEW IF EXISTS visible_claims;
+            CREATE TEMP VIEW visible_claims AS
+            SELECT c.*,
+              COALESCE((SELECT t.merged_into FROM topics t
+                        WHERE t.topic_canonical = c.topic_canonical AND t.status='merged'),
+                       c.topic_canonical) AS topic_id
+            FROM claims c
+            JOIN documents d ON d.doc_id = c.doc_id
+            WHERE c.date IS NOT NULL AND c.date <= '{self.as_of}';
+            """
+        )
+        return self
+
+    def __exit__(self, *exc) -> None:
+        # Returns None, so an exception raised inside the `with` body still propagates.
+        self.conn.execute("DROP VIEW IF EXISTS visible_claims")
+
+    def count_visible(self) -> int:
+        """Number of rows currently exposed by `visible_claims` (only valid inside the `with`)."""
+        return self.conn.execute("SELECT COUNT(*) FROM visible_claims").fetchone()[0]
@@ -0,0 +1,49 @@
+"""The quantitative bar (§5.1, §6.6) — the single gate between nomination and the frontier judge.
+
+Two tiers:
+  - evidence bar  → clears hard gates → WRITE A LEDGER ROW (the denominator, §6.6), even if never judged.
+  - promotion bar → also clears the score threshold → goes to the frontier judge.
+
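+THE GLOBAL META-RULE (applied to every scorer): no candidate clears on a single source or single
+cluster — EISC_adj >= 2.0 AND K_eff >= 2. This is the §2.1 anti-lonely-outlier law, enforced once.
+Exception: the Job B `under_acted` gate in this module enforces the EISC floor and n_src >= 2 but
+deliberately does NOT hard-gate on K_eff (cross-cluster only boosts the score there).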
+"""
+from __future__ import annotations
+
+# Hard-gate floors for the anti-lonely-outlier rule described in the module docstring.
+EISC_FLOOR = 2.0  # minimum independence-adjusted source count
+KEFF_FLOOR = 2    # minimum contributing clusters; not referenced by the code visible in this module
+
+# Defaults; overridable via the score_thresholds table (so the backtest can sweep without code edits).
+# Keyed by scorer name; a scorer missing from this dict falls back to 0.0 in _min_score.
+DEFAULT_MIN_SCORE = {"under_acted": 0.3, "emergence": 2.0, "contrarian": 1.5,
+                     "convergence": 2.5, "intersection": 2.0}
+
+
+def _min_score(conn, scorer: str) -> float:
+    """Resolve the promotion threshold for `scorer`.
+
+    A non-NULL `min_score` row in the `score_thresholds` table wins. Without a connection, a row,
+    or a value, the DEFAULT_MIN_SCORE entry is used (0.0 for a scorer that has no default).
+    """
+    if conn is None:
+        return DEFAULT_MIN_SCORE.get(scorer, 0.0)
+    override = conn.execute(
+        "SELECT min_score FROM score_thresholds WHERE scorer=?", (scorer,)
+    ).fetchone()
+    if not override or override[0] is None:
+        return DEFAULT_MIN_SCORE.get(scorer, 0.0)
+    return float(override[0])
+
+
+def evaluate(scorer: str, result: dict, *, conn=None) -> tuple[bool, bool]:
+    """Gate one scorer result. Returns (cleared_evidence_bar, cleared_promotion_bar).
+
+    Only "under_acted" is wired; any other scorer name reports (False, False).
+    """
+    if scorer != "under_acted":
+        return (False, False)  # Job A scorers wired with the forward pilot
+    threshold = _min_score(conn, scorer)
+    return _under_acted(result, threshold)
+
+
+def _under_acted(result: dict, min_score: float) -> tuple[bool, bool]:
+    """Apply the Job B gates to one under_acted result. Returns (evidence, promotion).
+
+    evidence  = corroborated AND conviction gate AND exposure gate (a thesis-breaker bypasses the
+                conviction and exposure gates, never the corroboration gate).
+    promotion = evidence AND result["score"] >= min_score.
+    """
+    inputs = result["inputs"]
+    is_breaker = bool(inputs.get("is_breaker"))
+    # §4.4 Job B = "rising INDEPENDENT corroboration". The EISC floor enforces independence
+    # (shared-guest + same-cluster discounting), so a pass is NOT an isolated point or a one-guest
+    # echo (§2.1). Cross-cluster (k_eff>=2) is the §4.5 GOLD for Job A DISCOVERY and is NOT a hard
+    # gate here: N independent energy companies confirming a power thesis is real corroboration.
+    # Cross-cluster still BOOSTS the score (eisc_corrob = eisc_adj includes the xcluster_mult).
+    corroborated = (
+        inputs.get("n_confirmed", 0) >= 4
+        and inputs.get("n_src", 0) >= 2
+        and inputs.get("eisc_corrob", 0.0) >= EISC_FLOOR
+        and inputs.get("a_corrob", 0.0) > 0
+    )
+    if is_breaker:
+        conviction_ok = True
+    else:
+        conviction_ok = inputs.get("conviction_weight", 0.0) >= 0.7  # med-high / high
+    if is_breaker:
+        exposure_ok = True
+    else:
+        exposure_ok = inputs.get("exposure") in ("none", "lt2")      # genuine exposure gap
+    evidence = corroborated and conviction_ok and exposure_ok
+    promotion = evidence and result["score"] >= min_score
+    return evidence, promotion
@@ -0,0 +1,86 @@
+"""Pre-registered confusion matrix on the §7.1 derivatives (DESIGN_v2 §1.3).
+
+Measures PRECISION and RECALL, not recall alone. Uses the engine's already-stored candidate_scores
+(cleared_date + whisper_date) × the pre-registered external repricing (resolution.K2023.yaml). Reports
+the matrix at BOTH the cleared level (what the engine fired) and the whisper level (what it saw before
+the independence floor) — the delta is the empirical answer to the gate debate.
+"""
+from __future__ import annotations
+
+import json
+from datetime import datetime
+
+import yaml
+
+from .external import basket_index, fetch_eod, resolve_reprice, runway_at_signal
+
+
+def _engine_dates(conn) -> dict[str, dict]:
+    """For each under_acted node: earliest cleared as_of and earliest whisper as_of (n_conf>=4, a>0).
+
+    Reads `candidate_scores` rows for scorer='under_acted' and keys the result by node_id, falling
+    back to conviction_id. Rows are accessed by column name, so `conn` must use a name-addressable
+    row factory (e.g. sqlite3.Row).
+
+    Returns:
+        {key: {"cleared": earliest as_of with cleared_evidence_bar set, or None,
+               "whisper": earliest as_of whose inputs show n_confirmed >= 4 and a_corrob > 0,
+                          or None}}
+    """
+    rows = conn.execute(
+        "SELECT node_id, conviction_id, as_of, cleared_evidence_bar ev, inputs_json "
+        "FROM candidate_scores WHERE scorer='under_acted'"
+    ).fetchall()
+    out: dict[str, dict] = {}
+    for r in rows:
+        k = r["node_id"] or r["conviction_id"]
+        d = out.setdefault(k, {"cleared": None, "whisper": None})
+        as_of = r["as_of"]
+        if r["ev"] and (d["cleared"] is None or as_of < d["cleared"]):
+            d["cleared"] = as_of
+        # inputs_json can be NULL or cut off mid-document (it is length-capped when written).
+        # An unreadable payload means "no whisper evidence for this row", not a failed report.
+        try:
+            i = json.loads(r["inputs_json"])
+        except (TypeError, ValueError):
+            i = {}
+        if not isinstance(i, dict):
+            i = {}
+        if i.get("n_confirmed", 0) >= 4 and i.get("a_corrob", 0) > 0:
+            if d["whisper"] is None or as_of < d["whisper"]:
+                d["whisper"] = as_of
+    return out
+
+
+def _lead_days(repricing_date: str, signal_date: str | None) -> int | None:
+    """Whole days from the signal to the repricing (positive when the signal came first).
+
+    Both dates are YYYY-MM-DD strings; returns None when either one is missing or empty.
+    """
+    if not (signal_date and repricing_date):
+        return None
+    fmt = "%Y-%m-%d"
+    gap = datetime.strptime(repricing_date, fmt) - datetime.strptime(signal_date, fmt)
+    return gap.days
+
+
+def run_confusion(conn, cfg, spec_path: str) -> dict:
+    """Build the confusion matrix for every basket named in the pre-registered YAML spec.
+
+    For each node in spec["baskets"]: fetch EOD prices for its symbols over spec["window"], build
+    the basket index, apply the repricing rule from spec["rule"], and pair the outcome with the
+    engine's earliest cleared/whisper dates from `candidate_scores`.
+
+    Args:
+        conn: DB connection handed to `_engine_dates`.
+        cfg: config object; only `cfg.fmp_api_key` is read here.
+        spec_path: path to the YAML spec (keys used: window.start/end, rule.threshold_pct/
+            hold_pct/hold_days, baskets).
+
+    Returns:
+        {"rows": per-node dicts,
+         "cleared" / "whisper": (counts dict with TP/FP/FN/TN, precision or None, recall or None),
+         "classify": the row classifier callable, classify(row, level)}.
+    """
+    # Close the spec file deterministically; decode explicitly rather than by platform locale.
+    with open(spec_path, encoding="utf-8") as fh:
+        spec = yaml.safe_load(fh)
+    w, rule = spec["window"], spec["rule"]
+    engine = _engine_dates(conn)
+    price_cache: dict[str, list] = {}  # a symbol shared by several baskets is fetched once
+
+    rows = []
+    for node, basket in spec["baskets"].items():
+        prices = {}
+        for sym in basket:
+            if sym not in price_cache:
+                price_cache[sym] = fetch_eod(cfg.fmp_api_key, sym, w["start"], w["end"])
+            prices[sym] = price_cache[sym]
+        missing = [s for s in basket if not prices[s]]  # symbols that returned no price data
+        idx = basket_index(prices)
+        res = resolve_reprice(idx, threshold_pct=rule["threshold_pct"], hold_pct=rule["hold_pct"],
+                              hold_days=rule["hold_days"])
+        ed = engine.get(node, {"cleared": None, "whisper": None})
+        rows.append({
+            "node": node, "basket": basket, "missing": missing,
+            "confirmed": res["confirmed"], "repricing_date": res["repricing_date"], "peak_pct": res["peak_pct"],
+            "cleared_date": ed["cleared"], "whisper_date": ed["whisper"],
+            # lead/runway are only meaningful when the external repricing actually confirmed
+            "lead_cleared": _lead_days(res["repricing_date"], ed["cleared"]) if res["confirmed"] else None,
+            "lead_whisper": _lead_days(res["repricing_date"], ed["whisper"]) if res["confirmed"] else None,
+            # DESIGN_v2.1 Correction A: runway = fraction of the durable move still ahead at signal
+            "runway_cleared": runway_at_signal(idx, ed["cleared"]) if res["confirmed"] else None,
+            "runway_whisper": runway_at_signal(idx, ed["whisper"]) if res["confirmed"] else None,
+        })
+
+    def classify(r, level):
+        # "fired" = the engine has a date at this level; "real" = the repricing rule confirmed.
+        fired = bool(r[f"{level}_date"])
+        real = r["confirmed"]
+        return "TP" if (fired and real) else "FP" if (fired and not real) else "FN" if real else "TN"
+
+    def matrix(level):
+        c = {"TP": 0, "FP": 0, "FN": 0, "TN": 0}
+        for r in rows:
+            c[classify(r, level)] += 1
+        # precision / recall are None (not 0) when their denominator is empty
+        p = c["TP"] / (c["TP"] + c["FP"]) if (c["TP"] + c["FP"]) else None
+        rec = c["TP"] / (c["TP"] + c["FN"]) if (c["TP"] + c["FN"]) else None
+        return c, p, rec
+
+    return {"rows": rows, "cleared": matrix("cleared"), "whisper": matrix("whisper"),
+            "classify": classify}
@@ -0,0 +1,96 @@
+"""External-confirmation data for the resolver (DESIGN_v2 §1). Price series via FMP (already paid for).
+
+This is the *resolving* leg (§6.2): real-world repricing, not discourse. Kept deliberately simple and
+transparent — the resolution rule is pre-registered, so the code here only fetches + applies it.
+"""
+from __future__ import annotations
+
+import requests
+
+_FMP = "https://financialmodelingprep.com"
+
+
+def fetch_eod(api_key: str, symbol: str, start: str, end: str) -> list[tuple[str, float]]:
+    """Daily (date, close) for a symbol. Tries the FMP 'stable' then legacy 'v3' price endpoints.
+
+    Best-effort by design: a transport error, non-200 status, undecodable body, or unusable
+    payload makes the function move on to the next endpoint instead of raising.
+
+    Returns:
+        (YYYY-MM-DD, price) tuples in ascending order, using `close` and falling back to
+        `adjClose`; [] when no endpoint yields usable rows.
+    """
+    attempts = [
+        (f"{_FMP}/stable/historical-price-eod/full", {"symbol": symbol, "from": start, "to": end}),
+        (f"{_FMP}/api/v3/historical-price-full/{symbol}", {"from": start, "to": end}),
+    ]
+    # Context manager releases the session's pooled connections on every exit path.
+    with requests.Session() as s:
+        for url, params in attempts:
+            try:
+                r = s.get(url, params={**params, "apikey": api_key}, timeout=40)
+                if r.status_code != 200:
+                    continue
+                j = r.json()
+            except Exception:  # noqa: BLE001 — deliberate best-effort: fall through to next endpoint
+                continue
+            rows = j.get("historical") if isinstance(j, dict) else j
+            if not isinstance(rows, list) or not rows:
+                continue  # e.g. an error object or a bare string instead of a row list
+            out = []
+            for x in rows:
+                if not isinstance(x, dict):
+                    continue  # skip malformed entries rather than raising outside the try
+                price = x.get("close") or x.get("adjClose")
+                if x.get("date") and price:
+                    out.append((x["date"][:10], price))
+            if out:
+                return sorted(out)
+    return []
+
+
+def basket_index(prices_by_symbol: dict[str, list[tuple[str, float]]]) -> list[tuple[str, float]]:
+    """Equal-weight basket index over the union of dates.
+
+    Each symbol is rescaled by its own first close (so it starts at 1.0 on its first date); the
+    index value on a date is the plain mean of the symbols that have data that day. Symbols with
+    an empty series, or a zero/missing first close, contribute nothing. (A symbol that IPO'd
+    mid-window therefore enters at 1.0 when it starts — flagged by the caller.)
+    """
+    normalized = {}
+    for symbol, series in prices_by_symbol.items():
+        if not series:
+            continue
+        first_close = series[0][1]
+        normalized[symbol] = {day: close / first_close for day, close in series if first_close}
+
+    all_days = set()
+    for per_day in normalized.values():
+        all_days.update(per_day)
+
+    index = []
+    for day in sorted(all_days):
+        present = [per_day[day] for per_day in normalized.values() if day in per_day]
+        if present:
+            index.append((day, sum(present) / len(present)))
+    return index
+
+
+def index_value_at(index: list[tuple[str, float]], date: str | None) -> float | None:
+    """Index value at the last entry dated on or before `date`.
+
+    Falls back to the first entry's value when every entry is later than `date` (the signal
+    predates the data). Returns None for an empty index or a missing date.
+    """
+    if not (index and date):
+        return None
+    found, latest = False, None
+    for day, value in index:
+        if day <= date:
+            found, latest = True, value
+    return latest if found else index[0][1]
+
+
+def runway_at_signal(index: list[tuple[str, float]], signal_date: str | None) -> float | None:
+    """Fraction of the durable move STILL AHEAD at the signal date (DESIGN_v2.1 Correction A).
+
+    Computed as (peak - value_at_signal) / (peak - first_value), floored at 0.0 and rounded to two
+    decimals: 1.0 = the whole move is still ahead, 0.0 = the signal landed at the peak. This is the
+    metric for a long-duration holder — a modestly-late signal with most of the move ahead is still
+    actionable. Returns None for an empty index, a missing signal date, or an index whose peak
+    never exceeds its first value.
+    """
+    if not (index and signal_date):
+        return None
+    start = index[0][1]
+    top = max(value for _, value in index)
+    at_signal = index_value_at(index, signal_date)
+    if top <= start or at_signal is None:
+        return None
+    remaining = (top - at_signal) / (top - start)
+    return round(max(0.0, remaining), 2)
+
+
+def resolve_reprice(index: list[tuple[str, float]], *, threshold_pct: float, hold_pct: float,
+                    hold_days: int) -> dict:
+    """Apply the pre-registered rule: first date the index is ≥ +threshold% vs baseline AND still
+    ≥ +hold% `hold_days` later.
+
+    The baseline is the first index value. "hold_days later" means the first index entry dated on
+    or after (date + hold_days calendar days); a candidate date with no such entry cannot confirm.
+
+    Returns:
+        {"confirmed": bool, "repricing_date": YYYY-MM-DD or None, "peak_pct": float or None}.
+        peak_pct is the maximum gain over baseline in percent (one decimal), reported whether or
+        not the rule confirmed; it is None only for an empty index.
+    """
+    from datetime import datetime, timedelta
+
+    if not index:
+        return {"confirmed": False, "repricing_date": None, "peak_pct": None}
+    base = index[0][1]
+    thr = 1.0 + threshold_pct / 100.0
+    hold = 1.0 + hold_pct / 100.0
+    # Same value on both exits, so compute it once.
+    peak_pct = round((max(v for _, v in index) / base - 1) * 100, 1)
+    for d, v in index:
+        if v / base >= thr:
+            target = (datetime.strptime(d, "%Y-%m-%d") + timedelta(days=hold_days)).strftime("%Y-%m-%d")
+            # Only the first entry on/after the target matters; stop scanning once it is found.
+            held = next((vv for dd, vv in index if dd >= target), None)
+            if held is not None and held / base >= hold:
+                return {"confirmed": True, "repricing_date": d, "peak_pct": peak_pct}
+    return {"confirmed": False, "repricing_date": None, "peak_pct": peak_pct}
@@ -0,0 +1,113 @@
+"""Effective Independent Source Count (EISC) — the system's differentiator (§4.5).
+
+Discount convergence by source connectedness. Five shows that "independently converge" but share one
+guest must count as ~one voice; three shows across macro/energy/ai with no shared guests are gold.
+
+Method (resolved in the design panel): noisy-OR connectedness matrix + inverse-row-sum EISC.
+  - symmetric & order-independent (unlike a sequential pairwise-penalty walk)
+  - each source's contribution is individually explainable ("counts 0.31 because connected to 3 others")
+  - collapses correctly: 5 clones -> ~1.0 ; 5 cross-cluster independents -> ~5.0 (raw)
+  - no eigensolve (unstable at n=2..4, our common case)
+"""
+from __future__ import annotations
+
+from collections import defaultdict
+
+import numpy as np
+
+# --- Tuning constants read by effective_independent_N ---
+# Coupling per edge type: a voiceprint-confirmed shared guest is near-total redundancy on a topic.
+# An edge type missing from this dict contributes no coupling (looked up with a 0.0 default).
+KAPPA = {"shared_guest": 0.85, "citation": 0.45, "community": 0.60}
+# Same-cluster baseline correlation (sources in the same world are partly redundant even w/o an edge).
+CLUSTER_COUPLING = {"bitcoin": 0.55, "vc_consensus": 0.35}
+SAME_CLUSTER_DEFAULT = 0.25  # baseline for a shared cluster that has no CLUSTER_COUPLING entry
+EDGE_CLAMP = 0.95          # cap kappa*weight so a heavily-weighted edge can't exceed near-total
+CAP_VALUE = 0.25          # §4.5: bitcoin / capped sources contribute at most 0.25 of a voice
+CLUSTER_MIN_CONTRIB = 0.5  # a cluster must add >= half an independent voice to count toward K_eff
+
+
+def effective_independent_N(srcs: list[tuple], edges: list[tuple], *, mode: str = "live") -> dict:
+    """Effective number of independent sources, discounted by how connected they are.
+
+    srcs:  [(source_id, source_cluster, cluster_capped_low[, own_network])]
+    edges: [(a, b, type, weight)] — edges touching an unknown source, and self-edges, are ignored.
+    mode='live' (default) DROPS own_network sources (Ten31's own orbit — listening to ourselves,
+    §v2.1); mode='test' keeps them (the reflexivity test fixture).
+
+    Returns {eisc_adj, eisc_raw, k_eff, xcluster_mult, per_source_contrib}; per_source_contrib is
+    rounded to 4 decimals, the other values are not rounded.
+    """
+    if mode == "live":
+        srcs = [s for s in srcs if len(s) <= 3 or not s[3]]
+    source_ids = [s[0] for s in srcs]
+    count = len(source_ids)
+    if not count:
+        return {"eisc_adj": 0.0, "eisc_raw": 0.0, "k_eff": 0, "xcluster_mult": 1.0, "per_source_contrib": {}}
+    known = {sid: pos for pos, sid in enumerate(source_ids)}
+    cluster_of = {s[0]: s[1] for s in srcs}
+    is_capped = {s[0]: (bool(s[2]) or s[1] == "bitcoin") for s in srcs}
+
+    # Edge channel: several edges between one pair combine by noisy-OR, i.e. the product of the
+    # per-edge "not coupled" factors (1 - kappa*weight), with kappa*weight clamped at EDGE_CLAMP.
+    not_coupled: dict = defaultdict(lambda: 1.0)
+    for a, b, etype, w in edges:
+        if a in known and b in known and a != b:
+            weight = w if w is not None else 1.0
+            strength = min(EDGE_CLAMP, KAPPA.get(etype, 0.0) * weight)
+            not_coupled[frozenset((a, b))] *= (1.0 - strength)
+
+    # Symmetric connectedness matrix; diagonal 1.0 (every source is fully tied to itself).
+    C = np.eye(count)
+    for i, a in enumerate(source_ids):
+        for j in range(i + 1, count):
+            b = source_ids[j]
+            edge_part = 1.0 - not_coupled[frozenset((a, b))]     # 0 if no edge
+            same_cluster = cluster_of[a] is not None and cluster_of[a] == cluster_of[b]
+            cluster_part = (CLUSTER_COUPLING.get(cluster_of[a], SAME_CLUSTER_DEFAULT)
+                            if same_cluster else 0.0)
+            # noisy-OR of the edge channel and the same-cluster baseline
+            C[i, j] = C[j, i] = 1.0 - (1.0 - edge_part) * (1.0 - cluster_part)
+
+    row_totals = C.sum(axis=1)                                  # includes the diagonal 1.0
+    contrib = {}
+    eisc_raw = 0.0
+    cluster_mass: dict = defaultdict(float)
+    for i, sid in enumerate(source_ids):
+        ceiling = CAP_VALUE if is_capped[sid] else 1.0
+        share = ceiling * (1.0 / row_totals[i])                 # inverse row sum = this voice's worth
+        contrib[sid] = share
+        eisc_raw += share
+        if not is_capped[sid] and cluster_of[sid]:
+            cluster_mass[cluster_of[sid]] += share
+
+    # Cross-cluster bonus: count NON-capped clusters whose summed contribution reaches
+    # CLUSTER_MIN_CONTRIB. This stops "one guest across many clusters" from earning the gold
+    # multiplier — the raw EISC already collapses that guest to ~1, and k_eff must agree.
+    k_eff = len([m for m in cluster_mass.values() if m >= CLUSTER_MIN_CONTRIB])
+    xmult = max(1.0, 1.0 + 0.5 * (k_eff - 1))                   # 1clu->1.0, 2->1.5, 3->2.0 (gold)
+    return {
+        "eisc_adj": xmult * eisc_raw,
+        "eisc_raw": eisc_raw,
+        "k_eff": k_eff,
+        "xcluster_mult": xmult,
+        "per_source_contrib": {sid: round(val, 4) for sid, val in contrib.items()},
+    }
+
+
+# --- DB helpers (the brain only READS the graph; edges are produced upstream by the voiceprint lib) ---
+def load_source_meta(conn, ids: list[str]) -> list[tuple]:
+    """Fetch (source_id, source_cluster, cluster_capped_low, own_network) for the given ids.
+
+    Duplicate ids are queried once; a NULL own_network is read as 0. Ids with no row in `sources`
+    are simply absent from the result.
+    """
+    unique_ids = list(dict.fromkeys(ids))
+    if not unique_ids:
+        return []
+    placeholders = ",".join("?" * len(unique_ids))
+    sql = (
+        f"SELECT source_id, source_cluster, cluster_capped_low, COALESCE(own_network,0) "
+        f"FROM sources WHERE source_id IN ({placeholders})"
+    )
+    return [(row[0], row[1], row[2], row[3]) for row in conn.execute(sql, unique_ids).fetchall()]
+
+
+def load_edges(conn, ids: list[str]) -> list[tuple]:
+    """Fetch (src_a, src_b, edge_type, weight) for edges whose BOTH endpoints are in `ids`.
+
+    Duplicate ids are collapsed first; an empty id list returns [] without touching the DB.
+    """
+    unique_ids = list(dict.fromkeys(ids))
+    if not unique_ids:
+        return []
+    placeholders = ",".join("?" * len(unique_ids))
+    sql = (f"SELECT src_a, src_b, edge_type, weight FROM source_edges "
+           f"WHERE src_a IN ({placeholders}) AND src_b IN ({placeholders})")
+    # The id list is bound twice: once per IN (...) clause.
+    found = conn.execute(sql, unique_ids * 2).fetchall()
+    return [(row[0], row[1], row[2], row[3]) for row in found]
+
+
+def eisc_for(conn, source_ids: list[str], *, mode: str = "live") -> dict:
+    """Convenience wrapper: EISC for a set of source_ids, with cluster/cap/own_network metadata
+    and edges loaded from the database.
+
+    mode='live' drops own_network sources; mode='test' keeps them (§v2.1 condition 1).
+    """
+    unique_ids = list(dict.fromkeys(source_ids))
+    meta = load_source_meta(conn, unique_ids)
+    edges = load_edges(conn, unique_ids)
+    return effective_independent_N(meta, edges, mode=mode)
@@ -0,0 +1,49 @@
+"""Ledger + candidate_scores writers. Log EVERY bar-clearer from day one (§6.6 denominator).
+
+date_logged = as_of (backtest rows carry historical dates so lead-time math is correct). The
+discourse_metric JSON is FROZEN here at log time — the resolver (separate forward pass) never edits it.
+Grant's rating lives in human_evaluations; the model never reads it pre-log (§6.7).
+"""
+from __future__ import annotations
+
+import hashlib
+import json
+
+
+def _sig_id(scorer: str, key: str, as_of: str) -> str:
+    """Deterministic ledger signal id: "sig_" + first 16 hex chars of SHA-1("scorer|key|as_of")."""
+    material = "{}|{}|{}".format(scorer, key, as_of)
+    digest = hashlib.sha1(material.encode()).hexdigest()
+    return "sig_" + digest[:16]
+
+
+def _score_id(scorer: str, key: str, as_of: str) -> str:
+    """Deterministic candidate_scores id: full SHA-1 hex digest of "cs|scorer|key|as_of"."""
+    material = "cs|{}|{}|{}".format(scorer, key, as_of)
+    return hashlib.sha1(material.encode()).hexdigest()
+
+
+def record_candidate_score(conn, result: dict, as_of: str, evidence: bool, promotion: bool) -> None:
+    """Write one scorer result to `candidate_scores` (INSERT OR REPLACE) and commit.
+
+    The row id is derived from (scorer, key, as_of), where key is the first non-empty of
+    node_id / conviction_id / topic_canonical, or "" when none is set.
+    """
+    key = ""
+    for field in ("node_id", "conviction_id", "topic_canonical"):
+        if result.get(field):
+            key = result[field]
+            break
+    scorer = result["scorer"]
+    # NOTE(review): slicing the serialized JSON at 8000 chars can leave an unparseable document
+    # for large inputs — confirm readers of inputs_json tolerate that.
+    inputs_json = json.dumps(result["inputs"])[:8000]
+    row = (
+        _score_id(scorer, key, as_of), scorer, as_of,
+        result.get("topic_canonical"), result.get("node_id"), result.get("conviction_id"),
+        result["score"], int(evidence), int(promotion), inputs_json,
+    )
+    conn.execute(
+        """INSERT OR REPLACE INTO candidate_scores
+             (score_id, scorer, as_of, topic_canonical, node_id, conviction_id, score,
+              cleared_evidence_bar, cleared_promotion_bar, inputs_json)
+           VALUES (?,?,?,?,?,?,?,?,?,?)""",
+        row,
+    )
+    conn.commit()
+
+
+def log_candidate(conn, *, scorer: str, as_of: str, ledger_type: str, proposition: str,
+                  discourse_metric: dict, origin_conviction_id=None, origin_node_id=None) -> str:
+    """Append a bar-clearing candidate to `ledger` (INSERT OR IGNORE), commit, return its signal_id.
+
+    The signal_id is derived from (scorer, key, as_of) with key = origin_node_id, else
+    origin_conviction_id, else the proposition text; an existing row with that id is left
+    untouched. date_logged is set to as_of and model_confidence to NULL. The stored
+    discourse_metric is the caller's dict plus a "scorer" entry.
+    """
+    identity = origin_node_id or origin_conviction_id or proposition
+    signal_id = _sig_id(scorer, identity, as_of)
+    frozen_metric = dict(discourse_metric, scorer=scorer)
+    # NOTE(review): as in candidate_scores, the 8000-char slice can cut the JSON mid-document.
+    row = (signal_id, ledger_type, proposition[:1000], as_of, json.dumps(frozen_metric)[:8000], None,
+           origin_conviction_id, origin_node_id)
+    conn.execute(
+        """INSERT OR IGNORE INTO ledger
+             (signal_id, type, proposition, date_logged, discourse_metric, model_confidence,
+              origin_conviction_id, origin_node_id)
+           VALUES (?,?,?,?,?,?,?,?)""",
+        row,
+    )
+    conn.commit()
+    return signal_id
@@ -0,0 +1,80 @@
+"""Local-LLM scoring helpers (§4.4). Bounded labeling passes over PRE-FILTERED candidates only —
+never nomination from the raw corpus (§5.1). JSON mode, temp 0, no thinking → deterministic.
+
+Helper #2 (derivative-relevance) is built first — it's the one the §7.1 backtest needs. Helper #1
+(stance-folding for Job A contrarian) comes with the forward pilot.
+"""
+from __future__ import annotations
+
+import json
+import logging
+
+# Module-level logger; derivative_relevance uses it to report empty parses.
+log = logging.getLogger(__name__)
+
+# System prompt for the derivative-relevance helper. It is sent verbatim as the "system" message,
+# so any edit to this text changes model behavior. It fixes the output contract that _parse
+# expects: a JSON object with a "results" list of {claim_id, corroborates, direction}.
+_REL_SYS = (
+    "You assess whether claims corroborate a specific investment hypothesis (a 2nd/3rd-order "
+    "derivative of a thesis). For EACH claim decide: does it provide real-world evidence that the "
+    "hypothesis is PLAYING OUT (corroborates), and the direction. 'affirms' = supports the hypothesis; "
+    "'contradicts' = is evidence against it; 'tangential' = same topic words but not actually about the "
+    "hypothesis (e.g. 'transformers' the ML architecture vs the electrical-grid kind). Be strict: a "
+    "passing mention is tangential, not corroboration. "
+    "TWO HARD RULES (these are the difference between catching a real signal and being fooled):\n"
+    "1) REALIZED-ONLY. The hypothesis must be PLAYING OUT in fact. Announcements, plans, intentions, "
+    "forecasts, targets, and 'may/will/expects/poised-to/aims-to/up-to' language are NOT corroboration — "
+    "they are 'tangential' unless the claim states the thing has ACTUALLY HAPPENED / been DEPLOYED / "
+    "closed. A $2B program 'announced' or capital 'made available' is NOT capital deployed. A company "
+    "that 'may consider' or 'expects' something has not done it.\n"
+    "2) ROLE-MATCH. The actor in the claim must occupy the role the hypothesis is about. If the "
+    "hypothesis is that capital PROVIDERS are funding/supplying something, then a BORROWER or USER on the "
+    "demand side (e.g. a firm posting an asset AS collateral to RECEIVE a loan) is the wrong side of the "
+    "transaction → 'tangential' to that hypothesis, not 'affirms'. "
+    'Return ONLY JSON: {"results":[{"claim_id":"...","corroborates":true|false,'
+    '"direction":"affirms"|"contradicts"|"tangential"}]}.'
+)
+
+
+def _parse(raw: str) -> list[dict]:
+    """Extract the per-claim records from a model reply; never raises on a malformed reply.
+
+    Tries the whole reply as JSON first, then falls back to the span between the first "{" and the
+    last "}" (for replies wrapped in prose). Only dict records with a truthy "claim_id" are kept.
+
+    Returns:
+        The filtered "results" list, or [] when the reply is not text, is not recoverable JSON,
+        is not a JSON object, or has a "results" value that is not a list.
+    """
+    if isinstance(raw, (bytes, bytearray)):
+        raw = raw.decode("utf-8", errors="replace")
+    if not isinstance(raw, str):
+        return []  # e.g. None from a failed backend call — nothing to recover
+    try:
+        obj = json.loads(raw)
+    except (ValueError, RecursionError):
+        i, j = raw.find("{"), raw.rfind("}")
+        if i < 0 or j < 0:
+            return []
+        try:
+            obj = json.loads(raw[i:j + 1])
+        except (ValueError, RecursionError):
+            return []
+    res = obj.get("results", []) if isinstance(obj, dict) else []
+    if not isinstance(res, list):
+        return []  # e.g. {"results": null}
+    return [r for r in res if isinstance(r, dict) and r.get("claim_id")]
+
+
+def derivative_relevance(backend, derivative: str, claims: list[dict]) -> dict[str, dict]:
+    """Label each pre-filtered claim's relevance to one derivative hypothesis.
+
+    claims: [{claim_id, proposition}]. Returns {claim_id: {corroborates, direction}}.
+    Filters retrieval near-misses; it cannot ADD claims search didn't return (not a nominator).
+    Claims the model does not mention are absent from the result; an empty `claims` list returns
+    {} without calling the backend.
+
+    `backend` must provide complete_json(messages, max_tokens=...) returning the reply text.
+    """
+    if not claims:
+        return {}
+    listing = "\n".join(f"- [{c['claim_id']}] {c['proposition']}" for c in claims)
+    user = (f"HYPOTHESIS (derivative): {derivative}\n\nCLAIMS:\n{listing}\n\n"
+            f"Judge each claim id.")
+    messages = [{"role": "system", "content": _REL_SYS}, {"role": "user", "content": user}]
+    # Output is ~one JSON record per claim (claim_id + corroborates + direction ≈ 70-100 tokens). At
+    # top_k=60 that's ~5k tokens — a fixed 3000 budget truncated mid-array → empty parse → a node
+    # silently zeroed (the source of the unstable 5-affirm/0-affirm flip). Size the budget to the batch.
+    budget = max(3000, 120 * len(claims) + 500)
+    parsed = []
+    for attempt in range(2):  # one retry — a gateway-under-load truncation shouldn't zero out a node
+        raw = backend.complete_json(messages, max_tokens=budget)
+        # A non-text reply (e.g. None) counts as an empty parse so it gets the same retry + warning
+        # instead of raising while parsing or while building the log message.
+        parsed = _parse(raw) if isinstance(raw, str) else []
+        if parsed:
+            break
+        log.warning("derivative_relevance empty parse (attempt %d) for %r; raw[:160]=%r",
+                    attempt + 1, derivative[:50], str(raw)[:160])
+    # The listing presents ids as `- [{claim_id}] ...`; the model INCONSISTENTLY echoes the id back with
+    # the surrounding brackets ("[edgar:...]") — which then misses the bracket-less lookup key and the
+    # whole node reads as 0/(missing). Normalize the brackets+whitespace so matching is robust either way.
+    out = {}
+    for r in parsed:
+        cid = str(r["claim_id"]).strip().strip("[]").strip()
+        out[cid] = {"corroborates": bool(r.get("corroborates")),
+                    "direction": r.get("direction", "tangential")}
+    return out
@@ -0,0 +1,27 @@
+"""Resolver — the SEPARATE forward pass that closes the loop (§6.2, §6.3).
+
+ARCHITECTURALLY ISOLATED from the scorers: it has no shared write path with them. Scorers write
+candidate_scores + ledger rows with outcome columns NULL and a FROZEN discourse_metric. The resolver
+runs later (larger as_of), reads ledger rows whose date_logged < as_of_now, and writes ONLY
+resolution_date / discourse_outcome / external_outcome / lead_time_days. It is FORBIDDEN from touching
+discourse_metric — that is the structural reason the ledger can't reward noticing what already happened.
+
+Implementation note: real resolutions need forward time (the clock can't be backfilled). For the
+backtest, the discourse leg can be resolved by re-running the discourse metric forward from date_logged;
+the external leg (price/filings/human check, §6.5) is filled as that evidence arrives. Stubbed now to
+lock the architecture; filled out for the forward pilot.
+"""
+from __future__ import annotations
+
+
+def resolve_discourse_leg(conn, sc, cfg, *, as_of_now: str) -> int:
+    """Resolve the discourse leg for ledger rows logged before `as_of_now` that have no resolution.
+
+    Intended behavior: re-measure discourse forward and set discourse_outcome + lead_time
+    (forward-only; never reads/edits discourse_metric). STUB — implemented for the forward pilot:
+    today it only selects the pending rows and always returns 0 (count resolved).
+    """
+    pending_sql = (
+        "SELECT signal_id, date_logged FROM ledger "
+        "WHERE resolution_date IS NULL AND date_logged < ?"
+    )
+    pending = conn.execute(pending_sql, (as_of_now,)).fetchall()  # noqa: F841 — used once the TODO lands
+    # TODO(forward-pilot): re-run windowed independence from date_logged→as_of_now for each row's
+    # origin derivative; set discourse_outcome in {up_cross_cluster,up_single_cluster,flat,down}.
+    resolved = 0
+    return resolved
@@ -0,0 +1,81 @@
+"""Scoring orchestrator. For Job B / the §7.1 backtest: march as_of dates, score every conviction +
+fan-out derivative, gate, log the denominator, promote nodes.
+"""
+from __future__ import annotations
+
+import logging
+
+from ..extract.backends import from_config as backend_from_config
+from . import bar, under_acted
+from .asof import Scorer
+from .ledger_writer import log_candidate, record_candidate_score
+
+# Module-level logger; run_backtest reports per-date evidence-bar counts through it.
+log = logging.getLogger(__name__)
+
+
+def _nodes_for(conn, as_of, mode, conviction_ids):
+    """List every scoreable node: the convictions first, then their fan-out derivatives.
+
+    Each node is a dict {conviction_id, node_id, derivative, level, exposure, breaker}; node_id is
+    None for a conviction itself, and a fan-out node inherits level/exposure/breaker from its
+    parent conviction. A non-empty `conviction_ids` restricts both queries to those convictions.
+    Only mode == "forward" filters fan-out nodes by created_at <= as_of.
+    """
+    def as_node(conviction_id, node_id, derivative, level, exposure, breaker):
+        return {"conviction_id": conviction_id, "node_id": node_id, "derivative": derivative,
+                "level": level, "exposure": exposure, "breaker": bool(breaker)}
+
+    conviction_sql = (
+        "SELECT conviction_id, thematic_proposition, conviction_level, current_exposure, is_thesis_breaker "
+        "FROM conviction_log"
+    )
+    conviction_params = []
+    if conviction_ids:
+        marks = ",".join("?" * len(conviction_ids))
+        conviction_sql += f" WHERE conviction_id IN ({marks})"
+        conviction_params = list(conviction_ids)
+    nodes = [as_node(row[0], None, row[1], row[2], row[3], row[4])
+             for row in conn.execute(conviction_sql, conviction_params)]
+
+    fanout_sql = ("SELECT f.node_id, f.parent_conviction_id, f.derivative_proposition, c.conviction_level, "
+                  "c.current_exposure, c.is_thesis_breaker FROM fanout_nodes f "
+                  "JOIN conviction_log c ON c.conviction_id = f.parent_conviction_id")
+    filters, fanout_params = [], []
+    if conviction_ids:
+        filters.append(f"f.parent_conviction_id IN ({','.join('?' * len(conviction_ids))})")
+        fanout_params.extend(conviction_ids)
+    if mode == "forward":   # backtest uses the seeded tree as the as-of-2023 hypothesis (no created_at leak)
+        filters.append("f.created_at <= ?")
+        fanout_params.append(as_of)
+    if filters:
+        fanout_sql += " WHERE " + " AND ".join(filters)
+    nodes.extend(as_node(row[1], row[0], row[2], row[3], row[4], row[5])
+                 for row in conn.execute(fanout_sql, fanout_params))
+    return nodes
+
+
+def run_under_acted(conn, sc, cfg, *, as_of, mode="backtest", conviction_ids=None, window_days=28) -> list[dict]:
+    """Score, gate and log every conviction / fan-out node as of one date.
+
+    Inside a Scorer context for `as_of`, each node is scored, passed through the under_acted bar,
+    and always written to candidate_scores. A node that clears the evidence bar is also logged to
+    the ledger, and — when it is a fan-out node — has its status set to "signal" (promotion bar
+    cleared) or "corroborated".
+
+    Returns one {"node", "result", "evidence", "promotion"} dict per node, in scoring order.
+    """
+    backend = backend_from_config(cfg, sc)
+    scored = []
+    with Scorer(conn, as_of, mode=mode):
+        for node in _nodes_for(conn, as_of, mode, conviction_ids):
+            node_id, conviction_id = node["node_id"], node["conviction_id"]
+            result = under_acted.score_node(
+                conn, sc, backend, as_of=as_of, derivative=node["derivative"],
+                conviction_id=conviction_id, node_id=node_id,
+                conviction_level=node["level"], exposure=node["exposure"], is_breaker=node["breaker"],
+                window_days=window_days,
+            )
+            evidence, promotion = bar.evaluate("under_acted", result, conn=conn)
+            record_candidate_score(conn, result, as_of, evidence, promotion)
+            if evidence:
+                log_candidate(conn, scorer="under_acted", as_of=as_of,
+                              ledger_type="under_acted_conviction", proposition=node["derivative"],
+                              discourse_metric=result["inputs"], origin_conviction_id=conviction_id,
+                              origin_node_id=node_id)
+            if evidence and node_id:
+                new_status = "signal" if promotion else "corroborated"
+                conn.execute("UPDATE fanout_nodes SET status=? WHERE node_id=?", (new_status, node_id))
+                conn.commit()
+            scored.append({"node": node, "result": result, "evidence": evidence, "promotion": promotion})
+    return scored
+
+
+def run_backtest(conn, sc, cfg, *, conviction_id, dates, window_days=90) -> list[tuple]:
+    """March one conviction through `dates` in backtest mode.
+
+    Returns [(as_of, results)] in the order given, where results is run_under_acted's output for
+    that date. Logs how many nodes cleared the evidence bar at each date.
+    """
+    timeline = []
+    for as_of in dates:
+        results = run_under_acted(conn, sc, cfg, as_of=as_of, mode="backtest",
+                                  conviction_ids=[conviction_id], window_days=window_days)
+        timeline.append((as_of, results))
+        cleared = sum(1 for entry in results if entry["evidence"])
+        log.info("as_of %s: %d/%d nodes cleared evidence bar", as_of, cleared, len(results))
+    return timeline
@@ -0,0 +1,105 @@
+"""Two-sided net-corroboration (DESIGN_v2.1 H5 + condition 3) — the instrument for the adversarial cases.
+
+For a derivative, track the INDEPENDENCE-WEIGHTED affirms MINUS denies over time. This is the right
+output for Strike/Battery (where the question is "did the engine distinguish real adoption from
+narrative, and catch the contradiction?"), not runway:
+  - STRIKE (reflexivity): a PASS = net stays low/quiet in LIVE mode (own_network dropped) while it
+    would have fired in TEST mode (own_network kept) → the engine refuses the intra-cluster echo.
+  - BATTERY (timing): the DEMAND derivative's net rises while the SUPPLY derivative's net stays flat →
+    "half-confirmed, the load-bearing half isn't moving" = the eroding-conviction signal.
+Reuses the §4.6 relevance helper, which already returns direction affirms|contradicts|tangential.
+"""
+from __future__ import annotations
+
+from .independence import eisc_for
+from .llm_helpers import derivative_relevance
+from .windows import window_bounds
+
+
+def classify_corpus(sc, backend, derivative: str, as_of: str, *, top_k: int = 60) -> list[dict]:
+    """Retrieve (as-of filtered) + LLM-classify each claim's direction toward the derivative.
+
+    Args:
+        sc: search client; `sc.search(...)` is expected to return a dict with a "data" list of hits,
+            each hit a dict carrying a "payload" dict. Anything else is treated as "no usable hit".
+        backend: passed through to `derivative_relevance`.
+        derivative: the proposition text used both as the search query and as the relevance target.
+        as_of: ISO date (YYYY-MM-DD); hits dated after it are dropped (look-ahead guard).
+        top_k: number of hits requested from the search.
+
+    Returns:
+        One dict per claim classified "affirms" or "contradicts" (tangential dropped), with keys
+        claim_id, proposition, date (truncated to YYYY-MM-DD), source_id, direction. Returns [] when
+        nothing survives the filter, in which case the LLM helper is not called.
+    """
+    res = sc.search(derivative, collection="propositions", top_k=top_k, rerank=True)
+    # `or []` also covers a response whose "data" key is present but null.
+    hits = (res.get("data") or []) if isinstance(res, dict) else []
+    cand = []
+    for h in hits:
+        # Skip malformed hits instead of crashing the whole classification run.
+        pl = h.get("payload") if isinstance(h, dict) else None
+        if not isinstance(pl, dict):
+            continue
+        d = pl.get("date")
+        # Dates are compared as ISO strings, so a missing or non-string date cannot be as-of checked.
+        if not pl.get("claim_id") or not isinstance(d, str) or not d or d[:10] > as_of:
+            continue
+        cand.append({"claim_id": pl["claim_id"], "proposition": pl.get("proposition", ""),
+                     "date": d[:10], "source_id": pl.get("source_id")})
+    if not cand:
+        return []
+    rel = derivative_relevance(backend, derivative,
+                               [{"claim_id": c["claim_id"], "proposition": c["proposition"]} for c in cand])
+    out = []
+    for c in cand:
+        # Claims the helper did not return a verdict for are treated as tangential and dropped.
+        direction = rel.get(c["claim_id"], {}).get("direction", "tangential")
+        if direction in ("affirms", "contradicts"):
+            out.append({**c, "direction": direction})
+    return out
+
+
+# DESIGN_v2 ADOPT #1 (claim-type weighting): a node "resolves" on REALIZED, descriptive disclosure,
+# never on forecasts or stated intent. A source counts toward the net only when it carries a HARD
+# (realized-fact) claim on that side. Predictive/interpretive claims (forecasts, opinion,
+# 'may consider', 'expects') are exactly what fooled the supply axis on Battery, so on their own
+# they do not qualify a source.
+_HARD_CLAIM_TYPES = ("descriptive", "reactive")
+
+
+def _hard_sources(conn, claim_ids: list[str]) -> set:
+    """Return the source_ids owning at least one realized-fact (descriptive/reactive) claim in claim_ids.
+
+    An empty `claim_ids` short-circuits to an empty set without touching the database.
+    """
+    if not claim_ids:
+        return set()
+    # One "?" placeholder per bound value; the values themselves never enter the SQL text.
+    id_marks = ",".join("?" for _ in claim_ids)
+    type_marks = ",".join("?" for _ in _HARD_CLAIM_TYPES)
+    sql = f"SELECT DISTINCT source_id FROM claims WHERE claim_id IN ({id_marks}) AND claim_type IN ({type_marks})"
+    params = [*claim_ids, *_HARD_CLAIM_TYPES]
+    return {row[0] for row in conn.execute(sql, params).fetchall()}
+
+
+def net_at(conn, classified: list[dict], as_of: str, *, window_days: int = 90, mode: str = "live",
+           require_hard_evidence: bool = True) -> dict:
+    """Net independence-weighted corroboration over the single trailing window that ends at as_of.
+
+    Claims dated in (start, end] are split into affirming and contradicting sides. When
+    `require_hard_evidence` is set (the default), a source is kept on a side only if it contributed a
+    realized-fact claim there — forecasts/intent alone do not qualify it (the announced-vs-deployed /
+    opinion-vs-fact guard). Each side's surviving sources are scored with `eisc_for(..., mode=mode)`.
+
+    Returns a dict with the rounded per-side EISC, their net, raw claim counts per side, the number
+    of affirming sources kept and dropped, and how many kept affirming sources are own_network.
+    """
+    _, lo, hi = window_bounds(as_of, n=1, days=window_days)[0]
+
+    # Split the in-window claims by direction; any other direction is ignored.
+    affirming, denying = [], []
+    for claim in classified:
+        if not (lo < claim["date"] <= hi):
+            continue
+        if claim["direction"] == "affirms":
+            affirming.append(claim)
+        elif claim["direction"] == "contradicts":
+            denying.append(claim)
+
+    every_aff_src = {claim["source_id"] for claim in affirming}
+    every_den_src = {claim["source_id"] for claim in denying}
+    if not require_hard_evidence:
+        aff_sources = list(every_aff_src)
+        den_sources = list(every_den_src)
+    else:
+        hard_aff = _hard_sources(conn, [claim["claim_id"] for claim in affirming])
+        hard_den = _hard_sources(conn, [claim["claim_id"] for claim in denying])
+        aff_sources = list(every_aff_src & hard_aff)
+        den_sources = list(every_den_src & hard_den)
+
+    # A side with no qualifying sources contributes exactly 0.0 and skips the EISC call.
+    affirm_eisc = 0.0
+    if aff_sources:
+        affirm_eisc = eisc_for(conn, aff_sources, mode=mode)["eisc_adj"]
+    deny_eisc = 0.0
+    if den_sources:
+        deny_eisc = eisc_for(conn, den_sources, mode=mode)["eisc_adj"]
+
+    # Count how many of the kept affirming sources are flagged own_network.
+    own_count = 0
+    if aff_sources:
+        marks = ",".join("?" * len(aff_sources))
+        cursor = conn.execute(
+            f"SELECT COUNT(*) FROM sources WHERE source_id IN ({marks}) AND COALESCE(own_network,0)=1",
+            aff_sources,
+        )
+        own_count = cursor.fetchone()[0]
+
+    return {
+        "as_of": as_of,
+        "affirms_eisc": round(affirm_eisc, 2),
+        "denies_eisc": round(deny_eisc, 2),
+        "net": round(affirm_eisc - deny_eisc, 2),
+        "n_affirm": len(affirming),
+        "n_deny": len(denying),
+        "hard_affirm_src": len(aff_sources),
+        "soft_affirm_src_dropped": len(every_aff_src) - len(aff_sources),
+        "own_network_affirm_src": own_count,
+    }
+
+
+def trajectory(conn, sc, backend, derivative: str, as_of_dates: list[str], *,
+               window_days: int = 90, mode: str = "live", top_k: int = 60) -> list[dict]:
+    """Compute the net-corroboration curve: one `net_at` reading per date in as_of_dates, in order.
+
+    The corpus is re-retrieved and re-classified for every date. Run the curve twice (mode='live' vs
+    'test') to see what the own_network quarantine removes — the reflexivity measurement.
+    """
+    return [
+        net_at(
+            conn,
+            classify_corpus(sc, backend, derivative, as_of, top_k=top_k),
+            as_of,
+            window_days=window_days,
+            mode=mode,
+        )
+        for as_of in as_of_dates
+    ]
@@ -0,0 +1,75 @@
+"""Under-acted-conviction scorer — Job B, the §7.1 backtest target.
+
+score = conviction_weight x exposure_gap x rising_independent_corroboration
+
+Fires when Ten31 believes something (high conviction), has little/no position (exposure gap), and the
+world is beginning to corroborate it or a derivative of it — independently and with acceleration. This
+is the signal that should have flagged "size up power-infra picks-and-shovels" in 2023.
+
+Exposure is joined LOCALLY (never crosses the frontier boundary, §4.6). Corroboration is RETRIEVED
+(stats nominate), then an LLM helper only FILTERS retrieval near-misses (§5.1) — it cannot add claims.
+"""
+from __future__ import annotations
+
+from .llm_helpers import derivative_relevance
+from .windows import windowed_independence
+
+# Score multiplier per conviction level. score_node falls back to 0.4 (the "med" weight) for a
+# level that is not listed here.
+CONVICTION_WEIGHT = {"low": 0.15, "med": 0.4, "med-high": 0.7, "high": 1.0}
+# Score multiplier per exposure bucket: the less existing exposure, the larger the gap. score_node
+# falls back to 0.6 (the "unset" value) for a bucket that is not listed here.
+# NOTE(review): bucket keys presumably denote position-size ranges ("lt2" = under 2, "2to10",
+# "gt10") — confirm the unit against whatever populates `exposure`.
+EXPOSURE_GAP = {"none": 1.0, "lt2": 0.8, "2to10": 0.4, "gt10": 0.1, "unset": 0.6}
+
+
+def score_node(conn, sc, backend, *, as_of: str, derivative: str, conviction_id: str,
+               node_id: str | None, conviction_level: str, exposure: str,
+               is_breaker: bool = False, top_k: int = 40, window_days: int = 28) -> dict:
+    """Score one conviction node: conviction_weight x exposure_gap x rising independent corroboration.
+
+    Steps:
+      1. RETRIEVE candidate claims for `derivative` via `sc.search`, keeping only well-formed hits
+         dated on or before `as_of`.
+      2. FILTER them with `derivative_relevance`: a claim is confirmed only if the helper marks it
+         as corroborating with direction "affirms". The helper can drop claims, never add them.
+      3. CORROBORATION = max(0, acceleration) * eisc0 from `windowed_independence` over the
+         confirmed (date, source_id) pairs.
+
+    Args:
+        as_of: ISO date (YYYY-MM-DD) bounding which claims may be used.
+        derivative: proposition text used as the search query and relevance target.
+        conviction_level / exposure: keys into CONVICTION_WEIGHT / EXPOSURE_GAP (unknown keys fall
+            back to 0.4 / 0.6).
+        is_breaker: when true the score is the bare corroboration, without the conviction and
+            exposure multipliers.
+        top_k: number of hits requested from the search.
+        window_days: window length handed to `windowed_independence`.
+
+    Returns:
+        The `_result(...)` dict. A failed search or an empty candidate set yields score 0.0 with a
+        "reason" entry in the inputs instead of raising.
+    """
+    cw = CONVICTION_WEIGHT.get(conviction_level, 0.4)
+    eg = EXPOSURE_GAP.get(exposure, 0.6)
+
+    # 1. RETRIEVE (stats nominate): hybrid search over embedded propositions; as-of post-filter.
+    try:
+        res = sc.search(derivative, collection="propositions", top_k=top_k, rerank=True)
+    except Exception as e:  # noqa: BLE001
+        return _result(conviction_id, node_id, 0.0, {"reason": f"search_failed:{str(e)[:60]}"},
+                       cw, eg, exposure, is_breaker)
+    # `or []` also covers a response whose "data" key is present but null.
+    hits = (res.get("data") or []) if isinstance(res, dict) else []
+    cand = []
+    for h in hits:
+        # Skip malformed hits (non-dict hit or payload) rather than aborting the node's score.
+        pl = h.get("payload") if isinstance(h, dict) else None
+        if not isinstance(pl, dict):
+            continue
+        d = pl.get("date")
+        # Qdrant can't date-filter; do it here. Dates are compared as ISO strings, so a missing or
+        # non-string date cannot be as-of checked and the hit is dropped.
+        if not pl.get("claim_id") or not isinstance(d, str) or not d or d[:10] > as_of:
+            continue
+        cand.append({"claim_id": pl["claim_id"], "proposition": pl.get("proposition", ""),
+                     "date": d, "source_id": pl.get("source_id")})
+    if not cand:
+        return _result(conviction_id, node_id, 0.0, {"reason": "no_retrieval", "n_retrieved": 0},
+                       cw, eg, exposure, is_breaker)
+
+    # 2. FILTER near-misses with the LLM (affirms-only). Not a nominator — can't add claims.
+    rel = derivative_relevance(backend, derivative,
+                               [{"claim_id": c["claim_id"], "proposition": c["proposition"]} for c in cand])
+    confirmed = [c for c in cand
+                 if rel.get(c["claim_id"], {}).get("corroborates")
+                 and rel[c["claim_id"]].get("direction") == "affirms"]
+    n_src = len({c["source_id"] for c in confirmed})
+
+    # 3. CORROBORATION = independence-weighted acceleration over the confirmed set (treat as a topic).
+    #    window_days matches corpus cadence: ~90d for quarterly filings/earnings, ~28d for weekly podcasts.
+    wi = windowed_independence(conn, [(c["date"], c["source_id"]) for c in confirmed], as_of, days=window_days)
+    a_corrob = wi["acceleration"]
+    eisc_corrob = wi["eisc0"]
+    # Negative acceleration is clamped to 0, so decelerating corroboration can never produce a score.
+    corroboration = max(0.0, a_corrob) * eisc_corrob
+
+    # Breaker nodes are scored on corroboration alone.
+    score = corroboration if is_breaker else cw * eg * corroboration
+    inputs = {
+        "as_of": as_of, "derivative": derivative, "n_retrieved": len(cand), "n_confirmed": len(confirmed),
+        "n_src": n_src, "a_corrob": a_corrob, "eisc_corrob": eisc_corrob, "k_eff0": wi["k_eff0"],
+        "window_counts": wi["counts"], "window_eisc": wi["eisc"], "corroboration": round(corroboration, 3),
+        # Capped at 50 ids to keep the recorded inputs bounded.
+        "confirmed_claim_ids": [c["claim_id"] for c in confirmed][:50],
+    }
+    return _result(conviction_id, node_id, score, inputs, cw, eg, exposure, is_breaker)
+
+
+def _result(conviction_id, node_id, score, inputs, cw, eg, exposure, is_breaker) -> dict:
+    """Assemble the scorer's result record.
+
+    The caller's `inputs` dict is copied, not mutated; the weighting context (conviction_weight,
+    exposure_gap, exposure, is_breaker) is layered on top, and the score is rounded to 4 decimals.
+    """
+    merged = dict(inputs)
+    merged.update(conviction_weight=cw, exposure_gap=eg, exposure=exposure, is_breaker=is_breaker)
+    return {
+        "scorer": "under_acted",
+        "conviction_id": conviction_id,
+        "node_id": node_id,
+        "score": round(float(score), 4),
+        "inputs": merged,
+    }
@@ -0,0 +1,53 @@
+"""Temporal windows + windowed independence (the single temporal layer, §4.4).
+
+28-day non-overlapping windows anchored at as_of (W0 ends at as_of, then back). Non-overlapping
+avoids autocorrelation faking significance. The signal is the discrete 2nd derivative of the
+INDEPENDENCE-WEIGHTED flow (EISC per window), never the raw count — so a topic that "accelerates"
+only because one show booked the same guest three times has flat N(W).
+"""
+from __future__ import annotations
+
+from datetime import datetime, timedelta
+
+from .independence import eisc_for
+
+WINDOW_DAYS = 28  # default length of one window, in days
+N_WINDOWS = 3     # default number of windows (W0, W1, W2)
+
+
+def _d(s: str) -> datetime:
+    """Parse the leading YYYY-MM-DD of `s` into a naive datetime (any time-of-day suffix is ignored)."""
+    day_part = s[:10]
+    return datetime.strptime(day_part, "%Y-%m-%d")
+
+
+def window_bounds(as_of: str, *, n: int = N_WINDOWS, days: int = WINDOW_DAYS) -> list[tuple[int, str, str]]:
+    """Return [(idx, start_iso, end_iso)] for n back-to-back windows of `days` days.
+
+    W0 ends at as_of and every later index steps one window further into the past, so window idx
+    ends exactly where window idx+1 starts. Nothing extends past as_of.
+    """
+    anchor = _d(as_of)
+    iso = "%Y-%m-%d"
+    return [
+        (
+            idx,
+            (anchor - timedelta(days=(idx + 1) * days)).strftime(iso),
+            (anchor - timedelta(days=idx * days)).strftime(iso),
+        )
+        for idx in range(n)
+    ]
+
+
+def windowed_independence(conn, rows: list[tuple], as_of: str, *, n: int = N_WINDOWS,
+                          days: int = WINDOW_DAYS) -> dict:
+    """Bucket rows into the windows from `window_bounds` and score each window's distinct sources.
+
+    rows: [(date_iso, source_id)]. A row belongs to a window when start < date[:10] <= end; rows
+    with a falsy date are ignored. Per window this records the raw row count plus the `eisc_adj`
+    and `k_eff` that `eisc_for` reports for the window's distinct sources (0.0 / 0 when empty).
+
+    Returns {counts, eisc, k_eff, acceleration, eisc0, k_eff0, sources0, n_total}, where
+    acceleration = N0 - 2*N1 + N2 (independence-weighted 2nd derivative) when n >= 3, else 0.0.
+    """
+    counts, eiscs, keffs, src_sets = [], [], [], []
+    for _, lo, hi in window_bounds(as_of, n=n, days=days):
+        in_window = [row for row in rows if row[0] and lo < row[0][:10] <= hi]
+        sources = list({row[1] for row in in_window})
+        if sources:
+            stats = eisc_for(conn, sources)
+        else:
+            # An empty window contributes zero independence without calling eisc_for.
+            stats = {"eisc_adj": 0.0, "k_eff": 0}
+        counts.append(len(in_window))
+        eiscs.append(stats["eisc_adj"])
+        keffs.append(stats["k_eff"])
+        src_sets.append(sources)
+
+    # The discrete second derivative needs three windows; with fewer it is reported as 0.0.
+    if n >= 3:
+        accel = eiscs[0] - 2 * eiscs[1] + eiscs[2]
+    else:
+        accel = 0.0
+
+    return {
+        "counts": counts,
+        "eisc": [round(value, 3) for value in eiscs],
+        "k_eff": keffs,
+        "acceleration": round(accel, 3),
+        "eisc0": round(eiscs[0], 3),
+        "k_eff0": keffs[0],
+        "sources0": src_sets[0],
+        "n_total": sum(counts),
+    }