Initial commit: Ten31 Signal Engine (ingest, scoring brain, corpus seeds)

2026-06-15 09:24:29 -05:00
commit a6aec77506
77 changed files with 6263 additions and 0 deletions
@@ -0,0 +1,113 @@
+"""Effective Independent Source Count (EISC) — the system's differentiator (§4.5).
+
+Discount convergence by source connectedness. Five shows that "independently converge" but share one
+guest must count as ~one voice; three shows across macro/energy/ai with no shared guests are gold.
+
+Method (resolved in the design panel): noisy-OR connectedness matrix + inverse-row-sum EISC.
+  - symmetric & order-independent (unlike a sequential pairwise-penalty walk)
+  - each source's contribution is individually explainable ("counts 0.31 because connected to 3 others")
+  - collapses correctly: 5 clones -> ~1.0 ; 5 cross-cluster independents -> ~5.0 (raw)
+  - no eigensolve (unstable at n=2..4, our common case)
+"""
+from __future__ import annotations
+
+from collections import defaultdict
+
+import numpy as np
+
+# Coupling per edge type: a voiceprint-confirmed shared guest is near-total redundancy on a topic.
+KAPPA = {"shared_guest": 0.85, "citation": 0.45, "community": 0.60}
+# Same-cluster baseline correlation (sources in the same world are partly redundant even w/o an edge).
+CLUSTER_COUPLING = {"bitcoin": 0.55, "vc_consensus": 0.35}
+SAME_CLUSTER_DEFAULT = 0.25
+EDGE_CLAMP = 0.95          # cap kappa*weight so a heavily-weighted edge can't exceed near-total
+CAP_VALUE = 0.25          # §4.5: bitcoin / capped sources contribute at most 0.25 of a voice
+CLUSTER_MIN_CONTRIB = 0.5  # a cluster must add >= half an independent voice to count toward K_eff
+
+
+def effective_independent_N(srcs: list[tuple], edges: list[tuple], *, mode: str = "live") -> dict:
+    """srcs: [(source_id, source_cluster, cluster_capped_low[, own_network])]; edges: [(a,b,type,weight)].
+    mode='live' (default) DROPS own_network sources (Ten31's own orbit — listening to ourselves, §v2.1);
+    mode='test' keeps them (the reflexivity test fixture). Returns {eisc_adj, eisc_raw, k_eff, ...}."""
+    if mode == "live":
+        srcs = [s for s in srcs if not (len(s) > 3 and s[3])]
+    ids = [s[0] for s in srcs]
+    n = len(ids)
+    if n == 0:
+        return {"eisc_adj": 0.0, "eisc_raw": 0.0, "k_eff": 0, "xcluster_mult": 1.0, "per_source_contrib": {}}
+    idx = {sid: i for i, sid in enumerate(ids)}
+    cluster = {s[0]: s[1] for s in srcs}
+    capped = {s[0]: (bool(s[2]) or s[1] == "bitcoin") for s in srcs}
+
+    # edge channel: combine all edges between a pair by noisy-OR product of (1 - kappa*weight)
+    pair_factor: dict = defaultdict(lambda: 1.0)
+    for a, b, etype, w in edges:
+        if a in idx and b in idx and a != b:
+            term = min(EDGE_CLAMP, KAPPA.get(etype, 0.0) * (w if w is not None else 1.0))
+            pair_factor[frozenset((a, b))] *= (1.0 - term)
+
+    C = np.eye(n)
+    for i in range(n):
+        for j in range(i + 1, n):
+            a, b = ids[i], ids[j]
+            e = 1.0 - pair_factor[frozenset((a, b))]            # 0 if no edge
+            ci, cj = cluster[a], cluster[b]
+            clust = (CLUSTER_COUPLING.get(ci, SAME_CLUSTER_DEFAULT)
+                     if (ci is not None and ci == cj) else 0.0)
+            c = 1.0 - (1.0 - e) * (1.0 - clust)
+            C[i, j] = C[j, i] = c
+
+    rowsum = C.sum(axis=1)                                      # includes the diagonal 1.0
+    contrib, eisc_raw = {}, 0.0
+    cluster_mass: dict = defaultdict(float)
+    for i, sid in enumerate(ids):
+        cap = CAP_VALUE if capped[sid] else 1.0
+        contrib[sid] = cap * (1.0 / rowsum[i])
+        eisc_raw += contrib[sid]
+        if not capped[sid] and cluster[sid]:
+            cluster_mass[cluster[sid]] += contrib[sid]
+
+    # cross-cluster bonus: count NON-capped clusters that genuinely contribute an independent voice
+    # (summed contribution >= half a voice). This stops "one guest across many clusters" from earning
+    # the gold multiplier — the raw EISC already collapses that guest to ~1, and k_eff must agree.
+    k_eff = sum(1 for m in cluster_mass.values() if m >= CLUSTER_MIN_CONTRIB)
+    xmult = max(1.0, 1.0 + 0.5 * (k_eff - 1))                   # 1clu->1.0, 2->1.5, 3->2.0 (gold)
+    return {
+        "eisc_adj": xmult * eisc_raw,
+        "eisc_raw": eisc_raw,
+        "k_eff": k_eff,
+        "xcluster_mult": xmult,
+        "per_source_contrib": {k: round(v, 4) for k, v in contrib.items()},
+    }
+
+
+# --- DB helpers (the brain only READS the graph; edges are produced upstream by the voiceprint lib) ---
+def load_source_meta(conn, ids: list[str]) -> list[tuple]:
+    ids = list(dict.fromkeys(ids))
+    if not ids:
+        return []
+    ph = ",".join("?" * len(ids))
+    rows = conn.execute(
+        f"SELECT source_id, source_cluster, cluster_capped_low, COALESCE(own_network,0) "
+        f"FROM sources WHERE source_id IN ({ph})", ids
+    ).fetchall()
+    return [(r[0], r[1], r[2], r[3]) for r in rows]
+
+
+def load_edges(conn, ids: list[str]) -> list[tuple]:
+    ids = list(dict.fromkeys(ids))
+    if not ids:
+        return []
+    ph = ",".join("?" * len(ids))
+    rows = conn.execute(
+        f"SELECT src_a, src_b, edge_type, weight FROM source_edges WHERE src_a IN ({ph}) AND src_b IN ({ph})",
+        ids + ids,
+    ).fetchall()
+    return [(r[0], r[1], r[2], r[3]) for r in rows]
+
+
+def eisc_for(conn, source_ids: list[str], *, mode: str = "live") -> dict:
+    """Convenience: EISC for a set of source_ids, loading cluster/cap/own_network + edges from SQLite.
+    mode='live' drops own_network sources; mode='test' keeps them (§v2.1 condition 1)."""
+    ids = list(dict.fromkeys(source_ids))
+    return effective_independent_N(load_source_meta(conn, ids), load_edges(conn, ids), mode=mode)