# ten31-signal-engine/signal_engine/signals/independence.py

"""Effective Independent Source Count (EISC) — the system's differentiator (§4.5).
Discount convergence by source connectedness. Five shows that "independently converge" but share one
guest must count as ~one voice; three shows across macro/energy/ai with no shared guests are gold.
Method (resolved in the design panel): noisy-OR connectedness matrix + inverse-row-sum EISC.
- symmetric & order-independent (unlike a sequential pairwise-penalty walk)
- each source's contribution is individually explainable ("counts 0.31 because connected to 3 others")
- collapses correctly: 5 clones -> ~1.0 ; 5 cross-cluster independents -> ~5.0 (raw)
- no eigensolve (unstable at n=2..4, our common case)
"""
from __future__ import annotations
from collections import defaultdict
import numpy as np
# Per-edge-type coupling strength. A voiceprint-confirmed shared guest is treated as
# near-total redundancy on a topic.
KAPPA = dict(
    shared_guest=0.85,
    citation=0.45,
    community=0.60,
)

# Baseline correlation between two sources of the same cluster: they are partly redundant
# even when no explicit edge links them.
CLUSTER_COUPLING = dict(
    bitcoin=0.55,
    vc_consensus=0.35,
)
# Same-cluster coupling used for clusters not listed in CLUSTER_COUPLING.
SAME_CLUSTER_DEFAULT = 0.25

# Upper bound on kappa*weight, so a heavily-weighted edge cannot exceed near-total coupling.
EDGE_CLAMP = 0.95

# §4.5: bitcoin / capped sources contribute at most 0.25 of a voice.
CAP_VALUE = 0.25

# A cluster must add at least half an independent voice to count toward K_eff.
CLUSTER_MIN_CONTRIB = 0.5
def effective_independent_N(srcs: list[tuple], edges: list[tuple], *, mode: str = "live") -> dict:
    """Compute the Effective Independent Source Count (EISC) for a set of sources.

    Every pair of sources gets a connectedness value in [0, 1] that combines, by noisy-OR,
    (a) all graph edges between the pair and (b) a baseline coupling when both sit in the
    same cluster. Each source then contributes ``cap / row_sum`` of the connectedness
    matrix (the row sum includes the diagonal 1.0).

    Args:
        srcs: ``[(source_id, source_cluster, cluster_capped_low[, own_network])]``.
            A source_id listed more than once is counted once; its first position is
            kept and its last occurrence supplies the cluster / cap metadata.
        edges: ``[(a, b, type, weight)]``. Edges that touch a source outside ``srcs``,
            self-loops, and edge types missing from ``KAPPA`` add no coupling. A ``None``
            weight counts as 1.0; negative weights add no coupling.
        mode: ``'live'`` (default) DROPS own_network sources (Ten31's own orbit —
            listening to ourselves, §v2.1). Any other value keeps them; ``'test'`` is the
            reflexivity test fixture.

    Returns:
        ``{"eisc_adj", "eisc_raw", "k_eff", "xcluster_mult", "per_source_contrib"}`` where
        ``eisc_adj = xcluster_mult * eisc_raw`` and ``per_source_contrib`` maps each
        source_id to its contribution rounded to 4 decimals.
    """
    if mode == "live":
        srcs = [s for s in srcs if not (len(s) > 3 and s[3])]

    # One matrix row per distinct source: a repeated id would otherwise be summed into
    # eisc_raw once per occurrence while per_source_contrib keeps a single entry.
    srcs = list({s[0]: s for s in srcs}.values())

    ids = [s[0] for s in srcs]
    n = len(ids)
    if n == 0:
        return {"eisc_adj": 0.0, "eisc_raw": 0.0, "k_eff": 0, "xcluster_mult": 1.0, "per_source_contrib": {}}

    known = set(ids)
    cluster = {s[0]: s[1] for s in srcs}
    capped = {s[0]: (bool(s[2]) or s[1] == "bitcoin") for s in srcs}

    # Edge channel: combine all edges between a pair by noisy-OR product of (1 - kappa*weight).
    pair_factor: dict = {}
    for a, b, etype, w in edges:
        if a in known and b in known and a != b:
            strength = KAPPA.get(etype, 0.0) * (w if w is not None else 1.0)
            # Clamp to [0, EDGE_CLAMP]: a negative weight must not produce negative
            # connectedness (which could drive a row sum to <= 0).
            term = max(0.0, min(EDGE_CLAMP, strength))
            key = frozenset((a, b))
            pair_factor[key] = pair_factor.get(key, 1.0) * (1.0 - term)

    C = np.eye(n)
    for i in range(n):
        for j in range(i + 1, n):
            a, b = ids[i], ids[j]
            e = 1.0 - pair_factor.get(frozenset((a, b)), 1.0)  # 0 if no edge
            ci, cj = cluster[a], cluster[b]
            clust = (CLUSTER_COUPLING.get(ci, SAME_CLUSTER_DEFAULT)
                     if (ci is not None and ci == cj) else 0.0)
            # Noisy-OR of the edge channel and the same-cluster baseline.
            C[i, j] = C[j, i] = 1.0 - (1.0 - e) * (1.0 - clust)

    rowsum = C.sum(axis=1)  # includes the diagonal 1.0
    contrib: dict = {}
    eisc_raw = 0.0
    cluster_mass: dict = defaultdict(float)
    for i, sid in enumerate(ids):
        cap = CAP_VALUE if capped[sid] else 1.0
        # float(): keep NumPy scalars out of the returned dict.
        share = float(cap * (1.0 / rowsum[i]))
        contrib[sid] = share
        eisc_raw += share
        if not capped[sid] and cluster[sid]:
            cluster_mass[cluster[sid]] += share

    # Cross-cluster bonus: count NON-capped clusters that genuinely contribute an independent
    # voice (summed contribution >= CLUSTER_MIN_CONTRIB). This stops "one guest across many
    # clusters" from earning the gold multiplier — the raw EISC already collapses that guest
    # to ~1, and k_eff must agree.
    k_eff = sum(1 for m in cluster_mass.values() if m >= CLUSTER_MIN_CONTRIB)
    xmult = max(1.0, 1.0 + 0.5 * (k_eff - 1))  # 1clu->1.0, 2->1.5, 3->2.0 (gold)
    return {
        "eisc_adj": xmult * eisc_raw,
        "eisc_raw": eisc_raw,
        "k_eff": k_eff,
        "xcluster_mult": xmult,
        "per_source_contrib": {k: round(v, 4) for k, v in contrib.items()},
    }
# --- DB helpers (the brain only READS the graph; edges are produced upstream by the voiceprint lib) ---
def load_source_meta(conn, ids: list[str]) -> list[tuple]:
    """Read the metadata rows of the requested sources from the ``sources`` table.

    Args:
        conn: DB connection whose ``execute(sql, params)`` accepts ``?`` placeholders and
            returns a cursor with ``fetchall()``.
        ids: source ids to look up; duplicates are ignored.

    Returns:
        ``[(source_id, source_cluster, cluster_capped_low, own_network)]`` with a NULL
        ``own_network`` reported as 0. Ids absent from the table yield no row. An empty
        ``ids`` returns ``[]`` without querying.
    """
    wanted = list(dict.fromkeys(ids))  # de-duplicate while keeping first-seen order
    if len(wanted) == 0:
        return []

    ph = ",".join(["?"] * len(wanted))  # one bound placeholder per id
    cursor = conn.execute(
        f"SELECT source_id, source_cluster, cluster_capped_low, COALESCE(own_network,0) "
        f"FROM sources WHERE source_id IN ({ph})", wanted
    )
    meta = []
    for row in cursor.fetchall():
        meta.append((row[0], row[1], row[2], row[3]))
    return meta
def load_edges(conn, ids: list[str]) -> list[tuple]:
    """Read the edges whose two endpoints are both in ``ids`` from ``source_edges``.

    Args:
        conn: DB connection whose ``execute(sql, params)`` accepts ``?`` placeholders and
            returns a cursor with ``fetchall()``.
        ids: source ids that bound the subgraph; duplicates are ignored.

    Returns:
        ``[(src_a, src_b, edge_type, weight)]``. An empty ``ids`` returns ``[]`` without
        querying.
    """
    wanted = list(dict.fromkeys(ids))  # de-duplicate while keeping first-seen order
    if len(wanted) == 0:
        return []

    ph = ",".join(["?"] * len(wanted))
    # The id list is bound twice: once for the src_a filter, once for the src_b filter.
    params = [*wanted, *wanted]
    cursor = conn.execute(
        f"SELECT src_a, src_b, edge_type, weight FROM source_edges WHERE src_a IN ({ph}) AND src_b IN ({ph})",
        params,
    )
    found = []
    for row in cursor.fetchall():
        found.append((row[0], row[1], row[2], row[3]))
    return found
def eisc_for(conn, source_ids: list[str], *, mode: str = "live") -> dict:
    """Convenience wrapper: compute EISC for ``source_ids`` straight from the database.

    De-duplicates the ids, loads cluster / cap / own_network metadata and the edges among
    those sources from SQLite, then delegates to ``effective_independent_N``.
    mode='live' drops own_network sources; mode='test' keeps them (§v2.1 condition 1).
    """
    unique_ids = list(dict.fromkeys(source_ids))
    meta = load_source_meta(conn, unique_ids)
    graph_edges = load_edges(conn, unique_ids)
    return effective_independent_N(meta, graph_edges, mode=mode)