114 lines
5.4 KiB
Python
114 lines
5.4 KiB
Python
"""Effective Independent Source Count (EISC) — the system's differentiator (§4.5).

Discount convergence by source connectedness. Five shows that "independently converge" but share one
guest must count as ~one voice; three shows across macro/energy/ai with no shared guests are gold.

Method (resolved in the design panel): noisy-OR connectedness matrix + inverse-row-sum EISC.
- symmetric & order-independent (unlike a sequential pairwise-penalty walk)
- each source's contribution is individually explainable ("counts 0.31 because connected to 3 others")
- collapses correctly: 5 clones -> ~1.0 ; 5 cross-cluster independents -> ~5.0 (raw)
- no eigensolve (unstable at n=2..4, our common case)
"""
|
|
from __future__ import annotations
|
|
|
|
from collections import defaultdict
|
|
|
|
import numpy as np
|
|
|
|
# Coupling strength per edge type. A voiceprint-confirmed shared guest is
# near-total redundancy on a topic, so it carries the largest coupling.
KAPPA = {
    "shared_guest": 0.85,
    "citation": 0.45,
    "community": 0.60,
}

# Baseline correlation between two sources of the same cluster: sources in the
# same world are partly redundant even when no explicit edge links them.
CLUSTER_COUPLING = {
    "bitcoin": 0.55,
    "vc_consensus": 0.35,
}

# Same-cluster baseline for clusters that have no entry in CLUSTER_COUPLING.
SAME_CLUSTER_DEFAULT = 0.25

# Upper bound on kappa * weight, so a heavily-weighted edge cannot exceed
# near-total redundancy.
EDGE_CLAMP = 0.95

# §4.5: bitcoin / capped sources contribute at most 0.25 of a voice.
CAP_VALUE = 0.25

# A cluster must add at least half an independent voice to count toward K_eff.
CLUSTER_MIN_CONTRIB = 0.5
|
|
|
|
|
|
def effective_independent_N(srcs: list[tuple], edges: list[tuple], *, mode: str = "live") -> dict:
    """Compute the Effective Independent Source Count (EISC) for a set of sources.

    A symmetric connectedness matrix ``C`` (1.0 on the diagonal) is built: for each pair of
    sources, every edge between them is combined by noisy-OR, and the result is noisy-OR'd with
    a same-cluster baseline. Each source then contributes ``cap / rowsum(C)`` of a voice, where
    ``cap`` is CAP_VALUE for capped sources and 1.0 otherwise.

    Args:
        srcs: ``(source_id, source_cluster, cluster_capped_low[, own_network])`` tuples.
            A source_id listed more than once is counted once; the last tuple for that id
            supplies its cluster / cap metadata.
        edges: ``(a, b, edge_type, weight)`` tuples. Self-loops and edges touching an id that
            is not in ``srcs`` are ignored. ``weight=None`` means 1.0. Edge types missing from
            KAPPA contribute nothing. Negative (or NaN) weights are treated as no coupling.
        mode: ``'live'`` (default) DROPS own_network sources (Ten31's own orbit — listening to
            ourselves, §v2.1); any other value, e.g. ``'test'``, keeps them (the reflexivity
            test fixture).

    Returns:
        dict with keys ``eisc_adj`` (``xcluster_mult * eisc_raw``), ``eisc_raw`` (sum of the
        per-source contributions), ``k_eff`` (number of non-capped clusters whose summed
        contribution reaches CLUSTER_MIN_CONTRIB), ``xcluster_mult`` and
        ``per_source_contrib`` (``{source_id: contribution rounded to 4 places}``).
    """
    if mode == "live":
        srcs = [s for s in srcs if not (len(s) > 3 and s[3])]

    # De-duplicate by source_id. A repeated id would otherwise get one matrix row per
    # occurrence and be counted as several (only cluster-coupled) voices, while
    # per_source_contrib kept a single entry, so eisc_raw and the breakdown disagreed.
    meta = {s[0]: s for s in srcs}
    ids = list(meta)
    n = len(ids)
    if n == 0:
        return {"eisc_adj": 0.0, "eisc_raw": 0.0, "k_eff": 0, "xcluster_mult": 1.0, "per_source_contrib": {}}

    cluster = {sid: s[1] for sid, s in meta.items()}
    capped = {sid: (bool(s[2]) or s[1] == "bitcoin") for sid, s in meta.items()}

    # Edge channel: combine all edges between a pair by the noisy-OR product of (1 - kappa*weight).
    pair_factor: dict = {}
    for a, b, etype, w in edges:
        if a == b or a not in meta or b not in meta:
            continue
        strength = KAPPA.get(etype, 0.0) * (1.0 if w is None else w)
        # Keep the term inside [0, EDGE_CLAMP]: a negative weight must not produce negative
        # connectedness (row sum < 1 means a source worth more than one voice, or 1/0).
        term = min(EDGE_CLAMP, max(0.0, strength))
        key = frozenset((a, b))
        pair_factor[key] = pair_factor.get(key, 1.0) * (1.0 - term)

    C = np.eye(n)
    for i, a in enumerate(ids):
        for j in range(i + 1, n):
            b = ids[j]
            # .get() so that merely looking a pair up does not insert it.
            e = 1.0 - pair_factor.get(frozenset((a, b)), 1.0)  # 0 if no edge
            ci, cj = cluster[a], cluster[b]
            if ci is not None and ci == cj:
                clust = CLUSTER_COUPLING.get(ci, SAME_CLUSTER_DEFAULT)
            else:
                clust = 0.0
            C[i, j] = C[j, i] = 1.0 - (1.0 - e) * (1.0 - clust)

    rowsum = C.sum(axis=1)  # includes the diagonal 1.0, so every entry is >= 1
    contrib: dict = {}
    eisc_raw = 0.0
    cluster_mass: dict = defaultdict(float)
    for i, sid in enumerate(ids):
        cap = CAP_VALUE if capped[sid] else 1.0
        # float(): hand back plain Python floats, matching the empty-input return above.
        contrib[sid] = cap * float(1.0 / rowsum[i])
        eisc_raw += contrib[sid]
        if not capped[sid] and cluster[sid]:
            cluster_mass[cluster[sid]] += contrib[sid]

    # Cross-cluster bonus: count NON-capped clusters that genuinely contribute an independent
    # voice (summed contribution >= half a voice). This stops "one guest across many clusters"
    # from earning the gold multiplier — the raw EISC already collapses that guest to ~1, and
    # k_eff must agree.
    k_eff = sum(1 for m in cluster_mass.values() if m >= CLUSTER_MIN_CONTRIB)
    xmult = max(1.0, 1.0 + 0.5 * (k_eff - 1))  # 1clu->1.0, 2->1.5, 3->2.0 (gold)
    return {
        "eisc_adj": xmult * eisc_raw,
        "eisc_raw": eisc_raw,
        "k_eff": k_eff,
        "xcluster_mult": xmult,
        "per_source_contrib": {k: round(v, 4) for k, v in contrib.items()},
    }
|
|
|
|
|
|
# --- DB helpers (the brain only READS the graph; edges are produced upstream by the voiceprint lib) ---
|
|
def load_source_meta(conn, ids: list[str]) -> list[tuple]:
    """Fetch the metadata rows of the given sources from the ``sources`` table.

    Args:
        conn: connection object exposing ``execute(sql, params)`` with ``?`` placeholders.
        ids: source ids to look up; duplicates are collapsed (first occurrence kept).

    Returns:
        ``(source_id, source_cluster, cluster_capped_low, own_network)`` tuples, with a NULL
        ``own_network`` reported as 0. Ids without a row are simply absent. An empty ``ids``
        returns ``[]`` without touching the database.
    """
    wanted = list(dict.fromkeys(ids))
    if len(wanted) == 0:
        return []
    placeholders = ",".join(["?"] * len(wanted))
    query = (
        "SELECT source_id, source_cluster, cluster_capped_low, COALESCE(own_network,0) "
        f"FROM sources WHERE source_id IN ({placeholders})"
    )
    fetched = conn.execute(query, wanted).fetchall()
    return [(sid, clu, capped_low, own) for sid, clu, capped_low, own in fetched]
|
|
|
|
|
|
def load_edges(conn, ids: list[str]) -> list[tuple]:
    """Fetch every ``source_edges`` row whose two endpoints are both in ``ids``.

    Args:
        conn: connection object exposing ``execute(sql, params)`` with ``?`` placeholders.
        ids: source ids to restrict to; duplicates are collapsed (first occurrence kept).

    Returns:
        ``(src_a, src_b, edge_type, weight)`` tuples. An empty ``ids`` returns ``[]`` without
        touching the database.
    """
    wanted = list(dict.fromkeys(ids))
    if len(wanted) == 0:
        return []
    placeholders = ",".join(["?"] * len(wanted))
    query = (
        "SELECT src_a, src_b, edge_type, weight FROM source_edges "
        f"WHERE src_a IN ({placeholders}) AND src_b IN ({placeholders})"
    )
    # The id list is bound twice: once for the src_a filter, once for the src_b filter.
    fetched = conn.execute(query, [*wanted, *wanted]).fetchall()
    return [(src_a, src_b, etype, weight) for src_a, src_b, etype, weight in fetched]
|
|
|
|
|
|
def eisc_for(conn, source_ids: list[str], *, mode: str = "live") -> dict:
    """Compute the EISC of ``source_ids`` using metadata and edges read through ``conn``.

    The ids are de-duplicated (first occurrence kept), their cluster / cap / own_network rows
    and the edges among them are loaded, and the result of ``effective_independent_N`` is
    returned unchanged.

    mode='live' drops own_network sources; mode='test' keeps them (§v2.1 condition 1).
    """
    unique_ids = list(dict.fromkeys(source_ids))
    source_rows = load_source_meta(conn, unique_ids)
    edge_rows = load_edges(conn, unique_ids)
    return effective_independent_N(source_rows, edge_rows, mode=mode)
|