Initial commit: Ten31 Signal Engine (ingest, scoring brain, corpus seeds)
This commit is contained in:
@@ -0,0 +1,113 @@
|
||||
"""Effective Independent Source Count (EISC) — the system's differentiator (§4.5).
|
||||
|
||||
Discount convergence by source connectedness. Five shows that "independently converge" but share one
|
||||
guest must count as ~one voice; three shows across macro/energy/ai with no shared guests are gold.
|
||||
|
||||
Method (resolved in the design panel): noisy-OR connectedness matrix + inverse-row-sum EISC.
|
||||
- symmetric & order-independent (unlike a sequential pairwise-penalty walk)
|
||||
- each source's contribution is individually explainable ("counts 0.31 because connected to 3 others")
|
||||
- collapses correctly: 5 clones -> ~1.0 ; 5 cross-cluster independents -> ~5.0 (raw)
|
||||
- no eigensolve (unstable at n=2..4, our common case)
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from collections import defaultdict
|
||||
|
||||
import numpy as np
|
||||
|
||||
# Coupling per edge type, multiplied by the edge weight before the noisy-OR combination.
# A voiceprint-confirmed shared guest is near-total redundancy on a topic.
KAPPA = {"shared_guest": 0.85, "citation": 0.45, "community": 0.60}

# Same-cluster baseline correlation, keyed by cluster name (sources in the same world are
# partly redundant even without an explicit edge between them).
CLUSTER_COUPLING = {"bitcoin": 0.55, "vc_consensus": 0.35}

# Baseline applied to a same-cluster pair whose cluster has no entry in CLUSTER_COUPLING.
SAME_CLUSTER_DEFAULT = 0.25

# Upper bound on kappa * weight, so a heavily-weighted edge can't exceed near-total coupling.
EDGE_CLAMP = 0.95

# §4.5: bitcoin / capped sources contribute at most 0.25 of a voice.
CAP_VALUE = 0.25

# A cluster must add >= half an independent voice to count toward K_eff.
CLUSTER_MIN_CONTRIB = 0.5
|
||||
|
||||
|
||||
def effective_independent_N(srcs: list[tuple], edges: list[tuple], *, mode: str = "live") -> dict:
    """Return the Effective Independent Source Count (EISC) for a set of sources.

    A symmetric connectedness matrix C is built with 1.0 on the diagonal; each
    off-diagonal entry is the noisy-OR of the pair's combined edge coupling and
    its same-cluster baseline. Every source is then credited with
    ``1 / (row sum of C)``, scaled by CAP_VALUE when the source is capped.

    Args:
        srcs: ``(source_id, source_cluster, cluster_capped_low[, own_network])``
            tuples. A source is capped when ``cluster_capped_low`` is truthy or
            its cluster is ``"bitcoin"``. A repeated ``source_id`` is counted
            once (the first tuple wins).
        edges: ``(a, b, type, weight)`` tuples. Edges touching a source that is
            not in ``srcs``, self-loops, and edge types missing from KAPPA
            contribute nothing. A ``None`` weight is treated as 1.0.
        mode: ``'live'`` (default) DROPS own_network sources (Ten31's own orbit —
            listening to ourselves, §v2.1). Any other value keeps them;
            ``'test'`` is the reflexivity test fixture.

    Returns:
        dict with keys:
            ``eisc_raw``  sum of per-source contributions,
            ``k_eff``  number of clusters whose non-capped sources sum to at
                least CLUSTER_MIN_CONTRIB,
            ``xcluster_mult``  ``max(1.0, 1.0 + 0.5 * (k_eff - 1))``,
            ``eisc_adj``  ``xcluster_mult * eisc_raw``,
            ``per_source_contrib``  ``{source_id: contribution}`` rounded to 4 dp.
    """
    # A source listed twice must not be scored as two voices: keep the first tuple
    # per source_id so the matrix size and per_source_contrib stay in agreement.
    first_seen: dict = {}
    for s in srcs:
        first_seen.setdefault(s[0], s)
    srcs = list(first_seen.values())

    if mode == "live":
        srcs = [s for s in srcs if not (len(s) > 3 and s[3])]

    ids = [s[0] for s in srcs]
    n = len(ids)
    if n == 0:
        return {"eisc_adj": 0.0, "eisc_raw": 0.0, "k_eff": 0, "xcluster_mult": 1.0, "per_source_contrib": {}}

    cluster = {s[0]: s[1] for s in srcs}
    capped = {s[0]: (bool(s[2]) or s[1] == "bitcoin") for s in srcs}

    # edge channel: combine all edges between a pair by noisy-OR product of (1 - kappa*weight)
    pair_factor: dict = {}
    for a, b, etype, w in edges:
        if a in cluster and b in cluster and a != b:
            raw = KAPPA.get(etype, 0.0) * (w if w is not None else 1.0)
            # Clamp to [0, EDGE_CLAMP]: a negative weight would otherwise produce a
            # negative coupling, letting a row sum drop to <= 0 further down.
            term = max(0.0, min(EDGE_CLAMP, raw))
            key = frozenset((a, b))
            pair_factor[key] = pair_factor.get(key, 1.0) * (1.0 - term)

    C = np.eye(n)
    for i in range(n):
        for j in range(i + 1, n):
            a, b = ids[i], ids[j]
            e = 1.0 - pair_factor.get(frozenset((a, b)), 1.0)  # 0 if no edge
            ci, cj = cluster[a], cluster[b]
            same_cluster = ci is not None and ci == cj
            clust = CLUSTER_COUPLING.get(ci, SAME_CLUSTER_DEFAULT) if same_cluster else 0.0
            # noisy-OR of the edge channel and the same-cluster baseline
            C[i, j] = C[j, i] = 1.0 - (1.0 - e) * (1.0 - clust)

    rowsum = C.sum(axis=1)  # includes the diagonal 1.0
    contrib, eisc_raw = {}, 0.0
    cluster_mass: dict = defaultdict(float)
    for i, sid in enumerate(ids):
        cap = CAP_VALUE if capped[sid] else 1.0
        contrib[sid] = cap * (1.0 / rowsum[i])
        eisc_raw += contrib[sid]
        # only non-capped sources with a cluster feed the cross-cluster count
        if not capped[sid] and cluster[sid]:
            cluster_mass[cluster[sid]] += contrib[sid]

    # cross-cluster bonus: count NON-capped clusters that genuinely contribute an independent voice
    # (summed contribution >= half a voice). This stops "one guest across many clusters" from earning
    # the gold multiplier — the raw EISC already collapses that guest to ~1, and k_eff must agree.
    k_eff = sum(1 for m in cluster_mass.values() if m >= CLUSTER_MIN_CONTRIB)
    xmult = max(1.0, 1.0 + 0.5 * (k_eff - 1))  # 1clu->1.0, 2->1.5, 3->2.0 (gold)
    return {
        "eisc_adj": xmult * eisc_raw,
        "eisc_raw": eisc_raw,
        "k_eff": k_eff,
        "xcluster_mult": xmult,
        "per_source_contrib": {k: round(v, 4) for k, v in contrib.items()},
    }
|
||||
|
||||
|
||||
# --- DB helpers (the brain only READS the graph; edges are produced upstream by the voiceprint lib) ---
|
||||
def load_source_meta(conn, ids: list[str], *, chunk_size: int = 500) -> list[tuple]:
    """Load the per-source metadata rows for ``ids`` from the ``sources`` table.

    Args:
        conn: DB connection exposing ``execute(sql, params).fetchall()`` with
            ``?`` placeholders.
        ids: source ids to look up; duplicates are ignored.
        chunk_size: maximum number of ids bound in a single query, so large id
            sets stay under the database's bound-parameter limit. Values below
            1 are treated as 1.

    Returns:
        ``(source_id, source_cluster, cluster_capped_low, own_network)`` tuples,
        with a NULL ``own_network`` read as 0. Ids with no row are simply absent.
        Row order is whatever the database returns.
    """
    ids = list(dict.fromkeys(ids))
    if not ids:
        return []
    step = max(1, chunk_size)
    rows: list[tuple] = []
    for start in range(0, len(ids), step):
        chunk = ids[start:start + step]
        ph = ",".join("?" * len(chunk))
        fetched = conn.execute(
            f"SELECT source_id, source_cluster, cluster_capped_low, COALESCE(own_network,0) "
            f"FROM sources WHERE source_id IN ({ph})", chunk
        ).fetchall()
        rows.extend((r[0], r[1], r[2], r[3]) for r in fetched)
    return rows
|
||||
|
||||
|
||||
def load_edges(conn, ids: list[str], *, chunk_size: int = 400) -> list[tuple]:
    """Load every ``source_edges`` row whose two endpoints are both in ``ids``.

    Args:
        conn: DB connection exposing ``execute(sql, params).fetchall()`` with
            ``?`` placeholders.
        ids: source ids defining the node set; duplicates are ignored.
        chunk_size: maximum number of ids bound per side of a query. Each query
            binds at most ``2 * chunk_size`` parameters, so large id sets stay
            under the database's bound-parameter limit. Values below 1 are
            treated as 1.

    Returns:
        ``(src_a, src_b, edge_type, weight)`` tuples. Row order is whatever the
        database returns within each query.
    """
    ids = list(dict.fromkeys(ids))
    if not ids:
        return []
    step = max(1, chunk_size)
    chunks = [ids[start:start + step] for start in range(0, len(ids), step)]
    rows: list[tuple] = []
    # ids are unique, so every (src_a, src_b) combination falls in exactly one
    # (chunk_a, chunk_b) pair and no edge row is fetched twice.
    for chunk_a in chunks:
        ph_a = ",".join("?" * len(chunk_a))
        for chunk_b in chunks:
            ph_b = ",".join("?" * len(chunk_b))
            fetched = conn.execute(
                f"SELECT src_a, src_b, edge_type, weight FROM source_edges WHERE src_a IN ({ph_a}) AND src_b IN ({ph_b})",
                chunk_a + chunk_b,
            ).fetchall()
            rows.extend((r[0], r[1], r[2], r[3]) for r in fetched)
    return rows
|
||||
|
||||
|
||||
def eisc_for(conn, source_ids: list[str], *, mode: str = "live") -> dict:
    """Convenience: EISC for a set of source_ids, loading cluster/cap/own_network + edges from SQLite.

    Args:
        conn: DB connection handed to ``load_source_meta`` and ``load_edges``.
        source_ids: source ids to score; duplicates are ignored.
        mode: 'live' drops own_network sources; 'test' keeps them (§v2.1 condition 1).

    Returns:
        The result dict of ``effective_independent_N`` for the loaded sources and edges.
    """
    ids = list(dict.fromkeys(source_ids))
    meta = load_source_meta(conn, ids)
    edges = load_edges(conn, ids)
    return effective_independent_N(meta, edges, mode=mode)
|
||||
Reference in New Issue
Block a user