Initial commit: Ten31 Signal Engine (ingest, scoring brain, corpus seeds)
This commit is contained in:
@@ -0,0 +1,75 @@
|
||||
"""Under-acted-conviction scorer — Job B, the §7.1 backtest target.
|
||||
|
||||
score = conviction_weight x exposure_gap x rising_independent_corroboration
|
||||
|
||||
Fires when Ten31 believes something (high conviction), has little/no position (exposure gap), and the
|
||||
world is beginning to corroborate it or a derivative of it — independently and with acceleration. This
|
||||
is the signal that should have flagged "size up power-infra picks-and-shovels" in 2023.
|
||||
|
||||
Exposure is joined LOCALLY (never crosses the frontier boundary, §4.6). Corroboration is RETRIEVED
|
||||
(stats nominate), then an LLM helper only FILTERS retrieval near-misses (§5.1) — it cannot add claims.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from .llm_helpers import derivative_relevance
|
||||
from .windows import windowed_independence
|
||||
|
||||
# Multiplier applied for each conviction level label. score_node falls back to 0.4
# (the "med" value) when it is given a label that is not listed here.
CONVICTION_WEIGHT = {
    "low": 0.15,
    "med": 0.4,
    "med-high": 0.7,
    "high": 1.0,
}

# Multiplier applied for each exposure bucket: the smaller the existing position, the
# larger the gap. score_node falls back to 0.6 (the "unset" value) for unknown buckets.
# NOTE(review): "lt2" / "2to10" / "gt10" presumably denote position-size bands — confirm units.
EXPOSURE_GAP = {
    "none": 1.0,
    "lt2": 0.8,
    "2to10": 0.4,
    "gt10": 0.1,
    "unset": 0.6,
}
|
||||
|
||||
|
||||
def score_node(conn, sc, backend, *, as_of: str, derivative: str, conviction_id: str,
               node_id: str | None, conviction_level: str, exposure: str,
               is_breaker: bool = False, top_k: int = 40, window_days: int = 28) -> dict:
    """Score one conviction/derivative pair for under-acted conviction.

    Pipeline: retrieve candidate propositions for ``derivative``, drop anything dated
    after ``as_of``, let the LLM helper keep only claims it marks as corroborating with
    direction ``"affirms"``, then turn the confirmed set into a corroboration value via
    ``windowed_independence``.

    Args:
        conn: Handle passed straight through to ``windowed_independence``.
        sc: Search client; must expose
            ``search(query, collection=..., top_k=..., rerank=...)``.
        backend: LLM backend passed straight through to ``derivative_relevance``.
        as_of: Cut-off compared lexicographically with the first 10 characters of each
            hit's ``date`` payload (presumably ISO ``YYYY-MM-DD`` — confirm with callers).
        derivative: Query text for retrieval and the statement the LLM filters against.
        conviction_id: Echoed into the result.
        node_id: Echoed into the result; may be ``None``.
        conviction_level: Key into ``CONVICTION_WEIGHT``; unknown labels use 0.4.
        exposure: Key into ``EXPOSURE_GAP``; unknown buckets use 0.6.
        is_breaker: When true the score is the corroboration alone, without the
            conviction-weight and exposure-gap multipliers.
        top_k: Number of hits requested from the search client.
        window_days: Window length forwarded to ``windowed_independence``.

    Returns:
        The dict built by ``_result``. A search failure or an empty candidate set yields
        score 0.0 with a ``"reason"`` entry in ``inputs``.
    """
    cw = CONVICTION_WEIGHT.get(conviction_level, 0.4)
    eg = EXPOSURE_GAP.get(exposure, 0.6)

    # 1. RETRIEVE (stats nominate): hybrid search over embedded propositions; as-of post-filter.
    try:
        res = sc.search(derivative, collection="propositions", top_k=top_k, rerank=True)
    except Exception as e:  # noqa: BLE001
        # Best-effort: a retrieval failure degrades to a zero score instead of aborting the run.
        return _result(conviction_id, node_id, 0.0, {"reason": f"search_failed:{str(e)[:60]}"},
                       cw, eg, exposure, is_breaker)

    # `or []` covers a response whose "data" key is present but null.
    hits = (res.get("data") or []) if isinstance(res, dict) else []
    cand = []
    for h in hits:
        pl = (h.get("payload") or {}) if isinstance(h, dict) else {}
        d = pl.get("date")
        # The as-of cut is applied here, after retrieval. A missing or non-string date
        # cannot be shown to fall on/before as_of, so the hit is dropped rather than
        # risking lookahead (slicing a non-string used to raise).
        if not pl.get("claim_id") or not isinstance(d, str) or not d or d[:10] > as_of:
            continue
        cand.append({"claim_id": pl["claim_id"], "proposition": pl.get("proposition", ""),
                     "date": d, "source_id": pl.get("source_id")})
    if not cand:
        return _result(conviction_id, node_id, 0.0, {"reason": "no_retrieval", "n_retrieved": 0},
                       cw, eg, exposure, is_breaker)

    # 2. FILTER near-misses with the LLM (affirms-only). Not a nominator — can't add claims.
    rel = derivative_relevance(
        backend, derivative,
        [{"claim_id": c["claim_id"], "proposition": c["proposition"]} for c in cand],
    ) or {}
    confirmed = []
    for c in cand:
        # A missing or null verdict means "not confirmed"; dict.get's default would
        # not cover an explicit null entry.
        verdict = rel.get(c["claim_id"]) or {}
        if verdict.get("corroborates") and verdict.get("direction") == "affirms":
            confirmed.append(c)
    n_src = len({c["source_id"] for c in confirmed})

    # 3. CORROBORATION = independence-weighted acceleration over the confirmed set (treat as a topic).
    # window_days matches corpus cadence: ~90d for quarterly filings/earnings, ~28d for weekly podcasts.
    wi = windowed_independence(conn, [(c["date"], c["source_id"]) for c in confirmed],
                               as_of, days=window_days)
    a_corrob = wi["acceleration"]
    eisc_corrob = wi["eisc0"]
    # Negative acceleration is clamped to zero, so a fading topic contributes nothing.
    corroboration = max(0.0, a_corrob) * eisc_corrob

    score = corroboration if is_breaker else cw * eg * corroboration
    inputs = {
        "as_of": as_of, "derivative": derivative, "n_retrieved": len(cand),
        "n_confirmed": len(confirmed),
        "n_src": n_src, "a_corrob": a_corrob, "eisc_corrob": eisc_corrob, "k_eff0": wi["k_eff0"],
        "window_counts": wi["counts"], "window_eisc": wi["eisc"],
        "corroboration": round(corroboration, 3),
        # Capped at 50 ids to keep the recorded inputs bounded.
        "confirmed_claim_ids": [c["claim_id"] for c in confirmed][:50],
    }
    return _result(conviction_id, node_id, score, inputs, cw, eg, exposure, is_breaker)
|
||||
|
||||
|
||||
def _result(conviction_id, node_id, score, inputs, cw, eg, exposure, is_breaker) -> dict:
|
||||
inputs = {**inputs, "conviction_weight": cw, "exposure_gap": eg, "exposure": exposure,
|
||||
"is_breaker": is_breaker}
|
||||
return {"scorer": "under_acted", "conviction_id": conviction_id, "node_id": node_id,
|
||||
"score": round(float(score), 4), "inputs": inputs}
|
||||
Reference in New Issue
Block a user