Initial commit: Ten31 Signal Engine (ingest, scoring brain, corpus seeds)
This commit is contained in:
@@ -0,0 +1,53 @@
|
||||
"""Temporal windows + windowed independence (the single temporal layer, §4.4).
|
||||
|
||||
28-day non-overlapping windows anchored at as_of (W0 ends at as_of, then back). Non-overlapping
|
||||
avoids autocorrelation faking significance. The signal is the discrete 2nd derivative of the
|
||||
INDEPENDENCE-WEIGHTED flow (EISC per window), never the raw count — so a topic that "accelerates"
|
||||
only because one show booked the same guest three times has flat N(W).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
from .independence import eisc_for
|
||||
|
||||
# Length of one temporal window in days; used as the default for the `days` parameters.
WINDOW_DAYS = 28
|
||||
# Default number of windows; the acceleration term (N0 - 2*N1 + N2) needs at least 3.
N_WINDOWS = 3
|
||||
|
||||
|
||||
def _d(s: str) -> datetime:
    """Parse the leading ``YYYY-MM-DD`` of ``s`` into a naive ``datetime`` at midnight.

    Anything after the tenth character (a time part, a zone suffix) is ignored.
    """
    date_part = s[:10]
    return datetime.strptime(date_part, "%Y-%m-%d")
|
||||
|
||||
|
||||
def window_bounds(as_of: str, *, n: int = N_WINDOWS, days: int = WINDOW_DAYS) -> list[tuple[int, str, str]]:
    """List the ``n`` back-to-back windows that end at ``as_of``.

    Each entry is ``(idx, start_iso, end_iso)`` with ``YYYY-MM-DD`` strings. Window 0 ends on
    the date of ``as_of``; window ``idx`` ends where window ``idx - 1`` starts, so the windows
    only ever extend into the past.
    """
    anchor = _d(as_of)

    def _iso(windows_back: int) -> str:
        # Date that lies `windows_back` whole windows before the anchor.
        return (anchor - timedelta(days=windows_back * days)).strftime("%Y-%m-%d")

    return [(k, _iso(k + 1), _iso(k)) for k in range(n)]
|
||||
|
||||
|
||||
def windowed_independence(conn, rows: list[tuple], as_of: str, *, n: int = N_WINDOWS,
                          days: int = WINDOW_DAYS) -> dict:
    """Compute raw counts and independence-weighted flow (EISC) per temporal window.

    Windows come from ``window_bounds(as_of, n=n, days=days)``. A row belongs to a window when
    ``start < date <= end``, so a row dated exactly on a shared boundary is counted once.

    Args:
        conn: Passed through unchanged to ``eisc_for``.
        rows: ``[(date_iso, source_id), ...]``. Rows with a falsy date are skipped. Only the
            first 10 characters of the date are used and they are compared as strings, which
            assumes zero-padded ``YYYY-MM-DD`` prefixes.
        as_of: Anchor date; window 0 ends on it.
        n: Number of windows to evaluate.
        days: Length of each window in days.

    Returns:
        A dict with the keys:
            counts: raw row count per window ``[c0, c1, ...]``.
            eisc: ``eisc_adj`` per window ``[N0, N1, ...]``, rounded to 3 places.
            k_eff: ``k_eff`` per window.
            acceleration: ``N0 - 2*N1 + N2`` (independence-weighted 2nd derivative), rounded
                to 3 places; ``0.0`` when fewer than 3 windows are requested.
            eisc0, k_eff0, sources0: window-0 EISC, k_eff and distinct source ids
                (``0.0``, ``0`` and ``[]`` when ``n < 1``).
            n_total: total number of rows across all windows.
    """
    bounds = window_bounds(as_of, n=n, days=days)
    # Slice each date once up front rather than once per window.
    keyed = [(r[0][:10], r) for r in rows if r[0]]

    counts, eiscs, keffs, src_sets = [], [], [], []
    for _idx, start, end in bounds:
        win = [r for day, r in keyed if start < day <= end]
        # De-duplicate in first-seen order: unlike a set, this keeps ``sources0`` (and the
        # list handed to eisc_for) reproducible across runs regardless of hash randomization.
        srcs = list(dict.fromkeys(r[1] for r in win))
        # Empty windows contribute zero flow without calling eisc_for.
        e = eisc_for(conn, srcs) if srcs else {"eisc_adj": 0.0, "k_eff": 0}
        counts.append(len(win))
        eiscs.append(e["eisc_adj"])
        keffs.append(e["k_eff"])
        src_sets.append(srcs)

    accel = eiscs[0] - 2 * eiscs[1] + eiscs[2] if n >= 3 else 0.0
    # With n < 1 there is no window 0; report empty values instead of raising IndexError.
    has_w0 = bool(bounds)
    return {
        "counts": counts,
        "eisc": [round(x, 3) for x in eiscs],
        "k_eff": keffs,
        "acceleration": round(accel, 3),
        "eisc0": round(eiscs[0], 3) if has_w0 else 0.0,
        "k_eff0": keffs[0] if has_w0 else 0,
        "sources0": src_sets[0] if has_w0 else [],
        "n_total": sum(counts),
    }
|
||||
Reference in New Issue
Block a user