54 lines
2.3 KiB
Python
54 lines
2.3 KiB
Python
"""Temporal windows + windowed independence (the single temporal layer, §4.4).
|
|
|
|
28-day non-overlapping windows anchored at as_of (W0 ends at as_of, then back). Non-overlapping
windows keep autocorrelation from faking significance. The signal is the discrete 2nd derivative
of the INDEPENDENCE-WEIGHTED flow N(W) (the EISC of window W), never the raw count — so a topic
that "accelerates" only because one show booked the same guest three times has flat N(W).
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
from datetime import datetime, timedelta
|
|
|
|
from .independence import eisc_for
|
|
|
|
# Default length of one temporal window, in days.
WINDOW_DAYS: int = 28
# Default number of consecutive windows (W0, W1, W2, ...) counted backward from as_of.
N_WINDOWS: int = 3
|
|
|
|
|
|
def _d(s: str) -> datetime:
|
|
return datetime.strptime(s[:10], "%Y-%m-%d")
|
|
|
|
|
|
def window_bounds(as_of: str, *, n: int = N_WINDOWS, days: int = WINDOW_DAYS) -> list[tuple[int, str, str]]:
    """Build ``n`` back-to-back windows of ``days`` days, counted backward from ``as_of``.

    Window 0 ends on ``as_of``; every later index ends where the previous one
    starts, so the windows only ever extend into the past.

    Args:
        as_of: Anchor date string; only its ``YYYY-MM-DD`` prefix is used.
        n: Number of windows to produce.
        days: Length of each window in days.

    Returns:
        ``[(idx, start_iso, end_iso), ...]`` ordered from the most recent window
        (``idx == 0``) to the oldest, with both bounds formatted as ``YYYY-MM-DD``.
    """
    anchor = _d(as_of)
    iso = "%Y-%m-%d"
    return [
        (
            i,
            (anchor - timedelta(days=(i + 1) * days)).strftime(iso),
            (anchor - timedelta(days=i * days)).strftime(iso),
        )
        for i in range(n)
    ]
|
|
|
|
|
|
def windowed_independence(conn, rows: list[tuple], as_of: str, *, n: int = N_WINDOWS,
                          days: int = WINDOW_DAYS) -> dict:
    """Bucket rows into backward-looking windows and score each window's distinct sources.

    Args:
        conn: Handle passed through unchanged to ``eisc_for``.
        rows: ``[(date_iso, source_id), ...]``. Rows with a falsy date are skipped;
            only the first 10 characters of the date are compared.
        as_of: Anchor date; window 0 ends here (see ``window_bounds``).
        n: Number of windows to evaluate.
        days: Length of each window in days.

    Returns:
        A dict with:
          * ``counts``   - raw row count per window ``[c0, c1, ...]``
          * ``eisc``     - ``eisc_adj`` per window ``[N0, N1, ...]``, rounded to 3 places
          * ``k_eff``    - ``k_eff`` per window
          * ``acceleration`` - ``N0 - 2*N1 + N2`` (independence-weighted 2nd
            difference), or ``0.0`` when fewer than three windows are requested
          * ``eisc0`` / ``k_eff0`` - the window-0 values
          * ``sources0`` - distinct source ids seen in window 0, in first-seen order
          * ``n_total``  - total rows across all windows

        When ``n < 1`` there are no windows and an empty, zero-valued result is
        returned.
    """
    bounds = window_bounds(as_of, n=n, days=days)
    if not bounds:
        # No windows requested: return the same shape instead of failing on eiscs[0].
        return {
            "counts": [], "eisc": [], "k_eff": [],
            "acceleration": 0.0, "eisc0": 0.0, "k_eff0": 0,
            "sources0": [], "n_total": 0,
        }

    counts, eiscs, keffs, src_sets = [], [], [], []
    for _idx, start, end in bounds:
        # Half-open (start, end]: a row dated exactly on a shared boundary belongs
        # to the newer window only, so windows never double-count.
        win = [r for r in rows if r[0] and start < r[0][:10] <= end]
        # dict.fromkeys de-duplicates while preserving first-seen order, so the
        # source list is deterministic across runs (a set's order is not).
        srcs = list(dict.fromkeys(r[1] for r in win))
        # An empty window scores zero without calling eisc_for.
        e = eisc_for(conn, srcs) if srcs else {"eisc_adj": 0.0, "k_eff": 0}
        counts.append(len(win))
        eiscs.append(e["eisc_adj"])
        keffs.append(e["k_eff"])
        src_sets.append(srcs)

    # The second difference needs three windows; with fewer it is defined as 0.0.
    accel = eiscs[0] - 2 * eiscs[1] + eiscs[2] if n >= 3 else 0.0
    return {
        "counts": counts, "eisc": [round(x, 3) for x in eiscs], "k_eff": keffs,
        "acceleration": round(accel, 3), "eisc0": round(eiscs[0], 3), "k_eff0": keffs[0],
        "sources0": src_sets[0], "n_total": sum(counts),
    }
|