Initial commit: Ten31 Signal Engine (ingest, scoring brain, corpus seeds)
This commit is contained in:
@@ -0,0 +1,72 @@
|
||||
"""The §4.2 claim-extraction prompt. Prompt engineering is ours (§13.3); the schema is finalized.

Discipline encoded here (the whole point of the system, §2/§4.2):
- Extract at the level of the PROPOSITION; emit ZERO when there is no substantive claim.
- Separate topic from stance: capture stance-vs-consensus explicitly, never as a bull/bear label.
- thesis_seam is a TAG, not a filter — off-thesis and anti-thesis claims are still extracted.
"""
|
||||
from __future__ import annotations
|
||||
|
||||
# Hybrid topic vocabulary (§4.2): a small SEEDED controlled list. The model reuses one when it
# fits and proposes a concise snake_case topic otherwise; emergent topics are merged on a schedule.
# Entries are grouped by thesis seam; order is preserved because the list is joined verbatim
# into the user message.
SEED_TOPICS = [
    # energy <-> compute
    "ai_compute_demand", "ai_power_constraint", "datacenter_buildout", "grid_interconnect",
    "transformers_equipment", "nuclear_power", "natural_gas_power", "uranium_supply",
    "cooling_infrastructure", "miner_flexible_load", "mining_ai_pivot",
    # debasement <-> bitcoin
    "bitcoin_reserve_asset", "bitcoin_collateral_credit", "bitcoin_treasury_strategy",
    "btc_custody_regulation", "sovereign_bitcoin_adoption",
    # ai <-> data ownership
    "ai_data_ownership", "confidential_inference", "ai_commoditization",
    # macro
    "fed_policy", "fiscal_debasement", "stablecoins_cbdc",
]
|
||||
|
||||
# System prompt for the claim-extraction call. It is sent verbatim as the "system" message, so
# every character here is runtime text. A trailing backslash joins a source line to the next
# one WITHOUT inserting a newline (the sentence continues); a plain line break is a real newline
# in the prompt. Keep continuation lines at column 0 so no stray indentation leaks into the text.
_SYSTEM = """You are the claim-extraction component of an investment signal engine. You read a passage \
(an SEC filing excerpt or a podcast/earnings-call transcript) and extract structured CLAIM UNITS.

A CLAIM UNIT is a single normalized proposition that someone asserts — a forward-looking prediction, \
an interpretive or causal judgment, or a stance taken against a prevailing view. It must be specific \
enough to later be checked against the world.

CRITICAL DISCIPLINE — be willing to extract NOTHING:
- Most passages contain ZERO claim units. Boilerplate, legal disclaimers, ad reads, pleasantries, \
generic descriptions, routine financial line-items, and recitations of well-known news are NOT claims.
- Do NOT invent claims. Do NOT emit one claim per paragraph to seem thorough. If the passage has no \
substantive proposition, return {"claims": []}. A precise empty answer is the correct, valued output.
- Extract at the level of the PROPOSITION: one normalized subject-assertion-object sentence each. A \
single rich passage may yield several; a long dull one yields none.

For EACH claim unit, output these fields:
- "proposition": one normalized sentence (subject-assertion-object), self-contained.
- "topic_canonical": a concise snake_case topic for clustering. REUSE one of the provided seed topics \
when it fits; otherwise propose a new concise snake_case label. Normalize synonyms (Fed/FOMC/rates → fed_policy).
- "topic_raw": the topic as actually phrased in the passage.
- "claimant": who asserts it (speaker name or the filing company). Use "unknown" if unclear.
- "claim_type": one of interpretive | predictive | descriptive | reactive. (interpretive/predictive = \
insight; descriptive/reactive = news echo — extract those only if clearly salient.)
- "time_horizon": one of near | medium | long | unspecified (for predictive claims especially).
- "confidence": the claimant's apparent conviction — one of low | med | high.
- "engages_consensus": true ONLY if the claim explicitly argues against a stated mainstream view.
- "counters_position": the mainstream position it argues against, or null.
- "thesis_seam": one of energy_compute | debasement_bitcoin | ai_data_ownership | none. This is a TAG \
for relevance only — tag off-thesis claims "none" and STILL extract them.
- "salience": central | secondary | aside (how central the claim is to the passage).

Return ONLY a JSON object: {"claims": [ {...}, ... ]}. No prose, no markdown."""
|
||||
|
||||
|
||||
def build_messages(text: str, *, source_name: str, source_cluster: str | None,
                   date: str | None, kind: str) -> list[dict[str, str]]:
    """Build the chat messages for one claim-extraction request.

    Args:
        text: The passage to extract claims from; appended verbatim after "PASSAGE:".
        source_name: Name of the source; a falsy value is rendered as "unknown".
        source_cluster: Cluster label for the source; a falsy value is rendered as "n/a".
        date: Date string for the passage; a falsy value is rendered as "n/a".
        kind: Source type label, interpolated as-is into the context header.

    Returns:
        A two-element list: the system message carrying ``_SYSTEM`` and a user message
        carrying the source context, the seed-topic list, and the passage.
    """
    # The seed vocabulary is offered to the model as a flat comma-separated list.
    seed = ", ".join(SEED_TOPICS)
    context = (
        f"Source: {source_name or 'unknown'} (cluster: {source_cluster or 'n/a'}, type: {kind}, "
        f"date: {date or 'n/a'}).\n"
        f"Seed topics to reuse when they fit: {seed}.\n\n"
        f"PASSAGE:\n{text}"
    )
    return [
        {"role": "system", "content": _SYSTEM},
        {"role": "user", "content": context},
    ]
|
||||
Reference in New Issue
Block a user