Phase 1 Workstream A+E: thesis substrate + dual-approval gate

- migration 0002_phase1_architect: thesis_lines (core spine + per-segment lines), thesis_nodes (+ append-only revisions), thesis_versions (one-canonical-per-line DB invariant), thesis_reviews (dual approval + feedback), segments. Reversible.
- backend/mcp/architect_tools.py: agent draft tools (node tree, versions, segments, get_canonical fails-closed) — NO self-approval path. MCP-exposed.
- backend/thesis_review.py + server.py routes: human-gated approval. Dual sign-off via thesis_required_approvals; atomic supersede; every action logged.
- docs/PHASE_1.md (kickoff brief); docs/OPERATIONS.md (partner guide); start9/0.4 "Resolve duplicate names" fuzzy action.

Verified on synthetic data: dual approval promotes correctly, exactly one canonical survives supersede, get_canonical fails closed, full interaction_log.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-05 10:20:00 -05:00
parent 6be2e40f54
commit 3e199fd8d5
10 changed files with 993 additions and 0 deletions
@@ -0,0 +1,281 @@
+"""Architect MCP tool logic (Phase 1, Workstream A/E) — plain, testable functions.
+
+The Architect drafts and iterates on the thesis; it CANNOT make anything canonical
+— promotion to canonical is a human-only action on a CRM HTTP route (server.py),
+not exposed here (guardrail #4). Every write goes through interaction_log
+(guardrail #5). Mirrors crm_tools.py conventions.
+
+Tool surface:
+  reads   list_thesis_lines, get_thesis, get_node, get_node_history,
+          list_versions, get_canonical_thesis, get_review_feedback,
+          list_segments, get_segment
+  drafts  create_thesis_line, upsert_thesis_node, create_thesis_version,
+          submit_version_for_review, upsert_segment
+NO approve/promote/publish/outbound tool exists.
+"""
+import json
+import os
+import sqlite3
+import sys
+import uuid
+from datetime import datetime, timezone
+
+sys.path.insert(0, os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "ingest"))
+import config  # noqa: E402
+
+
+def _conn(db=None):
+    c = sqlite3.connect(db or os.environ.get("CRM_DB_PATH") or config.DEFAULT_DB)
+    c.row_factory = sqlite3.Row
+    c.execute("PRAGMA foreign_keys=ON")
+    return c
+
+
+def _now():
+    return datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + "Z"
+
+
+def _eid(prefix):
+    return f"{prefix}_{uuid.uuid4().hex[:16]}"
+
+
+def _log(c, action, target_id, payload, actor_id="architect", actor_type="agent"):
+    c.execute("""INSERT INTO interaction_log
+        (id, ts, actor_type, actor_id, action, target_type, target_id, payload, source, created_at)
+        VALUES (?,?,?,?,?,?,?,?,?,?)""",
+        (str(uuid.uuid4()), _now(), actor_type, actor_id, action, "thesis", target_id,
+         json.dumps(payload) if payload is not None else None, "architect", _now()))
+
+
+def _line_by_key(c, line_key):
+    return c.execute("SELECT * FROM thesis_lines WHERE line_key=? AND deleted_at IS NULL", (line_key,)).fetchone()
+
+
+# ── reads ─────────────────────────────────────────────────────────────────────
+
+def list_thesis_lines(db=None):
+    c = _conn(db)
+    rows = [dict(r) for r in c.execute(
+        "SELECT id, line_key, name, segment_key, is_core, status FROM thesis_lines WHERE deleted_at IS NULL ORDER BY is_core DESC, name")]
+    c.close()
+    return {"lines": rows, "count": len(rows)}
+
+
+def _node_tree(c, line_id):
+    nodes = [dict(r) for r in c.execute(
+        "SELECT * FROM thesis_nodes WHERE line_id=? AND deleted_at IS NULL ORDER BY ord", (line_id,))]
+    by_parent = {}
+    for n in nodes:
+        by_parent.setdefault(n["parent_id"], []).append(n)
+    def build(pid):
+        out = []
+        for n in by_parent.get(pid, []):
+            out.append({**{k: n[k] for k in ("id", "node_type", "title", "body", "status", "variant_group", "ord")},
+                        "children": build(n["id"])})
+        return out
+    return build(None)
+
+
+def get_thesis(line_key, db=None):
+    """A thesis line + its node tree."""
+    c = _conn(db)
+    line = _line_by_key(c, line_key)
+    if not line:
+        c.close()
+        return {"error": "not_found", "line_key": line_key}
+    out = {"line": dict(line), "tree": _node_tree(c, line["id"])}
+    c.close()
+    return out
+
+
+def get_node(node_id, db=None):
+    c = _conn(db)
+    r = c.execute("SELECT * FROM thesis_nodes WHERE id=?", (node_id,)).fetchone()
+    c.close()
+    return dict(r) if r else {"error": "not_found", "node_id": node_id}
+
+
+def get_node_history(node_id, db=None):
+    c = _conn(db)
+    rows = [dict(r) for r in c.execute(
+        "SELECT rev_no, body, title, status, change_summary, change_reason, actor_type, actor_id, created_at "
+        "FROM thesis_node_revisions WHERE node_id=? ORDER BY rev_no DESC", (node_id,))]
+    c.close()
+    return {"node_id": node_id, "revisions": rows}
+
+
+def list_versions(line_key, db=None):
+    c = _conn(db)
+    line = _line_by_key(c, line_key)
+    if not line:
+        c.close()
+        return {"error": "not_found", "line_key": line_key}
+    rows = [dict(r) for r in c.execute(
+        "SELECT id, version_no, status, rationale, created_by, created_at, approved_at "
+        "FROM thesis_versions WHERE line_id=? ORDER BY version_no DESC", (line["id"],))]
+    c.close()
+    return {"line_key": line_key, "versions": rows}
+
+
+def get_canonical_thesis(line_key, db=None):
+    """The single canonical version's body_json. FAILS CLOSED if none approved —
+    so Scribe/downstream agents can never generate against an unapproved thesis."""
+    c = _conn(db)
+    line = _line_by_key(c, line_key)
+    if not line:
+        c.close()
+        return {"status": "no_such_line", "line_key": line_key}
+    r = c.execute("SELECT * FROM thesis_versions WHERE line_id=? AND status='canonical'", (line["id"],)).fetchone()
+    c.close()
+    if not r:
+        return {"status": "no_canonical_thesis", "line_key": line_key}
+    return {"status": "ok", "line_key": line_key, "version_id": r["id"], "version_no": r["version_no"],
+            "approved_at": r["approved_at"], "thesis": json.loads(r["body_json"])}
+
+
+def get_review_feedback(version_id, db=None):
+    """Partners' reviews/feedback on a version — what the Architect iterates on."""
+    c = _conn(db)
+    rows = [dict(r) for r in c.execute(
+        "SELECT reviewer_user_id, decision, feedback, target_node_id, created_at "
+        "FROM thesis_reviews WHERE version_id=? ORDER BY created_at", (version_id,))]
+    approvals = sum(1 for r in rows if r["decision"] == "approve")
+    c.close()
+    return {"version_id": version_id, "reviews": rows, "approvals": approvals}
+
+
+def list_segments(db=None):
+    c = _conn(db)
+    rows = [dict(r) for r in c.execute(
+        "SELECT segment_key, name, definition, needs_to_hear, avoid, version_no FROM segments WHERE status='active' ORDER BY name")]
+    c.close()
+    return {"segments": rows, "count": len(rows)}
+
+
+def get_segment(segment_key, db=None):
+    c = _conn(db)
+    r = c.execute("SELECT * FROM segments WHERE segment_key=? AND status='active'", (segment_key,)).fetchone()
+    c.close()
+    return dict(r) if r else {"error": "not_found", "segment_key": segment_key}
+
+
+# ── draft writes (logged; never canonical) ────────────────────────────────────
+
+def create_thesis_line(line_key, name, segment_key=None, is_core=False, description=None, db=None):
+    c = _conn(db)
+    lid = _eid("thl")
+    c.execute("""INSERT INTO thesis_lines (id, line_key, name, segment_key, is_core, description, created_at, updated_at)
+                 VALUES (?,?,?,?,?,?,?,?)""",
+              (lid, line_key, name, segment_key, 1 if is_core else 0, description, _now(), _now()))
+    _log(c, "thesis.line_created", lid, {"line_key": line_key, "segment_key": segment_key, "is_core": bool(is_core)})
+    c.commit()
+    c.close()
+    return {"id": lid, "line_key": line_key}
+
+
+def upsert_thesis_node(line_id, node_type, body, title=None, parent_id=None, ord=None,
+                       variant_group=None, node_id=None, change_reason=None, change_summary=None,
+                       actor_id="architect", claude_session_id=None, meta=None, db=None):
+    """Create or edit a node. On edit, the prior state is written to
+    thesis_node_revisions before the live row changes (full provenance)."""
+    c = _conn(db)
+    if node_id:
+        prev = c.execute("SELECT * FROM thesis_nodes WHERE id=?", (node_id,)).fetchone()
+        if not prev:
+            c.close()
+            return {"error": "not_found", "node_id": node_id}
+        rev_no = (c.execute("SELECT COALESCE(MAX(rev_no),0) FROM thesis_node_revisions WHERE node_id=?",
+                            (node_id,)).fetchone()[0]) + 1
+        c.execute("""INSERT INTO thesis_node_revisions
+            (id, node_id, line_id, rev_no, node_type, title, body, status, ord, variant_group, meta,
+             change_summary, change_reason, actor_type, actor_id, claude_session_id, created_at)
+            VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)""",
+            (str(uuid.uuid4()), node_id, prev["line_id"], rev_no, prev["node_type"], prev["title"], prev["body"],
+             prev["status"], prev["ord"], prev["variant_group"], prev["meta"], change_summary, change_reason,
+             "agent", actor_id, claude_session_id, _now()))
+        c.execute("""UPDATE thesis_nodes SET node_type=?, title=COALESCE(?,title), body=?, ord=COALESCE(?,ord),
+                     variant_group=?, meta=COALESCE(?,meta), updated_at=? WHERE id=?""",
+                  (node_type, title, body, ord, variant_group, json.dumps(meta) if meta else None, _now(), node_id))
+        _log(c, "thesis.node_revised", node_id, {"line_id": prev["line_id"], "rev_no": rev_no, "reason": change_reason})
+        out = {"id": node_id, "rev_no": rev_no}
+    else:
+        nid = _eid("thn")
+        if ord is None:
+            ord = (c.execute("SELECT COALESCE(MAX(ord),0) FROM thesis_nodes WHERE line_id=? AND parent_id IS ?",
+                             (line_id, parent_id)).fetchone()[0]) + 1.0
+        c.execute("""INSERT INTO thesis_nodes (id, line_id, parent_id, node_type, ord, title, body, status, variant_group, meta, created_at, updated_at)
+                     VALUES (?,?,?,?,?,?,?, 'draft', ?,?,?,?)""",
+                  (nid, line_id, parent_id, node_type, ord, title, body, variant_group,
+                   json.dumps(meta) if meta else None, _now(), _now()))
+        _log(c, "thesis.node_created", nid, {"line_id": line_id, "node_type": node_type})
+        out = {"id": nid, "rev_no": 0}
+    c.commit()
+    c.close()
+    return out
+
+
+def create_thesis_version(line_key, rationale=None, created_by="architect", db=None):
+    """Freeze the current node tree of a line into an immutable draft version
+    (body_json = the Architect->Scribe contract). Stays 'draft' until submitted
+    and human-approved."""
+    c = _conn(db)
+    line = _line_by_key(c, line_key)
+    if not line:
+        c.close()
+        return {"error": "not_found", "line_key": line_key}
+    tree = _node_tree(c, line["id"])
+    # typed projection for the Scribe contract
+    flat = [dict(r) for r in c.execute(
+        "SELECT node_type, title, body FROM thesis_nodes WHERE line_id=? AND deleted_at IS NULL ORDER BY ord", (line["id"],))]
+    def of(t):
+        return [{"title": n["title"], "body": n["body"]} for n in flat if n["node_type"] == t]
+    body_json = {
+        "line_key": line_key, "name": line["name"], "segment_key": line["segment_key"],
+        "throughline": of("throughline"), "pillars": of("section"), "claims": of("claim"),
+        "proof_points": of("proof_point"), "objections": of("objection"), "segment_cuts": of("segment_cut"),
+        "tree": tree, "generated_at": _now(),
+    }
+    vno = (c.execute("SELECT COALESCE(MAX(version_no),0) FROM thesis_versions WHERE line_id=?",
+                     (line["id"],)).fetchone()[0]) + 1
+    vid = _eid("thv")
+    c.execute("""INSERT INTO thesis_versions (id, line_id, version_no, body_json, status, rationale, created_by, created_at)
+                 VALUES (?,?,?,?, 'draft', ?,?,?)""",
+              (vid, line["id"], vno, json.dumps(body_json), rationale, created_by, _now()))
+    _log(c, "thesis.version_created", vid, {"line_key": line_key, "version_no": vno})
+    c.commit()
+    c.close()
+    return {"id": vid, "version_no": vno, "status": "draft"}
+
+
+def submit_version_for_review(version_id, db=None):
+    c = _conn(db)
+    r = c.execute("SELECT status FROM thesis_versions WHERE id=?", (version_id,)).fetchone()
+    if not r:
+        c.close()
+        return {"error": "not_found", "version_id": version_id}
+    if r["status"] != "draft":
+        c.close()
+        return {"error": "not_draft", "status": r["status"]}
+    c.execute("UPDATE thesis_versions SET status='in_review' WHERE id=?", (version_id,))
+    _log(c, "thesis.submitted_for_review", version_id, None)
+    c.commit()
+    c.close()
+    return {"version_id": version_id, "status": "in_review"}
+
+
+def upsert_segment(segment_key, name, definition=None, needs_to_hear=None, avoid=None, db=None):
+    """Create/replace a segment's active definition (retire the prior active row)."""
+    c = _conn(db)
+    prev = c.execute("SELECT version_no FROM segments WHERE segment_key=? AND status='active'", (segment_key,)).fetchone()
+    vno = (prev["version_no"] + 1) if prev else 1
+    if prev:
+        c.execute("UPDATE segments SET status='retired', updated_at=? WHERE segment_key=? AND status='active'",
+                  (_now(), segment_key))
+    sid = _eid("seg")
+    c.execute("""INSERT INTO segments (id, segment_key, name, definition, needs_to_hear, avoid, version_no, status, created_at, updated_at)
+                 VALUES (?,?,?,?,?,?,?, 'active', ?,?)""",
+              (sid, segment_key, name, definition, needs_to_hear, avoid, vno, _now(), _now()))
+    _log(c, "segment.upserted", sid, {"segment_key": segment_key, "version_no": vno})
+    c.commit()
+    c.close()
+    return {"id": sid, "segment_key": segment_key, "version_no": vno}
@@ -18,6 +18,7 @@ import sys

 sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
 import crm_tools as t  # noqa: E402
+import architect_tools as at  # noqa: E402

 from mcp.server.fastmcp import FastMCP  # noqa: E402

@@ -84,5 +85,73 @@ def set_entity_enrichment(lp_id: str, fields: dict, actor_id: str = "analyst") -
    return t.set_entity_enrichment(lp_id, fields, actor_id=actor_id)


+# ── Architect thesis tools (Phase 1; drafts only — no approve/promote here) ──
+@mcp.tool()
+def get_thesis(line_key: str) -> dict:
+    """Fetch a thesis line and its node tree (throughline → sections → claims → proof-points)."""
+    return at.get_thesis(line_key)
+
+
+@mcp.tool()
+def list_thesis_lines() -> dict:
+    """List all thesis lines (the core spine + per-segment lines)."""
+    return at.list_thesis_lines()
+
+
+@mcp.tool()
+def get_canonical_thesis(line_key: str) -> dict:
+    """The current partner-APPROVED canonical thesis for a line. Fails closed if none approved."""
+    return at.get_canonical_thesis(line_key)
+
+
+@mcp.tool()
+def get_review_feedback(version_id: str) -> dict:
+    """Partners' reviews/feedback on a thesis version — what to iterate on."""
+    return at.get_review_feedback(version_id)
+
+
+@mcp.tool()
+def create_thesis_line(line_key: str, name: str, segment_key: str = "", is_core: bool = False,
+                       description: str = "") -> dict:
+    """Create a new thesis line (a narrative, e.g. the core spine or a per-segment line)."""
+    return at.create_thesis_line(line_key, name, segment_key=segment_key or None,
+                                 is_core=is_core, description=description or None)
+
+
+@mcp.tool()
+def upsert_thesis_node(line_id: str, node_type: str, body: str, title: str = "", parent_id: str = "",
+                       node_id: str = "", variant_group: str = "", change_reason: str = "") -> dict:
+    """Create or edit a thesis node (a claim, section, proof-point, etc.). Edits are revisioned."""
+    return at.upsert_thesis_node(line_id, node_type, body, title=title or None,
+                                 parent_id=parent_id or None, node_id=node_id or None,
+                                 variant_group=variant_group or None, change_reason=change_reason or None)
+
+
+@mcp.tool()
+def create_thesis_version(line_key: str, rationale: str = "") -> dict:
+    """Freeze the current node tree into an immutable DRAFT version (stays draft until a human approves)."""
+    return at.create_thesis_version(line_key, rationale=rationale or None)
+
+
+@mcp.tool()
+def submit_version_for_review(version_id: str) -> dict:
+    """Move a draft thesis version to 'in_review' so the partners can weigh in. Cannot make it canonical."""
+    return at.submit_version_for_review(version_id)
+
+
+@mcp.tool()
+def list_segments() -> dict:
+    """List active LP segment definitions."""
+    return at.list_segments()
+
+
+@mcp.tool()
+def upsert_segment(segment_key: str, name: str, definition: str = "", needs_to_hear: str = "",
+                   avoid: str = "") -> dict:
+    """Create/replace an LP segment's active definition."""
+    return at.upsert_segment(segment_key, name, definition=definition or None,
+                             needs_to_hear=needs_to_hear or None, avoid=avoid or None)
+
+
 if __name__ == "__main__":
    mcp.run()
@@ -0,0 +1,8 @@
+-- Reversal of 0002_phase1_architect.sql. Run manually (never auto-applied).
+DROP TABLE IF EXISTS thesis_reviews;
+DROP TABLE IF EXISTS thesis_node_revisions;
+DROP TABLE IF EXISTS thesis_nodes;
+DROP TABLE IF EXISTS thesis_versions;
+DROP TABLE IF EXISTS thesis_lines;
+DROP TABLE IF EXISTS segments;
+DELETE FROM schema_migrations WHERE filename = '0002_phase1_architect.sql';
@@ -0,0 +1,124 @@
+-- Phase 1 — Workstream A/E: the Architect's thesis substrate.
+--
+-- ADDITIVE AND REVERSIBLE ONLY (guardrail #3). Reversal: 0002_phase1_architect.down.sql.
+-- Applied once by backend/core_migrations.py (schema_migrations ledger).
+--
+-- Models the "living messaging source of truth" as: multiple THESIS LINES (a core
+-- spine + per-segment narratives), each a tree of typed NODES with full revision
+-- history, frozen into immutable VERSIONS that a human signs off to make canonical.
+-- Dual approval + collaborative text feedback live in thesis_reviews. Segments are
+-- versioned defs that tie to the Phase-0 canonical_entities.segment pointer.
+
+-- ============================================================================
+-- thesis_lines — each distinct narrative line. is_core=1 is the shared spine;
+-- others are segment-specific (Grant: different segments may carry different,
+-- related thesis lines). Ids are a prefix + 16 hex chars (not the 8-char CRM ids).
+-- ============================================================================
+CREATE TABLE IF NOT EXISTS thesis_lines (
+    id           TEXT PRIMARY KEY,        -- 'thl_' + first 16 hex chars of a uuid4
+    line_key     TEXT NOT NULL UNIQUE,    -- slug: 'core' | 'btc_native_hnwi' | ...
+    name         TEXT NOT NULL,
+    segment_key  TEXT,                    -- NULL for the core spine; else the segment this line serves
+    is_core      INTEGER NOT NULL DEFAULT 0,
+    description  TEXT,
+    status       TEXT NOT NULL DEFAULT 'active',
+    created_at   TEXT DEFAULT (datetime('now')),
+    updated_at   TEXT DEFAULT (datetime('now')),
+    deleted_at   TEXT
+);
+
+-- ============================================================================
+-- thesis_nodes — typed node tree per line (the unit of iteration).
+-- ============================================================================
+CREATE TABLE IF NOT EXISTS thesis_nodes (
+    id            TEXT PRIMARY KEY,       -- 'thn_' + first 16 hex chars of a uuid4
+    line_id       TEXT NOT NULL REFERENCES thesis_lines(id) ON DELETE CASCADE,
+    parent_id     TEXT,                   -- tree edge; NULL for thesis_root
+    node_type     TEXT NOT NULL,          -- thesis_root|throughline|section|claim|proof_point|objection|rebuttal|segment_cut
+    ord           REAL NOT NULL DEFAULT 0,-- REAL so insert-between never renumbers siblings
+    title         TEXT,
+    body          TEXT,
+    status        TEXT NOT NULL DEFAULT 'draft',   -- draft|candidate|approved|retired
+    variant_group TEXT,                   -- nodes sharing this are competing phrasings of one idea (A/B)
+    meta          TEXT,                   -- JSON: tone tags, confidence, evidence_refs -> canonical_entities/source ids
+    created_at    TEXT DEFAULT (datetime('now')),
+    updated_at    TEXT DEFAULT (datetime('now')),
+    deleted_at    TEXT
+);
+CREATE INDEX IF NOT EXISTS idx_thesis_nodes_line   ON thesis_nodes(line_id);
+CREATE INDEX IF NOT EXISTS idx_thesis_nodes_parent ON thesis_nodes(parent_id);
+CREATE INDEX IF NOT EXISTS idx_thesis_nodes_variant ON thesis_nodes(variant_group);
+
+-- ============================================================================
+-- thesis_node_revisions — append-only per-node history (provenance + undo).
+-- ============================================================================
+CREATE TABLE IF NOT EXISTS thesis_node_revisions (
+    id                TEXT PRIMARY KEY,
+    node_id           TEXT NOT NULL,
+    line_id           TEXT,
+    rev_no            INTEGER NOT NULL,
+    node_type         TEXT, title TEXT, body TEXT, status TEXT, ord REAL, variant_group TEXT, meta TEXT,
+    change_summary    TEXT,
+    change_reason     TEXT,               -- WHY the edit
+    actor_type        TEXT,               -- human | agent
+    actor_id          TEXT,               -- users.id or 'architect'
+    claude_session_id TEXT,               -- ties an agent edit back to its reasoning session
+    created_at        TEXT DEFAULT (datetime('now'))
+);
+CREATE INDEX IF NOT EXISTS idx_thesis_revs_node ON thesis_node_revisions(node_id);
+
+-- ============================================================================
+-- thesis_versions — immutable named snapshots per line; ONE canonical per line.
+-- body_json freezes the published artifact (the Architect->Scribe contract).
+-- ============================================================================
+CREATE TABLE IF NOT EXISTS thesis_versions (
+    id                TEXT PRIMARY KEY,   -- 'thv_' + first 16 hex chars of a uuid4
+    line_id           TEXT NOT NULL REFERENCES thesis_lines(id) ON DELETE CASCADE,
+    version_no        INTEGER NOT NULL,
+    body_json         TEXT NOT NULL,      -- JSON: {line_key,name,segment_key,throughline,pillars,claims,proof_points,objections,segment_cuts,tree,generated_at}
+    status            TEXT NOT NULL DEFAULT 'draft',   -- draft|in_review|canonical|superseded
+    rationale         TEXT,
+    parent_version_id TEXT,
+    created_by        TEXT,               -- users.id or 'architect'
+    created_at        TEXT DEFAULT (datetime('now')),
+    approved_at       TEXT,
+    superseded_by     TEXT
+);
+CREATE INDEX IF NOT EXISTS idx_thesis_versions_line ON thesis_versions(line_id, version_no);
+-- Hard invariant: at most one canonical version per line.
+CREATE UNIQUE INDEX IF NOT EXISTS idx_thesis_one_canonical ON thesis_versions(line_id) WHERE status = 'canonical';
+
+-- ============================================================================
+-- thesis_reviews — dual approval + collaborative text feedback. Both partners
+-- can comment/approve; the Architect ingests feedback to iterate. Promotion to
+-- canonical happens when 'approve' reviews from distinct reviewers meet the required count
+-- (app_settings 'thesis_required_approvals', default 1) via the human route.
+-- ============================================================================
+CREATE TABLE IF NOT EXISTS thesis_reviews (
+    id               TEXT PRIMARY KEY,
+    version_id       TEXT NOT NULL REFERENCES thesis_versions(id) ON DELETE CASCADE,
+    reviewer_user_id TEXT NOT NULL,       -- users.id (the human partner)
+    decision         TEXT NOT NULL,       -- approve | request_changes | comment
+    feedback         TEXT,                -- free-text the Architect reads to iterate
+    target_node_id   TEXT,               -- optional: feedback scoped to one node
+    created_at       TEXT DEFAULT (datetime('now'))
+);
+CREATE INDEX IF NOT EXISTS idx_thesis_reviews_version ON thesis_reviews(version_id);
+
+-- ============================================================================
+-- segments — versioned LP segment definitions; one active per segment_key.
+-- canonical_entities.segment (Phase 0) stores the segment_key pointer.
+-- ============================================================================
+CREATE TABLE IF NOT EXISTS segments (
+    id            TEXT PRIMARY KEY,
+    segment_key   TEXT NOT NULL,          -- slug
+    name          TEXT NOT NULL,
+    definition    TEXT,
+    needs_to_hear TEXT,
+    avoid         TEXT,
+    version_no    INTEGER NOT NULL DEFAULT 1,
+    status        TEXT NOT NULL DEFAULT 'active',   -- active | retired
+    created_at    TEXT DEFAULT (datetime('now')),
+    updated_at    TEXT DEFAULT (datetime('now'))
+);
+CREATE UNIQUE INDEX IF NOT EXISTS idx_segments_one_active ON segments(segment_key) WHERE status = 'active';
@@ -37,6 +37,12 @@ except Exception:
    jwt = None
    JWT_AVAILABLE = False

+# Phase-1 Architect: human-gated thesis approval logic (pure stdlib; guarded).
+try:
+    import thesis_review  # type: ignore
+except Exception:
+    thesis_review = None
+
 # ─── Configuration ────────────────────────────────────────────────────────────

 BASE_DIR = os.path.dirname(os.path.abspath(__file__))
@@ -1731,6 +1737,16 @@ class CRMHandler(BaseHTTPRequestHandler):
        if path == '/api/audit-log':
            return self.handle_list_audit_log(user, params)

+        # ─── Architect thesis (Phase 1) ───
+        if path == '/api/thesis/lines':
+            return self.handle_list_thesis_lines(user)
+        if path == '/api/thesis/versions':
+            return self.handle_list_thesis_review_queue(user)
+        if re.match(r'^/api/thesis/versions/[^/]+$', path):
+            return self.handle_get_thesis_version(user, path.split('/')[-1])
+        if re.match(r'^/api/thesis/[^/]+/canonical$', path):
+            return self.handle_get_canonical_thesis(user, path.split('/')[-2])
+
        self.send_error_json("Not found", 404)

    def do_POST(self):
@@ -1795,6 +1811,10 @@ class CRMHandler(BaseHTTPRequestHandler):
        if path == '/api/fundraising/backup-verify':
            return self.handle_verify_fundraising_backups(user)

+        # ─── Architect thesis review (Phase 1, human approval gate) ───
+        if re.match(r'^/api/thesis/versions/[^/]+/review$', path):
+            return self.handle_thesis_review(user, path.split('/')[-2], body)
+
        self.send_error_json("Not found", 404)

    def do_PUT(self):
@@ -3408,6 +3428,41 @@ class CRMHandler(BaseHTTPRequestHandler):
        conn.close()
        return self.send_json({"message": "Tag deleted"})

+    # ─── Architect thesis (Phase 1) ───
+    def handle_list_thesis_lines(self, user):
+        if thesis_review is None:
+            return self.send_error_json("Thesis module unavailable", 503)
+        return self.send_json(thesis_review.list_lines(DB_PATH))
+
+    def handle_list_thesis_review_queue(self, user):
+        if thesis_review is None:
+            return self.send_error_json("Thesis module unavailable", 503)
+        return self.send_json(thesis_review.list_versions_for_review(DB_PATH))
+
+    def handle_get_thesis_version(self, user, version_id):
+        if thesis_review is None:
+            return self.send_error_json("Thesis module unavailable", 503)
+        return self.send_json(thesis_review.get_version(DB_PATH, version_id))
+
+    def handle_get_canonical_thesis(self, user, line_key):
+        if thesis_review is None:
+            return self.send_error_json("Thesis module unavailable", 503)
+        return self.send_json(thesis_review.get_canonical(DB_PATH, line_key))
+
+    def handle_thesis_review(self, user, version_id, body):
+        # Promotion to canonical is a human partner action (guardrail #4).
+        if not require_admin(user):
+            return self.send_error_json("Admin required", 403)
+        if thesis_review is None:
+            return self.send_error_json("Thesis module unavailable", 503)
+        body = body or {}
+        res = thesis_review.record_review(DB_PATH, version_id, user['user_id'],
+                                          body.get('decision'), body.get('feedback'),
+                                          body.get('target_node_id'))
+        if res.get('error'):
+            return self.send_error_json(res['error'], 400)
+        return self.send_json({"data": res})
+
    def handle_list_users(self, user):
        conn = get_db()
        users = rows_to_list(conn.execute(
@@ -0,0 +1,153 @@
+"""Human-gated thesis approval (Phase 1 Workstream E).
+
+NOT an agent tool — called only by authenticated CRM routes in server.py. The
+Architect can draft and submit; only a human partner can promote a version to
+canonical. Supports Grant's dual-sign-off + collaborative feedback: both partners
+can leave reviews (approve / request_changes / comment with free-text the
+Architect reads to iterate); a version promotes to canonical once distinct
+'approve' reviewers reach `thesis_required_approvals` (app_settings, default 1 —
+set to 2 for dual sign-off). Promotion atomically supersedes the prior canonical,
+honoring the one-canonical-per-line DB invariant. Everything is logged.
+"""
+import json
+import sqlite3
+import uuid
+from datetime import datetime, timezone
+
+_VALID = ("approve", "request_changes", "comment")
+
+
+def _now():
+    return datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + "Z"
+
+
+def _conn(db):
+    c = sqlite3.connect(db)
+    c.row_factory = sqlite3.Row
+    c.execute("PRAGMA foreign_keys=ON")
+    return c
+
+
+def _log(c, actor_id, action, target_id, payload):
+    c.execute("""INSERT INTO interaction_log
+        (id, ts, actor_type, actor_id, action, target_type, target_id, payload, source, created_at)
+        VALUES (?,?,?,?,?,?,?,?,?,?)""",
+        (str(uuid.uuid4()), _now(), "human", actor_id, action, "thesis", target_id,
+         json.dumps(payload) if payload is not None else None, "crm_ui", _now()))
+
+
+def required_approvals(c):
+    r = c.execute("SELECT value_json FROM app_settings WHERE key='thesis_required_approvals'").fetchone()
+    try:
+        return max(1, int(json.loads(r[0]))) if r else 1
+    except Exception:
+        return 1
+
+
+def _approver_count(c, version_id):
+    return len({r[0] for r in c.execute(
+        "SELECT DISTINCT reviewer_user_id FROM thesis_reviews WHERE version_id=? AND decision='approve'",
+        (version_id,))})
+
+
+def _promote(c, version_row, approver_user_id):
+    """Atomically supersede the prior canonical (if any) for this line, then make
+    this version canonical. Supersede-first keeps the one-canonical unique index satisfied."""
+    line_id = version_row["line_id"]
+    prior = c.execute("SELECT id FROM thesis_versions WHERE line_id=? AND status='canonical'", (line_id,)).fetchone()
+    if prior:
+        c.execute("UPDATE thesis_versions SET status='superseded', superseded_by=? WHERE id=?",
+                  (version_row["id"], prior["id"]))
+    c.execute("UPDATE thesis_versions SET status='canonical', approved_at=? WHERE id=?", (_now(), version_row["id"]))
+    _log(c, approver_user_id, "thesis.approved", version_row["id"],
+         {"line_id": line_id, "superseded": prior["id"] if prior else None})
+
+
+def record_review(db, version_id, reviewer_user_id, decision, feedback=None, target_node_id=None):
+    """Record a partner's review. Promotes to canonical when approve-threshold met.
+    `reviewer_user_id` MUST be a real authenticated human (enforced by the route)."""
+    if decision not in _VALID:
+        return {"error": "bad_decision", "allowed": list(_VALID)}
+    c = _conn(db)
+    v = c.execute("SELECT * FROM thesis_versions WHERE id=?", (version_id,)).fetchone()
+    if not v:
+        c.close()
+        return {"error": "not_found", "version_id": version_id}
+
+    c.execute("""INSERT INTO thesis_reviews (id, version_id, reviewer_user_id, decision, feedback, target_node_id, created_at)
+                 VALUES (?,?,?,?,?,?,?)""",
+              (str(uuid.uuid4()), version_id, reviewer_user_id, decision, feedback, target_node_id, _now()))
+    _log(c, reviewer_user_id, f"thesis.review.{decision}", version_id,
+         {"feedback": feedback, "target_node_id": target_node_id})
+
+    promoted = False
+    approvals = _approver_count(c, version_id)
+    need = required_approvals(c)
+    if decision == "approve" and v["status"] in ("draft", "in_review") and approvals >= need:
+        _promote(c, v, reviewer_user_id)
+        promoted = True
+
+    c.commit()
+    c.close()
+    return {"version_id": version_id, "decision": decision, "approvals": approvals,
+            "required": need, "promoted_to_canonical": promoted}
+
+
+# ── reads for the review UI ───────────────────────────────────────────────────
+
+def list_lines(db):
+    c = _conn(db)
+    rows = [dict(r) for r in c.execute(
+        "SELECT id, line_key, name, segment_key, is_core, status FROM thesis_lines WHERE deleted_at IS NULL ORDER BY is_core DESC, name")]
+    c.close()
+    return {"lines": rows}
+
+
+def list_versions_for_review(db):
+    c = _conn(db)
+    rows = []
+    for v in c.execute("""SELECT v.id, v.line_id, l.line_key, l.name, v.version_no, v.status, v.created_at, v.rationale
+                          FROM thesis_versions v JOIN thesis_lines l ON l.id=v.line_id
+                          WHERE v.status='in_review' ORDER BY v.created_at DESC"""):
+        d = dict(v)
+        d["approvals"] = _approver_count(c, v["id"])
+        d["required"] = required_approvals(c)
+        rows.append(d)
+    c.close()
+    return {"versions": rows}
+
+
+def get_canonical(db, line_key):
+    """The one canonical version's frozen body_json; fails closed if none."""
+    c = _conn(db)
+    line = c.execute("SELECT id FROM thesis_lines WHERE line_key=? AND deleted_at IS NULL", (line_key,)).fetchone()
+    if not line:
+        c.close()
+        return {"status": "no_such_line", "line_key": line_key}
+    r = c.execute("SELECT * FROM thesis_versions WHERE line_id=? AND status='canonical'", (line["id"],)).fetchone()
+    c.close()
+    if not r:
+        return {"status": "no_canonical_thesis", "line_key": line_key}
+    return {"status": "ok", "line_key": line_key, "version_id": r["id"], "version_no": r["version_no"],
+            "approved_at": r["approved_at"], "thesis": json.loads(r["body_json"])}
+
+
+def get_version(db, version_id):
+    c = _conn(db)
+    v = c.execute("SELECT * FROM thesis_versions WHERE id=?", (version_id,)).fetchone()
+    if not v:
+        c.close()
+        return {"error": "not_found", "version_id": version_id}
+    out = dict(v)
+    try:
+        out["body"] = json.loads(v["body_json"])
+    except Exception:
+        out["body"] = None
+    out.pop("body_json", None)
+    out["reviews"] = [dict(r) for r in c.execute(
+        "SELECT reviewer_user_id, decision, feedback, target_node_id, created_at FROM thesis_reviews WHERE version_id=? ORDER BY created_at",
+        (version_id,))]
+    out["approvals"] = _approver_count(c, version_id)
+    out["required"] = required_approvals(c)
+    c.close()
+    return out
@@ -0,0 +1,106 @@
+# Operating the Ten31 CRM — A Partner's Guide
+
+> **Status: DRAFT / living document.** This is the operator-facing guide to our new agent-enhanced CRM. It is written for the firm's non-engineer members — especially the partners who will be thought-partners and dual-approvers of the thesis. It will grow as the system is built; the open questions and "what's coming" notes below are real, not placeholders. Last updated 2026-06-05.
+
+---
+
+## 1. What this is
+
+Our CRM has quietly become two things at once.
+
+**First, it's the canonical "LP graph."** It is the single source of truth for who our LPs and prospects are, what we've committed and discussed, and how everyone connects. Historically we tracked investors in two different places — a classic contacts/opportunities system and the live fundraising grid (the collaborative spreadsheet the partners actually edit). Those two never agreed on a single record per person. The new layer fixes that: it resolves all the variants of one investor — their grid row, their contact card, their org, their closed-LP profile — into **one real canonical record**. That canonical record is what everything else is built on.
+
+**Second, it now has an AI-agent layer on top.** The vision is six specialized agents that widen the fundraising funnel and sharpen how we tell our story, all running on Claude for reasoning and on our own local models for anything sensitive. In one paragraph: **Scout** watches public sources for trigger events; **Analyst** builds LP dossiers and maps warm-intro paths; **Architect** helps us converge on and evolve our investment thesis; **Scribe** turns that thesis into content; **Closer** drafts outreach and meeting prep; and an **Orchestrator** schedules and routes work between them. Every one of them reads from the canonical LP graph, and none of them sends anything to a human without a partner approving it first.
+
+**Where we are today.** We are deliberately phased.
+
+- **Phase 0 (live): the data + retrieval substrate.** The canonical entities, an append-only interaction log, and search over our own corpus. No outward-facing agents exist yet — this is the foundation everything else stands on.
+- **Phase 1 (starting): the Architect.** A collaborative copilot for the thesis. It drafts and pressure-tests; a partner signs off; the approved thesis becomes the source of truth every later agent reads.
+- **Later (Phase 2–3): Scout, Analyst, Closer, Orchestrator** — and only after counsel has defined what we're allowed to do on outbound.
+
+---
+
+## 2. The big ideas, in plain terms
+
+**Canonical entities — one real record per LP.** No more "is this the John Smith from the grid or the J. Smith from contacts?" The system collapses name variants and cross-system duplicates into a single canonical identity. When you look something up, you get the whole person — not a fragment. This is also why our search works: if the same LP is scattered under three spellings, retrieval fragments and the agents get a partial picture.
+
+**The interaction log — everything is recorded.** There is now an append-only log of every meaningful action: every human touch (a logged call, a note, a meeting) and, going forward, every agent action (a draft generated, a record enriched, a thesis version approved). It is never edited or deleted, only appended. This is both our compliance trail and the agents' memory. The richer it is, the smarter every agent gets.
+
+**Retrieval / search over our own corpus.** We can now ask questions across everything we've ever recorded — notes, logged communications, fundraising-grid notes, and (once enabled) Gmail correspondence — and get back the most relevant pieces. It's a hybrid of meaning-based search and exact keyword/name matching, tuned on our own vocabulary so exact fund and LP names rank correctly. This is what lets an agent answer "did we ever discuss X with this LP?" instead of guessing.
+
+**Sovereignty — our sensitive data stays ours.** This is the non-negotiable. All the LP-specific data, the embeddings, the search index, and the duplicate-resolution all run on **our own infrastructure** — the Start9 server and our local Spark machines. Claude (a third party) is only ever sent the *minimum necessary, non-sensitive* context for a given task, and never a bulk export of the LP list. When an agent genuinely needs Claude to reason over real record content (later phases), that content first passes through a redaction step that swaps real names, amounts, and emails for placeholders, then swaps them back locally. The de-anonymization key never leaves our box.
+
+---
+
+## 3. How to operate it day-to-day
+
+You don't need to touch any code to operate this. Three habits and three buttons.
+
+**Keep the CRM clean.** The canonical graph is only as good as what goes in. When you add an investor, use the real legal name where you can, attach them to the right org, and avoid creating a second record for someone who's already there. The duplicate resolver is good, but it works best as a backstop, not a crutch.
+
+**Log interactions, and log them well — this is the highest-leverage habit.** When you have a call, a meeting, or a meaningful email exchange, log it with substance: what was discussed, the LP's reaction, objections raised, next steps. Two reasons this matters more than it looks. (1) It's our compliance record. (2) It is literally the training material the agents reason over. A thin "had a call" note teaches the agents nothing; "pushed back on energy thesis, worried about regulatory risk in Texas, wants to see Fund II returns" becomes evidence the Architect can use to anticipate objections and the Analyst can use to build a real dossier. Good logging compounds.
+
+**The three one-click actions (on the StartOS server page).** These run on our infrastructure and are safe to re-run any time. None of them modifies your CRM source data — they build or refresh the derived search index and the canonical IDs.
+
+- **Build search index** — the one-time (or full-rebuild) setup. It resolves the canonical entity IDs from your live data, then reads every record and builds the entire search index from scratch. Takes roughly 8–15 minutes. Use this for the initial go-live or if you ever want a clean rebuild.
+- **Refresh search index** — the fast, routine one. It updates the search index with just what's changed since the last run. Seconds to minutes. Use this to keep search current after a batch of edits. (Eventually this will run automatically on a schedule; for now it's a button.)
+- **Resolve duplicate names** — the smart de-duplication. The build step merges the obvious exact matches automatically and *flags* the harder, judgment-call pairs (e.g. "Kate" vs "Katherine"). This action asks our local Qwen model to decide which flagged pairs are truly the same person and merges them. It runs entirely on our infrastructure and is idempotent (safe to re-run). It needs our Spark Control gateway to be reachable, because that's where the local model lives.
+
+A sensible rhythm: **Build** once at go-live, **Resolve duplicate names** after the build flags candidates, and **Refresh** routinely as the grid and correspondence change.
+
+**Where to look when something seems off.** If search results feel stale, run **Refresh search index**. If the same LP shows up as two people, run **Resolve duplicate names** (and check you didn't create a true second record by accident). If an action fails mentioning Spark Control or Qdrant, the local-model gateway or the search database isn't reachable from the box — that's an infrastructure check, not a data problem. The interaction log is the place to see what happened and when.
+
+---
+
+## 4. The agent workflows — what's live, what's coming, and the approval gates
+
+The cardinal rule across all of them: **agents draft, partners approve, and nothing goes outbound without a human.** No agent emails an LP, posts publicly, or contacts a prospect on its own. Ever.
+
+| Agent | What it does | Status |
+|---|---|---|
+| **Architect** | Collaborative copilot for the thesis: generates competing framings of a claim, turns your critique into a clean edit, red-teams LP objections, and grounds every claim in real evidence from our corpus. | **Starting now (Phase 1)** |
+| **Scout** | Monitors public sources (X, filings) for trigger events worth acting on. | Coming (Phase 2) |
+| **Analyst** | Builds LP dossiers, enriches records with public info, maps warm-intro paths. | Coming (Phase 2) |
+| **Scribe** | Distributes the approved thesis as content across channels — read-only consumer of what the Architect produces. | Coming (after Architect) |
+| **Closer** | Drafts outreach, nurture sequences, and meeting prep. | Coming (Phase 3, gated) |
+| **Orchestrator** | Schedules and routes work between the agents. | Coming (Phase 3) |
+
+**The Architect, concretely (because it's the one you'll use first).** It is *not* a one-shot thesis generator. It's a workbench for **exploration → convergence → continual evolution.** You bring the seed of a thesis; it helps you sharpen it claim by claim. Each claim is a small, separately-editable node, so you can rework one argument without re-litigating the whole narrative, and hold competing phrasings side by side. Crucially, the Architect can *draft and stage* a candidate thesis version, but **it cannot make a version canonical.** Promoting a version to "this is our official thesis" is a deliberate human action through a partner-authenticated route — the plan supports single- or dual-partner sign-off (an open decision, see below). Once approved, that version becomes the single source every downstream agent reads, and it's logged in the interaction log as a human decision. Scribe and Closer can never generate against an unapproved draft.
+
+**The approval gates, summarized.** (1) Canonicalizing the thesis is a human-only action. (2) Any outbound message (Closer/Scribe) is drafted by an agent and *sent by a human* after review. (3) When agents reason over sensitive record content, it passes through the redaction boundary first. (4) The entire outbound capability is *blocked* until counsel has defined our solicitation posture — we don't ship cold outreach before that gate clears.
+
+---
+
+## 5. Best practices — getting the most out of it
+
+**Habits that compound:**
+
+- **Log richly and consistently.** This is the single biggest lever. Substance over checkbox. (See §3.)
+- **Tag and segment deliberately.** As segments firm up (e.g. family office, institution, bitcoin-native HNWI, energy player), assigning each LP to the right segment is what lets the Architect tailor "what this audience needs to hear" and lets us say the right thing to the right person.
+- **Use one real record per person.** Resolve duplicates when flagged; don't paper over them.
+- **Keep the index fresh.** Refresh after meaningful batches of edits so search and the agents reflect reality.
+- **Treat the thesis as versioned.** When the message evolves, evolve a claim node and re-approve — don't overwrite history. The whole point is recoverable iteration.
+
+**What NOT to do:**
+
+- **Don't bulk-export the LP list** to any third-party tool. Sovereignty is the line we don't cross.
+- **Don't paste real LP data or query results into a public Claude/ChatGPT session.** The local pipeline exists precisely so we don't have to.
+- **Don't treat the search index as the source of truth.** It's derived from the CRM and rebuildable in minutes; the CRM is canonical. If they ever disagree, the CRM wins and you rebuild the index.
+- **Don't let an agent's draft go out unreviewed.** A draft is a draft until a partner approves and sends it.
+- **Don't route bulk email ingest through Superhuman** (or any external mail tool) — use the built-in sovereign Gmail capture, which keeps mail on our box. Superhuman is great for *your* inbox triage and drafting; it's not our system of record.
+
+---
+
+## 6. This is a living document
+
+**Last updated:** 2026-06-05 · **Maintained by:** the build team, alongside the partners.
+
+This guide will expand as each agent comes online. Things deliberately left open for later phases:
+
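+- **Thesis approval policy** — single-partner vs. dual-partner sign-off. The system supports either: the number of distinct partner approvals required is a setting (`thesis_required_approvals`, default 1; set it to 2 for dual sign-off). Which policy the firm adopts — and therefore the dual-approver workflow this guide is partly written for — is still being decided.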
+- **LP segments** — the firm-defined audience set and the per-segment "what to say / what to avoid" is content the partners supply, not something the system invents.
+- **The agents themselves** — Scout, Analyst, Scribe, Closer, Orchestrator are described here as intent; their operating instructions get written when they're built.
+- **The compliance gate** — outbound capability stays off until counsel defines solicitation posture, accreditation/QP verification, and recordkeeping rules.
+- **Automatic index refresh** — today's manual "Refresh" button becomes a scheduled background sync.
+
+When in doubt about an operating question this guide doesn't answer, ask — and we'll fold the answer back in here.
@@ -0,0 +1,83 @@
+# Phase 1 — The Architect: Kickoff Brief
+
+**Goal:** stand up the **Architect** — a collaborative copilot that helps the partners *converge on* and then *continually evolve* a **versioned, evidence-grounded, partner-approved canonical thesis** (the "messaging source of truth"). The Architect drafts and pressure-tests; a partner signs off; the approved thesis becomes the single source every later agent reads. **Internal-only, collaborative, no outbound** (that's Scribe/Closer, later).
+
+See `CLAUDE.md` for settled architecture + guardrails; `docs/Ten31_Agentic_Build_Plan.md` §3–5. This brief assumes Phase 0 is built and deployed (canonical entities, the CRM MCP server with retrieval modes + `interaction_log`, the ingest pipeline).
+
+## Design stance (the load-bearing constraint)
+
+Grant: *"we are gravitating towards what we think the key message is, but we have NOT landed on it, and we may iterate over time."* So the Architect is **not** a one-shot generator of a finished thesis — it is a substrate for **exploration → convergence → continual evolution.** The unit of iteration is a small typed node (one claim, one proof-point, one throughline), not a monolithic doc, so partners can rework one claim without re-litigating the whole narrative, hold competing phrasings side by side, and promote a winner while keeping the rest recoverable.
+
+## What the partners must supply (the content the substrate can't invent)
+
+The Architect *sharpens an existing thesis; it does not author one from nothing.* These are inputs, co-authored in the first Architect sessions:
+- [ ] **Thesis seed (v1):** the current-best throughline (scarcity / critical-infrastructure tying bitcoin ↔ AI infrastructure / energy / freedom tech) broken into **3–5 pillars** and a first set of testable **claims**.
+- [ ] **LP segments** (build-plan open decision #4): confirm/define the distinct audiences (proposed starter set: family office, institution, bitcoin-native HNWI, energy player) and, per segment, *what they need to hear* and *what to avoid saying*.
+- [ ] **Voice:** tone/diction, "this is us / this is not us" before-after examples, sacred phrases, words we never use.
+- [ ] **Approval policy:** who may promote a thesis version to canonical (any admin? Grant specifically? dual partner sign-off?).
+
+## Workstream A — Thesis artifact + versioning *(substrate; buildable now, no content needed)*
+
+Additive, reversible migration `0002_phase1_architect.sql` (+ `.down.sql`) via the existing `core_migrations.py` runner, reusing every Phase-0 convention (full-length ids, soft-delete only, `interaction_log` on every write).
+- **`thesis_nodes`** — typed node tree (`thesis_root → throughline/section → claim → proof_point → objection/rebuttal → segment_cut`), `ord` as REAL (stable insert-between), `variant_group` for competing A/B phrasings, `status` (draft|candidate|approved|retired).
+- **`thesis_node_revisions`** — append-only per-node history (prior content + `change_summary`/`change_reason`/actor/`claude_session_id`): fine-grained undo + provenance.
+- **`thesis_versions`** — immutable named snapshots; **a DB-level partial-unique index guarantees at most one `canonical` version per thesis line.** Each approved version also freezes a `body_json` (throughline, pillars, claims, proof-points, segment angles, voice, guardrails) — the stable, machine-readable **Architect→Scribe contract**.
+- **Publish-on-approval:** approving a version publishes *only its* nodes into the existing Qdrant `crm_chunks` collection under new `thesis_*` `doc_type`s (idempotent), and prunes the prior version's thesis chunks — so a downstream search for "the message" returns the approved version, never a draft.
+
+*(Design decision to confirm: the fine-grained node tree (powerful for iteration) plus a frozen `body_json` snapshot per approved version (simple, stable contract for Scribe) — keep both; the tree is the editing surface, the snapshot is the published artifact.)*
+
+## Workstream B — The collaborative loop (Architect skills)
+
+The copilot session is turn-based propose → react → revise, delivered as Agent SDK **skills** (one per move, independently testable) over a new `backend/mcp/architect_tools.py` surface (drafts only, every move logged; the agent can stage candidates but **cannot cross the canonical gate**):
+1. **Vary** — generate ≥3 genuinely distinct framings of a target node, scored (sharpness, differentiation, evidence-backing, segment-portability, credibility).
+2. **Revise** — turn a free-text partner critique into a faithful before/after edit (never silently drop a framing the partner liked).
+3. **Red-team** — anticipate LP objections per segment, each with our drafted answer + an honest *substantiated / hand-wavy* flag.
+4. **Consistency-check** — when a throughline/pillar changes, surface every downstream node that now conflicts + a proposed reconciliation (apply none without partner acceptance).
+5. **Substantiate (ground)** — see Workstream D.
+
+Plus a session-orchestration skill that loads state, sequences moves, and resumes across sessions (replays deferred proposals, the open-objection ledger, still-weak claims) — proving iteration spans sessions, not just turns.
+
+## Workstream C — Segments & voice
+
+`segments` table (versioned; one `active` row per `segment_key`); reuse the Phase-0 `canonical_entities.segment` field as the pointer tagging each LP to a segment (closes the loop between *who an LP is* and *what we say to them*). Voice + each segment cut become skills (`ten31-voice`, `ten31-thesis-spine`, `ten31-segment-cut`); a segment cut must trace every claim to a spine pillar (orphans/contradictions surfaced), and a **drift flag** fires when a cut's spine version falls behind the active spine.
+
+## Workstream D — Grounding & defensibility
+
+The corpus is a **defensibility oracle, not a generator.** Each claim is a structured object (`draft|grounded|contested|retired`) that cannot leave `draft` without ≥1 **citation bundle pinned to a stable `source_model:source_id`** *and* a completed **counter-evidence sweep** (the negation framing, not just the claim). An **objection register** per claim is assembled from `get_interaction_history` + `keyword_search` (recurring LP pushback). Stale-evidence (>~12mo via `date_ts`) is flagged for revalidation. Uses the Phase-0 retrieval modes unchanged.
+- **Sovereignty:** retrieval + embeddings stay local. The thesis *content* is non-LP-specific messaging substance → generally fine to send to Claude as-is. But the *evidence* (real LP conversations) used to ground it is sensitive → the Claude-facing synthesis step routes through the **redaction/re-hydration boundary** (`docs/redaction-rehydration.md`). **The Architect is the first agent to send retrieved record substance to Claude, so this boundary must be built here** (scrub/rehydrate at Spark Control).
+
+## Workstream E — Approval gate & Scribe contract
+
+Canonicalization is a **logged human action**, enforced by capability not convention:
+- The promote-to-canonical edge is a **human-authenticated CRM route** (`POST /api/thesis/{id}/approve`, Bearer + admin) on `server.py` — **not exposed as an agent tool.** It atomically supersedes the prior canonical and writes a `thesis.approved` `interaction_log` row (`actor_type='human'`, real `users.id`).
+- A thin **"Thesis review" view** in the existing SPA (`frontend/index.html`): diff candidate vs canonical, Approve / Request-changes.
+- `get_canonical_thesis()` returns the one canonical version's `body_json`, **fails closed** if none — so Scribe can never generate against an unapproved thesis.
+- **Architect↔Scribe boundary:** Architect owns/articulates and writes only to `thesis_versions` (never outbound); **Scribe is a read-only consumer** of the canonical version, stamps each draft with the source `thesis_version_id`, and routes through its *own* separate review-before-publish gate.
+
+## Acceptance criteria
+
+- [ ] Migration 0002 (additive, reversible) creates the thesis tables with the one-canonical invariant; applies + reverses cleanly via the existing runner.
+- [ ] A thesis exists as a typed node tree with a seeded, partner-signed **canonical v1**; renders back to a coherent document; supports competing variants with logged, reversible promotion.
+- [ ] A partner can run a full session (load → intent → any of the 5 moves in any order → accept/reject/defer → converge) in one Agent SDK conversation; a later session resumes from prior state.
+- [ ] No claim promotes past `draft` without a pinned citation + counter-sweep; every citation is auditable back to its memo/call/email/note.
+- [ ] **No version becomes canonical except through the human route; the agent has no self-promotion path** (tested). Every transition is in `interaction_log`.
+- [ ] The redaction boundary is built and asserts no Tier-1 content / no real Tier-2 identifier reaches Claude in the grounding step (golden-file test).
+- [ ] The Architect→Scribe `body_json` contract is documented; a (future) Scribe draft is traceable to the exact approved `thesis_version_id`.
+- [ ] No outbound/publish/contact capability anywhere in the Architect surface (guardrails #4, #6).
+
+## Out of scope for Phase 1 (Architect sub-phase)
+
+- The Scribe *build* (distribution/publishing) — defined here only as the downstream contract; built as the next sub-phase with review-before-publish.
+- Any outbound send, public post, or LP contact. Scout/Analyst (Phase 2), Closer/Orchestrator (Phase 3).
+
+## Suggested order
+
+A (substrate) → E (gate + contract) → B (loop skills) → C (segments/voice) → D (grounding + redaction boundary). **A and E are buildable now without the thesis content;** B–D become useful once the partners seed v1. Start the content prep (the four inputs above) in parallel.
+
+## Open decisions for the owner
+
+1. **One thesis line, or several** (one throughline vs. per-vertical theses)?
+2. **The four content inputs** above (seed, segments, voice, approval policy) — the critical path.
+3. **Approval:** single-partner vs. dual sign-off; a dedicated `thesis_approver` capability vs. reuse `admin`.
+4. **Grounding dials** (partner-set, not Claude): rerank-score floor for "real" support; doc_type source-weighting (memo/transcript > one-line note?); counter-evidence threshold to mark a claim `contested`.
+5. **Phase-1 evidence scope:** internal corpus only, or admit external sources (web/filings) before Scout/Analyst? And is the Gmail corpus live/backfilled (thin email evidence if not)?
+6. **May a claim be promoted while still `contested`/unsubstantiated** (a deliberate bet), or must the gate block on unresolved weaknesses?
@@ -1,7 +1,9 @@
 import { sdk } from '../sdk'
 import { buildSearchIndex } from './buildSearchIndex'
 import { refreshSearchIndex } from './refreshSearchIndex'
+import { resolveDuplicates } from './resolveDuplicates'

 export const actions = sdk.Actions.of()
  .addAction(buildSearchIndex)
  .addAction(refreshSearchIndex)
+  .addAction(resolveDuplicates)
@@ -0,0 +1,112 @@
+import { i18n } from '../i18n'
+import { sdk } from '../sdk'
+import { DATA_MOUNT_PATH, IMAGE_ID } from '../utils'
+
+/**
+ * Manual "Resolve duplicate names" action (Phase-0 entity resolution, fuzzy tier).
+ *
+ * Runs the fuzzy entity-resolution tier on the box where /data/crm.db lives:
+ *
+ *   fuzzy_resolve.py --db /data/crm.db   (local-Qwen merge of name-variant dupes)
+ *
+ * The deterministic tier (entity_resolution.py, run by "Build search index")
+ * merges exact/normalized matches and FLAGS the harder name-variant pairs it
+ * won't merge on a guess (e.g. "Kate" / "Katherine"). This action runs the
+ * second tier: it asks the local Qwen model (via Spark Control) to decide which
+ * flagged pairs are truly the same LP and merges them into one canonical id.
+ * It is idempotent, read-only on the CRM source tables, and logs a row to
+ * interaction_log — so re-running is always safe.
+ *
+ * Implementation notes:
+ *   - The scripts import their siblings by bare name (`import config`, etc.),
+ *     so they must run with cwd = /app/backend/ingest.
+ *   - fuzzy_resolve.py talks to Spark Control (the Qwen chat completion that
+ *     adjudicates each candidate pair), so the Spark/Qdrant env must be present.
+ *     This action runs in its OWN subcontainer and does NOT go through
+ *     docker_entrypoint.sh, so it cannot inherit the entrypoint's exports — the
+ *     env is passed explicitly below. Spark Control must be reachable for the
+ *     Qwen call, or the run will fail.
+ *   - allowedStatuses: 'any' — the action runs in its own subcontainer with the
+ *     same /data volume mounted, so it works whether or not the CRM is running.
+ *     SQLite WAL mode means a concurrently-running CRM is fine for these
+ *     reads/derived writes.
+ */
+
+const INGEST_DIR = '/app/backend/ingest'
+const DB_PATH = DATA_MOUNT_PATH + '/crm.db'
+
+// OPERATOR: endpoints the ingest scripts use to reach Spark Control and Qdrant.
+// The values below are the Ten31 LAN defaults; change them to match your own
+// network, and mirror any change in the export block of docker_entrypoint.sh.
+// The duplication is deliberate: this action never runs the entrypoint, so it
+// carries its own copy. SPARK_CONTROL_VERIFY_TLS is 'false' because Spark
+// Control serves TLS with a self-signed certificate by default.
+const ingestEnv: Record<string, string> = {
+  CRM_DB_PATH: DB_PATH,
+  SPARK_CONTROL_URL: 'https://192.168.1.72:62419',
+  SPARK_CONTROL_VERIFY_TLS: 'false',
+  QDRANT_URL: 'http://192.168.1.87:6333',
+}
+
+export const resolveDuplicates = sdk.Action.withoutInput(
+  // id — stable identifier for this action
+  'resolve-duplicate-names',
+
+  // metadata — label, help text, and when the action may be invoked
+  async ({ effects }) => {
+    const label = i18n('Resolve duplicate names')
+    const blurb = i18n(
+      'Use the local Qwen model to merge name-variant duplicates the ' +
+        'deterministic resolver flagged (e.g. Kate/Katherine). Idempotent; runs ' +
+        'entirely on your infrastructure. Requires Spark Control to be reachable ' +
+        '(for the Qwen call; set SPARK_CONTROL_URL).',
+    )
+    return {
+      name: label,
+      description: blurb,
+      warning: null,
+      allowedStatuses: 'any',
+      group: null,
+      visibility: 'enabled',
+    }
+  },
+
+  // execution — run fuzzy_resolve.py inside a short-lived subcontainer
+  async ({ effects }) => {
+    // 30 minutes: each flagged pair is one Qwen call, so leave generous
+    // headroom for a large backlog of candidates.
+    const timeoutMs = 30 * 60 * 1000
+    const argv = ['python3', 'fuzzy_resolve.py', '--db', DB_PATH]
+    // The 'main' volume is mounted read-write at DATA_MOUNT_PATH.
+    const dataMount = sdk.Mounts.of().mountVolume({
+      volumeId: 'main',
+      subpath: null,
+      mountpoint: DATA_MOUNT_PATH,
+      readonly: false,
+    })
+    const sub = await sdk.SubContainer.of(
+      effects,
+      { imageId: IMAGE_ID },
+      dataMount,
+      'ten31-database-resolve-duplicate-names',
+    )
+
+    try {
+      await sub.execFail(argv, { cwd: INGEST_DIR, env: ingestEnv }, timeoutMs)
+    } finally {
+      // Tear the subcontainer down whether the script succeeded or threw.
+      await sub.destroy()
+    }
+
+    return {
+      version: '1',
+      title: i18n('Duplicate names resolved'),
+      message: i18n(
+        'The local Qwen model reviewed the flagged name-variant pairs and ' +
+          'merged true duplicates into one canonical id. You can re-run this ' +
+          'action any time.',
+      ),
+      result: null,
+    }
+  },
+)