Files
Keysat c7ce44d963 Phase 0 foundation: canonical schema, ingest pipeline, CRM MCP server
Workstream A–C substrate for the Ten31 agentic system:
- A1: docs/crm-overview.md; CLAUDE.md conventions + guardrail #9
- A2: additive/reversible core migration (canonical_entities, entity_links,
  interaction_log, relationship_edges, soft-delete) + ledgered runner
- B1/B3: chunking + deterministic entity resolution (backend/ingest)
- B2: dense (bge-m3) + BM25 sparse ingest to Qdrant crm_chunks
- C: CRM MCP server (reads, retrieval modes, logged writes) — no outbound tools
- docs: redaction/re-hydration, Gmail enablement runbook
- synthetic test data; .env.example; housekeeping (.gitignore, untrack crm.db,
  drop legacy files + start9/0.3.5)

Verified end-to-end on synthetic data + live Sparks (hybrid > dense on entity
queries). Real backfill runs on Ten31 infra; index holds synthetic data only.
Branch snapshot also captures pre-existing working-tree changes.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-05 08:13:35 -05:00

193 lines
9.4 KiB
SQL

-- Gmail Integration — Phase 1 migration
-- Creates all tables for email capture, matching, threading, attachments.
-- This migration is IDEMPOTENT: safe to re-run.
-- Applied by email_integration.db.apply_migrations() on server startup when
-- CRM_GMAIL_INTEGRATION_ENABLED is truthy.
--
-- DO NOT modify this file in place after it ships. Create 0002_*.sql, etc.
-- ============================================================================
-- email_accounts — one row per enrolled team-member mailbox
-- ============================================================================
CREATE TABLE IF NOT EXISTS email_accounts (
id TEXT PRIMARY KEY,
user_id TEXT NOT NULL,
email_address TEXT NOT NULL UNIQUE,
auth_method TEXT NOT NULL, -- 'dwd' | 'oauth'
oauth_refresh_enc BLOB,
oauth_token_enc BLOB,
oauth_token_exp TEXT,
sync_enabled INTEGER NOT NULL DEFAULT 1,
sync_status TEXT NOT NULL DEFAULT 'pending',
sync_error TEXT,
last_history_id TEXT,
last_synced_at TEXT,
backfill_complete INTEGER NOT NULL DEFAULT 0,
backfill_cursor TEXT,
created_at TEXT DEFAULT (datetime('now')),
updated_at TEXT DEFAULT (datetime('now')),
FOREIGN KEY(user_id) REFERENCES users(id)
);
CREATE INDEX IF NOT EXISTS idx_email_accounts_user ON email_accounts(user_id);
CREATE INDEX IF NOT EXISTS idx_email_accounts_sync ON email_accounts(sync_enabled, sync_status);
-- ============================================================================
-- emails — canonical email record, dedup'd across accounts by RFC Message-ID
-- ============================================================================
CREATE TABLE IF NOT EXISTS emails (
id TEXT PRIMARY KEY,
rfc_message_id TEXT NOT NULL UNIQUE,
gmail_thread_id TEXT,
rfc_thread_root_id TEXT,
thread_id TEXT, -- FK email_threads.id (populated by threads.py)
subject TEXT,
from_email TEXT NOT NULL,
from_name TEXT,
to_emails_json TEXT NOT NULL DEFAULT '[]',
cc_emails_json TEXT NOT NULL DEFAULT '[]',
bcc_emails_json TEXT NOT NULL DEFAULT '[]',
reply_to TEXT,
sent_at TEXT NOT NULL,
body_text TEXT,
body_html TEXT,
snippet TEXT,
in_reply_to TEXT,
references_json TEXT DEFAULT '[]',
has_attachments INTEGER NOT NULL DEFAULT 0,
size_estimate INTEGER,
is_matched INTEGER NOT NULL DEFAULT 0,
match_status TEXT NOT NULL DEFAULT 'unmatched', -- unmatched|matched|skipped
raw_headers_json TEXT,
created_at TEXT DEFAULT (datetime('now')),
updated_at TEXT DEFAULT (datetime('now'))
);
CREATE INDEX IF NOT EXISTS idx_emails_thread ON emails(gmail_thread_id);
CREATE INDEX IF NOT EXISTS idx_emails_rfc_thread ON emails(rfc_thread_root_id);
CREATE INDEX IF NOT EXISTS idx_emails_thread_fk ON emails(thread_id);
CREATE INDEX IF NOT EXISTS idx_emails_from ON emails(from_email);
CREATE INDEX IF NOT EXISTS idx_emails_sent_at ON emails(sent_at);
CREATE INDEX IF NOT EXISTS idx_emails_matched ON emails(is_matched, sent_at);
CREATE INDEX IF NOT EXISTS idx_emails_in_reply_to ON emails(in_reply_to);
-- ============================================================================
-- email_recipients — denormalized for fast address lookups
-- ============================================================================
CREATE TABLE IF NOT EXISTS email_recipients (
id TEXT PRIMARY KEY,
email_id TEXT NOT NULL,
address TEXT NOT NULL,
display_name TEXT,
kind TEXT NOT NULL, -- from|to|cc|bcc|reply_to
FOREIGN KEY(email_id) REFERENCES emails(id) ON DELETE CASCADE
);
CREATE INDEX IF NOT EXISTS idx_email_recipients_addr ON email_recipients(address);
CREATE INDEX IF NOT EXISTS idx_email_recipients_email ON email_recipients(email_id);
-- ============================================================================
-- email_account_messages — per-mailbox sighting of an email
-- ============================================================================
CREATE TABLE IF NOT EXISTS email_account_messages (
id TEXT PRIMARY KEY,
email_id TEXT NOT NULL,
account_id TEXT NOT NULL,
gmail_message_id TEXT NOT NULL,
gmail_thread_id TEXT NOT NULL,
labels_json TEXT DEFAULT '[]',
is_sent INTEGER NOT NULL DEFAULT 0,
first_seen_at TEXT DEFAULT (datetime('now')),
deleted_at TEXT,
FOREIGN KEY(email_id) REFERENCES emails(id) ON DELETE CASCADE,
FOREIGN KEY(account_id) REFERENCES email_accounts(id) ON DELETE CASCADE,
UNIQUE(account_id, gmail_message_id)
);
CREATE INDEX IF NOT EXISTS idx_eam_email ON email_account_messages(email_id);
CREATE INDEX IF NOT EXISTS idx_eam_account ON email_account_messages(account_id);
CREATE INDEX IF NOT EXISTS idx_eam_gmail_msg ON email_account_messages(gmail_message_id);
-- ============================================================================
-- email_attachments — metadata; bytes on disk under data/email_attachments/
-- ============================================================================
CREATE TABLE IF NOT EXISTS email_attachments (
id TEXT PRIMARY KEY,
email_id TEXT NOT NULL,
gmail_attachment_id TEXT NOT NULL,
filename TEXT NOT NULL,
sanitized_filename TEXT NOT NULL,
mime_type TEXT,
size_bytes INTEGER,
sha256_hex TEXT,
storage_path TEXT NOT NULL,
download_status TEXT NOT NULL DEFAULT 'pending', -- pending|downloaded|failed|skipped
download_attempts INTEGER NOT NULL DEFAULT 0,
download_error TEXT,
downloaded_at TEXT,
created_at TEXT DEFAULT (datetime('now')),
FOREIGN KEY(email_id) REFERENCES emails(id) ON DELETE CASCADE
);
CREATE INDEX IF NOT EXISTS idx_attach_email ON email_attachments(email_id);
CREATE INDEX IF NOT EXISTS idx_attach_sha ON email_attachments(sha256_hex);
CREATE INDEX IF NOT EXISTS idx_attach_status ON email_attachments(download_status);
-- ============================================================================
-- email_threads — thread roll-up for UI
-- ============================================================================
CREATE TABLE IF NOT EXISTS email_threads (
id TEXT PRIMARY KEY,
gmail_thread_id TEXT,
rfc_thread_root_id TEXT,
subject_normalized TEXT,
first_message_at TEXT,
last_message_at TEXT,
message_count INTEGER NOT NULL DEFAULT 0,
participant_count INTEGER NOT NULL DEFAULT 0,
participants_json TEXT DEFAULT '[]',
is_matched INTEGER NOT NULL DEFAULT 0,
created_at TEXT DEFAULT (datetime('now')),
updated_at TEXT DEFAULT (datetime('now'))
);
CREATE UNIQUE INDEX IF NOT EXISTS idx_threads_gmail_uniq ON email_threads(gmail_thread_id)
WHERE gmail_thread_id IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_threads_rfc_root ON email_threads(rfc_thread_root_id);
CREATE INDEX IF NOT EXISTS idx_threads_last_msg ON email_threads(last_message_at);
-- ============================================================================
-- email_investor_links — matched investors
-- ============================================================================
CREATE TABLE IF NOT EXISTS email_investor_links (
id TEXT PRIMARY KEY,
email_id TEXT NOT NULL,
fundraising_investor_id TEXT,
fundraising_contact_id TEXT,
contact_id TEXT,
organization_id TEXT,
matched_address TEXT NOT NULL,
match_kind TEXT NOT NULL, -- exact_email|domain_match|manual
match_confidence REAL NOT NULL DEFAULT 1.0,
created_at TEXT DEFAULT (datetime('now')),
FOREIGN KEY(email_id) REFERENCES emails(id) ON DELETE CASCADE
);
CREATE INDEX IF NOT EXISTS idx_eil_email ON email_investor_links(email_id);
CREATE INDEX IF NOT EXISTS idx_eil_investor ON email_investor_links(fundraising_investor_id);
CREATE INDEX IF NOT EXISTS idx_eil_fr_contact ON email_investor_links(fundraising_contact_id);
CREATE INDEX IF NOT EXISTS idx_eil_contact ON email_investor_links(contact_id);
-- ============================================================================
-- email_sync_runs — per-run observability
-- ============================================================================
CREATE TABLE IF NOT EXISTS email_sync_runs (
id TEXT PRIMARY KEY,
account_id TEXT NOT NULL,
kind TEXT NOT NULL, -- backfill|incremental|manual
started_at TEXT NOT NULL,
finished_at TEXT,
status TEXT NOT NULL, -- running|ok|error|partial
messages_seen INTEGER NOT NULL DEFAULT 0,
messages_stored INTEGER NOT NULL DEFAULT 0,
attachments_saved INTEGER NOT NULL DEFAULT 0,
api_calls INTEGER NOT NULL DEFAULT 0,
retries INTEGER NOT NULL DEFAULT 0,
error TEXT,
FOREIGN KEY(account_id) REFERENCES email_accounts(id) ON DELETE CASCADE
);
CREATE INDEX IF NOT EXISTS idx_sync_runs_account ON email_sync_runs(account_id, started_at);