Phase 0 foundation: canonical schema, ingest pipeline, CRM MCP server
Workstream A–C substrate for the Ten31 agentic system: - A1: docs/crm-overview.md; CLAUDE.md conventions + guardrail #9 - A2: additive/reversible core migration (canonical_entities, entity_links, interaction_log, relationship_edges, soft-delete) + ledgered runner - B1/B3: chunking + deterministic entity resolution (backend/ingest) - B2: dense (bge-m3) + BM25 sparse ingest to Qdrant crm_chunks - C: CRM MCP server (reads, retrieval modes, logged writes) — no outbound tools - docs: redaction/re-hydration, Gmail enablement runbook - synthetic test data; .env.example; housekeeping (.gitignore, untrack crm.db, drop legacy files + start9/0.3.5) Verified end-to-end on synthetic data + live Sparks (hybrid > dense on entity queries). Real backfill runs on Ten31 infra; index holds synthetic data only. Branch snapshot also captures pre-existing working-tree changes. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,192 @@
|
||||
-- Gmail Integration — Phase 1 migration
|
||||
-- Creates all tables for email capture, matching, threading, attachments.
|
||||
-- This migration is IDEMPOTENT: safe to re-run.
|
||||
-- Applied by email_integration.db.apply_migrations() on server startup when
|
||||
-- CRM_GMAIL_INTEGRATION_ENABLED is truthy.
|
||||
--
|
||||
-- DO NOT modify this file in place after it ships. Create 0002_*.sql, etc.
|
||||
|
||||
-- ============================================================================
|
||||
-- email_accounts — one row per enrolled team-member mailbox
|
||||
-- ============================================================================
|
||||
CREATE TABLE IF NOT EXISTS email_accounts (
|
||||
id TEXT PRIMARY KEY,
|
||||
user_id TEXT NOT NULL,
|
||||
email_address TEXT NOT NULL UNIQUE,
|
||||
auth_method TEXT NOT NULL, -- 'dwd' | 'oauth'
|
||||
oauth_refresh_enc BLOB,
|
||||
oauth_token_enc BLOB,
|
||||
oauth_token_exp TEXT,
|
||||
sync_enabled INTEGER NOT NULL DEFAULT 1,
|
||||
sync_status TEXT NOT NULL DEFAULT 'pending',
|
||||
sync_error TEXT,
|
||||
last_history_id TEXT,
|
||||
last_synced_at TEXT,
|
||||
backfill_complete INTEGER NOT NULL DEFAULT 0,
|
||||
backfill_cursor TEXT,
|
||||
created_at TEXT DEFAULT (datetime('now')),
|
||||
updated_at TEXT DEFAULT (datetime('now')),
|
||||
FOREIGN KEY(user_id) REFERENCES users(id)
|
||||
);
|
||||
CREATE INDEX IF NOT EXISTS idx_email_accounts_user ON email_accounts(user_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_email_accounts_sync ON email_accounts(sync_enabled, sync_status);
|
||||
|
||||
-- ============================================================================
|
||||
-- emails — canonical email record, dedup'd across accounts by RFC Message-ID
|
||||
-- ============================================================================
|
||||
CREATE TABLE IF NOT EXISTS emails (
|
||||
id TEXT PRIMARY KEY,
|
||||
rfc_message_id TEXT NOT NULL UNIQUE,
|
||||
gmail_thread_id TEXT,
|
||||
rfc_thread_root_id TEXT,
|
||||
thread_id TEXT, -- FK email_threads.id (populated by threads.py)
|
||||
subject TEXT,
|
||||
from_email TEXT NOT NULL,
|
||||
from_name TEXT,
|
||||
to_emails_json TEXT NOT NULL DEFAULT '[]',
|
||||
cc_emails_json TEXT NOT NULL DEFAULT '[]',
|
||||
bcc_emails_json TEXT NOT NULL DEFAULT '[]',
|
||||
reply_to TEXT,
|
||||
sent_at TEXT NOT NULL,
|
||||
body_text TEXT,
|
||||
body_html TEXT,
|
||||
snippet TEXT,
|
||||
in_reply_to TEXT,
|
||||
references_json TEXT DEFAULT '[]',
|
||||
has_attachments INTEGER NOT NULL DEFAULT 0,
|
||||
size_estimate INTEGER,
|
||||
is_matched INTEGER NOT NULL DEFAULT 0,
|
||||
match_status TEXT NOT NULL DEFAULT 'unmatched', -- unmatched|matched|skipped
|
||||
raw_headers_json TEXT,
|
||||
created_at TEXT DEFAULT (datetime('now')),
|
||||
updated_at TEXT DEFAULT (datetime('now'))
|
||||
);
|
||||
CREATE INDEX IF NOT EXISTS idx_emails_thread ON emails(gmail_thread_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_emails_rfc_thread ON emails(rfc_thread_root_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_emails_thread_fk ON emails(thread_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_emails_from ON emails(from_email);
|
||||
CREATE INDEX IF NOT EXISTS idx_emails_sent_at ON emails(sent_at);
|
||||
CREATE INDEX IF NOT EXISTS idx_emails_matched ON emails(is_matched, sent_at);
|
||||
CREATE INDEX IF NOT EXISTS idx_emails_in_reply_to ON emails(in_reply_to);
|
||||
|
||||
-- ============================================================================
|
||||
-- email_recipients — denormalized for fast address lookups
|
||||
-- ============================================================================
|
||||
CREATE TABLE IF NOT EXISTS email_recipients (
|
||||
id TEXT PRIMARY KEY,
|
||||
email_id TEXT NOT NULL,
|
||||
address TEXT NOT NULL,
|
||||
display_name TEXT,
|
||||
kind TEXT NOT NULL, -- from|to|cc|bcc|reply_to
|
||||
FOREIGN KEY(email_id) REFERENCES emails(id) ON DELETE CASCADE
|
||||
);
|
||||
CREATE INDEX IF NOT EXISTS idx_email_recipients_addr ON email_recipients(address);
|
||||
CREATE INDEX IF NOT EXISTS idx_email_recipients_email ON email_recipients(email_id);
|
||||
|
||||
-- ============================================================================
|
||||
-- email_account_messages — per-mailbox sighting of an email
|
||||
-- ============================================================================
|
||||
CREATE TABLE IF NOT EXISTS email_account_messages (
|
||||
id TEXT PRIMARY KEY,
|
||||
email_id TEXT NOT NULL,
|
||||
account_id TEXT NOT NULL,
|
||||
gmail_message_id TEXT NOT NULL,
|
||||
gmail_thread_id TEXT NOT NULL,
|
||||
labels_json TEXT DEFAULT '[]',
|
||||
is_sent INTEGER NOT NULL DEFAULT 0,
|
||||
first_seen_at TEXT DEFAULT (datetime('now')),
|
||||
deleted_at TEXT,
|
||||
FOREIGN KEY(email_id) REFERENCES emails(id) ON DELETE CASCADE,
|
||||
FOREIGN KEY(account_id) REFERENCES email_accounts(id) ON DELETE CASCADE,
|
||||
UNIQUE(account_id, gmail_message_id)
|
||||
);
|
||||
CREATE INDEX IF NOT EXISTS idx_eam_email ON email_account_messages(email_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_eam_account ON email_account_messages(account_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_eam_gmail_msg ON email_account_messages(gmail_message_id);
|
||||
|
||||
-- ============================================================================
|
||||
-- email_attachments — metadata; bytes on disk under data/email_attachments/
|
||||
-- ============================================================================
|
||||
CREATE TABLE IF NOT EXISTS email_attachments (
|
||||
id TEXT PRIMARY KEY,
|
||||
email_id TEXT NOT NULL,
|
||||
gmail_attachment_id TEXT NOT NULL,
|
||||
filename TEXT NOT NULL,
|
||||
sanitized_filename TEXT NOT NULL,
|
||||
mime_type TEXT,
|
||||
size_bytes INTEGER,
|
||||
sha256_hex TEXT,
|
||||
storage_path TEXT NOT NULL,
|
||||
download_status TEXT NOT NULL DEFAULT 'pending', -- pending|downloaded|failed|skipped
|
||||
download_attempts INTEGER NOT NULL DEFAULT 0,
|
||||
download_error TEXT,
|
||||
downloaded_at TEXT,
|
||||
created_at TEXT DEFAULT (datetime('now')),
|
||||
FOREIGN KEY(email_id) REFERENCES emails(id) ON DELETE CASCADE
|
||||
);
|
||||
CREATE INDEX IF NOT EXISTS idx_attach_email ON email_attachments(email_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_attach_sha ON email_attachments(sha256_hex);
|
||||
CREATE INDEX IF NOT EXISTS idx_attach_status ON email_attachments(download_status);
|
||||
|
||||
-- ============================================================================
|
||||
-- email_threads — thread roll-up for UI
|
||||
-- ============================================================================
|
||||
CREATE TABLE IF NOT EXISTS email_threads (
|
||||
id TEXT PRIMARY KEY,
|
||||
gmail_thread_id TEXT,
|
||||
rfc_thread_root_id TEXT,
|
||||
subject_normalized TEXT,
|
||||
first_message_at TEXT,
|
||||
last_message_at TEXT,
|
||||
message_count INTEGER NOT NULL DEFAULT 0,
|
||||
participant_count INTEGER NOT NULL DEFAULT 0,
|
||||
participants_json TEXT DEFAULT '[]',
|
||||
is_matched INTEGER NOT NULL DEFAULT 0,
|
||||
created_at TEXT DEFAULT (datetime('now')),
|
||||
updated_at TEXT DEFAULT (datetime('now'))
|
||||
);
|
||||
CREATE UNIQUE INDEX IF NOT EXISTS idx_threads_gmail_uniq ON email_threads(gmail_thread_id)
|
||||
WHERE gmail_thread_id IS NOT NULL;
|
||||
CREATE INDEX IF NOT EXISTS idx_threads_rfc_root ON email_threads(rfc_thread_root_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_threads_last_msg ON email_threads(last_message_at);
|
||||
|
||||
-- ============================================================================
|
||||
-- email_investor_links — matched investors
|
||||
-- ============================================================================
|
||||
CREATE TABLE IF NOT EXISTS email_investor_links (
|
||||
id TEXT PRIMARY KEY,
|
||||
email_id TEXT NOT NULL,
|
||||
fundraising_investor_id TEXT,
|
||||
fundraising_contact_id TEXT,
|
||||
contact_id TEXT,
|
||||
organization_id TEXT,
|
||||
matched_address TEXT NOT NULL,
|
||||
match_kind TEXT NOT NULL, -- exact_email|domain_match|manual
|
||||
match_confidence REAL NOT NULL DEFAULT 1.0,
|
||||
created_at TEXT DEFAULT (datetime('now')),
|
||||
FOREIGN KEY(email_id) REFERENCES emails(id) ON DELETE CASCADE
|
||||
);
|
||||
CREATE INDEX IF NOT EXISTS idx_eil_email ON email_investor_links(email_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_eil_investor ON email_investor_links(fundraising_investor_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_eil_fr_contact ON email_investor_links(fundraising_contact_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_eil_contact ON email_investor_links(contact_id);
|
||||
|
||||
-- ============================================================================
|
||||
-- email_sync_runs — per-run observability
|
||||
-- ============================================================================
|
||||
CREATE TABLE IF NOT EXISTS email_sync_runs (
|
||||
id TEXT PRIMARY KEY,
|
||||
account_id TEXT NOT NULL,
|
||||
kind TEXT NOT NULL, -- backfill|incremental|manual
|
||||
started_at TEXT NOT NULL,
|
||||
finished_at TEXT,
|
||||
status TEXT NOT NULL, -- running|ok|error|partial
|
||||
messages_seen INTEGER NOT NULL DEFAULT 0,
|
||||
messages_stored INTEGER NOT NULL DEFAULT 0,
|
||||
attachments_saved INTEGER NOT NULL DEFAULT 0,
|
||||
api_calls INTEGER NOT NULL DEFAULT 0,
|
||||
retries INTEGER NOT NULL DEFAULT 0,
|
||||
error TEXT,
|
||||
FOREIGN KEY(account_id) REFERENCES email_accounts(id) ON DELETE CASCADE
|
||||
);
|
||||
CREATE INDEX IF NOT EXISTS idx_sync_runs_account ON email_sync_runs(account_id, started_at);
|
||||
Reference in New Issue
Block a user