c7ce44d963
Workstream A–C substrate for the Ten31 agentic system: - A1: docs/crm-overview.md; CLAUDE.md conventions + guardrail #9 - A2: additive/reversible core migration (canonical_entities, entity_links, interaction_log, relationship_edges, soft-delete) + ledgered runner - B1/B3: chunking + deterministic entity resolution (backend/ingest) - B2: dense (bge-m3) + BM25 sparse ingest to Qdrant crm_chunks - C: CRM MCP server (reads, retrieval modes, logged writes) — no outbound tools - docs: redaction/re-hydration, Gmail enablement runbook - synthetic test data; .env.example; housekeeping (.gitignore, untrack crm.db, drop legacy files + start9/0.3.5) Verified end-to-end on synthetic data + live Sparks (hybrid > dense on entity queries). Real backfill runs on Ten31 infra; index holds synthetic data only. Branch snapshot also captures pre-existing working-tree changes. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
193 lines
9.4 KiB
SQL
193 lines
9.4 KiB
SQL
-- Gmail Integration — Phase 1 migration
|
|
-- Creates all tables for email capture, matching, threading, attachments.
|
|
-- This migration is IDEMPOTENT: safe to re-run.
|
|
-- Applied by email_integration.db.apply_migrations() on server startup when
|
|
-- CRM_GMAIL_INTEGRATION_ENABLED is truthy.
|
|
--
|
|
-- DO NOT modify this file in place after it ships. Create 0002_*.sql, etc.
|
|
|
|
-- ============================================================================
|
|
-- email_accounts — one row per enrolled team-member mailbox
|
|
-- ============================================================================
|
|
CREATE TABLE IF NOT EXISTS email_accounts (
|
|
id TEXT PRIMARY KEY,
|
|
user_id TEXT NOT NULL,
|
|
email_address TEXT NOT NULL UNIQUE,
|
|
auth_method TEXT NOT NULL, -- 'dwd' | 'oauth'
|
|
oauth_refresh_enc BLOB,
|
|
oauth_token_enc BLOB,
|
|
oauth_token_exp TEXT,
|
|
sync_enabled INTEGER NOT NULL DEFAULT 1,
|
|
sync_status TEXT NOT NULL DEFAULT 'pending',
|
|
sync_error TEXT,
|
|
last_history_id TEXT,
|
|
last_synced_at TEXT,
|
|
backfill_complete INTEGER NOT NULL DEFAULT 0,
|
|
backfill_cursor TEXT,
|
|
created_at TEXT DEFAULT (datetime('now')),
|
|
updated_at TEXT DEFAULT (datetime('now')),
|
|
FOREIGN KEY(user_id) REFERENCES users(id)
|
|
);
|
|
CREATE INDEX IF NOT EXISTS idx_email_accounts_user ON email_accounts(user_id);
|
|
CREATE INDEX IF NOT EXISTS idx_email_accounts_sync ON email_accounts(sync_enabled, sync_status);
|
|
|
|
-- ============================================================================
|
|
-- emails — canonical email record, dedup'd across accounts by RFC Message-ID
|
|
-- ============================================================================
|
|
CREATE TABLE IF NOT EXISTS emails (
|
|
id TEXT PRIMARY KEY,
|
|
rfc_message_id TEXT NOT NULL UNIQUE,
|
|
gmail_thread_id TEXT,
|
|
rfc_thread_root_id TEXT,
|
|
thread_id TEXT, -- FK email_threads.id (populated by threads.py)
|
|
subject TEXT,
|
|
from_email TEXT NOT NULL,
|
|
from_name TEXT,
|
|
to_emails_json TEXT NOT NULL DEFAULT '[]',
|
|
cc_emails_json TEXT NOT NULL DEFAULT '[]',
|
|
bcc_emails_json TEXT NOT NULL DEFAULT '[]',
|
|
reply_to TEXT,
|
|
sent_at TEXT NOT NULL,
|
|
body_text TEXT,
|
|
body_html TEXT,
|
|
snippet TEXT,
|
|
in_reply_to TEXT,
|
|
references_json TEXT DEFAULT '[]',
|
|
has_attachments INTEGER NOT NULL DEFAULT 0,
|
|
size_estimate INTEGER,
|
|
is_matched INTEGER NOT NULL DEFAULT 0,
|
|
match_status TEXT NOT NULL DEFAULT 'unmatched', -- unmatched|matched|skipped
|
|
raw_headers_json TEXT,
|
|
created_at TEXT DEFAULT (datetime('now')),
|
|
updated_at TEXT DEFAULT (datetime('now'))
|
|
);
|
|
CREATE INDEX IF NOT EXISTS idx_emails_thread ON emails(gmail_thread_id);
|
|
CREATE INDEX IF NOT EXISTS idx_emails_rfc_thread ON emails(rfc_thread_root_id);
|
|
CREATE INDEX IF NOT EXISTS idx_emails_thread_fk ON emails(thread_id);
|
|
CREATE INDEX IF NOT EXISTS idx_emails_from ON emails(from_email);
|
|
CREATE INDEX IF NOT EXISTS idx_emails_sent_at ON emails(sent_at);
|
|
CREATE INDEX IF NOT EXISTS idx_emails_matched ON emails(is_matched, sent_at);
|
|
CREATE INDEX IF NOT EXISTS idx_emails_in_reply_to ON emails(in_reply_to);
|
|
|
|
-- ============================================================================
|
|
-- email_recipients — denormalized for fast address lookups
|
|
-- ============================================================================
|
|
CREATE TABLE IF NOT EXISTS email_recipients (
|
|
id TEXT PRIMARY KEY,
|
|
email_id TEXT NOT NULL,
|
|
address TEXT NOT NULL,
|
|
display_name TEXT,
|
|
kind TEXT NOT NULL, -- from|to|cc|bcc|reply_to
|
|
FOREIGN KEY(email_id) REFERENCES emails(id) ON DELETE CASCADE
|
|
);
|
|
CREATE INDEX IF NOT EXISTS idx_email_recipients_addr ON email_recipients(address);
|
|
CREATE INDEX IF NOT EXISTS idx_email_recipients_email ON email_recipients(email_id);
|
|
|
|
-- ============================================================================
|
|
-- email_account_messages — per-mailbox sighting of an email
|
|
-- ============================================================================
|
|
CREATE TABLE IF NOT EXISTS email_account_messages (
|
|
id TEXT PRIMARY KEY,
|
|
email_id TEXT NOT NULL,
|
|
account_id TEXT NOT NULL,
|
|
gmail_message_id TEXT NOT NULL,
|
|
gmail_thread_id TEXT NOT NULL,
|
|
labels_json TEXT DEFAULT '[]',
|
|
is_sent INTEGER NOT NULL DEFAULT 0,
|
|
first_seen_at TEXT DEFAULT (datetime('now')),
|
|
deleted_at TEXT,
|
|
FOREIGN KEY(email_id) REFERENCES emails(id) ON DELETE CASCADE,
|
|
FOREIGN KEY(account_id) REFERENCES email_accounts(id) ON DELETE CASCADE,
|
|
UNIQUE(account_id, gmail_message_id)
|
|
);
|
|
CREATE INDEX IF NOT EXISTS idx_eam_email ON email_account_messages(email_id);
|
|
CREATE INDEX IF NOT EXISTS idx_eam_account ON email_account_messages(account_id);
|
|
CREATE INDEX IF NOT EXISTS idx_eam_gmail_msg ON email_account_messages(gmail_message_id);
|
|
|
|
-- ============================================================================
|
|
-- email_attachments — metadata; bytes on disk under data/email_attachments/
|
|
-- ============================================================================
|
|
CREATE TABLE IF NOT EXISTS email_attachments (
|
|
id TEXT PRIMARY KEY,
|
|
email_id TEXT NOT NULL,
|
|
gmail_attachment_id TEXT NOT NULL,
|
|
filename TEXT NOT NULL,
|
|
sanitized_filename TEXT NOT NULL,
|
|
mime_type TEXT,
|
|
size_bytes INTEGER,
|
|
sha256_hex TEXT,
|
|
storage_path TEXT NOT NULL,
|
|
download_status TEXT NOT NULL DEFAULT 'pending', -- pending|downloaded|failed|skipped
|
|
download_attempts INTEGER NOT NULL DEFAULT 0,
|
|
download_error TEXT,
|
|
downloaded_at TEXT,
|
|
created_at TEXT DEFAULT (datetime('now')),
|
|
FOREIGN KEY(email_id) REFERENCES emails(id) ON DELETE CASCADE
|
|
);
|
|
CREATE INDEX IF NOT EXISTS idx_attach_email ON email_attachments(email_id);
|
|
CREATE INDEX IF NOT EXISTS idx_attach_sha ON email_attachments(sha256_hex);
|
|
CREATE INDEX IF NOT EXISTS idx_attach_status ON email_attachments(download_status);
|
|
|
|
-- ============================================================================
|
|
-- email_threads — thread roll-up for UI
|
|
-- ============================================================================
|
|
CREATE TABLE IF NOT EXISTS email_threads (
|
|
id TEXT PRIMARY KEY,
|
|
gmail_thread_id TEXT,
|
|
rfc_thread_root_id TEXT,
|
|
subject_normalized TEXT,
|
|
first_message_at TEXT,
|
|
last_message_at TEXT,
|
|
message_count INTEGER NOT NULL DEFAULT 0,
|
|
participant_count INTEGER NOT NULL DEFAULT 0,
|
|
participants_json TEXT DEFAULT '[]',
|
|
is_matched INTEGER NOT NULL DEFAULT 0,
|
|
created_at TEXT DEFAULT (datetime('now')),
|
|
updated_at TEXT DEFAULT (datetime('now'))
|
|
);
|
|
CREATE UNIQUE INDEX IF NOT EXISTS idx_threads_gmail_uniq ON email_threads(gmail_thread_id)
|
|
WHERE gmail_thread_id IS NOT NULL;
|
|
CREATE INDEX IF NOT EXISTS idx_threads_rfc_root ON email_threads(rfc_thread_root_id);
|
|
CREATE INDEX IF NOT EXISTS idx_threads_last_msg ON email_threads(last_message_at);
|
|
|
|
-- ============================================================================
|
|
-- email_investor_links — matched investors
|
|
-- ============================================================================
|
|
CREATE TABLE IF NOT EXISTS email_investor_links (
|
|
id TEXT PRIMARY KEY,
|
|
email_id TEXT NOT NULL,
|
|
fundraising_investor_id TEXT,
|
|
fundraising_contact_id TEXT,
|
|
contact_id TEXT,
|
|
organization_id TEXT,
|
|
matched_address TEXT NOT NULL,
|
|
match_kind TEXT NOT NULL, -- exact_email|domain_match|manual
|
|
match_confidence REAL NOT NULL DEFAULT 1.0,
|
|
created_at TEXT DEFAULT (datetime('now')),
|
|
FOREIGN KEY(email_id) REFERENCES emails(id) ON DELETE CASCADE
|
|
);
|
|
CREATE INDEX IF NOT EXISTS idx_eil_email ON email_investor_links(email_id);
|
|
CREATE INDEX IF NOT EXISTS idx_eil_investor ON email_investor_links(fundraising_investor_id);
|
|
CREATE INDEX IF NOT EXISTS idx_eil_fr_contact ON email_investor_links(fundraising_contact_id);
|
|
CREATE INDEX IF NOT EXISTS idx_eil_contact ON email_investor_links(contact_id);
|
|
|
|
-- ============================================================================
|
|
-- email_sync_runs — per-run observability
|
|
-- ============================================================================
|
|
CREATE TABLE IF NOT EXISTS email_sync_runs (
|
|
id TEXT PRIMARY KEY,
|
|
account_id TEXT NOT NULL,
|
|
kind TEXT NOT NULL, -- backfill|incremental|manual
|
|
started_at TEXT NOT NULL,
|
|
finished_at TEXT,
|
|
status TEXT NOT NULL, -- running|ok|error|partial
|
|
messages_seen INTEGER NOT NULL DEFAULT 0,
|
|
messages_stored INTEGER NOT NULL DEFAULT 0,
|
|
attachments_saved INTEGER NOT NULL DEFAULT 0,
|
|
api_calls INTEGER NOT NULL DEFAULT 0,
|
|
retries INTEGER NOT NULL DEFAULT 0,
|
|
error TEXT,
|
|
FOREIGN KEY(account_id) REFERENCES email_accounts(id) ON DELETE CASCADE
|
|
);
|
|
CREATE INDEX IF NOT EXISTS idx_sync_runs_account ON email_sync_runs(account_id, started_at);
|