Phase 0 foundation: canonical schema, ingest pipeline, CRM MCP server

Workstream A–C substrate for the Ten31 agentic system:
- A1: docs/crm-overview.md; CLAUDE.md conventions + guardrail #9
- A2: additive/reversible core migration (canonical_entities, entity_links,
  interaction_log, relationship_edges, soft-delete) + ledgered runner
- B1/B3: chunking + deterministic entity resolution (backend/ingest)
- B2: dense (bge-m3) + BM25 sparse ingest to Qdrant crm_chunks
- C: CRM MCP server (reads, retrieval modes, logged writes) — no outbound tools
- docs: redaction/re-hydration, Gmail enablement runbook
- synthetic test data; .env.example; housekeeping (.gitignore, untrack crm.db,
  drop legacy files + start9/0.3.5)

Verified end-to-end on synthetic data + live Sparks (hybrid > dense on entity
queries). Real backfill runs on Ten31 infra; index holds synthetic data only.
Branch snapshot also captures pre-existing working-tree changes.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
Keysat
2026-06-05 08:11:28 -05:00
parent 7027efd777
commit c7ce44d963
99 changed files with 10676 additions and 7817 deletions
+620 -52
View File
@@ -18,7 +18,7 @@ import re
import base64
import threading
from datetime import datetime, timedelta
from http.server import HTTPServer, BaseHTTPRequestHandler
from http.server import HTTPServer, ThreadingHTTPServer, BaseHTTPRequestHandler
from urllib.parse import urlparse, parse_qs, unquote
from functools import wraps
@@ -52,6 +52,14 @@ CORS_ORIGIN = os.environ.get("CRM_CORS_ORIGIN", "*")
ENV = os.environ.get("CRM_ENV", "development")
LOGIN_RATE_LIMIT_PER_MIN = int(os.environ.get("CRM_LOGIN_RATE_LIMIT_PER_MIN", "20"))
WRITE_RATE_LIMIT_PER_MIN = int(os.environ.get("CRM_WRITE_RATE_LIMIT_PER_MIN", "300"))
GET_RATE_LIMIT_PER_MIN = int(os.environ.get("CRM_GET_RATE_LIMIT_PER_MIN", "600"))
# Auto-ban any IP that racks up too many 404s in a short window — almost always
# a vulnerability scanner blasting common paths (/.env, /.git/config, /swagger,
# /actuator/env, wp-json, etc.). Banned IPs get instant 429s with no DB or
# filesystem work, so they can't keep the single SQLite writer busy.
ABUSE_404_THRESHOLD = int(os.environ.get("CRM_ABUSE_404_THRESHOLD", "15"))
ABUSE_404_WINDOW_SEC = int(os.environ.get("CRM_ABUSE_404_WINDOW_SEC", "60"))
ABUSE_BAN_SEC = int(os.environ.get("CRM_ABUSE_BAN_SEC", "900")) # 15 minutes
BACKUP_POLICY_SETTING_KEY = "fundraising_backup_policy"
DEFAULT_BACKUP_POLICY = {
"enabled": True,
@@ -127,6 +135,10 @@ def init_db():
tags TEXT DEFAULT '[]',
notes TEXT,
linkedin_url TEXT,
city TEXT,
state TEXT,
country TEXT,
location_query TEXT,
preferred_contact TEXT DEFAULT 'email',
created_by TEXT REFERENCES users(id),
created_at TEXT DEFAULT (datetime('now')),
@@ -258,6 +270,7 @@ def init_db():
investor_name TEXT NOT NULL,
notes TEXT,
lead TEXT,
lead_source TEXT,
priority INTEGER DEFAULT 0,
follow_up INTEGER DEFAULT 0,
graveyard INTEGER DEFAULT 0,
@@ -337,6 +350,30 @@ def init_db():
UNIQUE(investor_id, list_key)
);
CREATE TABLE IF NOT EXISTS fundraising_presence (
user_id TEXT PRIMARY KEY REFERENCES users(id) ON DELETE CASCADE,
username TEXT NOT NULL,
full_name TEXT,
active_view TEXT,
row_id TEXT,
col_id TEXT,
is_editing INTEGER DEFAULT 0,
cell_key TEXT,
last_seen_at TEXT DEFAULT (datetime('now')),
expires_at_epoch INTEGER NOT NULL
);
CREATE TABLE IF NOT EXISTS fundraising_cell_locks (
cell_key TEXT PRIMARY KEY,
row_id TEXT NOT NULL,
col_id TEXT NOT NULL,
locked_by_user_id TEXT NOT NULL REFERENCES users(id) ON DELETE CASCADE,
locked_by_username TEXT NOT NULL,
locked_by_full_name TEXT,
last_seen_at TEXT DEFAULT (datetime('now')),
expires_at_epoch INTEGER NOT NULL
);
CREATE TABLE IF NOT EXISTS app_settings (
key TEXT PRIMARY KEY,
value_json TEXT NOT NULL,
@@ -363,9 +400,42 @@ def init_db():
CREATE INDEX IF NOT EXISTS idx_fr_commitments_fund ON fundraising_commitments(fund_id);
CREATE INDEX IF NOT EXISTS idx_fr_automation_runs_created ON fundraising_automation_runs(created_at);
CREATE INDEX IF NOT EXISTS idx_fr_memberships_list ON fundraising_list_memberships(list_key);
CREATE INDEX IF NOT EXISTS idx_fr_presence_expires ON fundraising_presence(expires_at_epoch);
CREATE INDEX IF NOT EXISTS idx_fr_locks_expires ON fundraising_cell_locks(expires_at_epoch);
""")
# Lightweight schema migrations for existing databases.
for stmt in [
"ALTER TABLE contacts ADD COLUMN city TEXT",
"ALTER TABLE contacts ADD COLUMN state TEXT",
"ALTER TABLE contacts ADD COLUMN country TEXT",
"ALTER TABLE contacts ADD COLUMN location_query TEXT",
"ALTER TABLE fundraising_investors ADD COLUMN lead_source TEXT",
]:
try:
conn.execute(stmt)
except sqlite3.OperationalError:
pass
# ─── Gmail integration migrations (feature-flag-guarded import) ───
try:
from email_integration.db import apply_migrations as _email_apply_migrations
_email_apply_migrations(cursor)
except ImportError:
pass
except Exception as _e:
print(f"[email_integration] migration warning: {_e}")
conn.commit()
# ─── Core schema migrations (Phase 0+; ordered .sql files w/ ledger) ───
# Additive/reversible only; tracked in schema_migrations. See core_migrations.py.
try:
from core_migrations import apply_core_migrations as _apply_core_migrations
_apply_core_migrations(conn)
except Exception as _e:
print(f"[migrations] core migration warning: {_e}")
conn.close()
print(f"Database initialized at {DB_PATH}")
@@ -563,6 +633,16 @@ def _split_full_name(full_name):
def _normalize_text(value):
return str(value or '').strip().lower()
def _parse_location_text(text):
raw = str(text or '').strip()
if not raw:
return '', '', '', ''
parts = [p.strip() for p in raw.split(',') if p.strip()]
city = parts[0] if len(parts) >= 1 else ''
state = parts[1] if len(parts) >= 2 else ''
country = parts[2] if len(parts) >= 3 else ''
return city, state, country, raw
def ensure_default_automation_rules(conn):
defaults = [
{
@@ -662,6 +742,11 @@ def _upsert_contact_from_fundraising(conn, investor_name, contact, actor_user_id
full_name = str(contact.get('name') or '').strip()
email = str(contact.get('email') or '').strip()
title = str(contact.get('title') or '').strip()
source = str(contact.get('source') or '').strip()
city = str(contact.get('city') or '').strip()
state = str(contact.get('state') or '').strip()
country = str(contact.get('country') or '').strip()
location_query = str(contact.get('location_query') or '').strip()
if not full_name and not email:
return None
first_name, last_name = _split_full_name(full_name)
@@ -700,20 +785,25 @@ def _upsert_contact_from_fundraising(conn, investor_name, contact, actor_user_id
next_last = last_name if (last_name or full_name) else str(existing['last_name'] or '')
next_email = email or str(existing['email'] or '')
next_title = title or str(existing['title'] or '')
next_source = source or str(existing['source'] or '')
next_city = city or str(existing['city'] or '')
next_state = state or str(existing['state'] or '')
next_country = country or str(existing['country'] or '')
next_location_query = location_query or str(existing['location_query'] or '')
next_org = org_id or existing['organization_id']
conn.execute("""
UPDATE contacts
SET first_name = ?, last_name = ?, email = ?, title = ?,
organization_id = ?, contact_type = 'investor', updated_at = ?
organization_id = ?, source = ?, contact_type = 'investor', city = ?, state = ?, country = ?, location_query = ?, updated_at = ?
WHERE id = ?
""", (next_first, next_last, next_email, next_title, next_org, now(), existing['id']))
""", (next_first, next_last, next_email, next_title, next_org, next_source, next_city, next_state, next_country, next_location_query, now(), existing['id']))
return existing['id']
contact_id = generate_id()
conn.execute("""
INSERT INTO contacts (
id, first_name, last_name, email, title, organization_id, contact_type, status, created_by, updated_at
) VALUES (?, ?, ?, ?, ?, ?, 'investor', 'active', ?, ?)
id, first_name, last_name, email, title, organization_id, source, contact_type, status, city, state, country, location_query, created_by, updated_at
) VALUES (?, ?, ?, ?, ?, ?, ?, 'investor', 'active', ?, ?, ?, ?, ?, ?)
""", (
contact_id,
first_name or 'Unknown',
@@ -721,6 +811,11 @@ def _upsert_contact_from_fundraising(conn, investor_name, contact, actor_user_id
email,
title,
org_id,
source,
city,
state,
country,
location_query,
actor_user_id,
now()
))
@@ -748,6 +843,11 @@ def _sync_contact_to_fundraising_state(conn, contact_row, actor_user_id=None, re
email = str(contact_row.get('email') or '').strip()
full_name = ' '.join([str(contact_row.get('first_name') or '').strip(), str(contact_row.get('last_name') or '').strip()]).strip()
title = str(contact_row.get('title') or '').strip()
source = str(contact_row.get('source') or '').strip()
city = str(contact_row.get('city') or '').strip()
state = str(contact_row.get('state') or '').strip()
country = str(contact_row.get('country') or '').strip()
location_query = str(contact_row.get('location_query') or '').strip()
if not full_name and not email:
return
@@ -814,22 +914,24 @@ def _sync_contact_to_fundraising_state(conn, contact_row, actor_user_id=None, re
"name": full_name or existing.get('name') or '',
"email": email or existing.get('email') or '',
"title": title or existing.get('title') or '',
"city": str(existing.get('city') or ''),
"state": str(existing.get('state') or ''),
"country": str(existing.get('country') or ''),
"location_query": str(existing.get('location_query') or '')
"city": city or str(existing.get('city') or ''),
"state": state or str(existing.get('state') or ''),
"country": country or str(existing.get('country') or ''),
"location_query": location_query or str(existing.get('location_query') or '')
}
else:
next_contacts.append({
"name": full_name,
"email": email,
"title": title,
"city": "",
"state": "",
"country": "",
"location_query": ""
"city": city,
"state": state,
"country": country,
"location_query": location_query
})
inv['contacts'] = next_contacts
if source and not str(inv.get('lead_source') or '').strip():
inv['lead_source'] = source
changed = True
if not changed:
@@ -901,6 +1003,7 @@ def sync_fundraising_relational(conn, grid, views, actor_user_id=None):
investor_name = str(row.get('investor_name') or '').strip() or 'Untitled Investor'
notes = str(row.get('notes') or '')
lead = str(row.get('lead') or '')
lead_source = str(row.get('lead_source') or row.get('combined_lead_source') or '').strip()
total_invested = 0.0
for _, col in fund_columns:
total_invested += _to_number(row.get(str(col.get('id'))))
@@ -909,13 +1012,17 @@ def sync_fundraising_relational(conn, grid, views, actor_user_id=None):
investor_id = existing['id'] if existing else generate_id()
conn.execute("""
INSERT INTO fundraising_investors (
id, investor_name, notes, lead, priority, follow_up, graveyard,
id, investor_name, notes, lead, lead_source, priority, follow_up, graveyard,
source_row_id, total_invested, updated_at
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
ON CONFLICT(source_row_id) DO UPDATE SET
investor_name = excluded.investor_name,
notes = excluded.notes,
lead = excluded.lead,
lead_source = CASE
WHEN COALESCE(trim(fundraising_investors.lead_source), '') = '' THEN excluded.lead_source
ELSE fundraising_investors.lead_source
END,
priority = excluded.priority,
follow_up = excluded.follow_up,
graveyard = excluded.graveyard,
@@ -926,6 +1033,7 @@ def sync_fundraising_relational(conn, grid, views, actor_user_id=None):
investor_name,
notes,
lead,
lead_source,
1 if _to_bool(row.get('priority')) else 0,
1 if _to_bool(row.get('follow_up')) else 0,
1 if _to_bool(row.get('graveyard')) else 0,
@@ -947,7 +1055,10 @@ def sync_fundraising_relational(conn, grid, views, actor_user_id=None):
email = str(c.get('email') or '').strip()
if not full_name and not email:
continue
_upsert_contact_from_fundraising(conn, investor_name, c, actor_user_id=actor_user_id)
contact_payload = dict(c)
if lead_source and not str(contact_payload.get('source') or '').strip():
contact_payload['source'] = lead_source
_upsert_contact_from_fundraising(conn, investor_name, contact_payload, actor_user_id=actor_user_id)
conn.execute("""
INSERT INTO fundraising_contacts (
id, investor_id, full_name, email, title, city, state, country, location_query, sort_order, updated_at
@@ -958,7 +1069,7 @@ def sync_fundraising_relational(conn, grid, views, actor_user_id=None):
str(c.get('location_query') or ''), i, now()
))
elif isinstance(contacts, str) and contacts.strip():
_upsert_contact_from_fundraising(conn, investor_name, {"name": contacts.strip(), "email": "", "title": ""}, actor_user_id=actor_user_id)
_upsert_contact_from_fundraising(conn, investor_name, {"name": contacts.strip(), "email": "", "title": "", "source": lead_source}, actor_user_id=actor_user_id)
conn.execute("""
INSERT INTO fundraising_contacts (
id, investor_id, full_name, email, title, city, state, country, location_query, sort_order, updated_at
@@ -1291,6 +1402,7 @@ DEFAULT_FUNDRAISING_COLUMNS = [
{"id": "contacts", "label": "Contacts", "type": "contacts", "width": 260},
{"id": "log_action", "label": "Log", "type": "action", "readOnly": True, "width": 90},
{"id": "notes", "label": "Notes / Communication / Outreach", "type": "longtext", "width": 420},
{"id": "lead_source", "label": "Lead Source", "type": "text", "width": 180},
{"id": "notes_last_modified", "label": "Notes Last Modified", "type": "date", "readOnly": True, "width": 180},
{"id": "last_communication_date", "label": "Last Communication Date", "type": "date", "readOnly": True, "width": 195},
{"id": "priority", "label": "Priority", "type": "checkbox", "width": 110},
@@ -1314,7 +1426,12 @@ DEFAULT_FUNDRAISING_ROWS = []
class CRMHandler(BaseHTTPRequestHandler):
"""Main HTTP request handler for the CRM API."""
_rate_limit_buckets = {}
# Class-level state shared across all handler threads. Protected by
# _abuse_lock; see rate_limited() and record_404() for usage.
_rate_limit_buckets = {} # (scope, ip) -> [timestamps]
_404_buckets = {} # ip -> [timestamps] of recent 404 responses
_banned_ips = {} # ip -> ban_until_epoch
_abuse_lock = threading.Lock()
def log_message(self, format, *args):
"""Override to use cleaner logging."""
@@ -1323,14 +1440,22 @@ class CRMHandler(BaseHTTPRequestHandler):
# ── Request Parsing ──
def get_body(self):
    """Return the request's JSON body, parsed once and cached on the handler.

    The parsed value is stored on ``self._cached_body`` so repeated calls
    do not try to re-read an already-consumed request stream.

    Returns:
        The decoded JSON value, or ``{}`` when the request has no body,
        the ``Content-Length`` header is missing, malformed or not
        positive, or the payload is not valid UTF-8 encoded JSON.
    """
    if hasattr(self, '_cached_body'):
        return self._cached_body

    # A malformed Content-Length must not crash the handler.
    try:
        content_length = int(self.headers.get('Content-Length', 0))
    except (TypeError, ValueError):
        content_length = 0

    # Treat non-positive lengths as "no body": a negative size passed to
    # rfile.read() would read until EOF instead of returning immediately.
    if content_length <= 0:
        self._cached_body = {}
        return self._cached_body

    body = self.rfile.read(content_length)
    try:
        self._cached_body = json.loads(body.decode('utf-8'))
    except (UnicodeDecodeError, json.JSONDecodeError):
        # Invalid UTF-8 raises UnicodeDecodeError, which is not a
        # JSONDecodeError; both mean "unusable body".
        self._cached_body = {}
    return self._cached_body
def get_query_params(self):
parsed = urlparse(self.path)
@@ -1373,15 +1498,57 @@ class CRMHandler(BaseHTTPRequestHandler):
def rate_limited(self, scope, limit_per_minute):
# Per-client request limiter over a trailing 60-second window.
# Buckets are keyed by "<scope>:<client ip>" and hold request timestamps.
# Returns True when the bucket already holds limit_per_minute (floored at 1)
# timestamps inside the window; otherwise records this request and returns False.
now_ts = time.time()
bucket_key = f"{scope}:{self.get_client_ip()}"
# NOTE(review): the next four lines duplicate the logic that follows under
# the lock; they look like the pre-change (unlocked) lines of this diff
# hunk rather than live code — confirm against the real file.
bucket = self._rate_limit_buckets.get(bucket_key, [])
cutoff = now_ts - 60.0
bucket = [t for t in bucket if t >= cutoff]
if len(bucket) >= max(1, int(limit_per_minute)):
# Read, prune and write the shared bucket dict while holding _abuse_lock.
with self._abuse_lock:
bucket = self._rate_limit_buckets.get(bucket_key, [])
# Drop timestamps older than 60 seconds.
cutoff = now_ts - 60.0
bucket = [t for t in bucket if t >= cutoff]
if len(bucket) >= max(1, int(limit_per_minute)):
# Store the pruned bucket back before rejecting the request.
self._rate_limit_buckets[bucket_key] = bucket
return True
# Under the limit: count this request.
bucket.append(now_ts)
self._rate_limit_buckets[bucket_key] = bucket
return False
def is_banned(self):
    """Report whether the requesting client IP is currently blacklisted.

    Looks the IP up in the shared ``_banned_ips`` mapping (ip -> epoch
    seconds at which the ban ends) while holding ``_abuse_lock``. A ban
    whose end time has been reached is removed from the mapping as a side
    effect of the check.

    Returns:
        True if an unexpired ban exists for the client IP, else False.
    """
    ip = self.get_client_ip()
    checked_at = time.time()
    with self._abuse_lock:
        ban_until = self._banned_ips.get(ip)
        still_banned = ban_until is not None and checked_at < ban_until
        if ban_until is not None and not still_banned:
            # The ban window has passed; forget the entry.
            self._banned_ips.pop(ip, None)
    return still_banned
bucket.append(now_ts)
self._rate_limit_buckets[bucket_key] = bucket
return False
def record_404(self):
    """Count a 404 for the client IP and ban the IP once it bursts.

    Keeps, per IP, the timestamps of 404s seen within the last
    ABUSE_404_WINDOW_SEC seconds. When the count (including this one)
    reaches ABUSE_404_THRESHOLD, the IP is added to ``_banned_ips`` until
    now + ABUSE_BAN_SEC, its 404 bucket is discarded, and a line is
    written to stderr. All shared state is touched under ``_abuse_lock``.
    """
    ip = self.get_client_ip()
    seen_at = time.time()
    window_start = seen_at - ABUSE_404_WINDOW_SEC
    with self._abuse_lock:
        bucket = [
            stamp
            for stamp in self._404_buckets.get(ip, [])
            if stamp >= window_start
        ]
        bucket.append(seen_at)

        if len(bucket) < ABUSE_404_THRESHOLD:
            # Still under the burst threshold: just remember the hits.
            self._404_buckets[ip] = bucket
            return

        # Threshold reached: ban the IP and drop its counter.
        self._banned_ips[ip] = seen_at + ABUSE_BAN_SEC
        self._404_buckets.pop(ip, None)
        sys.stderr.write(
            f"[abuse] Banning {ip} for {ABUSE_BAN_SEC}s after "
            f"{len(bucket)} 404s in {ABUSE_404_WINDOW_SEC}s\n"
        )
# ── Response Helpers ──
@@ -1395,6 +1562,14 @@ class CRMHandler(BaseHTTPRequestHandler):
self.wfile.write(json.dumps(data, default=str).encode('utf-8'))
def send_error_json(self, message, status=400):
    """Send ``{"error": message}`` as a JSON response with ``status``.

    For 404 responses the hit is first passed to ``record_404()`` so that
    every code path that answers 404 feeds the abuse counter. That
    bookkeeping is best-effort: any exception it raises is ignored so the
    error response is still sent.
    """
    not_found = status == 404
    if not_found:
        try:
            self.record_404()
        except Exception:
            # Abuse tracking must never prevent the response going out.
            pass
    payload = {"error": message}
    self.send_json(payload, status)
def send_file(self, filepath, content_type='text/html'):
@@ -1412,6 +1587,8 @@ class CRMHandler(BaseHTTPRequestHandler):
# ── Routing ──
def do_OPTIONS(self):
if self.is_banned():
return self.send_error_json("Too many requests", 429)
self.send_response(200)
self.send_header('Access-Control-Allow-Origin', CORS_ORIGIN)
self.send_header('Access-Control-Allow-Methods', 'GET, POST, PUT, PATCH, DELETE, OPTIONS')
@@ -1419,8 +1596,25 @@ class CRMHandler(BaseHTTPRequestHandler):
self.end_headers()
def do_GET(self):
# Short-circuit known abusers before any work, including file reads.
if self.is_banned():
return self.send_error_json("Too many requests", 429)
# Generic per-IP GET rate limit. Generous enough for a normal user
# (page load fans out ~15 GETs + heartbeats every ~6s) but blocks a
# scanner blasting hundreds of paths per second.
if self.rate_limited('get', GET_RATE_LIMIT_PER_MIN):
return self.send_error_json("Too many requests", 429)
path = self.get_path()
# ─── Gmail integration routes (feature-flag-guarded) ─────────
try:
from email_integration.routes import try_handle as _email_try_handle
if _email_try_handle(self):
return
except ImportError:
pass
# Serve frontend
if path == '/' or path == '/index.html':
return self.send_file(os.path.join(FRONTEND_DIR, 'index.html'))
@@ -1510,6 +1704,8 @@ class CRMHandler(BaseHTTPRequestHandler):
# Fundraising grid state
if path == '/api/fundraising/state':
return self.handle_get_fundraising_state(user)
if path == '/api/fundraising/collab/state':
return self.handle_get_fundraising_collab_state(user)
if path == '/api/fundraising/export':
return self.handle_export_fundraising_state(user)
if path == '/api/fundraising/backups':
@@ -1538,12 +1734,23 @@ class CRMHandler(BaseHTTPRequestHandler):
self.send_error_json("Not found", 404)
def do_POST(self):
if self.is_banned():
return self.send_error_json("Too many requests", 429)
path = self.get_path()
body = self.get_body()
if self.rate_limited('write', WRITE_RATE_LIMIT_PER_MIN):
return self.send_error_json("Too many requests", 429)
# ─── Gmail integration routes (feature-flag-guarded) ─────────
try:
from email_integration.routes import try_handle as _email_try_handle
if _email_try_handle(self):
return
except ImportError:
pass
# Auth (no token needed)
if path == '/api/auth/login':
if self.rate_limited('login', LOGIN_RATE_LIMIT_PER_MIN):
@@ -1573,8 +1780,12 @@ class CRMHandler(BaseHTTPRequestHandler):
return self.handle_create_feature_request(user, body)
if path == '/api/fundraising/log-communication':
return self.handle_log_fundraising_communication(user, body)
if path == '/api/fundraising/collab/heartbeat':
return self.handle_fundraising_collab_heartbeat(user, body)
if path == '/api/admin/users':
return self.handle_admin_create_user(user, body)
if path == '/api/admin/reset-all-data':
return self.handle_admin_reset_all_data(user, body)
if path == '/api/fundraising/backup':
return self.handle_backup_fundraising_state(user)
if path == '/api/fundraising/restore-preview':
@@ -1587,6 +1798,8 @@ class CRMHandler(BaseHTTPRequestHandler):
self.send_error_json("Not found", 404)
def do_PUT(self):
if self.is_banned():
return self.send_error_json("Too many requests", 429)
path = self.get_path()
body = self.get_body()
if self.rate_limited('write', WRITE_RATE_LIMIT_PER_MIN):
@@ -1611,6 +1824,8 @@ class CRMHandler(BaseHTTPRequestHandler):
self.send_error_json("Not found", 404)
def do_PATCH(self):
if self.is_banned():
return self.send_error_json("Too many requests", 429)
path = self.get_path()
body = self.get_body()
if self.rate_limited('write', WRITE_RATE_LIMIT_PER_MIN):
@@ -1637,6 +1852,8 @@ class CRMHandler(BaseHTTPRequestHandler):
self.send_error_json("Not found", 404)
def do_DELETE(self):
if self.is_banned():
return self.send_error_json("Too many requests", 429)
path = self.get_path()
if self.rate_limited('write', WRITE_RATE_LIMIT_PER_MIN):
return self.send_error_json("Too many requests", 429)
@@ -1761,8 +1978,8 @@ class CRMHandler(BaseHTTPRequestHandler):
args.append(params['status'])
if params.get('search'):
search = f"%{params['search']}%"
query += " AND (c.first_name LIKE ? OR c.last_name LIKE ? OR c.email LIKE ? OR o.name LIKE ?)"
args.extend([search, search, search, search])
query += " AND (c.first_name LIKE ? OR c.last_name LIKE ? OR c.email LIKE ? OR o.name LIKE ? OR c.source LIKE ?)"
args.extend([search, search, search, search, search])
if params.get('organization_id'):
query += " AND c.organization_id = ?"
args.append(params['organization_id'])
@@ -1772,7 +1989,7 @@ class CRMHandler(BaseHTTPRequestHandler):
sort = params.get('sort', 'updated_at')
order = 'DESC' if params.get('order', 'desc').lower() == 'desc' else 'ASC'
allowed_sorts = ['first_name', 'last_name', 'email', 'created_at', 'updated_at', 'contact_type']
allowed_sorts = ['first_name', 'last_name', 'email', 'created_at', 'updated_at', 'contact_type', 'source']
if sort in allowed_sorts:
query += f" ORDER BY c.{sort} {order}"
else:
@@ -1837,20 +2054,25 @@ class CRMHandler(BaseHTTPRequestHandler):
contact_id = generate_id()
conn = get_db()
organization_id = body.get('organization_id')
if not organization_id and body.get('organization'):
organization_id = _ensure_organization_by_name(conn, body.get('organization'), user['user_id'])
tags = json.dumps(body.get('tags', []))
conn.execute("""
INSERT INTO contacts (id, first_name, last_name, email, phone, mobile, title,
organization_id, contact_type, status, source, tags, notes, linkedin_url,
preferred_contact, created_by)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
city, state, country, location_query, preferred_contact, created_by)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
""", (
contact_id, body['first_name'], body['last_name'],
body.get('email'), body.get('phone'), body.get('mobile'),
body.get('title'), body.get('organization_id'),
body.get('title'), organization_id,
body.get('contact_type', 'prospect'), body.get('status', 'active'),
body.get('source'), tags, body.get('notes'),
body.get('linkedin_url'), body.get('preferred_contact', 'email'),
body.get('linkedin_url'), body.get('city'), body.get('state'),
body.get('country'), body.get('location_query'),
body.get('preferred_contact', 'email'),
user['user_id']
))
@@ -1879,13 +2101,17 @@ class CRMHandler(BaseHTTPRequestHandler):
updatable = ['first_name', 'last_name', 'email', 'phone', 'mobile', 'title',
'organization_id', 'contact_type', 'status', 'source', 'notes',
'linkedin_url', 'preferred_contact']
'linkedin_url', 'city', 'state', 'country', 'location_query', 'preferred_contact']
sets = []
args = []
for field in updatable:
if field in body:
sets.append(f"{field} = ?")
args.append(body[field])
if 'organization' in body and 'organization_id' not in body:
org_id = _ensure_organization_by_name(conn, body.get('organization'), user['user_id'])
sets.append("organization_id = ?")
args.append(org_id)
if 'tags' in body:
sets.append("tags = ?")
@@ -2876,12 +3102,17 @@ class CRMHandler(BaseHTTPRequestHandler):
entity_type = body.get('entity_type', 'contacts')
mapping = body.get('mapping', {})
dry_run = body.get('dry_run', False)
update_existing = bool(body.get('update_existing', True))
action_overrides_raw = body.get('action_overrides', {}) or {}
if not csv_data:
return self.send_error_json("No data provided. Send 'data' as array of objects.")
conn = get_db()
results = {"created": 0, "updated": 0, "skipped": 0, "errors": []}
results = {"created": 0, "updated": 0, "skipped": 0, "errors": [], "matches": []}
# Keep in-memory email matches so dry-run mirrors real behavior for
# duplicate emails appearing multiple times in the same CSV batch.
batch_email_matches = {}
try:
for i, row in enumerate(csv_data):
@@ -2911,11 +3142,44 @@ class CRMHandler(BaseHTTPRequestHandler):
continue
email = data.get('email', '').strip()
email_key = email.lower()
linkedin_url = data.get('linkedin_url', data.get('linkedin', '')).strip()
city = data.get('city', '').strip()
state = data.get('state', '').strip()
country = data.get('country', '').strip()
location_query = data.get('location_query', '').strip()
raw_location = data.get('location', data.get('city_location', data.get('city/location', ''))).strip()
if raw_location:
p_city, p_state, p_country, p_query = _parse_location_text(raw_location)
city = city or p_city
state = state or p_state
country = country or p_country
location_query = location_query or p_query
# Check for existing contact by email
existing = None
existing_summary = None
if email:
existing = conn.execute("SELECT id FROM contacts WHERE email = ?", (email,)).fetchone()
if email_key in batch_email_matches:
existing_summary = batch_email_matches[email_key]
existing = {"id": existing_summary.get('id')}
else:
existing = conn.execute("""
SELECT c.id, c.first_name, c.last_name, c.email, o.name as organization_name
FROM contacts c
LEFT JOIN organizations o ON c.organization_id = o.id
WHERE lower(c.email) = lower(?)
ORDER BY c.updated_at DESC
LIMIT 1
""", (email,)).fetchone()
if existing:
existing_summary = {
"id": existing['id'],
"name": f"{str(existing['first_name'] or '').strip()} {str(existing['last_name'] or '').strip()}".strip(),
"email": str(existing['email'] or ''),
"organization": str(existing['organization_name'] or '')
}
batch_email_matches[email_key] = existing_summary
# Handle organization
org_id = None
@@ -2931,32 +3195,149 @@ class CRMHandler(BaseHTTPRequestHandler):
(org_id, org_name, user['user_id'])
)
action_override = None
if isinstance(action_overrides_raw, dict):
action_override = action_overrides_raw.get(str(i + 1)) or action_overrides_raw.get(i + 1)
default_action = 'update' if update_existing else 'skip'
action = action_override if action_override in ('update', 'skip', 'create_duplicate') else default_action
if existing:
incoming_name = f"{first_name} {last_name}".strip()
results['matches'].append({
"row": i + 1,
"incoming_name": incoming_name,
"incoming_email": email,
"incoming_organization": org_name,
"existing_id": existing_summary.get('id') if isinstance(existing_summary, dict) else existing['id'],
"existing_name": existing_summary.get('name') if isinstance(existing_summary, dict) else '',
"existing_email": existing_summary.get('email') if isinstance(existing_summary, dict) else email,
"existing_organization": existing_summary.get('organization') if isinstance(existing_summary, dict) else '',
"default_action": default_action,
"action": action
})
if not dry_run:
if existing:
conn.execute("""
UPDATE contacts SET first_name=?, last_name=?, phone=?, title=?,
organization_id=COALESCE(?, organization_id),
contact_type=COALESCE(?, contact_type), updated_at=?
WHERE id=?
""", (first_name, last_name, data.get('phone'),
data.get('title'), org_id,
data.get('contact_type'), now(), existing['id']))
results['updated'] += 1
if action == 'update':
conn.execute("""
UPDATE contacts SET first_name=?, last_name=?, phone=?, title=?,
organization_id=COALESCE(?, organization_id),
contact_type=COALESCE(?, contact_type),
linkedin_url=COALESCE(?, linkedin_url),
city=COALESCE(?, city),
state=COALESCE(?, state),
country=COALESCE(?, country),
location_query=COALESCE(?, location_query),
updated_at=?
WHERE id=?
""", (first_name, last_name, data.get('phone'),
data.get('title'), org_id,
data.get('contact_type'),
linkedin_url if linkedin_url else None,
city if city else None,
state if state else None,
country if country else None,
location_query if location_query else None,
now(), existing['id']))
if email:
batch_email_matches[email_key] = {
"id": existing['id'],
"name": f"{first_name} {last_name}".strip(),
"email": email,
"organization": org_name
}
updated_contact = row_to_dict(conn.execute("""
SELECT c.*, o.name as organization_name
FROM contacts c LEFT JOIN organizations o ON c.organization_id = o.id
WHERE c.id = ?
""", (existing['id'],)).fetchone())
_sync_contact_to_fundraising_state(conn, updated_contact, actor_user_id=user['user_id'], remove=False)
results['updated'] += 1
elif action == 'create_duplicate':
contact_id = generate_id()
conn.execute("""
INSERT INTO contacts (id, first_name, last_name, email, phone,
title, organization_id, contact_type, status, source,
linkedin_url, city, state, country, location_query, created_by)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, 'active', 'import', ?, ?, ?, ?, ?, ?)
""", (contact_id, first_name, last_name, email,
data.get('phone'), data.get('title'), org_id,
data.get('contact_type', 'prospect'), linkedin_url,
city, state, country, location_query, user['user_id']))
if email:
batch_email_matches[email_key] = {
"id": contact_id,
"name": f"{first_name} {last_name}".strip(),
"email": email,
"organization": org_name
}
created_contact = row_to_dict(conn.execute("""
SELECT c.*, o.name as organization_name
FROM contacts c LEFT JOIN organizations o ON c.organization_id = o.id
WHERE c.id = ?
""", (contact_id,)).fetchone())
_sync_contact_to_fundraising_state(conn, created_contact, actor_user_id=user['user_id'], remove=False)
results['created'] += 1
else:
results['skipped'] += 1
results['errors'].append(f"Row {i+1}: Existing contact matched by email; skipped")
else:
contact_id = generate_id()
conn.execute("""
INSERT INTO contacts (id, first_name, last_name, email, phone,
title, organization_id, contact_type, status, source, created_by)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, 'active', 'import', ?)
title, organization_id, contact_type, status, source,
linkedin_url, city, state, country, location_query, created_by)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, 'active', 'import', ?, ?, ?, ?, ?, ?)
""", (contact_id, first_name, last_name, email,
data.get('phone'), data.get('title'), org_id,
data.get('contact_type', 'prospect'), user['user_id']))
data.get('contact_type', 'prospect'), linkedin_url,
city, state, country, location_query, user['user_id']))
if email:
batch_email_matches[email_key] = {
"id": contact_id,
"name": f"{first_name} {last_name}".strip(),
"email": email,
"organization": org_name
}
created_contact = row_to_dict(conn.execute("""
SELECT c.*, o.name as organization_name
FROM contacts c LEFT JOIN organizations o ON c.organization_id = o.id
WHERE c.id = ?
""", (contact_id,)).fetchone())
_sync_contact_to_fundraising_state(conn, created_contact, actor_user_id=user['user_id'], remove=False)
results['created'] += 1
else:
if existing:
results['updated'] += 1
if action == 'update':
results['updated'] += 1
if email:
batch_email_matches[email_key] = {
"id": existing['id'],
"name": f"{first_name} {last_name}".strip(),
"email": email,
"organization": org_name
}
elif action == 'create_duplicate':
results['created'] += 1
if email:
batch_email_matches[email_key] = {
"id": f"dryrun-{i+1}",
"name": f"{first_name} {last_name}".strip(),
"email": email,
"organization": org_name
}
else:
results['skipped'] += 1
results['errors'].append(f"Row {i+1}: Existing contact matched by email; would be skipped")
else:
results['created'] += 1
if email:
# Simulate that the row now exists for subsequent duplicate-email rows.
batch_email_matches[email_key] = {
"id": f"dryrun-{i+1}",
"name": f"{first_name} {last_name}".strip(),
"email": email,
"organization": org_name
}
except Exception as e:
results['errors'].append(f"Row {i+1}: {str(e)}")
@@ -3141,6 +3522,58 @@ class CRMHandler(BaseHTTPRequestHandler):
conn.close()
return self.send_json({"data": updated})
def handle_admin_reset_all_data(self, user, body):
    """Delete all CRM records and reset the fundraising grid to its defaults.

    Guards:
        * ``require_admin(user)`` must pass, otherwise a 403 is returned.
        * ``body['confirm_phrase']`` (stripped) must equal ``RESET ALL DATA``,
          otherwise a 400 is returned.

    Inside a single transaction this handler:
        1. Makes sure the ``fundraising_state`` row exists and passes it to
           ``create_fundraising_backup_file`` (kind ``pre_restore``).
        2. Deletes every row from the communications, opportunities,
           lp_profiles, custom field, feature request, contact and
           organization tables.
        3. Rewrites the ``main`` fundraising state with the default grid and
           views, bumping its version, and re-syncs the relational copy.
        4. Writes an audit entry naming the backup file (if any).

    Any exception rolls the transaction back and yields a 500 response.
    On success the response carries ``status`` and the ``pre_backup`` value.
    """
    if not require_admin(user):
        return self.send_error_json("Admin only", 403)

    supplied_phrase = str(body.get('confirm_phrase') or '').strip()
    if supplied_phrase != 'RESET ALL DATA':
        return self.send_error_json("Confirmation phrase must be exactly: RESET ALL DATA", 400)

    # Executed in exactly this order. NOTE(review): the order looks like
    # dependents-before-parents (values before fields, contacts before
    # organizations) - confirm against the schema before reordering.
    wipe_statements = (
        "DELETE FROM communications",
        "DELETE FROM opportunities",
        "DELETE FROM lp_profiles",
        "DELETE FROM custom_field_values",
        "DELETE FROM custom_fields",
        "DELETE FROM feature_requests",
        "DELETE FROM contacts",
        "DELETE FROM organizations",
    )

    conn = get_db()
    try:
        ensure_fundraising_state_row(conn)
        current_state = conn.execute("SELECT * FROM fundraising_state WHERE id = 'main'").fetchone()
        # Snapshot the existing state before anything is destroyed.
        if current_state:
            pre_backup = create_fundraising_backup_file(current_state, kind="pre_restore")
        else:
            pre_backup = None

        for statement in wipe_statements:
            conn.execute(statement)

        fresh_columns = deep_copy_json(DEFAULT_FUNDRAISING_COLUMNS)
        fresh_rows = deep_copy_json(DEFAULT_FUNDRAISING_ROWS)
        default_grid = {"columns": fresh_columns, "rows": fresh_rows}
        default_views = sanitize_grid_views(deep_copy_json(DEFAULT_GRID_VIEWS))

        actor_id = user['user_id']
        conn.execute("""
            UPDATE fundraising_state
            SET grid_json = ?, views_json = ?, version = COALESCE(version, 1) + 1, updated_by = ?, updated_at = ?
            WHERE id = 'main'
        """, (json.dumps(default_grid), json.dumps(default_views), actor_id, now()))
        sync_fundraising_relational(conn, default_grid, default_views, actor_user_id=actor_id)

        backup_name = pre_backup['filename'] if pre_backup else None
        log_audit(conn, actor_id, 'system', 'all-data', 'reset', {
            "pre_backup": backup_name
        })
        conn.commit()
    except Exception as exc:
        # Undo the partial wipe so a failed reset leaves the data untouched.
        conn.rollback()
        conn.close()
        return self.send_error_json(f"Failed to reset data: {str(exc)}", 500)

    conn.close()
    payload = {"status": "ok", "pre_backup": pre_backup}
    return self.send_json({"data": payload})
def handle_list_audit_log(self, user, params):
if not require_admin(user):
return self.send_error_json("Admin access required", 403)
@@ -3574,6 +4007,128 @@ class CRMHandler(BaseHTTPRequestHandler):
}
})
def _cleanup_fundraising_collab(self, conn):
    """Purge presence rows and cell locks whose expiry epoch has passed.

    Runs both deletes on ``conn`` without committing; the caller owns the
    transaction.
    """
    cutoff = (int(time.time()),)
    purge_statements = (
        "DELETE FROM fundraising_presence WHERE expires_at_epoch <= ?",
        "DELETE FROM fundraising_cell_locks WHERE expires_at_epoch <= ?",
    )
    for statement in purge_statements:
        conn.execute(statement, cutoff)
def _list_fundraising_collab_state(self, conn):
    """Read every presence row and cell lock, newest ``last_seen_at`` first.

    Returns a dict with two lists: ``presence`` and ``locks``. Each presence
    entry has its ``is_editing`` value coerced to a real ``bool``. No expiry
    filtering happens here; rows are returned exactly as stored.
    """
    presence_cursor = conn.execute("""
        SELECT user_id, username, full_name, active_view, row_id, col_id, is_editing, cell_key, last_seen_at
        FROM fundraising_presence
        ORDER BY last_seen_at DESC
    """)
    presence = rows_to_list(presence_cursor.fetchall())

    lock_cursor = conn.execute("""
        SELECT cell_key, row_id, col_id, locked_by_user_id, locked_by_username, locked_by_full_name, last_seen_at
        FROM fundraising_cell_locks
        ORDER BY last_seen_at DESC
    """)
    locks = rows_to_list(lock_cursor.fetchall())

    # The column holds 0/1 on write; expose a boolean to API consumers.
    for entry in presence:
        entry['is_editing'] = bool(entry.get('is_editing'))

    return {"presence": presence, "locks": locks}
def handle_get_fundraising_collab_state(self, user):
    """Return the current fundraising presence/lock snapshot.

    Expired presence rows and cell locks are purged first, and that purge is
    committed together with the read. The response body is
    ``{"data": {"presence": [...], "locks": [...]}}``.

    ``user`` is accepted for handler-signature consistency and is not read.
    """
    conn = get_db()
    try:
        self._cleanup_fundraising_collab(conn)
        snapshot = self._list_fundraising_collab_state(conn)
        conn.commit()
    finally:
        # Always release the connection: if a query raises after the purge
        # DELETEs, an unclosed handle would keep the pending write open.
        conn.close()
    return self.send_json({"data": snapshot})
def handle_fundraising_collab_heartbeat(self, user, body):
    """Record the caller's grid presence and claim or release a cell lock.

    Fields read from ``body``:
        active_view: view identifier, stored as a stripped string.
        selected: optional dict with ``row_id`` / ``col_id`` of the focused cell.
        editing: optional dict with ``row_id`` / ``col_id`` of the cell being
            edited; both must be non-empty for the caller to count as editing.
        ttl_seconds: lifetime of the presence row and lock, clamped to
            10..120 seconds. Missing, falsy or unparseable values use 25.

    Behavior:
        * Expired presence rows and locks are purged first.
        * If the caller is editing a cell that another user holds an
          unexpired lock on, no lock is taken, the caller is recorded as not
          editing, and the other user's lock row is returned as
          ``lock_conflict``.
        * Otherwise the lock on that cell is upserted for the caller and any
          other locks the caller holds are dropped.
        * If the caller is not editing, all of the caller's locks are dropped.
        * The caller's presence row is upserted in every case.

    Returns the JSON response ``{"data": {"presence", "locks", "lock_conflict"}}``.
    """
    active_view = str(body.get('active_view') or '').strip()
    selected = body.get('selected') if isinstance(body.get('selected'), dict) else {}
    editing = body.get('editing') if isinstance(body.get('editing'), dict) else {}
    selected_row_id = str(selected.get('row_id') or '').strip()
    selected_col_id = str(selected.get('col_id') or '').strip()
    editing_row_id = str(editing.get('row_id') or '').strip()
    editing_col_id = str(editing.get('col_id') or '').strip()
    is_editing = bool(editing_row_id and editing_col_id)

    # ttl_seconds comes straight from the client. A non-numeric string, a
    # list/dict, NaN or Infinity must not crash the heartbeat, so fall back
    # to the default instead of letting int() raise.
    try:
        ttl_seconds = int(body.get('ttl_seconds') or 25)
    except (TypeError, ValueError, OverflowError):
        ttl_seconds = 25
    ttl_seconds = max(10, min(120, ttl_seconds))

    now_epoch = int(time.time())
    expires_at_epoch = now_epoch + ttl_seconds
    seen_at = now()
    lock_conflict = None

    conn = get_db()
    try:
        self._cleanup_fundraising_collab(conn)
        user_row = conn.execute("SELECT username, full_name FROM users WHERE id = ?", (user['user_id'],)).fetchone()
        # Prefer the stored username; fall back to the one carried on the session user.
        username = str(user_row['username']) if user_row and user_row['username'] else str(user.get('username') or '')
        full_name = str(user_row['full_name']) if user_row and user_row['full_name'] else ''

        editing_cell_key = f"{editing_row_id}:{editing_col_id}" if is_editing else None
        if is_editing and editing_cell_key:
            existing_lock = conn.execute("""
                SELECT cell_key, row_id, col_id, locked_by_user_id, locked_by_username, locked_by_full_name, last_seen_at
                FROM fundraising_cell_locks
                WHERE cell_key = ? AND locked_by_user_id != ? AND expires_at_epoch > ?
                LIMIT 1
            """, (editing_cell_key, user['user_id'], now_epoch)).fetchone()
            if existing_lock:
                # Someone else owns the cell: report it and demote the caller
                # to "not editing" for the presence row written below.
                # NOTE(review): locks the caller holds on *other* cells are
                # not released on this path and linger until they expire -
                # confirm whether that is intended.
                lock_conflict = row_to_dict(existing_lock)
                is_editing = False
                editing_cell_key = None
            else:
                conn.execute("""
                    INSERT INTO fundraising_cell_locks (
                        cell_key, row_id, col_id, locked_by_user_id, locked_by_username, locked_by_full_name, last_seen_at, expires_at_epoch
                    ) VALUES (?, ?, ?, ?, ?, ?, ?, ?)
                    ON CONFLICT(cell_key) DO UPDATE SET
                        row_id = excluded.row_id,
                        col_id = excluded.col_id,
                        locked_by_user_id = excluded.locked_by_user_id,
                        locked_by_username = excluded.locked_by_username,
                        locked_by_full_name = excluded.locked_by_full_name,
                        last_seen_at = excluded.last_seen_at,
                        expires_at_epoch = excluded.expires_at_epoch
                """, (editing_cell_key, editing_row_id, editing_col_id, user['user_id'], username, full_name, seen_at, expires_at_epoch))
                # A user edits one cell at a time: drop locks on any other cell.
                conn.execute("""
                    DELETE FROM fundraising_cell_locks
                    WHERE locked_by_user_id = ? AND cell_key != ?
                """, (user['user_id'], editing_cell_key))
        else:
            conn.execute("DELETE FROM fundraising_cell_locks WHERE locked_by_user_id = ?", (user['user_id'],))

        conn.execute("""
            INSERT INTO fundraising_presence (
                user_id, username, full_name, active_view, row_id, col_id, is_editing, cell_key, last_seen_at, expires_at_epoch
            ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            ON CONFLICT(user_id) DO UPDATE SET
                username = excluded.username,
                full_name = excluded.full_name,
                active_view = excluded.active_view,
                row_id = excluded.row_id,
                col_id = excluded.col_id,
                is_editing = excluded.is_editing,
                cell_key = excluded.cell_key,
                last_seen_at = excluded.last_seen_at,
                expires_at_epoch = excluded.expires_at_epoch
        """, (
            user['user_id'],
            username,
            full_name,
            active_view,
            # Show the selected cell when there is one, else the edited cell.
            selected_row_id or editing_row_id,
            selected_col_id or editing_col_id,
            1 if is_editing else 0,
            editing_cell_key,
            seen_at,
            expires_at_epoch,
        ))
        snapshot = self._list_fundraising_collab_state(conn)
        conn.commit()
    finally:
        # Close even when a statement raises so a failed heartbeat never
        # leaves a connection open with uncommitted writes.
        conn.close()

    return self.send_json({
        "data": {
            **snapshot,
            "lock_conflict": lock_conflict
        }
    })
def handle_update_fundraising_state(self, user, body):
grid = body.get('grid', {})
views = body.get('views')
@@ -3948,7 +4503,20 @@ def main():
print("Demo data seeding disabled (set CRM_SEED_DEMO_DATA=1 to enable).")
start_backup_scheduler()
server = HTTPServer((HOST, PORT), CRMHandler)
# ─── Gmail sync scheduler (feature-flag-guarded) ─────────────────
if os.environ.get("CRM_GMAIL_INTEGRATION_ENABLED", "").lower() in ("1", "true", "yes", "on"):
try:
from email_integration.scheduler import start_sync_scheduler
start_sync_scheduler()
print("[email_integration] Gmail sync scheduler started")
except Exception as _e:
print(f"[email_integration] failed to start scheduler: {_e}")
# ThreadingHTTPServer lets one slow request (or a wave of scanner probes)
# not block legit users. SQLite is opened per-request via get_db(), and
# WAL mode allows concurrent readers + a single writer, so this is safe.
server = ThreadingHTTPServer((HOST, PORT), CRMHandler)
server.daemon_threads = True
print(f"\n{'='*60}")
print(f" Venture Fund CRM Server")
print(f" Running at http://{HOST}:{PORT}")