Phase 0 foundation: canonical schema, ingest pipeline, CRM MCP server
Workstream A–C substrate for the Ten31 agentic system: - A1: docs/crm-overview.md; CLAUDE.md conventions + guardrail #9 - A2: additive/reversible core migration (canonical_entities, entity_links, interaction_log, relationship_edges, soft-delete) + ledgered runner - B1/B3: chunking + deterministic entity resolution (backend/ingest) - B2: dense (bge-m3) + BM25 sparse ingest to Qdrant crm_chunks - C: CRM MCP server (reads, retrieval modes, logged writes) — no outbound tools - docs: redaction/re-hydration, Gmail enablement runbook - synthetic test data; .env.example; housekeeping (.gitignore, untrack crm.db, drop legacy files + start9/0.3.5) Verified end-to-end on synthetic data + live Sparks (hybrid > dense on entity queries). Real backfill runs on Ten31 infra; index holds synthetic data only. Branch snapshot also captures pre-existing working-tree changes. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,3 @@
|
||||
import { sdk } from '../sdk'
|
||||
|
||||
/** Action registry for this package; it is created empty, with no actions added. */
export const actions = sdk.Actions.of()
|
||||
@@ -0,0 +1,7 @@
|
||||
import { sdk } from './sdk'
|
||||
|
||||
/**
 * Backup and restore hooks for the package.
 *
 * The entire `main` service volume is backed up so that crm.db, the backup
 * JSON files, and the persisted JWT secret all remain compatible with the
 * prior package layout.
 */
export const { createBackup, restoreInit } = sdk.setupBackups(async () => {
  return sdk.Backups.ofVolumes('main')
})
|
||||
@@ -0,0 +1,5 @@
|
||||
import { sdk } from './sdk'
|
||||
|
||||
/**
 * Dependency setup hook. The callback resolves to an empty object, so no
 * dependencies are declared here.
 */
export const setDependencies = sdk.setupDependencies(async () => ({}))
|
||||
@@ -0,0 +1 @@
|
||||
/**
 * Pass-through translation hook: returns `text` exactly as given.
 *
 * @param text - The string to present to the user.
 * @returns The same string, unchanged.
 */
export const i18n = (text: string): string => {
  return text
}
|
||||
@@ -0,0 +1,13 @@
|
||||
/**
|
||||
* Plumbing. DO NOT EDIT.
|
||||
*/
|
||||
export { createBackup } from './backups'
|
||||
export { main } from './main'
|
||||
export { init, uninit } from './init'
|
||||
export { actions } from './actions'
|
||||
|
||||
import { buildManifest } from '@start9labs/start-sdk'
|
||||
import { manifest as sdkManifest } from './manifest'
|
||||
import { versionGraph } from './versions'
|
||||
|
||||
export const manifest = buildManifest(versionGraph, sdkManifest)
|
||||
@@ -0,0 +1,16 @@
|
||||
import { sdk } from '../sdk'
|
||||
import { setDependencies } from '../dependencies'
|
||||
import { setInterfaces } from '../interfaces'
|
||||
import { versionGraph } from '../versions'
|
||||
import { actions } from '../actions'
|
||||
import { restoreInit } from '../backups'
|
||||
|
||||
/**
 * Init hook for the package. The SDK is handed the individual setup pieces
 * in this fixed order; do not reorder them.
 */
export const init = sdk.setupInit(
  restoreInit, // restore hook produced by the backups module
  versionGraph, // known package versions and their migrations
  setInterfaces, // network interface registration
  setDependencies, // dependency declaration
  actions, // action registry
)
|
||||
|
||||
/** Uninit hook, built by the SDK from the package's version graph. */
export const uninit = sdk.setupUninit(versionGraph)
|
||||
@@ -0,0 +1,25 @@
|
||||
import { i18n } from './i18n'
|
||||
import { sdk } from './sdk'
|
||||
import { WEB_PORT } from './utils'
|
||||
|
||||
/**
 * Registers the service's network interfaces.
 *
 * Binds WEB_PORT over HTTP on the `ui-multi` host, describes a single `ui`
 * interface ("Web UI") and exports it from that binding. The returned array
 * holds the one export receipt.
 */
export const setInterfaces = sdk.setupInterfaces(async ({ effects }) => {
  const host = sdk.MultiHost.of(effects, 'ui-multi')
  const origin = await host.bindPort(WEB_PORT, { protocol: 'http' })

  const webUi = sdk.createInterface(effects, {
    name: i18n('Web UI'),
    id: 'ui',
    description: i18n('The web interface of Ten31 Database'),
    type: 'ui',
    masked: false,
    schemeOverride: null,
    username: null,
    path: '',
    query: {},
  })

  return [await origin.export([webUi])]
})
|
||||
@@ -0,0 +1,33 @@
|
||||
import { i18n } from './i18n'
|
||||
import { sdk } from './sdk'
|
||||
import { DATA_MOUNT_PATH, IMAGE_ID, WEB_PORT } from './utils'
|
||||
|
||||
/**
 * Service entry point.
 *
 * Logs a startup line, then registers one daemon (`primary`) that runs
 * /usr/local/bin/docker_entrypoint.sh inside a subcontainer created from
 * IMAGE_ID, with the `main` volume mounted read-write at DATA_MOUNT_PATH.
 * Readiness is reported by checking that WEB_PORT is listening.
 */
export const main = sdk.setupMain(async ({ effects }) => {
  console.info(i18n('Starting Ten31 Database'))

  const daemons = sdk.Daemons.of(effects)

  // Whole `main` volume (no subpath), writable, at the data mount point.
  const dataMount = sdk.Mounts.of().mountVolume({
    volumeId: 'main',
    subpath: null,
    mountpoint: DATA_MOUNT_PATH,
    readonly: false,
  })

  const container = await sdk.SubContainer.of(
    effects,
    { imageId: IMAGE_ID },
    dataMount,
    'ten31-database-main',
  )

  return daemons.addDaemon('primary', {
    subcontainer: container,
    exec: { command: ['/usr/local/bin/docker_entrypoint.sh'] },
    ready: {
      display: i18n('Web Interface'),
      fn: () => {
        return sdk.healthCheck.checkPortListening(effects, WEB_PORT, {
          successMessage: i18n('CRM API is responding.'),
          errorMessage: i18n('CRM API is not responding.'),
        })
      },
    },
    requires: [],
  })
})
|
||||
@@ -0,0 +1,13 @@
|
||||
// English source strings for the manifest description and update alert.
const shortEnUS = 'Self-hosted investor and fundraising database for Ten31.'

const longEnUS =
  'Ten31 Database is an Airtable-style investor CRM with fundraising grid, communications logging, views, backups, and CSV import. This StartOS 0.4 wrapper preserves the existing /data layout for upgrade-safe persistence.'

const alertUpdateEnUS =
  'This 0.4 package is designed to keep using the existing /data/crm.db, /data/backups, and /data/.crm-secret layout from the 0.3.5.1 package.'

/** One-line package description, keyed by locale. */
export const short = { en_US: shortEnUS }

/** Full package description, keyed by locale. */
export const long = { en_US: longEnUS }

/** Alert text used for the manifest's `update` alert, keyed by locale. */
export const alertUpdate = { en_US: alertUpdateEnUS }
|
||||
@@ -0,0 +1,35 @@
|
||||
import { setupManifest } from '@start9labs/start-sdk'
|
||||
import { alertUpdate, long, short } from './i18n'
|
||||
|
||||
/**
 * Package manifest handed to the SDK's `setupManifest`.
 *
 * Declares the package identity and links, a single `main` volume, a single
 * `main` image built from ./Dockerfile for x86_64 and aarch64, an alert on
 * update only, and no dependencies.
 */
export const manifest = setupManifest({
  // Identity
  id: 'ten-database',
  title: 'Ten31 Database',
  license: 'MIT',

  // Links
  packageRepo: 'https://github.com/ten31/ten31-database-startos',
  upstreamRepo: 'https://github.com/ten31/ten31-database',
  marketingUrl: 'https://ten31.vc',
  donationUrl: null,
  docsUrls: ['https://docs.start9.com/packaging/0.4.0.x/'],

  description: { short: short, long: long },

  // Storage and images
  volumes: ['main'],
  images: {
    main: {
      source: {
        dockerBuild: { dockerfile: './Dockerfile', workdir: '../..' },
      },
      arch: ['x86_64', 'aarch64'],
    },
  },

  // Only the update alert carries text; every other lifecycle alert is off.
  alerts: {
    install: null,
    update: alertUpdate,
    uninstall: null,
    restore: null,
    start: null,
    stop: null,
  },

  dependencies: {},
})
|
||||
@@ -0,0 +1,9 @@
|
||||
import { StartSdk } from '@start9labs/start-sdk'
|
||||
import { manifest } from './manifest'
|
||||
|
||||
/**
|
||||
* Plumbing. DO NOT EDIT.
|
||||
*
|
||||
* The exported `sdk` const is used throughout this package codebase.
|
||||
*/
|
||||
export const sdk = StartSdk.of().withManifest(manifest).build(true)
|
||||
@@ -0,0 +1,16 @@
|
||||
// Informational constants shared across the startos/ modules.
// The authoritative id, title and version for the package come
// from manifest/index.ts (id, title) and versions/ (version).
export const PACKAGE_ID = 'ten-database'
export const PACKAGE_TITLE = 'Ten31 Database'

// ExVer form of the current 0.4 wrapper release (upstream 0.1.0, wrapper rev 43).
// Keep this in step with `current` in the versions/ graph; it previously
// lagged behind at 0.1.0:41 after the 42 and 43 releases were added.
//   * 0.3.5 wrapper: 0.1.0.38 (legacy, aarch64)
//   * First 0.4:     0.1.0:39 (shipped seed snapshot for migration)
//   * Cleanup:       0.1.0:40 (seed removed + multi-threaded server + abuser auto-ban)
//   * Auth:          0.1.0:41 (frontend persists auth across refreshes)
//   * Gmail:         0.1.0:42 (Gmail capture pipeline)
//   * Current:       0.1.0:43 (hotfix: Gmail POST hang + relaxed enroll endpoint)
export const PACKAGE_VERSION = '0.1.0:43'

// Where the `main` volume is mounted inside the service container.
export const DATA_MOUNT_PATH = '/data'
// Port bound for the web interface and probed by the readiness check.
export const WEB_PORT = 8080
// Image and volume ids; both are named `main` in the manifest.
export const IMAGE_ID = 'main'
export const VOLUME_ID = 'main'
|
||||
@@ -0,0 +1,11 @@
|
||||
import { VersionGraph } from '@start9labs/start-sdk'
|
||||
import { v_0_1_0_39 } from './v0.1.0.39'
|
||||
import { v_0_1_0_40 } from './v0.1.0.40'
|
||||
import { v_0_1_0_41 } from './v0.1.0.41'
|
||||
import { v_0_1_0_42 } from './v0.1.0.42'
|
||||
import { v_0_1_0_43 } from './v0.1.0.43'
|
||||
|
||||
/**
 * Version graph for the package: 0.1.0:43 is the current release, and
 * 0.1.0:39 through 0.1.0:42 are the other known versions.
 */
export const versionGraph = VersionGraph.of({
  other: [v_0_1_0_39, v_0_1_0_40, v_0_1_0_41, v_0_1_0_42],
  current: v_0_1_0_43,
})
|
||||
@@ -0,0 +1,38 @@
|
||||
import { VersionInfo } from '@start9labs/start-sdk'
|
||||
|
||||
// First StartOS 0.4 release of Ten31 Database.
|
||||
//
|
||||
// Upgrade context:
|
||||
// * The 0.3.5 wrapper shipped at 0.1.0.38 (legacy, aarch64 only).
|
||||
// * This 0.4 wrapper is built for x86_64 and is intended for a
|
||||
// parallel install on a new StartOS 0.4 host.
|
||||
// * Data continuity is NOT handled by a StartOS-level in-place
|
||||
// upgrade (that path does not exist across StartOS majors).
|
||||
// Instead the container image is pre-seeded with a snapshot of
|
||||
// /data (crm.db, backups/, optional .crm-secret). On first boot
|
||||
// docker_entrypoint.sh copies that snapshot into the mounted
|
||||
// `main` volume if it is empty.
|
||||
//
|
||||
// Because both "up" and "down" paths are inside the same wrapper
|
||||
// lineage (and the first 0.4 release has no earlier 0.4 version
|
||||
// to migrate from), the migration functions are intentionally
|
||||
// no-ops. Future 0.4.x releases can chain off this node in the
|
||||
// version graph.
|
||||
// English release notes for 0.1.0:39; the fragments are joined with single spaces.
const notes_0_1_0_39 = [
  'First StartOS 0.4 package for Ten31 Database.',
  'Built for x86_64; sideload-only during beta.',
  'Container image ships with a baked-in /data snapshot so the',
  'service boots with the existing investor and fundraising data,',
  'saved views, backups, users, and app settings already in place.',
  'No StartOS-level migration is performed from the 0.3.5 package;',
  'this package is installed fresh on a 0.4 host.',
].join(' ')

/** Version node for 0.1.0:39. Both migration directions are no-ops. */
export const v_0_1_0_39 = VersionInfo.of({
  version: '0.1.0:39',
  releaseNotes: { en_US: notes_0_1_0_39 },
  migrations: {
    up: async () => {},
    down: async () => {},
  },
})
|
||||
@@ -0,0 +1,57 @@
|
||||
import { VersionInfo } from '@start9labs/start-sdk'
|
||||
|
||||
// Post-migration cleanup + hardening release.
|
||||
//
|
||||
// Context:
|
||||
// * 0.1.0:39 was the first 0.4 package and shipped a baked-in
|
||||
// /data snapshot that docker_entrypoint.sh copied into the
|
||||
// mounted `main` volume on first boot (only if the volume was
|
||||
// empty). That snapshot did its job and the live host now has
|
||||
// a populated /data with all real investor + fundraising data.
|
||||
// * 0.1.0:40 removes the seed snapshot from the image and the
|
||||
// seeding logic from the entrypoint. The live /data volume is
|
||||
// the sole source of truth from here on. StartOS preserves the
|
||||
// volume across sideloads, so this upgrade does not disturb
|
||||
// any data — it just slims the image and removes a code path
|
||||
// that should never run again.
|
||||
// * 0.1.0:40 also hardens the backend HTTP server against the
|
||||
// vulnerability scanners that find the StartTunnel-exposed
|
||||
// interface within hours of going live:
|
||||
// - HTTPServer → ThreadingHTTPServer so one slow request or
|
||||
// a wave of scanner probes can't block legit users.
|
||||
// - Per-IP GET rate limit (default 600/min) in addition to
|
||||
// the existing login/write limits.
|
||||
// - 404-burst auto-ban: any IP that produces ABUSE_404_THRESHOLD
|
||||
// 404s within ABUSE_404_WINDOW_SEC (default 15 in 60s) is
|
||||
// parked on a class-level blacklist for ABUSE_BAN_SEC
|
||||
// (default 15 minutes). Banned IPs get an instant 429 with
|
||||
// no DB or filesystem work.
|
||||
// - All limits stay tunable via env vars
|
||||
// (CRM_GET_RATE_LIMIT_PER_MIN, CRM_ABUSE_404_THRESHOLD,
|
||||
// CRM_ABUSE_404_WINDOW_SEC, CRM_ABUSE_BAN_SEC).
|
||||
//
|
||||
// No data migration is required: the SQLite schema is unchanged
|
||||
// and the live DB on /data is left exactly as-is.
|
||||
// English release notes for 0.1.0:40; the fragments are joined with single spaces.
const notes_0_1_0_40 = [
  'Removes the baked-in /data seed snapshot now that the',
  '0.3.5 → 0.4 migration is complete. The live /data volume',
  'on the StartOS host is the sole source of truth and is',
  'preserved across sideloads, so no live data is touched by',
  'this upgrade. Image is smaller and the first-boot seeding',
  'code path has been removed. Also hardens the backend',
  'against vulnerability scanners hitting the public',
  'StartTunnel interface: the HTTP server is now multi-threaded',
  'so one slow request can no longer block legit users, GET',
  'requests are rate-limited per IP, and any IP that bursts',
  'too many 404s in a short window is auto-banned for 15',
  'minutes with no DB work performed.',
].join(' ')

/** Version node for 0.1.0:40. Both migration directions are no-ops. */
export const v_0_1_0_40 = VersionInfo.of({
  version: '0.1.0:40',
  releaseNotes: { en_US: notes_0_1_0_40 },
  migrations: {
    up: async () => {},
    down: async () => {},
  },
})
|
||||
@@ -0,0 +1,42 @@
|
||||
import { VersionInfo } from '@start9labs/start-sdk'
|
||||
|
||||
// Frontend convenience release: persist auth across page reloads.
|
||||
//
|
||||
// Background: through 0.1.0:40 the auth token + user object were held only
|
||||
// in React state in memory. Any refresh, tab close, or browser restart
|
||||
// dropped the token and forced the user back to the login screen. Since
|
||||
// the JWT is signed with /data/.crm-secret (which already survives sideloads
|
||||
// and container restarts), the underlying token is still valid for its full
|
||||
// 24-hour lifetime — we just weren't keeping it anywhere persistent.
|
||||
//
|
||||
// 0.1.0:41 stores the JWT and user object in localStorage on login (and
|
||||
// rehydrates from there on app mount), so refreshes and reopened tabs stay
|
||||
// signed in until the token expires. The api() helper now also dispatches
|
||||
// a 'crm:unauthorized' event whenever an authenticated request comes back
|
||||
// with a 401, and the AuthProvider listens for that event to clear the
|
||||
// stored auth — so an expired or rejected token immediately bounces the
|
||||
// user back to the login screen instead of leaving the app in a broken
|
||||
// "loaded but every request fails" state.
|
||||
//
|
||||
// Backend is unchanged: the JWT still carries the user's true role and is
|
||||
// re-verified on every request, so a tampered localStorage user object
|
||||
// cannot escalate privileges (the next admin call would just 401/403).
|
||||
//
|
||||
// No data migration is required.
|
||||
// English release notes for 0.1.0:41; the fragments are joined with single spaces.
const notes_0_1_0_41 = [
  'Logins now persist across page refreshes and tab closures for',
  'the full 24-hour token lifetime. Previously every reload bounced',
  'you to the login screen even though the token was still valid.',
  'If the server later rejects a stored token (expired, secret key',
  'changed, etc.) the app automatically clears it and shows the',
  'login screen instead of leaving requests silently failing.',
].join(' ')

/** Version node for 0.1.0:41. Both migration directions are no-ops. */
export const v_0_1_0_41 = VersionInfo.of({
  version: '0.1.0:41',
  releaseNotes: { en_US: notes_0_1_0_41 },
  migrations: {
    up: async () => {},
    down: async () => {},
  },
})
|
||||
@@ -0,0 +1,58 @@
|
||||
import { VersionInfo } from '@start9labs/start-sdk'
|
||||
|
||||
// Gmail integration — Phase 1.
|
||||
//
|
||||
// Background: the CRM previously had no ingestion path for email
|
||||
// activity. Contacts were logged manually; correspondence history lived
|
||||
// only in our mailboxes. This release adds a one-way capture pipeline
|
||||
// that ingests sent and received mail for every Workspace user at
|
||||
// ten31.xyz, matches messages against existing investor records, and
|
||||
// records metadata (+ bodies and attachments for matched threads) into
|
||||
// the CRM database.
|
||||
//
|
||||
// Auth model: domain-wide delegation via a Google service account. The
|
||||
// service-account JSON key is stored on the /data volume at
|
||||
// /data/secrets/gmail-service-account.json (chmod 600, operator-dropped).
|
||||
// The integration is self-disabling: if the key file is absent, the
|
||||
// scheduler doesn't start and /api/email/* routes return 503. No key →
|
||||
// no behavior change from 0.1.0:41.
|
||||
//
|
||||
// When the key IS present, docker_entrypoint.sh auto-enables the
|
||||
// integration and sets sensible defaults (3-hour sync interval, domain
|
||||
// ten31.xyz, DWD auth). All defaults can still be overridden via env.
|
||||
//
|
||||
// Database: migration 0001 adds eight new tables under the email_
|
||||
// namespace (emails, email_accounts, email_recipients,
|
||||
// email_account_messages, email_attachments, email_threads,
|
||||
// email_investor_links, email_sync_runs). All CREATE TABLE IF NOT EXISTS,
|
||||
// so the migration is safely idempotent — re-applying is a no-op.
|
||||
//
|
||||
// Backend: wholly isolated under backend/email_integration/. Three tiny,
|
||||
// feature-flag-guarded hooks in server.py (migration call, scheduler
|
||||
// startup, /api/email/* route dispatch). Removing or disabling the
|
||||
// integration leaves server behavior identical to 0.1.0:41.
|
||||
//
|
||||
// New Python dep: cryptography==42.0.5 (required for RS256 JWT signing
|
||||
// in DWD bearer token exchange). Now installed in the image.
|
||||
//
|
||||
// No data migration code needed — new tables, additive only.
|
||||
// English release notes for 0.1.0:42; the fragments are joined with single spaces.
const notes_0_1_0_42 = [
  'Adds a Gmail capture pipeline. When a Google Workspace',
  "service-account key is dropped into the server's /data/secrets",
  'folder, the CRM begins pulling sent and received mail for every',
  'ten31.xyz user on a 3-hour cycle, matching messages against',
  'existing investor records and storing metadata (plus bodies and',
  'attachments for matched threads) in the database. With no key',
  'present the feature is dormant and this release behaves',
  'identically to 0.1.0:41. Eight new email_* tables are added',
  'additively; no existing data is touched.',
].join(' ')

/** Version node for 0.1.0:42. Both migration directions are no-ops. */
export const v_0_1_0_42 = VersionInfo.of({
  version: '0.1.0:42',
  releaseNotes: { en_US: notes_0_1_0_42 },
  migrations: {
    up: async () => {},
    down: async () => {},
  },
})
|
||||
@@ -0,0 +1,44 @@
|
||||
import { VersionInfo } from '@start9labs/start-sdk'
|
||||
|
||||
// Hotfix for 0.1.0:42.
|
||||
//
|
||||
// Issue 1 (critical): POST requests to /api/email/* hung indefinitely.
|
||||
// server.py's do_POST called get_body() early in the dispatch to support
|
||||
// /api/auth/login, which reads bytes off the request stream. My Gmail
|
||||
// integration hook then ran route handlers that called get_body() a
|
||||
// second time — but the stream was already drained, so the second read
|
||||
// blocked waiting for bytes that never came. GET requests (which don't
|
||||
// read a body) were unaffected.
|
||||
//
|
||||
// Fix: get_body() now caches the parsed JSON on the handler instance
|
||||
// on first call. Repeat calls return the cached value. Handler
|
||||
// instances are per-request in ThreadingHTTPServer, so the cache is
|
||||
// naturally request-scoped and thread-safe.
|
||||
//
|
||||
// Issue 2 (minor): the /api/email/accounts/enroll endpoint required
|
||||
// both `email_address` and `user_id` in the body, making it painful to
|
||||
// call for the common single-admin-enrolling-themselves case.
|
||||
//
|
||||
// Fix: the endpoint now also accepts `email` as an alias, and if
|
||||
// user_id isn't supplied it auto-resolves by looking up the email in
|
||||
// the users table (falling back to the authenticated admin's own id
|
||||
// if no match).
|
||||
//
|
||||
// No schema changes, no data migration.
|
||||
// English release notes for 0.1.0:43; the fragments are joined with single spaces.
const notes_0_1_0_43 = [
  'Hotfix for the Gmail integration in 0.1.0:42. POST requests to',
  '/api/email/* endpoints were hanging because the request body was',
  'being read twice from a single-shot stream. This release caches',
  'the parsed body on the request so subsequent reads are safe, and',
  'also relaxes the enroll endpoint to accept just an email and',
  'auto-resolve the CRM user.',
].join(' ')

/** Version node for 0.1.0:43. Both migration directions are no-ops. */
export const v_0_1_0_43 = VersionInfo.of({
  version: '0.1.0:43',
  releaseNotes: { en_US: notes_0_1_0_43 },
  migrations: {
    up: async () => {},
    down: async () => {},
  },
})
|
||||
Reference in New Issue
Block a user