Compare commits
151 Commits
069e60053b
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
| 05f15b9197 | |||
| b23c48bf7a | |||
| 8f82721a54 | |||
| 6e760b19ee | |||
| 9ffd4a1c6a | |||
| 1564c087bf | |||
| 985cba3c81 | |||
| 5b564d8042 | |||
| d6250f74d0 | |||
| fea88b6557 | |||
| 2746d1408c | |||
| 14c951de57 | |||
| b04f83e1d1 | |||
| 622d454461 | |||
| 463f624548 | |||
| 2a4c2c25a0 | |||
| a761caa3d0 | |||
| acd316ead4 | |||
| 8c9b8b8cc1 | |||
| fb71e0d041 | |||
| 92f97029ac | |||
| a917280bbb | |||
| 7fe5f57c6e | |||
| e824ff2206 | |||
| 92ab59de4e | |||
| 8b2eb01a65 | |||
| 5e115a3409 | |||
| efa2a4886a | |||
| 536358093f | |||
| be40520c3d | |||
| 0aca8848ee | |||
| f9026a4c08 | |||
| 959a6073f5 | |||
| 0490910687 | |||
| 81ed6cbbab | |||
| 707a270922 | |||
| f645288fc3 | |||
| abc614fc98 | |||
| e53a41ae80 | |||
| f0f1ed3bcd | |||
| 43c8048eab | |||
| 8e0f955342 | |||
| 42c169559c | |||
| 93ac0c240f | |||
| e57b154a6d | |||
| 60d67f6b7d | |||
| e3f5ef8dc8 | |||
| 490cab92a3 | |||
| d16a567a3c | |||
| 3f93daf28e | |||
| 099d87dad2 | |||
| e6a89450da | |||
| 7f711d1fae | |||
| ee9db6425a | |||
| 95beb7bb19 | |||
| e34a6fc672 | |||
| 984b950f80 | |||
| 4ed16ca828 | |||
| 634fc4260f | |||
| e46dd36517 | |||
| fe62df1a14 | |||
| 7b560c97b6 | |||
| d388464fe4 | |||
| 9777fe6e25 | |||
| 168336c318 | |||
| c7f959d7d5 | |||
| 99404db48b | |||
| ab0d82ff00 | |||
| 9d0d3068fb | |||
| 2d43bad6fc | |||
| f7b03ee109 | |||
| 68106d7a5a | |||
| 6c29c22601 | |||
| a166b49397 | |||
| f181525926 | |||
| ee6a4e52d2 | |||
| b2690c4342 | |||
| 9044641b08 | |||
| a10889b10b | |||
| 48bd29aaa3 | |||
| 29987061cb | |||
| 27e9ea5b0b | |||
| 5faa5ae4d6 | |||
| 41def0f014 | |||
| 114916b789 | |||
| 4df104b119 | |||
| 7f9a15ebf3 | |||
| 06482247df | |||
| c1ea1769a4 | |||
| b376b8ce33 | |||
| cae2dbc8b9 | |||
| b470ea2659 | |||
| a7b03837b3 | |||
| 0b893295e1 | |||
| fa6c9da0e6 | |||
| aefb2aa119 | |||
| 13326cbdc6 | |||
| fd2e3ed78e | |||
| 7ad0ee7624 | |||
| 172c76553b | |||
| 6e18d8ddd4 | |||
| c7b74a2704 | |||
| c29ac2f2ee | |||
| 40a0270a99 | |||
| 45fd037e3b | |||
| 6563a7811e | |||
| def7c9ea6a | |||
| 42d2b4b245 | |||
| f9705d2216 | |||
| cc25be4e14 | |||
| da052a181b | |||
| a5a9b06423 | |||
| c23384498b | |||
| 108210d8e1 | |||
| 5cda84a7c0 | |||
| 6677aab11d | |||
| 323f016f64 | |||
| 036226ed74 | |||
| 661ad35ee5 | |||
| 4b944ca8ad | |||
| fee037a630 | |||
| 47dfd110a0 | |||
| e62306be27 | |||
| 114a94c894 | |||
| a350f8f5dd | |||
| 2758ac81d3 | |||
| ecfc5d968a | |||
| eb53afc186 | |||
| 96bbf36b2b | |||
| f1bd26e2e4 | |||
| d09a22498d | |||
| 840f927324 | |||
| 7285bb0e52 | |||
| a74a540295 | |||
| 6816d4a4f0 | |||
| a9611b935b | |||
| aec2b7775b | |||
| 1959c22e19 | |||
| 090416f05e | |||
| cabbcae5d5 | |||
| ef869be082 | |||
| 07af9257f4 | |||
| fffc90c7a4 | |||
| c53fdcb4a0 | |||
| 606b336a00 | |||
| 49f84ca9a4 | |||
| 787d580550 | |||
| b5619d61e1 | |||
| 0943aeb2df | |||
| c2b84a1f26 | |||
| 701e37b579 |
@@ -0,0 +1,11 @@
|
|||||||
|
{
|
||||||
|
"version": "0.0.1",
|
||||||
|
"configurations": [
|
||||||
|
{
|
||||||
|
"name": "crm-preview",
|
||||||
|
"runtimeExecutable": "bash",
|
||||||
|
"runtimeArgs": ["-c", "CRM_DB_PATH=/tmp/crm_preview.db CRM_DATA_DIR=/tmp/crm_preview_data CRM_FRONTEND_DIR=$PWD/frontend CRM_PORT=8765 CRM_ENV=development exec python3 backend/server.py"],
|
||||||
|
"port": 8765
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
Symlink
+1
@@ -0,0 +1 @@
|
|||||||
|
../../docs/guides/email.md
|
||||||
Symlink
+1
@@ -0,0 +1 @@
|
|||||||
|
../../docs/guides/matrix-intake.md
|
||||||
Symlink
+1
@@ -0,0 +1 @@
|
|||||||
|
../../docs/guides/migrations.md
|
||||||
Symlink
+1
@@ -0,0 +1 @@
|
|||||||
|
../../docs/guides/nl-query.md
|
||||||
Symlink
+1
@@ -0,0 +1 @@
|
|||||||
|
../../docs/guides/packaging.md
|
||||||
Symlink
+1
@@ -0,0 +1 @@
|
|||||||
|
../../docs/guides/redaction.md
|
||||||
Symlink
+1
@@ -0,0 +1 @@
|
|||||||
|
../../docs/guides/spark-ingest.md
|
||||||
Symlink
+1
@@ -0,0 +1 @@
|
|||||||
|
../../docs/guides/thesis.md
|
||||||
@@ -19,3 +19,65 @@ X_API_KEY=
|
|||||||
# ── CRM (ingest opens the SQLite file directly, read-only) ──
|
# ── CRM (ingest opens the SQLite file directly, read-only) ──
|
||||||
CRM_DB_PATH=./data/crm.db
|
CRM_DB_PATH=./data/crm.db
|
||||||
CRM_DEV_DB_PATH=./data/crm_dev.db
|
CRM_DEV_DB_PATH=./data/crm_dev.db
|
||||||
|
|
||||||
|
# ── Daily activity digest (Phase B) ──
|
||||||
|
# The daily digest (each team member's activity per investor + a by-investor view,
|
||||||
|
# summarized LOCALLY on Spark — never Claude) is controlled from Settings → Admin
|
||||||
|
# (stored in the DB). These env vars only SEED the first-boot default before an
|
||||||
|
# admin sets it; once the policy row exists, the admin panel wins. The "Send Digest
|
||||||
|
# Now" button works regardless. Leave blank to default to off / 6 PM.
|
||||||
|
CRM_DIGEST_ENABLED=
|
||||||
|
# Local (box-time) hour 0-23. Default 18 (6 PM).
|
||||||
|
CRM_DIGEST_SEND_HOUR=18
|
||||||
|
|
||||||
|
# ── Daily-digest sender ──
|
||||||
|
# The digest mailer prefers Gmail domain-wide delegation (the service account that
|
||||||
|
# already powers email capture; its grant includes gmail.compose, which can send) and
|
||||||
|
# falls back to SMTP below. For the Gmail/DWD path it sends impersonating this domain
|
||||||
|
# user; if unset, it uses the first active admin's email.
|
||||||
|
CRM_DIGEST_SENDER=
|
||||||
|
|
||||||
|
# ── Daily-digest outbound SMTP fallback (dev override of the per-package mailbox) ──
|
||||||
|
# On the Start9 box these are set by the "Configure Digest SMTP" action (written
|
||||||
|
# to /data/secrets/smtp/* and exported by docker_entrypoint.sh). For dev, set them
|
||||||
|
# here. SMTP_SECURITY is one of: starttls (587) | tls (465) | none.
|
||||||
|
SMTP_HOST=
|
||||||
|
SMTP_PORT=587
|
||||||
|
SMTP_SECURITY=starttls
|
||||||
|
SMTP_FROM=
|
||||||
|
SMTP_USERNAME=
|
||||||
|
SMTP_PASSWORD=
|
||||||
|
|
||||||
|
# ── Matrix intake bot (backend/matrix_intake/, runs as its own process on the Spark) ──
|
||||||
|
# Parses a typed message in a dedicated Matrix room into a proposed fundraising-grid
|
||||||
|
# add/edit (local Qwen via Spark Control above), then writes through the CRM API only
|
||||||
|
# after in-thread human approval. Reuses SPARK_CONTROL_URL / CRM_CHAT_MODEL above.
|
||||||
|
MATRIX_HOMESERVER=https://<homeserver>
|
||||||
|
MATRIX_USER=@intake-bot:<homeserver>
|
||||||
|
MATRIX_ACCESS_TOKEN=
|
||||||
|
MATRIX_DEVICE_ID=ten31-intake-bot
|
||||||
|
MATRIX_INTAKE_ROOM=!<roomid>:<homeserver>
|
||||||
|
# Dedicated room for reviewing CRM-drafted email-activity proposals (the proposed grid notes the
|
||||||
|
# Email Capture panel shows). The bot posts a review card per pending proposal here and relays the
|
||||||
|
# in-thread yes/no/edit back to the CRM, in sync with the web panel. Separate from the intake room
|
||||||
|
# so high-volume email proposals don't drown the conversational intake. Leave empty to disable the
|
||||||
|
# whole email-review poll loop. The bot must be a member of this room. Needs the server side in the
|
||||||
|
# s9pk (≥ v0.1.0:89) and the bot's CRM user set to role 'bot' (see docs/guides/matrix-intake.md).
|
||||||
|
MATRIX_EMAIL_REVIEW_ROOM=!<roomid>:<homeserver>
|
||||||
|
# Dedicated read-only Q&A room (W2): every top-level message here is answered as a natural-language
|
||||||
|
# query (translated on the box's LOCAL model — nothing leaves the box), no '?'/'@bot' trigger needed.
|
||||||
|
# The '?'/'@bot' trigger still also works in the intake room. Leave empty to disable the dedicated
|
||||||
|
# room (questions then go via the intake-room trigger). The bot must be a member of this room. Needs
|
||||||
|
# the server side in the s9pk (POST /api/query/nl) and the bot's CRM user set to role 'bot'.
|
||||||
|
MATRIX_QUERY_ROOM=!<roomid>:<homeserver>
|
||||||
|
# CRM write-back: the bot logs in as a DEDICATED service user (admin-created CRM user;
|
||||||
|
# the CRM has no service-key path, so it uses normal Bearer-JWT auth).
|
||||||
|
CRM_API_BASE=http://127.0.0.1:8080
|
||||||
|
CRM_BOT_USERNAME=
|
||||||
|
CRM_BOT_PASSWORD=
|
||||||
|
# Set to false only if CRM_API_BASE is https with a self-signed cert.
|
||||||
|
CRM_API_VERIFY_TLS=true
|
||||||
|
# Ten31 team-member names (comma-separated), fed to the parser so a teammate's name reads as
|
||||||
|
# the person DOING outreach, not the prospect ("Jonathan is chatting with Wyoming" → Wyoming).
|
||||||
|
# Optional; first names as actually used in the room. Leave empty to disable the framing.
|
||||||
|
INTAKE_TEAM_ROSTER=
|
||||||
|
|||||||
+16
@@ -2,6 +2,12 @@
|
|||||||
.DS_Store
|
.DS_Store
|
||||||
**/.DS_Store
|
**/.DS_Store
|
||||||
|
|
||||||
|
# ── Design provenance: keep the text artifacts (*.dc.html, store.js, *.md, tokens.json)
|
||||||
|
# but not the heavy binaries (the raw .zip export + screenshot/thumbnail PNGs). ──
|
||||||
|
design/_imports/**/*.zip
|
||||||
|
design/_imports/**/*.png
|
||||||
|
design/_imports/**/*.thumbnail
|
||||||
|
|
||||||
# ── Python ──
|
# ── Python ──
|
||||||
__pycache__/
|
__pycache__/
|
||||||
*.py[cod]
|
*.py[cod]
|
||||||
@@ -13,6 +19,16 @@ __pycache__/
|
|||||||
!.env.beta.example
|
!.env.beta.example
|
||||||
data/secrets/
|
data/secrets/
|
||||||
|
|
||||||
|
# ── Claude Code (deny by default, allow-list shared wiring — see standards/portability.md) ──
|
||||||
|
.claude/*
|
||||||
|
!.claude/rules/
|
||||||
|
!.claude/agents/
|
||||||
|
!.claude/commands/
|
||||||
|
!.claude/skills/
|
||||||
|
!.claude/settings.json
|
||||||
|
# launch.json is a deliberate, secret-free debug config that the CRM repo commits on purpose
|
||||||
|
!.claude/launch.json
|
||||||
|
|
||||||
# ── Local databases & SQLite sidecars ──
|
# ── Local databases & SQLite sidecars ──
|
||||||
# CRM is canonical and lives on the Start9 /data volume; never commit live LP
|
# CRM is canonical and lives on the Start9 /data volume; never commit live LP
|
||||||
# data into git (guardrail #1). Dev/synthetic DBs are local-only.
|
# data into git (guardrail #1). Dev/synthetic DBs are local-only.
|
||||||
|
|||||||
@@ -1,89 +0,0 @@
|
|||||||
# Ten31 Agentic System — Project Memory
|
|
||||||
|
|
||||||
This file is the project constitution. Read it first; it states settled decisions and non-negotiable guardrails. Where anything here conflicts with a one-off prompt, this file wins.
|
|
||||||
|
|
||||||
## What we're building
|
|
||||||
|
|
||||||
Ten31 is an investment platform (bitcoin ecosystem, energy, AI infrastructure, freedom tech) that raises from LPs and deploys into private companies. We are building an in-house system of AI agents to widen the fundraising funnel, sharpen and propagate our investment thesis, and automate marketing/branding. Build agents on the **Claude Agent SDK**, connected to our systems via **MCP**. Frontier reasoning runs on **Claude**; privacy-sensitive and high-volume work runs on **local models on our DGX Sparks**, fronted by **Spark Control**.
|
|
||||||
|
|
||||||
Full architecture and rationale: see `@./docs/Ten31_Agentic_Build_Plan.md`.
|
|
||||||
Current phase tasks and acceptance criteria: see `@./docs/PHASE_0.md`.
|
|
||||||
Embedding/retrieval API contract + ingest recipe (authoritative): see `@./docs/EMBEDDINGS.md`.
|
|
||||||
|
|
||||||
**We are in Phase 0.** Phase 0 builds the data + retrieval substrate. There are NO live, outward-facing agents in Phase 0.
|
|
||||||
|
|
||||||
## Settled architecture
|
|
||||||
|
|
||||||
- **Reasoning model:** Claude via the Agent SDK / API (API-key auth, not claude.ai login).
|
|
||||||
- **Local models (Sparks, via Spark Control gateway):**
|
|
||||||
- Chat/triage: Qwen3.6 35B-A3B on Spark 1.
|
|
||||||
- Embeddings: `BAAI/bge-m3` (dense, 1024-dim, L2-normalized) → `/v1/embeddings` (OpenAI shape).
|
|
||||||
- Reranker: `BAAI/bge-reranker-v2-m3` (cross-encoder) → `/v1/rerank` (Cohere shape).
|
|
||||||
- Served by **spark-embed**, a small FastAPI server on Spark 2 (NGC PyTorch image — *not* HF TEI, which ships no arm64 CUDA image). Shipped in Spark Control v0.15.0.
|
|
||||||
- Audio: transcription + diarization + TTS on Spark 2.
|
|
||||||
- **Canonical data store:** the self-built CRM on the Start9 server. This is the single source of truth for LP/prospect data.
|
|
||||||
- **Vector index:** Qdrant v1.16.0 on Spark 2 (ports 6333/6334). Derived and rebuildable from the CRM (~8–15 min full re-embed) — NOT a second source of truth. But it holds the only *live* copy of the index, so it is never auto-restarted; the ingest pipeline must be idempotent so a rebuild is always safe.
|
|
||||||
- **Retrieval:** one orchestrated call, `POST /api/search` (embed query → Qdrant dense+sparse RRF with payload pre-filter → cross-encoder rerank → top_k). The sparse/BM25 leg is generated **client-side** with FastEmbed (`Qdrant/bm25`) at both ingest and query time, with Qdrant applying IDF over our own corpus — so exact entity/name matching is weighted by our term statistics, not bge-m3's pretrained sparse. Authoritative contract + ingest recipe: `@./docs/EMBEDDINGS.md`.
|
|
||||||
- **Gateway:** Spark Control (on Start9) fronts all local model services behind one trusted URL with shared TLS, access control, and observability.
|
|
||||||
|
|
||||||
## Environment & services
|
|
||||||
|
|
||||||
- All local model calls go through **Spark Control**, never directly to a Spark.
|
|
||||||
- Endpoints: `/v1/chat/completions`, `/v1/embeddings`, `/v1/rerank`, `/api/search` (orchestrated hybrid retrieval), `/v1/audio/transcriptions`, `/v1/audio/speech`.
|
|
||||||
- **Secrets live in `.env` (gitignored). Never commit secret values.** Required variables (names only):
|
|
||||||
- `ANTHROPIC_API_KEY`
|
|
||||||
- `SPARK_CONTROL_URL` — gateway for `/v1/embeddings`, `/v1/rerank`, `/api/search` (reads + dense embeds)
|
|
||||||
- `QDRANT_URL` — direct Qdrant on Spark 2 (`http://<spark2>:6333`) for collection admin + ingest upserts
|
|
||||||
- `X_API_KEY` — the X (Twitter) API key for Scout/Analyst enrichment. **Note:** this is *not* a CRM auth key; the CRM has no service-key/API-key path today (see below).
|
|
||||||
- CRM connection vars:
|
|
||||||
- `CRM_DB_PATH` — absolute path to the SQLite file (default `<CRM_DATA_DIR>/crm.db`). The CRM has **no network DB protocol** — ingest "connects" by opening this file directly (read-only, `mode=ro`), co-located with the Start9 `/data` volume.
|
|
||||||
- `CRM_DATA_DIR` — the `/data` volume root (holds `crm.db`, `backups/`, `secrets/`, `email_attachments/`).
|
|
||||||
- `CRM_BASE_URL` — `http://<host>:8080` (env `CRM_HOST`/`CRM_PORT`), for any HTTP access to the running CRM.
|
|
||||||
- `CRM_SECRET_KEY` — the CRM's own JWT signing secret (set on the Start9 deployment, persisted at `/data/.crm-secret`); only needed if the MCP server authenticates over HTTP rather than reading SQLite directly.
|
|
||||||
- A `.env.example` lists the variable names with empty values.
|
|
||||||
|
|
||||||
## The agents (target roster — built in later phases)
|
|
||||||
|
|
||||||
- **Scout** — monitors public sources (X via API, filings, etc.); flags trigger events. (Phase 2)
|
|
||||||
- **Analyst** — builds LP dossiers, enriches records, maps warm-intro paths. (Phase 2)
|
|
||||||
- **Architect** — owns/refines the canonical thesis; collaborative copilot. (Phase 1)
|
|
||||||
- **Scribe** — distributes the thesis as content across channels. (Phase 1)
|
|
||||||
- **Closer** — drafts outreach, nurture, meeting prep. Humans approve/send everything. (Phase 3)
|
|
||||||
- **Orchestrator** — schedules and routes work; picks per-agent retrieval modes. (Phase 3)
|
|
||||||
|
|
||||||
## Division of labor
|
|
||||||
|
|
||||||
- **Spark developer (separate):** spark-embed serving (BGE-M3 + reranker; not HF TEI) and Qdrant on Spark 2, exposed via Spark Control `/v1/embeddings` + `/v1/rerank`.
|
|
||||||
- **This repo (Claude Code + the partners):** CRM schema extensions, ingest/sync pipeline, CRM MCP server, retrieval-mode library, and (later phases) the agents.
|
|
||||||
|
|
||||||
## Guardrails — NON-NEGOTIABLE
|
|
||||||
|
|
||||||
1. **Sovereignty.** Sensitive LP and relationship data stays on our infrastructure (Start9 + Sparks). Send only the minimum necessary, non-sensitive context to the Claude API. Never bulk-export the LP list to any third party.
|
|
||||||
2. **CRM is canonical.** Qdrant and any other store are derived. Never treat a derived index as the source of truth; never let them silently diverge.
|
|
||||||
3. **No destructive data ops.** Never hard-delete CRM records or history. Soft-delete/archive only. Migrations must be reversible and reviewed before running.
|
|
||||||
4. **Human-in-the-loop on anything outbound.** No agent sends email, posts publicly, or contacts an LP/prospect autonomously. Agents draft; a partner approves and sends. (Especially Closer and Scribe.)
|
|
||||||
5. **Log every agent action** to the interaction log, for compliance and debugging.
|
|
||||||
6. **Compliance gate before Phase 3.** No cold/outbound capability ships until counsel has defined solicitation posture (e.g. 506(b) vs 506(c)), accreditation/QP verification, and recordkeeping rules.
|
|
||||||
7. **Secrets never committed.** Use `.env` / a secrets store. No keys, tokens, or credentials in code, configs, or docs.
|
|
||||||
8. **Enrichment is one-way and public.** Per-prospect public lookups that write INTO the CRM; never push our data outward.
|
|
||||||
9. **Development data handling — keep real LP data out of Claude during the build.** Claude Code (the engineering partner) runs on the Anthropic API, so anything it reads is sent to a third party. Therefore Claude Code works only on **code, the schema, and synthetic or properly-redacted data** — never the real LP list, live records, or raw note/email prose. The real backfill and ingest **run on Ten31 infrastructure** (Start9 + Sparks) via **local models**; sensitive rows are never pasted into a Claude Code session or sent to the Claude API during development. To produce a realistic test corpus, redact/pseudonymize a copy **on the Sparks** (local) — do not hand-feed real records to Claude to "clean up." This is the same sovereignty boundary as guardrail #1, applied to the engineering workflow itself.
|
|
||||||
|
|
||||||
## Conventions
|
|
||||||
|
|
||||||
*Filled in from the CRM code (2026-06). Full detail: `@./docs/crm-overview.md`.*
|
|
||||||
|
|
||||||
- **Language / runtime:** Python 3.11, standard library only at runtime. The CRM is one file, `backend/server.py` (~4.5k lines): a stdlib `http.server.ThreadingHTTPServer` + hand-written `CRMHandler` with manual path dispatch. **Not** FastAPI — `backend/requirements.txt` lists FastAPI/SQLAlchemy/Alembic/Pydantic but **none are imported** (vestigial). The only non-stdlib runtime deps are optional `bcrypt`/`jwt` and (for the Gmail module) `cryptography`.
|
|
||||||
- **Storage:** a single SQLite DB (`data/crm.db`), WAL mode, `foreign_keys=ON`, opened per-request via `get_db()`. Two parallel investor models coexist (classic `contacts`/`lp_profiles` + the `fundraising_*` grid) — see `docs/crm-overview.md` §2.3; reconciling them to canonical IDs is the core Phase-0 entity-resolution task.
|
|
||||||
- **Migrations:** **additive and reversible only.** Core schema uses ordered `backend/migrations/NNNN_*.sql` files applied once at startup by `backend/core_migrations.py`, tracked in a `schema_migrations` ledger; ship a paired `NNNN_*.down.sql` for rollback. (The Gmail module has its own runner under `backend/email_integration/migrations/`.) SQLite ALTER is add-column/rename only — which enforces the additive guardrail.
|
|
||||||
- **Run locally:** `./start.sh` (dev defaults, port 8080). `./start_beta.sh` for a Tailscale/production-mode launch (requires `CRM_SECRET_KEY`). No build step.
|
|
||||||
- **Tests / lint:** none in-repo. Sanity-check edits with `python3 -m py_compile backend/server.py`. Verify migrations against a *copy* of `crm.db`, never production.
|
|
||||||
- **Production:** Start9 package `ten-database`. **`start9/0.4/` is the live target** (TypeScript SDK manifest under `start9/0.4/startos/`); `start9/0.3.5/` (YAML manifest) is the superseded prior generation. All state on the persistent `/data` volume.
|
|
||||||
- **Auth:** username/password → HS256 JWT (Bearer header), two roles (`admin`/`member`), no row-level authorization. `X_API_KEY` (in this file's env list) is the *X/Twitter* key — there is **no CRM service-key path in code**; an MCP/ingest client must read SQLite directly or authenticate as a real CRM user.
|
|
||||||
- Prefer clear, reviewable changes over cleverness. Keep the ingest pipeline and MCP server modular so retrieval modes and sources can be added without rewrites.
|
|
||||||
|
|
||||||
## First actions for a new session
|
|
||||||
|
|
||||||
1. Read `@./docs/PHASE_0.md` and `@./docs/EMBEDDINGS.md` (the latter is the authoritative embedding/retrieval contract and ingest recipe).
|
|
||||||
2. Read the CRM source in the repo; produce a short written summary of the storage engine, schema, and API surface, and fill in the Conventions section above and the CRM env vars.
|
|
||||||
3. Confirm Spark Control is reachable and `/v1/embeddings`, `/v1/rerank`, and `/api/search` respond (these shipped in v0.15.0; check `GET /api/endpoints`).
|
|
||||||
4. Proceed through the Phase 0 workstreams in order. Do not build any outward-facing agent behavior in Phase 0.
|
|
||||||
@@ -0,0 +1,79 @@
|
|||||||
|
# Evaluation — CRM (Ten31 Venture CRM + Agentic System) — 2026-06-12
|
||||||
|
|
||||||
|
Intent: A self-hosted venture-fund CRM for Ten31 (replacing Airtable to keep sensitive LP/prospect data off third-party servers) managing contacts/organizations/opportunities/communications/LP profiles, with a new in-house AI/agentic layer that drafts outreach and sharpens the investment thesis — packaged as a StartOS s9pk, currently Phase 0/1 (agents draft, humans send).
|
||||||
|
|
||||||
|
Agents run: evaluator, security-auditor, exerciser, start9-spec-checker. Skipped: reviewer (working tree is clean — no uncommitted diff to review).
|
||||||
|
|
||||||
|
## Verdict
|
||||||
|
|
||||||
|
This is a functional, deliberately stdlib-only CRM (one ~5,400-line `backend/server.py` monolith over SQLite) with a genuinely well-engineered privacy layer bolted on; it largely achieves its intent, and the redaction boundary that keeps LP data out of Claude is the strongest part of the codebase. **The headline risk is a single P0: an unauthenticated path-traversal in the `/assets/` route (`backend/server.py:1717`) that runs *before* the auth gate and lets any LAN/Tailnet client read arbitrary files — the live LP database, the JWT signing secret (→ forge an admin token), and the Gmail service-account private key.** A separate P1 punches a hole in the privacy premise from the other side: the outreach drafter sends raw email bodies to Claude with the NER backstop disabled, leaking any non-CRM names in cleartext. The project's own test suite is not green today — two thesis tests fail against the shipped v0.1.0:73 seed (stale assertions, not a runtime bug). Net: not ready to trust as "private" until the traversal and the outreach leak are closed; everything else is hygiene or polish.
|
||||||
|
|
||||||
|
## Cross-referenced findings
|
||||||
|
|
||||||
|
- **Path traversal (the P0).** Found independently by the **evaluator** (P1, `server.py:1717-1718`) and the **security-auditor** (P0, escalated because the same read also yields `/data/.crm-secret` → admin-token forgery and the Gmail DWD key). The **exerciser** ran a path-traversal probe and reported "blocked" — a **false negative**: `curl` collapses `../` client-side, so the test never sent a literal `..`. The two code-reading agents are correct (`get_path()`/`urlparse` does not normalize `..`; a raw client like `curl --path-as-is` reaches it). Merged as **one P0** with the auditor's higher severity, on the strength of the secret/key exposure.
|
||||||
|
- **Two thesis tests fail.** The **evaluator** (P1) and the **exerciser** (P2) independently reproduced and root-caused the same failure: `test_thesis_seed.py` / `test_thesis_actions.py` assert the `positioning` variant_group has 2 members, but `ensure_positioning_framings` (added after the tests) seeds 5 more → 7. Merged as one **P2** (no runtime impact; but "all tests pass" is false today).
|
||||||
|
- **`X-Forwarded-For` trusted** for rate-limit/ban keying — flagged by both **evaluator** (P3) and **auditor** (P3). One P3.
|
||||||
|
- **CORS default `*`** — flagged by both **evaluator** (P3) and **auditor** (P3); both note it's benign today (Bearer auth, no cookies). One P3.
|
||||||
|
|
||||||
|
## Priority queue
|
||||||
|
|
||||||
|
- [P0] Unauthenticated path traversal in `/assets/` → arbitrary file read (LP DB, JWT secret → admin forgery, Gmail key); runs before auth — `backend/server.py:1717-1732`, `1671-1681` — evaluator + security-auditor (exerciser's "blocked" was a false negative)
|
||||||
|
- [P1] Outreach drafter sends raw email bodies to Claude with NER backstop disabled (`Boundary(...)` built with no `ner_fn`), leaking non-CRM names — contradicts documented fail-closed design — `backend/mcp/outreach_agent.py:230`, `106-118`, `151-170` — security-auditor
|
||||||
|
- [P1] Soft-deleted contacts and organizations remain fully readable by direct ID (`GET /api/{contacts,organizations}/{id}` omit `deleted_at IS NULL`) — exerciser
|
||||||
|
- [P2] Two thesis tests fail against the shipped seed (stale 2-vs-7 member assertion) — `backend/test_thesis_seed.py:50`, `backend/test_thesis_actions.py:40` — evaluator + exerciser
|
||||||
|
- [P2] Non-integer query params (`?limit=abc`, `?offset=abc`) raise unhandled `ValueError` and crash the request thread → connection reset, no error body (8 list endpoints) — exerciser
|
||||||
|
- [P2] 5,383-line single handler class with 6 near-identical copy-pasted CRUD update blocks — raises cost of safe change — `backend/server.py:1523`, `2267-2293` — evaluator
|
||||||
|
- [P2] Frontend loads React + Babel-standalone from `unpkg.com` with no SRI — offline-fragile (contradicts data-sovereignty premise) + unpinned supply chain + in-browser transpile cost — `frontend/index.html:9-11` — evaluator
|
||||||
|
- [P2] TLS verification disabled on the scrub-gateway path (`CERT_NONE`, `check_hostname=False`); ships pre-wired, MITM exposes the LP-name dictionary if gateway backend enabled — `backend/ingest/http_util.py:11-17`, `docker_entrypoint.sh:86` — security-auditor
|
||||||
|
- [P2] `cryptography==42.0.5` shipped in image carries a known bundled-OpenSSL advisory (used for Gmail RS256) — bump to ≥43 — `start9/0.4/Dockerfile:50` — security-auditor
|
||||||
|
- [P2] `assets/ABOUT.md` is stale and user-visible on the box: claims first-boot seeds the volume (removed in v0.1.0:40) — fresh install shows an empty CRM with no explanation — `start9/0.4/assets/ABOUT.md:9` — start9-spec-checker
|
||||||
|
- [P2] Hardcoded LAN IPs (`192.168.1.72` Spark, `192.168.1.87` Qdrant) compiled into the s9pk (16 occurrences) — network change forces edit+rebuild+reinstall — `start9/0.4/startos/actions/*.ts`, `docker_entrypoint.sh:85-87` — start9-spec-checker
|
||||||
|
- [P3] `X-Forwarded-For` trusted verbatim for rate-limit/ban keying — spoofable to evade ban or poison another IP's bucket — `backend/server.py:1588-1592` — evaluator + security-auditor
|
||||||
|
- [P3] CORS default `*` — benign with Bearer auth today, pin in prod — `backend/server.py:81` — evaluator + security-auditor
|
||||||
|
- [P3] DB connections closed only on success paths (no `try/finally`) — hygiene, not a leak in practice — `backend/server.py` (many handlers) — evaluator
|
||||||
|
- [P3] `get_body` reads `Content-Length` bytes with no max-size cap → memory-exhaustion DoS on write routes — `backend/server.py:1538-1554` — security-auditor
|
||||||
|
- [P3] Container runs as root (no `USER` in Dockerfile) — drop privileges — `start9/0.4/Dockerfile` — security-auditor
|
||||||
|
- [P3] `requirements.txt` lists ~12 vulnerable/unused deps (e.g. python-jose 3.3.0, CVE-2024-33663) — not imported at runtime, but a trap if ever installed — `backend/requirements.txt` — security-auditor
|
||||||
|
- [P3] `gmailResult.gmail_url` rendered into an `href` without scheme validation (server-generated today) — `frontend/index.html:10134` — security-auditor
|
||||||
|
- [P3] No uniqueness constraint on `contacts.email` — duplicates silently accepted — exerciser
|
||||||
|
- [P3] Create-opportunity does not validate the `stage` field (only the PATCH stage route does) — arbitrary stage strings stored — exerciser
|
||||||
|
- [P3] No server-side length limits on text fields (10k-char names accepted, stored, and exported) — exerciser
|
||||||
|
- [P3] `POST /api/fundraising/log-communication` ignores a valid `investor_id` and demands `row_id`/`investor_name` — grid-vs-contacts model mismatch — exerciser
|
||||||
|
- [P3] Deprecated `datetime.utcnow()` calls emit warnings (will break on a future Python) — exerciser
|
||||||
|
- [P3] `npm audit`: fast-xml-parser/-builder transitive advisories (build-time only, via start-sdk) — `npm audit fix` — security-auditor
|
||||||
|
- [P3] `anthropic` dependency unpinned — reproducibility/supply-chain gap — security-auditor
|
||||||
|
- [P3] Manifest declares `aarch64` but no native arm image is built (runs via QEMU; `fastembed`/`mcp` unverified on arm) — drop the arch or build it — `start9/0.4/startos/manifest/index.ts:23` — start9-spec-checker
|
||||||
|
- [P3] `start9/0.4/README.md:39-40` describes the removed seed mechanism (developer-facing) — start9-spec-checker
|
||||||
|
- [P3] `packageRepo`/`upstreamRepo` manifest URLs 404 (private/nonexistent) — fine for private use, fails registry validation — start9-spec-checker
|
||||||
|
- [P3] Stale `start9/0.4/javascript.tmp.1776377780/` build artifact on disk (gitignored, harmless) — start9-spec-checker
|
||||||
|
|
||||||
|
## Scorecard
|
||||||
|
|
||||||
|
The evaluator's six-lens table, unadjusted (other agents' evidence reinforces but does not contradict it):
|
||||||
|
|
||||||
|
| Lens | Score /5 | Justification |
|
||||||
|
|---|---|---|
|
||||||
|
| Architecture | 3 | Clean module separation (`redaction`/`ingest`/`mcp`/`email_integration`) + consistent central dispatch, but all CRM logic in one 5,383-line handler — `server.py:1523`. |
|
||||||
|
| Security | 2 | Strong auth crypto (pbkdf2 200k, `compare_digest`, pinned HS256) and clean secrets hygiene, undermined by the P0 traversal + the P1 outreach leak the auditor added — `server.py:1717`, `outreach_agent.py:230`. |
|
||||||
|
| Performance | 4 | Per-request connections + WAL + 42 indexes + locked abuse state, right-sized for ~400 records / 5 users. |
|
||||||
|
| Testing | 3 | 13 fast isolated tests covering the hard parts (redaction leak-hunts, grounding boundary), but 2 are red against the current seed. |
|
||||||
|
| Code quality | 3 | Comments explain *why* well; the monolith and copy-pasted CRUD blocks raise change cost. |
|
||||||
|
| Documentation | 5 | AGENTS.md + 6 scoped guides are accurate and verifiable (the "FastAPI is vestigial" claim checks out — zero runtime imports). |
|
||||||
|
|
||||||
|
Note: the auditor's P0 escalation and added P1 leak both land on the Security lens; they corroborate the score of 2 rather than push it lower (it was already the floor of the table).
|
||||||
|
|
||||||
|
## Disagreements & gaps
|
||||||
|
|
||||||
|
- **Path traversal — exerciser vs. the two readers.** The exerciser reported path traversal "blocked"; the evaluator and auditor both found it exploitable by code reading. Resolution: the exerciser's tool (`curl`) normalized `../` before sending, so the probe never tested the vuln — it is **real and P0**. Lesson: black-box probes for traversal must use a raw, non-normalizing client (`curl --path-as-is`).
|
||||||
|
- **Shared blind spot — the differentiating Phase-1 AI paths are unverified at runtime.** No agent could exercise live Claude/Anthropic calls (no `ANTHROPIC_API_KEY` in env), Qdrant ingest (`/api/index/*`), or Gmail draft creation. So `/api/outreach/draft`, `/api/architect/ground`, and thesis generation were reached only in their degraded/no-key form. The P1 outreach-leak finding is from code reading, not a live capture — confidence is high but a live request would confirm it. This is the one gap every agent shares.
|
||||||
|
- **StartOS spec — two UNVERIFIED items.** The spec-checker could not confirm whether 0.4 still requires `instructions.md`/`prepare.sh` (docs pages 404'd) or measure the expanded image size; `start-sdk verify` failed only because the machine has the wrong-era (0.3.5) binary — `start-cli s9pk inspect` succeeds and the artifact is valid. No packaging blocker for private sideload.
|
||||||
|
|
||||||
|
## Suggested order of work
|
||||||
|
|
||||||
|
1. **Close the P0 traversal first** — `os.path.realpath` + `commonpath` containment check (or stdlib `translate_path`) on the `/assets/` branch in `backend/server.py:1717`, and confirm with `curl --path-as-is 'http://host/assets/../../data/crm.db'`. Until this is fixed the "private" claim is false; do not deploy.
|
||||||
|
2. **Fix the P1 outreach leak** — pass `ner_fn=_ner_local` to the `Boundary` in `outreach_agent.py:230` (mirroring `architect_grounding.py`), fail closed if NER is unreachable, add a minimize-first pass. Both #1 and #2 directly protect the LP data the project exists to protect.
|
||||||
|
3. **Make the test suite green and authoritative** — fix the two stale thesis assertions (assert structurally, not on an exact count) and add a one-line aggregate runner so "do the tests pass" has a single answer; then the suite can gate the next deploy.
|
||||||
|
4. **Fix the two functional bugs** — add `AND deleted_at IS NULL` to the get-by-ID handlers (P1), and wrap query-param `int()` parsing to return a 400 instead of crashing the thread (P2).
|
||||||
|
5. **Deploy-prep the package** — update `ABOUT.md` (and README) to current first-boot behavior so a fresh install isn't a mystery empty CRM, and lift the hardcoded Spark/Qdrant IPs into config/env before the box ever moves networks.
|
||||||
|
6. **Then verify the live Phase-1 paths on the box** with a real `ANTHROPIC_API_KEY` — the outreach/architect/thesis features that no agent could exercise here.
|
||||||
|
7. **Hardening sweep** (the P3 batch, plus the P2 `cryptography` bump) — bump `cryptography`, drop the vestigial vulnerable `requirements.txt` entries, stop trusting `X-Forwarded-For`, cap request-body size, run as non-root.
|
||||||
+438
-4
@@ -12,9 +12,6 @@
|
|||||||
- `GET /api/fundraising/backups`
|
- `GET /api/fundraising/backups`
|
||||||
- `GET/PATCH /api/fundraising/backup-policy`
|
- `GET/PATCH /api/fundraising/backup-policy`
|
||||||
- `GET /api/fundraising/relational-summary`
|
- `GET /api/fundraising/relational-summary`
|
||||||
- `GET /api/feature-requests`
|
|
||||||
- `POST /api/feature-requests`
|
|
||||||
- `PATCH /api/feature-requests/:id`
|
|
||||||
- New DB tables:
|
- New DB tables:
|
||||||
- `fundraising_state`
|
- `fundraising_state`
|
||||||
- `fundraising_investors`
|
- `fundraising_investors`
|
||||||
@@ -22,7 +19,6 @@
|
|||||||
- `fundraising_funds`
|
- `fundraising_funds`
|
||||||
- `fundraising_commitments`
|
- `fundraising_commitments`
|
||||||
- `fundraising_views`
|
- `fundraising_views`
|
||||||
- `feature_requests`
|
|
||||||
- `app_settings`
|
- `app_settings`
|
||||||
- Grid saves/restores now sync into relational fundraising tables automatically.
|
- Grid saves/restores now sync into relational fundraising tables automatically.
|
||||||
- Formula engine is now sandboxed (no `eval`/`new Function`) with expanded function support.
|
- Formula engine is now sandboxed (no `eval`/`new Function`) with expanded function support.
|
||||||
@@ -84,6 +80,444 @@
|
|||||||
3. Email/communication integrations (optional)
|
3. Email/communication integrations (optional)
|
||||||
4. Granular permissions (if team grows)
|
4. Granular permissions (if team grows)
|
||||||
|
|
||||||
|
## Backlog (post-Phase-1 agentic)
|
||||||
|
|
||||||
|
### Data-model cleanups (deferred from the v0.1.0:104 session)
|
||||||
|
|
||||||
|
- **Retire `contacts.contact_type`** (the Contacts Investors/Prospects tabs + TYPE badge). It's a legacy binary that's set mechanically — `'investor'` just means "exists in the grid" (stamped unconditionally by `_upsert_contact_from_fundraising`), `'prospect'` means "imported/added, not in the grid" — and is superseded by the grid-derived signals `contact_grid_signals()` already injects (`existing_investor`/`committed`, `pipeline_stage`). Plan: replace the tabs + TYPE badge with those signals, repoint the dashboard `total_lps`/`total_prospects` counts, then drop the column. Live UI change → its own small design pass. (Grant: "I want to delete it, next session.")
|
||||||
|
|
||||||
|
- **Consolidate `contacts` ↔ `fundraising_contacts` into one linked model.** Goal (Grant): everyone in `contacts` maps to a `fundraising_investors` row (an individual maps to their own row). Today `contacts` is the canonical person directory (FK target for `communications`/`opportunities`); `fundraising_contacts.contact_id` (migration `0004`) points INTO it; the mobile Contacts page reads `contacts`. Three populations: **A** linked (grid pill ↔ contact), **B** `contacts`-only (imported prospects / manual adds — need a grid row), **C** pill-only (`fundraising_contacts.contact_id IS NULL` — need a contact row). **Census-first:** before designing any migration, count A/B/C on the box — Grant runs the SQL himself (he is **not** providing a DB copy), so hand him a counts-only script. The census decides whether this is a ~20-row cleanup or a ~300-row structural migration with `communications`/`opportunities` repointing. Then Grant reconciles B (add grid rows/pills) and C (add contact rows) and ensures all are linked. **(v0.1.0:105) A TEMPORARY admin census ships to read A/B/C off the box without shell access: `GET /api/admin/contacts-census` (`handle_contacts_census`) + a Settings → Admin "Run census" button, mirroring `backend/scripts/contacts_census.sql` (counts only). DELETE the endpoint + route + button after the numbers are captured — all tagged `TEMPORARY` in code.**
|
||||||
|
|
||||||
|
### Captured tweaks (Matrix, 2026-06-18/20)
|
||||||
|
*Small UI/UX + capture-quality items captured via Matrix; not yet scheduled.*
|
||||||
|
|
||||||
|
- **[P2] Backup history (Settings) defaults minimized, chevron-expand, pinned to the bottom** — it's rarely viewed, so it shouldn't take prime space. Frontend-only (`frontend/index.html`). (2026-06-18)
|
||||||
|
- **[P2] Preserve the active tab across a page refresh** — a refresh currently snaps back to the top/default tab. Persist the selected tab (e.g. localStorage / URL hash) and rehydrate on load. Frontend-only. (2026-06-18)
|
||||||
|
- **[P2] Email capture should match an investor on `To:`/`From:` only, not `Cc:`** — today if an investor's address appears anywhere on a message landing in a team mailbox (including when a teammate is merely cc'd on an outbound reply to the investor), it logs a spurious "received from investor" entry. Restrict the investor-link match to the to/from headers so a cc doesn't create a phantom inbound note. Scope: `backend/email_integration/` matching (see `docs/guides/email.md`). (2026-06-20)
|
||||||
|
|
||||||
|
### Follow-ups/reminders + NL search + bot grid-mutations (agreed plan, 2026-06-18)
|
||||||
|
*Agreed with Grant 2026-06-18. Three workstreams, sequenced **W1 → W2 → W3**. **Overarching constraint (Grant):** the dominant risk is **leaking LP data (names, $, notes, contacts) to third-party LLMs — NOT write-safety.** A wrong number is recoverable; investor substance reaching Claude is not. Consequences: W2 keeps LP rows off Claude (only the question text + schema vocabulary leave the box; entity names resolved locally); W3 keeps bot mutation-parsing on local Qwen. Because this DB *logs* commitments/pipeline but doesn't move money, a bot mutation is low-stakes → **any team member may approve one in Matrix**; the guardrail is "the bot can't silently mass-change numbers," enforced by the per-mutation human approval gate, not a tight money gate.*
|
||||||
|
|
||||||
|
**W1 — Reminders & follow-ups — BUILT + tested locally 2026-06-18 (v0.1.0:92, deploy pending).** First-class tickler tied to the grid: `reminders` table (in-app migration `0006`; logical FK to `fundraising_investors.id` + denormalized name, like `0005`), full CRUD (`GET/POST/PATCH/DELETE /api/reminders`; soft-delete; status open/done/snoozed/cancelled; assignee; `source` human/bot/automation), a read-only **derived `reminder_status` grid column** (overdue/due_soon/open — injected like `pipeline_stage`, **filterable so the follow-up view can later key off reminders instead of the binary `follow_up` checkbox**, per Grant), an orphan reconciler (`reconcile_grid_reminders` — cancels reminders when their investor leaves the grid, the pipeline reconciler's twin), a **Reminders** nav page (filter/complete/snooze/edit/delete + create), a Dashboard **"Reminders Due"** card, a **"Reminders due"** daily-digest section, and a per-investor **`last_activity_at`** recency rollup (the shared building block W2's "not nurtured" query needs). Pure local CRM — no LLM path, no leak surface. Tests: `test_reminders.py` + digest reminders test; **31/31 suite green, render-smoke green**. **Deploy:** needs an s9pk build + install (version bumped to 92); get authorization first.
|
||||||
|
- **W1b (deferred fast-follow):** nurture-gap automation — a daily job flags "committed / in-pipeline + no activity in N days + no open reminder" → auto-suggests a reminder (`source='automation'`, human confirms). Build once the recency rollup is proven in practice.
|
||||||
|
- **Left untouched (deliberate):** the grid `follow_up` checkbox + automation list-memberships, and `communications.next_action_date` + `/api/outreach/radar` — reminders are the new richer layer; folding those into it is a later cleanup, not now.
|
||||||
|
|
||||||
|
**W2 — Natural-language query (read-only). BACKEND BUILT + tested + validated locally 2026-06-18; web/Matrix UI pending.** This is item 3 (NL→safe structured query) of the **"Email/communication search + natural-language query"** section further down, now sequenced second and **redesigned** (see the bullets that follow). Subsystem detail: `docs/guides/nl-query.md`.
|
||||||
|
|
||||||
|
- **Approach changed from the original "Claude behind redaction + a validated filter-AST" to LOCAL-ONLY + a named-intent catalog (decided with Grant 2026-06-18).** Rationale: (a) the dominant risk is LP data reaching a vendor — running translation on the **local Qwen via Spark Control** keeps the question on the box entirely (same basis as intake/digest), so there is **no Claude path and no redaction boundary** to manage, which is both simpler and safer; (b) a generic SQL/AST compiler was over-built for the real need — instead there are **~12 curated, hand-written, parameterized "named queries"** (`backend/nl_query/intents.py`) each with typed slots, and the **slot validator** (`runner.validate`) is the whole trust boundary (no dynamic identifiers, no raw SQL). The LLM only maps a question → `{intent, slots}`; its output is still validated, so a hallucinated intent is rejected. **Results never go to any model** (deterministic local render). Both design choices were pressure-tested by independent review agents before building.
|
||||||
|
- **As built:** `backend/nl_query/` (`intents.py` catalog, `runner.py` validator/executor + audit, `translate.py` local-Qwen translator, `try_questions.py` dev harness). `POST /api/query/nl` (`{question}` or direct `{intent,slots}`) + `GET /api/query/catalog`, `require_bot_or_admin`, read-only, audited (`audit_log` `entity_type='nl_query'`). Soft-delete-correct per table (`fundraising_*` has no `deleted_at` — `graveyard` is the axis; emails via a live `email_account_messages` sighting; reminders/opps/comms via `deleted_at`). Builds on W1's `last_activity_at`. Tests: `nl_query/test_nl_query.py` + `test_translate.py` + `test_nl_query_endpoint.py` (34/34 suite green).
|
||||||
|
- **Validation:** the local Qwen translated **12/12 of Grant's real example questions** correctly (right intent + slots, incl. "3 months"→90, sent/received→direction) against the live Spark — settles local-only; Claude not needed. Translation quality on messy/typo/no-match inputs shakes out in live use.
|
||||||
|
- **Remaining:** **step 4** = web "Ask" box in the Communications tab (calls the endpoint, renders rows + the interpreted query); **step 5** = Matrix `@bot <question>` (thin client of the endpoint; the 2-admin review room means a full-book dump is acceptable, so no bulk-result cap — only a light anti-flood truncation). Reads need no approval gate. Then deploy with reminders (v92) as **v0.1.0:93**.
|
||||||
|
|
||||||
|
**W3 — Bot grid-mutations behind a Matrix approval gate.** Generalize the email-proposal scaffold (`email_proposal_matrix` + propose→post→decide→apply) into one `agent_proposals` table (kind discriminator + JSON payload + target). Bot proposes set-commitment / assign-fund / change-stage / set-reminder; a human approves/edits/rejects in Matrix (**any member**); then apply. **Surgical, version-checked mutations — never blob RMW:** stage rides the existing `opportunities` link + validated stage endpoint; reminders write the W1 table; set-commitment/assign-fund need a version-checked single-cell upsert into the grid blob. Triggers the deferred **scoped service-token** item below (per-mutation-kind allowlist on the bot credential; money/merge/delete always require human approval regardless of scope — the autonomy axis). Parse on local Qwen, not Claude.
|
||||||
|
|
||||||
|
|
||||||
|
### Matrix-bridge intake for the fundraising grid — M1+M2+M3 BUILT & LIVE
|
||||||
|
*Requested 2026-06-16. **M1+M2 live since v0.1.0:86 (2026-06-17); M3 (business-card photo) shipped & live 2026-06-20** — code in `backend/matrix_intake/`, guide at `docs/guides/matrix-intake.md`. M3 unblocked once the Spark Control daily-driver model became vision-capable: the bot OCRs a card via Spark Control's `/v1/chat/completions` multimodal passthrough (same `CRM_CHAT_MODEL`), then runs the existing intake flow; captures contact name/email/title/city/LinkedIn/phone/mobile (server half of phone/mobile = s9pk v0.1.0:98). Remaining: ongoing on-device card testing (OCR accuracy on small-in-frame cards). Next major build is **Pipeline adoption** (see below).*
|
||||||
|
|
||||||
|
Use the **matrix-bridge** repo's pattern to listen on a dedicated ten31-database Matrix room. Send a message (with an optional business-card photo) and a local LLM **via Spark Control** parses it into the fundraising-grid schema and **auto-creates the investor entity + contact row**. For an existing investor, send a meeting note and it **appends an interaction-log entry**. Approval gate: the bot replies in Matrix with the proposed add/edit; the user approves / rejects / edits in-thread before the write commits (keeps the draft→human-approve guardrail).
|
||||||
|
- Fits the "grid is canonical" direction (writes land in `fundraising_*`) and the never-send-autonomously rule (in-thread human approval before any write).
|
||||||
|
|
||||||
|
**Locked design (2026-06-16, approved) — build now, M1 then M2:**
|
||||||
|
- **Separate component, shared scaffold:** new `backend/matrix_intake/` (its own process; lifts matrix-bridge's connect/prime-then-listen/threaded-reply plumbing). `matrix-nio` is isolated to this component's `requirements.txt` — it never enters the stdlib CRM runtime. Keeps the CRM write credential + LP data out of the general-purpose matrix-bridge bot (blast-radius + data-sovereignty), and lets the two iterate independently. Runs on the Spark (placement settled against `standards/guides/placement.md` at deploy).
|
||||||
|
- **~~v1 = text-only~~ — M3 business-card photo SHIPPED (2026-06-20).** The Spark Control daily-driver model is now vision-capable (multimodal `image_url` passthrough), so card→text→fields works end-to-end. Transcribe-then-reuse (vision OCRs to text; the existing text extractor pulls fields) preserves the email/phone integrity rules. See the matrix-intake guide.
|
||||||
|
- **Parse:** local Qwen via Spark Control `/v1/chat/completions` (temp 0, JSON-only), reusing the existing Spark client pattern (`backend/redaction`/`backend/ingest`).
|
||||||
|
- **Approval handshake (the one stateful piece):** in-memory pending-proposal store keyed by Matrix thread root; user replies **yes / edit field=value / no** in-thread. Satisfies never-write-autonomously; exempt from "agents draft, humans send" (internal data entry, like the digest).
|
||||||
|
- **CRM-side:** `POST /api/intake/investor` (service-auth) creates a new investor+contact **through the existing grid-save path** (so relational sync + audit + backup-on-write happen as with a UI edit; bot never does whole-blob RMW) or appends a meeting note to the interaction log for an existing investor; `GET /api/intake/match?q=` fuzzy-matches via the existing entity-resolution/email-matcher. New investor needs no fund at intake.
|
||||||
|
- **Phases:** M1 = scaffold + parse + in-thread propose, **no writes** (proves Matrix↔Spark). M2 = intake endpoint + match + write-on-approve + tests. **M3 = business-card photo (SHIPPED 2026-06-20).**
|
||||||
|
|
||||||
|
**Post-deploy enhancement — fuzzy match + in-thread confirm (Grant, 2026-06-17). DEPLOYED & LIVE 2026-06-17 (v0.1.0:86; box migration chain …85→86 clean, `candidates` endpoint verified); Matrix live-smoke pending.** Today `find_intake_match` is **exact-after-normalization** (`_normalize_text` = lowercase+strip), so near-misses — "Charlie" vs "Charles" (same last name), "Acme Capital" vs "Acme Capital LLC", a one-character email typo — return no match and the bot proposes a **new** investor, risking a duplicate the human approves without realizing a near-match exists. The existing in-thread approval gate is useless against this because the human is never *shown* the near-match. Fix: matcher returns **ranked fuzzy candidates** (deterministic pre-filter: normalized name similarity / token overlap + email edit-distance ≤ ~2), surfaced in-thread for the human to confirm or pick, with the **local Spark LLM optionally re-ranking/judging the shortlist** (good at Charlie/Charles + legal-suffix equivalence; fed only the shortlist, never the whole LP list). Keeps the approval gate but makes it effective against duplicates. Land **after** the live smoke — net-new logic + reply grammar + tests; the current exact match is safe and its failure mode (a duplicate) is recoverable via the existing entity-merge subsystem (`backend/entity_*.py`).
|
||||||
|
- **As built:** `find_intake_candidates` in `server.py` (deterministic — stdlib `difflib` name similarity + token-set Jaccard, legal-suffix-aware via `_strip_legal_suffix`, + email Levenshtein ≤ 2; ranked, ≥0.62, top 5). `GET /api/intake/match` now returns `{match, candidates}`. Bot: a new `_stage="disambiguate"` shortlist (`proposals.render_disambiguation` / `interpret_disambiguation` / `attach_to_candidate` / `promote_to_new`) — human picks a number / `new` / `no`. **The optional LLM-judge re-rank was deliberately deferred** (the deterministic filter already surfaces the named cases; an LLM judge is the right *pruner* for shortlist noise — build if the deterministic ranking proves too noisy in practice). Tests: `test_intake_endpoints.py` (server fuzzy cases), `matrix_intake/test_proposals.py` (disambiguation grammar), `matrix_intake/test_crm_client.py` (candidate shape).
|
||||||
|
|
||||||
|
**Post-deploy enhancement — conversational (LLM-mediated) edits (Grant, 2026-06-17). DEPLOYED & LIVE 2026-06-17 (bot-side; pulled + restarted on the Spark `modelo32`); Matrix live-smoke pending.** Today an in-thread correction uses a rigid grammar (`edit field=value`). Let a free-form reply that isn't `yes`/`no`/a literal `edit …` be treated as a natural-language revision instruction: send {current proposal + the instruction} back through local Qwen (`spark.py`, the same parse leg — no Claude, no scrub) and re-render the revised proposal card for approval (e.g. "add that we met on June 14" → updated Note). Keeps the draft→human-approve gate (the human still confirms the LLM's revision) and subsumes `edit field=value` as a deterministic fast path. Thread the instruction text into `normalize`'s source so the email-integrity rule still holds (a revised email must appear in the original message or the instruction). Pairs naturally with the fuzzy-match item above — build both as one conversational-UX pass after the smoke. (Parsing of free-form *intake* messages already works today via the Qwen parse leg; this item is specifically about the *edit/refine* turn.)
|
||||||
|
- **As built:** `parse.revise` + `_apply_revision` (offline-testable; the approval-stage `else` branch in `bot.py` routes any non-yes/no/edit reply here). `parse_message` now stashes `_source_text` so revise can re-check email integrity against {instruction + original}; the model's email field is never trusted. No-op revisions are caught via `proposals.same_fields` (re-prompt, not a false "Updated"). **Known v1 limit:** revise edits fields but does not re-run the matcher on a mid-thread firm rename. Tests: `matrix_intake/test_parse.py` (revise merge + email integrity + match-id preservation).
|
||||||
|
|
||||||
|
**Managed service — DONE (container) 2026-06-17; dashboard card deferred to a spark-control session.** The bot ran as a bare `nohup` process (silently died on a Spark reboot). Now it's a **docker-compose service** (`docker-compose.yml` at the repo root + `backend/matrix_intake/Dockerfile`; `restart: unless-stopped` → survives reboot; image bundles `backend/matrix_intake` + the stdlib `backend/ingest` Spark client; `.env` mounted read-only). Cutover done on the Spark (nohup stopped, container `matrix-intake` up + listening). **Still bare `docker`/SSH-managed** — a spark-control dashboard card (Update/Restart/Stop/Logs tile like `matrix-bridge`) is a separate task in the spark-control repo: see `docs/handoffs/add-intake-bot-to-spark-control.md`.
|
||||||
|
|
||||||
|
**Parse mis-identifies the investor when the message names an internal teammate (found in live-smoke, 2026-06-17).** *"jonathan is chatting with wyoming soon about fund commitment"* → the bot picked **jonathan** (a colleague/CRM admin) as the investor and offered a Jonathan/Nathan fuzzy shortlist, when the investor is **Wyoming**. Root cause is upstream of matching: local Qwen has no notion of who's internal, and mis-read the sentence role. **Fix (cheap, high-confidence, near-term):** feed the parse prompt the ~5-person team roster + the frame *"messages are written by a team member about a prospect; a named team member is the person doing outreach, never the investor"* (roster from a config value or a small read — not the admin-gated `/api/users`, since the bot is a member). Offline-testable (stub the model). **Bigger design (deferred, needs more failure samples):** the user's idea of routing inputs through the LLM *with grid context* for entity resolution — feasible (local Qwen, same as the digest, never Claude) but feed a **bounded shortlist, not the full ~400-name grid** (a small model dilutes on a haystack); pairs with the deferred LLM-judge. Also exposes a missing concept: the **internal deal owner** (Jonathan), which the bot doesn't model. Get 3–5 more real intake messages before re-architecting; the roster fix lands regardless.
|
||||||
|
|
||||||
|
**Long-term — extract the intake bot to its own repo (recommended, not yet done).** Containerizing from this monorepo is the pragmatic now-state, but the bot is a genuinely separate deployable (own process, own `matrix-nio` dep, own lifecycle); its only CRM coupling is the HTTP API (a clean network contract) plus ~40 lines of stdlib Spark client (cheap to vendor). The tell: the spark-control Update button would run `git reset --hard origin/main` on the **whole CRM clone** — wrong blast radius. `matrix-bridge` is already a dedicated repo; mirror it. The extraction is a migration (new Gitea repo, move code + tests + guide, vendor the client, re-point the Spark deploy), so it's deferred until worth the lift — do it *before* wiring the spark-control card if both land in the same push.
|
||||||
|
|
||||||
|
### In-app camera business-card intake — DONE & live (v0.1.0:100, device-confirmed 2026-06-20)
|
||||||
|
*Shipped: a camera button in the mobile top bar (left of the quick-log pencil) → take/choose a photo → vision-transcribe → parse → fuzzy-match → edit/approve/reject, surfaced as an inline mobile sheet (`source="app_card"`, form-field edits only, any-authenticated-member). The reusable core is nio-free and already reachable from the CRM (`server.py` imports `llm`; `matrix_intake/parse.py`+`spark.py` import no `matrix-nio`), so it landed as **one endpoint** (`POST /api/intake/card`) + **one mobile component** — no bot refactor / new dep / migration; reuses the New-investor sheet pre-filled + the `.quicklog-btn svg` icon-sizing fix. History: commits `463f624` (feature) / `622d454` (handoff). (Plan doc removed — git history is the record.)*
|
||||||
|
|
||||||
|
### Scoped service-credential auth path for automated CRM writers
|
||||||
|
*Surfaced 2026-06-17 while deploying the Matrix intake bot. **Decision: defer — the bot uses a dedicated member username/password for now.** The CRM has no API-key/service-token path; its only auth is username+password → JWT. A dedicated **member** login is appropriately scoped against what matters operationally (no admin: can't manage users, reset data, or change settings) and unblocks the live smoke today.*
|
||||||
|
|
||||||
|
**Accepted residual risk (why this is worth revisiting):** a member credential is far broader than the bot's actual need (two endpoints: `GET /api/intake/match`, `POST /api/fundraising/log-communication`). A member can **read the entire LP/prospect database** — the exact data this system exists to keep off third-party servers — plus broad member-level *write* within the fundraising domain (could create/append on any investor). The credential lives in a `.env` on the Spark, so a Spark compromise leaks read-access to all LP data. Mitigating context: own-infra, LAN-local; the Matrix bot is the **first out-of-process API writer** (the digest runs in-process with direct DB access), so there is exactly **one** consumer today → building a token-scope framework now is premature (YAGNI).
|
||||||
|
|
||||||
|
**Right long-term design:** a hashed, revocable **service token** with a per-route **scope allowlist** (intake-match + log-communication only), minted/revoked from the admin panel, replacing the bot's member login. Revocation then kills the token without rotating a reused human password.
|
||||||
|
|
||||||
|
**Build trigger:** when a **second** out-of-process automated writer appears, OR before **any** automated writer is reachable beyond the LAN — whichever comes first. Build it once, properly, at that point.
|
||||||
|
|
||||||
|
### Admin-only vs. all-users web-UI surface — audit
|
||||||
|
*Requested 2026-06-16 (idea, P2).* Have the **explorer agent** report which web-UI functionality is visible only to admins vs. to all users (member role) — a map of the role-gated surface across `frontend/index.html` and the backend route auth checks. Useful input for the consolidation/permissions work.
|
||||||
|
|
||||||
|
### Daily activity digest (email to the team)
|
||||||
|
*Requested 2026-06-15. **Phase A deployed** (v0.1.0:76). **Phase B deployed & verified live in v0.1.0:77 (2026-06-16)** — digest content + Spark summarization + daily scheduler + by-investor section + admin-panel control + on-demand send. Auto-send defaults OFF until an admin enables it in Settings → Admin. **v0.1.0:83 (built, deploy pending): in-app windowed preview** — Settings → Admin builds a digest over a chosen window (last 24h or since a date) and shows it before sending (`POST /api/admin/digest/preview`), so the **real Spark summarizer can be verified on demand** even on a quiet day (the fixed last-24h `send-now` couldn't); manual send uses the same window and never touches the daily cursor.*
|
||||||
|
|
||||||
|
**Decisions (locked 2026-06-15):** recipients = **all active admins**; summarization = **Spark-LLM narrative** (never Claude — un-anonymized substance stays local); granularity = **grouped by user** (→ per investor).
|
||||||
|
|
||||||
|
**Send transport — DECIDED 2026-06-15: Gmail domain-wide delegation** (not SMTP). The box's existing service-account grant (which powers email capture) **includes `gmail.compose`**, which authorizes `users.messages.send` — verified by a token-mint probe **and a live `messages.send` to Grant**. So the digest sends through the account the CRM already uses: **no app password, no new account, no admin change.** The narrow `gmail.send` scope is *not* granted, so the sender must request `gmail.compose`.
|
||||||
|
|
||||||
|
**Phase A — DONE:** (v0.1.0:75) `configureDigestSmtp` Start9 action + `docker_entrypoint.sh` `SMTP_*` export + `backend/smtp_send.py` + admin `POST /api/admin/digest/test-email` (recipient-restricted to the admin set — not an open relay) + Settings button. (v0.1.0:76, redeploy pending) `backend/email_integration/gmail_send.py` (`users.messages.send` via DWD/compose) + `backend/digest_mailer.py` (**Gmail-DWD preferred, SMTP fallback**); the endpoint + button route through it; sender = `CRM_DIGEST_SENDER` else first active admin. Tests: `test_smtp_send.py`, `test_smtp_endpoint.py`, `test_gmail_send.py`.
|
||||||
|
|
||||||
|
**Phase B — DONE (2026-06-15/16):** `backend/digest_builder.py` builds **two sections** — *by team member* (per-user **Spark** narrative + both directions, with a deterministic fallback) and *by investor* (team-wide, inbound + outbound, deduped per email, structured). Soft-delete filtered throughout. `backend/email_integration/digest_scheduler.py` is an always-on daily thread that re-reads a **DB-backed policy** each cycle and sends once/day at the configured hour to all active admins (window cursor in `app_settings`). Control moved out of env into the **admin panel**: `app_settings.digest_policy` + `GET/PATCH /api/admin/digest/policy` + a Settings → Admin **enable toggle + send-time dropdown** (env vars only seed the first-boot default). Plus admin `POST /api/admin/digest/send-now` + a "Send Digest Now" button. Decisions settled: **6 PM default**, **always-send** (empty-day note), **per-user narrative + by-investor section**, **in-app control** (not StartOS). Tests: `backend/test_digest_builder.py`. Detail: `docs/guides/email.md`.
|
||||||
|
|
||||||
|
Have the CRM send a **daily digest email** summarizing each registered user's activity — primarily **who emailed which investors and the substance of those emails** — to the fund principal (and eventually other admins). Scales with the synced-user count: 2 users synced today, ~5 eventually.
|
||||||
|
|
||||||
|
- **Source data:** the captured email-activity already flowing through the Gmail DWD propose→approve pipeline (`backend/email_integration/`), keyed per registered user → per investor/contact. Optionally fold in other CRM activity (audit feed, automation runs, new opportunities) later.
|
||||||
|
- **Send path is NEW capability.** Today nothing leaves the box — the system only *captures* Gmail and *creates drafts*. This needs outbound SMTP. StartOS 0.4 has a system-wide SMTP account (since v0.4.0-beta.9): the user configures it once for the whole server and services read it via `sdk.getSystemSmtp(effects).const()`, which returns a `T.SmtpValue` (`host`, `port`, `from`, `username`, `password`, `security`). Wire the digest sender to that rather than hardcoding any account. *Implementation path (researched 2026-06-15, our SDK pin `^0.4.0-beta.66`):* model a `manageSMTP` action on [gitea-startos](https://github.com/Start9Labs/gitea-startos) / [vaultwarden-startos](https://github.com/Start9Labs/vaultwarden-startos) — a three-way `selection` (system / custom / disabled) built on `sdk.inputSpecConstants.smtpInputSpec`, persisted to `storeJson`, with `main.ts` injecting `SMTP_HOST/PORT/USER/PASS/FROM/SECURITY` env vars into the daemon `exec` block (same shape as the existing `setAnthropicApiKey.ts` action). The Python sender reads them via `os.environ` and opens `smtplib.SMTP`/`SMTP_SSL`. **"Custom SMTP" is a dedicated per-package account, fully independent of the server's system SMTP** — the custom branch never calls `getSystemSmtp`, so the digest can send through its own provider even on a box with no system account configured (confirmed in both reference packages). This is the likely fit here: a digest-only mailbox separate from anyone's Gmail. Note StartOS 0.4 dropped the old `Config`/`Properties` manifest spec — SMTP config is an **action + storeJson**, not a manifest config field. 
**SDK note (verified 2026-06-15):** our pin `^0.4.0-beta.66` resolves to exactly `0.4.0-beta.66` (caret on a prerelease stays within the `0.4.0` tuple), whose SMTP surface — `getSystemSmtp` → `T.SmtpValue {host, port, from, username, password, security}`, `inputSpecConstants.smtpInputSpec` (providers gmail/ses/sendgrid/mailgun/protonmail/other; selection disabled/system/custom), `smtpShape`, `smtpPrefill` — is **byte-identical** to the 1.5.3 reference packages (verified from published tarballs; repo `node_modules` is absent). Build against beta.66 as-is — **no SDK bump needed** (moving to 1.x is a major-track change with broad blast radius across `startos/`, and nothing about SMTP justifies it).
|
||||||
|
- **Analysis runs on Spark, never Claude.** The digest is deliberately **un-anonymized** (real LP names + email substance), so any summarization/analysis must go through **Spark Control to local models** — this is the one path that intentionally bypasses the scrub→Claude→re-hydrate boundary, because keeping the substance local is the whole point. Never route digest content to Claude.
|
||||||
|
- **Exempt from "agents draft, humans send."** That rule governs outward LP/prospect contact. This is an internal ops digest to the team's own inboxes — a different category — so an automated daily send here does not violate the draft-only guardrail. State this explicitly at build time.
|
||||||
|
- **Scheduling:** a daily cron, naturally co-located with the existing `backend/email_integration/scheduler.py` sync cadence.
|
||||||
|
- **Soft-delete:** every aggregate/read in the digest must filter `deleted_at IS NULL` (see the standing soft-delete rule).
|
||||||
|
|
||||||
|
Open design questions (settled at build time): send time = **6 PM box-local** (configurable in the admin panel), covering the ~24h window up to send; empty days = **always send** with a "no activity" note; summary granularity = **one per-user narrative** plus a **by-investor structured section** (inbound + outbound, team-wide) added 2026-06-16; enable/time live in the **admin panel** (DB-backed), not StartOS actions.
|
||||||
|
|
||||||
|
### Email/communication search + natural-language query
|
||||||
|
*Requested 2026-06-16. Three increments, **sequenced 1 → 2 → 3** (1 and 2 first as a quick increment; 3 is a separate, larger build after). Origin: Grant asked whether we can query "emails sent to a specific investor" / "activity by user," and floated NL queries like "existing investors who have committed capital across our funds that we haven't emailed in a while."*
|
||||||
|
|
||||||
|
**Status: items 1 & 2 SHIPPED in v0.1.0:83 (built + verified locally 2026-06-16, deploy pending).** The Communications tab now has the structured activity surface (item 1: typed/fixed investor dropdown, mailbox + direction + **date-range** filters, free-text, **click-to-expand full body** via `GET /api/email/detail`) and a **"Search content"** semantic mode (item 2: `GET /api/email/search` over the Qdrant email index). The dropdown-empty bug (the facet only listed grid investors) was the v83 fix — it now mirrors the list across grid/org/contact matches. **Item 3 (NL→SQL) remains** — the larger, separate build below. Detail: `docs/guides/email.md`.
|
||||||
|
|
||||||
|
**Context (as written at request time, before items 1 & 2 shipped) — the data was captured but had NO front-end.** The entire Gmail email schema (`emails`, `email_threads`, `email_investor_links`, `email_account_messages`, `email_activity_proposals`, …) exists and is populated by the DWD capture pipeline, but at request time it was surfaced **nowhere** in `frontend/index.html` (only as inputs to the daily digest). So all three items below are about making already-captured data queryable/visible. Email bodies of *matched* emails are already chunked + embedded into Qdrant with `{lp_id, lp_name, doc_type:"email", date_ts}` metadata.
|
||||||
|
|
||||||
|
**Caveat that shapes all three — the two-model join.** "Emails to an investor" link to the **fundraising grid** (`email_investor_links.fundraising_investor_id`); "committed capital" lives in the grid too (`fundraising_commitments`, multi-fund). But manually-logged `communications` and `lp_profiles` (single-fund) live in the **classic** model, and the two models are only bridged by fuzzy email/name matching (no authoritative join key). Any query spanning "committed capital" + "email recency" must reckon with this. Prefer the grid side as the higher-signal source (matcher already does).
|
||||||
|
|
||||||
|
**1. Activity query endpoints + panel — DONE (v0.1.0:83).** Delivered as the **Communications tab** rather than the originally-sketched `/api/activity` endpoints: `GET /api/email/activity` (`db.query_email_activity`) returns the actual records filterable by investor / mailbox / direction / **date range** / free-text, and `GET /api/email/detail` expands the full body. Answers "emails to investor X" and "what has mailbox Y sent" interactively. Soft-delete filtered throughout; investor identity is typed (`fund:`/`org:`/`contact:`) so org/contact-only matches resolve and are pickable. *(The `collect_user_activity()`/`collect_investor_activity()` digest helpers remain the by-user/by-investor pivot source; a dedicated per-user pivot UI was not needed for the answer Grant wanted, which the mailbox+direction filters already give.)*
|
||||||
|
|
||||||
|
**2. Email content search box — DONE (v0.1.0:83).** A **"Search content"** toggle in the Communications tab → `GET /api/email/search?q=` wraps `backend/ingest/search.py:hybrid_search` filtered to `doc_type='email'`; hits are hydrated + soft-delete-filtered against SQLite (canonical) and link back to the full body. Semantic/lexical search over email *content* ("find where we discussed the mining deal"), distinct from item 1's structured filters. 503 (clean "unavailable") when Spark/Qdrant is unreachable.
|
||||||
|
|
||||||
|
**3. Natural-language → safe structured query — SUPERSEDED & BUILT as W2 above (2026-06-18).** The design constraints below (especially "LLM = Claude behind the redaction boundary" and the validated-AST shape) were **revisited and changed** during the build: translation runs on the **local Qwen** (no Claude, no redaction), and the safe surface is a **named-intent catalog**, not a generic query AST. See the W2 entry above and `docs/guides/nl-query.md` for what shipped; the original framing is kept here for provenance. _An LLM translates a plain-English question into a **safe, read-only** DB query against the CRM, for relational/analytical questions that semantic search *cannot* answer — Grant's example ("committed across funds AND not emailed in a while") is joins + aggregates + recency, not a text-topic match. Original design constraints (locked at request time):_
|
||||||
|
- **LLM = Claude behind the redaction boundary** (better at text-to-SQL than local Qwen; the scrub→Claude→re-hydrate path already exists for the PII concern). Not Spark — Spark Control offers embeddings/rerank/RAG + local chat, but **no text-to-SQL**.
|
||||||
|
- **Safety is the hard part, not the parsing.** Do NOT hand the LLM open-ended SQL against the live DB (soft-delete leaks, injection, runaway scans). Constrain it: read-only connection/view, a curated/parameterized query surface or a validated query AST, soft-delete-filtered views, row/time caps. Treat as its own designed feature with its own tests.
|
||||||
|
- Must reckon with the two-model join caveat above (capital lives in the grid; recency from email links).
|
||||||
|
|
||||||
|
### Consolidate on the fundraising grid as canonical; retire vestigial classic-CRM surfaces
|
||||||
|
*Decided 2026-06-16. The CRM carries two stacked models: the original generic CRM (contacts / lp_profiles / opportunities / manual communications) and the fundraising grid + email capture. The team uses the grid; most classic surfaces are un-adopted (verified on the box: Pipeline + Communications empty, Contacts auto-populated from the grid). **Decision: the fundraising grid + email capture is the canonical system of record;** prune or repurpose the rest rather than maintain a parallel half-empty CRM.*
|
||||||
|
|
||||||
|
**Retire `lp_profiles` + LP Tracker — DONE & deployed live (v0.1.0:78, 2026-06-16).** 21/21 backend tests green, `py_compile` clean; installed to the box (`installed-version`→`0.1.0:78`, migration chain …77→78 clean, server up on :8080).
|
||||||
|
- Removed the orphaned `LPTrackerPage` component + the `lp-tracker`→`fundraising-grid` redirect (frontend).
|
||||||
|
- Removed the `/api/lp-profiles*` endpoints (list/get/create/update) and their handlers, the unused `lp-breakdown` report + route, the contact-dossier LP display (frontend + the `lp_profile` block in `handle_get_contact`), and the demo-seed LP block.
|
||||||
|
- **Dashboard KPIs repointed:** "Total Committed" now sums `fundraising_investors.total_invested` (the canonical grid rollup), **excluding graveyarded investors** so the headline reflects live committed capital — a deliberate divergence from `/api/fundraising/relational-summary`, which sums all rows. "Total Funded" dropped — the grid has no funded-vs-committed concept and the frontend never rendered it. (If a funded/wired status is wanted later, that's a new grid feature, not a revival of lp_profiles.) Regression-guarded by `test_dashboard_report.py`.
|
||||||
|
- **Left in place (intentional):** the empty `lp_profiles` table + index (no destructive drop, per never-hard-delete); the contact-delete soft-delete cascade; the `--reset-all-data` clear; and the inert MOCK_MODE `mockDb.lp_profiles` fixtures (dev-only fallback, never hits the backend — its dashboard mock still reads mock lp_profiles, a known dev-only divergence from the real backend). Updated `test_soft_delete_reads.py` to drop the now-removed `lp_profile` assertions (kept its org `total_funded` opportunities-aggregate checks).
|
||||||
|
|
||||||
|
**Adopt the Pipeline — wire it to the grid. — DONE: DEPLOYED & live-smoked 2026-06-18 (v0.1.0:88; migration chain …86→88 clean, `0005_grid_pipeline_link.sql` applied on the box, server up; the full +Pipeline → board → advance-stage → remove round-trip is verified on the box).** *(Was: second build after the Matrix-bridge intake.)*
|
||||||
|
- Pipeline (`opportunities`) is fully built and functional but unused. Keep it: it's the one classic surface that tracks something the grid doesn't — a forward-looking deal funnel (stage, `expected_amount × probability`, owner, close date) vs. the grid's actual committed dollars + flags.
|
||||||
|
- New idea (Grant, 2026-06-16): let users **flag an investor in the grid as a pipeline opportunity** (a grid column/control) so it **auto-creates / syncs an `opportunities` row** that loads into the Pipeline board. Design the grid↔pipeline link (which fund seeds it? what sets stage/expected amount? keep them reconciled). Turns Pipeline from a disconnected second data-entry surface into a view driven by the canonical grid.
|
||||||
|
- Revisit the stray contact-create side-door (the "Create Opportunity" modal `POST /api/contacts`) once the grid-driven flow exists.
|
||||||
|
|
||||||
|
**As built (decisions locked with Grant 2026-06-17):** UX = **row action + seed modal** ("Add to Pipeline" per grid row → captures primary contact / target fund / expected amount / stage / probability). The durable link is `opportunities.fundraising_investor_id` (**migration 0005**, additive + reversible); "is in pipeline?" / "what stage?" are **derived from a live opp join**, never a denormalized flag (no drift). **Ownership split:** the grid owns whether the link exists + the seed; the **board owns stage/probability/owner/close/next-step** — a grid save never reseeds a live opp (`POST /api/fundraising/pipeline/link` is idempotent: one live opp/investor, re-link returns the existing one). Contact is **reused from the grid's synced `fundraising_contacts.contact_id`** — the `POST /api/contacts` side-door is **gone**. Grid `lead` → opp owner (fallback acting user). Two **read-only** grid columns (Pipeline action + Pipeline Stage) injected on read; their row values are stripped on write so they never persist or dirty the autosave. **Remove from pipeline** (`POST .../unlink`) **soft-deletes the opp; the grid row is left fully intact** (Grant's explicit ask). Deleting an investor from the grid archives its orphaned opp (`reconcile_grid_pipeline_links`, called after `sync_fundraising_relational`). **Folded in:** the P2 soft-delete leak in `handle_pipeline_report` + dashboard pipeline aggregates (archived opps no longer counted). Tests: `backend/test_grid_pipeline_link.py` (link/idempotent/round-trip/guards/unlink-intact/re-link/orphan/aggregates), 28/28 suite green, render-smoke green. **Deploy (note written pre-deploy; since deployed as v0.1.0:88 — see the heading above):** server-side → needed an **s9pk build + install** (planned as v87), gated on getting authorization first.
|
||||||
|
- **Follow-up (v0.1.0:88, frontend-only, DEPLOYED & verified 2026-06-18):** retired the Pipeline page's **"+ New Opportunity"** button + its create-by-contact modal — an opportunity is now born **only** from a fundraising-grid investor row ("+ Pipeline"), matching how the team works (they live in the grid). The board is now a view + stage-management surface; button replaced with a muted "Add deals from the Fundraising Grid" hint. Removed the dead handler/state + the page's unused `/api/contacts` fetch.
|
||||||
|
- **Deferred (not built):** no write-back of committed dollars into grid fund cells (grid stays canonical for committed $); a graveyarded investor with a live opp still shows its stage (deliberate — a live deal is a live deal).
|
||||||
|
|
||||||
|
**Keep the Contacts table — as the read-only per-person directory it already is.** Confirmed 2026-06-16: the grid models **investor entity → many people** correctly today. The grid "contacts" column is a multi-pill editor; each pill syncs to a `fundraising_contacts` row AND its own classic `contacts` row (5-person family office → 1 investor + 5 contacts, linked via `fundraising_contacts.contact_id`, migration 0004). The Contacts page is **read-only for creation** (header: "added from the Fundraising Grid"; no New-Contact button), edit-only via the detail slide-over — the desired flow already holds. Email capture already rolls **multiple people up to one investor** (matcher indexes each pill's email separately, all → same `fundraising_investor_id`; `email_investor_links` records both investor and specific person). No build here — future email-surfacing UI should present comms grouped by investor across all its people.
|
||||||
|
|
||||||
|
**Legacy-usage sweep — what's still wired, what's dead, what can be deleted (Grant, 2026-06-19; after mobile feature-complete).** Phase 7's `.badge-*` remap surfaced that several legacy badge classes (lead/outreach/meeting/due-diligence/committed/funded) have no — or near-no — live JSX caller, and Grant flagged other likely-dead legacy bits (e.g. the `investor type` field). Once the mobile redesign is feature-complete, do a systematic pass: for each legacy component / CSS class / endpoint / DB field, determine whether it still renders or runs anywhere (JSX callers, `do_GET`/`do_POST` dispatch, DB reads), then soft-retire/delete the genuinely-dead ones (never hard-delete data — archive per convention; `refactor-scout`/`janitor` can inventory). Start list: the orphaned `.badge-*` classes, `investor type`, and other classic-CRM remnants not reachable from the grid-canonical flow.
|
||||||
|
|
||||||
|
### Front-end: pre-compile JSX, drop runtime Babel (optional, larger)
|
||||||
|
*Logged 2026-06-16 during the v0.1.0:82 vendor+SRI work. The scoped fix shipped: React/ReactDOM/Babel are now vendored + SRI-pinned and served same-origin, and a jsdom render smoke check gates every build (`docs/guides/packaging.md`). This is the bigger alternative we deliberately deferred.*
|
||||||
|
|
||||||
|
Today the app ships `@babel/standalone` (~3 MB) and transforms ~5k lines of inline JSX **in the browser on every page load**. A build step that pre-compiles the JSX to plain JS would (a) eliminate the runtime-transform blank-screen class entirely (no Babel in production), and (b) load much faster. **Cost:** it introduces a build step, which contradicts the current **"No build step"** convention (single `frontend/index.html`, inline-Babel React) — so this is a real architecture change, not a tweak. Weigh only if page-load size/latency or render robustness becomes a felt problem; the render-smoke gate already de-risks the status quo. If taken: keep the source `index.html` editable, emit a compiled artifact into the s9pk, and keep the smoke check pointed at the built output.
|
||||||
|
|
||||||
|
### One-off feature batch (Grant, 2026-06-18)
|
||||||
|
*Eight one-off ideas, triaged against the backend 2026-06-18. **Cross-cutting guardrail:** anything framed as "auto-add / auto-forward / auto-suggest" lands as a **proposal surfaced for human approval** (reuse the `email_proposal_matrix` propose→Matrix→decide rails), never a silent write — per "agents draft, humans approve." #1 is built (deploy pending); #6 is a spark-control task (→ INBOX); the rest are scoped backlog. #2/#4/#7 reuse existing rails (email-proposal loop + W2 NL-query) — they're "wire a new source into an existing pipeline," not greenfield.*
|
||||||
|
|
||||||
|
- **1. Drag-reorder fundraising grid views — BUILT (frontend; deploy pending), 2026-06-18.** The sidebar view list is now drag-reorderable (HTML5 DnD mirroring the column-reorder idiom: `moveViewBefore` + `draggingViewId`/`dragOverViewId` in `frontend/index.html`). Order persists via the grid page's **existing autosave** (`views` is already in its snapshot + deps → `PUT /api/fundraising/state` → `views_json`), the same path rename/delete use — **no backend change.** Render-smoke green; the in-app drag interaction itself not yet browser-tested. **Known edge (same as existing rename/delete):** reordering while *off* the grid page only updates localStorage and is re-hydrated from the backend on next grid mount — reorder while viewing the grid. **Deploy:** needs an s9pk build + install.
|
||||||
|
|
||||||
|
- **2. [P2] Suggest new contacts from digested emails (outreach detector).** When a captured *outbound* email goes to an address not already in `contacts`/the grid and looks like outreach, propose adding it as a contact. Hangs off the existing email capture + `email_proposal_matrix` / `/api/intake/email-proposals` review rails — net-new is the detector + "looks like outreach" criteria (exclude vendors / newsletters / internal domains). Lands as a **proposal**, not an auto-add.
|
||||||
|
|
||||||
|
- **3. Pipeline stages + investor flags/labels — sharpened into a LOCKED SPEC (2026-06-19).** Was "new pipeline stages"; the design conversation collapsed it into a 4-stage per-investor funnel + auto-derived Existing-Investor flag + staleness overlay/nudge. **Full locked spec: see "Pipeline stages + investor flags/labels — LOCKED SPEC" below.**
|
||||||
|
|
||||||
|
- **4. [P2] Squarespace website form-submissions → DB (near-term, high value).** Parse `form-submission@squarespace.info` capture emails — structured Name / Email / Company / LinkedIn / Location / comments (see the website-lead screenshots, Grant 2026-06-18) — and feed them into the proposal flow. Deterministic parser (fixed format) + existing proposal rails = relatively contained. **Guardrail:** despite the "auto-added" ask, land each lead as a **Matrix proposal → one-tap approve**, not a silent insert (same pattern as email proposals). Real leads (e.g. Matt Baas, Vikrum Tatla) are currently only living in an inbox.
|
||||||
|
|
||||||
|
- **5. [P3] Matrix voice note → Spark Control transcription → intake.** matrix-nio receives an audio/voice event → download + decrypt the media → **Spark Control** transcription endpoint (Whisper-class — **confirm it exists; external dep**) → feed the text into the existing local-Qwen intake parse + disambiguation. Never call a Spark directly (Spark Control only). Larger; gated on the transcription endpoint existing.
|
||||||
|
|
||||||
|
- **6. → INBOX (spark-control repo, not this one). Dashboard card for the crm/intake bot** (Update/Restart/Stop/Logs tile like `matrix-bridge`). Already noted under the Matrix-intake "Managed service" item + `docs/handoffs/add-intake-bot-to-spark-control.md`; captured to `standards/INBOX.md` to confirm/do in a spark-control session.
|
||||||
|
|
||||||
|
- **7. [P2] Intake: "query the LLM when the name doesn't match."** Extend the disambiguation grammar (today: number / new / no — see screenshot) with a `search: <text>` option that runs the read-only **W2 NL-query** to locate the real existing investor when the typed name doesn't fuzzy-match a candidate. Builds on the existing NL-query + intake rails; keeps the human approval gate.
|
||||||
|
|
||||||
|
- **8. [P2] Email capture learns from approve/reject (scope down to rules v1).** Use the already-logged approve/reject decisions to pre-suggest a decision. **v1 = deterministic, not ML:** detect `List-Unsubscribe` / `Precedence: bulk` (newsletters) + a learned denylist of rejected sender addresses/domains → pre-mark / auto-suggest reject (e.g. recurring non-investor newsletters). Don't build a classifier until the rules prove insufficient.
|
||||||
|
|
||||||
|
### Pipeline stages + investor flags/labels — LOCKED SPEC (Grant, 2026-06-19)
|
||||||
|
*Sharpened from the inherited 6-stage funnel (lead/outreach/meeting/due_diligence/committed/funded) over a design conversation 2026-06-18/19. Supersedes one-off batch item #3. **Locked — ready to build on green-light.** Grounding (verified): the grid's only labeling today is 3 boolean flags (priority/follow_up/graveyard) + a derived longshot + the `lead` **owner** column; there is **no investor type field**; "existing investor" is implicit in `total_invested > 0`; the 6-stage pipeline lives on classic `opportunities` and only applies to rows explicitly "+Add to Pipeline"'d; saved views are driven off the flags, not stage.*
|
||||||
|
|
||||||
|
**Conceptual frame — three orthogonal axes (were conflated):**
|
||||||
|
- **A. Relationship** — existing-LP vs prospect → collapsed to a single **auto-derived "Existing Investor" flag** (below). No prospect/lead/advisor sub-types: leads become prospects fast, and there are no advisors in this grid.
|
||||||
|
- **B. Disposition flags** — keep **Priority** (the focus set) + **Graveyard** (truly dead). **Drop Longshot** — labeling something longshot is already half-giving-up, overlaps graveyard, and doesn't earn a third bucket. Everything not Priority/Graveyard is the neutral middle.
|
||||||
|
- **C. Pipeline stage** — the active-raise funnel (below), per-investor.
|
||||||
|
|
||||||
|
**1. Funnel = 4 stages, per-investor, terminal at Commitment:** `Lead → Engaged → Diligence → Commitment`
|
||||||
|
- **Lead** — identified + first contact (cold outreach, a logged first meeting, or a website inbound); one-directional so far.
|
||||||
|
- **Engaged** — a **two-way** conversation exists (they replied / there's a back-and-forth). *(Boundary confirmed with Grant: two-way, not "a second person at the firm.")*
|
||||||
|
- **Diligence** — substantive: follow-up calls/meetings or data-room access.
|
||||||
|
- **Commitment** — terminal. On commit → hand off to fund admin + record the $ in the grid fund cell; the pipeline's job is done.
|
||||||
|
- **No Funded** (fund admin owns post-commitment; the Existing-Investor flag is effectively the "closed" signal). **No Meeting** (an activity, not a position). **No Lost** stage (the Graveyard flag covers dead).
|
||||||
|
- **Start at any stage** — a known LP re-solicited for a new fund drops straight into Engaged/Diligence, not Lead.
|
||||||
|
|
||||||
|
**2. "Existing Investor" = auto-derived flag** from `total_invested > 0`, injected read-only like `pipeline_stage` (never a maintained column); rendered as a star/indicator (esp. mobile). Orthogonal to stage — a re-solicited LP shows the star **and** a live stage at once. Lifecycle: prospect runs Lead→…→Commitment → $ recorded in the grid cell → they light up as an Existing Investor.
|
||||||
|
|
||||||
|
**3. Staleness — a derived overlay on the stage + a Matrix nudge, NEVER an auto-demotion.** Governing principle: **derive-and-display freely; mutate state only via a human.**
|
||||||
|
- A quiet deal does **not** change stage. Staleness shows on the **last-contact recency value** (the grid row's / mobile card's "2d ago"): light-grey when fresh → **amber → red** by days since `last_activity_at`, appending "stale" once it crosses the threshold (e.g. "35d stale"). The stage chip stays clean; the warning rides the recency line. The **same `last_activity_at` source drives the desktop grid and the mobile card**, so both color-code automatically.
|
||||||
|
- **Why not auto-flip off Engaged/Diligence:** it re-couples axes B+C, silently destroys information ("stalled mid-diligence" vs "never engaged"), is a silent un-approved mutation (against the human-in-the-loop guardrail), and creates a perverse "log junk to stay alive" incentive.
|
||||||
|
- The "auto" part is the **nudge = W1b nurture-gap** (see the W1/W2/W3 backlog; this refines its target set to **Engaged/Diligence**, not Commitment): daily job flags "in pipeline (Engaged/Diligence) + no activity > threshold + no open reminder" → bot **suggests a reminder**, a human confirms → re-engage (logging a comm resets `last_activity_at`) **or** consciously graveyard. The system nudges; the human acts. Deals never silently fall off.
|
||||||
|
- **Stale threshold: ONE global threshold (locked 2026-06-19).** Not stage-aware for v1 (Diligence-trips-faster was considered and deferred). Pick the amber/red day-counts at build.
|
||||||
|
- **Stale-as-a-view:** also a saved grid view keyed on `last_activity_at` (e.g. >90d, not graveyarded) — distinct from the per-stage overlay; both reuse `last_activity_at`, no new field.
|
||||||
|
|
||||||
|
**Accepted tradeoff (per-investor, not per-fund — Grant's call):** re-soliciting an existing LP for a new fund reuses their single opportunity (set fund + reset stage) — you won't see "Funded Fund I / Diligence Fund III" as two simultaneous pipeline entries. The grid's per-fund $ columns remain the record of which funds an investor is in; the pipeline shows only the *current* raise. (Per-fund stage was considered and deferred as a bigger build.)
|
||||||
|
|
||||||
|
**Concrete change set (cost asymmetry: labels/overlays are cheap; the enum is the one-time expensive bit):**
|
||||||
|
1. **Enum:** `PIPELINE_STAGES = ['lead','engaged','diligence','commitment']` (`server.py:1833`) + the ~8 mirror sites: report ordering CASEs (`server.py:3782/3859`), `nl_query/intents.py:34/37`, frontend kanban (`index.html:4168`, mock `:2174`), opp-form `<option>`s (`:7732`), and the `'funded'/'lost'` filters in `total_funded`/`pipeline_value` (`server.py:2721/3766/3877`).
|
||||||
|
2. **Data migration** of existing `opportunities.stage`: `outreach,meeting→engaged`; `due_diligence→diligence`; `committed,funded→commitment`. Reconcile the stray `lost` value (not in the settable enum) to graveyard-flag semantics.
|
||||||
|
3. **Existing-Investor flag:** derive from `total_invested > 0`, injected read-only (grid column + mobile star).
|
||||||
|
4. **Drop Longshot:** remove the derived `longshot_followup` + its deprecated view filter.
|
||||||
|
5. **Staleness overlay:** grey/amber/red on the **last-contact recency value** (not on the injected `pipeline_stage` chip, which stays clean) by `last_activity_at`, + the stale saved view.
|
||||||
|
6. **Nudge:** specialize **W1b** to Engaged/Diligence in-pipeline deals.
|
||||||
|
|
||||||
|
Items 3–6 are cheap (derived/read-time/frontend, reuse `last_activity_at`, no migration); items 1–2 are the deliberate one-time enum + migration.
|
||||||
|
|
||||||
|
**Card presentation (mobile + grid, locked 2026-06-19):**
|
||||||
|
- **Stage chip** = one of the 4 stages, shown **only when the row is in the pipeline** (most grid rows aren't — no chip / a faint "+ Pipeline" affordance otherwise).
|
||||||
|
- **Top-right corner** = the **Priority** disposition only (star/pill when flagged, empty otherwise). Graveyard rows live in the Graveyard view / render muted — not a corner badge.
|
||||||
|
- **Existing Investor** (auto-derived, `total_invested > 0`) = its own distinct indicator (star by the name or a left accent — **not** a per-card banner; keep it restrained per `design/DESIGN.md`).
|
||||||
|
- **Last-contact recency** carries the staleness color (grey→amber→red, "Nd stale").
|
||||||
|
- This **replaces the design-mockup's INVESTOR/PROSPECT category chip** — we have no prospect/investor *type*; that two-value badge was the tool deriving committed-$>0, which is exactly our Existing-Investor flag. Feeds `design/BRIEF.md` §3a.
|
||||||
|
|
||||||
|
### Mobile-first implementation — backlog (design landed 2026-06-19)
|
||||||
|
*The `/design` round-trip is complete: the contract now describes the mobile-first system
|
||||||
|
(`design/DESIGN.md` §8 + the `mobile` token group), provenance + per-surface interaction model
|
||||||
|
are in `design/_imports/2026-06-19/`, and the input brief is `design/BRIEF.md`. This is the gap
|
||||||
|
between that contract and the current desktop-only `frontend/index.html` — the implementation
|
||||||
|
backlog. **Scoped 2026-06-19 (plan below); not yet started.**
|
||||||
|
The comps are signed-off prototypes, **not drop-in** (Claude Design runtime, seed data) — each
|
||||||
|
surface is re-authored in the app's React idiom and wired to the **real API**.*
|
||||||
|
|
||||||
|
#### Mobile PWA — installable home-screen app — BUILT 2026-06-20 (deploy pending)
|
||||||
|
|
||||||
|
**Option A (iPhone-first, no service worker).** Makes the app installable to the iOS home
|
||||||
|
screen and launch **standalone** (full-screen, no Safari chrome, dark themed status bar,
|
||||||
|
splash). Shipped: `frontend/manifest.webmanifest` (`display:standalone`, `start_url:/`,
|
||||||
|
`theme_color`/`background_color` = the brand base `#0b1118` already reserved for this in
|
||||||
|
`design/tokens.tokens.json`); square icons generated from `ten31-app-icon.svg` (full-bleed
|
||||||
|
`#0b1118` + white "T31", maskable-safe) → `icon-192.png`/`icon-512.png`/`apple-touch-icon.png`
|
||||||
|
(180); `<head>` gains `rel=manifest`, `theme-color`, the `apple-mobile-web-app-*` metas
|
||||||
|
(status bar `black` — opaque, so content never slides under the notch), `apple-touch-icon`,
|
||||||
|
and `viewport-fit=cover` (so the tab bar's existing `env(safe-area-inset-bottom)` clears the
|
||||||
|
home indicator). One pre-auth backend route serves `/manifest.webmanifest` as
|
||||||
|
`application/manifest+json` (`backend/server.py`); icons serve via the existing `/assets/`
|
||||||
|
handler. **No service worker** — on iOS the install prompt doesn't exist regardless (A2HS is
|
||||||
|
always manual via Share), standalone display needs none, and a cache-first SW would reintroduce
|
||||||
|
the stale-shell class the render-smoke gate guards against. Verified: render-smoke green +
|
||||||
|
live-curl (manifest + icons 200 pre-auth, correct content-types). **Deploy:** ships in the
|
||||||
|
next s9pk with the mobile phases.
|
||||||
|
- **Known minor:** the iOS status bar is fixed `black` at launch (can't follow the in-app
|
||||||
|
light/dark toggle); a barely-perceptible seam vs the `#0b1118` app. Acceptable; dark is default.
|
||||||
|
- **Deferred (not needed for iPhone):** a network-first service worker → Android's "Install"
|
||||||
|
prompt + faster relaunches; the JSX-precompile build-step (see "Front-end: pre-compile JSX, drop runtime Babel" above) is the better lever
|
||||||
|
if relaunch speed is ever a felt problem.
|
||||||
|
- **Adjacent issue (not PWA, noted while here):** a phone in **landscape** can exceed the 768px
|
||||||
|
breakpoint and render the *desktop* layout; `orientation:portrait` in the manifest hints at
|
||||||
|
this but iOS ignores it for home-screen apps. Revisit if it bites during device-testing.
|
||||||
|
|
||||||
|
#### Phase 8 — conform to the FINAL Claude Design mockups (mobile) — **NEXT SESSION (scoped 2026-06-19)**
|
||||||
|
|
||||||
|
*Phases 0–7 built the mobile surfaces + light theme. Phase 8 closes the gap to the **final** design + functional parity. Two independent agent passes ran 2026-06-19 (functional-parity + visual-conformance); their findings + the source-of-truth correction below drive this plan.*
|
||||||
|
|
||||||
|
**⚠️ Anchor on the `.dc.html` prototypes at their DEFAULT props — NOT the screenshots.** The Claude Design export (`design/_imports/2026-06-19_zip-file/Venture-CRM mobile redesign/`) ships parameterized `*.dc.html` prototypes whose `data-props` **defaults are the landed decisions** (`variant:compact`, `theme:dark`, `font:plex`, **`lpFlag:earmark`** — see `GridApp.dc.html:320`). The PNG `screenshots/` are **option-history** (rejected/stale prop combos: INVESTOR/PROSPECT disposition badges, a 6-stage MEETING/FUNDED funnel, the star flag). **The per-surface build reference is `design/phase8-conformance.md`** — a re-anchored visual-conformance spec (anatomy + deltas + line refs, built against the `.dc.html` defaults); it supersedes the original screenshot-anchored pass. **The functional-parity agent report stands** (built from `store.js` + `.dc.html` wiring, not screenshots). *(General learning promoted to `standards/guides/design.md` Phase C.)* **Per-surface existing-LP signal: Grid + Pipeline = earmark corner-triangle** (Grant override 2026-06-19 — the dc used ★ for Pipeline; unify on the earmark) **· Contacts = avatar ring.** **Contacts (decided):** drop the investor/prospect type tabs (prospect type unused), but **keep a Priority-flag sort** as an enhancement beyond the dc.
|
||||||
|
|
||||||
|
**Authoritative final investor card** (from `GridApp.dc.html:84–105`, the `<sc-for>` card; supersedes the screenshot "3-zone" card): card = `--panel` bg, 1px `--border`, radius 10, padding 12×14, `--shadow-card`, gap 8. **Existing-LP = an earmark corner-triangle** top-left (18px `--accent` dog-ear via the border trick), *not* a star, *not* a left-border. Row 1: investor **name** (16px/600, left) · **PRIORITY** badge right (mono 10px pill, **only if flagged — no INVESTOR/PROSPECT disposition badge**). Row 2: committed **`$amount`** (mono 15px, left) · **4-stage pipeline-stage chip** (mono 11px pill, `lead/engaged/diligence/commitment`). Row 3: **recency** "2d ago" (mono 12px). **No contact-name footer band** (that was an older screenshot variant). The current app card is close — 8a is mostly: swap the existing-LP signal to the earmark corner-triangle, move priority to the right-side PRIORITY badge, confirm `$` placement.
|
||||||
|
|
||||||
|
**Sequenced plan (each = a reviewable commit; lands on `main` deploy-pending; Grant device-tests the whole set at the end):**
|
||||||
|
- **8a — Card re-author (Grid + Contacts)** to the spec above + earmark; **reconcile `design/DESIGN.md` §8 + tokens to this final card** (the contract may still describe the older card). Closes visual G1/G2/C2 (re-anchored).
|
||||||
|
- **8b — Detail-surface actions:** Contacts detail → bottom-sheet w/ copy-email, **Log communication**, **Open-in-Grid**, org/stage/committed summary; Pipeline detail → **comms timeline + "+ Log"**; add Open-in-Grid cross-nav to Reminders too. Closes funct GAP 2/3/5, visual C1/P1.
|
||||||
|
- **8c — Global quick-log pencil** in the mobile top bar (Grid + Contacts) → pick investor/contact → `log-communication`. Closes funct GAP 1, visual S3.
|
||||||
|
- **8d — Sort controls** (Grid + Pipeline) at Claude-design detail (name/stage/amount/staleness/priority). Closes funct GAP 4, visual G4.
|
||||||
|
- **8e — Reminders parity:** grid reminder = **read/edit/clear/create** the investor's existing reminder w/ date presets; swipe-snooze → **snooze sheet** w/ presets (not fixed +7d); **investor picker** on the Reminders-tab create sheet — **easy: the `reminders` table already has `investor_id` and `POST /api/reminders` already accepts it (server.py:3761); the standalone create sheet just omits it** (NOT a backend limit — the functional agent mislabeled it); due-chip pill + urgency dot + "Today" bucket. Closes funct DIV 1/2/3, visual R1/R2/R4/R5.
|
||||||
|
- **8f — Pipeline card + nav:** labelled `‹ Lead`/`Engaged ›` footer buttons + ★/recency on card; **clickable pager dots**; per-stage segmented tint. Closes visual P2/P3/P4, funct pager-dots. *(Pipeline **accordion mode** explicitly **NOT wanted** — Grant: a prototype variant we never used.)*
|
||||||
|
- **8g — Add-investor stage + priority:** the create sheet sets an initial pipeline stage + priority (create → `pipeline/link` at chosen stage; priority on the row). **Grant-approved product decision.**
|
||||||
|
- **8h — Loose ends:** grid create-sheet **duplicate matches tappable** → open existing investor; **logging a "Note" resets staleness** (Grant-approved — confirm `log-communication` recency rule does this); remove/handle the **dead `'snoozed'` status** (snooze = keep `open` + push due_date by design, yet still in the edit dropdown).
|
||||||
|
- **8i — Shell:** bottom-tab **SVG line icons** (replace emoji glyphs) + the **`·Ten31·` wordmark** in the top bar (currently shows the page title). Closes visual S1/S2.
|
||||||
|
|
||||||
|
**Effort note:** almost all of 8a–8i is **client wiring against endpoints that already exist** — only 8g (create→link) and 8e (reminder fields) touch create/link flows, and both use already-built endpoints. No new backend tables. Verify each phase with the jsdom/CSSOM harness, then commit it. To see the prototypes rendered (optional): `support.js` needs `window.React` + a local server — render `*.dc.html` at default props (don't trust the screenshots).
|
||||||
|
|
||||||
|
**Prerequisite — inline-style→CSS migration: SCOPED 2026-06-19 — much smaller/divisible than
|
||||||
|
the "~1,300 inline styles" framing suggested.** Ground truth from `index.html`: **370** total
|
||||||
|
`style={{}}` objects (not 1,300), against an existing **1,861-line `<style>` block** (with
|
||||||
|
`:root` vars + ~all the `.nav-item`/`.sidebar`/`.table` classes + **4 media queries already**,
|
||||||
|
incl. a `min-width` one) and **1,088 `className=` usages** — the app is already majority
|
||||||
|
class-based. Two consequences:
|
||||||
|
- **The responsive migration that gates mobile is only ~114 inline styles**, confined to the four
|
||||||
|
mobile surfaces + shell: FundraisingGrid **70**, Reminders **18**, Contacts **17**, Pipeline **7**,
|
||||||
|
App shell **2**. The other **240** inline styles live on desktop-only pages (Settings 104,
|
||||||
|
Outreach/Email/Status 57, Thesis 44, Comms 31, Dashboard 4) that are **absent on mobile**, so they
|
||||||
|
never block it. → **Not a monolithic blocker; it divides per-surface** and folds into each surface's
|
||||||
|
build (no upfront sweep).
|
||||||
|
- **Two separable axes, not one.** (1) *Responsive* = layout-bearing inline styles → CSS classes +
|
||||||
|
`min-width` queries (the ~114 above; gates mobile layout). (2) *Theming* = inline **hex → `var()`**
|
||||||
|
so `[data-theme="light"]` can re-bind them — **183 hex literals** in the JSX region, app-wide but
|
||||||
|
mechanical (precedent: the design guide's inline-hex→`var()` field notes); gates the **light theme**
|
||||||
|
only. Sequence them apart.
|
||||||
|
|
||||||
|
**Data-layer dependency — the locked pipeline-stages/flags spec** (see the section above) lands
|
||||||
|
**first, standalone (Phase 0 below)**: the mobile cards render the 4-stage chip, the auto-derived
|
||||||
|
Existing-Investor star, and the staleness overlay, all of which need the stage enum + migration +
|
||||||
|
`total_invested>0` derivation + the `last_activity_at` ramp. Building the cards before the data
|
||||||
|
layer means hardcoding against a model that's about to change.
|
||||||
|
|
||||||
|
**Implementation plan (sequenced; decisions confirmed with Grant 2026-06-19) — fold the per-surface
|
||||||
|
migration into each surface's build, behind one shared foundation step. No upfront sweep.**
|
||||||
|
|
||||||
|
- **Phase 0 — Pipeline-stages/flags data layer — BUILT + tested locally 2026-06-19 (deploy pending).**
|
||||||
|
The locked spec above. **Shipped:** enum → `['lead','engaged','diligence','commitment']`
|
||||||
|
(`server.py`) + all mirror sites (report CASEs/filters, `total_funded`→`commitment`,
|
||||||
|
`nl_query/intents.py`); reversible migration **`0007_pipeline_stages_v2`** (outreach/meeting→engaged,
|
||||||
|
due_diligence→diligence, committed/funded→commitment, stray `lost`→archived; up+down verified on
|
||||||
|
synthetic data — the live DB has 0 opps so it's a real no-op there); backend injection of
|
||||||
|
`existing_investor` (`total_invested>0`), `last_activity_at`, and `staleness` (`''`/`aging`≥30d/
|
||||||
|
`stale`≥60d, boundaries inclusive) into the grid GET + stripped on write (`_computed_row_values` +
|
||||||
|
frontend `stripComputedRows`); frontend enum sites (Pipeline board, opp-form, mock) + a 4-stage
|
||||||
|
`pipeline_stage` chip with DESIGN tints. **Drop Longshot (spec item 4) was already done** by prior
|
||||||
|
cleanup (vestigial empty column + strip code) — left as-is (still cleans legacy blobs). Tests:
|
||||||
|
`test_pipeline_stages_v2.py` (migration remap + derivation values/boundaries) + updated
|
||||||
|
`test_grid_pipeline_link`/`test_soft_delete_reads`/`nl_query`; **36/36 suite green, render-smoke
|
||||||
|
green, fresh-DB migrate clean**. **Deferred to Phase 3 (co-lands with the mobile cards, where the
|
||||||
|
card design specifies them):** the *visible* desktop rendering of the existing-investor star + the
|
||||||
|
staleness-colored recency column + the seeded "Stale" saved view — the data is injected and
|
||||||
|
test-locked now, so Phase 3 is pure frontend. W1b nudge specialization is a separate fast-follow.
|
||||||
|
**Deploy:** needs an s9pk build + install (**authorize first**).
|
||||||
|
- **Phase 1 — Shared mobile foundation — BUILT 2026-06-19 (deploy pending).** Shipped: `:root` mobile
|
||||||
|
vars (`--mobile-tab-bar-h`/`--mobile-touch-target`/`--mobile-input-h`/`--mobile-sheet-radius`/screen-pad +
|
||||||
|
fonts + `--text-subtle`/`--border-strong`); CSS for the safe-area-aware **`.bottom-tab-bar`**, the
|
||||||
|
**`.bottom-sheet`/`.sheet-scrim`/`.sheet-handle`** primitive (styling), and `.mobile-only`/`.desktop-only`
|
||||||
|
utilities — all `display:none` on desktop so **zero desktop change**; the **4-tab bottom bar**
|
||||||
|
(Grid·Pipeline·Reminders·Contacts → `setPage`) + a **mobile account/logout popover** wired into `App`
|
||||||
|
(sidebar already CSS-hidden <768px). Render-smoke green. **Deliberately deferred:** (a) the
|
||||||
|
**`<BottomSheet>` React component + `useIsMobile()` hook** → Phase 2, designed against their first real
|
||||||
|
consumer (no dead code); (b) the **13→15px type bump is per-surface**, not a global body rule — `body`
|
||||||
|
has no base font-size, so it lands as each surface is re-authored (Phases 2–5); (c) the
|
||||||
|
`[data-theme="light"]` block → Phase 6 (dead without the toggle). Browser-interaction (the bar on a real
|
||||||
|
phone) untested, like view-reorder.
|
||||||
|
- **Phase 2 — Contacts (pattern-validator spike, BEFORE the Grid) — BUILT 2026-06-19 (deploy pending).**
|
||||||
|
Read-only A–Z directory (sticky letter headers, sorted/sectioned by last name) + segmented
|
||||||
|
All/Investors/Prospects tabs + pinned search → **full-screen read-only detail** (`.fs-detail`, promotes
|
||||||
|
the slide-over: contact info w/ tap-to-copy email, opportunities, communication history) → **sort
|
||||||
|
BottomSheet** (the sheet primitive's first, read-only consumer: Name A–Z / Z–A / Recently-contacted —
|
||||||
|
restores the column-sort the card list loses). Proves the list→detail→sheet pattern + per-surface
|
||||||
|
migration mechanics on the lowest-risk surface before the crux. *(Reordered ahead of the earlier "Grid
|
||||||
|
first" draft.)* **Lands the shared primitives** (deferred from Phase 1, designed against this first
|
||||||
|
consumer — no dead code): **`<BottomSheet>`** (scrim/Escape/**pointer drag-to-dismiss**, mount enter/exit
|
||||||
|
animation, built on the Phase-1 `.bottom-sheet` CSS) + **`useIsMobile()`** (768px `matchMedia`; surfaces
|
||||||
|
swap via a rules-of-hooks-safe wrapper — `ContactsPage` → `Desktop`/`MobileContactsPage`, **zero desktop
|
||||||
|
change**). This surface's **15px body bump** lands on `.mobile-screen`. Writes: **none** — Contacts is
|
||||||
|
read-only on mobile per `BRIEF.md` §3b (create/edit live on the Grid). Grew the `:root`/mobile var set
|
||||||
|
(`--bg-input`, `--accent-light`, mobile card/control radii + card/screen/detail-title fonts) per DESIGN §9.
|
||||||
|
Verified: render-smoke green + a throwaway jsdom interaction harness (mounted the real app at 375px,
|
||||||
|
stubbed `/api/contacts` — list/grouping/sort-sheet/detail/back all asserted, 14/14). **No browser/real-phone
|
||||||
|
check yet** (same deferral as Phase 1 + view-reorder). **Deploy:** folds into the next s9pk build.
|
||||||
|
- **Phase 3 — Fundraising Grid (the crux). P3a BUILT 2026-06-19 (deploy pending); P3b (name/pill edit) deferred.**
|
||||||
|
Split confirmed with Grant 2026-06-19: P3a ships the readable + already-write-supported surface now;
|
||||||
|
editing an existing investor's **name + contact pills** is **P3b** (needs a new narrow per-row PATCH +
|
||||||
|
a pill-editor UI — `log-communication` can't rename/edit pills, and the whole-grid PUT is forbidden on
|
||||||
|
mobile).
|
||||||
|
- **P3a (built):** lean **`MobileFundraisingGrid`** (separate component — the desktop grid's debounced
|
||||||
|
whole-grid-PUT autosave would race on every mobile edit, so it's NOT reused; `FundraisingGridPage` is
|
||||||
|
now a `useIsMobile()` wrapper → `Desktop`/`Mobile`, desktop untouched). Card list over the **active
|
||||||
|
view** (ported the desktop view-filter predicate — graveyard/follow-up/lead flags + columnFilters — to
|
||||||
|
a shared pure helper so it can't drift), tappable view-name → **view-picker sheet**, search, the locked
|
||||||
|
**card model** (name · committed $ via `formatMoneyMobile` · stage chip · staleness-colored recency ·
|
||||||
|
Existing-Investor left-accent · Priority corner; graveyard muted). Full-screen detail (read-only:
|
||||||
|
commitments/funds, contact pills, notes) + **edit sheets**: **log a note** (`log-communication`),
|
||||||
|
**pipeline stage** (linked → `PATCH /api/opportunities/{id}/stage` via the new injected `opportunity_id`;
|
||||||
|
unlinked → `pipeline/link` then it; + remove-from-pipeline), **set a reminder** (`POST /api/reminders`),
|
||||||
|
and **`+ New` investor** (`log-communication` + `create_investor_if_missing`, client-side dedup
|
||||||
|
typeahead). **Never whole-grid `PUT /state`.** Backend: one small hook — read-only **`opportunity_id`**
|
||||||
|
injected into grid rows (`opportunity_id_by_source_row`, added to both strip points), so the detail can
|
||||||
|
PATCH the linked opp directly. Tests: `test_grid_pipeline_link` extended (opp_id inject/strip/round-trip),
|
||||||
|
36/36 green; render-smoke green; a throwaway stateful jsdom harness drove the real surface at 375px
|
||||||
|
(view filter, picker, detail, stage-PATCH, log-note, reminder, create+dedup — 18/18). **No real-phone
|
||||||
|
check yet** (same deferral as P1/P2). **Deploy:** folds into the next s9pk.
|
||||||
|
- **P3b (deferred):** `POST /api/fundraising/update-row` (version-safe single-row name/contacts mutation,
|
||||||
|
+test) + the bottom-sheet **pill editor** (add/edit/remove pills, client-side dedup). Then name + pills
|
||||||
|
become editable on an existing investor, completing BRIEF §3a's editable set.
|
||||||
|
- **Phase 4 — Pipeline — BUILT 2026-06-19 (deploy pending).** Lean **`MobilePipeline`** (separate
|
||||||
|
component; `PipelinePage` is now a `useIsMobile()` wrapper → `Desktop`/`Mobile`, desktop kanban
|
||||||
|
untouched, just renamed `DesktopPipelinePage`). **Swipe-between-stages:** a count-forward segmented
|
||||||
|
stage control (`.pipeline-seg`) + a horizontal **CSS scroll-snap** container of four full-width stage
|
||||||
|
pages (`.pipeline-swipe`/`.pipeline-stage-page`) + page dots; tapping a segment scrolls to its page,
|
||||||
|
scrolling syncs the active segment/dots. Each card shows opp name · contact·org · expected $, with
|
||||||
|
per-card **‹/› stage move** (`PATCH /api/opportunities/{id}/stage`, disabled at the lead/commitment
|
||||||
|
boundaries) — the kanban "advance" without opening the detail. Tap a card → full-screen `.fs-detail`
|
||||||
|
(read-only `OpportunityDetailPanel`-equivalent fields via `MobileDetailRow` + a `StageChip`) with a
|
||||||
|
**stage-picker `BottomSheet`**. **Opp-centric** (operates on the same `opportunities` rows + stage
|
||||||
|
endpoint as the desktop board and the Grid detail's stage edit), amounts read-only; **no Existing-Investor
|
||||||
|
star** (opps carry `fundraising_investor_id` but not `total_invested`). Removal/deletion stays on the
|
||||||
|
desktop board + the Grid detail's "remove from pipeline" — the Pipeline tab is **view + advance-stage
|
||||||
|
only**. A `reviewer` pass was applied (reset the detail's stage-sheet open-state on back; `moveStage`
|
||||||
|
awaits the PATCH). Verified: render-smoke green + a throwaway jsdom 375px harness drove the real surface
|
||||||
|
(seg counts, stage pages, segment/dot sync, ‹/› move re-bucketing, detail + stage-sheet PATCH, back — 12/12).
|
||||||
|
No real-phone check yet (same deferral as P1–P3a). Reuses the P2/P3a primitives directly; **no backend
|
||||||
|
change.** **Deploy:** folds into the next s9pk.
|
||||||
|
- **Phase 5 — Reminders — BUILT 2026-06-19 (deploy pending).** Lean **`MobileReminders`**
|
||||||
|
(`RemindersPage` is now a `useIsMobile()` wrapper → `Desktop`/`Mobile`; the desktop page renamed
|
||||||
|
`DesktopRemindersPage`, otherwise untouched). **Urgency-grouped list** over `/api/reminders`
|
||||||
|
(Overdue → Due soon → Later → Done → Cancelled buckets via `reminderBucket`; group headers carry the
|
||||||
|
overdue-red/due-soon-amber tint) with a compact **Active/Done/All** segmented filter + **`+ New`**.
|
||||||
|
Each row is a **`ReminderRow`** pointer-drag swipe (own per-row drag state): **swipe-left → mark done**,
|
||||||
|
**swipe-right → snooze +7d** (threshold 70px; snooze keeps status `open` and pushes `due_date`, mirroring
|
||||||
|
the desktop's "no wake mechanism" rationale), a **tap → create/edit `BottomSheet`** (title · due date ·
|
||||||
|
investor *(create-only free-text label — PATCH can't change investor, matching the backend)* · assignee
|
||||||
|
*(if `/api/users` is readable)* · details · status *(edit-only)* · Delete). Vertical-dominant drags release
|
||||||
|
to list scroll; non-swipeable (done/cancelled) rows stay tap-to-edit. Added `formatDueShort`/`reminderDueDelta`
|
||||||
|
(local-midnight delta — the desktop `formatDate` mis-renders FUTURE dates). A `reviewer` pass was applied
|
||||||
|
(**`pointercancel` no longer fires a spurious mark-done** — the key fix; stray drag on a non-swipeable row
|
||||||
|
recovers as a tap; cancelled gets its own bucket header). **No backend change.** Verified: render-smoke
|
||||||
|
green + a throwaway jsdom 375px harness (grouping/counts, swipe done + snooze PATCH, tap→edit prefilled,
|
||||||
|
create POST, Done-filter reload — 12/12). No real-phone check yet (same deferral as P1–P4). **Deploy:**
|
||||||
|
folds into the next s9pk.
|
||||||
|
- **Phase 6 — Light theme + toggle — BUILT 2026-06-19 (deploy pending).** App-wide light theme
|
||||||
|
behind a `:root[data-theme="light"]` switch; **dark stays the default** (a pre-paint boot script
|
||||||
|
in `<head>` reads `localStorage.venture_crm_theme`, no `prefers-color-scheme` auto-switch). Toggle
|
||||||
|
is **app-wide**: a labeled control in the desktop sidebar footer + a sun/moon icon in the mobile
|
||||||
|
top bar, both driven by one `theme` state in `App` (single source of truth). Authoritative color
|
||||||
|
pairs came from the **full Claude Design export** (`design/_imports/2026-06-19_zip-file/`, incl. the
|
||||||
|
previously-missing `store.js` + the four `*App.dc.html` `DCLogic` palettes) — every stage/recency/
|
||||||
|
note/priority/reminder/money tint uses the designer's exact dark+light values, not guesses.
|
||||||
|
- **Method (zero dark-mode regression by construction):** grew `:root` to a full token set (44
|
||||||
|
themed color slots) whose **dark values equal the originals byte-for-byte**, so `var(--x)`
|
||||||
|
resolves identically in dark — verified (16 exact-match vars asserted == their original hex).
|
||||||
|
Migrated **319 hex literals → `var()`** across the JSX inline region *and* the `<style>` block
|
||||||
|
(a context-aware script for the unambiguous structural ones; targeted edits for the
|
||||||
|
context-dependent semantic/chip helpers — `StageChip` is now className-based off
|
||||||
|
`.stage-chip--{stage}`, `PIPELINE_STAGE_CHIP` deleted). All mobile surfaces + chrome are fully
|
||||||
|
var-based → **mobile light is complete**.
|
||||||
|
- **Known light rough edges (desktop only — for the conformance pass below, NOT mobile):** a
|
||||||
|
handful of bespoke `<style>`-block desktop shades (login glow/gradients `#101926`/`#4a9adf`,
|
||||||
|
scrollbar `#36506a`, desktop table row-hover `#172435`, dashboard KPI green `#10b981`) and the
|
||||||
|
legacy off-palette `.badge-*` family (`#ec407a`/`#ff9800`/etc., partly vestigial old-stage
|
||||||
|
badges) stay dark-tuned in light; **shadows** also stay dark-tuned (not yet tokenized). A few
|
||||||
|
desktop one-off shades were *consolidated* to the nearest token (small intentional dark deltas:
|
||||||
|
e.g. `#2a3a4d`→border, `#d9a15f`→due-soon, `#f3b2b2`→danger-text) — enumerate when polishing.
|
||||||
|
- **Verified:** render-smoke green (Babel transform + jsdom mount); a throwaway jsdom interaction
|
||||||
|
harness mounted the **authed shell** and exercised the toggle (boot-defaults-dark → click→light
|
||||||
|
+persist+relabel → click→dark, 7/7); theme parity + dark-identity + no-undefined-var checks all
|
||||||
|
green. **No real-phone / real-browser check yet** (same deferral as P1–P5 — verify on a device).
|
||||||
|
**Deploy:** folds into the next s9pk.
|
||||||
|
|
||||||
|
- **Phase 7 (NEW) — Full design-conformance pass against the complete Claude Design export
|
||||||
|
(`design/_imports/2026-06-19_zip-file/`).** Per Grant (2026-06-19): make sure *everything we
|
||||||
|
implement* matches what he built in Claude Design — **all buttons, colors, spacing, and
|
||||||
|
functionality**, across all four surfaces + the light theme. Concretely: (1) run `design-checker`
|
||||||
|
now that the surfaces exist; (2) reconcile remaining drift (the P6 desktop light rough edges
|
||||||
|
above — bespoke `<style>` shades, the legacy `.badge-*` family, themed shadows; plus any
|
||||||
|
button/interaction deltas vs the `*App.dc.html` comps + the ~25 `screenshots/`); (3) re-pull
|
||||||
|
anything still cloud-only. This is the "conform to the design" sweep that the per-surface builds
|
||||||
|
(P2–P6) deferred.
|
||||||
|
|
||||||
|
**Note on `design-checker`:** not run for this round-trip — it audits *existing* UI conformance,
|
||||||
|
and the desktop UI still conforms to §1–7 (unchanged). The mobile gap is greenfield
|
||||||
|
implementation (captured here), not conformance drift, so there's nothing for it to flag yet; run
|
||||||
|
it after the mobile surfaces exist.
|
||||||
|
|
||||||
## Definition of done for "Airtable substitute" v1
|
## Definition of done for "Airtable substitute" v1
|
||||||
- Team can manage all investors in one master table
|
- Team can manage all investors in one master table
|
||||||
- Saved views replicate current Airtable workflows
|
- Saved views replicate current Airtable workflows
|
||||||
|
|||||||
@@ -0,0 +1,483 @@
|
|||||||
|
"""Daily activity digest — content builder (Phase B).
|
||||||
|
|
||||||
|
Assembles the per-user -> per-investor email-activity digest and summarizes each
|
||||||
|
team member's day with ONE narrative paragraph from the LOCAL Spark model
|
||||||
|
(ingest/llm.py via Spark Control). NEVER Claude: the digest is deliberately
|
||||||
|
un-anonymized (real LP names + email substance), so every summarization stays on
|
||||||
|
Ten31 infra. Keeping the substance local is the whole point — this is the one
|
||||||
|
path that intentionally bypasses the scrub -> Claude -> re-hydrate boundary.
|
||||||
|
|
||||||
|
This is an internal ops email to the team's own admins, so it is exempt from the
|
||||||
|
"agents draft, humans send" rule — that rule governs outward LP/prospect contact,
|
||||||
|
not an internal digest to the fund's own inboxes. Never extend it to send to LPs.
|
||||||
|
|
||||||
|
Soft-delete: every read here filters the relevant tombstones —
|
||||||
|
`email_account_messages.deleted_at IS NULL`, `users.is_active = 1`, and the
|
||||||
|
org/contact name joins drop soft-deleted rows (falling back to the raw address).
|
||||||
|
|
||||||
|
Stdlib only; the local-LLM client is imported lazily so this module stays
|
||||||
|
importable (and testable with an injected chat fn) without Spark configured.
|
||||||
|
"""
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import sqlite3
|
||||||
|
from datetime import datetime, timedelta, timezone
|
||||||
|
|
||||||
|
|
||||||
|
# One row per (account-sighting x investor-link) in the window. Grouped into
# per-user buckets in Python. Investor display name resolves fundraising grid ->
# organization -> contact -> the raw matched address, skipping soft-deleted
# org/contact rows (fundraising_investors has no soft-delete column — it is a
# rebuilt projection of the grid).
#
# Bind parameters, in order: window start (inclusive) and window end
# (exclusive), both compared against e.sent_at. Only matched emails
# (e.is_matched = 1), live sightings (eam.deleted_at IS NULL) and active users
# (u.is_active = 1) are returned. Because of the inner join on
# email_investor_links, an email linked to several investors yields several
# rows, and so does an email sighted in several mailboxes.
_ACTIVITY_SQL = """
SELECT
    ea.user_id AS user_id,
    u.username AS username,
    u.full_name AS full_name,
    ea.email_address AS account_email,
    eam.is_sent AS is_sent,
    e.id AS email_id,
    e.from_email AS from_email,
    e.subject AS subject,
    e.body_text AS body_text,
    e.snippet AS snippet,
    e.sent_at AS sent_at,
    COALESCE(
        NULLIF(TRIM(fi.investor_name), ''),
        NULLIF(TRIM(o.name), ''),
        NULLIF(TRIM(COALESCE(c.first_name, '') || ' ' || COALESCE(c.last_name, '')), ''),
        eil.matched_address
    ) AS investor_name
FROM email_account_messages eam
JOIN email_accounts ea ON ea.id = eam.account_id
JOIN users u ON u.id = ea.user_id
JOIN emails e ON e.id = eam.email_id
JOIN email_investor_links eil ON eil.email_id = e.id
LEFT JOIN fundraising_investors fi ON fi.id = eil.fundraising_investor_id
LEFT JOIN organizations o ON o.id = eil.organization_id AND o.deleted_at IS NULL
LEFT JOIN contacts c ON c.id = eil.contact_id AND c.deleted_at IS NULL
WHERE eam.deleted_at IS NULL
  AND u.is_active = 1
  AND e.is_matched = 1
  AND e.sent_at >= ? AND e.sent_at < ?
ORDER BY u.full_name, u.username, e.sent_at ASC
"""
|
||||||
|
|
||||||
|
# Horizontal rule: 52 box-drawing dashes.
# NOTE(review): presumably the section separator of the plain-text digest body —
# the renderer is outside this excerpt; confirm.
_RULE = "─" * 52

# Fixed footer text; it states that narratives are generated locally (Spark)
# and that the sending mailbox is unmonitored.
_FOOTER = ("— Internal Ten31 CRM digest. Narratives are generated locally (Spark), "
           "never Claude. This mailbox is unmonitored.")

# Instruction text for the per-team-member narrative: 2-4 past-tense sentences
# of plain prose that name the investors engaged, with no greeting, bullets or
# sign-off.
# NOTE(review): presumably sent as the system message to the local chat
# function — the call site is outside this excerpt; confirm.
_SYSTEM = (
    "You write a brief internal activity digest for a venture fund's partners. "
    "Given one team member's emails with investors over the last day, write 2-4 "
    "sentences summarizing what they did: which investors they engaged and the "
    "gist of each thread. Name the investors. Past tense, plain prose, no "
    "greeting, no bullet points, no sign-off."
)
|
||||||
|
|
||||||
|
# Reminders due is a current-state addendum (what needs action now), NOT bound to the
# email-activity window — a 6 PM digest should surface what's overdue / due today.
# status='open' only: a 'snoozed' reminder is an explicit mute, so it stays out of the
# digest by design (the quick-snooze UI keeps a reminder 'open' with a pushed-out date).
#
# Single bind parameter: the cut-off day. It is compared as text against the
# first 10 characters of due_date, so it should be a 'YYYY-MM-DD' string for the
# lexicographic comparison to behave as a date comparison. Soft-deleted
# reminders and reminders with a NULL/blank due_date are excluded. The users
# join is a LEFT JOIN, so an unassigned reminder still appears (assignee NULL).
_REMINDERS_SQL = """
SELECT r.title AS title,
       r.due_date AS due_date,
       r.investor_name AS investor_name,
       COALESCE(NULLIF(TRIM(u.full_name), ''), u.username) AS assignee
FROM reminders r
LEFT JOIN users u ON u.id = r.assignee_id
WHERE r.deleted_at IS NULL
  AND r.status = 'open'
  AND r.due_date IS NOT NULL AND TRIM(r.due_date) != ''
  AND substr(r.due_date, 1, 10) <= ?
ORDER BY r.due_date ASC
"""
|
||||||
|
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------ collection
|
||||||
|
|
||||||
|
def _fetch_activity_rows(conn, since_iso, until_iso):
    """Run the activity query for the half-open window [since_iso, until_iso).

    Returns the raw (sighting x investor-link) rows exactly as the cursor
    yields them. Any sqlite3.OperationalError is treated as "the email tables
    are not present" (integration disabled) and produces an empty list, so
    callers simply have nothing to report.
    """
    window = (since_iso, until_iso)
    try:
        rows = conn.execute(_ACTIVITY_SQL, window).fetchall()
    except sqlite3.OperationalError:
        rows = []
    return rows
|
||||||
|
|
||||||
|
|
||||||
|
def _own_addresses(conn):
    """Return the set of enrolled mailbox addresses, stripped and lower-cased.

    Used to decide whether an email is outbound (from us) or inbound (from the
    investor) at the email level: callers test a normalised sender address for
    membership in this set.

    NULL or blank `email_address` values are skipped. Without that, the empty
    string would be a member of the set, and an email whose sender address is
    missing (normalised to "") would be misclassified as sent by us.

    Returns an empty set when the query raises sqlite3.OperationalError, which
    is treated as "email_accounts table absent" (integration disabled).
    """
    try:
        rows = conn.execute("SELECT email_address FROM email_accounts").fetchall()
    except sqlite3.OperationalError:
        return set()
    normalised = ((row[0] or "").strip().lower() for row in rows)
    return {address for address in normalised if address}
|
||||||
|
|
||||||
|
|
||||||
|
def collect_user_activity(conn, since_iso, until_iso):
    """Bucket the window's email activity by team member.

    Covers emails in [since_iso, until_iso). One bucket per user who appears in
    the activity rows, shaped as {user_id, username, full_name, account_email,
    emails[], investors[], sent, received, total}. The result is an empty list
    when there are no rows (including when the email tables are absent).

    Direction is per-mailbox: an email counts as "sent" when the sighting's
    is_sent flag is truthy, i.e. THIS user sent it. Each email appears once per
    user, carrying the investor names linked to it in first-seen order; the
    bucket-level `investors` list is the sorted union of those names. Buckets
    are ordered case-insensitively by full name, falling back to username.
    """
    buckets = {}
    for row in _fetch_activity_rows(conn, since_iso, until_iso):
        user_id = row["user_id"]
        if user_id not in buckets:
            # Identity fields come from the first row seen for this user.
            buckets[user_id] = {
                "user_id": user_id,
                "username": row["username"],
                "full_name": row["full_name"],
                "account_email": row["account_email"],
                "_emails": {},
                "_inv": set(),
            }
        bucket = buckets[user_id]

        email_id = row["email_id"]
        if email_id not in bucket["_emails"]:
            # Likewise, the first row for an email fixes its direction and text.
            bucket["_emails"][email_id] = {
                "email_id": email_id,
                "direction": "sent" if row["is_sent"] else "received",
                "subject": row["subject"],
                "sent_at": row["sent_at"],
                "text": row["body_text"] or row["snippet"] or "",
                "investors": [],
            }
        email = bucket["_emails"][email_id]

        investor = (row["investor_name"] or "").strip()
        if not investor:
            continue
        if investor not in email["investors"]:
            email["investors"].append(investor)
        bucket["_inv"].add(investor)

    result = []
    for bucket in buckets.values():
        emails = list(bucket["_emails"].values())
        emails.sort(key=lambda item: item["sent_at"] or "")
        total = len(emails)
        sent_count = len([item for item in emails if item["direction"] == "sent"])
        result.append({
            "user_id": bucket["user_id"],
            "username": bucket["username"],
            "full_name": bucket["full_name"],
            "account_email": bucket["account_email"],
            "emails": emails,
            "investors": sorted(bucket["_inv"]),
            "sent": sent_count,
            "received": total - sent_count,
            "total": total,
        })
    return sorted(
        result,
        key=lambda entry: (entry["full_name"] or entry["username"] or "").lower(),
    )
|
||||||
|
|
||||||
|
|
||||||
|
def collect_investor_activity(conn, since_iso, until_iso):
    """Pivot the window's activity by investor, across the whole team.

    Each email is counted once per investor however many mailboxes sighted it,
    so a reply to several team members is a single entry. Direction is decided
    at the EMAIL level: "out" when the sender address is one of our enrolled
    mailboxes, otherwise "in". Rows without an investor name are grouped under
    "(unmatched)".

    Each bucket: {name, emails[{email_id, direction, subject, sent_at,
    members[]}], inbound, outbound, total}. `members` lists the team members
    whose sighting has is_sent set, and only for outbound mail; inbound mail is
    "from them", so it carries no member. Buckets are sorted most-active first,
    ties broken by case-insensitive name.
    """
    rows = _fetch_activity_rows(conn, since_iso, until_iso)
    our_mailboxes = _own_addresses(conn)

    by_investor = {}
    for row in rows:
        label = (row["investor_name"] or "").strip() or "(unmatched)"
        bucket = by_investor.setdefault(label, {"name": label, "_emails": {}})

        email_id = row["email_id"]
        entry = bucket["_emails"].get(email_id)
        if entry is None:
            sender = (row["from_email"] or "").lower().strip()
            entry = {
                "email_id": email_id,
                "direction": "out" if sender in our_mailboxes else "in",
                "subject": row["subject"],
                "sent_at": row["sent_at"],
                "members": [],
            }
            bucket["_emails"][email_id] = entry

        # Only an outbound email's sending sighting contributes a member name.
        if entry["direction"] != "out" or not row["is_sent"]:
            continue
        member = (row["full_name"] or row["username"] or "").strip()
        if member and member not in entry["members"]:
            entry["members"].append(member)

    summary = []
    for bucket in by_investor.values():
        ordered = sorted(bucket["_emails"].values(),
                         key=lambda item: item["sent_at"] or "")
        total = len(ordered)
        # Direction is only ever "in" or "out", so one count determines the other.
        outbound = sum(1 for item in ordered if item["direction"] == "out")
        summary.append({
            "name": bucket["name"],
            "emails": ordered,
            "inbound": total - outbound,
            "outbound": outbound,
            "total": total,
        })
    summary.sort(key=lambda entry: (-entry["total"], entry["name"].lower()))
    return summary
|
||||||
|
|
||||||
|
|
||||||
|
def collect_due_reminders(conn, today_iso):
    """List open reminders due on or before `today_iso` (overdue + due today).

    Soft-delete filtering and ordering (soonest first) are done by the query.
    Returns [{title, due_date, investor_name, assignee, overdue}], where
    `due_date` is cut to its first 10 characters and `overdue` is true only
    when that day sorts strictly before `today_iso`. Returns an empty list if
    the query raises sqlite3.OperationalError, which is treated as the
    reminders table being absent (feature not migrated on this box).
    """
    try:
        rows = conn.execute(_REMINDERS_SQL, (today_iso,)).fetchall()
    except sqlite3.OperationalError:
        return []

    def _text(value):
        # NULL columns become "", everything else is whitespace-trimmed.
        return (value or "").strip()

    reminders = []
    for row in rows:
        due_day = str(row["due_date"] or "")[:10]
        is_overdue = bool(due_day) and due_day < today_iso
        reminders.append({
            "title": _text(row["title"]),
            "due_date": due_day,
            "investor_name": _text(row["investor_name"]),
            "assignee": _text(row["assignee"]),
            "overdue": is_overdue,
        })
    return reminders
|
||||||
|
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------ policy
|
||||||
|
|
||||||
|
# app_settings key under which the admin-panel digest policy is stored as JSON.
DIGEST_POLICY_KEY = "digest_policy"

# Fallback policy when neither the DB row nor the env vars say otherwise:
# digest off, send hour 18.
DEFAULT_DIGEST_POLICY = {"enabled": False, "send_hour": 18}


def _coerce_send_hour(value):
    """Return `value` as an int clamped to 0-23, or None if it is unusable.

    OverflowError is caught as well as ValueError/TypeError: a JSON number such
    as 1e999 (or the literal Infinity) parses to a non-finite float, and int()
    raises OverflowError for it rather than ValueError.
    """
    try:
        hour = int(value)
    except (ValueError, TypeError, OverflowError):
        return None
    return min(23, max(0, hour))


def load_digest_policy(conn):
    """Resolve the live digest policy. Precedence: the app_settings DB row (the
    admin-panel control) wins; absent that, the CRM_DIGEST_ENABLED/SEND_HOUR env
    vars seed a first-boot default; absent those, DEFAULT_DIGEST_POLICY. Returns
    {enabled: bool, send_hour: int 0-23}. Shared by the server (API) and the
    scheduler so both read one source of truth.

    Resolution is best-effort and never raises for bad configuration: an
    unparseable env hour, a missing app_settings table, a corrupt JSON blob, or
    an unusable saved send_hour (including a non-finite number) each leave the
    value resolved so far untouched.
    """
    pol = dict(DEFAULT_DIGEST_POLICY)  # copy, so the module default is never mutated

    env_enabled = os.environ.get("CRM_DIGEST_ENABLED")
    if env_enabled is not None:
        # Tolerate padding from env files / shells: " true " still means on.
        pol["enabled"] = env_enabled.strip().lower() in ("1", "true", "yes", "on")
    env_hour = os.environ.get("CRM_DIGEST_SEND_HOUR")
    if env_hour:
        hour = _coerce_send_hour(env_hour)
        if hour is not None:
            pol["send_hour"] = hour

    try:
        row = conn.execute(
            "SELECT value_json FROM app_settings WHERE key = ?", (DIGEST_POLICY_KEY,)).fetchone()
    except sqlite3.OperationalError:
        row = None  # treated as: settings table not present
    if not row:
        return pol

    try:
        saved = json.loads(row["value_json"])
    except Exception:
        # Deliberately broad: a corrupt or NULL settings blob (or a row object
        # that cannot be indexed by column name) must degrade to the env/default
        # policy rather than take down the caller.
        saved = None
    if isinstance(saved, dict):
        if "enabled" in saved:
            pol["enabled"] = bool(saved["enabled"])
        if "send_hour" in saved:
            hour = _coerce_send_hour(saved["send_hour"])
            if hour is not None:
                pol["send_hour"] = hour
    return pol
|
||||||
|
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------ window
|
||||||
|
|
||||||
|
# Cap a manual/preview window so an admin can't accidentally fire a build over
|
||||||
|
# years of history — each active user in the window costs one Spark call. ~3
|
||||||
|
# months covers any realistic "since last quarter" preview.
|
||||||
|
MAX_WINDOW_DAYS = 92


def resolve_digest_window(*, hours=None, since=None, now_local=None, now_utc=None):
    """Resolve a digest content window to (since_iso, until_iso) as UTC ISO-8601.

    `until` is always now. The start is driven by exactly one of:
      - since: a local calendar date 'YYYY-MM-DD' -> that day's local midnight
        (only the first 10 characters are parsed; the offset of `now_local` is used)
      - hours: a positive integer lookback (the default path; 24 when nothing given)
    `since` wins if both are supplied. The span is clamped to MAX_WINDOW_DAYS and
    the start must be strictly before now.

    Raises ValueError on malformed input so the caller can return a clean 400.
    Absurdly large `hours` values and dates too early to convert to UTC are
    handled here as well (clamped, or reported as ValueError) rather than leaking
    an OverflowError from datetime arithmetic. Pure (now_* injectable) for testing.

    Used by the admin-panel preview and manual-send — neither advances the daily
    cursor, so a wide window here never suppresses the scheduled digest.
    """
    nu = (now_utc or datetime.now(timezone.utc)).astimezone(timezone.utc)
    nl = now_local or datetime.now().astimezone()
    floor = nu - timedelta(days=MAX_WINDOW_DAYS)

    if since not in (None, ""):
        try:
            d = datetime.strptime(str(since).strip()[:10], "%Y-%m-%d")
        except ValueError as exc:
            raise ValueError("since must be a date in YYYY-MM-DD form") from exc
        try:
            start = d.replace(tzinfo=nl.tzinfo or timezone.utc).astimezone(timezone.utc)
        except OverflowError as exc:
            # e.g. 0001-01-01 at a positive UTC offset falls before datetime.min.
            raise ValueError("since must be a date in YYYY-MM-DD form") from exc
    else:
        h = 24 if hours in (None, "") else hours
        try:
            h = int(h)
        except (ValueError, TypeError, OverflowError) as exc:
            raise ValueError("hours must be an integer") from exc
        if h < 1:
            raise ValueError("hours must be a positive integer")
        # Anything beyond the cap resolves to `floor` anyway; bounding it first
        # keeps timedelta()/subtraction from overflowing on huge inputs.
        h = min(h, MAX_WINDOW_DAYS * 24)
        start = nu - timedelta(hours=h)

    if start >= nu:
        raise ValueError("window start must be before now")
    if start < floor:
        start = floor  # clamp to the max span (the response echoes the real window)

    fmt = "%Y-%m-%dT%H:%M:%SZ"
    return start.strftime(fmt), nu.strftime(fmt)
|
||||||
|
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------ summarization
|
||||||
|
|
||||||
|
def _default_chat(prompt, system=None, max_tokens=220):
    """Default chat backend: lazily import `llm` from the sibling `ingest/` directory
    and delegate to `llm.chat` (per the original note, the local Qwen chat reached
    via Spark Control).

    The import is deferred to call time so this module loads without the ingest
    package. The `ingest` directory is put on `sys.path` only if it is not already
    there, so repeated calls (one per active user per digest) do not keep growing
    `sys.path`.

    Returns whatever `llm.chat(prompt, system=..., max_tokens=...)` returns; any
    import or chat error propagates to the caller.
    """
    import os
    import sys

    ingest_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "ingest")
    if ingest_dir not in sys.path:
        sys.path.insert(0, ingest_dir)
    import llm  # noqa: E402

    return llm.chat(prompt, system=system, max_tokens=max_tokens)
|
||||||
|
|
||||||
|
|
||||||
|
def _user_email_block(group, max_emails=20, body_chars=500):
    """Flatten a user's emails into the bullet list fed to the summarizer prompt.

    Takes at most `max_emails` entries from `group["emails"]`. Each becomes one line:
    `- [direction] investors | subject: ... | body`, where the body is the email
    text with whitespace runs collapsed, truncated to `body_chars`, and omitted
    entirely when empty. Lines are joined with newlines.
    """

    def _render(email):
        who = ", ".join(email["investors"]) or "(unmatched)"
        collapsed = " ".join((email.get("text") or "").split())
        excerpt = collapsed[:body_chars]
        rendered = f"- [{email['direction']}] {who} | subject: {email.get('subject') or '(none)'}"
        return f"{rendered} | {excerpt}" if excerpt else rendered

    return "\n".join(_render(email) for email in group["emails"][:max_emails])
|
||||||
|
|
||||||
|
|
||||||
|
def _fallback_narrative(group):
    """Count-based stand-in for the model narrative.

    Used when no usable model reply is available, so the digest still goes out
    (always-send) carrying real numbers: who, total / sent / received counts, and
    the matched investor names.
    """
    who = group.get("full_name") or group.get("username") or "Team member"
    investor_list = ", ".join(group["investors"]) or "no matched investors"
    pieces = [
        f"{who} had {group['total']} email(s) ",
        f"({group['sent']} sent, {group['received']} received) with {investor_list}. ",
        "(Local summary unavailable.)",
    ]
    return "".join(pieces)
|
||||||
|
|
||||||
|
|
||||||
|
def summarize_user_day(group, chat_fn=None):
    """Produce the narrative paragraph for one user's activity.

    Sends the user's name plus `_user_email_block(group)` to `chat_fn` (default:
    `_default_chat`) with the `_SYSTEM` prompt and a 220-token cap, then collapses
    all whitespace in the reply to single spaces. If the call raises, or the
    reply is empty after collapsing, `_fallback_narrative(group)` is returned
    instead, so a model outage never blocks the digest.
    """
    chat = chat_fn if chat_fn else _default_chat
    who = group.get("full_name") or group.get("username") or "The team member"
    prompt = f"Team member: {who}\nEmails:\n{_user_email_block(group)}"

    try:
        reply = chat(prompt, system=_SYSTEM, max_tokens=220)
        narrative = " ".join((reply or "").split())
    except Exception:
        # Deliberate best effort: any backend failure degrades to the count summary.
        narrative = ""

    return narrative if narrative else _fallback_narrative(group)
|
||||||
|
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------ composition
|
||||||
|
|
||||||
|
def _parse_iso(iso):
    """Parse a timestamp string into a UTC-aware datetime, or return None.

    Accepts `YYYY-MM-DDTHH:MM:SS` with an optional fractional part and/or a
    trailing `Z`, after stripping surrounding whitespace. The result is always
    tagged as UTC. Falsy input and any other layout yield None.
    """
    text = str(iso).strip() if iso else ""
    if not text:
        return None

    layouts = ("%Y-%m-%dT%H:%M:%S.%fZ", "%Y-%m-%dT%H:%M:%SZ", "%Y-%m-%dT%H:%M:%S")
    for layout in layouts:
        try:
            parsed = datetime.strptime(text, layout)
        except ValueError:
            continue
        return parsed.replace(tzinfo=timezone.utc)
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def _fmt_local(iso):
    """Render a UTC ISO timestamp in the machine's local zone, e.g. 'Jun 17 2:14 PM'.

    The 12-hour clock is computed by hand (no `%-I`, which is platform-specific).
    Input that `_parse_iso` cannot read is returned unchanged as `str(iso)`.
    """
    parsed = _parse_iso(iso)
    if parsed is None:
        return str(iso)

    local = parsed.astimezone()
    # divmod splits 0-23 into (0=AM / 1=PM, hour within the half day).
    half, hour = divmod(local.hour, 12)
    if hour == 0:
        hour = 12
    meridiem = ("AM", "PM")[half]
    month = local.strftime("%b")
    return f"{month} {local.day} {hour}:{local.minute:02d} {meridiem}"
|
||||||
|
|
||||||
|
|
||||||
|
def _reminders_section(due_reminders):
    """Build the lines of the 'REMINDERS DUE' block.

    Reminders are split by their `overdue` flag into an 'Overdue' list and a
    'Due today' list; a list with no entries is left out. When there are no
    reminders at all the result is [], so the digest gains no empty section.
    """
    if not due_reminders:
        return []

    def _bullet(item):
        prefix = f"{item['investor_name']} — " if item["investor_name"] else ""
        suffix = f" [{item['assignee']}]" if item["assignee"] else ""
        return f" • {prefix}{item['title']} (due {item['due_date']}){suffix}"

    late, today = [], []
    for item in due_reminders:
        bucket = late if item["overdue"] else today
        bucket.append(item)

    section = ["", _RULE, f"REMINDERS DUE ({len(due_reminders)})", _RULE]
    for heading, bucket in (("Overdue", late), ("Due today", today)):
        if not bucket:
            continue
        section.append("")
        section.append(f"{heading} ({len(bucket)}):")
        section.extend([_bullet(item) for item in bucket])
    return section
|
||||||
|
|
||||||
|
|
||||||
|
def _compose_body(user_groups, investor_groups, narratives, since_iso, until_iso, due_reminders=None):
    """Render the whole digest as one newline-joined plain-text string.

    Layout, top to bottom: title + today's local date + the activity window; then
    either a single 'no activity' line (when `user_groups` is empty) or a totals
    line followed by the BY TEAM MEMBER and BY INVESTOR sections; then the
    reminders block from `_reminders_section`; then a rule and `_FOOTER`.

    Args:
        user_groups: per-user dicts; reads `full_name`, `username`,
            `account_email`, `user_id`, `total`, `sent`, `received`, `investors`
            and `emails` (each with `direction`, `investors`, `subject`, `sent_at`).
        investor_groups: per-investor dicts; reads `name`, `total`, `inbound`,
            `outbound` and `emails` (each with `direction`, `members`, `subject`,
            `sent_at`).
        narratives: mapping user_id -> narrative paragraph; a missing id renders
            as an empty line.
        since_iso, until_iso: window bounds, shown via `_fmt_local`.
        due_reminders: entries for `_reminders_section`; None is treated as [].
    """
    # NOTE(review): block nesting below (both sections living under the `else`)
    # was reconstructed from a listing that had lost its indentation — confirm.
    title_date = datetime.now().astimezone().strftime("%A, %b %d %Y")
    window = f"{_fmt_local(since_iso)} – {_fmt_local(until_iso)}"
    L = ["Ten31 CRM — Daily Activity Digest", title_date, f"Window: {window}", ""]

    if not user_groups:
        L.append("No tracked email activity from any user in this window.")
    else:
        total_emails = sum(g["total"] for g in user_groups)
        # Distinct investor names across all users (set comprehension de-duplicates).
        total_invs = len({i for g in user_groups for i in g["investors"]})
        L.append(f"{len(user_groups)} team member(s) active · {total_emails} email(s) "
                 f"· {total_invs} investor(s)")

        # ── Section 1: by team member (who did what; per-user Spark narrative) ──
        L += ["", _RULE, "BY TEAM MEMBER", _RULE]
        for g in user_groups:
            invs = ", ".join(g["investors"]) or "(no matched investor)"
            L += ["",
                  f"{g['full_name'] or g['username']} · {g['account_email']}",
                  f"{g['total']} email(s) ({g['sent']} sent, {g['received']} received) "
                  f"· {invs}", "",
                  narratives.get(g["user_id"], ""), ""]
            for em in g["emails"]:
                # NOTE(review): this loop tests direction == "sent" while the
                # investor loop below tests "out"; presumably the two collectors
                # use different vocabularies — verify against collect_user_activity.
                arrow = "→ Sent" if em["direction"] == "sent" else "← Received"
                invs_e = ", ".join(em["investors"]) or "(unmatched)"
                subj = em.get("subject") or "(no subject)"
                L.append(f" {arrow} · {invs_e} · \"{subj}\" ({_fmt_local(em['sent_at'])})")

        # ── Section 2: by investor (team-wide; both directions, structured) ──
        L += ["", _RULE, "BY INVESTOR", _RULE]
        for inv in investor_groups:
            L += ["",
                  f"{inv['name']} · {inv['total']} email(s) "
                  f"({inv['inbound']} in, {inv['outbound']} out)"]
            for em in inv["emails"]:
                subj = em.get("subject") or "(no subject)"
                when = _fmt_local(em["sent_at"])
                if em["direction"] == "out":
                    # Outbound mail names the sending member(s); "team" when none recorded.
                    who = ", ".join(em["members"]) or "team"
                    L.append(f" → Sent by {who} · \"{subj}\" ({when})")
                else:
                    L.append(f" ← Received · \"{subj}\" ({when})")

    # ── Reminders due (current state — independent of the activity window) ──
    L += _reminders_section(due_reminders or [])

    L += ["", _RULE, _FOOTER]
    return "\n".join(L)
|
||||||
|
|
||||||
|
|
||||||
|
def build_digest(conn, since_iso, until_iso, chat_fn=None):
    """Assemble the daily digest for the window [since_iso, until_iso).

    Gathers per-user and per-investor activity, asks `summarize_user_day` for one
    narrative per user (via `chat_fn`), collects reminders due as of today's local
    date, and renders everything with `_compose_body`.

    Returns a dict with `subject`, `body`, `has_activity`, `user_count`,
    `email_count`, `investor_count` and `reminder_count`. A body is produced even
    for an empty window (the team chose always-send); `has_activity` tells the
    caller whether any user had activity.
    """
    per_user = collect_user_activity(conn, since_iso, until_iso)
    per_investor = collect_investor_activity(conn, since_iso, until_iso)

    narratives = {}
    for grp in per_user:
        uid = grp["user_id"]
        narratives[uid] = summarize_user_day(grp, chat_fn)

    # Reminders are current-state: keyed to today's local date, not the window.
    today_iso = datetime.now().astimezone().strftime("%Y-%m-%d")
    reminders = collect_due_reminders(conn, today_iso)

    text = _compose_body(per_user, per_investor, narratives, since_iso, until_iso, reminders)
    stamp = datetime.now().astimezone().strftime("%b %d")

    email_total = 0
    for grp in per_user:
        email_total += grp["total"]

    digest = {}
    digest["subject"] = f"Ten31 CRM — Daily Activity Digest · {stamp}"
    digest["body"] = text
    digest["has_activity"] = bool(per_user)
    digest["user_count"] = len(per_user)
    digest["email_count"] = email_total
    digest["investor_count"] = len(per_investor)
    digest["reminder_count"] = len(reminders)
    return digest
|
||||||
@@ -0,0 +1,69 @@
|
|||||||
|
"""Transport selection for CRM-originated email (daily digest, admin test sends).
|
||||||
|
|
||||||
|
Prefers Gmail-over-DWD — it reuses the service account that already powers email
|
||||||
|
capture (the grant includes gmail.compose, which can send), so there's no extra
|
||||||
|
credential to manage — and falls back to SMTP (`smtp_send`) when DWD isn't
|
||||||
|
available. One entry point so the digest and the admin test endpoint share the
|
||||||
|
same routing. Stdlib only.
|
||||||
|
"""
|
||||||
|
import os
|
||||||
|
|
||||||
|
|
||||||
|
class NoTransport(Exception):
    """Mail cannot be sent because no usable transport is set up: neither Gmail
    DWD nor SMTP is configured."""
|
||||||
|
|
||||||
|
|
||||||
|
def transport():
    """Report which mail transport is usable: 'gmail-dwd', 'smtp', or None.

    Gmail is probed first and wins when `gmail_send.gmail_available()` is truthy;
    otherwise SMTP is used when `smtp_send.smtp_configured()` is truthy. A probe
    that fails to import or raises counts as "not available" rather than an error.
    """

    def _gmail_ready():
        from email_integration import gmail_send
        return gmail_send.gmail_available()

    def _smtp_ready():
        import smtp_send
        return smtp_send.smtp_configured()

    for label, probe in (("gmail-dwd", _gmail_ready), ("smtp", _smtp_ready)):
        try:
            if probe():
                return label
        except Exception:
            # Missing module or misconfiguration: fall through to the next transport.
            continue
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def default_sender(conn):
    """Pick the address the DWD path should send as.

    Order of preference: a non-blank `CRM_DIGEST_SENDER` env var (trimmed); else,
    when `conn` is given, the email of the active admin user with the earliest
    `created_at` that has a non-blank email (trimmed). Returns None when neither
    source yields an address.
    """
    configured = os.environ.get("CRM_DIGEST_SENDER", "").strip()
    if configured:
        return configured
    if conn is None:
        return None

    query = ("SELECT email FROM users WHERE role='admin' AND is_active=1 "
             "AND email IS NOT NULL AND TRIM(email)!='' ORDER BY created_at LIMIT 1")
    admin = conn.execute(query).fetchone()
    if admin and admin["email"]:
        return admin["email"].strip()
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def send_digest(conn, to_addrs, subject, body, sender_email=None):
    """Deliver one message through whichever transport `transport()` reports.

    Gmail (DWD) sends as `sender_email`, falling back to `default_sender(conn)`;
    SMTP ignores the sender arguments. The transport's own result dict is returned
    with a 'transport' key ('gmail-dwd' or 'smtp') added.

    Raises:
        NoTransport: no transport is configured, or Gmail is available but there
            is no address to send as.
    """
    active = transport()

    if active == "gmail-dwd":
        from email_integration import gmail_send

        send_as = sender_email or default_sender(conn)
        if send_as:
            outcome = gmail_send.send_via_gmail(send_as, to_addrs, subject, body, conn=conn)
            outcome["transport"] = "gmail-dwd"
            return outcome
        # Gmail works but nobody to send as: a configuration gap, not a delivery
        # failure, so report it as NoTransport (callers turn that into a clear 400).
        raise NoTransport("Gmail is enabled but no sender address is set: "
                          "set CRM_DIGEST_SENDER or give an active admin an email.")

    if active == "smtp":
        import smtp_send

        outcome = smtp_send.send_email(to_addrs, subject, body)
        outcome["transport"] = "smtp"
        return outcome

    raise NoTransport("No email transport configured: enable Gmail (DWD) or set "
                      "SMTP via the 'Configure Digest SMTP' action.")
|
||||||
@@ -0,0 +1,92 @@
|
|||||||
|
"""Create a Gmail DRAFT (never send) in the sender's mailbox via domain-wide delegation
|
||||||
|
with the gmail.compose scope. Lets an approved Outreach draft land in the user's Gmail
|
||||||
|
(and therefore Superhuman) Drafts — as an in-thread reply when there's an active thread,
|
||||||
|
or a fresh email otherwise. The human reviews and sends from Gmail (guardrails #4, #6 —
|
||||||
|
our code only ever creates a draft, it never sends).
|
||||||
|
"""
|
||||||
|
import base64
|
||||||
|
import email.message
|
||||||
|
import json as _json
|
||||||
|
import urllib.error
|
||||||
|
import urllib.parse
|
||||||
|
import urllib.request
|
||||||
|
|
||||||
|
from . import config as _cfg
|
||||||
|
from . import credentials as _creds
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_subject_body(draft_text):
    """Separate a draft into (subject, body).

    When the first line of the trimmed draft starts with 'Subject:' (any case),
    the text after that first colon is the subject and everything after the first
    line, minus leading blank lines, is the body. Otherwise the subject is '' and
    the whole trimmed draft is the body.
    """
    cleaned = (draft_text or "").strip()
    first_line, _, remainder = cleaned.partition("\n")
    if not first_line.lower().startswith("subject:"):
        return "", cleaned
    subject = first_line.partition(":")[2].strip()
    return subject, remainder.lstrip("\n")
|
||||||
|
|
||||||
|
|
||||||
|
def _reply_target(conn, investor_id):
    """Find where an in-thread reply to this investor should go.

    Looks at the most recently sent matched email linked to `investor_id` and
    returns {to, thread_id, in_reply_to} built from its matched address, Gmail
    thread id and RFC Message-ID. Returns None when the lookup fails, no such
    email exists, or the link carries no matched address.
    """
    query = ("SELECT e.rfc_message_id, e.gmail_thread_id, l.matched_address "
             "FROM emails e JOIN email_investor_links l ON l.email_id = e.id "
             "WHERE l.fundraising_investor_id = ? AND e.is_matched = 1 "
             "ORDER BY e.sent_at DESC LIMIT 1")
    try:
        latest = conn.execute(query, (investor_id,)).fetchone()
    except Exception:
        # Any database problem simply means "no reply target".
        return None

    address = latest["matched_address"] if latest else None
    if not address:
        return None
    return dict(to=address,
                thread_id=latest["gmail_thread_id"],
                in_reply_to=latest["rfc_message_id"])
|
||||||
|
|
||||||
|
|
||||||
|
def _build_raw(from_addr, to_addr, subject, body, in_reply_to=None):
    """Serialize a plain-text message as URL-safe base64 ASCII text.

    A blank subject becomes '(no subject)'. When `in_reply_to` is given it is
    written to both the In-Reply-To and References headers. The payload key
    `raw` in the caller's drafts request carries the returned string.
    """
    headers = [("From", from_addr), ("To", to_addr), ("Subject", subject or "(no subject)")]
    if in_reply_to:
        headers += [("In-Reply-To", in_reply_to), ("References", in_reply_to)]

    message = email.message.EmailMessage()
    for header_name, header_value in headers:
        message[header_name] = header_value
    message.set_content(body or "")

    encoded = base64.urlsafe_b64encode(message.as_bytes())
    return encoded.decode("ascii")
|
||||||
|
|
||||||
|
|
||||||
|
def create_outreach_draft(conn, sender_email, investor_id, draft_text):
    """Place `draft_text` as a Gmail draft in `sender_email`'s mailbox; never sends.

    The draft is addressed to the investor's most recent matched address and, when
    that email has a thread id, attached to the thread as a reply.

    Returns a dict whose `status` is one of:
        no_sender             - `sender_email` is empty
        integration_disabled  - the integration config is switched off
        empty                 - the draft has no body text
        no_recipient          - no email history, so no address to draft to
        auth_error            - obtaining a compose-scope token failed (`reason`)
        gmail_error           - the drafts request failed (`reason`)
        ok                    - created; also carries `to`, `threaded`, `gmail_url`
    """
    if not sender_email:
        return {"status": "no_sender"}
    if not _cfg.CONFIG.enabled:
        return {"status": "integration_disabled"}

    subject, body = _parse_subject_body(draft_text)
    if not body.strip():
        return {"status": "empty"}

    target = _reply_target(conn, investor_id)
    if not target:
        return {"status": "no_recipient"}  # no email history -> no LP address to draft to

    try:
        token_provider = _creds.build_provider(lambda: conn)
        grant = token_provider.access_token_for(sender_email, _creds.GMAIL_COMPOSE_SCOPE)
        bearer = grant.token
    except Exception as exc:
        return {"status": "auth_error", "reason": str(exc)}

    recipient = target["to"]
    message = {"raw": _build_raw(sender_email, recipient, subject, body, target.get("in_reply_to"))}
    thread_id = target.get("thread_id")
    if thread_id:
        message["threadId"] = thread_id

    endpoint = f"https://gmail.googleapis.com/gmail/v1/users/{urllib.parse.quote(sender_email)}/drafts"
    request_headers = {"Authorization": f"Bearer {bearer}",
                       "Content-Type": "application/json"}
    request = urllib.request.Request(
        endpoint,
        data=_json.dumps({"message": message}).encode("utf-8"),
        method="POST",
        headers=request_headers,
    )

    try:
        with urllib.request.urlopen(request, timeout=20) as response:
            # Parsed only to confirm the reply is valid JSON; the content is unused.
            _json.loads(response.read())
    except urllib.error.HTTPError as http_err:
        detail = http_err.read().decode("utf-8", "replace")[:300]
        return {"status": "gmail_error", "reason": detail}
    except Exception as exc:
        return {"status": "gmail_error", "reason": str(exc)}

    return {"status": "ok", "to": recipient, "threaded": bool(target.get("thread_id")),
            "gmail_url": "https://mail.google.com/mail/u/0/#drafts"}
|
||||||
@@ -32,6 +32,11 @@ from . import errors
|
|||||||
|
|
||||||
|
|
||||||
GMAIL_READONLY_SCOPE = "https://www.googleapis.com/auth/gmail.readonly"
|
GMAIL_READONLY_SCOPE = "https://www.googleapis.com/auth/gmail.readonly"
|
||||||
|
# Compose scope (authorized in Workspace DWD). Two consumers: outreach (compose.py)
|
||||||
|
# only CREATES drafts — the human sends from Gmail; the daily-digest mailer
|
||||||
|
# (gmail_send.py) uses this same scope to SEND, since gmail.compose authorizes
|
||||||
|
# users.messages.send. (The narrow gmail.send scope is NOT on the DWD grant.)
|
||||||
|
GMAIL_COMPOSE_SCOPE = "https://www.googleapis.com/auth/gmail.compose"
|
||||||
GOOGLE_TOKEN_URL = "https://oauth2.googleapis.com/token"
|
GOOGLE_TOKEN_URL = "https://oauth2.googleapis.com/token"
|
||||||
|
|
||||||
|
|
||||||
@@ -61,13 +66,14 @@ class DWDCredentialProvider:
|
|||||||
self._cache: dict[str, AccessToken] = {}
|
self._cache: dict[str, AccessToken] = {}
|
||||||
self._lock = threading.Lock()
|
self._lock = threading.Lock()
|
||||||
|
|
||||||
def access_token_for(self, email_address: str) -> AccessToken:
|
def access_token_for(self, email_address: str, scope: str = GMAIL_READONLY_SCOPE) -> AccessToken:
|
||||||
|
key = f"{email_address}|{scope}"
|
||||||
with self._lock:
|
with self._lock:
|
||||||
cached = self._cache.get(email_address)
|
cached = self._cache.get(key)
|
||||||
if cached and cached.expires_at - time.time() > 60:
|
if cached and cached.expires_at - time.time() > 60:
|
||||||
return cached
|
return cached
|
||||||
token = self._mint(email_address)
|
token = self._mint(email_address, scope)
|
||||||
self._cache[email_address] = token
|
self._cache[key] = token
|
||||||
return token
|
return token
|
||||||
|
|
||||||
def revoke(self, email_address: str) -> None:
|
def revoke(self, email_address: str) -> None:
|
||||||
@@ -78,7 +84,7 @@ class DWDCredentialProvider:
|
|||||||
|
|
||||||
# ------------------------------------------------------------------ helpers
|
# ------------------------------------------------------------------ helpers
|
||||||
|
|
||||||
def _mint(self, subject_email: str) -> AccessToken:
|
def _mint(self, subject_email: str, scope: str = GMAIL_READONLY_SCOPE) -> AccessToken:
|
||||||
try:
|
try:
|
||||||
from cryptography.hazmat.primitives import hashes, serialization # type: ignore
|
from cryptography.hazmat.primitives import hashes, serialization # type: ignore
|
||||||
from cryptography.hazmat.primitives.asymmetric import padding # type: ignore
|
from cryptography.hazmat.primitives.asymmetric import padding # type: ignore
|
||||||
@@ -92,7 +98,7 @@ class DWDCredentialProvider:
|
|||||||
claim = {
|
claim = {
|
||||||
"iss": self._client_email,
|
"iss": self._client_email,
|
||||||
"sub": subject_email,
|
"sub": subject_email,
|
||||||
"scope": GMAIL_READONLY_SCOPE,
|
"scope": scope,
|
||||||
"aud": GOOGLE_TOKEN_URL,
|
"aud": GOOGLE_TOKEN_URL,
|
||||||
"iat": now,
|
"iat": now,
|
||||||
"exp": now + 3600,
|
"exp": now + 3600,
|
||||||
|
|||||||
@@ -57,10 +57,20 @@ def _json(v) -> str:
|
|||||||
# ------------------------------------------------------------------ email_accounts
|
# ------------------------------------------------------------------ email_accounts
|
||||||
|
|
||||||
def list_sync_ready_accounts(conn: sqlite3.Connection) -> list[sqlite3.Row]:
|
def list_sync_ready_accounts(conn: sqlite3.Connection) -> list[sqlite3.Row]:
|
||||||
|
# Ready = healthy ('pending'/'active') + transient-failing ('retrying', retried every
|
||||||
|
# cycle for fast recovery) + 'error' accounts whose last attempt was over an hour ago.
|
||||||
|
# The hour-backoff on 'error' means a terminal failure (auth/permanent) self-heals once
|
||||||
|
# the operator fixes it WITHOUT hammering Google, and un-sticks any mailbox parked by the
|
||||||
|
# pre-v0.1.0:104 bug where one timeout dark-listed it forever. (last_synced_at is stamped
|
||||||
|
# on every attempt, success or fail, so it doubles as the last-attempt clock here.)
|
||||||
cur = conn.cursor()
|
cur = conn.cursor()
|
||||||
cur.execute(
|
cur.execute(
|
||||||
"SELECT * FROM email_accounts "
|
"SELECT * FROM email_accounts "
|
||||||
"WHERE sync_enabled = 1 AND sync_status IN ('pending','active') "
|
"WHERE sync_enabled = 1 AND ("
|
||||||
|
" sync_status IN ('pending','active','retrying') "
|
||||||
|
" OR (sync_status = 'error' AND (last_synced_at IS NULL "
|
||||||
|
" OR last_synced_at < datetime('now','-1 hour')))"
|
||||||
|
") "
|
||||||
"ORDER BY last_synced_at IS NOT NULL, last_synced_at"
|
"ORDER BY last_synced_at IS NOT NULL, last_synced_at"
|
||||||
)
|
)
|
||||||
return cur.fetchall()
|
return cur.fetchall()
|
||||||
@@ -398,6 +408,275 @@ def start_sync_run(conn: sqlite3.Connection, *, account_id: str, kind: str) -> s
|
|||||||
return run_id
|
return run_id
|
||||||
|
|
||||||
|
|
||||||
|
def _resolve_entity(row) -> tuple:
    """Collapse one hydrated email_investor_links row to a (key, name) identity.

    Tiers are tried in order — grid investor, organization, contact — and the
    first one with both an id and a non-blank name wins, giving a typed key
    `fund:<id>` / `org:<id>` / `contact:<id>` and the trimmed name. If none
    qualifies, the raw matched address is used as (`addr:<lowercased>`, address).
    With no address either, the result is (None, None).

    The typed prefix lets the Communications filter target the right column.
    Per the join's note, soft-deleted org/contact rows arrive as NULL and so drop
    to the next tier.
    """
    tiers = (
        ("fund", "fund_id", "fund_name"),
        ("org", "org_id", "org_name"),
        ("contact", "contact_id", "contact_name"),
    )
    for prefix, id_col, name_col in tiers:
        ident = row[id_col]
        if not ident:
            continue
        label = (row[name_col] or "").strip()
        if label:
            return f"{prefix}:{ident}", label

    address = (row["addr"] or "").strip()
    if not address:
        return (None, None)
    return (f"addr:{address.lower()}", address)
|
||||||
|
|
||||||
|
|
||||||
|
# Hydration of an email_investor_links row up to the resolvable investor identity,
|
||||||
|
# shared by the per-email tags and the facet dropdown. Soft-deleted org/contact
|
||||||
|
# rows are dropped in the join so they fall through to the next identity tier.
|
||||||
|
_LINK_IDENTITY_JOINS = """
|
||||||
|
LEFT JOIN fundraising_investors fi ON fi.id = l.fundraising_investor_id
|
||||||
|
LEFT JOIN fundraising_contacts fic ON fic.id = l.fundraising_contact_id
|
||||||
|
LEFT JOIN fundraising_investors fic_inv ON fic_inv.id = fic.investor_id
|
||||||
|
LEFT JOIN organizations o ON o.id = l.organization_id AND o.deleted_at IS NULL
|
||||||
|
LEFT JOIN contacts c ON c.id = l.contact_id AND c.deleted_at IS NULL
|
||||||
|
"""
|
||||||
|
_LINK_IDENTITY_COLS = """
|
||||||
|
l.matched_address AS addr,
|
||||||
|
COALESCE(fi.id, fic_inv.id) AS fund_id,
|
||||||
|
COALESCE(fi.investor_name, fic_inv.investor_name) AS fund_name,
|
||||||
|
COALESCE(fi.graveyard, fic_inv.graveyard) AS fund_graveyard,
|
||||||
|
o.id AS org_id, o.name AS org_name,
|
||||||
|
c.id AS contact_id,
|
||||||
|
NULLIF(TRIM(COALESCE(c.first_name,'') || ' ' || COALESCE(c.last_name,'')), '') AS contact_name
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
def query_email_activity(conn: sqlite3.Connection, *, investor_id: Optional[str] = None,
                         account_id: Optional[str] = None, search: Optional[str] = None,
                         direction: Optional[str] = None, since: Optional[str] = None,
                         until: Optional[str] = None, limit: int = 100) -> dict:
    """Captured-Gmail activity for the admin Communications panel, filterable by
    matched investor entity, mailbox, direction and date range, with free-text
    search over subject/snippet/sender. Returns the email rows plus the filter facets.

    Matched-only: the panel shows ONLY email that links to a known investor/contact
    (an `email_investor_links` row exists). Unmatched cold/unknown-sender email is
    still captured for completeness but is never surfaced here.

    Investor identity is *typed* (`fund:`/`org:`/`contact:`/`addr:`) and resolved with
    the digest's precedence (grid investor -> organization -> contact -> raw address),
    so an email matched only to a classic contact or an org domain — not yet wired to a
    grid investor — still shows a real name and is selectable in the dropdown, instead
    of the facet coming back empty. `investor_id` accepts a typed key (a bare id is
    treated as `fund:` for backward compatibility).

    Soft-delete: an email is live only if it still has a non-tombstoned per-mailbox
    sighting (`email_account_messages.deleted_at IS NULL`) — the `emails` row itself
    carries no deleted_at, so deletion lives on the sighting. Direction is decided at
    the email level (outbound if the sender is one of our mailboxes), mirroring the
    digest builder, so a thread reads consistently regardless of which mailbox saw it.

    Args:
        conn: SQLite connection; rows must be indexable by column name.
        investor_id: typed entity key (or bare grid-investor id) to filter on; an
            unrecognized prefix matches nothing.
        account_id: restrict to email with a live sighting in this mailbox.
        search: substring matched with LIKE against subject, snippet, sender
            address and sender name.
        direction: 'inbound' or 'outbound' (case-insensitive); any other value,
            or having no mailboxes on record, applies no direction filter.
        since, until: bounds on `sent_at`, compared as strings: [since, until).
        limit: page size, clamped to 1..500 (falsy -> 100).

    Returns:
        {"emails": [...], "accounts": [...], "investors": [...], "count": int,
         "truncated": bool}. Each email dict carries the selected columns plus
        `direction`, `mailboxes` (addresses with a live sighting) and `investors`
        ([{id, name}] resolved identities). `truncated` is True when more rows
        matched than `limit`.
    """
    limit = max(1, min(int(limit or 100), 500))
    cur = conn.cursor()
    # Normalized addresses of our own mailboxes; a sender in this set means outbound.
    own = {(r["email_address"] or "").lower().strip()
           for r in cur.execute("SELECT email_address FROM email_accounts")}
    own.discard("")

    # `where` fragments are ANDed together. `params` must be appended in the same
    # order the `?` placeholders are added, since they bind positionally.
    where = ["EXISTS (SELECT 1 FROM email_account_messages eam "
             "WHERE eam.email_id = e.id AND eam.deleted_at IS NULL)",
             # Matched-only: surface email that links to a known investor/contact.
             # Unmatched (unknown-sender) email is captured but never shown here.
             "EXISTS (SELECT 1 FROM email_investor_links l WHERE l.email_id = e.id)"]
    params: list = []
    if account_id:
        where.append("EXISTS (SELECT 1 FROM email_account_messages eam "
                     "WHERE eam.email_id = e.id AND eam.account_id = ? "
                     "AND eam.deleted_at IS NULL)")
        params.append(account_id)
    if investor_id:
        # Split "kind:value"; everything after the first colon is the value.
        kind, _, val = str(investor_id).partition(":")
        if not val:  # bare id (legacy) -> grid investor
            kind, val = "fund", str(investor_id)
        if kind == "fund":
            # Direct link to the grid investor, or via one of its fundraising contacts.
            where.append("EXISTS (SELECT 1 FROM email_investor_links l WHERE l.email_id = e.id "
                         "AND (l.fundraising_investor_id = ? OR l.fundraising_contact_id IN "
                         "(SELECT id FROM fundraising_contacts WHERE investor_id = ?)))")
            params.extend([val, val])
        elif kind == "org":
            where.append("EXISTS (SELECT 1 FROM email_investor_links l "
                         "WHERE l.email_id = e.id AND l.organization_id = ?)")
            params.append(val)
        elif kind == "contact":
            where.append("EXISTS (SELECT 1 FROM email_investor_links l "
                         "WHERE l.email_id = e.id AND l.contact_id = ?)")
            params.append(val)
        elif kind == "addr":
            where.append("EXISTS (SELECT 1 FROM email_investor_links l "
                         "WHERE l.email_id = e.id AND LOWER(l.matched_address) = ?)")
            params.append(val.lower())
        else:
            # Unknown key prefix (malformed input) -> match nothing, never silently
            # fall through to an unfiltered list.
            where.append("1 = 0")
    if search:
        # NOTE(review): `%` / `_` inside `search` are not escaped, so they act as
        # LIKE wildcards — confirm that is acceptable for this admin surface.
        like = f"%{search.strip()}%"
        where.append("(e.subject LIKE ? OR e.snippet LIKE ? "
                     "OR e.from_email LIKE ? OR e.from_name LIKE ?)")
        params.extend([like, like, like, like])
    # Date range over the send time (ISO-8601 strings sort lexically). [since, until).
    if since:
        where.append("e.sent_at >= ?")
        params.append(since)
    if until:
        where.append("e.sent_at < ?")
        params.append(until)
    direction = (direction or "").strip().lower()
    if direction in ("inbound", "outbound") and own:
        # One placeholder per own address; outbound = sender IN own, inbound = NOT IN.
        marks = ",".join("?" for _ in own)
        op = "IN" if direction == "outbound" else "NOT IN"
        where.append(f"LOWER(e.from_email) {op} ({marks})")
        params.extend(sorted(own))

    sql = ("SELECT e.id, e.subject, e.from_name, e.from_email, e.sent_at, e.snippet, "
           "e.has_attachments, e.is_matched, e.match_status FROM emails e WHERE "
           + " AND ".join(where) + " ORDER BY e.sent_at DESC LIMIT ?")
    # Fetch one row beyond the page so truncation can be detected without a COUNT.
    rows = [dict(r) for r in cur.execute(sql, params + [limit + 1])]
    truncated = len(rows) > limit
    rows = rows[:limit]
    by_id = {r["id"]: r for r in rows}
    for r in rows:
        r["direction"] = "outbound" if (r["from_email"] or "").lower().strip() in own else "inbound"
        r["mailboxes"] = []
        r["investors"] = []  # [{id: typed-key, name}] — resolved identities

    ids = list(by_id)
    if ids:
        marks = ",".join("?" for _ in ids)
        # Attach the mailboxes that still hold a live sighting of each email.
        for s in cur.execute(
                "SELECT eam.email_id AS eid, ea.email_address AS addr "
                "FROM email_account_messages eam JOIN email_accounts ea ON ea.id = eam.account_id "
                f"WHERE eam.deleted_at IS NULL AND eam.email_id IN ({marks}) "
                "ORDER BY ea.email_address", ids):
            mb = by_id[s["eid"]]["mailboxes"]
            if s["addr"] and s["addr"] not in mb:
                mb.append(s["addr"])
        # Attach the resolved investor identities, de-duplicated by typed key.
        for lnk in cur.execute(
                f"SELECT l.email_id AS eid, {_LINK_IDENTITY_COLS} "
                f"FROM email_investor_links l {_LINK_IDENTITY_JOINS} "
                f"WHERE l.email_id IN ({marks})", ids):
            # No graveyard filter here on purpose: a graveyarded investor's *email*
            # still shows in the list with its chip (audit completeness, direct or
            # via-contact); only the facet dropdown below hides graveyard from the picker.
            key, name = _resolve_entity(lnk)
            if not key:
                continue
            invs = by_id[lnk["eid"]]["investors"]
            if not any(iv["id"] == key for iv in invs):
                invs.append({"id": key, "name": name})

    accounts = [dict(r) for r in cur.execute(
        "SELECT id, email_address FROM email_accounts ORDER BY email_address")]

    # Facet dropdown mirrors what the list resolves: one entry per distinct matched
    # entity (grid investor / org / contact), across all live matched email — not just
    # the current page — so the picker is stable under filtering. Excluded from the
    # picker: graveyarded grid investors (CRM-wide convention) and raw-address-only
    # matches (too many, too noisy). Both still appear in the list and remain findable
    # by free-text search — this is an audit surface, so history is never hidden, only
    # the picker is.
    facet: dict[str, str] = {}
    for r in cur.execute(
            f"SELECT DISTINCT {_LINK_IDENTITY_COLS} "
            f"FROM email_investor_links l {_LINK_IDENTITY_JOINS} "
            "WHERE EXISTS (SELECT 1 FROM email_account_messages eam "
            "WHERE eam.email_id = l.email_id AND eam.deleted_at IS NULL)"):
        key, name = _resolve_entity(r)
        if not key or key.startswith("addr:"):
            continue
        if key.startswith("fund:") and (r["fund_graveyard"] or 0):
            continue
        # First name seen for a key wins.
        facet.setdefault(key, name)
    # Dropdown entries sorted case-insensitively by display name.
    investors = [{"id": k, "name": v}
                 for k, v in sorted(facet.items(), key=lambda kv: kv[1].lower())]

    return {"emails": rows, "accounts": accounts, "investors": investors,
            "count": len(rows), "truncated": truncated}
|
||||||
|
|
||||||
|
|
||||||
|
def search_hit_emails(conn: sqlite3.Connection, email_ids) -> dict:
    """Return display rows for the still-live emails among ``email_ids``.

    An email counts as live while at least one ``email_account_messages`` row
    for it has ``deleted_at IS NULL``. The result maps email id -> dict with
    ``id, subject, from_name, from_email, sent_at, has_attachments`` plus a
    computed ``direction`` ("outbound" when the sender is one of the addresses
    in ``email_accounts``, otherwise "inbound").

    Callers use this to hydrate semantic-search hits and to drop hits whose
    email has been soft-deleted since indexing: SQLite is treated as canonical,
    the search index as a possibly stale derivative.
    """
    # De-duplicate (first occurrence wins) and discard falsy ids.
    wanted = list(filter(None, dict.fromkeys(email_ids)))
    if not wanted:
        return {}

    cursor = conn.cursor()

    # Normalised set of our own mailbox addresses; blank/NULL ones are ignored.
    our_addresses = set()
    for account in cursor.execute("SELECT email_address FROM email_accounts"):
        normalised = (account["email_address"] or "").lower().strip()
        if normalised:
            our_addresses.add(normalised)

    placeholders = ",".join("?" * len(wanted))
    sql = (
        "SELECT e.id, e.subject, e.from_name, e.from_email, e.sent_at, e.has_attachments "
        f"FROM emails e WHERE e.id IN ({placeholders}) AND EXISTS (SELECT 1 FROM email_account_messages eam "
        "WHERE eam.email_id = e.id AND eam.deleted_at IS NULL)"
    )

    live: dict = {}
    for record in cursor.execute(sql, wanted):
        entry = dict(record)
        sender = (entry["from_email"] or "").lower().strip()
        if sender in our_addresses:
            entry["direction"] = "outbound"
        else:
            entry["direction"] = "inbound"
        live[entry["id"]] = entry
    return live
|
||||||
|
|
||||||
|
|
||||||
|
def query_email_detail(conn: sqlite3.Connection, email_id: str) -> Optional[dict]:
    """Assemble the full detail record for a single captured email.

    Returns ``None`` when no ``emails`` row with this id has a live sighting
    (an ``email_account_messages`` row with ``deleted_at IS NULL``); soft-delete
    is tracked per mailbox sighting, not on ``emails`` itself.

    The returned dict holds the selected ``emails`` columns (minus
    ``body_html``) and, in this order: ``has_html``, ``direction``,
    ``mailboxes``, ``recipients`` (to/cc only, "to" first), ``attachments`` and
    ``investors`` (one ``{"id", "name"}`` per distinct resolved entity).
    The raw remote HTML is deliberately withheld (XSS risk if a consumer ever
    rendered it); only a ``has_html`` flag is exposed.
    """
    cursor = conn.cursor()
    id_param = (email_id,)

    found = cursor.execute(
        "SELECT e.id, e.subject, e.from_name, e.from_email, e.sent_at, e.snippet, "
        "e.body_text, e.body_html, e.has_attachments, e.match_status, e.gmail_thread_id "
        "FROM emails e WHERE e.id = ? AND EXISTS (SELECT 1 FROM email_account_messages eam "
        "WHERE eam.email_id = e.id AND eam.deleted_at IS NULL)",
        id_param,
    ).fetchone()
    if not found:
        return None

    detail = dict(found)

    # Swap the HTML body for a boolean: the client only needs to know it exists.
    raw_html = detail.pop("body_html", None)
    detail["has_html"] = bool((raw_html or "").strip())

    # Direction is decided per email: outbound iff the sender is one of our mailboxes.
    our_addresses = set()
    for account in cursor.execute("SELECT email_address FROM email_accounts"):
        normalised = (account["email_address"] or "").lower().strip()
        if normalised:
            our_addresses.add(normalised)
    sender = (detail["from_email"] or "").lower().strip()
    detail["direction"] = "outbound" if sender in our_addresses else "inbound"

    # Mailboxes that still hold a live sighting of this email.
    mailbox_rows = cursor.execute(
        "SELECT DISTINCT ea.email_address AS addr FROM email_account_messages eam "
        "JOIN email_accounts ea ON ea.id = eam.account_id "
        "WHERE eam.email_id = ? AND eam.deleted_at IS NULL ORDER BY ea.email_address",
        id_param,
    )
    detail["mailboxes"] = [mailbox["addr"] for mailbox in mailbox_rows]

    recipient_rows = cursor.execute(
        "SELECT address, display_name, kind FROM email_recipients "
        "WHERE email_id = ? AND kind IN ('to','cc') "
        "ORDER BY CASE kind WHEN 'to' THEN 0 ELSE 1 END, address",
        id_param,
    )
    detail["recipients"] = [dict(recipient) for recipient in recipient_rows]

    attachment_rows = cursor.execute(
        "SELECT filename, mime_type, size_bytes, download_status FROM email_attachments "
        "WHERE email_id = ? ORDER BY filename",
        id_param,
    )
    detail["attachments"] = [dict(attachment) for attachment in attachment_rows]

    # Collapse the link rows to one entry per resolved entity; first name seen wins.
    matched: dict[str, str] = {}
    link_rows = cursor.execute(
        f"SELECT {_LINK_IDENTITY_COLS} FROM email_investor_links l {_LINK_IDENTITY_JOINS} "
        "WHERE l.email_id = ?",
        id_param,
    )
    for link in link_rows:
        entity_key, entity_name = _resolve_entity(link)
        if entity_key and entity_key not in matched:
            matched[entity_key] = entity_name
    detail["investors"] = [
        {"id": entity_key, "name": entity_name}
        for entity_key, entity_name in matched.items()
    ]
    return detail
|
||||||
|
|
||||||
|
|
||||||
def finish_sync_run(conn: sqlite3.Connection, run_id: str, *, status: str,
|
def finish_sync_run(conn: sqlite3.Connection, run_id: str, *, status: str,
|
||||||
stats: Optional[dict] = None, error: Optional[str] = None) -> None:
|
stats: Optional[dict] = None, error: Optional[str] = None) -> None:
|
||||||
stats = stats or {}
|
stats = stats or {}
|
||||||
|
|||||||
@@ -0,0 +1,206 @@
|
|||||||
|
"""Daily activity-digest scheduler (Phase B).
|
||||||
|
|
||||||
|
Co-located with the Gmail sync scheduler (it shares the same conn-factory and
|
||||||
|
daemon-thread idiom). One daemon thread wakes every 60s and fires the daily
|
||||||
|
activity digest once per local day, at/after the configured send hour.
|
||||||
|
|
||||||
|
Control lives in the DB, set from Settings -> Admin (digest_builder.load_digest_policy
|
||||||
|
-> app_settings 'digest_policy'): {enabled, send_hour}. The thread always runs and
|
||||||
|
re-reads the policy each cycle, so toggling the digest on/off or changing the time
|
||||||
|
takes effect on the next loop — no restart. CRM_DIGEST_ENABLED/SEND_HOUR only seed
|
||||||
|
the first-boot default before an admin sets the policy.
|
||||||
|
|
||||||
|
The send is an internal ops email to the team's own admins — exempt from the
|
||||||
|
"agents draft, humans send" rule (which governs outward LP/prospect contact).
|
||||||
|
Digest content is summarized on Spark (local), never Claude — see digest_builder.
|
||||||
|
|
||||||
|
Window: the content covers (last successful send, now]. Tracked in app_settings
|
||||||
|
so a missed day's activity rolls into the next digest rather than being dropped;
|
||||||
|
the first-ever run covers the prior 24h. The once-per-day guard is a separate
|
||||||
|
local-date key. The transport (Gmail-DWD -> SMTP) is digest_mailer's job.
|
||||||
|
"""
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import sqlite3
|
||||||
|
import threading
|
||||||
|
from datetime import datetime, timedelta, timezone
|
||||||
|
|
||||||
|
from .scheduler import _conn_factory_from_env
|
||||||
|
|
||||||
|
|
||||||
|
log = logging.getLogger("email_integration.digest_scheduler")
|
||||||
|
|
||||||
|
_LAST_DATE_KEY = "digest_last_sent_date" # local YYYY-MM-DD — once-per-day guard
|
||||||
|
_LAST_AT_KEY = "digest_last_sent_at" # UTC ISO — content-window cursor
|
||||||
|
|
||||||
|
_state: dict[str, object] = {"thread": None, "stop": threading.Event()}
|
||||||
|
|
||||||
|
|
||||||
|
def _utc_iso(dt: datetime) -> str:
    """Render ``dt`` as a second-resolution UTC timestamp, ``YYYY-MM-DDTHH:MM:SSZ``.

    The value is converted with ``astimezone(timezone.utc)`` first, so an aware
    datetime in any zone comes out in UTC; sub-second precision is dropped by
    the format string.
    """
    in_utc = dt.astimezone(timezone.utc)
    return in_utc.strftime("%Y-%m-%dT%H:%M:%SZ")
|
||||||
|
|
||||||
|
|
||||||
|
# app_settings access kept local (no server.py import — avoid the startup cycle);
|
||||||
|
# the value_json/JSON encoding matches server.get/set_app_setting exactly.
|
||||||
|
def _get_setting(conn, key):
    """Read one JSON-encoded value from ``app_settings``; ``None`` if unavailable.

    ``None`` is returned when the query raises ``sqlite3.OperationalError``,
    when no row exists for ``key``, or when the stored ``value_json`` cannot be
    decoded. Otherwise the decoded JSON value is returned.
    """
    try:
        found = conn.execute(
            "SELECT value_json FROM app_settings WHERE key = ?", (key,)
        ).fetchone()
    except sqlite3.OperationalError:
        # A failing query is treated the same as "not set".
        found = None
    if not found:
        return None

    try:
        decoded = json.loads(found["value_json"])
    except Exception:
        # Best-effort read: an undecodable value reads as "not set".
        decoded = None
    return decoded
|
||||||
|
|
||||||
|
|
||||||
|
def _set_setting(conn, key, value) -> None:
    """Upsert ``key`` in ``app_settings`` with ``value`` stored as JSON.

    ``updated_at`` is stamped with the current UTC time. The statement is
    executed on ``conn`` but not committed here; committing is left to the
    caller.
    """
    upsert_sql = (
        "INSERT INTO app_settings (key, value_json, updated_at) VALUES (?, ?, ?) "
        "ON CONFLICT(key) DO UPDATE SET value_json = excluded.value_json, "
        "updated_at = excluded.updated_at"
    )
    encoded = json.dumps(value)
    stamped_at = _utc_iso(datetime.now(timezone.utc))
    conn.execute(upsert_sql, (key, encoded, stamped_at))
|
||||||
|
|
||||||
|
|
||||||
|
def _admin_recipients(conn) -> list[str]:
    """List the trimmed email addresses of active admin users.

    Selects ``users`` rows with ``role = 'admin'``, ``is_active = 1`` and a
    non-blank email, then strips each address and drops any that end up empty.
    """
    found = conn.execute(
        "SELECT email FROM users WHERE role = 'admin' AND is_active = 1 "
        "AND email IS NOT NULL AND TRIM(email) != ''"
    ).fetchall()

    addresses = []
    for user in found:
        cleaned = str(user["email"]).strip()
        if cleaned:
            addresses.append(cleaned)
    return addresses
|
||||||
|
|
||||||
|
|
||||||
|
def _build_and_send(conn, since_iso, until_iso, *, build_fn=None, send_fn=None):
    """Build the digest for the given window and pass it to the transport.

    Recipients are the active admins from ``_admin_recipients``; when there are
    none, ``digest_mailer.NoTransport`` is raised before anything is built.
    ``build_fn`` / ``send_fn`` override ``digest_builder.build_digest`` and
    ``digest_mailer.send_digest`` (injectable for tests).

    Returns a summary dict: recipients, transport (taken from the send result,
    if any), the digest's activity counters, and the ``[since_iso, until_iso]``
    window.
    """
    import digest_builder
    import digest_mailer

    if build_fn:
        build = build_fn
    else:
        build = digest_builder.build_digest
    if send_fn:
        send = send_fn
    else:
        send = digest_mailer.send_digest

    to_addrs = _admin_recipients(conn)
    if not to_addrs:
        raise digest_mailer.NoTransport(
            "No active admin has an email address — give one an address to receive the digest.")

    digest = build(conn, since_iso, until_iso)
    send_result = send(conn, to_addrs, digest["subject"], digest["body"])

    # The transport may return nothing; treat that as "transport unknown".
    transport = (send_result or {}).get("transport")
    summary = {
        "recipients": to_addrs,
        "transport": transport,
        "has_activity": digest["has_activity"],
        "user_count": digest["user_count"],
        "email_count": digest["email_count"],
        "investor_count": digest.get("investor_count"),
        "window": [since_iso, until_iso],
    }
    return summary
|
||||||
|
|
||||||
|
|
||||||
|
def send_digest_window(conn_factory=None, *, since_iso, until_iso,
                       build_fn=None, send_fn=None):
    """Send a digest for an explicit (since_iso, until_iso] window right now.

    Uses the same build/send path as the daily digest (``_build_and_send``, so
    ``digest_mailer.NoTransport`` propagates when no transport or recipient is
    available) but leaves the daily cursor alone, so a manual or preview send
    never suppresses the scheduled one.

    Nothing is written to the DB here, which is why the connection is simply
    opened and closed with no commit — revisit that before adding any write.

    Returns ``{"status": "sent", ...}`` merged with the send summary.
    """
    open_conn = conn_factory or _conn_factory_from_env()
    conn = open_conn()
    try:
        outcome = _build_and_send(
            conn, since_iso, until_iso, build_fn=build_fn, send_fn=send_fn)
        response = {"status": "sent"}
        response.update(outcome)
        return response
    finally:
        conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
def maybe_send_digest(conn_factory=None, *, force=False,
                      now_local=None, now_utc=None, build_fn=None, send_fn=None):
    """Send the daily digest when due, or unconditionally with ``force=True``.

    Scheduled path (``force=False``): returns a status dict without sending if
    the policy is disabled, the local hour is before ``send_hour``, or today's
    local date already matches the last-sent date. Otherwise the window runs
    from the stored last-sent timestamp (or 24h ago when there is none) to now,
    and on success both cursor keys are written and committed.

    Forced path: skips the policy and guards, always uses the last 24h, and
    does not touch the cursor keys.

    ``now_local`` / ``now_utc`` / ``build_fn`` / ``send_fn`` are injectable for
    tests. The connection is always closed before returning.
    """
    import digest_builder

    open_conn = conn_factory or _conn_factory_from_env()
    conn = open_conn()
    try:
        policy = digest_builder.load_digest_policy(conn)
        scheduled = not force
        if scheduled and not policy["enabled"]:
            return {"status": "disabled"}

        local_now = now_local or datetime.now()
        utc_now = now_utc or datetime.now(timezone.utc)

        if scheduled:
            today = local_now.strftime("%Y-%m-%d")
            if local_now.hour < policy["send_hour"]:
                return {"status": "before_send_hour", "send_hour": policy["send_hour"]}
            if _get_setting(conn, _LAST_DATE_KEY) == today:
                return {"status": "already_sent_today"}

        until_iso = _utc_iso(utc_now)
        # Forced sends ignore the stored cursor and always look back 24h.
        if scheduled:
            window_start = _get_setting(conn, _LAST_AT_KEY)
        else:
            window_start = None
        if not window_start:
            window_start = _utc_iso(utc_now - timedelta(hours=24))

        outcome = _build_and_send(
            conn, window_start, until_iso, build_fn=build_fn, send_fn=send_fn)

        if scheduled:
            # Advance the once-per-day guard and the content-window cursor together.
            _set_setting(conn, _LAST_DATE_KEY, local_now.strftime("%Y-%m-%d"))
            _set_setting(conn, _LAST_AT_KEY, until_iso)
            conn.commit()

        response = {"status": "sent"}
        response.update(outcome)
        return response
    finally:
        conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
def start_digest_scheduler(conn_factory=None) -> None:
    """Launch the daemon thread that drives the daily digest (no-op if running).

    The worker waits 15s, then calls ``maybe_send_digest`` roughly every 60s
    until the stop event is set. Exceptions from a cycle are logged and the
    loop carries on. A fresh stop event is stored in ``_state["stop"]`` and the
    thread in ``_state["thread"]``.
    """
    if _state["thread"] is not None:
        return

    open_conn = conn_factory or _conn_factory_from_env()
    halt = threading.Event()
    _state["stop"] = halt

    def _loop():
        log.info("digest scheduler started (policy-controlled via Settings -> Admin)")
        # Initial grace period so the server can finish starting up.
        if halt.wait(15):
            return
        while True:
            if halt.is_set():
                break
            try:
                outcome = maybe_send_digest(open_conn)
                if outcome.get("status") == "sent":
                    log.info("daily digest sent: %s", outcome)
            except Exception:
                log.exception("digest send failed; will retry next cycle")
            if halt.wait(60):
                break

    worker = threading.Thread(target=_loop, name="digest", daemon=True)
    worker.start()
    _state["thread"] = worker
|
||||||
|
|
||||||
|
|
||||||
|
def stop_digest_scheduler() -> None:
    """Signal the digest loop to stop and wait briefly for its thread.

    Sets the event in ``_state["stop"]``, joins the recorded thread for up to
    5 seconds (join errors are ignored), then clears ``_state["thread"]``.
    """
    halt: threading.Event = _state["stop"]  # type: ignore
    halt.set()

    worker = _state.get("thread")
    if worker:
        try:
            worker.join(timeout=5)  # type: ignore
        except Exception:
            # Best-effort shutdown: a failed join must not break the caller.
            pass
    _state["thread"] = None
|
||||||
@@ -0,0 +1,69 @@
|
|||||||
|
"""Send an email via the Gmail API using the same domain-wide delegation that
|
||||||
|
powers capture and draft creation.
|
||||||
|
|
||||||
|
The DWD grant on this deployment includes the `gmail.compose` scope (verified
|
||||||
|
2026-06-15: token mint + a live messages.send both succeed), and `gmail.compose`
|
||||||
|
authorizes `users.messages.send`. So CRM-originated mail (the daily digest) can
|
||||||
|
send through the existing service account — no SMTP account, no app password, no
|
||||||
|
admin change. Sends impersonating `sender_email`, which must be a Workspace user
|
||||||
|
in the delegated domain. Mirrors the REST pattern in compose.py; stdlib only.
|
||||||
|
"""
|
||||||
|
import base64
|
||||||
|
import email.message
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import urllib.error
|
||||||
|
import urllib.parse
|
||||||
|
import urllib.request
|
||||||
|
|
||||||
|
from . import config as _cfg
|
||||||
|
from . import credentials as _creds
|
||||||
|
|
||||||
|
|
||||||
|
def gmail_available():
    """Report whether sending through Gmail with DWD is currently possible.

    Requires all of: the integration is enabled, ``primary_auth`` is ``"dwd"``,
    and ``dwd_key_path`` is set and exists on disk.
    """
    settings = _cfg.CONFIG
    if not (settings.enabled and settings.primary_auth == "dwd"):
        return False
    if not settings.dwd_key_path:
        return False
    return os.path.exists(settings.dwd_key_path)
|
||||||
|
|
||||||
|
|
||||||
|
def _build_raw(from_addr, to_addrs, subject, body):
    """Build a plain-text message and return it URL-safe base64 encoded.

    ``to_addrs`` is joined into a single ``To`` header. An empty subject
    becomes ``"(no subject)"`` and an empty body becomes the empty string.
    The return value is an ASCII ``str``.
    """
    message = email.message.EmailMessage()
    header_values = (
        ("From", from_addr),
        ("To", ", ".join(to_addrs)),
        ("Subject", subject or "(no subject)"),
    )
    for header_name, header_value in header_values:
        message[header_name] = header_value
    message.set_content(body or "")

    encoded = base64.urlsafe_b64encode(message.as_bytes())
    return encoded.decode("ascii")
|
||||||
|
|
||||||
|
|
||||||
|
def send_via_gmail(sender_email, to_addrs, subject, body, conn=None):
    """Send one message as ``sender_email`` to ``to_addrs`` via the Gmail API (DWD).

    Args:
        sender_email: Address to impersonate and send from.
        to_addrs: One address (str) or an iterable of addresses. ``None``
            entries and blank strings are ignored; ``None`` itself is treated
            as "no recipients".
        subject: Subject line (an empty subject is replaced by ``_build_raw``).
        body: Plain-text body.
        conn: Passed to the credential provider factory; only consulted by the
            OAuth provider path, the DWD provider ignores it.

    Returns:
        ``{'sent_to', 'from', 'message_id'}``.

    Raises:
        ValueError: no usable recipient, or no ``sender_email``.
        RuntimeError: the Gmail API returned an HTTP error or was unreachable;
            the underlying exception is chained as ``__cause__``.
    """
    if to_addrs is None:
        to_addrs = []
    elif isinstance(to_addrs, str):
        to_addrs = [to_addrs]
    # Skip None before str(): otherwise it would become the literal address "None".
    to_addrs = [a for a in (str(x).strip() for x in to_addrs if x is not None) if a]
    if not to_addrs:
        raise ValueError("no recipients")
    if not sender_email:
        raise ValueError("no sender_email (DWD impersonation needs a domain user)")

    # conn is only consulted by the OAuth provider path; the DWD provider (the one
    # used here) reads the service-account key from disk and ignores it.
    provider = _creds.build_provider(lambda: conn)
    token = provider.access_token_for(sender_email, _creds.GMAIL_COMPOSE_SCOPE).token
    raw = _build_raw(sender_email, to_addrs, subject, body)
    url = ("https://gmail.googleapis.com/gmail/v1/users/"
           f"{urllib.parse.quote(sender_email)}/messages/send")
    req = urllib.request.Request(
        url, data=json.dumps({"raw": raw}).encode("utf-8"), method="POST",
        headers={"Authorization": f"Bearer {token}", "Content-Type": "application/json"})
    try:
        with urllib.request.urlopen(req, timeout=20) as resp:
            result = json.loads(resp.read())
    except urllib.error.HTTPError as e:
        # Keep only the first 300 chars of the error body for the message.
        detail = e.read().decode("utf-8", "replace")[:300]
        raise RuntimeError(
            f"Gmail API send failed for {sender_email} (HTTP {e.code}): {detail}") from e
    except OSError as e:  # URLError/timeout/DNS (URLError subclasses OSError)
        raise RuntimeError(f"Gmail API unreachable: {e}") from e
    return {"sent_to": to_addrs, "from": sender_email, "message_id": result.get("id")}
|
||||||
@@ -0,0 +1,30 @@
|
|||||||
|
-- ============================================================================
|
||||||
|
-- email_proposal_matrix — Matrix-review state for an email_activity_proposal,
|
||||||
|
-- kept 1:1 with the proposal (proposal_id PK). The CRM runs on the box and has
|
||||||
|
-- no matrix-nio, so it cannot post to Matrix itself: the intake bot (on the Spark)
|
||||||
|
-- PULLS pending proposals, posts a review card to the dedicated Matrix review room,
|
||||||
|
-- and writes the thread-root event_id back here. Persisting it CRM-side (not just in
|
||||||
|
-- the bot's memory) keeps both surfaces in sync and survives a bot restart.
|
||||||
|
--
|
||||||
|
-- A SIDE TABLE rather than new columns on email_activity_proposals because the
|
||||||
|
-- email-integration migration runner (email_integration/db.py:apply_migrations)
|
||||||
|
-- re-runs every .sql file on every boot via executescript with no ledger — so
|
||||||
|
-- CREATE TABLE IF NOT EXISTS is idempotent, whereas ALTER ... ADD COLUMN would throw
|
||||||
|
-- "duplicate column" on the second boot and abort startup. Reversal: DROP TABLE
|
||||||
|
-- (this runner has no .down.sql convention; cf. 0001/0002).
|
||||||
|
--
|
||||||
|
-- posted_at — set once the bot has posted the review card (event_id = thread root).
|
||||||
|
-- closed_at — set when the thread is resolved: either the bot decided in-thread, OR
|
||||||
|
-- the bot announced a web-side decision. A posted+decided proposal with
|
||||||
|
-- closed_at NULL is exactly the bot's signal to post "decided on the web"
|
||||||
|
-- into the thread and then close it.
|
||||||
|
-- ============================================================================
|
||||||
|
CREATE TABLE IF NOT EXISTS email_proposal_matrix (
|
||||||
|
proposal_id TEXT PRIMARY KEY, -- one row per proposal (1:1 with email_activity_proposals)
|
||||||
|
event_id TEXT, -- Matrix thread-root event id of the posted review card
|
||||||
|
posted_at TEXT, -- set once the bot has posted the review card
|
||||||
|
closed_at TEXT, -- set when the thread is resolved (decided in-thread, or web decision announced)
|
||||||
|
created_at TEXT DEFAULT (datetime('now')),
|
||||||
|
FOREIGN KEY(proposal_id) REFERENCES email_activity_proposals(id) ON DELETE CASCADE
|
||||||
|
);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_email_proposal_matrix_event ON email_proposal_matrix(event_id);
|
||||||
@@ -33,6 +33,9 @@ from . import scheduler as _sched
|
|||||||
_GET_ROUTES = {
|
_GET_ROUTES = {
|
||||||
"/api/email/status": "status",
|
"/api/email/status": "status",
|
||||||
"/api/email/accounts": "list_accounts",
|
"/api/email/accounts": "list_accounts",
|
||||||
|
"/api/email/activity": "activity",
|
||||||
|
"/api/email/detail": "detail",
|
||||||
|
"/api/email/search": "search",
|
||||||
"/api/email/threads": "list_threads",
|
"/api/email/threads": "list_threads",
|
||||||
"/api/email/oauth/start": "oauth_start",
|
"/api/email/oauth/start": "oauth_start",
|
||||||
"/api/email/oauth/callback": "oauth_callback",
|
"/api/email/oauth/callback": "oauth_callback",
|
||||||
@@ -115,7 +118,9 @@ def _require_admin(handler) -> Optional[dict]:
|
|||||||
# ---------------------------------------------------------------------------- GET handlers
|
# ---------------------------------------------------------------------------- GET handlers
|
||||||
|
|
||||||
def _h_status(handler):
|
def _h_status(handler):
|
||||||
user = _require_auth(handler)
|
# Email Capture is an admin-only surface (nav-hidden from members); these read
|
||||||
|
# endpoints expose mailbox/sync metadata, so enforce admin server-side too.
|
||||||
|
user = _require_admin(handler)
|
||||||
if not user:
|
if not user:
|
||||||
return
|
return
|
||||||
snap = _sched.status_snapshot()
|
snap = _sched.status_snapshot()
|
||||||
@@ -150,7 +155,9 @@ def _h_status(handler):
|
|||||||
|
|
||||||
|
|
||||||
def _h_list_accounts(handler):
|
def _h_list_accounts(handler):
|
||||||
user = _require_auth(handler)
|
# Admin-only: the mailbox list (addresses, sync state, errors) belongs to the
|
||||||
|
# admin-only Email Capture surface. Enforced server-side, not just nav-hidden.
|
||||||
|
user = _require_admin(handler)
|
||||||
if not user:
|
if not user:
|
||||||
return
|
return
|
||||||
conn = _conn()
|
conn = _conn()
|
||||||
@@ -162,14 +169,147 @@ def _h_list_accounts(handler):
|
|||||||
"FROM email_accounts ORDER BY email_address"
|
"FROM email_accounts ORDER BY email_address"
|
||||||
)
|
)
|
||||||
rows = [dict(r) for r in cur.fetchall()]
|
rows = [dict(r) for r in cur.fetchall()]
|
||||||
|
# Per-mailbox counts: emails are de-duplicated globally, so "captured per
|
||||||
|
# mailbox" comes from the per-account sighting table; "matched" joins to emails.
|
||||||
|
captured, matched = {}, {}
|
||||||
|
try:
|
||||||
|
captured = {r["account_id"]: r["n"] for r in cur.execute(
|
||||||
|
"SELECT account_id, COUNT(*) AS n FROM email_account_messages "
|
||||||
|
"WHERE deleted_at IS NULL GROUP BY account_id")}
|
||||||
|
matched = {r["account_id"]: r["n"] for r in cur.execute(
|
||||||
|
"SELECT eam.account_id AS account_id, COUNT(*) AS n FROM email_account_messages eam "
|
||||||
|
"JOIN emails e ON e.id = eam.email_id "
|
||||||
|
"WHERE eam.deleted_at IS NULL AND e.is_matched = 1 GROUP BY eam.account_id")}
|
||||||
|
except sqlite3.OperationalError:
|
||||||
|
pass
|
||||||
|
for r in rows:
|
||||||
|
r["captured"] = captured.get(r["id"], 0)
|
||||||
|
r["matched"] = matched.get(r["id"], 0)
|
||||||
finally:
|
finally:
|
||||||
conn.close()
|
conn.close()
|
||||||
# Non-admins only see their own row
|
|
||||||
if user.get("role") != "admin":
|
|
||||||
rows = [r for r in rows if r["user_id"] == user["user_id"]]
|
|
||||||
handler.send_json({"accounts": rows})
|
handler.send_json({"accounts": rows})
|
||||||
|
|
||||||
|
|
||||||
|
def _h_activity(handler):
    """GET handler: captured-email activity list for the Communications page.

    Admin-only, enforced server-side: mailbox/investor substance is
    admin-scoped, so hiding the nav entry is not enough. Query parameters
    ``investor_id``, ``account_id``, ``q``/``search``, ``direction``, ``since``
    and ``until`` are trimmed and passed as ``None`` when blank; ``limit``
    falls back to 100 when missing or not an integer. The result of
    ``_db.query_email_activity`` is sent back as JSON.
    """
    user = _require_admin(handler)
    if not user:
        # NOTE(review): presumably _require_admin already wrote the error response — confirm.
        return

    params = handler.get_query_params()
    try:
        limit = int(params.get("limit", 100))
    except (TypeError, ValueError):
        limit = 100

    def _blank_to_none(raw):
        # Trim the raw value; an absent or whitespace-only value means "no filter".
        return (raw or "").strip() or None

    conn = _conn()
    try:
        filters = {
            "investor_id": _blank_to_none(params.get("investor_id")),
            "account_id": _blank_to_none(params.get("account_id")),
            "search": _blank_to_none(params.get("q") or params.get("search")),
            "direction": _blank_to_none(params.get("direction")),
            "since": _blank_to_none(params.get("since")),
            "until": _blank_to_none(params.get("until")),
        }
        result = _db.query_email_activity(conn, limit=limit, **filters)
    finally:
        conn.close()
    handler.send_json(result)
|
||||||
|
|
||||||
|
|
||||||
|
def _h_detail(handler):
    """GET handler: full detail for one captured email, selected by ``?id=``.

    Admin-only, like the activity list it expands from. Responds 400 when
    ``id`` is missing or blank, 404 when ``_db.query_email_detail`` returns
    ``None``, and otherwise sends the detail dict as JSON.
    """
    user = _require_admin(handler)
    if not user:
        return

    requested_id = (handler.get_query_params().get("id") or "").strip()
    if not requested_id:
        return handler.send_error_json("id required", 400)

    conn = _conn()
    try:
        record = _db.query_email_detail(conn, requested_id)
    finally:
        conn.close()

    if record is None:
        return handler.send_error_json("Not found", 404)
    handler.send_json(record)
|
||||||
|
|
||||||
|
|
||||||
|
def _semantic_email_search(query: str, top_k: int) -> list:
    """Run reranked hybrid search over indexed email content and return raw hits.

    The sibling ``ingest`` directory (next to this package) is put on
    ``sys.path`` and its ``search`` module imported lazily, so an installation
    without the ingest stack raises here; the caller maps that to a 503.
    Results are restricted to ``doc_type == "email"``.
    """
    import os
    import sys

    package_dir = os.path.dirname(os.path.abspath(__file__))
    ingest_dir = os.path.join(os.path.dirname(package_dir), "ingest")
    if ingest_dir not in sys.path:
        sys.path.insert(0, ingest_dir)

    import search as _ingest_search  # ingest/search.py

    email_only = {"must": [{"key": "doc_type", "match": {"value": "email"}}]}
    return _ingest_search.hybrid_search(query, top_k=top_k, rerank=True, filt=email_only)
|
||||||
|
|
||||||
|
|
||||||
|
def _h_search(handler):
    """GET handler: admin-only semantic search over captured email content.

    Reads ``q`` (or ``query``) and ``top_k`` (default 25, clamped to 1..50).
    A blank query returns an empty result set. Retrieval failures are logged
    to stderr and reported as a 503. Hits are hydrated through
    ``_db.search_hit_emails`` (SQLite is canonical), and any hit without a
    live row there is dropped; rank order of the remaining hits is preserved.
    """
    user = _require_admin(handler)
    if not user:
        return

    params = handler.get_query_params()
    query = (params.get("q") or params.get("query") or "").strip()
    if not query:
        return handler.send_json({"query": "", "results": []})

    try:
        requested = int(params.get("top_k", 25))
        top_k = min(50, max(1, requested))
    except (TypeError, ValueError):
        top_k = 25

    try:
        hits = _semantic_email_search(query, top_k)
    except Exception as e:
        # Spark Control / Qdrant unreachable, or the ingest stack isn't installed.
        # Keep the detail server-side (it can carry a URL/host); the UI gets a clean 503.
        import sys
        print(f"[email-search] retrieval failed: {type(e).__name__}: {e}", file=sys.stderr)
        return handler.send_error_json("Content search is unavailable (Spark/Qdrant not reachable).", 503)

    # Pair every hit with its payload (missing/None payload -> empty dict), in rank order.
    paired = []
    for hit in hits:
        paired.append((hit.get("payload", {}) or {}, hit))
    source_ids = [payload.get("source_id") for payload, _ in paired]

    conn = _conn()
    try:
        live = _db.search_hit_emails(conn, source_ids)
    finally:
        conn.close()

    results = []
    for payload, hit in paired:
        email_id = payload.get("source_id")
        row = live.get(email_id)
        if not row:
            # Deleted since indexing, or otherwise not resolvable in SQLite.
            continue
        excerpt_source = hit.get("text") or payload.get("text") or ""
        excerpt = excerpt_source.replace("\n", " ").strip()[:300]
        results.append({
            "email_id": email_id,
            "subject": row["subject"],
            "from_name": row["from_name"],
            "from_email": row["from_email"],
            "sent_at": row["sent_at"],
            "direction": row["direction"],
            "has_attachments": row["has_attachments"],
            "lp_name": payload.get("lp_name"),
            "score": hit.get("score"),
            "excerpt": excerpt,
        })
    handler.send_json({"query": query, "results": results, "count": len(results)})
|
||||||
|
|
||||||
|
|
||||||
def _h_list_threads(handler):
|
def _h_list_threads(handler):
|
||||||
user = _require_auth(handler)
|
user = _require_auth(handler)
|
||||||
if not user:
|
if not user:
|
||||||
|
|||||||
@@ -23,6 +23,7 @@ import logging
|
|||||||
import sqlite3
|
import sqlite3
|
||||||
import traceback
|
import traceback
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
from urllib.error import URLError
|
||||||
|
|
||||||
from . import attachments as _attach
|
from . import attachments as _attach
|
||||||
from . import config as _cfg
|
from . import config as _cfg
|
||||||
@@ -112,6 +113,15 @@ def sync_account(conn_factory, credential_provider, account,
|
|||||||
error_str = "history expired; fallback to date backfill"
|
error_str = "history expired; fallback to date backfill"
|
||||||
status = "partial"
|
status = "partial"
|
||||||
_fallback_date_backfill(conn_factory, client, account, index, run_stats)
|
_fallback_date_backfill(conn_factory, client, account, index, run_stats)
|
||||||
|
except (_errors.RateLimitError, _errors.TransientError, URLError, TimeoutError) as e:
|
||||||
|
# A network / 5xx / rate-limit error that outlived the in-pass retry loop.
|
||||||
|
# This is TRANSIENT, not terminal: park it as 'retrying' (which the scheduler
|
||||||
|
# still picks up every cycle) instead of 'error' (which it excludes). Fixes the
|
||||||
|
# v<=0.1.0:103 bug where a single timeout dark-listed a mailbox until a manual
|
||||||
|
# kick. Terminal causes (auth, permanent, unexpected) still fall through to 'error'.
|
||||||
|
error_str = f"transient: {type(e).__name__}: {e}"
|
||||||
|
status = "retrying"
|
||||||
|
log.warning("transient error during sync of %s: %s", email_addr, e)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
error_str = f"unexpected: {type(e).__name__}: {e}"
|
error_str = f"unexpected: {type(e).__name__}: {e}"
|
||||||
status = "error"
|
status = "error"
|
||||||
|
|||||||
@@ -0,0 +1,62 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Test the Gmail-draft message construction (the part that doesn't need live Gmail):
|
||||||
|
subject/body parsing, reply-target resolution, and the RFC822 build incl. threading
|
||||||
|
headers. The actual drafts.create call is exercised on the box. Synthetic data only.
|
||||||
|
Run: cd backend && python3 email_integration/test_compose.py
|
||||||
|
"""
|
||||||
|
import base64
|
||||||
|
import os
|
||||||
|
import sqlite3
|
||||||
|
import sys
|
||||||
|
|
||||||
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
from email_integration import compose as cp # noqa: E402
|
||||||
|
|
||||||
|
FAILS = []
|
||||||
|
|
||||||
|
|
||||||
|
def check(cond, msg):
    """Print a PASS/FAIL line for one assertion and remember failures.

    cond: any value; only its truthiness is used.
    msg:  label printed after the PASS/FAIL tag and appended to the module-level
          FAILS list when cond is falsy.
    """
    tag = " PASS " if cond else " FAIL "
    print(tag + msg)
    if cond:
        return
    # Failures are collected (not raised) so every check in the script still runs.
    FAILS.append(msg)
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Run the compose message-construction checks; exit with status 1 on any failure.

    Covers three private helpers of the `compose` module (imported as `cp`):
      * `_parse_subject_body` - splitting a leading "Subject:" line from the body,
      * `_reply_target`       - resolving recipient / thread / In-Reply-To from a DB,
      * `_build_raw`          - the base64url-encoded RFC822 message.
    Results are reported through check(); nothing is returned.
    """
    # Local import keeps this fix self-contained (the file header does not import it).
    from contextlib import closing

    # --- subject / body parsing ---
    s, b = cp._parse_subject_body("Subject: Following up\n\nHi Sarah,\n\nthanks for the call.")
    check(s == "Following up" and b.startswith("Hi Sarah"), "parses 'Subject:' line + body")
    s2, b2 = cp._parse_subject_body("No subject prefix here")
    check(s2 == "" and b2 == "No subject prefix here", "no subject line -> empty subject, full body")

    # --- reply-target resolution against a throwaway in-memory schema ---
    # closing() guarantees the connection is released even if a helper raises;
    # previously it was never closed at all.
    with closing(sqlite3.connect(":memory:")) as c:
        c.row_factory = sqlite3.Row
        c.executescript("""
            CREATE TABLE emails(id TEXT, rfc_message_id TEXT, gmail_thread_id TEXT, sent_at TEXT, is_matched INT);
            CREATE TABLE email_investor_links(email_id TEXT, fundraising_investor_id TEXT, matched_address TEXT);
        """)
        c.execute("INSERT INTO emails VALUES('e1','<m1@x>','t1','2026-06-01',1)")
        c.execute("INSERT INTO email_investor_links VALUES('e1','inv1','lp@harborvine.example')")
        c.commit()
        t = cp._reply_target(c, "inv1")
        check(t and t["to"] == "lp@harborvine.example" and t["thread_id"] == "t1" and t["in_reply_to"] == "<m1@x>",
              "reply target resolves LP address + thread + in-reply-to")
        check(cp._reply_target(c, "nope") is None, "no history -> no reply target")

    # --- RFC822 build: a threaded reply, then a fresh email without subject/thread ---
    raw = cp._build_raw("grant@ten31.xyz", "lp@x.example", "Hi", "Body text here", "<m1@x>")
    dec = base64.urlsafe_b64decode(raw).decode("utf-8", "replace")
    check("From: grant@ten31.xyz" in dec and "To: lp@x.example" in dec, "RFC822 has From + To")
    check("Subject: Hi" in dec and "Body text here" in dec, "RFC822 has Subject + body")
    check("In-Reply-To: <m1@x>" in dec and "References: <m1@x>" in dec, "threading headers set for replies")
    raw2 = cp._build_raw("a@b.co", "c@d.co", "", "body", None)
    dec2 = base64.urlsafe_b64decode(raw2).decode("utf-8", "replace")
    check("Subject: (no subject)" in dec2 and "In-Reply-To" not in dec2, "no subject / no thread -> fresh email")

    # --- summary ---
    if FAILS:
        print(f"\nFAILED ({len(FAILS)})")
        for f in FAILS:
            print(" - " + f)
        sys.exit(1)
    print("\nALL PASS (gmail compose message construction)")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
@@ -0,0 +1,335 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Test the admin-only email-activity panel (Communications tab, v0.1.0:80).
|
||||||
|
|
||||||
|
Covers the pure query (`db.query_email_activity`): matched-only scope (unmatched
|
||||||
|
cold/unknown-sender email is never surfaced), investor/mailbox/search/direction/
|
||||||
|
date-range filters, per-sighting soft-delete, direction at the email level, mailbox
|
||||||
|
roll-ups, and the *typed* investor facet (grid investor / org / contact), including
|
||||||
|
the v83 fix where an email matched only to a classic contact or org domain — not yet
|
||||||
|
wired to a grid investor — still resolves to a real name and appears in the dropdown
|
||||||
|
(previously the facet came back empty). Also asserts the route handler enforces admin
|
||||||
|
server-side. Synthetic data only.
|
||||||
|
|
||||||
|
Run: cd backend && python3 email_integration/test_email_activity_panel.py
|
||||||
|
"""
|
||||||
|
import os
|
||||||
|
import sqlite3
|
||||||
|
import sys
|
||||||
|
|
||||||
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
from email_integration import db as _db # noqa: E402
|
||||||
|
|
||||||
|
FAILS = []
|
||||||
|
|
||||||
|
|
||||||
|
def check(cond, msg):
    """Report one assertion: print a PASS/FAIL line and record the label on failure.

    cond: any value; only its truthiness matters.
    msg:  text printed after the tag; appended to the module-level FAILS list
          when cond is falsy so main() can summarise at the end.
    """
    tag = " PASS " if cond else " FAIL "
    print(tag + msg)
    if cond:
        return
    FAILS.append(msg)
|
||||||
|
|
||||||
|
|
||||||
|
def make_db():
    """Build and return a fresh in-memory SQLite database holding the synthetic fixture.

    The connection uses sqlite3.Row rows. The fixture has two mailboxes and eight
    emails whose investor links cover each way a match can arise (see the email
    table below). The caller owns the returned connection and must close it.
    """
    conn = sqlite3.connect(":memory:")
    conn.row_factory = sqlite3.Row
    conn.executescript("""
        CREATE TABLE email_accounts (id TEXT PRIMARY KEY, email_address TEXT);
        CREATE TABLE emails (id TEXT PRIMARY KEY, subject TEXT, from_name TEXT, from_email TEXT,
            sent_at TEXT, snippet TEXT, body_text TEXT, body_html TEXT, gmail_thread_id TEXT,
            has_attachments INT DEFAULT 0, is_matched INT DEFAULT 0,
            match_status TEXT DEFAULT 'unmatched');
        CREATE TABLE email_account_messages (id TEXT PRIMARY KEY, email_id TEXT, account_id TEXT,
            is_sent INT DEFAULT 0, deleted_at TEXT);
        CREATE TABLE email_recipients (id TEXT PRIMARY KEY, email_id TEXT, address TEXT,
            display_name TEXT, kind TEXT);
        CREATE TABLE email_attachments (id TEXT PRIMARY KEY, email_id TEXT, filename TEXT,
            mime_type TEXT, size_bytes INTEGER, download_status TEXT);
        CREATE TABLE email_investor_links (id TEXT PRIMARY KEY, email_id TEXT,
            fundraising_investor_id TEXT, fundraising_contact_id TEXT,
            organization_id TEXT, contact_id TEXT, matched_address TEXT);
        CREATE TABLE fundraising_investors (id TEXT PRIMARY KEY, investor_name TEXT, graveyard INTEGER DEFAULT 0);
        CREATE TABLE fundraising_contacts (id TEXT PRIMARY KEY, investor_id TEXT, full_name TEXT);
        CREATE TABLE organizations (id TEXT PRIMARY KEY, name TEXT, deleted_at TEXT);
        CREATE TABLE contacts (id TEXT PRIMARY KEY, first_name TEXT, last_name TEXT,
            organization_id TEXT, deleted_at TEXT);
    """)

    # Two mailboxes (us); investors reached different ways: a grid investor directly,
    # a grid investor only via a contact link, a graveyarded grid investor, an org-only
    # (domain) match, and a classic-contact-only match (the case that left the dropdown
    # empty before v83 - neither carries a grid id).
    mailboxes = [
        ("acc-grant", "grant@ten31.xyz"),
        ("acc-jon", "jonathan@ten31.xyz"),
    ]
    grid_investors = [
        ("inv-harbor", "Harbor & Vine", 0),
        ("inv-pacific", "Pacific Capital", 0),
        ("inv-dead", "Dead Deal LP", 1),
    ]
    conn.executemany("INSERT INTO email_accounts VALUES (?,?)", mailboxes)
    conn.executemany("INSERT INTO fundraising_investors VALUES (?,?,?)", grid_investors)
    conn.execute("INSERT INTO fundraising_contacts VALUES ('fc-1','inv-pacific','Sarah Williams')")
    conn.execute("INSERT INTO organizations VALUES ('org-bridge','Bridgewater',NULL)")
    conn.execute("INSERT INTO contacts VALUES ('c-solo','Nina','Park',NULL,NULL)")

    # Emails:
    #   e1 outbound -> Harbor (grid), seen by grant
    #   e2 inbound  -> Harbor (grid), seen by grant + jonathan
    #   e3 inbound  -> Pacific via grid contact link, seen by jonathan
    #   e4 inbound, UNMATCHED -> excluded (matched-only)
    #   e5 inbound, only sighting tombstoned -> excluded
    #   e6 inbound  -> Dead Deal LP (graveyard grid investor)
    #   e7 inbound  -> Bridgewater via ORG-domain match (no grid id)
    #   e8 inbound  -> Nina Park via CLASSIC-contact match (no grid id, no org)
    email_rows = [
        ("e1", "Fund III update", "Grant", "grant@ten31.xyz", "2026-06-05T10:00:00", "here is the deck", 1, 1, "matched"),
        ("e2", "Re: Fund III update", "LP Harbor", "lp@harborvine.example", "2026-06-06T09:00:00", "thanks, one question", 0, 1, "matched"),
        ("e3", "Intro", "Sarah Williams", "sarah@pacificcap.example", "2026-06-07T08:00:00", "would love to chat", 0, 1, "matched"),
        ("e4", "Cold inbound", "Random", "noreply@spam.example", "2026-06-08T08:00:00", "buy now", 0, 0, "unmatched"),
        ("e5", "Deleted thread", "Ghost", "ghost@x.example", "2026-06-09T08:00:00", "gone", 0, 1, "matched"),
        ("e6", "Old dead-deal thread", "Dead LP", "lp@deaddeal.example", "2026-06-01T00:00:00", "we passed", 0, 1, "matched"),
        ("e7", "Macro view", "Ray", "ray@bridgewater.example", "2026-06-10T08:00:00", "rates outlook", 0, 1, "matched"),
        ("e8", "Coffee?", "Nina Park", "nina@solo.example", "2026-06-11T08:00:00", "in town next week", 0, 1, "matched"),
    ]
    conn.executemany(
        "INSERT INTO emails (id,subject,from_name,from_email,sent_at,snippet,has_attachments,is_matched,match_status) VALUES (?,?,?,?,?,?,?,?,?)",
        email_rows,
    )

    # One row per (email, mailbox) sighting; deleted_at marks a soft-deleted sighting.
    sightings = [
        ("m1", "e1", "acc-grant", 1, None),
        ("m2", "e2", "acc-grant", 0, None),
        ("m3", "e2", "acc-jon", 0, None),
        ("m4", "e3", "acc-jon", 0, None),
        ("m5", "e4", "acc-grant", 0, None),
        ("m6", "e5", "acc-grant", 0, "2026-06-10T00:00:00"),  # tombstoned
        ("m7", "e6", "acc-grant", 0, None),
        ("m8", "e7", "acc-grant", 0, None),
        ("m9", "e8", "acc-jon", 0, None),
    ]
    conn.executemany(
        "INSERT INTO email_account_messages (id,email_id,account_id,is_sent,deleted_at) VALUES (?,?,?,?,?)",
        sightings,
    )

    # Investor links; e4 deliberately has none (there is no l4).
    investor_links = [
        ("l1", "e1", "inv-harbor", None, None, None, "lp@harborvine.example"),
        ("l2", "e2", "inv-harbor", None, None, None, "lp@harborvine.example"),
        ("l3", "e3", None, "fc-1", None, None, "sarah@pacificcap.example"),
        ("l5", "e5", "inv-harbor", None, None, None, "lp@harborvine.example"),
        ("l6", "e6", "inv-dead", None, None, None, "lp@deaddeal.example"),
        ("l7", "e7", None, None, "org-bridge", None, "ray@bridgewater.example"),
        ("l8", "e8", None, None, None, "c-solo", "nina@solo.example"),
    ]
    conn.executemany(
        "INSERT INTO email_investor_links (id,email_id,fundraising_investor_id,fundraising_contact_id,organization_id,contact_id,matched_address) VALUES (?,?,?,?,?,?,?)",
        investor_links,
    )

    # Full body + recipients + an attachment on e2, for the detail view.
    conn.execute(
        "UPDATE emails SET body_text = ?, gmail_thread_id = ?, has_attachments = 1 WHERE id = 'e2'",
        ("Thanks for the deck — one question on the carry.", "thr-harbor"),
    )
    recipient_rows = [
        ("r1", "e2", "grant@ten31.xyz", "Grant", "to"),
        ("r2", "e2", "jonathan@ten31.xyz", "Jonathan", "cc"),
        ("r3", "e2", "lp@harborvine.example", "LP Harbor", "from"),  # from -> not surfaced
    ]
    conn.executemany(
        "INSERT INTO email_recipients (id,email_id,address,display_name,kind) VALUES (?,?,?,?,?)",
        recipient_rows,
    )
    conn.execute("INSERT INTO email_attachments (id,email_id,filename,mime_type,size_bytes,download_status) "
                 "VALUES ('a1','e2','term_sheet.pdf','application/pdf',20480,'downloaded')")

    conn.commit()
    return conn
|
||||||
|
|
||||||
|
|
||||||
|
def ids(res):
    """Return the "id" of every entry in res["emails"], in result order."""
    collected = []
    for email in res["emails"]:
        collected.append(email["id"])
    return collected
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Run every email-activity panel check; exit with status 1 if any failed.

    Exercises `_db.query_email_activity` and `_db.query_email_detail` against the
    make_db() fixture, then runs the two route-level tests. Results are reported
    through check(); nothing is returned.

    Robustness notes:
      * the connection is closed in a `finally`, so it is released even when a
        query raises;
      * a row missing from the result set yields FAIL lines (lookups return None)
        instead of aborting the whole run with StopIteration / IndexError.
    """
    conn = make_db()
    try:
        def query(**filters):
            # Thin wrapper so each filter check reads as a one-liner.
            return _db.query_email_activity(conn, **filters)

        # --- baseline: matched live emails only, newest first, tombstoned excluded ---
        res = query()

        def find(email_id):
            # None (rather than StopIteration) when the baseline result lacks the row.
            return next((e for e in res["emails"] if e["id"] == email_id), None)

        def investors_of(email_id):
            row = find(email_id)
            return None if row is None else row["investors"]

        check(ids(res) == ["e8", "e7", "e3", "e2", "e1", "e6"],
              f"matched live emails newest-first; e5 (tombstoned) + e4 (unmatched) excluded; got {ids(res)}")
        check(res["count"] == 6 and res["truncated"] is False, "count + not truncated")
        check("e4" not in ids(res), "unmatched email (no investor link) never surfaces in the panel")

        # --- direction at the email level ---
        e1 = find("e1")
        e2 = find("e2")
        check(e1 is not None and e1["direction"] == "outbound", "e1 from our mailbox -> outbound")
        check(e2 is not None and e2["direction"] == "inbound", "e2 from LP -> inbound")
        # One query, compared as a whole list: equivalent to "first id is e1 and
        # exactly one row", but cannot raise IndexError on an empty result.
        check(ids(query(direction="outbound")) == ["e1"], "direction=outbound returns only e1")
        check(ids(query(direction="inbound")) == ["e8", "e7", "e3", "e2", "e6"],
              "direction=inbound excludes the outbound e1 (and unmatched e4)")

        # --- mailbox roll-up + per-account filter ---
        check(e2 is not None and set(e2["mailboxes"]) == {"grant@ten31.xyz", "jonathan@ten31.xyz"},
              "e2 seen by both mailboxes")
        check(ids(query(account_id="acc-jon")) == ["e8", "e3", "e2"],
              "account_id=acc-jon returns only emails that mailbox saw")

        # --- date-range filter [since, until) over sent_at ---
        check(ids(query(since="2026-06-07T00:00:00", until="2026-06-11T00:00:00")) == ["e7", "e3"],
              "date range [06-07, 06-11) -> e7,e3 (excludes 06-11 e8 and earlier e2/e1/e6)")
        check(ids(query(since="2026-06-10T00:00:00")) == ["e8", "e7"],
              "since=06-10 -> e8,e7 only")

        # --- investor filter: typed keys + legacy bare-id back-compat ---
        check(set(ids(query(investor_id="fund:inv-harbor"))) == {"e2", "e1"},
              "investor_id=fund:inv-harbor -> e1,e2")
        check(set(ids(query(investor_id="inv-harbor"))) == {"e2", "e1"},
              "legacy bare id treated as fund: -> e1,e2")
        check(ids(query(investor_id="fund:inv-pacific")) == ["e3"],
              "fund:inv-pacific resolved through fundraising_contacts -> e3")
        check(ids(query(investor_id="org:org-bridge")) == ["e7"],
              "org:org-bridge -> e7 (org-domain match)")
        check(ids(query(investor_id="contact:c-solo")) == ["e8"],
              "contact:c-solo -> e8 (classic-contact match)")
        check(query(investor_id="bogus:x")["emails"] == [],
              "unknown investor_id key prefix -> match nothing (never silently unfiltered)")

        # --- investor identity roll-up, typed + resolved name ---
        check(investors_of("e1") == [{"id": "fund:inv-harbor", "name": "Harbor & Vine"}],
              "e1 grid investor resolved")
        check(investors_of("e3") == [{"id": "fund:inv-pacific", "name": "Pacific Capital"}],
              "e3 resolved via grid contact")
        check(investors_of("e7") == [{"id": "org:org-bridge", "name": "Bridgewater"}],
              "e7 org-domain match resolves to the org name (not a raw address)")
        check(investors_of("e8") == [{"id": "contact:c-solo", "name": "Nina Park"}],
              "e8 classic-contact match resolves to the contact name")

        # --- free-text search over subject / snippet / sender ---
        check(set(ids(query(search="Fund III"))) == {"e1", "e2"}, "search subject")
        check(ids(query(search="pacificcap")) == ["e3"], "search sender address")
        check(ids(query(search="deck")) == ["e1"], "search snippet (matched email)")
        check(ids(query(search="buy now")) == [],
              "unmatched email never surfaces, even by free-text search")

        # --- facets: typed entries spanning grid / org / contact matches ---
        check([a["email_address"] for a in res["accounts"]] == ["grant@ten31.xyz", "jonathan@ten31.xyz"],
              "accounts facet sorted")
        facet_inv = {i["id"] for i in res["investors"]}
        check(facet_inv == {"fund:inv-harbor", "fund:inv-pacific", "org:org-bridge", "contact:c-solo"},
              f"investor facet now mirrors the list (grid + org + contact), not just grid; got {facet_inv}")
        check([i["name"] for i in res["investors"]] == sorted(i["name"] for i in res["investors"]),
              "facet sorted by display name")

        # --- graveyard: hidden from the picker, but its email stays visible + findable ---
        check("fund:inv-dead" not in facet_inv, "graveyard investor excluded from the facet dropdown")
        check("e6" in ids(res), "graveyard investor's email still shows in the unfiltered list (audit completeness)")
        check(investors_of("e6") == [{"id": "fund:inv-dead", "name": "Dead Deal LP"}],
              "graveyard email still shows its investor chip")
        check(ids(query(investor_id="fund:inv-dead")) == ["e6"],
              "explicit investor_id filter still works for a graveyard investor")
        check(ids(query(search="deaddeal")) == ["e6"],
              "graveyard email remains findable by free-text search")

        # --- truncation ---
        tr = query(limit=2)
        check(tr["count"] == 2 and tr["truncated"] is True, "limit=2 -> truncated")

        # --- detail view (full body + recipients + attachments + identity) ---
        d = _db.query_email_detail(conn, "e2")
        have_detail = d is not None  # guards the field checks below against a None detail
        check(have_detail and d["body_text"] == "Thanks for the deck — one question on the carry.",
              "detail returns the full body")
        check(have_detail and d["direction"] == "inbound"
              and set(d["mailboxes"]) == {"grant@ten31.xyz", "jonathan@ten31.xyz"},
              "detail direction + mailboxes")
        check(have_detail and [(r["address"], r["kind"]) for r in d["recipients"]] ==
              [("grant@ten31.xyz", "to"), ("jonathan@ten31.xyz", "cc")],
              "detail recipients = to/cc only (from is excluded)")
        check(have_detail and [a["filename"] for a in d["attachments"]] == ["term_sheet.pdf"],
              "detail lists attachments")
        check(have_detail and d["investors"] == [{"id": "fund:inv-harbor", "name": "Harbor & Vine"}],
              "detail resolves investor identity")
        check(_db.query_email_detail(conn, "e5") is None,
              "detail of a tombstoned-only email -> None (soft-delete on the sighting)")
        check(_db.query_email_detail(conn, "nope") is None, "detail of a missing id -> None")
    finally:
        conn.close()

    # --- route enforces admin server-side ---
    test_route_admin_only()
    # --- semantic content-search route (hydrate + soft-delete + 503) ---
    test_search_route()

    if FAILS:
        print(f"\nFAILED ({len(FAILS)})")
        for f in FAILS:
            print(" - " + f)
        sys.exit(1)
    print("\nALL PASS (email-activity panel)")
|
||||||
|
|
||||||
|
|
||||||
|
class FakeHandler:
    """Minimal stand-in for the HTTP request handler that route functions receive.

    It serves a canned user and query-parameter dict, and records whatever the
    route sends back so a test can inspect it afterwards:
      json - object passed to send_json(), else None
      err  - message passed to send_error_json(), else None
      code - status code passed to send_error_json(), else None
    """

    def __init__(self, user, params=None):
        self._user, self._params = user, (params or {})
        # Nothing has been sent yet.
        self.json = self.err = self.code = None

    def get_user(self):
        """Return the user object supplied at construction (None = unauthenticated)."""
        return self._user

    def get_query_params(self):
        """Return the query-parameter dict supplied at construction ({} by default)."""
        return self._params

    def send_json(self, obj):
        """Record a successful JSON response body."""
        self.json = obj

    def send_error_json(self, msg, code):
        """Record an error response: its message and status code."""
        self.err, self.code = msg, code
|
||||||
|
|
||||||
|
|
||||||
|
def test_route_admin_only():
    """Check that the activity route rejects non-admin callers on the server side.

    Calls `routes._h_activity` with no user (expects 401) and with a "member"
    user (expects 403); in both cases no JSON body may have been sent. If the
    routes module cannot be imported the test prints a SKIP line and returns.
    """
    try:
        from email_integration import routes
    except Exception as e:  # pragma: no cover - optional deps missing in some dev envs
        print(f" SKIP route admin test (routes import failed: {e})")
        return

    # (user passed to the handler, expected status code, check label)
    scenarios = (
        (None, 401, "route: no user -> 401"),
        ({"role": "member", "user_id": "u1"}, 403, "route: member -> 403 (admin enforced server-side)"),
    )
    for user, expected_code, label in scenarios:
        h = FakeHandler(user)
        routes._h_activity(h)
        check(h.code == expected_code and h.json is None, label)
|
||||||
|
|
||||||
|
|
||||||
|
def test_search_route():
    """Check the semantic content-search route (`routes._h_search`).

    Covers: hydration of retrieval hits from the DB (tombstoned and missing ids
    are dropped), the empty-query short-circuit, a 503 when retrieval raises, and
    the server-side admin check. If the routes module cannot be imported the
    test prints a SKIP line and returns.

    The test swaps `routes._conn` and `routes._semantic_email_search` for stubs;
    both are restored in a `finally` so the stubs cannot leak into later code.
    """
    try:
        from email_integration import routes
    except Exception as e:  # pragma: no cover
        print(f" SKIP search route test (routes import failed: {e})")
        return

    # Remember the real attributes (or their absence) so they can be put back.
    absent = object()
    patched = ("_conn", "_semantic_email_search")
    saved = {name: getattr(routes, name, absent) for name in patched}
    try:
        # Hydration source = a fresh fully-populated in-memory DB each call (the handler
        # opens + closes its own conn). Retrieval is stubbed - no Spark/Qdrant in tests.
        routes._conn = make_db
        routes._semantic_email_search = lambda query, top_k: [
            {"score": 0.91, "text": "carry discussion\nand terms", "payload": {"source_id": "e2", "lp_name": "Harbor & Vine"}},
            {"score": 0.80, "text": "gone", "payload": {"source_id": "e5", "lp_name": "Ghost"}},  # tombstoned -> drop
            {"score": 0.70, "text": "n/a", "payload": {"source_id": "missing", "lp_name": "Nobody"}},  # missing -> drop
        ]
        h = FakeHandler({"role": "admin"}, {"q": "carry"})
        routes._h_search(h)
        # Tolerate "no response" / "no results": report FAIL rather than crash.
        results = (h.json or {}).get("results") or []
        hit_ids = [r["email_id"] for r in results]
        check(hit_ids == ["e2"],
              f"content search drops tombstoned + missing, keeps live e2; got {h.json and hit_ids}")
        if results:
            top = results[0]
            check(top["lp_name"] == "Harbor & Vine" and top["score"] == 0.91 and top["subject"] == "Re: Fund III update",
                  "hit carries lp_name + score + hydrated subject")
            check("\n" not in top["excerpt"], "excerpt is newline-flattened")
        else:
            # No hit to inspect: record both dependent checks as failures.
            check(False, "hit carries lp_name + score + hydrated subject")
            check(False, "excerpt is newline-flattened")

        # empty query short-circuits (no retrieval call)
        h = FakeHandler({"role": "admin"}, {"q": ""})
        routes._h_search(h)
        check(h.json == {"query": "", "results": []}, "empty query -> empty results")

        # retrieval failure -> clean 503 (Spark/Qdrant down)
        def _boom(query, top_k):
            raise RuntimeError("spark down")

        routes._semantic_email_search = _boom
        h = FakeHandler({"role": "admin"}, {"q": "x"})
        routes._h_search(h)
        check(h.code == 503, f"retrieval failure -> 503, got {h.code}")

        # admin enforced
        h = FakeHandler({"role": "member"}, {"q": "x"})
        routes._h_search(h)
        check(h.code == 403, "content search admin-enforced server-side")
    finally:
        for name, original in saved.items():
            if original is absent:
                # The attribute did not exist before the test; remove the stub.
                if hasattr(routes, name):
                    delattr(routes, name)
            else:
                setattr(routes, name, original)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
@@ -0,0 +1,72 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Regression test for list_sync_ready_accounts (v0.1.0:104).
|
||||||
|
|
||||||
|
Guards the Bug-B fix: a transient network timeout used to flip a mailbox to terminal
|
||||||
|
sync_status='error', which the old `IN ('pending','active')` filter excluded forever —
|
||||||
|
so a single blip dark-listed a mailbox until a manual kick. The new filter:
|
||||||
|
* always includes 'pending' / 'active' / 'retrying' (the transient-retry state), and
|
||||||
|
* re-includes 'error' accounts whose last attempt was over an hour ago (gentle backoff,
|
||||||
|
so a fixed credential self-heals and the pre-fix stuck mailboxes recover on deploy).
|
||||||
|
Synthetic data only (guardrail #9).
|
||||||
|
Run: cd backend && python3 email_integration/test_sync_ready.py
|
||||||
|
"""
|
||||||
|
import os
|
||||||
|
import sqlite3
|
||||||
|
import sys
|
||||||
|
|
||||||
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
from email_integration import db as edb # noqa: E402
|
||||||
|
|
||||||
|
FAILS = []
|
||||||
|
|
||||||
|
|
||||||
|
def check(cond, msg):
    """Print a PASS/FAIL line for one expectation; collect the label when it fails.

    cond: any value; only its truthiness is used.
    msg:  text printed after the tag and appended to the module-level FAILS list
          when cond is falsy.
    """
    tag = " PASS " if cond else " FAIL "
    print(tag + msg)
    if cond:
        return
    FAILS.append(msg)
|
||||||
|
|
||||||
|
|
||||||
|
def make_account(conn, email, status, *, enabled=1, last_synced_sql="NULL"):
    """Create one email account in a chosen sync state and return its id.

    conn:            open sqlite3 connection with the email schema applied.
    email:           mailbox address; also used to derive the user_id ("u-" + email).
    status:          value stored in email_accounts.sync_status.
    enabled:         value stored in email_accounts.sync_enabled (keyword-only).
    last_synced_sql: raw SQL expression spliced into the UPDATE for last_synced_at,
                     e.g. "NULL" or "datetime('now','-2 hours')". It is interpolated,
                     not bound, so pass trusted test literals only.
    """
    account_id = edb.upsert_account(
        conn,
        user_id="u-" + email,
        email_address=email,
        auth_method="dwd",
    )
    # last_synced_at must be an SQL expression evaluated by SQLite, so it cannot
    # be a bound parameter; the remaining values are bound as usual.
    update_sql = (
        "UPDATE email_accounts SET sync_status=?, sync_enabled=?, "
        f"last_synced_at={last_synced_sql} WHERE id=?"
    )
    conn.execute(update_sql, (status, enabled, account_id))
    conn.commit()
    return account_id
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Run the list_sync_ready_accounts regression checks; exit 1 on any failure.

    Seeds one account per interesting state (active, retrying, pending, error with
    a stale / recent / NULL last attempt, and a disabled account), asks
    `edb.list_sync_ready_accounts` which are ready, and reports each expectation
    through check(). Nothing is returned.
    """
    # Local import keeps this fix self-contained (the file header does not import it).
    from contextlib import closing

    # closing() releases the connection even if seeding or the query raises;
    # previously it was never closed.
    with closing(sqlite3.connect(":memory:")) as conn:
        conn.row_factory = sqlite3.Row
        edb.apply_migrations(conn.cursor())

        make_account(conn, "active@t.xyz", "active", last_synced_sql="datetime('now','-5 minutes')")
        make_account(conn, "retrying@t.xyz", "retrying", last_synced_sql="datetime('now','-5 minutes')")
        make_account(conn, "pending@t.xyz", "pending", last_synced_sql="NULL")
        make_account(conn, "error_stale@t.xyz", "error", last_synced_sql="datetime('now','-2 hours')")
        make_account(conn, "error_recent@t.xyz", "error", last_synced_sql="datetime('now','-5 minutes')")
        make_account(conn, "error_neversync@t.xyz", "error", last_synced_sql="NULL")
        make_account(conn, "disabled@t.xyz", "active", enabled=0, last_synced_sql="datetime('now','-5 minutes')")

        # Materialise the addresses while the connection is still open.
        ready = {r["email_address"] for r in edb.list_sync_ready_accounts(conn)}

    check("active@t.xyz" in ready, "healthy 'active' is ready")
    check("retrying@t.xyz" in ready, "transient 'retrying' is ready (fast retry)")
    check("pending@t.xyz" in ready, "'pending' is ready")
    check("error_stale@t.xyz" in ready, "'error' last attempted >1h ago is ready (backoff elapsed → recovers stuck mailbox)")
    check("error_neversync@t.xyz" in ready, "'error' never synced (NULL last attempt) is ready")
    check("error_recent@t.xyz" not in ready, "'error' attempted <1h ago is NOT ready (gentle backoff, no hammering)")
    check("disabled@t.xyz" not in ready, "sync_enabled=0 is never ready")

    print()
    if FAILS:
        print(f"{len(FAILS)} FAILED")
        # List the failing expectations, matching the summary of the sibling test scripts.
        for f in FAILS:
            print(" - " + f)
        sys.exit(1)
    print("ALL PASS (sync ready filter)")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
@@ -4,7 +4,7 @@ Maps each CRM record type to one or more chunks per docs/EMBEDDINGS.md:
|
|||||||
* one chunk per communications row (doc_type = the comm type)
|
* one chunk per communications row (doc_type = the comm type)
|
||||||
* one chunk per MATCHED email (doc_type = email; body only when matched)
|
* one chunk per MATCHED email (doc_type = email; body only when matched)
|
||||||
* one chunk per fundraising_investors notes LINE (the outreach log; split per line)
|
* one chunk per fundraising_investors notes LINE (the outreach log; split per line)
|
||||||
* one chunk each for free-text fields: contacts.notes, lp_profiles.notes,
|
* one chunk each for free-text fields: contacts.notes,
|
||||||
opportunities (description + next_step), organizations.description
|
opportunities (description + next_step), organizations.description
|
||||||
|
|
||||||
Each chunk carries a canonical `lp_id` (resolved via entity_links) and a `date_ts`
|
Each chunk carries a canonical `lp_id` (resolved via entity_links) and a `date_ts`
|
||||||
@@ -104,13 +104,6 @@ def build_chunks(conn):
|
|||||||
chunks.append(_mk(f"contacts.notes:{r['id']}", lp, lp_name, person,
|
chunks.append(_mk(f"contacts.notes:{r['id']}", lp, lp_name, person,
|
||||||
"contact_note", to_epoch(r["updated_at"]), r["notes"], "contacts", r["id"]))
|
"contact_note", to_epoch(r["updated_at"]), r["notes"], "contacts", r["id"]))
|
||||||
|
|
||||||
# lp_profiles.notes
|
|
||||||
for r in conn.execute("""SELECT lp.id, lp.contact_id, lp.notes, lp.updated_at
|
|
||||||
FROM lp_profiles lp WHERE lp.notes IS NOT NULL AND lp.notes <> '' AND lp.deleted_at IS NULL"""):
|
|
||||||
lp, lp_name, person = _contact_lp(r["contact_id"], person_canon, org_canon, name, contact_org)
|
|
||||||
chunks.append(_mk(f"lp_profiles.notes:{r['id']}", lp, lp_name, person,
|
|
||||||
"lp_note", to_epoch(r["updated_at"]), r["notes"], "lp_profiles", r["id"]))
|
|
||||||
|
|
||||||
# opportunities (description + next_step)
|
# opportunities (description + next_step)
|
||||||
for r in conn.execute("""SELECT id, contact_id, name, description, next_step, updated_at
|
for r in conn.execute("""SELECT id, contact_id, name, description, next_step, updated_at
|
||||||
FROM opportunities WHERE deleted_at IS NULL"""):
|
FROM opportunities WHERE deleted_at IS NULL"""):
|
||||||
|
|||||||
@@ -8,7 +8,6 @@ layer created by migration 0001:
|
|||||||
fundraising_investors ─┴─► canonical_entities (entity_kind = lp | organization)
|
fundraising_investors ─┴─► canonical_entities (entity_kind = lp | organization)
|
||||||
contacts ─┐
|
contacts ─┐
|
||||||
fundraising_contacts ─┴─► canonical_entities (entity_kind = person)
|
fundraising_contacts ─┴─► canonical_entities (entity_kind = person)
|
||||||
lp_profiles ───► linked to its contact's person entity
|
|
||||||
|
|
||||||
Every source row is recorded in `entity_links` so any name variant resolves to
|
Every source row is recorded in `entity_links` so any name variant resolves to
|
||||||
one canonical id. This is the DETERMINISTIC tier — it merges only what we can
|
one canonical id. This is the DETERMINISTIC tier — it merges only what we can
|
||||||
@@ -184,7 +183,7 @@ def resolve_people(conn, org_canon_by_orgid, org_canon_by_fundinv, merge_map=Non
|
|||||||
people — each is matched to a contact-person and recorded only as a member_of
|
people — each is matched to a contact-person and recorded only as a member_of
|
||||||
edge to its investor entity (the grid's 'Contacts' column says who belongs to
|
edge to its investor entity (the grid's 'Contacts' column says who belongs to
|
||||||
which investor). This is what stops the double-count.
|
which investor). This is what stops the double-count.
|
||||||
Returns contact_id -> person canonical id (for lp_profiles)."""
|
Returns contact_id -> person canonical id."""
|
||||||
merge_map = merge_map or {}
|
merge_map = merge_map or {}
|
||||||
contact_to_person = {}
|
contact_to_person = {}
|
||||||
person_meta = {}
|
person_meta = {}
|
||||||
@@ -245,12 +244,6 @@ def resolve_people(conn, org_canon_by_orgid, org_canon_by_fundinv, merge_map=Non
|
|||||||
_link(conn, cid, "fundraising_contacts", r["id"], email or name_norm, mk, 0.95 if mk == "grid_link" else 0.9)
|
_link(conn, cid, "fundraising_contacts", r["id"], email or name_norm, mk, 0.95 if mk == "grid_link" else 0.9)
|
||||||
_member_of(conn, cid, inv_canon)
|
_member_of(conn, cid, inv_canon)
|
||||||
|
|
||||||
# lp_profiles -> the person entity of its contact
|
|
||||||
for r in conn.execute("SELECT id, contact_id FROM lp_profiles WHERE deleted_at IS NULL"):
|
|
||||||
cid = contact_to_person.get(r["contact_id"])
|
|
||||||
if cid:
|
|
||||||
_link(conn, cid, "lp_profiles", r["id"], r["contact_id"], "contact_fk", 1.0)
|
|
||||||
|
|
||||||
return person_meta
|
return person_meta
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -26,6 +26,31 @@ def chat(prompt, system=None, max_tokens=200, temperature=0.0):
|
|||||||
return (data["choices"][0]["message"].get("content") or "").strip()
|
return (data["choices"][0]["message"].get("content") or "").strip()
|
||||||
|
|
||||||
|
|
||||||
|
def chat_vision(prompt, image_b64, mime="image/jpeg", system=None, max_tokens=600, temperature=0.0):
    """Send a text prompt plus one base64-encoded image to the chat-completions endpoint.

    The user turn's `content` is a two-part array (a text part followed by an image_url part
    whose URL is a `data:<mime>;base64,<payload>` URI). An optional system turn is placed
    before it. The request goes to `{config.SPARK_CONTROL_URL}/v1/chat/completions` using
    `config.CHAT_MODEL`, with `enable_thinking` switched off in `chat_template_kwargs`.

    Args:
        prompt: text part of the user turn.
        image_b64: base64 text of the image (no data-URI prefix; it is added here).
        mime: MIME type written into the data URI.
        system: optional system message; skipped when falsy.
        max_tokens: forwarded as `max_tokens`.
        temperature: forwarded as `temperature`.

    Returns:
        The first choice's message content, stripped; "" when the content is missing/empty.

    Raises:
        RuntimeError: when the endpoint answers with any status other than 200.
    """
    text_part = {"type": "text", "text": prompt}
    image_part = {"type": "image_url", "image_url": {"url": f"data:{mime};base64,{image_b64}"}}
    user_turn = {"role": "user", "content": [text_part, image_part]}
    leading_turns = [{"role": "system", "content": system}] if system else []

    payload = {
        "model": config.CHAT_MODEL,
        "messages": leading_turns + [user_turn],
        "temperature": temperature,
        "max_tokens": max_tokens,
        "chat_template_kwargs": {"enable_thinking": False},
    }
    endpoint = f"{config.SPARK_CONTROL_URL}/v1/chat/completions"
    status, data = http_util.request("POST", endpoint, payload, verify=config.SPARK_VERIFY_TLS)
    if status != 200:
        raise RuntimeError(f"/v1/chat/completions (vision) -> {status}: {data}")

    reply = data["choices"][0]["message"].get("content")
    return (reply or "").strip()
|
||||||
|
|
||||||
|
|
||||||
def chat_json(prompt, system=None, max_tokens=200):
|
def chat_json(prompt, system=None, max_tokens=200):
|
||||||
"""Chat and parse the first JSON object from the reply (tolerant of fences)."""
|
"""Chat and parse the first JSON object from the reply (tolerant of fences)."""
|
||||||
raw = chat(prompt, system=system, max_tokens=max_tokens)
|
raw = chat(prompt, system=system, max_tokens=max_tokens)
|
||||||
|
|||||||
@@ -34,7 +34,7 @@ import entity_resolution as er
|
|||||||
import qdrant_io
|
import qdrant_io
|
||||||
|
|
||||||
_CHANGE_TABLES = [("communications", "communications"), ("contacts", "contacts"),
|
_CHANGE_TABLES = [("communications", "communications"), ("contacts", "contacts"),
|
||||||
("lp_profiles", "lp_profiles"), ("opportunities", "opportunities"),
|
("opportunities", "opportunities"),
|
||||||
("organizations", "organizations"), ("fundraising_investors", "fundraising_investors")]
|
("organizations", "organizations"), ("fundraising_investors", "fundraising_investors")]
|
||||||
|
|
||||||
|
|
||||||
@@ -63,7 +63,7 @@ def _state_set(conn, key, value):
|
|||||||
def _deleted_source_ids(conn, since):
|
def _deleted_source_ids(conn, since):
|
||||||
"""CRM records soft-deleted since the watermark — their chunks get pruned."""
|
"""CRM records soft-deleted since the watermark — their chunks get pruned."""
|
||||||
ids = set()
|
ids = set()
|
||||||
for tbl in ("contacts", "organizations", "opportunities", "communications", "lp_profiles"):
|
for tbl in ("contacts", "organizations", "opportunities", "communications"):
|
||||||
try:
|
try:
|
||||||
for r in conn.execute(f"SELECT id FROM {tbl} WHERE deleted_at IS NOT NULL AND deleted_at > ?", (since,)):
|
for r in conn.execute(f"SELECT id FROM {tbl} WHERE deleted_at IS NOT NULL AND deleted_at > ?", (since,)):
|
||||||
ids.add(r["id"])
|
ids.add(r["id"])
|
||||||
|
|||||||
@@ -12,7 +12,7 @@ Asserts the SAFE fix:
|
|||||||
3. a grid contact that can't be PROVABLY matched mints NOTHING (no duplicate
|
3. a grid contact that can't be PROVABLY matched mints NOTHING (no duplicate
|
||||||
person, no cross-firm name guess) — the count stays correct,
|
person, no cross-firm name guess) — the count stays correct,
|
||||||
4. targeted cleanup soft-deletes a stale grid-only "twin" (person with no
|
4. targeted cleanup soft-deletes a stale grid-only "twin" (person with no
|
||||||
contacts link) and a superseded 'lp'/'organization' row, with no enrichment,
|
contacts link), with no enrichment,
|
||||||
5. cleanup PRESERVES a grid-only person that carries enrichment (guardrail #3),
|
5. cleanup PRESERVES a grid-only person that carries enrichment (guardrail #3),
|
||||||
6. a re-emitted id is UN-tombstoned (no permanent burial),
|
6. a re-emitted id is UN-tombstoned (no permanent burial),
|
||||||
7. re-running is idempotent.
|
7. re-running is idempotent.
|
||||||
@@ -58,10 +58,9 @@ CREATE TABLE contacts (
|
|||||||
CREATE TABLE organizations (id TEXT PRIMARY KEY, name TEXT, email TEXT);
|
CREATE TABLE organizations (id TEXT PRIMARY KEY, name TEXT, email TEXT);
|
||||||
CREATE TABLE fundraising_investors (id TEXT PRIMARY KEY, investor_name TEXT);
|
CREATE TABLE fundraising_investors (id TEXT PRIMARY KEY, investor_name TEXT);
|
||||||
CREATE TABLE fundraising_contacts (id TEXT PRIMARY KEY, full_name TEXT, email TEXT, investor_id TEXT, contact_id TEXT);
|
CREATE TABLE fundraising_contacts (id TEXT PRIMARY KEY, full_name TEXT, email TEXT, investor_id TEXT, contact_id TEXT);
|
||||||
CREATE TABLE lp_profiles (id TEXT PRIMARY KEY, contact_id TEXT, deleted_at TEXT);
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
SEEDED = ("per_TWIN", "per_ENR", "lp_OLD")
|
SEEDED = ("per_TWIN", "per_ENR")
|
||||||
|
|
||||||
|
|
||||||
def seed(db):
|
def seed(db):
|
||||||
@@ -94,16 +93,14 @@ def seed(db):
|
|||||||
"('per_ENR','person','Enriched Orphan','entity_resolution','warm')")
|
"('per_ENR','person','Enriched Orphan','entity_resolution','warm')")
|
||||||
c.execute("INSERT INTO entity_links (id, canonical_id, source_model, source_id, match_value, match_kind, confidence, created_at) "
|
c.execute("INSERT INTO entity_links (id, canonical_id, source_model, source_id, match_value, match_kind, confidence, created_at) "
|
||||||
"VALUES ('l_enr','per_ENR','fundraising_contacts','gy','enr','name_org',0.8,'t')")
|
"VALUES ('l_enr','per_ENR','fundraising_contacts','gy','enr','name_org',0.8,'t')")
|
||||||
# Superseded pre-:48 kind -> prune
|
|
||||||
c.execute("INSERT INTO canonical_entities (id, entity_kind, display_name, source) VALUES "
|
|
||||||
"('lp_OLD','lp','Old LP Row','entity_resolution')")
|
|
||||||
c.commit()
|
c.commit()
|
||||||
c.close()
|
c.close()
|
||||||
|
|
||||||
|
|
||||||
def resolved_persons(db):
|
def resolved_persons(db):
|
||||||
c = sqlite3.connect(db)
|
c = sqlite3.connect(db)
|
||||||
q = "SELECT COUNT(*) FROM canonical_entities WHERE entity_kind='person' AND deleted_at IS NULL AND id NOT IN (?,?,?)"
|
ph = ",".join("?" * len(SEEDED))
|
||||||
|
q = f"SELECT COUNT(*) FROM canonical_entities WHERE entity_kind='person' AND deleted_at IS NULL AND id NOT IN ({ph})"
|
||||||
n = c.execute(q, SEEDED).fetchone()[0]
|
n = c.execute(q, SEEDED).fetchone()[0]
|
||||||
c.close()
|
c.close()
|
||||||
return n
|
return n
|
||||||
@@ -127,10 +124,11 @@ def grid_match_kinds(db):
|
|||||||
def minted_from_grid(db):
|
def minted_from_grid(db):
|
||||||
"""Persons minted directly from a grid row (the bug). Should be 0 after the fix."""
|
"""Persons minted directly from a grid row (the bug). Should be 0 after the fix."""
|
||||||
c = sqlite3.connect(db)
|
c = sqlite3.connect(db)
|
||||||
n = c.execute("""SELECT COUNT(DISTINCT l.canonical_id) FROM entity_links l
|
ph = ",".join("?" * len(SEEDED))
|
||||||
|
n = c.execute(f"""SELECT COUNT(DISTINCT l.canonical_id) FROM entity_links l
|
||||||
JOIN canonical_entities ce ON ce.id=l.canonical_id AND ce.deleted_at IS NULL
|
JOIN canonical_entities ce ON ce.id=l.canonical_id AND ce.deleted_at IS NULL
|
||||||
WHERE l.source_model='fundraising_contacts' AND l.match_kind IN ('name_org','exact_email')
|
WHERE l.source_model='fundraising_contacts' AND l.match_kind IN ('name_org','exact_email')
|
||||||
AND l.canonical_id NOT IN (?,?,?)""", SEEDED).fetchone()[0]
|
AND l.canonical_id NOT IN ({ph})""", SEEDED).fetchone()[0]
|
||||||
c.close()
|
c.close()
|
||||||
return n
|
return n
|
||||||
|
|
||||||
@@ -162,12 +160,11 @@ def main():
|
|||||||
check(mk.get("grid_assoc", 0) == 2, f"two grid contacts matched back via grid_assoc (got {mk.get('grid_assoc',0)})")
|
check(mk.get("grid_assoc", 0) == 2, f"two grid contacts matched back via grid_assoc (got {mk.get('grid_assoc',0)})")
|
||||||
check(mk.get("grid_link", 0) == 1, f"one grid contact linked via explicit contact_id (grid_link==1, got {mk.get('grid_link',0)})")
|
check(mk.get("grid_link", 0) == 1, f"one grid contact linked via explicit contact_id (grid_link==1, got {mk.get('grid_link',0)})")
|
||||||
|
|
||||||
# Targeted cleanup: stale grid-only twin + superseded 'lp' row tombstoned...
|
# Targeted cleanup: stale grid-only twin tombstoned...
|
||||||
check(deleted_at(db, "per_TWIN") is not None, "stale grid-only twin 'per_TWIN' tombstoned")
|
check(deleted_at(db, "per_TWIN") is not None, "stale grid-only twin 'per_TWIN' tombstoned")
|
||||||
check(deleted_at(db, "lp_OLD") is not None, "superseded 'lp' row 'lp_OLD' tombstoned")
|
|
||||||
# ...enriched grid-only person preserved.
|
# ...enriched grid-only person preserved.
|
||||||
check(deleted_at(db, "per_ENR") is None, "enriched grid-only person 'per_ENR' PRESERVED (has segment)")
|
check(deleted_at(db, "per_ENR") is None, "enriched grid-only person 'per_ENR' PRESERVED (has segment)")
|
||||||
check(counts1.get("pruned_stale", 0) == 2, f"exactly 2 stale rows pruned (got {counts1.get('pruned_stale')})")
|
check(counts1.get("pruned_stale", 0) == 1, f"exactly 1 stale row pruned (got {counts1.get('pruned_stale')})")
|
||||||
|
|
||||||
# Un-tombstone: soft-delete a real contact-person, then re-run -> it comes back.
|
# Un-tombstone: soft-delete a real contact-person, then re-run -> it comes back.
|
||||||
alice = er._eid("per", "e|alice@x.com")
|
alice = er._eid("per", "e|alice@x.com")
|
||||||
|
|||||||
@@ -0,0 +1,22 @@
|
|||||||
|
# Container image for the Matrix intake bot — turns it from a bare nohup process into a managed
|
||||||
|
# service (docker compose `restart: unless-stopped` survives a Spark reboot).
|
||||||
|
#
|
||||||
|
# Build context is the REPO ROOT (see ../../docker-compose.yml), not this directory: the bot is
|
||||||
|
# NOT self-contained — spark.py reaches into backend/ingest/{llm,config,http_util}.py (stdlib
|
||||||
|
# only) via sys.path, so the image must carry both trees with the repo layout preserved. That
|
||||||
|
# keeps settings.load_env's REPO_ROOT (three dirs up from settings.py) = /app and spark.py's
|
||||||
|
# ingest path = /app/backend/ingest both correct at runtime.
|
||||||
|
FROM python:3.12-slim
|
||||||
|
|
||||||
|
WORKDIR /app
|
||||||
|
|
||||||
|
# The only third-party dep is matrix-nio; the reused ingest Spark client is pure stdlib.
|
||||||
|
COPY backend/matrix_intake/requirements.txt ./requirements.txt
|
||||||
|
RUN pip install --no-cache-dir -r requirements.txt
|
||||||
|
|
||||||
|
COPY backend/matrix_intake/ ./backend/matrix_intake/
|
||||||
|
COPY backend/ingest/ ./backend/ingest/
|
||||||
|
|
||||||
|
# .env (Matrix + CRM + Spark creds) is mounted read-only at /app/.env at runtime — never baked.
|
||||||
|
# `-u` keeps stdout/stderr unbuffered so `docker logs` shows the bot's lifecycle lines live.
|
||||||
|
CMD ["python", "-u", "backend/matrix_intake/bot.py"]
|
||||||
@@ -0,0 +1,41 @@
|
|||||||
|
# Matrix intake bot
|
||||||
|
|
||||||
|
Turns a typed message in a dedicated Matrix room into a proposed fundraising-grid add/edit,
|
||||||
|
gated on in-thread human approval before any write. Runs as its own process (on the Spark),
|
||||||
|
separate from the CRM. Full design + rules: `docs/guides/matrix-intake.md`.
|
||||||
|
|
||||||
|
## Run
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# 1. Install the one third-party dep (isolated to this component — NOT the CRM runtime)
|
||||||
|
python3 -m pip install -r requirements.txt # matrix-nio
|
||||||
|
|
||||||
|
# 2. Fill the MATRIX_* and CRM_BOT_* vars in the repo .env (see ../../.env.example),
|
||||||
|
# and create a dedicated CRM user for CRM_BOT_USERNAME/PASSWORD (admin → invite user).
|
||||||
|
|
||||||
|
# 3. Start the listener
|
||||||
|
python3 bot.py
|
||||||
|
```
|
||||||
|
|
||||||
|
It primes the Matrix sync past history (no backlog replay), then listens. Post a message in
|
||||||
|
the intake room; it replies in a thread with the parsed proposal. Reply **yes** to commit,
|
||||||
|
**edit field=value** to change a field, or **no** to discard.
|
||||||
|
|
||||||
|
## Layout
|
||||||
|
|
||||||
|
- `bot.py` — entrypoint: connect, prime-then-listen, dispatch (lifts matrix-bridge's plumbing).
|
||||||
|
- `parse.py` — message → structured proposal via local Qwen (`spark.py` → `backend/ingest/llm.py`).
|
||||||
|
- `proposals.py` — in-memory pending-proposal store + the yes/edit/no state machine.
|
||||||
|
- `crm_client.py` — login + `GET /api/intake/match` + write via `POST /api/fundraising/log-communication`.
|
||||||
|
- `matrix_io.py` — message splitting, thread-root detection, threaded-reply sender.
|
||||||
|
- `settings.py` — Matrix + CRM-API config (named `settings`, not `config`, to avoid shadowing `ingest/config`).
|
||||||
|
|
||||||
|
## Test (offline)
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python3 test_parse.py && python3 test_proposals.py && python3 test_crm_client.py
|
||||||
|
# endpoint + create→match contract (boots the real server against a temp DB):
|
||||||
|
cd ../ && python3 test_intake_endpoints.py
|
||||||
|
```
|
||||||
|
|
||||||
|
Live Matrix behavior needs creds + `matrix-nio` and can only be smoke-tested on the Spark.
|
||||||
@@ -0,0 +1,7 @@
|
|||||||
|
"""Matrix intake bot — a dedicated Matrix room that turns a typed message into a
|
||||||
|
proposed fundraising-grid add/edit, gated on in-thread human approval before any write.
|
||||||
|
|
||||||
|
Separate process from the CRM (its only third-party dep, matrix-nio, lives here, never
|
||||||
|
in the stdlib CRM runtime). Parses with local Qwen via Spark Control; on approval, writes
|
||||||
|
through the CRM's own API. See docs/guides/matrix-intake.md and ROADMAP.md.
|
||||||
|
"""
|
||||||
@@ -0,0 +1,441 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Matrix intake bot — entrypoint.
|
||||||
|
|
||||||
|
A top-level message in the dedicated intake room is parsed (local Qwen via Spark Control)
|
||||||
|
into a proposed fundraising-grid add/edit and posted back IN A THREAD. The team member
|
||||||
|
replies in that thread — **yes** / **edit field=value** / **no** — and only on **yes** does
|
||||||
|
the bot write, through the CRM's own API. Nothing is ever written autonomously.
|
||||||
|
|
||||||
|
Runs as its own process (its matrix-nio dep is isolated here, never in the CRM runtime).
|
||||||
|
Lifts matrix-bridge's prime-then-listen + threaded-reply plumbing. Config: repo .env.
|
||||||
|
"""
|
||||||
|
import asyncio
|
||||||
|
import base64
|
||||||
|
|
||||||
|
from nio import AsyncClient, MatrixRoom, MessageDirection, RoomMessageImage, RoomMessageText
|
||||||
|
|
||||||
|
import crm_client
|
||||||
|
import email_proposals
|
||||||
|
import matrix_io
|
||||||
|
import parse
|
||||||
|
import proposals
|
||||||
|
import query
|
||||||
|
import settings
|
||||||
|
import spark
|
||||||
|
|
||||||
|
UNCLEAR_HELP = (
|
||||||
|
"🤔 I couldn't tell what to record. Try e.g.\n"
|
||||||
|
"`New investor: Acme Capital — Jane Doe <jane@acme.com>, met at the Austin conf`\n"
|
||||||
|
"or a note like `Note for Acme Capital: wants the Q3 deck, follow up next week`."
|
||||||
|
)
|
||||||
|
|
||||||
|
EMAIL_POLL_SEC = 20 # how often the bot polls the CRM for new/decided email-activity proposals
|
||||||
|
MAX_THREAD_SCAN_PAGES = 8 # how far back to scan for a resolved thread's replies before redacting
|
||||||
|
|
||||||
|
|
||||||
|
async def main():
|
||||||
|
mx = settings.matrix_settings()
|
||||||
|
client = AsyncClient(mx["homeserver"], mx["user_id"])
|
||||||
|
client.restore_login(user_id=mx["user_id"], device_id=mx["device_id"], access_token=mx["token"])
|
||||||
|
say = matrix_io.make_say(client)
|
||||||
|
nudge = matrix_io.make_reply(client)
|
||||||
|
store = proposals.ProposalStore()
|
||||||
|
intake_room = mx["intake_room"]
|
||||||
|
roster = settings.team_roster() # frames the parse: teammates do outreach, aren't prospects
|
||||||
|
if roster:
|
||||||
|
print(f"matrix-intake: team roster loaded ({len(roster)} names)", flush=True)
|
||||||
|
review_room = settings.email_review_room() # CRM-drafted email proposals (empty → feature off)
|
||||||
|
query_room = settings.query_room() # dedicated read-only Q&A room (empty → use the intake trigger)
|
||||||
|
email_threads = {} # Matrix thread-root event_id -> {id, investor_name, note} for an email proposal
|
||||||
|
|
||||||
|
async def handle_intake(room_id, root, text, source="matrix_intake"):
    """Turn a top-level intake message into a pending proposal posted in its thread.

    Flow: parse `text` off the event loop, ask the CRM matcher whether the investor already
    exists, store the proposal under `root`, then post the card that fits the outcome:
      * exact match  -> intent forced to "meeting_note" and attached to the matched investor;
      * near-misses  -> a disambiguation shortlist the human has to resolve first;
      * neither      -> a new-investor approval card.
    Nothing is written to the CRM here; this only stages a proposal.

    Args:
        room_id: room to reply in.
        root: event id of the triggering message; thread root and proposal-store key.
        text: the message text (or a framed business-card transcription).
        source: provenance tag saved on the proposal as `_source` — "matrix_intake" for a
            typed note, "matrix_card" when the text came from a scanned card.
    """
    # A bare yes/no/approve typed in the MAIN timeline (not inside a proposal's thread) is
    # an easy slip — point the user back to the thread rather than parse it as a new intake.
    action, _ = proposals.interpret_reply(text)
    if action in ("approve", "reject") and store.any_pending():
        await nudge(room_id, "👉 To approve, reject, or edit a proposal, open its **thread** "
                             "and reply there — the note is in the thread.", root)
        return

    try:
        proposal = await asyncio.to_thread(parse.parse_message, text, roster=roster)
    except Exception as exc:  # parser unreachable or bad response
        await say(room_id, f"⚠️ couldn't reach the local parser: {str(exc)[:200]}", root)
        return
    if proposal["intent"] == "unclear":
        await say(room_id, UNCLEAR_HELP, root)
        return
    proposal["_source"] = source  # rides through to commit (control key, survives dict() copies)

    # Resolve new-vs-existing against the CRM matcher (read-only). Degrade gracefully if the
    # CRM is unreachable — still propose as new, just without match/candidate hints — but
    # leave a trace in the log so a silently failing matcher is diagnosable.
    match, candidates = None, []
    try:
        res = await asyncio.to_thread(crm_client.match, proposal)
        match = res.get("match")
        candidates = res.get("candidates") or []
    except Exception as exc:
        print(f"matrix-intake: CRM match unavailable, proposing as new: {exc}", flush=True)

    if match:
        # Confident exact match → auto-attach the note to that investor (no disambiguation).
        proposal["intent"] = "meeting_note"
        proposal["_match_id"] = match["id"]
        proposal["_stage"] = "approval"
        store.put(root, proposal)
        hint = (f"\n\n🔎 Looks like an existing investor: **{match['name']}** — "
                "this will append a note to them.")
        await say(room_id, proposals.render(proposal) + hint, root)
        await nudge(room_id, proposals.summary_line(proposal), root)
        return

    if candidates:
        # No exact match but near-misses exist → make the human pick one or confirm "new",
        # so a typo'd/near-duplicate name can't silently create a second investor.
        proposal["_stage"] = "disambiguate"
        proposal["_candidates"] = candidates
        store.put(root, proposal)
        await say(room_id, proposals.render_disambiguation(proposal), root)
        await nudge(room_id, proposals.disambiguation_nudge(proposal), root)
        return

    # Genuinely new — straight to the new-investor approval card.
    proposal["_stage"] = "approval"
    store.put(root, proposal)
    await say(room_id, proposals.render(proposal), root)
    # Also drop a brief, un-threaded reply in the main timeline so the proposal isn't
    # easy to miss inside a thread (the full card + yes/edit/no stay in the thread).
    await nudge(room_id, proposals.summary_line(proposal), root)
|
||||||
|
|
||||||
|
async def handle_card(room_id, event):
    """Transcribe a posted image and feed the text through the regular intake flow.

    Downloads the event's media, base64-encodes it, has `spark.transcribe_card` read it on a
    worker thread, then calls `handle_intake` with the transcription prefixed by a
    "new investor" framing line and `source="matrix_card"`. Every reply is threaded on the
    image event itself.

    Args:
        room_id: room the image was posted in.
        event: the image event; its `url` (media URI) and `event_id` are read.
    """
    thread_root = event.event_id
    media_uri = getattr(event, "url", None)
    if not media_uri:
        # No plain media URL on the event, so there is nothing we can download.
        await say(room_id, "📇 I can only read unencrypted images right now.", thread_root)
        return

    # Acknowledge before the (slower) vision call so the sender sees progress.
    await say(room_id, "📇 Reading the card…", thread_root)
    try:
        download = await client.download(mxc=media_uri)
        raw = getattr(download, "body", None)
        if not isinstance(raw, (bytes, bytearray)):
            # A failed download has no byte body; raise so the handler below reports it.
            raise RuntimeError(getattr(download, "message", None) or "image download failed")
        content_type = getattr(download, "content_type", None) or "image/jpeg"
        encoded = base64.b64encode(raw).decode("ascii")
        text = await asyncio.to_thread(spark.transcribe_card, encoded, content_type)
    except Exception as exc:
        await say(room_id, f"⚠️ couldn't read the card: {str(exc)[:200]}", thread_root)
        return

    transcription = text.strip()
    if len(transcription) < 5:
        await say(room_id, "📇 I couldn't read any text on that card — try a clearer, "
                           "well-lit photo taken straight-on.", thread_root)
        return

    # Prefix a framing line so the extractor treats the transcription as a new-investor
    # intake; the transcription text itself is passed along unchanged after it.
    framed = "New investor — from a business card:\n" + transcription
    await handle_intake(room_id, thread_root, framed, source="matrix_card")
|
||||||
|
|
||||||
|
async def handle_query(room_id, root, question):
    """Answer a natural-language question in the thread rooted at `root`.

    Runs `crm_client.nl_query(question)` on a worker thread and posts
    `query.render_answer(result)`. If the call raises, a short (200-char capped) error is
    posted instead. This handler stages nothing and touches no proposal state.

    Args:
        room_id: room to reply in.
        root: event id the answer is threaded on.
        question: the question text handed to the CRM query endpoint.
    """
    try:
        outcome = await asyncio.to_thread(crm_client.nl_query, question)
    except Exception as exc:
        await say(room_id, f"⚠️ couldn't run that query: {str(exc)[:200]}", root)
    else:
        answer = query.render_answer(outcome)
        await say(room_id, answer, root)
|
||||||
|
|
||||||
|
async def handle_reply(room_id, root, text):
    """Process an in-thread reply to a pending intake proposal.

    The reply is interpreted as one of:
      * approve -> commit through the CRM client, then clear the thread;
      * reject  -> clear the thread;
      * edit    -> apply `field=value`, keep the proposal pending, re-post the card;
      * other   -> treat the text as a free-form revision, re-run the parser, and re-post
                   the card only if something actually changed.
    A proposal still in the "disambiguate" stage is delegated to `handle_disambiguation`.
    Replies in threads with no pending proposal are ignored.

    Args:
        room_id: room the reply arrived in.
        root: thread-root event id; the proposal-store key.
        text: the reply text.
    """
    # Claim the proposal synchronously — BEFORE any await — so a second reply that
    # arrives while a commit is in flight can't double-process it. asyncio is
    # cooperative: nothing else runs between here and the first await below, so the
    # pop is atomic w.r.t. other Matrix events.
    proposal = store.pop(root)
    if proposal is None:
        return
    if proposal.get("_stage") == "disambiguate":
        await handle_disambiguation(room_id, root, text, proposal)
        return

    action, payload = proposals.interpret_reply(text)
    if action == "approve":
        try:
            await asyncio.to_thread(crm_client.commit, proposal)
        except Exception as exc:
            store.put(root, proposal)  # commit failed — restore so the user can retry
            # Cap the exception text like every other user-facing error in this bot, so a
            # long server response/traceback can't flood the room.
            await say(room_id, f"⚠️ write failed, nothing committed: {str(exc)[:200]}", root)
            return
        # Committed → clear the whole thread (card + ack + nudge + the user's note/photo).
        # The thread vanishing is the acknowledgment; a confirmation reply would just keep
        # it alive (and need redacting too).
        await redact_thread(room_id, root)
    elif action == "reject":
        await redact_thread(room_id, root)
    elif action == "edit":
        field, value = payload
        proposal = proposals.apply_edit(proposal, field, value)
        store.put(root, proposal)  # keep it pending (edited) for the next reply
        await say(room_id, "✏️ Updated:\n\n" + proposals.render(proposal), root)
    else:
        # Not yes/no/edit-grammar → treat it as a natural-language revision instruction and
        # re-run it through the parser. The human still approves the revised card, so the
        # draft→approve gate holds.
        try:
            revised = await asyncio.to_thread(parse.revise, proposal, text, roster=roster)
        except Exception as exc:
            store.put(root, proposal)  # put the claimed proposal back unchanged
            await say(room_id, f"⚠️ couldn't apply that change ({str(exc)[:200]}).\n\nReply **yes** "
                               "to commit, **no** to discard, **edit field=value**, or rephrase.", root)
            return
        if proposals.same_fields(proposal, revised):
            store.put(root, proposal)
            await say(room_id, "I didn't catch a change there. Reply **yes** to commit, **no** "
                               "to discard, **edit field=value**, or tell me what to change.", root)
            return
        store.put(root, revised)
        await say(room_id, "✏️ Updated:\n\n" + proposals.render(revised), root)
|
||||||
|
|
||||||
|
async def handle_disambiguation(room_id, root, text, proposal):
    """Resolve a proposal that is waiting on a pick-one-or-new decision.

    `proposal` has already been removed from the store by the caller, so every branch that
    keeps it alive must put it (or its successor) back under `root`.

    Args:
        room_id: room to reply in.
        root: thread-root event id; the proposal-store key.
        text: the human's reply, interpreted against the candidate shortlist.
        proposal: the claimed proposal; its `_candidates` list is the shortlist.
    """
    shortlist = proposal.get("_candidates") or []
    action, payload = proposals.interpret_disambiguation(text, len(shortlist))

    if action == "reject":
        # Discarded: clear the thread and leave the proposal out of the store.
        await redact_thread(room_id, root)
        return

    if action == "pick":
        heading = "✏️ Will log against the existing investor:\n\n"
        resolved = proposals.attach_to_candidate(proposal, shortlist[payload])
    elif action == "new":
        heading = "➕ OK — adding as a new investor:\n\n"
        resolved = proposals.promote_to_new(proposal)
    else:
        # Unrecognized reply: keep the proposal pending and show the shortlist again.
        store.put(root, proposal)
        await say(room_id, "I didn't catch that.\n\n" + proposals.render_disambiguation(proposal), root)
        return

    store.put(root, resolved)
    await say(room_id, heading + proposals.render(resolved), root)
|
||||||
|
|
||||||
|
async def redact_card(room_id, event_id):
    """Best-effort redaction of a single event.

    Any failure is logged to stdout and swallowed, so callers never see an exception from
    here. NOTE(review): per the original author's note, redacting another user's event
    requires the bot to hold a redact/mod power level in the room — that is a room-permission
    matter and is not checked by this code.

    Args:
        room_id: room containing the event.
        event_id: id of the event to redact.
    """
    try:
        await client.room_redact(room_id, event_id, reason="proposal resolved")
    except Exception as exc:
        failure = f"matrix-intake: could not redact {event_id}: {exc}"
        print(failure, flush=True)
|
||||||
|
|
||||||
|
async def redact_thread(room_id, root):
    """Clear a resolved thread: redact `root` and every message that hangs off it.

    Two kinds of follow-up events are redacted, found by paging backwards through room
    history from the client's current sync token:
      * thread children — `m.relates_to` has `rel_type == "m.thread"` and `event_id == root`
        (any sender);
      * the bot's own un-threaded nudge — a plain `m.in_reply_to == root` reply sent by
        `mx["user_id"]`. Gating on our sender avoids redacting an unrelated human
        plain-reply to the same root.
    The scan stops after MAX_THREAD_SCAN_PAGES pages, on an empty page, or when the server
    returns no further token. Failures are logged and swallowed (best-effort cleanup).

    Args:
        room_id: room containing the thread.
        root: event id of the thread root. A falsy value is ignored.
    """
    if not root:
        # Without a root the matching below degenerates: `in_reply == None` would be true
        # for every bot message that has no reply relation, redacting unrelated history.
        return

    await redact_card(room_id, root)
    token = getattr(client, "next_batch", None)
    if not token:
        return  # no sync token yet → nowhere to start the backward scan from
    try:
        scanned = 0
        for _ in range(MAX_THREAD_SCAN_PAGES):
            resp = await client.room_messages(room_id, start=token,
                                              direction=MessageDirection.back, limit=100)
            chunk = getattr(resp, "chunk", None)
            if not chunk:  # error response or end of history
                break
            for ev in chunk:
                content = (getattr(ev, "source", None) or {}).get("content", {}) or {}
                rel = content.get("m.relates_to") or {}
                in_reply = (rel.get("m.in_reply_to") or {}).get("event_id")
                is_thread_child = rel.get("rel_type") == "m.thread" and rel.get("event_id") == root
                is_own_nudge = in_reply == root and getattr(ev, "sender", None) == mx["user_id"]
                if is_thread_child or is_own_nudge:
                    await redact_card(room_id, ev.event_id)
            token = getattr(resp, "end", None)
            scanned += len(chunk)
            # Secondary cap on events examined, independent of the page limit above.
            if not token or scanned > 1000:
                break
    except Exception as exc:
        print(f"matrix-intake: thread reply cleanup failed for {root}: {exc}", flush=True)
|
||||||
|
|
||||||
|
async def handle_email_reply(room_id, root, text):
    """Process an in-thread reply to an email-proposal card.

    `email_proposals.interpret(text)` decides the branch:
      * "approve" -> record the approval (with the current note) in the CRM, clear the thread;
      * "reject"  -> record a dismissal in the CRM, clear the thread;
      * otherwise -> treat the text as a revision of the draft note and re-post the draft.
    Replies in threads not tracked in `email_threads` are ignored.

    Args:
        room_id: room the reply arrived in (used for bot replies).
        root: thread-root event id; key into `email_threads`.
        text: the reply text.
    """
    item = email_threads.get(root)
    if item is None:
        return  # a threaded reply we don't own (or already resolved)

    decision = email_proposals.interpret(text)
    if decision == "approve":
        # Claim before the await (double-approve guard, like the intake commit path).
        email_threads.pop(root, None)
        try:
            await asyncio.to_thread(crm_client.decide_email_proposal, item["id"], "approve", item.get("note"))
        except Exception as exc:
            email_threads[root] = item  # restore for retry
            await say(room_id, email_proposals.frame(f"⚠️ couldn't add it ({str(exc)[:200]}). Reply **yes** to retry, **no** to dismiss."), root)
            return
        # Success → clear the whole thread (card + replies). No confirmation: the thread
        # vanishing is the acknowledgment, and a confirmation reply would keep it alive.
        await redact_thread(review_room, root)
    elif decision == "reject":
        email_threads.pop(root, None)
        try:
            await asyncio.to_thread(crm_client.decide_email_proposal, item["id"], "dismiss")
        except Exception as exc:
            email_threads[root] = item
            await say(room_id, email_proposals.frame(f"⚠️ couldn't dismiss it ({str(exc)[:200]}). Try again."), root)
            return
        await redact_thread(review_room, root)
    else:
        new_note, error = None, None
        try:
            new_note = await asyncio.to_thread(email_proposals.revise_note, item.get("note") or "", text)
        except Exception as exc:
            error = exc
        # The revision ran off-loop without claiming the item. If a yes/no (or a web-side
        # close seen by the poller) took the proposal meanwhile, it is no longer ours:
        # re-registering it or posting into its thread would resurrect a resolved proposal.
        if email_threads.get(root) is not item:
            return
        if error is not None:
            await say(room_id, email_proposals.frame(f"⚠️ couldn't revise that ({str(error)[:200]}). Reply **yes** to add as-is, "
                                                     "**no** to dismiss, or rephrase."), root)
            return
        if not new_note:
            await say(room_id, email_proposals.frame("I didn't catch a change. Reply **yes** to add the note as-is, **no** to "
                                                     "dismiss, or tell me how to change it."), root)
            return
        item["note"] = new_note  # same dict object that is still registered under `root`
        await say(room_id, email_proposals.frame(f"✏️ Updated draft note:\n\n{new_note}\n\nReply **yes** to add it, **no** to "
                                                 "dismiss, or refine again."), root)
|
||||||
|
|
||||||
|
async def poll_email_proposals():
    """Poll the CRM for email-activity proposals, forever.

    Each cycle pulls the work-lists from ``crm_client.list_email_proposals`` and handles:

    * ``open``     - re-registers already-posted threads in ``email_threads`` so replies still
                     route after a restart;
    * ``to_post``  - posts a review card to ``review_room``, records the card's event id on the
                     CRM, then registers the thread for reply routing;
    * ``to_close`` - decided on the web: redacts the thread, marks it closed on the CRM and
                     drops it from ``email_threads``.

    A failure on a single item is logged and skipped; a failure of the whole cycle is logged
    and retried on the next tick (every ``EMAIL_POLL_SEC`` seconds)."""

    def _routing_entry(it):
        # What the reply handler needs to act on a thread: CRM id, display name, current note.
        return {"id": it["id"], "investor_name": it.get("investor_name"),
                "note": it.get("proposed_note") or ""}

    while True:
        try:
            lists = await asyncio.to_thread(crm_client.list_email_proposals)
            for it in lists["open"]:  # rebuild routing for threads posted before (e.g. a restart)
                ev = it.get("event_id")
                if ev and ev not in email_threads:
                    email_threads[ev] = _routing_entry(it)
            for it in lists["to_post"]:
                try:
                    resp = await client.room_send(
                        review_room, "m.room.message",
                        matrix_io.thread_content(email_proposals.render_card(it), None))
                    # The send result may carry no event_id; without one there is no thread
                    # root to route replies to, so log and leave the proposal un-posted.
                    ev = getattr(resp, "event_id", None)
                    if not ev:
                        print(f"matrix-intake: card send returned no event_id for {it['id']}", flush=True)
                        continue
                    await asyncio.to_thread(crm_client.mark_email_proposal_posted, it["id"], ev)
                    email_threads[ev] = _routing_entry(it)
                except Exception as exc:
                    print(f"matrix-intake: failed to post email proposal {it.get('id')}: {exc}", flush=True)
            for it in lists["to_close"]:  # decided on the web → clear the thread, then close
                ev = it.get("event_id")
                if not ev:
                    continue
                try:
                    await redact_thread(review_room, ev)
                    await asyncio.to_thread(crm_client.mark_email_proposal_closed, it["id"])
                    email_threads.pop(ev, None)
                except Exception as exc:
                    print(f"matrix-intake: failed to close email proposal {it.get('id')}: {exc}", flush=True)
        except Exception as exc:
            print(f"matrix-intake: email-proposal poll error: {exc}", flush=True)
        await asyncio.sleep(EMAIL_POLL_SEC)
|
||||||
|
|
||||||
|
async def on_message(room: MatrixRoom, event: RoomMessageText):
    """Route one incoming text message by room and thread position.

    * review room: only a threaded reply under a card tracked in ``email_threads`` is handled;
    * Q&A room: only top-level messages are handled, each as a question;
    * intake room: a threaded reply under a live proposal goes to ``handle_reply``; a top-level
      message is a question when ``query.parse_trigger`` finds a trigger, otherwise an intake
      note.

    Messages from the bot itself, empty messages and messages in any other room are ignored."""
    if event.sender == mx["user_id"]:
        return  # never react to our own messages (we post in-thread — this prevents loops)
    text = (event.body or "").strip()
    if not text:
        return
    root = matrix_io.thread_root_of(event)
    # Email-proposal review room: only a threaded reply to a card we posted is actionable.
    if review_room and room.room_id == review_room:
        if root and root in email_threads:
            await handle_email_reply(room.room_id, root, text)
        return
    # Dedicated Q&A room: every top-level message IS a question — no trigger needed. Threaded
    # messages (the answers we post, or follow-ups) aren't acted on in v1.
    if query_room and room.room_id == query_room:
        if not root:
            await handle_query(room.room_id, event.event_id, text)
        return
    if room.room_id != intake_room:
        return
    if root and store.has(root):
        await handle_reply(room.room_id, root, text)
    elif root:
        return  # threaded message not tied to a live proposal — ignore
    else:
        # A top-level message is either an NL question (explicitly addressed with '?'/'@bot')
        # or an intake note. The trigger is required, so plain notes still flow to intake.
        q = query.parse_trigger(text)
        if q is None:
            await handle_intake(room.room_id, event.event_id, text)
        elif not q:
            # A trigger with nothing after it: reply with the help text.
            await say(room.room_id, query.HELP, event.event_id)
        else:
            await handle_query(room.room_id, event.event_id, q)
|
||||||
|
|
||||||
|
async def on_image(room: MatrixRoom, event: RoomMessageImage):
    """Hand a freshly posted image in the intake room to ``handle_card``; ignore all others."""
    # Business-card capture is intake-only: ignore our own uploads, images in the Q&A /
    # email-review rooms, and an image dropped inside an existing thread (not a fresh card).
    if event.sender == mx["user_id"]:
        return
    if room.room_id != intake_room:
        return
    if matrix_io.thread_root_of(event):
        return
    await handle_card(room.room_id, event)
|
||||||
|
|
||||||
|
# Prime the sync token past history, THEN register the callbacks — only react to messages
|
||||||
|
# arriving after startup (no backlog replay). (matrix-bridge pattern.)
|
||||||
|
print("matrix-intake: priming sync (skipping backlog)...", flush=True)
|
||||||
|
await client.sync(timeout=30000, full_state=False)
|
||||||
|
client.add_event_callback(on_message, RoomMessageText)
|
||||||
|
client.add_event_callback(on_image, RoomMessageImage)
|
||||||
|
who = await client.whoami()
|
||||||
|
print(f"matrix-intake: listening as {who.user_id} in room {intake_room}", flush=True)
|
||||||
|
tasks = [asyncio.create_task(client.sync_forever(timeout=30000))]
|
||||||
|
if review_room:
|
||||||
|
# "Invited" isn't "joined" — the bot must join before it can post cards (room_send to a
|
||||||
|
# room we're only invited to fails M_FORBIDDEN). Idempotent if already a member.
|
||||||
|
try:
|
||||||
|
await client.join(review_room)
|
||||||
|
except Exception as exc:
|
||||||
|
print(f"matrix-intake: could not join review room {review_room}: {exc}", flush=True)
|
||||||
|
tasks.append(asyncio.create_task(poll_email_proposals()))
|
||||||
|
print(f"matrix-intake: reviewing email proposals in room {review_room} (every {EMAIL_POLL_SEC}s)", flush=True)
|
||||||
|
if query_room:
|
||||||
|
# Read-only Q&A room — just join and listen (no poll task; questions are interactive).
|
||||||
|
# "Invited" isn't "joined": the bot must join before it can post answers (idempotent).
|
||||||
|
try:
|
||||||
|
await client.join(query_room)
|
||||||
|
except Exception as exc:
|
||||||
|
print(f"matrix-intake: could not join Q&A room {query_room}: {exc}", flush=True)
|
||||||
|
print(f"matrix-intake: answering questions in room {query_room}", flush=True)
|
||||||
|
try:
|
||||||
|
await asyncio.gather(*tasks)
|
||||||
|
finally:
|
||||||
|
await client.close()
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        # Ctrl-C is the normal way to stop the bot; exit without a traceback.
        pass
|
||||||
@@ -0,0 +1,209 @@
|
|||||||
|
"""CRM API client for the intake bot's write-back leg.
|
||||||
|
|
||||||
|
The bot authenticates as a dedicated service user (Bearer JWT via /api/auth/login — the CRM
|
||||||
|
has no service-key path) and reuses the CRM's OWN canonical write endpoint
|
||||||
|
(/api/fundraising/log-communication) for both new-investor and existing-note cases, rather
|
||||||
|
than mutating the grid itself. That endpoint creates the grid row (create_investor_if_missing),
|
||||||
|
upserts the contact, logs the communication, appends a visible note, and re-syncs the
|
||||||
|
relational tables + audit — exactly as a UI grid edit would. We only tag provenance
|
||||||
|
(source="matrix_intake"). The payload builder is a pure function so it's unit-tested offline.
|
||||||
|
"""
|
||||||
|
import json
|
||||||
|
import ssl
|
||||||
|
import urllib.error
|
||||||
|
import urllib.request
|
||||||
|
from urllib.parse import urlencode
|
||||||
|
|
||||||
|
import settings
|
||||||
|
|
||||||
|
# Cached Bearer token for the CRM API: None until _login() succeeds, then reused by _authed().
_token = None
|
||||||
|
|
||||||
|
|
||||||
|
def _http(method, path, body=None, token=None):
    """Send one JSON request to the CRM and return ``(status, payload)``.

    ``path`` is appended to the configured base URL. ``body`` (when not None) is sent as JSON,
    and ``token`` (when given) as a Bearer Authorization header. HTTP error statuses do not
    raise: the status code comes back with the decoded error body, always as a dict (a
    non-JSON or non-object body is wrapped under ``"raw"``) so callers can use
    ``payload.get(...)``. Other failures propagate — connection errors, timeouts, or a
    non-JSON body on a successful response."""
    s = settings.crm_settings()
    url = s["base"] + path
    data = json.dumps(body).encode("utf-8") if body is not None else None
    headers = {"Content-Type": "application/json"}
    if token:
        headers["Authorization"] = f"Bearer {token}"
    req = urllib.request.Request(url, data=data, method=method, headers=headers)
    ctx = None
    if url.lower().startswith("https") and not s["verify_tls"]:
        # Verification explicitly switched off in settings: accept any certificate/hostname.
        ctx = ssl.create_default_context()
        ctx.check_hostname = False
        ctx.verify_mode = ssl.CERT_NONE
    try:
        with urllib.request.urlopen(req, timeout=30, context=ctx) as resp:
            raw = resp.read()
            return resp.status, (json.loads(raw) if raw else {})
    except urllib.error.HTTPError as exc:
        # HTTPError is itself the response object; read the body, then release the connection.
        try:
            raw = exc.read()
        finally:
            exc.close()
        try:
            payload = json.loads(raw) if raw else {}
        except ValueError:  # JSONDecodeError and UnicodeDecodeError are both ValueErrors
            payload = {"raw": raw.decode("utf-8", "replace")}
        if not isinstance(payload, dict):
            payload = {"raw": payload}
        return exc.code, payload
|
||||||
|
|
||||||
|
|
||||||
|
def _login():
    """Log in with the configured bot credentials, cache the token in ``_token``, return it.

    Raises RuntimeError when the credentials are not configured, or when the login response
    is not a 200 carrying a ``token``."""
    global _token
    s = settings.crm_settings()
    if not s["username"] or not s["password"]:
        raise RuntimeError("CRM bot creds not set (CRM_BOT_USERNAME / CRM_BOT_PASSWORD)")
    status, data = _http("POST", "/api/auth/login",
                         {"username": s["username"], "password": s["password"]})
    if status != 200 or not data.get("token"):
        raise RuntimeError(f"CRM login failed ({status}): {data.get('error') or data}")
    _token = data["token"]
    return _token
|
||||||
|
|
||||||
|
|
||||||
|
def _authed(method, path, body=None):
    """Call the CRM with the cached token; re-login once on a 401 (token expiry).

    Logs in first when no token is cached yet. Returns ``(status, payload)`` from ``_http``;
    a second 401 after the re-login is handed back to the caller as-is."""
    token = _token or _login()
    status, data = _http(method, path, body, token=token)
    if status == 401:
        token = _login()  # refreshes the module-level cache as a side effect
        status, data = _http(method, path, body, token=token)
    return status, data
|
||||||
|
|
||||||
|
|
||||||
|
def match(proposal):
    """Resolve new-vs-existing for this proposal against the CRM matcher.

    Returns {'match': {...}|None, 'candidates': [...]}:
      - `match` is a confident EXACT existing investor — {'id', 'name'} — that the bot
        auto-attaches a note to (no human disambiguation needed).
      - `candidates` is a ranked list of fuzzy NEAR-matches — each {'id', 'name', 'score',
        'matched_on'} — surfaced in-thread for the human to pick from (or confirm "new")
        when there is no exact match, so a typo'd/near-duplicate name doesn't silently
        create a second investor.

    With neither a name nor an email to search on, no request is made and the empty result is
    returned. Raises RuntimeError when the matcher responds with anything but 200."""
    q = proposal.get("investor_name") or proposal.get("contact_name") or ""
    email = proposal.get("contact_email") or ""
    if not q and not email:
        return {"match": None, "candidates": []}
    qs = urlencode({"q": q, "email": email})
    status, data = _authed("GET", f"/api/intake/match?{qs}")
    if status != 200:
        raise RuntimeError(f"intake match failed ({status}): {data.get('error') or data}")
    payload = data.get("data") or {}
    m = payload.get("match")
    # Fall back to the searched name when the matcher returns no investor_name.
    match_out = {"id": m["id"], "name": m.get("investor_name") or q} if m else None
    candidates = [
        {"id": c["id"], "name": c.get("investor_name") or "?",
         "score": c.get("score"), "matched_on": c.get("matched_on")}
        for c in (payload.get("candidates") or []) if c.get("id")  # id-less entries are dropped
    ]
    return {"match": match_out, "candidates": candidates}
|
||||||
|
|
||||||
|
|
||||||
|
def list_email_proposals():
    """Pull the email-activity review work-lists for the poll loop: {to_post, open, to_close}.

    to_post = pending, un-posted (post a card); open = posted, awaiting a decision (rebuild the
    reply-routing map after a restart); to_close = decided on the web (clear the thread + close).
    A list missing from the response comes back as []. Raises RuntimeError on a non-200."""
    status, data = _authed("GET", "/api/intake/email-proposals")
    if status != 200:
        raise RuntimeError(f"email-proposals list failed ({status}): {data.get('error') or data}")
    payload = data.get("data") or {}
    return {k: (payload.get(k) or []) for k in ("to_post", "open", "to_close")}
|
||||||
|
|
||||||
|
|
||||||
|
def mark_email_proposal_posted(proposal_id, event_id):
    """Record the Matrix thread-root event id so the proposal's review state survives a restart.

    Returns the response's ``data`` dict (or {}); raises RuntimeError on a non-200."""
    status, data = _authed("POST", f"/api/intake/email-proposals/{proposal_id}/matrix",
                           {"event_id": event_id})
    if status != 200:
        raise RuntimeError(f"mark posted failed ({status}): {data.get('error') or data}")
    return data.get("data") or {}
|
||||||
|
|
||||||
|
|
||||||
|
def mark_email_proposal_closed(proposal_id):
    """Mark the proposal's Matrix review thread as closed on the CRM (sends ``closed: true``).

    Returns the response's ``data`` dict (or {}); raises RuntimeError on a non-200."""
    status, data = _authed("POST", f"/api/intake/email-proposals/{proposal_id}/matrix",
                           {"closed": True})
    if status != 200:
        raise RuntimeError(f"mark closed failed ({status}): {data.get('error') or data}")
    return data.get("data") or {}
|
||||||
|
|
||||||
|
|
||||||
|
def decide_email_proposal(proposal_id, decision, note=None):
    """Relay an in-thread approve/dismiss (with the possibly-revised note) to the CRM. The server
    appends the note to the grid on approve, tags source='matrix', and closes the thread.

    ``note`` is only included in the request when it is not None. Returns the response's
    ``data`` dict (or {}); raises RuntimeError on any status other than 200/201."""
    body = {"decision": decision}
    if note is not None:
        body["note"] = note
    status, data = _authed("POST", f"/api/intake/email-proposals/{proposal_id}/decide", body)
    if status not in (200, 201):
        raise RuntimeError(f"email-proposal decide failed ({status}): {data.get('error') or data}")
    return data.get("data") or {}
|
||||||
|
|
||||||
|
|
||||||
|
def nl_query(question):
    """Ask the read-only NL-query endpoint (POST /api/query/nl). Translation runs on the box's
    LOCAL model — the question never leaves the box and no write is possible. Returns the
    endpoint's structured result dict ({intent, slots, rows, summary, ...} or {error, detail});
    the server returns that same body on a hit AND on the soft 503 (model down) / 500 (query
    fault) status codes, so we hand it straight to the renderer. Any OTHER status — auth (403),
    a malformed request (400) — raises RuntimeError so the caller posts a brief error. A
    response without a ``data`` object yields {}."""
    status, data = _authed("POST", "/api/query/nl", {"question": question, "source": "matrix"})
    if status not in (200, 500, 503):
        raise RuntimeError(f"nl-query failed ({status}): {data.get('error') or data}")
    return data.get("data") or {}
|
||||||
|
|
||||||
|
|
||||||
|
def build_commit_payload(proposal):
    """Pure: map a proposal to the /api/fundraising/log-communication request body.

    Existing investor (carries _match_id) → target that exact grid row via ``row_id``.
    Otherwise send ``investor_name`` with ``create_investor_if_missing``. The note becomes the
    communication body. Missing proposal fields are sent as empty strings."""
    contact = {
        "name": proposal.get("contact_name") or proposal.get("investor_name") or "",
        "email": proposal.get("contact_email") or "",
        "title": proposal.get("contact_title") or "",
        # city + linkedin_url + phone + mobile are honored by the server's contact upsert
        # (_upsert_contact_from_fundraising); city also syncs to the grid contact pill, the
        # rest land on the canonical contact record. phone = office/main line, mobile = cell.
        "city": proposal.get("city") or "",
        "linkedin_url": proposal.get("linkedin_url") or "",
        "phone": proposal.get("phone") or "",
        "mobile": proposal.get("mobile") or "",
    }
    note = proposal.get("note") or ""
    # The CRM's grid note line uses subject-or-body for its one-line summary, so a non-empty
    # subject hides the actual note text. Send a blank subject when there's a note (let the note
    # itself show in the grid); fall back to a provenance label only when there's nothing to
    # show. Provenance is recorded via source="matrix_intake" either way.
    intent_label = "Note (Matrix)" if proposal.get("intent") == "meeting_note" else "Intake (Matrix)"
    payload = {
        "contact": contact,
        "type": "note",
        "body": note,
        "subject": "" if note.strip() else intent_label,
        "append_note": True,
        # Provenance for the audit log: a typed note is "matrix_intake"; a scanned business card
        # rides in on _source="matrix_card" (set by the bot's image handler). Default preserves
        # the text path.
        "source": proposal.get("_source") or "matrix_intake",
    }
    match_id = proposal.get("_match_id")
    if match_id:
        payload["row_id"] = match_id
    else:
        payload["investor_name"] = proposal.get("investor_name") or proposal.get("contact_name") or ""
        payload["create_investor_if_missing"] = True
    return payload
|
||||||
|
|
||||||
|
|
||||||
|
def commit(proposal):
    """Write the approved proposal to the CRM; return a short human summary for the thread.

    Raises RuntimeError on any status other than 200/201."""
    payload = build_commit_payload(proposal)
    status, data = _authed("POST", "/api/fundraising/log-communication", payload)
    if status not in (200, 201):
        raise RuntimeError(f"log-communication failed ({status}): {data.get('error') or data}")
    row = (data.get("data") or {}).get("row") or {}
    # Prefer the name on the returned row, then the one we sent, then a generic word.
    name = row.get("investor_name") or payload.get("investor_name") or "investor"
    if proposal.get("_match_id"):
        return f"Logged a note on **{name}** (existing grid entry)."
    return f"Created a new grid entry for **{name}**" + (" and logged a note." if payload.get("body") else ".")
|
||||||
@@ -0,0 +1,84 @@
|
|||||||
|
"""Email-activity proposal review over Matrix — the CRM→Matrix leg of the email-capture flow.
|
||||||
|
|
||||||
|
The CRM (on the box) drafts a proposed grid note per newly-matched email (local model, no Claude)
|
||||||
|
and queues it for human review. The CRM is stdlib-only and can't post to Matrix itself, so this
|
||||||
|
bot PULLS the pending proposals (crm_client.list_email_proposals), posts a review card to the
|
||||||
|
dedicated review room, and relays the human's in-thread reply back to the CRM. Same draft→approve
|
||||||
|
discipline as the intake bot: nothing is appended to the grid until a human approves — here OR on
|
||||||
|
the web Email Capture panel, the two surfaces kept in sync via the CRM's email_proposal_matrix row.
|
||||||
|
|
||||||
|
This module is the PURE logic (card rendering, reply grammar, note revision) so it's unit-tested
|
||||||
|
offline; the async poll/post/reply wiring lives in bot.py (network + Matrix, live-smoke only).
|
||||||
|
"""
|
||||||
|
import spark
|
||||||
|
|
||||||
|
_YES = {"yes", "y", "approve", "approved", "ok", "confirm", "go", "add", "👍", "✅"}
|
||||||
|
_NO = {"no", "n", "cancel", "discard", "reject", "skip", "stop", "👎", "❌"}
|
||||||
|
|
||||||
|
_SNIPPET_MAX = 400 # email snippet shown on the card; the full body is in the web popup
|
||||||
|
RULE = "-----------------------" # top/bottom rule so threads don't bleed together on mobile
|
||||||
|
|
||||||
|
|
||||||
|
def frame(text):
    """Wrap a message in dash rules so each card/reply is visually bounded in the room."""
    return f"{RULE}\n{text}\n{RULE}"
|
||||||
|
|
||||||
|
|
||||||
|
def _truncate(s, n):
|
||||||
|
s = (s or "").strip()
|
||||||
|
return s if len(s) <= n else s[:n].rstrip() + "…"
|
||||||
|
|
||||||
|
|
||||||
|
def render_card(item):
    """Build the review card posted to the Matrix review room.

    The card shows the investor name, then the subject and date when present, the sender
    (``from_name``, else ``from_email``, else "?"), an email snippet cut to ``_SNIPPET_MAX``
    characters when present, the drafted note (or "(empty)"), and the reply instructions. The
    whole card is wrapped by ``frame``. Deliberately compact for mobile — the full scrollable
    body is in the web Email Capture popup."""
    name = item.get("investor_name") or "Unknown investor"
    frm = item.get("from_name") or item.get("from_email") or "?"
    lines = [f"📧 Proposed **grid note** for **{name}**"]
    if item.get("email_subject"):
        lines.append(f"· Subject: {item['email_subject']}")
    if item.get("email_date"):
        lines.append(f"· Date: {item['email_date']}")
    lines.append(f"· From: {frm}")
    snippet = _truncate(item.get("snippet"), _SNIPPET_MAX)
    if snippet:
        lines.append(f"· Email: {snippet}")
    lines.append("")
    lines.append(f"📝 Draft note: {item.get('proposed_note') or '(empty)'}")
    lines.append("")
    lines.append("Reply **yes** to add it to the grid, **no** to dismiss, or just tell me how to "
                 "change the note (e.g. *say we discussed the Q3 raise*).")
    return frame("\n".join(lines))
|
||||||
|
|
||||||
|
|
||||||
|
def interpret(text):
    """Classify an in-thread reply: 'approve' | 'reject' | 'revise' (anything else → revise the note).

    The match is on the whole message after stripping and lower-casing, so "Yes" approves but
    "yes please" is treated as a revision instruction."""
    t = (text or "").strip().lower()
    if t in _YES:
        return "approve"
    if t in _NO:
        return "reject"
    return "revise"
|
||||||
|
|
||||||
|
|
||||||
|
# System prompt for revise_note(): asks the model for a JSON object with a single "note" key.
REVISE_SYSTEM = (
    "You revise a single CRM note from a short instruction a venture-fund team member typed. "
    "You are given the CURRENT note and an INSTRUCTION. Apply the instruction and reply with "
    "ONLY a JSON object of the form {\"note\": \"<the full revised note>\"}. Keep it to one or two "
    "factual sentences, no preamble. Output JSON only."
)
|
||||||
|
|
||||||
|
|
||||||
|
def revise_note(note, instruction, parse_fn=spark.parse_json):
    """Re-draft the note via local Qwen from a free-form instruction (no Claude, no scrub — same
    local-only basis as the intake parse). Returns the new note text, or None if the model gave
    nothing usable / unchanged, in which case the caller re-prompts. `parse_fn` is injectable for
    tests; it is called as ``parse_fn(prompt, system=REVISE_SYSTEM, max_tokens=400)`` and is
    expected to return a dict with a "note" key."""
    prompt = "CURRENT:\n" + (note or "") + "\n\nINSTRUCTION:\n" + (instruction or "").strip()
    raw = parse_fn(prompt, system=REVISE_SYSTEM, max_tokens=400) or {}
    new = raw.get("note") if isinstance(raw, dict) else None
    # Model output is untrusted: a non-string "note" (list, number, nested object) counts as
    # "nothing usable" instead of blowing up on .strip().
    new = new.strip() if isinstance(new, str) else ""
    if not new or new == (note or "").strip():
        return None
    return new
|
||||||
@@ -0,0 +1,73 @@
|
|||||||
|
"""Matrix plumbing lifted from matrix-bridge (src/bot.py): message splitting, thread-root
|
||||||
|
detection, and a threaded-reply sender. Kept dependency-light so the rest of the bot is
|
||||||
|
testable without a live homeserver."""
|
||||||
|
|
||||||
|
MAX_MSG_CHARS = 30000  # well under Matrix's ~64KB event cap
# NOTE(review): this limit is counted in characters, while the event cap is in bytes — text
# made mostly of multi-byte characters could still exceed it. Confirm whether that matters.


def split_message(text, limit=MAX_MSG_CHARS):
    """Split text into chunks of at most ``limit`` characters without dropping any content.

    Chunks break on line boundaries where possible; a single line longer than ``limit`` is
    hard-split mid-line. Text that already fits is returned as a one-element list.
    Concatenating the chunks reproduces ``text``.

    Raises ValueError when the text needs splitting but ``limit`` is not positive (the split
    could never make progress)."""
    if len(text) <= limit:
        return [text]
    if limit < 1:
        raise ValueError("limit must be a positive integer")
    chunks, buf = [], ""
    for line in text.splitlines(keepends=True):
        while len(line) > limit:
            # Over-long line: flush what is buffered, then emit limit-sized slices of it.
            if buf:
                chunks.append(buf)
                buf = ""
            chunks.append(line[:limit])
            line = line[limit:]
        if len(buf) + len(line) > limit:
            chunks.append(buf)
            buf = ""
        buf += line
    if buf:
        chunks.append(buf)
    return chunks
|
||||||
|
|
||||||
|
|
||||||
|
def thread_root_of(event):
    """Return the thread root event_id if this message is a threaded reply, else None.

    Reads ``event.source["content"]["m.relates_to"]``. The event body comes from other users,
    so a missing/None ``source`` or ``content``, or a relation that is not an object, is
    treated as "not threaded" rather than raising."""
    source = getattr(event, "source", None) or {}
    content = source.get("content") or {}
    relates = content.get("m.relates_to") if isinstance(content, dict) else None
    if isinstance(relates, dict) and relates.get("rel_type") == "m.thread":
        return relates.get("event_id")
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def thread_content(text, thread_root):
    """Build an m.room.message content dict, threaded under thread_root when given.

    With a falsy ``thread_root`` the result is a plain text message, which is how a new
    top-level card is posted."""
    content = {"msgtype": "m.text", "body": text}
    if thread_root:
        content["m.relates_to"] = {
            "rel_type": "m.thread",
            "event_id": thread_root,
            # Reply fallback pointing at the root, flagged as a fallback rather than a real reply.
            "is_falling_back": True,
            "m.in_reply_to": {"event_id": thread_root},
        }
    return content
|
||||||
|
|
||||||
|
|
||||||
|
def make_say(client):
    """Return an async say(room_id, text, thread_root=None) bound to a matrix-nio client.

    ``say`` splits long text with ``split_message`` and sends each chunk in order as its own
    message, threaded under ``thread_root`` when one is given."""
    async def say(room_id, text, thread_root=None):
        for chunk in split_message(text):
            await client.room_send(room_id, "m.room.message", thread_content(chunk, thread_root))
    return say
|
||||||
|
|
||||||
|
|
||||||
|
def reply_content(text, reply_to_event_id):
    """Build a plain (non-threaded) reply: shows in the MAIN timeline as a reply to
    reply_to_event_id, unlike thread_content() which lands the message inside a thread.

    With a falsy ``reply_to_event_id`` the result is a plain text message with no relation."""
    content = {"msgtype": "m.text", "body": text}
    if reply_to_event_id:
        content["m.relates_to"] = {"m.in_reply_to": {"event_id": reply_to_event_id}}
    return content
|
||||||
|
|
||||||
|
|
||||||
|
def make_reply(client):
    """Return an async reply(room_id, text, reply_to) that posts a plain main-timeline reply —
    the brief 'proposed X — see thread' nudge alongside the in-thread proposal card.

    Long text is split with ``split_message``; every chunk is sent as a reply to ``reply_to``."""
    async def reply(room_id, text, reply_to):
        for chunk in split_message(text):
            await client.room_send(room_id, "m.room.message", reply_content(chunk, reply_to))
    return reply
|
||||||
@@ -0,0 +1,187 @@
|
|||||||
|
"""Turn a free-text intake message into a normalized proposal via local Qwen.
|
||||||
|
|
||||||
|
The model only EXTRACTS structure; it never decides to write anything. New-vs-existing is
|
||||||
|
finalized in M2 against the CRM matcher — here `intent` is the model's first read.
|
||||||
|
|
||||||
|
`revise()` is the conversational-edit leg: a free-form correction the human types in the
|
||||||
|
proposal thread (e.g. "add that we met June 14") is applied to the pending proposal via the
|
||||||
|
same local Qwen — no Claude, no scrub. Email integrity is preserved: a changed address must
|
||||||
|
literally appear in the instruction (or the original message); the model can never mint one.
|
||||||
|
"""
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
|
||||||
|
import spark
|
||||||
|
|
||||||
|
SYSTEM = (
    "You extract structured investor-intake data from a short message a venture-fund "
    "team member typed about their fundraising outreach. The message is a note FROM a "
    "team member ABOUT an investor or prospect they are contacting. Reply with ONLY a JSON "
    "object, no prose, with these keys:\n"
    ' "intent": "new_investor" if the message introduces a new investor or prospect, '
    '"meeting_note" if it logs a note/update about an investor, else "unclear".\n'
    ' "investor_name": the investing firm or entity name (e.g. "Acme Capital"), or null.\n'
    ' "contact_name": the individual person mentioned, or null.\n'
    ' "contact_email": the person\'s email if explicitly present, else null. Never invent one.\n'
    ' "contact_title": the person\'s role/title if stated, else null.\n'
    ' "city": the person\'s city or location if stated (e.g. "New York"), else null.\n'
    ' "linkedin_url": the person\'s LinkedIn URL if explicitly present, else null. Never invent one.\n'
    ' "phone": the office/main/direct phone number if present (a line labeled Phone/Tel/Office/'
    'Direct, or a single unlabeled number); never a fax or a cell. Else null.\n'
    ' "mobile": the cell/mobile number if present (a line labeled Cell/Mobile); never a fax. Else null.\n'
    ' "note": any meeting note, context, or next step, else null.\n'
    "Use null (not empty string) for anything not present."
)

# Appended when the team roster is known, so the model reads a teammate's name as the person
# DOING the outreach, not the investor — fixes "Jonathan is chatting with Wyoming" extracting
# the teammate instead of the prospect. Names come from settings.team_roster() (INTAKE_TEAM_ROSTER).
ROSTER_FRAME = (
    "These names and initials (case-insensitive) are our OWN team members — the people doing "
    "the outreach, NOT investors or prospects. Never extract one as investor_name or "
    "contact_name: {names}. When a team member is described talking with, meeting, or chasing "
    'someone (e.g. "Jonathan is chatting with Wyoming"), the OTHER party (here "Wyoming") is '
    "the investor or prospect to extract."
)


def build_system(roster=None, base=SYSTEM):
    """Assemble the extraction system prompt. With a `roster` (team-member names) it appends
    the outreach frame so a teammate's name is read as the person doing outreach, not the
    investor. JSON-only stays the last line for recency. Pure + offline-testable.

    ``roster`` may be an iterable of names or a single name string. Names are stripped and
    blank entries dropped; when none remain the frame is omitted, as if no roster was given."""
    if isinstance(roster, str):
        roster = [roster]  # a bare string would otherwise be joined character by character
    names = [name for name in (str(n).strip() for n in (roster or ())) if name]
    parts = [base]
    if names:
        parts.append(ROSTER_FRAME.format(names=", ".join(names)))
    parts.append("Output JSON only.")
    return "\n".join(parts)
|
||||||
|
|
||||||
|
_EMAIL_RE = re.compile(r"[A-Za-z0-9._%+\-]+@[A-Za-z0-9.\-]+\.[A-Za-z]{2,}")
|
||||||
|
_LINKEDIN_RE = re.compile(r"(?:https?://)?(?:[a-z]{2,3}\.)?linkedin\.com/[A-Za-z0-9_%/\-.]+", re.I)
|
||||||
|
_VALID_INTENTS = {"new_investor", "meeting_note", "unclear"}
|
||||||
|
_FIELDS = ("intent", "investor_name", "contact_name", "contact_email", "contact_title",
|
||||||
|
"city", "linkedin_url", "phone", "mobile", "note")
|
||||||
|
|
||||||
|
|
||||||
|
def _digits(s):
|
||||||
|
"""Bare digit run of a string (drops spaces/dashes/parens/dots), for phone-integrity checks."""
|
||||||
|
return re.sub(r"\D", "", str(s or ""))
|
||||||
|
|
||||||
|
|
||||||
|
def _clean(v):
|
||||||
|
if v is None:
|
||||||
|
return None
|
||||||
|
s = str(v).strip()
|
||||||
|
if not s or s.lower() in ("null", "none", "n/a", "na", "unknown"):
|
||||||
|
return None
|
||||||
|
return s
|
||||||
|
|
||||||
|
|
||||||
|
def normalize(raw, source_text=""):
    """Coerce the model's output dict into a stable proposal with every _FIELDS key.

    `raw` is the model's parsed reply (None is treated as empty); `source_text` is the message
    the human typed. Each field is passed through _clean, then the integrity rules apply:

    * intent   - case/dash/space-folded; anything outside _VALID_INTENTS becomes "unclear".
    * email    - the model's value is ignored; the first address found in `source_text` is
                 used, else None. This salvages a missed address and rejects a minted one
                 (a wrong email in the CRM is worse than none).
    * linkedin - same rule: only a linkedin.com URL present in `source_text` is kept.
    * phone / mobile - the model's value (in its printed formatting) is kept only when its
                 digit run is at least 7 long and occurs in the digits of `source_text`.
    * city     - left as extracted (no source gate): a wrong city is low-harm and the human
                 sees it on the card before approving.

    A proposal with neither investor_name nor contact_name is not actionable and is forced to
    intent "unclear". Returns the new dict.
    """
    model = raw or {}
    typed = source_text or ""
    proposal = {field: _clean(model.get(field)) for field in _FIELDS}

    label = (proposal["intent"] or "").lower().replace("-", "_").replace(" ", "_")
    if label not in _VALID_INTENTS:
        label = "unclear"
    proposal["intent"] = label

    # Verbatim identifiers come from what the human typed, never from the model.
    trailing = ".,;:!?)]}>\"'"
    email_hit = _EMAIL_RE.search(typed)
    proposal["contact_email"] = email_hit.group(0).rstrip(trailing) if email_hit else None
    profile_hit = _LINKEDIN_RE.search(typed)
    proposal["linkedin_url"] = profile_hit.group(0).rstrip(trailing) if profile_hit else None

    # Which number is office vs. cell is the model's call; here we only verify it is real.
    # The 7-digit floor avoids accepting a stray short run.
    typed_digits = _digits(source_text)
    for key in ("phone", "mobile"):
        run = _digits(proposal.get(key))
        if len(run) < 7 or run not in typed_digits:
            proposal[key] = None

    if not (proposal["investor_name"] or proposal["contact_name"]):
        proposal["intent"] = "unclear"
    return proposal
|
||||||
|
|
||||||
|
|
||||||
|
def parse_message(text, parse_fn=spark.parse_json, roster=None):
    """Parse one intake message into a normalized proposal dict.

    `parse_fn` is the model call, injectable for tests (defaults to Spark/Qwen); it receives
    the message, the system prompt from build_system(roster), and max_tokens=400. `roster` is
    the team-member names that frame the extraction (see build_system).

    The model output goes through normalize() against `text`, and the original message is
    stored on the result under "_source_text" for later steps.
    NOTE(review): the visible revise path does not read "_source_text" - confirm its consumer.

    Nothing is caught here: whatever `parse_fn` raises on a model/transport failure
    propagates to the caller.
    """
    system_prompt = build_system(roster)
    model_out = parse_fn(text, system=system_prompt, max_tokens=400)
    result = normalize(model_out, source_text=text)
    result["_source_text"] = text
    return result
|
||||||
|
|
||||||
|
|
||||||
|
# Base system prompt for revise(): the model gets the CURRENT proposal JSON plus an INSTRUCTION
# and must reply with the full revised object.
REVISE_SYSTEM = (
    "You revise a structured investor-intake proposal from a short correction a venture-fund "
    "team member typed. You are given the CURRENT proposal as JSON and an INSTRUCTION. Apply "
    "the instruction and reply with ONLY the full revised JSON object, these keys:\n"
    ' "investor_name", "contact_name", "contact_email", "contact_title", "city", '
    '"linkedin_url", "phone", "mobile", "note".\n'
    "Change ONLY what the instruction asks; copy every other field through unchanged. Use null "
    "for a field the instruction clears or that is genuinely absent. Never invent an email "
    "address, a LinkedIn URL, or a phone number."
)

# Fields _apply_revision copies straight from the model's reply (after _clean). Email, LinkedIn
# and phone/mobile are deliberately absent: they are validated against the typed instruction.
_REVISABLE = ("investor_name", "contact_name", "contact_title", "city", "note")
|
||||||
|
|
||||||
|
|
||||||
|
def _apply_revision(proposal, model_out, instruction):
    """Merge a model-produced revision onto `proposal` and return the merged copy.

    Pure and offline-testable; `proposal` itself is not modified. Keys this function does not
    touch (control keys such as _match_id / _stage / intent / _source_text) carry over as-is.

    * _REVISABLE fields present in `model_out` replace the current value (after _clean).
    * contact_email / linkedin_url: the model's value is never trusted. A new value is taken
      only when one literally appears in the INSTRUCTION the human typed; otherwise the
      existing, already integrity-checked value stays.
    * phone / mobile: the model's value is accepted only when its digit run (7+ digits)
      appears in the instruction's digits; otherwise the existing value stays.
    * If the merge leaves neither investor_name nor contact_name, both are restored from the
      original so a revision cannot strip the proposal down to nothing actionable.
    """
    revised = model_out or {}
    typed = instruction or ""
    merged = dict(proposal)
    merged.update({key: _clean(revised.get(key)) for key in _REVISABLE if key in revised})

    trailing = ".,;:!?)]}>\"'"
    for key, pattern in (("contact_email", _EMAIL_RE), ("linkedin_url", _LINKEDIN_RE)):
        hit = pattern.search(typed)
        if hit:
            merged[key] = hit.group(0).rstrip(trailing)

    typed_digits = _digits(instruction)
    for key in ("phone", "mobile"):
        if key not in revised:
            continue
        candidate = _clean(revised.get(key))
        run = _digits(candidate)
        if candidate and len(run) >= 7 and run in typed_digits:
            merged[key] = candidate
        else:
            merged[key] = merged.get(key)

    if not (merged.get("investor_name") or merged.get("contact_name")):
        merged["investor_name"] = proposal.get("investor_name")
        merged["contact_name"] = proposal.get("contact_name")
    return merged
|
||||||
|
|
||||||
|
|
||||||
|
def revise(proposal, instruction, parse_fn=spark.parse_json, roster=None):
    """Apply a natural-language correction to a pending proposal; return the revised copy.

    The model (local Qwen by default) is shown the proposal's current content as JSON plus the
    human's `instruction`, under REVISE_SYSTEM framed by `roster` (see build_system). Its
    reply is never used directly: _apply_revision merges it onto `proposal` and enforces the
    email / LinkedIn / phone integrity rules.

    CURRENT carries every key REVISE_SYSTEM asks the model to return. The prompt tells the
    model to copy untouched fields through unchanged, which it can only do for fields it was
    shown. `city` in particular is overwritten from the model's reply, so leaving it out of
    CURRENT would let an unrelated correction blank a city that was already on the card.

    `parse_fn` is injectable for tests; anything it raises propagates to the caller.
    """
    shown_keys = ("investor_name", "contact_name", "contact_email", "contact_title", "city",
                  "linkedin_url", "phone", "mobile", "note")
    current = {k: proposal.get(k) for k in shown_keys}
    prompt = ("CURRENT:\n" + json.dumps(current, ensure_ascii=False)
              + "\n\nINSTRUCTION:\n" + (instruction or "").strip())
    raw = parse_fn(prompt, system=build_system(roster, base=REVISE_SYSTEM), max_tokens=400)
    return _apply_revision(proposal, raw, instruction)
|
||||||
@@ -0,0 +1,202 @@
|
|||||||
|
"""Pending-proposal store + the in-thread approval state machine.
|
||||||
|
|
||||||
|
The one piece of state in the bot: a proposal awaiting a human's yes/edit/no, keyed by the
|
||||||
|
Matrix thread root (the bot's proposal lives in a thread rooted at the user's message, and
|
||||||
|
the user replies inside that thread). In-memory and ephemeral by design — a restart drops
|
||||||
|
pending proposals (the user just re-sends), matching matrix-bridge's stateless-by-default
|
||||||
|
ethos. Nothing here writes to the CRM; the bot calls the CRM client only after `approve`.
|
||||||
|
|
||||||
|
A proposal carries a `_stage`: "approval" (the normal yes/edit/no card) or "disambiguate"
|
||||||
|
(a fuzzy-match shortlist the human must resolve — pick a number / "new" / "no" — before it
|
||||||
|
becomes an approval-stage proposal). The shortlist itself rides on `_candidates`.
|
||||||
|
"""
|
||||||
|
import re
|
||||||
|
|
||||||
|
# field aliases accepted in `edit <field>=<value>`; maps each alias to its canonical proposal key
_EDIT_ALIASES = {
    "name": "investor_name", "investor": "investor_name", "firm": "investor_name", "org": "investor_name",
    "contact": "contact_name", "person": "contact_name",
    "email": "contact_email",
    "title": "contact_title", "role": "contact_title",
    "city": "city", "location": "city",
    "linkedin": "linkedin_url", "linkedin_url": "linkedin_url", "li": "linkedin_url",
    "phone": "phone", "tel": "phone", "office": "phone",
    "mobile": "mobile", "cell": "mobile",
    "note": "note",
}

# Whole-reply vocabularies (matched after strip + lowercase) for approve / reject.
_YES = {"yes", "y", "approve", "approved", "ok", "confirm", "go", "👍", "✅"}
_NO = {"no", "n", "cancel", "discard", "reject", "stop", "👎", "❌"}
# "create a new investor anyway" replies to a disambiguation shortlist
_NEW = {"new", "none", "new investor", "none of these", "create", "create new", "add new", "neither"}

# Keys compared by same_fields() to decide whether two proposals carry the same content.
_CONTENT_FIELDS = ("intent", "investor_name", "contact_name", "contact_email", "contact_title",
                   "city", "linkedin_url", "phone", "mobile", "note")
|
||||||
|
|
||||||
|
|
||||||
|
class ProposalStore:
    """In-memory map of pending proposals, keyed by the thread-root id they live under."""

    def __init__(self):
        # thread_root -> proposal dict
        self._pending = {}

    def put(self, thread_root, proposal):
        """Store (or replace) the proposal pending under `thread_root`."""
        self._pending[thread_root] = proposal

    def get(self, thread_root):
        """Return the proposal pending under `thread_root`, or None; the entry stays stored."""
        return self._pending.get(thread_root, None)

    def pop(self, thread_root):
        """Remove and return the proposal pending under `thread_root`, or None if absent."""
        return self._pending.pop(thread_root, None)

    def has(self, thread_root):
        """True when a proposal is pending under `thread_root`."""
        return thread_root in self._pending

    def any_pending(self):
        """True when at least one proposal is pending."""
        return len(self._pending) > 0
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_edit(text):
    """Parse an explicit field edit such as 'edit field=value' or 'field: value'.

    A leading "edit " is optional. '=' is tried first, then ':'; the text before the first
    occurrence of the separator must be a known alias in _EDIT_ALIASES and the text after it
    must be non-empty. When a separator does not yield a known field the next one is tried,
    so "note: see deck=v2" splits on ':' rather than on the inner '='.

    Returns (canonical_field, value), or None when the text is not a field edit.
    """
    body = text.strip()
    if body.lower().startswith("edit "):
        body = body[5:].strip()
    for sep in "=:":
        alias, found, remainder = body.partition(sep)
        if not found:
            continue
        canon = _EDIT_ALIASES.get(alias.strip().lower())
        remainder = remainder.strip()
        if canon and remainder:
            return canon, remainder
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def interpret_reply(text):
    """Classify a threaded reply to an approval-stage proposal.

    The stripped, lowercased reply is matched whole against _YES and then _NO; failing both,
    it is tried as an explicit field edit via _parse_edit.

    Returns one of:
      ("approve", None) | ("reject", None) | ("edit", (field, value)) | ("unknown", None)
    """
    reply = (text or "").strip()
    keyword = reply.lower()
    for verdict, vocabulary in (("approve", _YES), ("reject", _NO)):
        if keyword in vocabulary:
            return (verdict, None)
    parsed = _parse_edit(reply)
    return ("edit", parsed) if parsed else ("unknown", None)
|
||||||
|
|
||||||
|
|
||||||
|
def apply_edit(proposal, field, value):
    """Return a shallow copy of `proposal` with `field` set to `value`; the input is untouched."""
    return {**proposal, field: value}
|
||||||
|
|
||||||
|
|
||||||
|
def same_fields(a, b):
    """True when proposals `a` and `b` agree on every key in _CONTENT_FIELDS.

    Either argument may be None (treated as an empty proposal); a missing key compares as
    None. Used to spot a no-op natural-language revision so the human is not told 'Updated'
    when nothing changed.
    """
    left, right = a or {}, b or {}
    for key in _CONTENT_FIELDS:
        if left.get(key) != right.get(key):
            return False
    return True
|
||||||
|
|
||||||
|
|
||||||
|
def interpret_disambiguation(text, n_candidates):
    """Classify a reply to a fuzzy-match shortlist of `n_candidates` entries.

    Returns ("pick", index) | ("new", None) | ("reject", None) | ("unknown", None).
    The stripped, lowercased reply is checked against _NO (discard) and then _NEW (create a
    new investor). Otherwise a bare one- or two-digit number, optionally prefixed with '#',
    selects that candidate: the returned index is zero-based and must fall inside the
    shortlist. Everything else, including an empty reply, is "unknown".
    """
    reply = (text or "").strip().lower()
    if not reply:
        return ("unknown", None)
    for verdict, vocabulary in (("reject", _NO), ("new", _NEW)):
        if reply in vocabulary:
            return (verdict, None)
    numbered = re.fullmatch(r"#?\s*(\d{1,2})", reply)
    if numbered is None:
        return ("unknown", None)
    position = int(numbered.group(1)) - 1  # humans count from 1
    if 0 <= position < n_candidates:
        return ("pick", position)
    return ("unknown", None)
|
||||||
|
|
||||||
|
|
||||||
|
def attach_to_candidate(proposal, candidate):
    """Turn a disambiguation pick into an approval-stage meeting note on the chosen investor.

    Returns a copy of `proposal` without the `_candidates` shortlist, with `_stage` set to
    "approval", `_match_id` set to candidate["id"] (the existing row the note will target)
    and `intent` set to "meeting_note". When the candidate has a name it replaces
    `investor_name` so the card shows the firm accurately. Raises KeyError if `candidate`
    has no "id".
    """
    promoted = {key: val for key, val in proposal.items() if key != "_candidates"}
    promoted.update(_stage="approval", _match_id=candidate["id"], intent="meeting_note")
    matched_name = candidate.get("name")
    if matched_name:
        promoted["investor_name"] = matched_name
    return promoted
|
||||||
|
|
||||||
|
|
||||||
|
def promote_to_new(proposal):
    """Disambiguation 'new': return a copy that proceeds as an ordinary approval-stage proposal.

    The `_candidates` shortlist and any `_match_id` are dropped and `_stage` becomes
    "approval"; every other key carries over. The input is not modified.
    """
    dropped = ("_candidates", "_match_id")
    promoted = {key: val for key, val in proposal.items() if key not in dropped}
    promoted["_stage"] = "approval"
    return promoted
|
||||||
|
|
||||||
|
|
||||||
|
def render_disambiguation(proposal):
    """Render the fuzzy-match shortlist a human resolves before a new investor is created.

    The heading names the proposal (investor_name, else contact_name, else "?"); each entry
    of `_candidates` becomes a numbered line (1-based) showing its name or "?"; a blank line
    and the reply instructions close the message.
    """
    subject = proposal.get("investor_name") or proposal.get("contact_name") or "?"
    shortlist = proposal.get("_candidates") or []
    numbered = [f" **{pos}.** {cand.get('name') or '?'}"
                for pos, cand in enumerate(shortlist, 1)]
    heading = f"🔎 Before adding **{subject}** as new — these existing investors look similar:"
    instructions = ("Reply a **number** to log this against that investor, **new** to add it as a "
                    "new investor, or **no** to discard.")
    return "\n".join([heading, *numbered, "", instructions])
|
||||||
|
|
||||||
|
|
||||||
|
def disambiguation_nudge(proposal):
    """One-line main-timeline pointer for a disambiguation proposal.

    Names the proposal (investor_name, else contact_name, else "?") and sends the reader to
    the thread, where the shortlist itself is shown.
    """
    subject = proposal.get("investor_name") or proposal.get("contact_name") or "?"
    return (f"🔎 **{subject}** may match an existing investor — open the **thread** to pick one "
            "or confirm it's new.")
|
||||||
|
|
||||||
|
|
||||||
|
def render(proposal):
    """Render a proposal as the in-thread card a human approves.

    The heading depends on intent ("meeting_note" vs. anything else, shown as a new investor)
    and names the investor, falling back to the contact and then "?". Each non-empty field
    follows on its own "· Label: value" line in a fixed order, then a blank line and the
    reply instructions.
    """
    subject = proposal.get("investor_name") or proposal.get("contact_name") or "?"
    if proposal.get("intent") == "meeting_note":
        heading = f"📝 Proposed **meeting note** for **{subject}**"
    else:
        heading = f"📇 Proposed **new investor**: **{subject}**"

    labelled = (
        ("Investor", "investor_name"),
        ("Contact", "contact_name"),
        ("Email", "contact_email"),
        ("Title", "contact_title"),
        ("Phone", "phone"),
        ("Mobile", "mobile"),
        ("City", "city"),
        ("LinkedIn", "linkedin_url"),
        ("Note", "note"),
    )
    card = [heading]
    card.extend(f"· {label}: {proposal.get(key)}" for label, key in labelled if proposal.get(key))
    card.append("")
    card.append("Reply **yes** to commit, **no** to discard, or just tell me what to change in "
                "plain words (e.g. \"change the email to a@b.com\", \"the firm is Acme Capital\").")
    return "\n".join(card)
|
||||||
|
|
||||||
|
|
||||||
|
def summary_line(proposal):
    """Brief one-liner for the main-timeline nudge; the full card lives in the thread.

    Names the investor (else the contact, else "?") and says whether a meeting note or a new
    investor was proposed, depending on `intent`.
    """
    subject = proposal.get("investor_name") or proposal.get("contact_name") or "?"
    is_note = proposal.get("intent") == "meeting_note"
    return (f"📝 Proposed a meeting note for **{subject}** — see the thread to review & approve."
            if is_note else
            f"📇 Proposed a new investor: **{subject}** — see the thread to review & approve.")
|
||||||
@@ -0,0 +1,189 @@
|
|||||||
|
"""NL-query Matrix surface (W2 step 5) — turn an '@bot <question>' message into a read-only
|
||||||
|
answer from the CRM's curated NL-query endpoint, and render that answer for the chat room.
|
||||||
|
|
||||||
|
This module is PURE (no network, no matrix-nio) so it's unit-testable offline; the async wiring
|
||||||
|
(call the endpoint, post in a thread) lives in bot.py. The endpoint does the real work:
|
||||||
|
translation runs on the box's LOCAL model (nothing leaves the box) and only the curated,
|
||||||
|
parameterized queries can run — there is no write path here, so no approval gate applies.
|
||||||
|
|
||||||
|
Trigger: a top-level message starting with '?' / '@bot' / '/ask' / '/query' / '/q' (see parse_trigger). We
|
||||||
|
deliberately do NOT accept a bare leading 'ask', which would collide with intake notes like
|
||||||
|
"Ask Jane to send the Q3 deck".
|
||||||
|
"""
|
||||||
|
|
||||||
|
# Markers a human wouldn't start an intake note with. '?' is handled separately (single char).
# Matched case-insensitively by parse_trigger, which also requires a separator (or nothing)
# right after the marker.
QUERY_PREFIXES = ("@bot", "/ask", "/query", "/q")

# Soft cap on rows rendered into a single chat answer. The endpoint already caps the SQL result
# (server MAX_ROWS), but 500 rows is unreadable on mobile — show the first N and say how many
# more there are (never a silent cut). Refine the question or use the web Ask box for the rest.
MAX_DISPLAY_ROWS = 30

# Column-name hints used only for nicer formatting (money / dates). Cosmetic — never affects
# what's queried (that's fixed in intents.py). A column counts as money when its lowercased
# name contains any of these substrings (see _is_money_col).
_MONEY_HINTS = ("amount", "invested", "total", "expected", "committed")
# 0/1 flag columns: suppress when 0 (noise), show a label when 1.
_FLAG_LABELS = {"graveyard": "retired", "overdue": "⚠️ overdue"}
|
||||||
|
|
||||||
|
|
||||||
|
def parse_trigger(text):
    """Extract the question from a message addressed to the query bot.

    A message is a query when, after stripping, it starts with '?' or with one of
    QUERY_PREFIXES (case-insensitive) followed by nothing or by a separator (space, tab,
    newline, ':' or ','). The separator requirement keeps '/asking ...' from being read as
    the '/ask' trigger.

    Returns the remainder after the trigger, stripped (an empty string when the trigger is
    bare), or None when the message is not a query so the caller can route it to intake.
    """
    message = (text or "").strip()
    if not message:
        return None
    if message.startswith("?"):
        return message[1:].strip()

    separators = " \t\n:,"
    lowered = message.lower()
    for prefix in QUERY_PREFIXES:
        if not lowered.startswith(prefix):
            continue
        tail = message[len(prefix):]
        if not tail or tail[0] in separators:
            return tail.lstrip(separators).strip()
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def _examples():
|
||||||
|
return ("Try things like:\n"
|
||||||
|
"• `?which investors haven't we contacted in 90 days?`\n"
|
||||||
|
"• `?top 10 investors by committed capital`\n"
|
||||||
|
"• `?when did we last reach out to Acme Capital?`\n"
|
||||||
|
"• `?how many emails has Grant sent this month?`")
|
||||||
|
|
||||||
|
|
||||||
|
# Usage text: how to address the query bot, followed by the sample questions from _examples().
# NOTE(review): presumably posted by bot.py when the trigger is bare (empty question) — confirm.
HELP = ("💬 Ask me about the fundraising database — start your message with `?` (or `@bot`).\n\n"
        + _examples())
|
||||||
|
|
||||||
|
|
||||||
|
def _is_money_col(name):
    """True when the lowercased column `name` contains any substring from _MONEY_HINTS."""
    lowered = name.lower()
    for hint in _MONEY_HINTS:
        if hint in lowered:
            return True
    return False
|
||||||
|
|
||||||
|
|
||||||
|
def _fmt_value(col, val):
|
||||||
|
"""Format one scalar cell for chat: dates -> YYYY-MM-DD, money columns -> $1,234, else str."""
|
||||||
|
if val is None:
|
||||||
|
return ""
|
||||||
|
name = col.lower()
|
||||||
|
if name.endswith("_at") or name.endswith("date"):
|
||||||
|
return str(val)[:10]
|
||||||
|
if isinstance(val, (int, float)) and _is_money_col(col):
|
||||||
|
return f"${val:,.0f}"
|
||||||
|
return str(val)
|
||||||
|
|
||||||
|
|
||||||
|
def _render_contacts(contacts):
|
||||||
|
"""investor_lookup's nested contact dicts -> 'Name <email> (title · city, state)' lines."""
|
||||||
|
out = []
|
||||||
|
for c in contacts:
|
||||||
|
bits = c.get("full_name") or "?"
|
||||||
|
if c.get("email"):
|
||||||
|
bits += f" <{c['email']}>"
|
||||||
|
loc = ", ".join(x for x in (c.get("city"), c.get("state"), c.get("country")) if x)
|
||||||
|
extra = " · ".join(x for x in (c.get("title"), loc) if x)
|
||||||
|
if extra:
|
||||||
|
bits += f" ({extra})"
|
||||||
|
out.append(bits)
|
||||||
|
return out
|
||||||
|
|
||||||
|
|
||||||
|
def _render_commitments(commitments):
|
||||||
|
"""investor_lookup's nested per-fund commitments -> 'Fund: $amount' lines."""
|
||||||
|
out = []
|
||||||
|
for c in commitments:
|
||||||
|
fund = c.get("fund_name") or "?"
|
||||||
|
amt = c.get("amount")
|
||||||
|
out.append(f"{fund}: ${amt:,.0f}" if isinstance(amt, (int, float)) else f"{fund}: {amt}")
|
||||||
|
return out
|
||||||
|
|
||||||
|
|
||||||
|
def _render_row(i, row, columns):
    """Render result row number `i` as one chat block.

    Columns are taken from `columns` (or the row's own keys when that is empty), in order:
    * a list value becomes indented sub-lines: "contacts" via _render_contacts, "commitments"
      via _render_commitments, any other list via a generic 'k=v, ...' line per dict item
      (non-dict items are ignored); empty lists add nothing;
    * a column named in _FLAG_LABELS contributes its label when truthy and nothing otherwise;
    * other values go through _fmt_value; empty results are skipped, the first non-empty one
      becomes the bold lead of the row and the rest are listed as "col: value".

    Returns the head line followed by any sub-lines, joined with newlines.
    """
    lead = None
    inline = []
    nested = []
    for col in (columns or list(row.keys())):
        val = row.get(col)

        if isinstance(val, list):
            if col == "contacts":
                rendered = _render_contacts(val)
            elif col == "commitments":
                rendered = _render_commitments(val)
            else:
                # generic list-of-dicts fallback (no intent uses this yet)
                rendered = [", ".join(f"{k}={v}" for k, v in item.items())
                            for item in val if isinstance(item, dict)]
            nested.extend(f" – {entry}" for entry in rendered)
            continue

        if col in _FLAG_LABELS:
            if val:
                inline.append(_FLAG_LABELS[col])
            continue

        cell = _fmt_value(col, val)
        if cell == "":
            continue
        if lead is None:
            lead = cell
        else:
            inline.append(f"{col}: {cell}")

    head = f"{i}. **{lead}**" if lead else f"{i}."
    if inline:
        head = head + " — " + " · ".join(inline)
    return "\n".join([head, *nested])
|
||||||
|
|
||||||
|
|
||||||
|
def _render_interpretation(intent, slots):
|
||||||
|
if not intent:
|
||||||
|
return ""
|
||||||
|
if slots:
|
||||||
|
return f"read as: {intent} ({', '.join(f'{k}={v}' for k, v in slots.items())})"
|
||||||
|
return f"read as: {intent}"
|
||||||
|
|
||||||
|
|
||||||
|
def _render_error(err, result):
|
||||||
|
detail = (result.get("detail") or "").strip()
|
||||||
|
if err == "no_match":
|
||||||
|
return "🤷 I couldn't map that to one of my saved queries.\n\n" + _examples()
|
||||||
|
if err == "model_unavailable":
|
||||||
|
return "⚠️ The local query model is unreachable right now — try again in a moment."
|
||||||
|
if err == "query_failed":
|
||||||
|
return f"⚠️ That query failed to run{(': ' + detail) if detail else ''}."
|
||||||
|
# unknown_intent / bad_slot / anything unexpected
|
||||||
|
return (f"⚠️ I couldn't run that ({err}){(': ' + detail) if detail else ''}.\n\n" + _examples())
|
||||||
|
|
||||||
|
|
||||||
|
def render_answer(result):
    """Render the NL-query endpoint's structured result as a Matrix markdown answer.

    `result` is the endpoint body: a hit {intent, slots, columns, rows, summary, truncated}
    or an error {error, detail}; None is treated as an empty hit. Formatting is
    deterministic and done entirely here.

    Errors are delegated to _render_error. A hit shows the summary (or "Done."), an italic
    "read as" line when an intent is present, then up to MAX_DISPLAY_ROWS rows via
    _render_row. When rows were left out, or the server reported truncation, an italic
    footer says so — the cut is never silent.
    """
    payload = result or {}
    error = payload.get("error")
    if error:
        return _render_error(error, payload)

    summary = (payload.get("summary") or "").strip()
    header_lines = [f"📊 {summary}" if summary else "📊 Done."]
    reading = _render_interpretation(payload.get("intent"), payload.get("slots") or {})
    if reading:
        header_lines.append(f"_{reading}_")
    header = "\n".join(header_lines)

    rows = payload.get("rows") or []
    if not rows:
        return header + "\n\n(no matching records)"

    columns = payload.get("columns") or []
    visible = rows[:MAX_DISPLAY_ROWS]
    body = "\n".join(_render_row(number, row, columns)
                     for number, row in enumerate(visible, 1))
    answer = header + "\n\n" + body

    caveats = []
    hidden = len(rows) - len(visible)
    if hidden > 0:
        caveats.append(f"+{hidden} more not shown")
    if payload.get("truncated"):
        caveats.append("results hit the server cap")
    if caveats:
        answer += "\n\n_" + "; ".join(caveats) + " — refine your question or use the web Ask box._"
    return answer
|
||||||
@@ -0,0 +1,86 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""One-time maintenance: clear the intake room's backlog of resolved/stale messages.
|
||||||
|
|
||||||
|
Going forward the bot redacts each intake thread when it's approved/rejected (bot card + ack +
|
||||||
|
nudge + the user's own note/photo). This clears the messages that piled up BEFORE that shipped.
|
||||||
|
|
||||||
|
The intake room is single-purpose and the bot keeps **no durable pending state** (its proposal
|
||||||
|
store is in-memory and is lost on every restart), so nothing in the room is "still live" after a
|
||||||
|
restart — every message in it is safe to redact. This walks the room history and redacts every
|
||||||
|
m.room.message event (text + business-card images), bot's and humans' alike.
|
||||||
|
|
||||||
|
Redacting another user's message (the humans' notes/photos) needs the bot to hold a **redact /
|
||||||
|
moderator power level** in the intake room — without it those just no-op and linger (the bot's own
|
||||||
|
messages still clear). Make the bot a moderator of the intake room in Element first.
|
||||||
|
|
||||||
|
Safe by default: prints what it WOULD redact and does nothing. Pass --apply to actually redact.
|
||||||
|
Run on the Spark via the bot's own creds/image:
|
||||||
|
docker compose run --rm matrix-intake python -u backend/matrix_intake/redact_intake.py
|
||||||
|
docker compose run --rm matrix-intake python -u backend/matrix_intake/redact_intake.py --apply
|
||||||
|
"""
|
||||||
|
import asyncio
|
||||||
|
import sys
|
||||||
|
|
||||||
|
from nio import AsyncClient, MessageDirection
|
||||||
|
|
||||||
|
import settings
|
||||||
|
|
||||||
|
MAX_PAGES = 50 # 50 * 100 events is far more history than this room holds
|
||||||
|
|
||||||
|
|
||||||
|
async def main(apply):
    """List, and with `apply` redact, every m.room.message event in the intake room.

    Reads the Matrix settings, logs in with the stored token, does one sync to get a
    pagination token, then pages backwards through the room history (at most MAX_PAGES pages
    of 100 events). Each not-yet-redacted m.room.message event is collected with a short
    label; other event types (membership/state) are left alone. Every target is printed, and
    when `apply` is true each one is redacted; failures are counted and reported.

    A failed sync or a failed history fetch is reported explicitly instead of surfacing as an
    AttributeError or as a misleading "0 messages" result. The client is always closed.
    """
    mx = settings.matrix_settings()
    intake_room = mx.get("intake_room")
    if not intake_room:
        print("MATRIX_INTAKE_ROOM is not set — nothing to do.")
        return
    client = AsyncClient(mx["homeserver"], mx["user_id"])
    client.restore_login(user_id=mx["user_id"], device_id=mx["device_id"], access_token=mx["token"])
    try:
        sync = await client.sync(timeout=10000, full_state=False)
        # An error response carries no next_batch; without it there is nowhere to page from.
        token = getattr(sync, "next_batch", None)
        if not token:
            print(f"initial sync failed — nothing redacted: {sync}")
            return
        targets = []  # (event_id, label)
        seen = set()
        for _ in range(MAX_PAGES):
            resp = await client.room_messages(intake_room, start=token,
                                              direction=MessageDirection.back, limit=100)
            chunk = getattr(resp, "chunk", None)
            if chunk is None:
                # No chunk attribute at all means an error response, not the end of history.
                print(f"  ! history fetch failed — the list below may be incomplete: {resp}")
                break
            if not chunk:
                break
            for ev in chunk:
                src = getattr(ev, "source", None) or {}
                if src.get("type") != "m.room.message":
                    continue  # only chat messages + images; leave membership/state events alone
                eid = getattr(ev, "event_id", None)
                if not eid or eid in seen:
                    continue
                seen.add(eid)
                content = src.get("content") or {}
                if not content:
                    continue  # already redacted (content stripped) — skip
                msgtype = content.get("msgtype") or "?"
                body = (content.get("body", "") or "").replace("\n", " ")
                who = "bot " if getattr(ev, "sender", None) == mx["user_id"] else "user"
                targets.append((eid, f"{who} [{msgtype}] {body[:60]}"))
            token = getattr(resp, "end", None)
            if not token:
                break

        print(f"messages to clear in the intake room: {len(targets)}")
        fails = 0
        for eid, label in targets:
            print(("APPLY redact " if apply else "WOULD redact ") + eid + " :: " + label)
            if apply:
                r = await client.room_redact(intake_room, eid, reason="retroactive intake-room cleanup")
                # A successful redaction response carries the redaction's event_id.
                if not hasattr(r, "event_id"):
                    fails += 1
                    print(f"  ! redact failed (need mod power for others' messages?): {r}")
        print(("done — redacted " if apply else "dry run — would redact ")
              + f"{len(targets) - (fails if apply else 0)}/{len(targets)} event(s)"
              + (f"; {fails} failed" if apply and fails else "") + ".")
    finally:
        await client.close()
|
||||||
|
|
||||||
|
|
||||||
|
# Script entry point: dry run by default; redact only when --apply is among the arguments.
if __name__ == "__main__":
    asyncio.run(main(apply="--apply" in sys.argv[1:]))
|
||||||
@@ -0,0 +1,78 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""One-time maintenance: redact already-resolved email-proposal review cards.
|
||||||
|
|
||||||
|
The bot redacts a card when it's decided going forward, but cards that were decided BEFORE that
|
||||||
|
behavior shipped (e.g. smoke-test remnants) are already `closed` in the CRM, so the normal
|
||||||
|
to_close sweep never touches them. This walks the review room's history, finds the bot's own
|
||||||
|
"proposed grid note" cards, and redacts every one that is NOT still pending (i.e. not in the CRM
|
||||||
|
`open` work-list) — leaving the room showing only what still needs handling.
|
||||||
|
|
||||||
|
Safe by default: prints what it WOULD redact and does nothing. Pass --apply to actually redact.
|
||||||
|
Run on the Spark via the bot's own creds/image:
|
||||||
|
docker compose run --rm matrix-intake python -u backend/matrix_intake/redact_resolved.py
|
||||||
|
docker compose run --rm matrix-intake python -u backend/matrix_intake/redact_resolved.py --apply
|
||||||
|
"""
|
||||||
|
import asyncio
|
||||||
|
import sys
|
||||||
|
|
||||||
|
from nio import AsyncClient, MessageDirection
|
||||||
|
|
||||||
|
import crm_client
|
||||||
|
import settings
|
||||||
|
|
||||||
|
CARD_MARKER = "📧 Proposed" # present in every review card (old and dash-framed)
|
||||||
|
MAX_PAGES = 30 # 30 * 100 events is far more history than this room holds
|
||||||
|
|
||||||
|
|
||||||
|
async def main(apply):
    """Redact resolved review cards (and their thread replies) from the email-review room.

    Walks the room history backwards from the latest sync token, up to MAX_PAGES pages of 100
    events. Collects the bot's own messages containing CARD_MARKER plus every `m.thread` reply,
    then targets each card / reply whose card (thread-root) event id is not in the CRM `open`
    work-list.

    Args:
        apply: falsy = dry run (only print what WOULD be redacted); truthy = send the redactions.
    """
    mx = settings.matrix_settings()
    review_room = settings.email_review_room()
    if not review_room:
        print("MATRIX_EMAIL_REVIEW_ROOM is not set — nothing to do.")
        return
    client = AsyncClient(mx["homeserver"], mx["user_id"])
    client.restore_login(user_id=mx["user_id"], device_id=mx["device_id"], access_token=mx["token"])
    try:
        # Cards still pending (must be KEPT) — their thread-root event id is the card event id.
        open_ids = {it["event_id"] for it in crm_client.list_email_proposals().get("open", []) if it.get("event_id")}
        print(f"pending cards to keep: {len(open_ids)}")

        sync = await client.sync(timeout=10000, full_state=False)
        token = getattr(sync, "next_batch", None)
        if not token:
            # Presumably an error response (it carries no next_batch); without a start token
            # there is no history to page through, so stop cleanly instead of crashing.
            print(f"sync failed — cannot walk the room history: {sync}")
            return
        cards = {}    # root event_id -> snippet (still-identifiable card bodies)
        replies = {}  # reply event_id -> (thread_root, snippet)
        for _ in range(MAX_PAGES):
            resp = await client.room_messages(review_room, start=token,
                                              direction=MessageDirection.back, limit=100)
            chunk = getattr(resp, "chunk", None)
            if chunk is None:
                # No chunk attribute at all: the page request itself failed (not "end of history").
                print(f" ! history page failed; results may be incomplete: {resp}")
            if not chunk:
                break
            for ev in chunk:
                body = (getattr(ev, "body", "") or "").replace("\n", " ")
                rel = ((getattr(ev, "source", None) or {}).get("content", {}) or {}).get("m.relates_to") or {}
                if rel.get("rel_type") == "m.thread" and rel.get("event_id"):
                    replies[ev.event_id] = (rel["event_id"], body[:50])  # a threaded reply (card already redacted)
                elif getattr(ev, "sender", None) == mx["user_id"] and CARD_MARKER in body:
                    cards[ev.event_id] = body[:70]  # an un-redacted card root
            token = getattr(resp, "end", None)
            if not token:
                break

        # Redact card roots that aren't still pending, AND any reply whose thread isn't still pending.
        card_targets = [(eid, "card :: " + snip) for eid, snip in cards.items() if eid not in open_ids]
        reply_targets = [(eid, "reply :: " + snip) for eid, (root, snip) in replies.items()
                         if root not in open_ids]
        targets = card_targets + reply_targets
        print(f"resolved cards: {len(card_targets)}; "
              f"thread replies to clear: {len(reply_targets)}")

        fails = 0  # redactions the homeserver rejected; only ever non-zero with apply
        for eid, label in targets:
            print(("APPLY redact " if apply else "WOULD redact ") + eid + " :: " + label)
            if apply:
                r = await client.room_redact(review_room, eid, reason="retroactive cleanup of resolved review threads")
                if not hasattr(r, "event_id"):
                    fails += 1
                    print(f" ! redact failed: {r}")
        # Report what actually happened: failed redactions are not counted as redacted.
        print(("done — redacted " if apply else "dry run — would redact ")
              + f"{len(targets) - fails}/{len(targets)} event(s)"
              + (f"; {fails} failed" if fails else "") + ".")
    finally:
        await client.close()
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Safe by default: only redact when --apply was passed on the command line.
    apply_requested = "--apply" in sys.argv[1:]
    asyncio.run(main(apply=apply_requested))
|
||||||
@@ -0,0 +1,4 @@
|
|||||||
|
# Matrix intake bot — isolated to this component's own process. matrix-nio is the ONLY
|
||||||
|
# third-party runtime dep and MUST NOT be added to the stdlib CRM (backend/server.py).
|
||||||
|
# The Spark/Qwen + CRM-API calls reuse the repo's stdlib HTTP client (backend/ingest/http_util).
|
||||||
|
matrix-nio>=0.24
|
||||||
@@ -0,0 +1,80 @@
|
|||||||
|
"""Config for the Matrix intake bot — Matrix creds + the dedicated intake room.
|
||||||
|
|
||||||
|
Spark settings (SPARK_CONTROL_URL, CHAT_MODEL, …) are NOT read here; they come from the
|
||||||
|
reused ingest client (see spark.py), which loads the same repo .env. This module only owns
|
||||||
|
the Matrix connection and the CRM API target for the write-back leg (M2).
|
||||||
|
"""
|
||||||
|
import os
|
||||||
|
|
||||||
|
REPO_ROOT = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
|
|
||||||
|
def load_env(path=None):
    """Populate os.environ from the repo .env (setdefault — never clobber a real env var).

    Args:
        path: .env file to read; defaults to REPO_ROOT/.env. A missing file is a silent no-op.

    Blank lines, `#` comment lines, lines without `=`, and lines whose key is empty are skipped.
    Keys and values are whitespace-stripped; values are otherwise taken verbatim.
    """
    path = path or os.path.join(REPO_ROOT, ".env")
    if not os.path.exists(path):
        return
    with open(path, "r", encoding="utf-8") as fh:
        for line in fh:
            line = line.strip()
            if not line or line.startswith("#") or "=" not in line:
                continue
            k, v = line.split("=", 1)
            k = k.strip()
            if not k:
                # "=value" has no variable name; os.environ rejects an empty key, and since this
                # loader runs at import time one malformed line would break every importer.
                continue
            os.environ.setdefault(k, v.strip())
|
||||||
|
|
||||||
|
|
||||||
|
load_env()
|
||||||
|
|
||||||
|
|
||||||
|
def _require(name):
|
||||||
|
val = os.environ.get(name, "").strip()
|
||||||
|
if not val:
|
||||||
|
raise RuntimeError(f"matrix_intake: required env var {name} is not set (see .env.example)")
|
||||||
|
return val
|
||||||
|
|
||||||
|
|
||||||
|
# Matrix connection (resolved lazily so importing this module for tests never requires creds).
|
||||||
|
def matrix_settings():
    """Read the Matrix connection settings from the environment.

    Returns a dict with keys homeserver, user_id, token, device_id and intake_room. All but
    device_id are required (via _require, which raises RuntimeError when one is missing);
    device_id falls back to "ten31-intake-bot".
    """
    homeserver = _require("MATRIX_HOMESERVER")
    user_id = _require("MATRIX_USER")
    token = _require("MATRIX_ACCESS_TOKEN")
    device_id = os.environ.get("MATRIX_DEVICE_ID", "ten31-intake-bot")
    intake_room = _require("MATRIX_INTAKE_ROOM")
    return dict(
        homeserver=homeserver,
        user_id=user_id,
        token=token,
        device_id=device_id,
        intake_room=intake_room,
    )
|
||||||
|
|
||||||
|
|
||||||
|
# CRM API target for the write-back leg (M2). The CRM has no service-key auth path — auth is
|
||||||
|
# Bearer-JWT via /api/auth/login — so the bot logs in as a DEDICATED service user (a normal
|
||||||
|
# CRM user, created by an admin) and reuses the existing auth. Creds live in .env, never code.
|
||||||
|
def crm_settings():
    """Read the CRM API target and bot credentials from the environment.

    Returns a dict with:
        base: CRM_API_BASE without trailing slashes (default "http://127.0.0.1:8080").
        username: CRM_BOT_USERNAME, stripped (default "").
        password: CRM_BOT_PASSWORD, taken verbatim (default "").
        verify_tls: True when CRM_API_VERIFY_TLS is one of 1/true/yes/on (case-insensitive,
            surrounding whitespace ignored); defaults to True when unset.
    """
    # Strip before comparing: a padded value such as "true " must not silently turn TLS
    # verification off.
    verify_raw = os.environ.get("CRM_API_VERIFY_TLS", "true").strip().lower()
    return {
        "base": os.environ.get("CRM_API_BASE", "http://127.0.0.1:8080").rstrip("/"),
        "username": os.environ.get("CRM_BOT_USERNAME", "").strip(),
        "password": os.environ.get("CRM_BOT_PASSWORD", ""),
        "verify_tls": verify_raw in ("1", "true", "yes", "on"),
    }
|
||||||
|
|
||||||
|
|
||||||
|
# Team-member names (comma-separated in INTAKE_TEAM_ROSTER), fed to the parser so a teammate's
|
||||||
|
# name reads as the person DOING outreach, not the investor (see parse.build_system). Optional —
|
||||||
|
# unset/empty just means no roster framing, i.e. the prior behavior.
|
||||||
|
def team_roster():
    """Return the non-blank, stripped names from the comma-separated INTAKE_TEAM_ROSTER env var.

    An unset or empty variable yields an empty list.
    """
    names = []
    for piece in os.environ.get("INTAKE_TEAM_ROSTER", "").split(","):
        cleaned = piece.strip()
        if cleaned:
            names.append(cleaned)
    return names
|
||||||
|
|
||||||
|
|
||||||
|
# Dedicated room for reviewing CRM-drafted email-activity proposals (the CRM→Matrix push leg).
|
||||||
|
# Separate from the intake room so high-volume email proposals don't drown the conversational
|
||||||
|
# intake flow. Unset/empty disables the whole email-review poll loop (the bot just does intake).
|
||||||
|
def email_review_room():
    """Return the stripped MATRIX_EMAIL_REVIEW_ROOM env var, or "" when unset."""
    configured = os.environ.get("MATRIX_EMAIL_REVIEW_ROOM", "")
    return configured.strip()
|
||||||
|
|
||||||
|
|
||||||
|
# Dedicated Q&A room for read-only natural-language queries (W2). In this room EVERY top-level
|
||||||
|
# message is treated as a question — no '?'/'@bot' trigger needed (the trigger only exists to
|
||||||
|
# disambiguate question-vs-note when Q&A shares the intake room; here that's unnecessary). The
|
||||||
|
# '?'/'@bot' trigger still works in the intake room too, as a cross-room convenience. Unset/empty
|
||||||
|
# just means no dedicated room (questions then go through the intake-room trigger). The bot must be
|
||||||
|
# a member of this room. Read-only — no approval gate, no redaction, no special power level needed.
|
||||||
|
def query_room():
    """Return the stripped MATRIX_QUERY_ROOM env var, or "" when unset."""
    configured = os.environ.get("MATRIX_QUERY_ROOM", "")
    return configured.strip()
|
||||||
@@ -0,0 +1,56 @@
|
|||||||
|
"""Thin reuse of the in-repo local-Qwen client (backend/ingest/llm.py) via Spark Control.
|
||||||
|
|
||||||
|
We import the ingest client rather than re-implementing the HTTP call so the intake bot
|
||||||
|
speaks the exact same Spark contract (model, /v1/chat/completions, TLS verify, .env load).
|
||||||
|
The intake message is real LP substance, but it goes ONLY to the local Qwen on Ten31 infra
|
||||||
|
— never Claude — so no scrub boundary applies (same basis as the daily digest). Never call a
|
||||||
|
Spark directly; everything goes through SPARK_CONTROL_URL.
|
||||||
|
"""
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
|
_INGEST = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "ingest")
|
||||||
|
if _INGEST not in sys.path:
|
||||||
|
sys.path.insert(0, _INGEST)
|
||||||
|
|
||||||
|
import llm # noqa: E402 (backend/ingest/llm.py — chat / chat_json over Spark Control)
|
||||||
|
|
||||||
|
|
||||||
|
def parse_json(prompt, system=None, max_tokens=400):
    """Forward the prompt to the ingest client's `llm.chat_json` and return its result unchanged.

    Per the ingest client's contract this is the local Qwen (temp 0, thinking off) and yields
    the first parsed JSON object, or None.
    """
    reply = llm.chat_json(prompt, system=system, max_tokens=max_tokens)
    return reply
|
||||||
|
|
||||||
|
|
||||||
|
# The vision model only TRANSCRIBES the card; the existing text-parse flow then extracts the
|
||||||
|
# structured proposal from that transcription. Keeping the two steps separate (vs. asking the
|
||||||
|
# vision model for JSON directly) is deliberate: the transcription becomes the source text the
|
||||||
|
# email-integrity check runs against, so the "only keep an address that literally appears in the
|
||||||
|
# source, never let the model mint one" rule (parse.normalize) protects card intake too.
|
||||||
|
CARD_SYSTEM = (
    "You are transcribing a photo of a business card. Copy the text EXACTLY as printed — never "
    "paraphrase, translate, complete, normalize, or correct anything.\n"
    "Read each of these character-by-character and reproduce every glyph precisely. Do NOT 'fix' "
    "them toward a more common spelling or a well-known company's domain, and never add or drop a "
    "character:\n"
    " - Email: check the local part, the @, and the domain separately (transcribe 'mara.com' as "
    "'mara.com', never 'marac.com').\n"
    " - Phone, cell/mobile, and fax numbers — keep each on its own labeled line so they aren't "
    "confused (put an office/main/direct number on Phone:, a cell/mobile number on Mobile:, and a "
    "fax on Fax:).\n"
    " - Website / LinkedIn URL.\n"
    "Then list, each on its own labeled line and ONLY if present on the card:\n"
    " Name: Title: Company: Email: Phone: Mobile: Fax: LinkedIn: City:\n"
    "If a character is genuinely ambiguous, give your single best reading — never invent extra "
    "characters to fill a gap. If the image is not a readable business card, reply with the single "
    "word NONE. Output only the labeled lines, nothing else."
)


def transcribe_card(image_b64, mime="image/jpeg", chat_fn=None):
    """Vision-transcribe a business card to text, or return '' when no card was readable.

    Args:
        image_b64: the card image, passed through to the chat function as-is.
        mime: image MIME type forwarded to the chat function.
        chat_fn: injectable for offline tests; defaults to `llm.chat_vision`. Called as
            chat_fn(prompt, image_b64, mime=..., system=CARD_SYSTEM, max_tokens=600).

    Returns:
        The stripped transcription, or '' when the model replied with nothing or with the NONE
        sentinel.
    """
    chat_fn = chat_fn or llm.chat_vision
    out = (chat_fn("Transcribe this business card.", image_b64, mime=mime,
                   system=CARD_SYSTEM, max_tokens=600) or "").strip()
    # The sentinel may arrive decorated ("NONE.", '"None"', "**NONE**"); compare it without the
    # wrapping punctuation so a non-card is never passed on as if it were a transcription.
    if out.strip(" \t.!\"'`*").upper() == "NONE":
        return ""
    return out
|
||||||
@@ -0,0 +1,180 @@
|
|||||||
|
"""Tests for the CRM client's payload builder (pure logic, no network)."""
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
|
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||||
|
|
||||||
|
import crm_client # noqa: E402
|
||||||
|
|
||||||
|
|
||||||
|
def test_new_investor_payload():
    """A new-investor proposal commits by name with create-if-missing and a fully keyed contact."""
    proposal = dict(intent="new_investor", investor_name="Acme Capital",
                    contact_name="Jane Doe", contact_email="jane@acme.com",
                    contact_title="GP", note="met at conf")
    expected_contact = {"name": "Jane Doe", "email": "jane@acme.com", "title": "GP",
                        "city": "", "linkedin_url": "", "phone": "", "mobile": ""}
    payload = crm_client.build_commit_payload(proposal)
    assert payload["investor_name"] == "Acme Capital"
    assert payload["create_investor_if_missing"] is True
    assert "row_id" not in payload
    assert payload["contact"] == expected_contact
    assert payload["body"] == "met at conf"
    assert payload["source"] == "matrix_intake"
|
||||||
|
|
||||||
|
|
||||||
|
def test_contact_carries_card_fields_when_present():
    """City, LinkedIn URL, office phone and mobile are copied onto the contact sub-payload."""
    proposal = dict(intent="new_investor", investor_name="Acme Capital", contact_name="Jane Doe",
                    contact_email="jane@acme.com", city="New York",
                    linkedin_url="linkedin.com/in/janedoe", phone="212-555-0100",
                    mobile="917-555-0199", note="met at conf")
    contact = crm_client.build_commit_payload(proposal)["contact"]
    expected = {
        "city": "New York",
        "linkedin_url": "linkedin.com/in/janedoe",
        "phone": "212-555-0100",   # office/main line
        "mobile": "917-555-0199",  # cell
    }
    for field, value in expected.items():
        assert contact[field] == value
|
||||||
|
|
||||||
|
|
||||||
|
def test_existing_investor_uses_row_id_not_create():
    """A proposal carrying `_match_id` is committed by row id, with no name and no create flag."""
    matched = dict(intent="meeting_note", investor_name="Acme Capital",
                   contact_name="Jane Doe", contact_email=None, note="wants Q3 deck",
                   _match_id="rowAcme")
    payload = crm_client.build_commit_payload(matched)
    assert payload["row_id"] == "rowAcme"
    assert "create_investor_if_missing" not in payload
    # targeted by row id, never re-matched by name
    assert "investor_name" not in payload
    assert payload["body"] == "wants Q3 deck"
|
||||||
|
|
||||||
|
|
||||||
|
def test_contact_falls_back_to_investor_name_when_no_person():
    """With no contact name the contact is named after the investor; a None note gives body ''."""
    payload = crm_client.build_commit_payload(
        dict(intent="new_investor", investor_name="Delta Fund",
             contact_name=None, contact_email=None, note=None))
    assert payload["contact"]["name"] == "Delta Fund"
    assert payload["body"] == ""
|
||||||
|
|
||||||
|
|
||||||
|
def test_no_email_sends_empty_string_not_none():
    """A None contact_email is sent as the empty string."""
    payload = crm_client.build_commit_payload(
        dict(intent="new_investor", investor_name="Gamma", contact_name="Bob",
             contact_email=None, note="x"))
    assert payload["contact"]["email"] == ""
|
||||||
|
|
||||||
|
|
||||||
|
def test_subject_blank_when_note_present_else_provenance_label():
    """Subject is '' when a note exists, and the provenance label when there is no note."""
    # The CRM's grid note line uses subject-or-body, so a blank subject lets the note text show.
    noted = dict(intent="meeting_note", investor_name="Acme", note="sent the deck", _match_id="r1")
    with_note = crm_client.build_commit_payload(noted)
    assert with_note["subject"] == ""
    assert with_note["body"] == "sent the deck"

    # no note text → fall back to a provenance label so the grid line isn't empty
    bare = dict(intent="new_investor", investor_name="Beta", contact_name="X", note=None)
    no_note = crm_client.build_commit_payload(bare)
    assert no_note["subject"] == "Intake (Matrix)"
|
||||||
|
|
||||||
|
|
||||||
|
def test_source_defaults_to_intake_and_card_overrides():
    """`source` defaults to "matrix_intake"; a `_source` key on the proposal overrides it."""
    # Provenance: a typed note tags source="matrix_intake"; a scanned card rides in on
    # _source="matrix_card" (set by the bot's image handler) so the audit log distinguishes them.
    typed_proposal = dict(intent="new_investor", investor_name="Acme", note="x")
    typed = crm_client.build_commit_payload(typed_proposal)
    assert typed["source"] == "matrix_intake"

    card_proposal = dict(intent="new_investor", investor_name="Acme", note="x",
                         _source="matrix_card")
    card = crm_client.build_commit_payload(card_proposal)
    assert card["source"] == "matrix_card"
|
||||||
|
|
||||||
|
|
||||||
|
def _with_stub_authed(reply, capture=None):
    """Replace crm_client._authed with a stub that always returns `reply`.

    When `capture` is a dict, the stub records the requested path under capture["path"].
    Returns the original `_authed` so the caller can put it back afterwards.
    """
    previous = crm_client._authed

    def _canned(method, path, body=None):
        if capture is None:
            return reply
        capture["path"] = path
        return reply

    crm_client._authed = _canned
    return previous
|
||||||
|
|
||||||
|
|
||||||
|
def test_match_parses_exact_match():
    """An exact match is reduced to {id, name} and the query string reaches the request path."""
    seen = {}
    canned = (200, {"data": {
        "match": {"id": "rowAcme", "investor_name": "Acme Capital", "matched_on": "name"},
        "candidates": [],
    }})
    saved = _with_stub_authed(canned, seen)
    try:
        result = crm_client.match({"investor_name": "Acme Capital", "contact_email": ""})
    finally:
        crm_client._authed = saved
    assert result["match"] == {"id": "rowAcme", "name": "Acme Capital"}
    assert result["candidates"] == []
    assert "q=Acme" in seen["path"]  # the query was forwarded
|
||||||
|
|
||||||
|
|
||||||
|
def test_match_returns_ranked_candidates_when_no_exact():
    """Without an exact match the candidates come back in order, each with name and matched_on."""
    ranked = [
        {"id": "rowCharlie", "investor_name": "Charlie Brown", "score": 0.92, "matched_on": "name"},
        {"id": "rowBeta", "investor_name": "Beta Capital LLC", "score": 0.86, "matched_on": "name"},
    ]
    saved = _with_stub_authed((200, {"data": {"match": None, "candidates": ranked}}))
    try:
        result = crm_client.match({"investor_name": "Charles Brown"})
    finally:
        crm_client._authed = saved
    assert result["match"] is None
    assert [cand["id"] for cand in result["candidates"]] == ["rowCharlie", "rowBeta"]
    top = result["candidates"][0]
    assert top["name"] == "Charlie Brown"
    assert top["matched_on"] == "name"
|
||||||
|
|
||||||
|
|
||||||
|
def test_match_no_query_skips_network():
    """With nothing to match on, match() returns an empty result without calling _authed."""
    def _explode(*args, **kwargs):
        raise AssertionError("should not hit the network when there's nothing to match on")

    saved = crm_client._authed
    crm_client._authed = _explode
    try:
        empty_query = {"investor_name": None, "contact_name": None, "contact_email": None}
        result = crm_client.match(empty_query)
    finally:
        crm_client._authed = saved
    assert result == {"match": None, "candidates": []}
|
||||||
|
|
||||||
|
|
||||||
|
def test_nl_query_returns_endpoint_data():
    """nl_query returns the response's `data` payload and targets /api/query/nl."""
    seen = {}
    canned = (200, {"data": {"intent": "top_investors_committed", "rows": [], "summary": "ok"}})
    saved = _with_stub_authed(canned, seen)
    try:
        result = crm_client.nl_query("top investors")
    finally:
        crm_client._authed = saved
    assert result["intent"] == "top_investors_committed"
    assert seen["path"] == "/api/query/nl"
|
||||||
|
|
||||||
|
|
||||||
|
def test_nl_query_passes_through_soft_503():
    """A 503 that still carries a `data` body is returned to the caller rather than raised."""
    # Model-down still carries a structured body (the endpoint 503s with the error in `data`) —
    # return it for the renderer to surface, don't raise.
    canned = (503, {"data": {"error": "model_unavailable"}})
    saved = _with_stub_authed(canned)
    try:
        result = crm_client.nl_query("anything")
    finally:
        crm_client._authed = saved
    assert result["error"] == "model_unavailable"
|
||||||
|
|
||||||
|
|
||||||
|
def test_nl_query_raises_on_auth_failure():
    """A 403 response makes nl_query raise RuntimeError."""
    saved = _with_stub_authed((403, {"error": "Bot or admin required"}))
    try:
        crm_client.nl_query("x")
    except RuntimeError:
        raised = True
    else:
        raised = False
    finally:
        crm_client._authed = saved
    assert raised
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Minimal runner: call every module-level test_* callable in name order.
    passed = 0
    for name in sorted(globals()):
        candidate = globals()[name]
        if not (name.startswith("test_") and callable(candidate)):
            continue
        candidate()
        print(f"ok {candidate.__name__}")
        passed += 1
    print(f"\n{passed} passed")
|
||||||
@@ -0,0 +1,78 @@
|
|||||||
|
"""Offline tests for the email-proposal review logic (card render, framing, reply grammar, note
|
||||||
|
revision). The network/Matrix wiring lives in bot.py (live-smoke only); this covers pure functions."""
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
|
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||||
|
|
||||||
|
import email_proposals # noqa: E402
|
||||||
|
|
||||||
|
ITEM = {
|
||||||
|
"id": "p1", "investor_name": "Acme Capital", "direction": "received",
|
||||||
|
"from_name": "Jane Doe", "from_email": "jane@acme.com",
|
||||||
|
"email_subject": "Re: Fund III", "email_date": "2026-06-02",
|
||||||
|
"snippet": "thanks for the deck — one question on terms",
|
||||||
|
"proposed_note": "✉ Jane Doe emailed the team: asked about terms",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def test_interpret_yes_no_else():
    """interpret() maps yes-like replies to approve, no-like to reject, anything else to revise."""
    expectations = [
        ("yes", "approve"),
        (" Y ", "approve"),
        ("✅", "approve"),
        ("no", "reject"),
        ("skip", "reject"),
        # anything that isn't a clear yes/no is treated as a revision instruction
        ("say we discussed the Q3 raise", "revise"),
    ]
    for reply, verdict in expectations:
        assert email_proposals.interpret(reply) == verdict, reply
|
||||||
|
|
||||||
|
|
||||||
|
def test_frame_wraps_with_rules():
    """frame() puts RULE on the first and last line and keeps the wrapped text."""
    framed = email_proposals.frame("hello")
    first, *_, last = framed.split("\n")
    assert first == email_proposals.RULE
    assert last == email_proposals.RULE
    assert "hello" in framed
|
||||||
|
|
||||||
|
|
||||||
|
def test_render_card_has_context_note_and_actions():
    """The card shows investor, sender, subject, date, snippet, proposed note and yes/no."""
    card = email_proposals.render_card(ITEM)
    fragments = (
        "Acme Capital",
        "Jane Doe",
        "Re: Fund III",
        "2026-06-02",
        "thanks for the deck",
        "Jane Doe emailed the team: asked about terms",  # the clear, named note
    )
    for fragment in fragments:
        assert fragment in card
    lowered = card.lower()
    assert "yes" in lowered and "no" in lowered
|
||||||
|
|
||||||
|
|
||||||
|
def test_render_card_is_framed_and_dropless_direction():
    """The card starts and ends with RULE and carries no bare (Received)/(Sent) label."""
    rule = email_proposals.RULE
    card = email_proposals.render_card(ITEM)
    assert card.startswith(rule)
    assert card.rstrip().endswith(rule)
    # the bare Sent/Received label is gone — the note itself names who emailed whom
    for label in ("(Received)", "(Sent)"):
        assert label not in card
|
||||||
|
|
||||||
|
|
||||||
|
def test_render_card_truncates_long_snippet():
    """A 1000-character snippet is cut with an ellipsis and the card stays under 1000 chars."""
    long_item = {**ITEM, "snippet": "x" * 1000}
    card = email_proposals.render_card(long_item)
    assert "…" in card
    assert len(card) < 1000
|
||||||
|
|
||||||
|
|
||||||
|
def test_revise_note_applies_model_output():
    """revise_note returns the `note` value produced by the injected parse function."""
    def _model(prompt, system=None, max_tokens=400):
        return {"note": "Discussed the Q3 raise."}

    revised = email_proposals.revise_note(
        "old note", "make it about the Q3 raise", parse_fn=_model)
    assert revised == "Discussed the Q3 raise."
|
||||||
|
|
||||||
|
|
||||||
|
def test_revise_note_noop_or_empty_returns_none():
    """revise_note yields None when the model echoes the note or returns nothing usable."""
    def _always(value):
        return lambda *a, **k: value

    # model echoes the same note unchanged -> None so the caller re-prompts (not "Updated")
    echoed = email_proposals.revise_note("same", "x", parse_fn=_always({"note": "same"}))
    assert echoed is None
    # model returns nothing usable -> None
    assert email_proposals.revise_note("n", "y", parse_fn=_always({})) is None
    assert email_proposals.revise_note("n", "y", parse_fn=_always(None)) is None
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Minimal runner: call every module-level test_* callable in name order.
    passed = 0
    for name in sorted(globals()):
        candidate = globals()[name]
        if not (name.startswith("test_") and callable(candidate)):
            continue
        candidate()
        print(f"ok {candidate.__name__}")
        passed += 1
    print(f"\n{passed} passed")
|
||||||
@@ -0,0 +1,37 @@
|
|||||||
|
"""Tests for matrix_io content builders — pure dict shaping, no network."""
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
|
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||||
|
|
||||||
|
import matrix_io # noqa: E402
|
||||||
|
|
||||||
|
|
||||||
|
def test_reply_content_is_plain_main_timeline_reply():
    """reply_content() relates via m.in_reply_to only — no rel_type on the relation."""
    relation = matrix_io.reply_content("hi", "$evt1")["m.relates_to"]
    assert relation["m.in_reply_to"]["event_id"] == "$evt1"
    # a plain reply must NOT carry a thread relation, or it'd land in the thread
    # instead of the main timeline (the whole point of the nudge).
    assert "rel_type" not in relation
|
||||||
|
|
||||||
|
|
||||||
|
def test_reply_content_without_target_has_no_relation():
    """With no target event the content has no m.relates_to and still carries the body."""
    content = matrix_io.reply_content("hi", None)
    assert "m.relates_to" not in content
    assert content["body"] == "hi"
|
||||||
|
|
||||||
|
|
||||||
|
def test_thread_content_stays_threaded():
    """thread_content() relates to the root event with rel_type m.thread."""
    relation = matrix_io.thread_content("hi", "$root1")["m.relates_to"]
    assert relation["rel_type"] == "m.thread"
    assert relation["event_id"] == "$root1"
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Minimal runner: call every module-level test_* callable in name order.
    passed = 0
    for name in sorted(globals()):
        candidate = globals()[name]
        if not (name.startswith("test_") and callable(candidate)):
            continue
        candidate()
        print(f"ok {candidate.__name__}")
        passed += 1
    print(f"\n{passed} passed")
|
||||||
@@ -0,0 +1,287 @@
|
|||||||
|
"""Tests for the intake parse/normalize layer — Spark/Qwen stubbed (no network)."""
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
|
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||||
|
|
||||||
|
import parse # noqa: E402
|
||||||
|
|
||||||
|
|
||||||
|
def _stub(reply):
|
||||||
|
"""Return a parse_fn that ignores input and yields `reply` (simulating Qwen's JSON)."""
|
||||||
|
return lambda text, system=None, max_tokens=400: reply
|
||||||
|
|
||||||
|
|
||||||
|
def test_new_investor_basic():
    """Intent, investor name and an email present in the source survive parsing."""
    source = "New investor Acme Capital, contact Jane Doe jane@acme.com, met at the Austin conf"
    model_reply = {"intent": "new_investor", "investor_name": "Acme Capital",
                   "contact_name": "Jane Doe", "contact_email": "jane@acme.com",
                   "contact_title": None, "note": "met at the Austin conf"}
    parsed = parse.parse_message(source, parse_fn=_stub(model_reply))
    assert parsed["intent"] == "new_investor"
    assert parsed["investor_name"] == "Acme Capital"
    assert parsed["contact_email"] == "jane@acme.com"
|
||||||
|
|
||||||
|
|
||||||
|
def test_email_salvaged_from_source_when_model_misses():
    """When the model returns no email, the address in the source text is used."""
    model_reply = {"intent": "new_investor", "investor_name": "Beta LP",
                   "contact_name": "Bob", "contact_email": None}
    parsed = parse.parse_message("add bob@example.org from Beta LP",
                                 parse_fn=_stub(model_reply))
    assert parsed["contact_email"] == "bob@example.org"
|
||||||
|
|
||||||
|
|
||||||
|
def test_fabricated_email_dropped_when_not_in_source():
    """An email returned by the model that does not appear in the source becomes None."""
    model_reply = {"intent": "new_investor", "investor_name": "Gamma Partners",
                   "contact_name": "their GP", "contact_email": "made-up@nowhere.test"}
    parsed = parse.parse_message("new prospect Gamma Partners, talked to their GP",
                                 parse_fn=_stub(model_reply))
    # the model invented an address that isn't in the source → must be dropped
    assert parsed["contact_email"] is None
|
||||||
|
|
||||||
|
|
||||||
|
def test_email_extracted_without_surrounding_punctuation():
    """Salvaged addresses come back bare — no angle brackets, parentheses or trailing period."""
    # "Name <addr>" is the most common contact format; parens / trailing period also occur.
    # The salvage-from-source path must extract the bare address, never the brackets.
    expected_by_source = {
        "New investor: Larch Capital — Dana Reed <dana@larchcap.com>, met at conf": "dana@larchcap.com",
        "ping (sam@beta.io) re the deck": "sam@beta.io",
        "reach kim@acme.co.": "kim@acme.co",
    }
    for source, address in expected_by_source.items():
        # a fresh model reply per case, so nothing is shared between iterations
        model_reply = {"intent": "new_investor", "investor_name": "X",
                       "contact_name": "Y", "contact_email": None}
        parsed = parse.parse_message(source, parse_fn=_stub(model_reply))
        assert parsed["contact_email"] == address, (source, parsed["contact_email"])
|
||||||
|
|
||||||
|
|
||||||
|
def test_meeting_note_intent_preserved():
    """A meeting_note intent and its note text pass through parsing unchanged."""
    model_reply = {"intent": "meeting_note", "investor_name": "Acme Capital",
                   "note": "wants the Q3 deck"}
    parsed = parse.parse_message("Note for Acme Capital: wants the Q3 deck",
                                 parse_fn=_stub(model_reply))
    assert parsed["intent"] == "meeting_note"
    assert parsed["note"] == "wants the Q3 deck"
|
||||||
|
|
||||||
|
|
||||||
|
def test_unclear_when_no_entity():
    """With neither investor nor contact name, the intent is reported as unclear."""
    model_reply = {"intent": "new_investor", "investor_name": None, "contact_name": None}
    parsed = parse.parse_message("hey what's up", parse_fn=_stub(model_reply))
    assert parsed["intent"] == "unclear"
|
||||||
|
|
||||||
|
|
||||||
|
def test_null_strings_normalized():
    """Placeholder strings ("null", "N/A", "") are normalized to None."""
    model_reply = {"intent": "new_investor", "investor_name": "Delta Fund",
                   "contact_name": "null", "contact_email": "N/A", "note": ""}
    parsed = parse.parse_message("Delta Fund", parse_fn=_stub(model_reply))
    assert parsed["contact_name"] is None
    assert parsed["contact_email"] is None
    assert parsed["note"] is None
|
||||||
|
|
||||||
|
|
||||||
|
def test_bad_intent_falls_back_to_unclear():
    """An unrecognized intent value from the model is replaced by "unclear"."""
    model_reply = {"intent": "garbage", "investor_name": "Epsilon Capital"}
    parsed = parse.parse_message("Epsilon Capital", parse_fn=_stub(model_reply))
    assert parsed["intent"] == "unclear"
|
||||||
|
|
||||||
|
|
||||||
|
def test_none_model_reply_is_unclear():
    """A None reply from the model yields an "unclear" intent."""
    no_reply = _stub(None)
    parsed = parse.parse_message("???", parse_fn=no_reply)
    assert parsed["intent"] == "unclear"
|
||||||
|
|
||||||
|
|
||||||
|
def test_parse_message_stashes_source_text():
    """The original message text is kept on the proposal under `_source_text`."""
    source = "Acme Capital, Jane jane@acme.com"
    model_reply = {"intent": "new_investor", "investor_name": "Acme Capital",
                   "contact_name": "Jane", "contact_email": "jane@acme.com"}
    parsed = parse.parse_message(source, parse_fn=_stub(model_reply))
    assert parsed["_source_text"] == "Acme Capital, Jane jane@acme.com"
|
||||||
|
|
||||||
|
|
||||||
|
def test_revise_applies_note_change_and_preserves_control_keys():
    """revise() applies the new note while keeping investor name, intent and `_source_text`."""
    first_reply = {"intent": "new_investor", "investor_name": "Acme Capital",
                   "contact_name": "Jane Doe", "contact_email": "jane@acme.com",
                   "contact_title": None, "note": None}
    proposal = parse.parse_message("New investor Acme Capital, Jane Doe jane@acme.com",
                                   parse_fn=_stub(first_reply))

    revision_reply = {"investor_name": "Acme Capital", "contact_name": "Jane Doe",
                      "contact_email": "jane@acme.com", "contact_title": None,
                      "note": "met on June 14"}
    revised = parse.revise(proposal, "add that we met on June 14",
                           parse_fn=_stub(revision_reply))

    assert revised["note"] == "met on June 14"
    assert revised["investor_name"] == "Acme Capital"
    assert revised["intent"] == "new_investor"  # control key preserved
    assert revised["_source_text"] == proposal["_source_text"]  # preserved for email integrity
|
||||||
|
|
||||||
|
|
||||||
|
def test_revise_email_taken_only_from_instruction():
    """A revised email is accepted only when the address literally appears in the instruction."""
    proposal = dict(intent="new_investor", investor_name="Acme", contact_name="Jane",
                    contact_email="jane@acme.com", contact_title=None, note=None,
                    _source_text="Acme, Jane jane@acme.com")

    # instruction literally carries the new address → accepted
    accepted = parse.revise(proposal, "her email is jane@newfirm.com",
                            parse_fn=_stub({"contact_email": "jane@newfirm.com"}))
    assert accepted["contact_email"] == "jane@newfirm.com"

    # model tries to change the email but the instruction has no address → keep the existing one
    invented = {"contact_email": "totally@madeup.test", "contact_title": "GP"}
    kept = parse.revise(proposal, "set her title to GP", parse_fn=_stub(invented))
    assert kept["contact_email"] == "jane@acme.com"  # model's email ignored (not in instruction)
    assert kept["contact_title"] == "GP"
|
||||||
|
|
||||||
|
|
||||||
|
def test_revise_preserves_match_id():
    """Revising the note leaves the proposal's _match_id and intent unchanged."""
    proposal = {
        "intent": "meeting_note",
        "investor_name": "Acme",
        "contact_name": None,
        "contact_email": None,
        "contact_title": None,
        "note": "old",
        "_match_id": "rowAcme",
        "_stage": "approval",
        "_source_text": "note for Acme: old",
    }
    revised = parse.revise(
        proposal,
        "change the note to: sent the deck",
        parse_fn=_stub({"note": "sent the deck"}),
    )

    expected = (
        ("note", "sent the deck"),
        ("_match_id", "rowAcme"),
        ("intent", "meeting_note"),
    )
    for key, value in expected:
        assert revised[key] == value
|
||||||
|
|
||||||
|
|
||||||
|
def test_build_system_appends_roster_frame_only_when_roster_given():
    """build_system() adds the outreach frame only with a roster; JSON-only stays last."""
    closing_line = "Output JSON only."
    outreach_marker = "doing the outreach"

    # Without a roster there is no outreach frame.
    plain = parse.build_system()
    assert plain.strip().endswith(closing_line)
    assert outreach_marker not in plain

    # With a roster every name and the outreach frame appear, and the JSON-only
    # instruction is still the final line (kept last for recency).
    roster = ["Grant", "Jonathan", "Marty"]
    framed = parse.build_system(roster)
    assert all(member in framed for member in roster)
    assert outreach_marker in framed
    assert framed.strip().endswith(closing_line)
|
||||||
|
|
||||||
|
|
||||||
|
def test_parse_message_injects_roster_into_system_prompt():
    """parse_message() gives parse_fn a roster-framed system prompt.

    The teammate ("jonathan") should be framed as outreach while the prospect
    ("wyoming") is what ends up as the extracted investor.
    """
    captured = {}

    def cap(text, system=None, max_tokens=400):
        # Record the system prompt the parser was handed, then return a canned parse.
        captured["system"] = system
        return {
            "intent": "meeting_note",
            "investor_name": "Wyoming",
            "contact_name": None,
            "note": "jonathan chatting with them",
        }

    result = parse.parse_message(
        "jonathan is chatting with wyoming",
        parse_fn=cap,
        roster=["Grant", "Jonathan", "Marty"],
    )

    system_prompt = captured["system"]
    assert "Jonathan" in system_prompt
    assert "doing the outreach" in system_prompt
    assert result["investor_name"] == "Wyoming"
|
||||||
|
|
||||||
|
|
||||||
|
def test_revise_injects_roster_into_system_prompt():
    """revise() passes parse_fn a system prompt containing the roster and outreach frame."""
    proposal = {
        "intent": "meeting_note",
        "investor_name": "Wyoming",
        "contact_name": None,
        "contact_email": None,
        "contact_title": None,
        "note": "x",
        "_source_text": "jonathan is chatting with wyoming",
    }
    captured = {}

    def cap(prompt, system=None, max_tokens=400):
        # Record the system prompt handed to the parser.
        captured["system"] = system
        return {"note": "sent the deck"}

    parse.revise(proposal, "note: sent the deck", parse_fn=cap, roster=["Grant", "Jonathan"])

    for fragment in ("Jonathan", "doing the outreach"):
        assert fragment in captured["system"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_city_kept_as_plain_field_and_linkedin_salvaged_from_source():
    """City passes through unchanged; a LinkedIn URL the model missed is taken from the source."""
    # A card transcription arrives as labeled lines.
    card_lines = [
        "New investor — from a business card:",
        "Name: Jane Doe",
        "Company: Acme Capital",
        "Email: jane@acme.com",
        "LinkedIn: linkedin.com/in/janedoe",
        "City: New York",
    ]
    src = "\n".join(card_lines)

    # The stubbed model returns no LinkedIn URL even though the source has one.
    model_reply = {
        "intent": "new_investor",
        "investor_name": "Acme Capital",
        "contact_name": "Jane Doe",
        "contact_email": "jane@acme.com",
        "city": "New York",
        "linkedin_url": None,
    }
    parsed = parse.parse_message(src, parse_fn=_stub(model_reply))

    assert parsed["city"] == "New York"
    # Salvaged verbatim from the source text.
    assert parsed["linkedin_url"] == "linkedin.com/in/janedoe"
|
||||||
|
|
||||||
|
|
||||||
|
def test_fabricated_linkedin_dropped_when_not_in_source():
    """A LinkedIn URL returned by the model but absent from the source text is dropped."""
    model_reply = {
        "intent": "new_investor",
        "investor_name": "Gamma Partners",
        "contact_name": "their GP",
        "linkedin_url": "linkedin.com/in/madeup",
    }
    parsed = parse.parse_message(
        "new prospect Gamma Partners, talked to their GP",
        parse_fn=_stub(model_reply),
    )
    # The URL was invented by the model, so it must not survive.
    assert parsed["linkedin_url"] is None
|
||||||
|
|
||||||
|
|
||||||
|
def test_revise_linkedin_taken_only_from_instruction():
    """revise() sets a LinkedIn URL only when the instruction text contains it."""
    proposal = {
        "intent": "new_investor",
        "investor_name": "Acme",
        "contact_name": "Jane",
        "contact_email": "jane@acme.com",
        "contact_title": None,
        "city": None,
        "linkedin_url": None,
        "note": None,
        "_source_text": "Acme Jane jane@acme.com",
    }

    # The instruction carries the URL, so it is accepted.
    with_url = parse.revise(
        proposal,
        "her linkedin is linkedin.com/in/janedoe",
        parse_fn=_stub({"linkedin_url": "linkedin.com/in/janedoe"}),
    )
    assert with_url["linkedin_url"] == "linkedin.com/in/janedoe"

    # The stubbed model returns a URL the instruction never mentioned: the existing
    # value (None) is kept, while the title change still applies.
    model_reply = {"linkedin_url": "linkedin.com/in/fake", "contact_title": "GP"}
    without_url = parse.revise(proposal, "set her title to GP", parse_fn=_stub(model_reply))
    assert without_url["linkedin_url"] is None
    assert without_url["contact_title"] == "GP"
|
||||||
|
|
||||||
|
|
||||||
|
def test_phone_and_mobile_kept_when_digits_in_source():
    """Phone and mobile numbers that appear in the source text are kept as returned."""
    # A card transcription lists Phone / Fax / Cell separately; the stubbed model maps
    # the office line to "phone" and the cell line to "mobile".
    card_lines = [
        "New investor — from a business card:",
        "Name: Daniel Raupp",
        "Company: Fortitude",
        "Phone: 631-474-5610",
        "Fax: 631-474-1806",
        "Cell: 631-922-1195",
    ]
    src = "\n".join(card_lines)

    model_reply = {
        "intent": "new_investor",
        "investor_name": "Fortitude",
        "contact_name": "Daniel Raupp",
        "phone": "631-474-5610",
        "mobile": "631-922-1195",
    }
    parsed = parse.parse_message(src, parse_fn=_stub(model_reply))

    assert parsed["phone"] == "631-474-5610"
    # The cell number, in its printed formatting.
    assert parsed["mobile"] == "631-922-1195"
|
||||||
|
|
||||||
|
|
||||||
|
def test_fabricated_phone_dropped_when_digits_not_in_source():
    """A phone number returned by the model but absent from the source text is dropped."""
    model_reply = {
        "intent": "new_investor",
        "investor_name": "Gamma Partners",
        "contact_name": "their GP",
        "phone": "555-867-5309",
    }
    parsed = parse.parse_message(
        "new prospect Gamma Partners, talked to their GP",
        parse_fn=_stub(model_reply),
    )
    # The number is not in the source, so it must never be minted.
    assert parsed["phone"] is None
|
||||||
|
|
||||||
|
|
||||||
|
def test_revise_phone_taken_only_from_instruction():
    """revise() sets a mobile number only when the instruction text contains it."""
    proposal = {
        "intent": "new_investor",
        "investor_name": "Acme",
        "contact_name": "Jane",
        "contact_email": None,
        "contact_title": None,
        "city": None,
        "linkedin_url": None,
        "phone": None,
        "mobile": None,
        "note": None,
        "_source_text": "Acme Jane",
    }

    # The instruction carries the number, so it is accepted.
    with_number = parse.revise(
        proposal,
        "her cell is 917-555-0199",
        parse_fn=_stub({"mobile": "917-555-0199"}),
    )
    assert with_number["mobile"] == "917-555-0199"

    # The stubbed model returns a number the instruction never mentioned: the existing
    # value (None) is kept, while the title change still applies.
    model_reply = {"mobile": "000-000-0000", "contact_title": "GP"}
    without_number = parse.revise(proposal, "set her title to GP", parse_fn=_stub(model_reply))
    assert without_number["mobile"] is None
    assert without_number["contact_title"] == "GP"
|
||||||
|
|
||||||
|
|
||||||
|
def test_revise_cannot_empty_the_proposal():
    """A revision whose model reply nulls every field keeps investor and contact names."""
    proposal = {
        "intent": "new_investor",
        "investor_name": "Acme",
        "contact_name": "Jane",
        "contact_email": None,
        "contact_title": None,
        "note": "x",
        "_source_text": "Acme Jane",
    }
    all_cleared = {
        "investor_name": None,
        "contact_name": None,
        "contact_title": None,
        "note": None,
    }
    revised = parse.revise(proposal, "clear it", parse_fn=_stub(all_cleared))

    names_kept = revised["investor_name"] == "Acme" and revised["contact_name"] == "Jane"
    assert names_kept
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Minimal runner: call every module-level callable named test_* in name order,
    # printing one line per finished test and a final count.
    fns = [
        candidate
        for label, candidate in sorted(globals().items())
        if label.startswith("test_") and callable(candidate)
    ]
    for fn in fns:
        fn()
        print(f"ok {fn.__name__}")
    print(f"\n{len(fns)} passed")
|
||||||
@@ -0,0 +1,213 @@
|
|||||||
|
"""Tests for the proposal store + approval state machine (pure logic, no network)."""
|
||||||
|
import copy
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
|
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||||
|
|
||||||
|
import proposals # noqa: E402
|
||||||
|
|
||||||
|
# Shared new-investor proposal used as the baseline fixture by the tests below.
SAMPLE = {
    "intent": "new_investor",
    "investor_name": "Acme Capital",
    "contact_name": "Jane Doe",
    "contact_email": "jane@acme.com",
    "contact_title": None,
    "note": "met at conf",
}
|
||||||
|
|
||||||
|
|
||||||
|
def test_store_put_get_pop():
    """ProposalStore: put exposes a key, get reads it, pop removes it; a missing key pops None."""
    store = proposals.ProposalStore()
    key = "$root"

    assert not store.has(key)
    store.put(key, SAMPLE)
    assert store.has(key)

    assert store.get(key)["investor_name"] == "Acme Capital"
    assert store.pop(key)["investor_name"] == "Acme Capital"
    assert not store.has(key)

    assert store.pop("$missing") is None
|
||||||
|
|
||||||
|
|
||||||
|
def test_store_any_pending():
    """any_pending() is true only while the store holds at least one proposal."""
    store = proposals.ProposalStore()
    assert not store.any_pending()

    store.put("$r", SAMPLE)
    assert store.any_pending()

    store.pop("$r")
    assert not store.any_pending()
|
||||||
|
|
||||||
|
|
||||||
|
def test_interpret_yes_variants():
    """Each affirmative spelling is classified as 'approve'."""
    affirmatives = ("yes", "Y", "approve", " ok ", "👍")
    for reply in affirmatives:
        action = proposals.interpret_reply(reply)[0]
        assert action == "approve", reply
|
||||||
|
|
||||||
|
|
||||||
|
def test_interpret_no_variants():
    """Each negative spelling is classified as 'reject'."""
    negatives = ("no", "N", "cancel", "discard", "❌")
    for reply in negatives:
        action = proposals.interpret_reply(reply)[0]
        assert action == "reject", reply
|
||||||
|
|
||||||
|
|
||||||
|
def test_interpret_edit_equals():
    """'edit email=...' parses as an edit of contact_email."""
    verdict = proposals.interpret_reply("edit email=new@acme.com")
    action, payload = verdict
    assert action == "edit"
    assert payload == ("contact_email", "new@acme.com")
|
||||||
|
|
||||||
|
|
||||||
|
def test_interpret_edit_colon_and_alias():
    """The colon form works, and the 'firm' alias maps to investor_name."""
    verdict = proposals.interpret_reply("firm: Acme Capital LLC")
    action, payload = verdict
    assert action == "edit"
    assert payload == ("investor_name", "Acme Capital LLC")
|
||||||
|
|
||||||
|
|
||||||
|
def test_interpret_edit_city_and_linkedin_aliases():
    """'city' edits the city field and 'linkedin' edits linkedin_url."""
    city_action, city_payload = proposals.interpret_reply("city: New York")
    assert (city_action, city_payload) == ("edit", ("city", "New York"))

    link_action, link_payload = proposals.interpret_reply("linkedin=linkedin.com/in/jane")
    assert (link_action, link_payload) == ("edit", ("linkedin_url", "linkedin.com/in/jane"))
|
||||||
|
|
||||||
|
|
||||||
|
def test_render_shows_city_and_linkedin_when_present():
    """render() includes labeled City and LinkedIn lines when those fields are set."""
    proposal = dict(SAMPLE)
    proposal.update({"city": "New York", "linkedin_url": "linkedin.com/in/jane"})
    rendered = proposals.render(proposal)
    for expected_line in ("City: New York", "LinkedIn: linkedin.com/in/jane"):
        assert expected_line in rendered
|
||||||
|
|
||||||
|
|
||||||
|
def test_interpret_edit_phone_and_mobile_aliases():
    """'phone' edits the phone field and the 'cell' alias edits mobile."""
    phone_verdict = proposals.interpret_reply("phone=212-555-0100")
    assert phone_verdict == ("edit", ("phone", "212-555-0100"))

    cell_verdict = proposals.interpret_reply("cell: 917-555-0199")
    assert cell_verdict == ("edit", ("mobile", "917-555-0199"))
|
||||||
|
|
||||||
|
|
||||||
|
def test_render_shows_phone_and_mobile_when_present():
    """render() includes labeled Phone and Mobile lines when those fields are set."""
    proposal = dict(SAMPLE)
    proposal.update({"phone": "212-555-0100", "mobile": "917-555-0199"})
    rendered = proposals.render(proposal)
    for expected_line in ("Phone: 212-555-0100", "Mobile: 917-555-0199"):
        assert expected_line in rendered
|
||||||
|
|
||||||
|
|
||||||
|
def test_interpret_unknown():
    """A reply that is neither yes/no nor an edit is classified as 'unknown'."""
    action = proposals.interpret_reply("maybe later")[0]
    assert action == "unknown"
|
||||||
|
|
||||||
|
|
||||||
|
def test_interpret_edit_colon_value_contains_equals():
    """An '=' inside a colon-form value stays part of the value."""
    # The field/value split must happen on ':' first so the rest is kept verbatim.
    verdict = proposals.interpret_reply("note: see deck=v2")
    action, payload = verdict
    assert action == "edit"
    assert payload == ("note", "see deck=v2")
|
||||||
|
|
||||||
|
|
||||||
|
def test_claim_once_pop_guards_double_approve():
    """pop() yields a stored proposal exactly once; a second pop returns None."""
    # The double-approve guard depends on this: a racing second 'yes' finds
    # nothing left to claim and becomes a no-op.
    store = proposals.ProposalStore()
    store.put("$r", SAMPLE)

    first_claim = store.pop("$r")
    assert first_claim is not None

    second_claim = store.pop("$r")
    assert second_claim is None
|
||||||
|
|
||||||
|
|
||||||
|
def test_edit_with_unknown_field_is_not_an_edit():
    """An edit naming an unrecognised field is classified as 'unknown', not 'edit'."""
    action = proposals.interpret_reply("edit zipcode=90210")[0]
    # An unknown field name must never silently turn into an edit.
    assert action == "unknown"
|
||||||
|
|
||||||
|
|
||||||
|
def test_apply_edit_is_nondestructive():
    """apply_edit() returns the edited proposal without mutating its input."""
    edited = proposals.apply_edit(SAMPLE, "contact_email", "x@y.com")
    assert edited["contact_email"] == "x@y.com"
    # The shared fixture still holds its original address.
    assert SAMPLE["contact_email"] == "jane@acme.com"
|
||||||
|
|
||||||
|
|
||||||
|
def test_render_includes_fields_and_instructions():
    """render() shows the proposal's fields and mentions both 'yes' and 'no'."""
    rendered = proposals.render(SAMPLE)
    assert "Acme Capital" in rendered
    assert "jane@acme.com" in rendered

    lowered = rendered.lower()
    assert "yes" in lowered and "no" in lowered
|
||||||
|
|
||||||
|
|
||||||
|
def test_render_meeting_note_variant():
    """render() of a meeting_note proposal mentions 'meeting note'."""
    meeting_note = dict(SAMPLE, intent="meeting_note")
    rendered = proposals.render(meeting_note)
    assert "meeting note" in rendered.lower()
|
||||||
|
|
||||||
|
|
||||||
|
def test_summary_line_new_vs_note():
    """summary_line() names the investor and the proposal kind, and points to the thread."""
    new_line = proposals.summary_line(SAMPLE)
    assert "Acme Capital" in new_line and "new investor" in new_line.lower()

    meeting_note = dict(SAMPLE, intent="meeting_note")
    note_line = proposals.summary_line(meeting_note)
    assert "Acme Capital" in note_line and "meeting note" in note_line.lower()

    # The nudge has to send the user to the thread, which is where the action happens.
    assert "thread" in new_line.lower()
|
||||||
|
|
||||||
|
|
||||||
|
# --- fuzzy-match disambiguation + conversational-revision helpers ---
|
||||||
|
|
||||||
|
# Fixture: a new-investor proposal parked in the "disambiguate" stage with a
# two-entry shortlist of existing rows it might match.
DISAMBIG = {
    "intent": "new_investor",
    "investor_name": "Charles Brown",
    "contact_name": "Charles Brown",
    "contact_email": None,
    "contact_title": None,
    "note": "met at conf",
    "_stage": "disambiguate",
    "_candidates": [
        {"id": "rowCharlie", "name": "Charlie Brown", "score": 0.92, "matched_on": "name"},
        {"id": "rowBeta", "name": "Beta Capital LLC", "score": 0.7, "matched_on": "name"},
    ],
}
|
||||||
|
|
||||||
|
|
||||||
|
def test_interpret_disambiguation_pick_number():
    """A 1-based number (optionally padded or '#'-prefixed) picks the 0-based candidate."""
    cases = (
        ("1", ("pick", 0)),
        (" 2 ", ("pick", 1)),
        ("#1", ("pick", 0)),
    )
    for reply, expected in cases:
        assert proposals.interpret_disambiguation(reply, 2) == expected
|
||||||
|
|
||||||
|
|
||||||
|
def test_interpret_disambiguation_out_of_range_is_unknown():
    """Numbers outside 1..count are classified as 'unknown'."""
    for reply in ("3", "0"):
        action = proposals.interpret_disambiguation(reply, 2)[0]
        assert action == "unknown"
|
||||||
|
|
||||||
|
|
||||||
|
def test_interpret_disambiguation_new_and_no():
    """'new' / 'none of these' mean create-new; 'no' means reject."""
    cases = (
        ("new", "new"),
        ("none of these", "new"),
        ("no", "reject"),
    )
    for reply, expected_action in cases:
        assert proposals.interpret_disambiguation(reply, 2)[0] == expected_action
|
||||||
|
|
||||||
|
|
||||||
|
def test_interpret_disambiguation_freeform_is_unknown():
    """Free-form text in the shortlist stage is 'unknown' rather than guessed at."""
    # The caller is expected to re-prompt instead of interpreting prose.
    action = proposals.interpret_disambiguation("the first one", 2)[0]
    assert action == "unknown"
|
||||||
|
|
||||||
|
|
||||||
|
def test_attach_to_candidate_promotes_to_meeting_note():
    """Attaching to a candidate yields an approval-stage meeting note on that row."""
    chosen = DISAMBIG["_candidates"][0]
    attached = proposals.attach_to_candidate(DISAMBIG, chosen)

    assert attached["_match_id"] == "rowCharlie"
    assert attached["intent"] == "meeting_note"
    assert attached["_stage"] == "approval"
    # The existing row's canonical name replaces the one that was typed.
    assert attached["investor_name"] == "Charlie Brown"
    assert "_candidates" not in attached
    # The input proposal still has its shortlist: it was not mutated.
    assert "_candidates" in DISAMBIG
|
||||||
|
|
||||||
|
|
||||||
|
def test_promote_to_new_clears_shortlist_and_match():
    """promote_to_new() moves to approval and drops both _candidates and _match_id."""
    with_match = dict(DISAMBIG, _match_id="rowX")
    promoted = proposals.promote_to_new(with_match)

    assert promoted["_stage"] == "approval"
    for removed_key in ("_candidates", "_match_id"):
        assert removed_key not in promoted
|
||||||
|
|
||||||
|
|
||||||
|
def test_disambiguation_pick_then_yes_reaches_approval():
    """A shortlist pick followed by 'yes' reaches the normal approve path.

    Covers the seam between the two state machines: the pick promotes the proposal
    to the approval stage carrying the chosen investor's row id, and a following
    'yes' classifies as approve, so pick -> yes lands the note on the existing investor.
    """
    chosen = DISAMBIG["_candidates"][0]
    picked = proposals.attach_to_candidate(copy.deepcopy(DISAMBIG), chosen)

    assert picked["_stage"] == "approval"
    assert picked["_match_id"] == "rowCharlie"
    assert picked["intent"] == "meeting_note"

    assert proposals.interpret_reply("yes") == ("approve", None)
|
||||||
|
|
||||||
|
|
||||||
|
def test_render_disambiguation_lists_numbered_candidates():
    """render_disambiguation() lists the candidates numbered and mentions 'new' and 'no'."""
    rendered = proposals.render_disambiguation(DISAMBIG)
    assert "Charlie Brown" in rendered and "Beta Capital LLC" in rendered
    assert "1." in rendered and "2." in rendered

    lowered = rendered.lower()
    assert "new" in lowered and "no" in lowered
|
||||||
|
|
||||||
|
|
||||||
|
def test_same_fields_ignores_control_keys():
    """same_fields() compares data fields only; _match_id and _stage do not count."""
    base = dict(SAMPLE)

    identical_copy = dict(base)
    assert proposals.same_fields(base, identical_copy)

    changed_note = dict(base, note="different")
    assert not proposals.same_fields(base, changed_note)

    extra_control_keys = dict(base, _match_id="r1", _stage="approval")
    assert proposals.same_fields(base, extra_control_keys)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Minimal runner: call every module-level callable named test_* in name order,
    # printing one line per finished test and a final count.
    fns = [
        candidate
        for label, candidate in sorted(globals().items())
        if label.startswith("test_") and callable(candidate)
    ]
    for fn in fns:
        fn()
        print(f"ok {fn.__name__}")
    print(f"\n{len(fns)} passed")
|
||||||
@@ -0,0 +1,112 @@
|
|||||||
|
"""Tests for the NL-query Matrix surface: trigger detection + answer rendering (pure, no network)."""
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
|
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||||
|
|
||||||
|
import query # noqa: E402
|
||||||
|
|
||||||
|
|
||||||
|
# ── parse_trigger ───────────────────────────────────────────────────────────────────────
|
||||||
|
def test_trigger_question_mark():
    """A leading '?' triggers a query; the returned question is stripped."""
    cases = (
        ("?who are our top investors", "who are our top investors"),
        (" ? spaced out ", "spaced out"),
    )
    for message, question in cases:
        assert query.parse_trigger(message) == question
|
||||||
|
|
||||||
|
|
||||||
|
def test_trigger_at_bot():
    """'@bot' triggers a query, with an optional colon and in any letter case."""
    plain = query.parse_trigger("@bot top 10 investors")
    assert plain == "top 10 investors"

    # Pill-style mention followed by a colon.
    with_colon = query.parse_trigger("@bot: top 10 investors")
    assert with_colon == "top 10 investors"

    # The trigger is matched case-insensitively.
    upper = query.parse_trigger("@BOT spaced")
    assert upper == "spaced"
|
||||||
|
|
||||||
|
|
||||||
|
def test_trigger_slash_forms():
    """'/ask', '/query' and '/q' all trigger a query and return the rest of the text."""
    cases = (
        ("/ask when did we last email Acme?", "when did we last email Acme?"),
        ("/query top investors", "top investors"),
        ("/q top investors", "top investors"),
    )
    for message, question in cases:
        assert query.parse_trigger(message) == question
|
||||||
|
|
||||||
|
|
||||||
|
def test_trigger_bare_returns_empty_string():
    """A trigger with no question after it returns '' rather than None."""
    # The trigger still matches (so help can be shown); it just carries no question.
    for bare_trigger in ("@bot", "?"):
        assert query.parse_trigger(bare_trigger) == ""
|
||||||
|
|
||||||
|
|
||||||
|
def test_non_trigger_routes_to_intake():
    """Messages without a query trigger return None."""
    non_triggers = (
        "New investor: Acme — Jane <jane@acme.com>",
        # 'ask' used as a note verb must NOT trigger; it would collide with real intake notes.
        "Ask Jane to send the Q3 deck",
        # '/ask' needs a separator after it to count as a trigger.
        "/asking for a friend",
        "",
        " ",
    )
    for message in non_triggers:
        assert query.parse_trigger(message) is None
|
||||||
|
|
||||||
|
|
||||||
|
# ── render_answer ───────────────────────────────────────────────────────────────────────
|
||||||
|
def test_render_scalar_rows():
    """render_answer() shows summary, bolded names, formatted money and an intent footer."""
    answer = {
        "intent": "top_investors_committed",
        "slots": {"limit": 2},
        "summary": "Top 2 investor(s) by committed capital.",
        "columns": ["investor_name", "total_invested", "lead"],
        "rows": [
            {"investor_name": "Acme Capital", "total_invested": 5000000, "lead": "Grant"},
            {"investor_name": "Beta Fund", "total_invested": 2500000, "lead": "Jonathan"},
        ],
        "truncated": False,
    }
    rendered = query.render_answer(answer)

    assert "Top 2 investor(s)" in rendered
    assert "**Acme Capital**" in rendered
    # Money is formatted with a dollar sign and thousands separators.
    assert "$5,000,000" in rendered
    # The footer states how the question was interpreted.
    assert "read as: top_investors_committed" in rendered
|
||||||
|
|
||||||
|
|
||||||
|
def test_render_nested_contacts_and_commitments():
    """Nested contacts and commitments are rendered; a 0-valued flag column is hidden."""
    contact = {
        "full_name": "Jane Doe",
        "email": "jane@acme.com",
        "title": "GP",
        "city": "Austin",
        "state": "TX",
        "country": "",
    }
    investor_row = {
        "investor_name": "Acme Capital",
        "lead": "Grant",
        "total_invested": 5000000,
        "graveyard": 0,
        "contacts": [contact],
        "commitments": [{"fund_name": "Fund I", "amount": 5000000}],
    }
    answer = {
        "intent": "investor_lookup",
        "slots": {"name": "Acme"},
        "summary": '1 investor(s) matching "Acme".',
        "columns": ["investor_name", "lead", "total_invested", "graveyard", "contacts", "commitments"],
        "rows": [investor_row],
        "truncated": False,
    }
    rendered = query.render_answer(answer)

    assert "Jane Doe <jane@acme.com>" in rendered
    assert "Fund I: $5,000,000" in rendered
    # The graveyard flag is 0, so the column is suppressed.
    assert "graveyard" not in rendered
|
||||||
|
|
||||||
|
|
||||||
|
def test_render_flag_when_set():
    """A flag column set to 1 is rendered as a warning, and the due date is shown."""
    reminder_row = {
        "investor_name": "Acme",
        "title": "Send deck",
        "due_date": "2026-01-01",
        "status": "open",
        "overdue": 1,
    }
    answer = {
        "intent": "investors_follow_up",
        "slots": {},
        "summary": "1 investor(s) with an open follow-up reminder.",
        "columns": ["investor_name", "title", "due_date", "status", "overdue"],
        "rows": [reminder_row],
    }
    rendered = query.render_answer(answer)

    assert "⚠️ overdue" in rendered
    # The due date appears in YYYY-MM-DD form.
    assert "2026-01-01" in rendered
|
||||||
|
|
||||||
|
|
||||||
|
def test_render_no_rows():
    """An answer with no rows renders a 'no matching' message."""
    empty_answer = {
        "intent": "investors_by_city",
        "slots": {"city": "Nowhere"},
        "summary": '0 investor contact(s) in "Nowhere".',
        "columns": [],
        "rows": [],
    }
    rendered = query.render_answer(empty_answer)
    assert "no matching" in rendered.lower()
|
||||||
|
|
||||||
|
|
||||||
|
def test_render_overflow_note():
    """Rows beyond MAX_DISPLAY_ROWS are summarised as '+N more not shown'."""
    row_count = query.MAX_DISPLAY_ROWS + 5
    rows = []
    for i in range(row_count):
        rows.append({"investor_name": f"Inv {i}", "total_invested": i})

    answer = {
        "intent": "top_investors_committed",
        "slots": {},
        "summary": "many",
        "columns": ["investor_name", "total_invested"],
        "rows": rows,
    }
    rendered = query.render_answer(answer)
    assert "+5 more not shown" in rendered
|
||||||
|
|
||||||
|
|
||||||
|
def test_render_errors():
    """Each error payload renders a message containing its characteristic phrase."""
    lowercase_cases = (
        ({"error": "no_match", "question": "huh"}, "couldn't map"),
        ({"error": "model_unavailable"}, "unreachable"),
        ({"error": "query_failed", "detail": "boom"}, "failed"),
    )
    for payload, phrase in lowercase_cases:
        assert phrase in query.render_answer(payload).lower()

    # For this payload the error code itself shows up in the output, checked case-sensitively.
    assert "bad_slot" in query.render_answer({"error": "bad_slot", "detail": "x"})
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Minimal runner: call every module-level callable named test_* in name order,
    # printing one line per finished test and a final count.
    fns = [
        candidate
        for label, candidate in sorted(globals().items())
        if label.startswith("test_") and callable(candidate)
    ]
    for fn in fns:
        fn()
        print(f"ok {fn.__name__}")
    print(f"\n{len(fns)} passed")
|
||||||
@@ -0,0 +1,45 @@
|
|||||||
|
"""Tests for the business-card vision wrapper (pure logic, no network — chat_fn is stubbed)."""
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
|
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||||
|
|
||||||
|
import spark # noqa: E402
|
||||||
|
|
||||||
|
|
||||||
|
def test_transcribe_card_returns_faithful_text():
    """transcribe_card() forwards image and mime to chat_fn and returns its text."""
    seen = {}

    def fake_chat(prompt, image_b64, mime="image/jpeg", system=None, max_tokens=600):
        # Record what the vision call received, then return a canned transcription.
        seen.update(image_b64=image_b64, mime=mime, system=system)
        return "Jane Doe\nGeneral Partner\nAcme Capital LLC\njane@acme.com\n+1 555 123 4567"

    transcript = spark.transcribe_card("Zm9vYmFy", mime="image/png", chat_fn=fake_chat)

    # The transcription comes back verbatim, so the email survives for the integrity check.
    assert "jane@acme.com" in transcript
    assert "Acme Capital LLC" in transcript

    # Image and mime reached the vision call, and the system prompt is the card one.
    assert seen["image_b64"] == "Zm9vYmFy"
    assert seen["mime"] == "image/png"
    assert "business card" in (seen["system"] or "").lower()
|
||||||
|
|
||||||
|
|
||||||
|
def test_transcribe_card_none_sentinel_becomes_empty():
    """A NONE reply from the model (any case, padded or not) is returned as ''."""
    # The model answers NONE for an unreadable or non-card image. Returning "" lets the
    # bot ask for a clearer photo instead of feeding garbage into the intake parser.
    for sentinel in ("NONE", " none "):
        result = spark.transcribe_card("x", chat_fn=lambda *args, _reply=sentinel, **kwargs: _reply)
        assert result == ""
|
||||||
|
|
||||||
|
|
||||||
|
def test_transcribe_card_strips_whitespace():
    """Surrounding whitespace in the model's reply is stripped from the result."""
    def padded_reply(*args, **kwargs):
        return " Acme\n "

    assert spark.transcribe_card("x", chat_fn=padded_reply) == "Acme"
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Minimal runner: call every module-level callable named test_* in name order,
    # printing one line per finished test and a final count.
    fns = [
        candidate
        for label, candidate in sorted(globals().items())
        if label.startswith("test_") and callable(candidate)
    ]
    for fn in fns:
        fn()
        print(f"ok {fn.__name__}")
    print(f"\n{len(fns)} passed")
|
||||||
@@ -58,7 +58,22 @@ def _render_thesis(thesis):
|
|||||||
def _system(thesis):
|
def _system(thesis):
|
||||||
text = ("You are the Architect, the in-house copilot that sharpens Ten31's investment "
|
text = ("You are the Architect, the in-house copilot that sharpens Ten31's investment "
|
||||||
"thesis with the partners. Ten31 invests in critical infrastructure across bitcoin, "
|
"thesis with the partners. Ten31 invests in critical infrastructure across bitcoin, "
|
||||||
"AI, energy, and freedom technologies, with scarcity as the connecting idea. "
|
"AI, energy, and freedom technologies. The spine of the thesis: fiat is being debased "
|
||||||
|
"while AI drives the marginal cost of anything reproducible toward zero, so durable "
|
||||||
|
"value migrates to what stays provably scarce and verifiable. Bitcoin is the apex form "
|
||||||
|
"of that, a fixed-supply, non-debasable, verifiable reserve asset. AI is the abundance "
|
||||||
|
"engine and bitcoin is the scarcity anchor, two faces of one megatrend. The throughline "
|
||||||
|
"is an asset-value and capital-flow claim: as money debases and AI commoditizes the "
|
||||||
|
"reproducible, value accrues to the scarce side of one supply chain and the monetary "
|
||||||
|
"premium accrues to bitcoin as the non-debasable reserve asset. This is not a claim "
|
||||||
|
"that the world transacts, settles, or clears in bitcoin. The structure runs on three "
|
||||||
|
"seams: Energy and Compute (the same scarce firm power feeds both AI and bitcoin), "
|
||||||
|
"Debasement and Bitcoin (bitcoin as reserve and as pristine collateral for credit, "
|
||||||
|
"never payments), and AI and Data-Ownership (sovereign data and confidential inference, "
|
||||||
|
"the own-your-stack and own-your-inference layer). Strike is a financial-services and "
|
||||||
|
"reserve re-rate, never a payments story. Proof standard: every proof point must be "
|
||||||
|
"falsifiable as scaled substance with a number, never a first-instance milestone. Do "
|
||||||
|
"not invent proof and do not over-expose sensitive deal or return specifics. "
|
||||||
f"VOICE RULES (follow exactly): {VOICE}\n\n"
|
f"VOICE RULES (follow exactly): {VOICE}\n\n"
|
||||||
"Here is the current working thesis:\n" + _render_thesis(thesis))
|
"Here is the current working thesis:\n" + _render_thesis(thesis))
|
||||||
# Cache the thesis context so iterating across requests is cheap.
|
# Cache the thesis context so iterating across requests is cheap.
|
||||||
|
|||||||
@@ -0,0 +1,258 @@
|
|||||||
|
"""Outreach drafting agent — tailored LP outreach in Ten31's voice, grounded in the
|
||||||
|
thesis + the LP's DE-IDENTIFIED context, through the redaction boundary.
|
||||||
|
|
||||||
|
Draft-only: a human reviews, edits, and sends (guardrails #4 and #6 — no auto-send,
|
||||||
|
no cold/outbound automation until counsel defines the solicitation posture). Sovereignty:
|
||||||
|
the thesis is Ten31's own non-sensitive messaging and goes to Claude as-is; the LP's
|
||||||
|
context (CRM notes + email history) is scrubbed first, so the LP list never reaches the
|
||||||
|
API in the clear, and the draft is re-hydrated locally for the human.
|
||||||
|
"""
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
|
_HERE = os.path.dirname(os.path.abspath(__file__))
|
||||||
|
sys.path.insert(0, _HERE) # backend/mcp on path for sibling imports (architect_grounding, architect_agent)
|
||||||
|
|
||||||
|
# Supported outreach_type keys, each mapped to the human-readable description
# that gets woven into the drafting prompt.
OUTREACH_TYPES = dict(
    intro="a first introduction to Ten31 and the fund",
    follow_up="a warm follow-up that moves the conversation forward",
    fund_update="a fund update / progress note",
    meeting_follow_up="a follow-up after a recent meeting or call",
    nurture="a light-touch note to stay in contact",
)
|
||||||
|
|
||||||
|
|
||||||
|
def _days_between(then_iso, now_iso):
|
||||||
|
from datetime import datetime
|
||||||
|
try:
|
||||||
|
a = datetime.strptime(str(then_iso)[:10], "%Y-%m-%d")
|
||||||
|
b = datetime.strptime(str(now_iso)[:10], "%Y-%m-%d")
|
||||||
|
return (b - a).days
|
||||||
|
except Exception:
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _flag_is_set(value):
    """Return True when a loosely-typed flag column holds a truthy value.

    Falsy values, and the strings "", "0", "false" and "no" (compared
    case-insensitively after stripping whitespace), count as not set.
    """
    return bool(value) and str(value).strip().lower() not in ("", "0", "false", "no")


def follow_up_radar(conn, our_addresses, now_iso, warm_days=45, limit=60,
                    reply_days=3, flagged_days=14):
    """Deterministic scan: surface investors who need attention, each with a concrete,
    checkable reason (no LLM guesswork in the *surfacing*). Tiers, most urgent first:
      0  you owe a reply (their email is the most recent, unanswered)
      1  flagged for follow-up and quiet
      2  warm lead gone quiet (no contact in >= warm_days)

    Args:
        conn: Database connection exposing ``execute(sql, params).fetchall()``. Rows must
            support both ``dict(row)`` and ``row["column"]`` access.
            NOTE(review): presumably sqlite3 with ``row_factory = sqlite3.Row``; confirm.
        our_addresses: Iterable of our own email addresses (``None`` entries tolerated);
            the latest email counts as inbound when its sender is not one of these.
        now_iso: Reference "now" as a date/timestamp string starting with ``YYYY-MM-DD``.
        warm_days: Minimum quiet days before a lead with at least two emails is a tier-2 item.
        limit: Maximum number of items returned.
        reply_days: Minimum age in days of an unanswered inbound email for tier 0.
        flagged_days: Minimum quiet days for a flagged investor to become tier 1.

    Returns:
        A list of dicts with keys ``investor_id``, ``name``, ``reason``, ``days_since``,
        ``suggested_type`` and ``tier``, sorted by tier and then by longest silence first,
        truncated to ``limit``. An empty list when the investor table cannot be read.
    """
    own = {(a or "").lower() for a in (our_addresses or [])}
    try:
        rows = conn.execute("SELECT * FROM fundraising_investors").fetchall()
    except Exception:
        # Best-effort: if the investor table cannot be read there is nothing to surface.
        return []

    items = []
    for r in rows:
        d = dict(r)
        inv_id, name = d.get("id"), d.get("investor_name")
        if not inv_id:
            continue
        if _flag_is_set(d.get("graveyard")):
            continue  # buried leads are out of scope

        try:
            erows = conn.execute(
                "SELECT e.from_email, e.sent_at FROM emails e "
                "JOIN email_investor_links l ON l.email_id = e.id "
                "WHERE l.fundraising_investor_id = ? AND e.is_matched = 1 "
                "ORDER BY e.sent_at DESC LIMIT 50", (inv_id,)).fetchall()
        except Exception:
            erows = []  # treated the same as "no email history"
        if not erows:
            continue  # no email history -> nothing to base a nudge on

        last = erows[0]  # newest email, because of ORDER BY sent_at DESC
        days = _days_between(last["sent_at"], now_iso)
        if days is None:
            continue  # unparseable timestamp -> cannot state a checkable reason

        inbound_last = (last["from_email"] or "").lower() not in own  # they emailed last
        flagged = _flag_is_set(d.get("follow_up"))

        # First matching rule wins, so the tiers are mutually exclusive.
        if inbound_last and days >= reply_days:
            reason, tier, suggested = f"You owe a reply — they emailed {days} days ago", 0, "follow_up"
        elif flagged and days >= flagged_days:
            reason, tier, suggested = f"Flagged for follow-up, quiet {days} days", 1, "follow_up"
        elif days >= warm_days and len(erows) >= 2:
            reason, tier, suggested = f"No contact in {days} days", 2, "nurture"
        else:
            continue

        if flagged and tier != 1:
            reason += " · flagged"  # tier 1 already says "flagged" in its reason
        items.append({"investor_id": inv_id, "name": name, "reason": reason,
                      "days_since": days, "suggested_type": suggested, "tier": tier})

    # Most urgent tier first; within a tier, the longest silence first.
    items.sort(key=lambda x: (x["tier"], -x["days_since"]))
    return items[:limit]
|
||||||
|
|
||||||
|
|
||||||
|
def _context(conn, investor_id):
|
||||||
|
"""Assemble the recipient's context. Structured so the model replies to the ACTIVE
|
||||||
|
conversation (the most recent email thread) while still having earlier emails as
|
||||||
|
background. Returns (investor_name, context_text) or (None, None)."""
|
||||||
|
row = conn.execute("SELECT investor_name, notes FROM fundraising_investors WHERE id=?",
|
||||||
|
(investor_id,)).fetchone()
|
||||||
|
if not row:
|
||||||
|
return None, None
|
||||||
|
name = row["investor_name"]
|
||||||
|
header = [f"Investor: {name}"]
|
||||||
|
notes = (row["notes"] or "").strip()
|
||||||
|
if notes:
|
||||||
|
header.append("CRM notes:\n" + notes)
|
||||||
|
try:
|
||||||
|
rows = conn.execute(
|
||||||
|
"SELECT e.subject, e.body_text, e.snippet, e.sent_at, e.thread_id FROM emails e "
|
||||||
|
"JOIN email_investor_links l ON l.email_id = e.id "
|
||||||
|
"WHERE l.fundraising_investor_id = ? AND e.is_matched = 1 "
|
||||||
|
"ORDER BY e.sent_at DESC LIMIT 20", (investor_id,)).fetchall()
|
||||||
|
except Exception:
|
||||||
|
rows = [] # email tables may be absent / not yet captured
|
||||||
|
active, background = [], []
|
||||||
|
if rows:
|
||||||
|
active_thread = rows[0]["thread_id"]
|
||||||
|
for em in rows:
|
||||||
|
body = (em["body_text"] or em["snippet"] or "")[:1500].strip()
|
||||||
|
block = f"({(em['sent_at'] or '')[:10]}) {em['subject'] or '(no subject)'}\n{body}"
|
||||||
|
in_active = active_thread is not None and em["thread_id"] == active_thread
|
||||||
|
(active if in_active else background).append(block)
|
||||||
|
sections = ["\n".join(header)]
|
||||||
|
if active:
|
||||||
|
sections.append("=== Active conversation (the most recent thread — this is what you are replying to) ===\n"
|
||||||
|
+ "\n\n".join(reversed(active[:6])))
|
||||||
|
if background:
|
||||||
|
sections.append("=== Earlier emails (background only, not the active thread) ===\n"
|
||||||
|
+ "\n\n".join(background[:4]))
|
||||||
|
return name, "\n\n".join(sections)
|
||||||
|
|
||||||
|
|
||||||
|
# Keyword cues used to pick the sender's prior emails of the SAME PURPOSE as the draft
|
||||||
|
# (so the voice few-shot matches what they're writing, not just whatever is most recent).
|
||||||
|
PURPOSE_PATTERNS = {
|
||||||
|
"intro": ["introduc", "nice to meet", "reaching out", "wanted to connect", "by way of introduction", "e-meet"],
|
||||||
|
"follow_up": ["follow up", "following up", "circle back", "circling back", "checking in",
|
||||||
|
"wanted to revisit", "any thoughts", "wanted to follow", "touching base"],
|
||||||
|
"fund_update": ["update", "progress", "quarter", "deployed", "portfolio", "milestone", "closing", "fund iii"],
|
||||||
|
"meeting_follow_up": ["great to meet", "great speaking", "thanks for the call", "thanks for your time",
|
||||||
|
"after our", "following our", "enjoyed our", "great to connect", "great chatting"],
|
||||||
|
"nurture": ["checking in", "hope you", "thinking of you", "stay in touch", "wanted to share", "thought you"],
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _voice_examples(conn, sender_email, outreach_type=None, limit=8):
|
||||||
|
"""The sender's OWN sent LP emails OF THE SAME PURPOSE — used as voice few-shot AND
|
||||||
|
surfaced for transparency (no black box). Larger sample, purpose-weighted (not just
|
||||||
|
recent). Returns (blocks_for_model, meta_for_ui); meta is the sender's own emails."""
|
||||||
|
if not sender_email:
|
||||||
|
return [], []
|
||||||
|
try:
|
||||||
|
rows = conn.execute(
|
||||||
|
"SELECT subject, body_text, snippet, sent_at, to_emails_json FROM emails "
|
||||||
|
"WHERE LOWER(from_email) = LOWER(?) AND is_matched = 1 "
|
||||||
|
"AND body_text IS NOT NULL AND TRIM(body_text) <> '' "
|
||||||
|
"ORDER BY sent_at DESC LIMIT 80", (sender_email,)).fetchall()
|
||||||
|
except Exception:
|
||||||
|
return [], []
|
||||||
|
pats = PURPOSE_PATTERNS.get(outreach_type or "", [])
|
||||||
|
scored = []
|
||||||
|
for idx, r in enumerate(rows):
|
||||||
|
text = ((r["subject"] or "") + " " + (r["body_text"] or r["snippet"] or "")).lower()
|
||||||
|
score = sum(1 for p in pats if p in text)
|
||||||
|
scored.append((score, -idx, r)) # purpose match first, then more recent
|
||||||
|
scored.sort(key=lambda x: (x[0], x[1]), reverse=True)
|
||||||
|
blocks, meta = [], []
|
||||||
|
for score, _neg_idx, r in scored[:limit]:
|
||||||
|
body = (r["body_text"] or r["snippet"] or "")[:900].strip()
|
||||||
|
if not body:
|
||||||
|
continue
|
||||||
|
blocks.append(f"Example — {r['subject'] or '(no subject)'}\n{body}")
|
||||||
|
to = ""
|
||||||
|
try:
|
||||||
|
arr = json.loads(r["to_emails_json"] or "[]")
|
||||||
|
if arr:
|
||||||
|
to = arr[0].get("email") if isinstance(arr[0], dict) else arr[0]
|
||||||
|
except Exception:
|
||||||
|
to = ""
|
||||||
|
meta.append({"subject": r["subject"] or "(no subject)", "date": (r["sent_at"] or "")[:10],
|
||||||
|
"to": to, "on_topic": score > 0})
|
||||||
|
return blocks, meta
|
||||||
|
|
||||||
|
|
||||||
|
def _draft_with_claude(aa, thesis, type_desc, deident_context, deident_voice, guidance):
|
||||||
|
voice_block = ""
|
||||||
|
if deident_voice:
|
||||||
|
voice_block = ("\n\nHere are examples of how THIS sender actually writes (de-identified). Match their "
|
||||||
|
"voice, tone, sentence rhythm, openers, and sign-off — not just the rules above:\n\n"
|
||||||
|
+ "\n\n---\n\n".join(deident_voice))
|
||||||
|
system = (
|
||||||
|
"You are Ten31's outreach copilot. Draft ONE ready-to-send LP outreach email in the SENDER's voice. "
|
||||||
|
f"VOICE RULES (follow exactly): {aa.VOICE}" + voice_block + "\n\n"
|
||||||
|
"Ten31 invests in critical infrastructure across bitcoin, AI, energy, and freedom technologies. "
|
||||||
|
"The spine: fiat is being debased while AI drives the marginal cost of the reproducible toward "
|
||||||
|
"zero, so durable value accrues to what stays provably scarce, and the monetary premium accrues "
|
||||||
|
"to bitcoin as the apex non-debasable reserve asset. AI is the abundance engine and bitcoin is "
|
||||||
|
"the scarcity anchor. Ten31 owns the scarce links of that one supply chain. This is an "
|
||||||
|
"asset-value and capital-flow conviction, not a claim that the world transacts or settles in "
|
||||||
|
"bitcoin. Current working thesis:\n" + aa._render_thesis(thesis) + "\n\n"
|
||||||
|
"The recipient's context below is DE-IDENTIFIED: people, firms, and amounts appear as placeholders "
|
||||||
|
"like [PERSON_1], [ORG_1], [AMOUNT_1]. Keep every placeholder EXACTLY as written and NEVER invent new "
|
||||||
|
"ones — they are swapped back to real values after you reply. Reply to the ACTIVE conversation; use the "
|
||||||
|
"earlier emails only as background. Output a subject line, then the email body. Do NOT fabricate facts, "
|
||||||
|
"numbers, returns, or commitments that are not present in the context or the thesis.")
|
||||||
|
user = (f"Outreach type: {type_desc}\n\n"
|
||||||
|
f"Recipient context (de-identified):\n{deident_context}\n\n"
|
||||||
|
+ (f"Additional guidance from the sender: {guidance}\n\n" if (guidance or "").strip() else "")
|
||||||
|
+ "Draft the email now.")
|
||||||
|
resp = aa._client().messages.create(
|
||||||
|
model=aa.MODEL, max_tokens=1200,
|
||||||
|
system=[{"type": "text", "text": system, "cache_control": {"type": "ephemeral"}}],
|
||||||
|
messages=[{"role": "user", "content": user}])
|
||||||
|
return "".join(b.text for b in resp.content if getattr(b, "type", None) == "text")
|
||||||
|
|
||||||
|
|
||||||
|
def draft_outreach(conn, investor_id, outreach_type, guidance, db_path, sender_email=None):
    """Draft tailored outreach for one investor, in the SENDER's voice.

    Voice comes from few-shot examples of the sender's own prior emails. FAILS CLOSED:
    if the scrub can't be prepared or Claude hallucinates a placeholder, no
    de-anonymized draft is returned.

    Args:
        conn: DB connection used for context lookup, scrub and re-hydration.
        investor_id: ``fundraising_investors.id`` of the recipient.
        outreach_type: Key into ``OUTREACH_TYPES``; unknown keys fall back to "follow_up".
        guidance: Optional free-text instructions from the sender.
        db_path: Path to the DB, passed to the redaction boundary and the thesis lookup.
        sender_email: Sender address used to pick voice examples (optional).

    Returns:
        A dict with ``status`` set to one of:
        ``"not_found"``, ``"scrub_unavailable"`` (+ ``reason``),
        ``"claude_not_configured"`` (+ ``reason``), ``"rehydrate_failed"``, or
        ``"ok"`` (+ ``draft``, ``investor_name``, ``scrub_stats``, ``voice_examples``).
    """
    name, context = _context(conn, investor_id)
    if not name:
        return {"status": "not_found"}
    type_desc = OUTREACH_TYPES.get(outreach_type, OUTREACH_TYPES["follow_up"])
    voice_blocks, voice_meta = _voice_examples(conn, sender_email, outreach_type)
    guidance_text = (guidance or "").strip()

    # 1) Scrub the sender's voice examples + the recipient context (+ the sender's free-text
    #    guidance, which is just as likely to name real people/firms) TOGETHER (shared token
    #    space). These are free prose, so the dictionary+regex floor is NOT enough — pass the
    #    local-Qwen NER backstop (as architect_grounding does) to tokenize unknown
    #    people/firms not in the CRM. FAILS CLOSED: if the local model is unreachable,
    #    _ner_local raises here and no de-anonymized draft is returned.
    try:
        backend_dir = os.path.dirname(_HERE)  # backend/ for the redaction package
        if backend_dir not in sys.path:  # don't grow sys.path on every call
            sys.path.insert(0, backend_dir)
        from redaction.client import Boundary
        from architect_grounding import _ner_local  # local-Qwen NER backstop (sibling module)
        boundary = Boundary(db_path=db_path, actor="closer", ner_fn=_ner_local)
        payload = list(voice_blocks) + [context]
        if guidance_text:
            payload.append(guidance_text)
        scrubbed = boundary.scrub(payload, bucket=False, conn=conn)
    except Exception as exc:
        return {"status": "scrub_unavailable", "reason": str(exc)}

    items = scrubbed["items"]
    handle = scrubbed["handle"]
    # items come back in payload order: voice examples, then context, then guidance (if sent).
    n_voice = len(voice_blocks)
    deident_voice = items[:n_voice]
    deident_target = items[n_voice]
    deident_guidance = items[n_voice + 1] if guidance_text else guidance

    try:
        # 2) Claude drafts over the de-identified context + voice + (non-sensitive) thesis.
        try:
            import architect_agent as aa
            thesis = aa.at.get_thesis("core", db=db_path)
            raw = _draft_with_claude(aa, thesis, type_desc, deident_target, deident_voice, deident_guidance)
        except Exception as exc:
            return {"status": "claude_not_configured", "reason": str(exc)}

        # 3) Re-hydrate locally (strict: a hallucinated placeholder quarantines the draft).
        rehy = boundary.rehydrate(raw, handle, strict=True, conn=conn)
    finally:
        # Always release the token map, including when re-hydration itself raises.
        boundary.forget(handle)

    if rehy.get("error"):
        return {"status": "rehydrate_failed"}
    return {"status": "ok", "draft": rehy["text"], "investor_name": name,
            "scrub_stats": scrubbed.get("stats", {}), "voice_examples": voice_meta}
|
||||||
@@ -0,0 +1,118 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Test the outreach agent's context assembly: it pulls the investor's CRM notes +
|
||||||
|
recent matched email into the de-identifiable context block. Synthetic data only
|
||||||
|
(guardrail #9). The scrub/Claude/rehydrate round-trip is exercised live in the preview.
|
||||||
|
Run: cd backend && python3 mcp/test_outreach.py
|
||||||
|
"""
|
||||||
|
import os
|
||||||
|
import sqlite3
|
||||||
|
import sys
|
||||||
|
import tempfile
|
||||||
|
|
||||||
|
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||||
|
import outreach_agent as oa # noqa: E402
|
||||||
|
|
||||||
|
FAILS = []
|
||||||
|
|
||||||
|
|
||||||
|
def check(cond, msg):
    """Print a PASS/FAIL line for ``msg``; remember the message in FAILS on failure."""
    if cond:
        print(" PASS " + msg)
        return
    print(" FAIL " + msg)
    FAILS.append(msg)
|
||||||
|
|
||||||
|
|
||||||
|
def _context_and_voice_checks(db_path):
    """Check oa._context and oa._voice_examples against a synthetic CRM created at db_path."""
    c = sqlite3.connect(db_path)
    try:
        c.row_factory = sqlite3.Row
        c.executescript("""
            CREATE TABLE fundraising_investors (id TEXT PRIMARY KEY, investor_name TEXT, notes TEXT);
            CREATE TABLE emails (id TEXT PRIMARY KEY, subject TEXT, body_text TEXT, snippet TEXT, sent_at TEXT,
                                 from_email TEXT, to_emails_json TEXT, thread_id TEXT, is_matched INT);
            CREATE TABLE email_investor_links (id TEXT, email_id TEXT, fundraising_investor_id TEXT);
        """)
        c.execute("INSERT INTO fundraising_investors VALUES ('inv1','Harbor & Vine','Met at the conference; interested in Fund III.')")
        c.executemany("INSERT INTO emails (id,subject,body_text,sent_at,thread_id,is_matched) VALUES (?,?,?,?,?,1)", [
            ("e1", "Re: Fund III", "Thanks for the call. We are still weighing the lock-up terms.", "2026-06-02T10:00:00", "t1"),
            ("e2", "Intro", "Good to meet you at the dinner.", "2026-05-01T10:00:00", "t0"),
            ("e3", "Spam", "ignore me", "2026-04-01T10:00:00", "t9"),  # not linked -> excluded
        ])
        c.executemany("INSERT INTO email_investor_links (id,email_id,fundraising_investor_id) VALUES (?,?, 'inv1')",
                      [("l1", "e1"), ("l2", "e2")])
        c.commit()

        name, ctx = oa._context(c, "inv1")
        ctx = ctx or ""  # a missing context must show up as FAIL lines, not a TypeError
        check(name == "Harbor & Vine", f"resolves investor name (got {name!r})")
        check("Met at the conference" in ctx, "includes CRM notes")
        check("lock-up terms" in ctx, "active-thread email present")
        check("Good to meet you" in ctx, "earlier email present as background")
        check("ignore me" not in ctx, "excludes email not linked to this investor")
        # find() returns -1 when absent, so a missing marker fails the check instead of raising.
        lock_at, meet_at = ctx.find("lock-up terms"), ctx.find("Good to meet you")
        check("Active conversation" in ctx and "Earlier emails" in ctx and 0 <= lock_at < meet_at,
              "active thread is separated from background, active first")

        # voice examples: the sender's own sent emails (few-shot + transparency)
        c.execute("INSERT INTO emails (id,subject,body_text,sent_at,from_email,to_emails_json,thread_id,is_matched) "
                  "VALUES ('v1','My note','Hi there, quick update on the fund. Best, Grant',"
                  "'2026-06-01T10:00:00','grant@ten31.xyz','[{\"email\":\"lp@x.example\"}]','tv',1)")
        c.commit()
        blocks, meta = oa._voice_examples(c, "grant@ten31.xyz")
        check(len(blocks) == 1 and "quick update on the fund" in blocks[0], "voice example pulls the sender's own email")
        check(len(meta) == 1 and meta[0]["subject"] == "My note" and meta[0]["to"] == "lp@x.example",
              "voice meta carries subject + recipient for transparency")
        check(oa._voice_examples(c, None) == ([], []), "no sender -> no voice examples")

        n2, c2 = oa._context(c, "missing")
        check(n2 is None and c2 is None, "unknown investor -> (None, None)")
    finally:
        c.close()


def _radar_checks(db_path):
    """Check oa.follow_up_radar tiering/exclusions against a synthetic grid created at db_path."""
    rc = sqlite3.connect(db_path)
    try:
        rc.row_factory = sqlite3.Row
        rc.executescript("""
            CREATE TABLE fundraising_investors (id TEXT PRIMARY KEY, investor_name TEXT, follow_up TEXT, graveyard TEXT);
            CREATE TABLE emails (id TEXT PRIMARY KEY, from_email TEXT, sent_at TEXT, is_matched INT);
            CREATE TABLE email_investor_links (id TEXT, email_id TEXT, fundraising_investor_id TEXT);
        """)
        rc.executemany("INSERT INTO fundraising_investors (id,investor_name,follow_up,graveyard) VALUES (?,?,?,?)", [
            ("owe", "Owe Reply LP", None, None),    # they emailed last, 5d ago -> tier 0
            ("warm", "Warm Quiet LP", None, None),  # we emailed last, 60d ago -> tier 2
            ("fresh", "Fresh LP", None, None),      # we emailed 4d ago -> not surfaced
            ("buried", "Buried LP", None, "1"),     # graveyard -> excluded
        ])
        OURS = "grant@ten31.xyz"
        em = [
            ("o1", "lp@owe.example", "2026-06-04T10:00:00", "owe"),    # inbound, 5 days before 06-09
            ("w1", OURS, "2026-04-10T10:00:00", "warm"),               # outbound, ~60 days
            ("w0", "lp@warm.example", "2026-04-01T10:00:00", "warm"),  # 2nd email for history
            ("f1", OURS, "2026-06-05T10:00:00", "fresh"),              # outbound, 4 days -> too recent
            ("b1", "lp@buried.example", "2026-01-01T10:00:00", "buried"),
        ]
        for eid, frm, sent, inv in em:
            rc.execute("INSERT INTO emails (id,from_email,sent_at,is_matched) VALUES (?,?,?,1)", (eid, frm, sent))
            rc.execute("INSERT INTO email_investor_links (id,email_id,fundraising_investor_id) VALUES (?,?,?)",
                       (eid + "l", eid, inv))
        rc.commit()

        radar = oa.follow_up_radar(rc, [OURS], "2026-06-09T00:00:00", warm_days=45)
        names = [x["name"] for x in radar]
        check("Owe Reply LP" in names and "Warm Quiet LP" in names, f"surfaces owe-reply + warm-quiet (got {names})")
        check("Fresh LP" not in names, "recent contact not surfaced")
        check("Buried LP" not in names, "graveyard excluded")
        # Look items up with a None default so a missing entry is reported as FAIL, not a crash.
        check(bool(radar) and radar[0]["name"] == "Owe Reply LP" and radar[0]["tier"] == 0,
              "owe-a-reply ranked first (tier 0)")
        owe = next((x for x in radar if x["name"] == "Owe Reply LP"), None)
        check(owe is not None and "owe a reply" in owe["reason"] and owe["suggested_type"] == "follow_up",
              "owe-reply reason + suggested type")
        warm = next((x for x in radar if x["name"] == "Warm Quiet LP"), None)
        check(warm is not None and warm["tier"] == 2 and warm["suggested_type"] == "nurture",
              "warm-quiet is tier 2, suggests nurture")
    finally:
        rc.close()


def main():
    """Run every outreach context/voice/radar check; exit with status 1 if any failed.

    All databases live in one temporary directory that is removed afterwards, and
    each connection is closed even when a check raises.
    """
    with tempfile.TemporaryDirectory() as tmp:
        _context_and_voice_checks(os.path.join(tmp, "t.db"))

        # type catalogue is intact
        check(set(["intro", "follow_up", "fund_update", "meeting_follow_up", "nurture"]) <= set(oa.OUTREACH_TYPES),
              "outreach types catalogue present")

        # ── follow-up radar ──
        _radar_checks(os.path.join(tmp, "radar.db"))

    if FAILS:
        print(f"\nFAILED ({len(FAILS)})")
        for f in FAILS:
            print(" - " + f)
        sys.exit(1)
    print("\nALL PASS (outreach context assembly)")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
@@ -0,0 +1,123 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Regression test for the outreach NER-backstop wiring (v0.1.0:74).
|
||||||
|
|
||||||
|
The outreach draft path scrubs free-prose LP context (CRM notes + email bodies) before
|
||||||
|
it reaches Claude. The dictionary+regex floor only tokenizes KNOWN CRM entities, so an
|
||||||
|
UNKNOWN person/firm mentioned in an email body would otherwise reach Claude in the clear.
|
||||||
|
The v74 fix wired the local-Qwen NER backstop into draft_outreach (outreach_agent.py:
|
||||||
|
`Boundary(..., ner_fn=_ner_local)`) and made it FAIL CLOSED when the local model is down.
|
||||||
|
|
||||||
|
This drives the real draft_outreach with Claude and the NER model stubbed (offline,
|
||||||
|
synthetic — guardrail #9) and proves:
|
||||||
|
(1) an unknown name in an email body is tokenized AWAY from the Claude payload;
|
||||||
|
(2) it is re-hydrated locally so the human still sees the real name;
|
||||||
|
(3) the interaction_log captures no sensitive value;
|
||||||
|
(4) when the local NER model raises (unreachable), the path returns scrub_unavailable
|
||||||
|
and Claude is never called.
|
||||||
|
|
||||||
|
Run: cd backend && python3 mcp/test_outreach_redaction.py
|
||||||
|
"""
|
||||||
|
import os
|
||||||
|
import sqlite3
|
||||||
|
import sys
|
||||||
|
import tempfile
|
||||||
|
|
||||||
|
_HERE = os.path.dirname(os.path.abspath(__file__))
|
||||||
|
sys.path.insert(0, _HERE) # backend/mcp
|
||||||
|
sys.path.insert(0, os.path.dirname(_HERE)) # backend (for the redaction package)
|
||||||
|
|
||||||
|
import outreach_agent as oa # noqa: E402
|
||||||
|
import architect_grounding as G # noqa: E402
|
||||||
|
import architect_agent as aa # noqa: E402 (imports OK offline; client is lazy)
|
||||||
|
|
||||||
|
FAILS = []
|
||||||
|
|
||||||
|
UNKNOWN = "Penelope Ashworth-Vane" # a person in NO CRM table -> only NER can catch her
|
||||||
|
INVESTOR = "Harbor & Vine" # a known org (fundraising_investors) -> dictionary floor
|
||||||
|
|
||||||
|
|
||||||
|
def check(cond, msg):
    """Report one check: print PASS or FAIL with ``msg`` and record failures in FAILS."""
    verdict = " PASS " if cond else " FAIL "
    print(verdict + msg)
    if cond:
        return
    FAILS.append(msg)
|
||||||
|
|
||||||
|
|
||||||
|
def make_db():
    """Create a throwaway synthetic CRM database for the redaction test.

    It holds one known investor (INVESTOR) and one linked, matched email whose body
    names UNKNOWN in free prose, plus an empty interaction_log table.

    Returns:
        ``(path, connection)`` — the DB file path and an open sqlite3 connection
        with ``sqlite3.Row`` rows.
    """
    schema = """
        CREATE TABLE fundraising_investors (id TEXT PRIMARY KEY, investor_name TEXT, notes TEXT);
        CREATE TABLE emails (id TEXT PRIMARY KEY, subject TEXT, body_text TEXT, snippet TEXT, sent_at TEXT,
                             from_email TEXT, to_emails_json TEXT, thread_id TEXT, is_matched INT);
        CREATE TABLE email_investor_links (id TEXT, email_id TEXT, fundraising_investor_id TEXT);
        CREATE TABLE interaction_log (id TEXT PRIMARY KEY, ts TEXT, actor_type TEXT, actor_id TEXT, action TEXT,
                                      target_type TEXT, target_id TEXT, payload TEXT, source TEXT, created_at TEXT);
    """
    # The active-thread email body names an UNKNOWN person in free prose.
    email_body = f"Thanks for the call. My partner {UNKNOWN} still has a lock-up objection."

    path = os.path.join(tempfile.mkdtemp(), "crm.db")
    db = sqlite3.connect(path)
    db.row_factory = sqlite3.Row
    db.executescript(schema)
    db.execute(
        "INSERT INTO fundraising_investors VALUES ('inv1',?,?)",
        (INVESTOR, "Warm on Fund III; weighing lock-up terms."),
    )
    db.execute(
        "INSERT INTO emails (id,subject,body_text,sent_at,thread_id,is_matched) VALUES "
        "('e1','Re: Fund III',?,?,'t1',1)",
        (email_body, "2026-06-02T10:00:00"),
    )
    db.execute("INSERT INTO email_investor_links (id,email_id,fundraising_investor_id) VALUES ('l1','e1','inv1')")
    db.commit()
    return path, db
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Drive the real draft_outreach with Claude and the NER model stubbed.

    Scenario A: an unknown name is tokenized away from the Claude payload and restored
    locally. Scenario B: when the local NER model raises, the path fails closed.
    The stubs are undone, the connection closed and the temp DB removed even if a
    scenario raises. Exits with status 1 when any check failed.
    """
    import shutil  # local import: only needed for temp-dir cleanup here

    db_path, conn = make_db()
    originals = (aa.at.get_thesis, oa._draft_with_claude, G._ner_local)
    try:
        # Stub the thesis fetch (avoid the thesis DB dependency) and Claude. The NER stub stands
        # in for the local-Qwen model; _draft_with_claude echoes the de-identified text back so
        # re-hydration is exercised and we can inspect exactly what would have reached Claude.
        aa.at.get_thesis = lambda *a, **k: {}
        captured = {}

        def fake_claude(aa_mod, thesis, type_desc, deident_target, deident_voice, guidance):
            captured["target"] = deident_target
            return deident_target  # passthrough -> rehydrate must restore the real name

        oa._draft_with_claude = fake_claude
        G._ner_local = lambda text: [(UNKNOWN, "PERSON")]  # local model UP, finds the unknown name

        # ── A) unknown name is tokenized away from Claude, restored locally ──
        print("\n[A — NER backstop tokenizes an unknown name in outreach]")
        res = oa.draft_outreach(conn, "inv1", "follow_up", "", db_path, sender_email=None)
        check(res.get("status") == "ok", f"draft ok (status={res.get('status')})")
        sent = captured.get("target", "")
        check(UNKNOWN not in sent, "unknown name absent from the Claude payload (NER tokenized it)")
        check(INVESTOR not in sent, "known investor org absent from the Claude payload (dictionary floor)")
        check("lock-up" in sent, "objection substance survives to Claude")
        check(UNKNOWN in res.get("draft", ""), "unknown name re-hydrated locally for the human")

        blob = " ".join(r[0] for r in conn.execute("SELECT payload FROM interaction_log WHERE payload IS NOT NULL"))
        check(UNKNOWN not in blob and INVESTOR not in blob, "interaction_log carries NO sensitive value")

        # ── B) FAIL CLOSED: local NER model unreachable -> no Claude call ──
        print("\n[B — fail closed: local NER model down]")
        called = {"claude": False}

        def boom(text):
            raise RuntimeError("Spark Control unreachable")

        G._ner_local = boom
        oa._draft_with_claude = lambda *a, **k: called.__setitem__("claude", True) or a[3]
        res2 = oa.draft_outreach(conn, "inv1", "follow_up", "", db_path, sender_email=None)
        check(res2.get("status") == "scrub_unavailable", f"status scrub_unavailable (got {res2.get('status')})")
        check(bool(res2.get("reason")), "scrub_unavailable carries the propagated NER failure reason (non-vacuous)")
        check(called["claude"] is False, "Claude was NOT called when the NER model is down (fail closed)")
        check("draft" not in res2, "no draft returned when scrub fails closed")
    finally:
        # Undo the monkeypatches so importing this module elsewhere leaves no stubs behind.
        aa.at.get_thesis, oa._draft_with_claude, G._ner_local = originals
        conn.close()
        # make_db() puts the DB in its own mkdtemp() directory; remove that directory.
        shutil.rmtree(os.path.dirname(db_path), ignore_errors=True)

    print()
    if FAILS:
        print(f"FAILED ({len(FAILS)}):")
        for f in FAILS:
            print(f" - {f}")
        sys.exit(1)
    print("ALL PASS (outreach NER-backstop wiring + fail-closed)")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
@@ -113,4 +113,8 @@ ALTER TABLE contacts ADD COLUMN deleted_at TEXT;
|
|||||||
ALTER TABLE organizations ADD COLUMN deleted_at TEXT;
|
ALTER TABLE organizations ADD COLUMN deleted_at TEXT;
|
||||||
ALTER TABLE opportunities ADD COLUMN deleted_at TEXT;
|
ALTER TABLE opportunities ADD COLUMN deleted_at TEXT;
|
||||||
ALTER TABLE communications ADD COLUMN deleted_at TEXT;
|
ALTER TABLE communications ADD COLUMN deleted_at TEXT;
|
||||||
ALTER TABLE lp_profiles ADD COLUMN deleted_at TEXT;
|
-- lp_profiles ALTER removed (v0.1.0:104): the lp_profiles table is dropped in
|
||||||
|
-- 0008_drop_retired_tables.sql and is no longer created by init_db(), so this
|
||||||
|
-- ALTER would fail "no such table" on a fresh install. Live DBs already applied
|
||||||
|
-- this migration (with the original ALTER) before lp_profiles was dropped, so
|
||||||
|
-- removing the line here only affects fresh DBs — same end state either way.
|
||||||
|
|||||||
@@ -0,0 +1,7 @@
|
|||||||
|
-- Reversal of 0005_grid_pipeline_link.sql (manual; .down files are never auto-applied).
|
||||||
|
--
|
||||||
|
-- SQLite < 3.35 cannot DROP COLUMN. The added column is nullable and ignored by any code
|
||||||
|
-- path predating it, so leaving it in place is harmless. The index drops freely. On
|
||||||
|
-- SQLite >= 3.35 the column itself may also be dropped.
|
||||||
|
DROP INDEX IF EXISTS idx_opportunities_fr_investor;
|
||||||
|
-- ALTER TABLE opportunities DROP COLUMN fundraising_investor_id; -- SQLite >= 3.35 only
|
||||||
@@ -0,0 +1,22 @@
|
|||||||
|
-- Grid → Pipeline adoption — a durable link from a fundraising-grid investor to its
|
||||||
|
-- Pipeline opportunity row.
|
||||||
|
--
|
||||||
|
-- ADDITIVE + REVERSIBLE (CLAUDE.md guardrail #3): adds one nullable column + index.
|
||||||
|
-- Until now the grid's "Create Opportunity" button fired a one-shot POST with no
|
||||||
|
-- back-reference, so a grid investor could spawn unlimited duplicate opportunities and
|
||||||
|
-- an opp never knew which grid row it belonged to. opportunities.fundraising_investor_id
|
||||||
|
-- records the link (set by the new POST /api/fundraising/pipeline/link endpoint), making
|
||||||
|
-- the relationship dedup-able and reconcilable. "Is this investor in the pipeline?" and
|
||||||
|
-- "what stage?" are then DERIVED from a live join on this column — deliberately not a
|
||||||
|
-- denormalized mirror flag on fundraising_investors, which would only reintroduce the
|
||||||
|
-- two-model drift this CRM exists to fight.
|
||||||
|
--
|
||||||
|
-- fundraising_investor_id is a LOGICAL foreign key to fundraising_investors(id). It is
|
||||||
|
-- intentionally NOT a declared SQLite FOREIGN KEY: opportunities are soft-deleted (never
|
||||||
|
-- hard-deleted) and fundraising_investors rows are rebuilt on every grid save, so there
|
||||||
|
-- is nothing to cascade; SQLite's ALTER TABLE ADD COLUMN cannot add an enforced FK
|
||||||
|
-- cleanly anyway. Nullable so every existing opportunity stays valid — a manually-created,
|
||||||
|
-- non-grid opportunity simply has NULL here.
|
||||||
|
ALTER TABLE opportunities ADD COLUMN fundraising_investor_id TEXT;
|
||||||
|
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_opportunities_fr_investor ON opportunities(fundraising_investor_id);
|
||||||
@@ -0,0 +1,8 @@
|
|||||||
|
-- Manual rollback for 0006_reminders.sql (never auto-applied).
|
||||||
|
-- Drops the whole reminders feature table. Per the never-hard-delete guardrail this
|
||||||
|
-- discards reminder history, so only run it to reverse a bad migration on a dev/copy DB.
|
||||||
|
DROP INDEX IF EXISTS idx_reminders_assignee;
|
||||||
|
DROP INDEX IF EXISTS idx_reminders_due;
|
||||||
|
DROP INDEX IF EXISTS idx_reminders_status;
|
||||||
|
DROP INDEX IF EXISTS idx_reminders_investor;
|
||||||
|
DROP TABLE IF EXISTS reminders;
|
||||||
@@ -0,0 +1,45 @@
|
|||||||
|
-- Reminders & follow-ups — a real tickler/task model tied to the fundraising grid.
|
||||||
|
--
|
||||||
|
-- ADDITIVE + REVERSIBLE (CLAUDE.md guardrail #3): one new table + indexes; nothing
|
||||||
|
-- existing is touched. Until now the only follow-up surfaces were the grid's binary
|
||||||
|
-- `follow_up` checkbox (no date, owner, or status) and communications.next_action_date
|
||||||
|
-- (tied to a single logged comm). This gives investors first-class reminders with a due
|
||||||
|
-- date, status lifecycle, assignee, and provenance — the foundation for "who needs a
|
||||||
|
-- follow-up?" queries, the daily digest's due/overdue section, and (later) bot-proposed
|
||||||
|
-- reminders behind the Matrix approval gate.
|
||||||
|
--
|
||||||
|
-- investor_id is a LOGICAL foreign key to fundraising_investors(id) — deliberately NOT a
|
||||||
|
-- declared SQLite FOREIGN KEY, matching opportunities.fundraising_investor_id (migration
|
||||||
|
-- 0005). fundraising_investors rows are upserted by source_row_id on every grid save with
|
||||||
|
-- a STABLE id (so the link survives saves), but a row dropped from the grid is DELETEd —
|
||||||
|
-- there is nothing to cascade, and reconcile_grid_reminders() cancels the orphans on the
|
||||||
|
-- next save (the pipeline reconciler's twin). investor_name is denormalized so a reminder
|
||||||
|
-- stays readable in history even after its grid row is gone. investor_id is nullable: a
|
||||||
|
-- reminder can be a standalone team task not tied to any investor.
|
||||||
|
--
|
||||||
|
-- contact_id is an optional logical FK to contacts(id) (the specific person). assignee_id
|
||||||
|
-- is a logical ref to users(id) (NULL = team-wide). created_by holds a users.id OR a
|
||||||
|
-- non-user sentinel ('bot'/'automation'), so it is plain TEXT with no FK.
|
||||||
|
CREATE TABLE IF NOT EXISTS reminders (
|
||||||
|
id TEXT PRIMARY KEY,
|
||||||
|
investor_id TEXT, -- logical FK -> fundraising_investors.id (NULL = standalone task)
|
||||||
|
investor_name TEXT, -- denormalized; survives grid-row deletion
|
||||||
|
contact_id TEXT, -- optional logical FK -> contacts.id
|
||||||
|
title TEXT NOT NULL,
|
||||||
|
details TEXT,
|
||||||
|
due_date TEXT, -- ISO date 'YYYY-MM-DD' (or datetime)
|
||||||
|
status TEXT NOT NULL DEFAULT 'open', -- open | done | snoozed | cancelled
|
||||||
|
snoozed_until TEXT,
|
||||||
|
assignee_id TEXT, -- logical ref -> users.id; NULL = team-wide
|
||||||
|
created_by TEXT, -- users.id, or 'bot' / 'automation'
|
||||||
|
source TEXT NOT NULL DEFAULT 'human', -- human | bot | automation
|
||||||
|
completed_at TEXT,
|
||||||
|
created_at TEXT DEFAULT (datetime('now')),
|
||||||
|
updated_at TEXT DEFAULT (datetime('now')),
|
||||||
|
deleted_at TEXT
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_reminders_investor ON reminders(investor_id) WHERE deleted_at IS NULL;
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_reminders_status ON reminders(status) WHERE deleted_at IS NULL;
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_reminders_due ON reminders(due_date) WHERE deleted_at IS NULL;
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_reminders_assignee ON reminders(assignee_id) WHERE deleted_at IS NULL;
|
||||||
@@ -0,0 +1,14 @@
|
|||||||
|
-- Reversal of 0007_pipeline_stages_v2.sql (manual; .down files are never auto-applied).
|
||||||
|
--
|
||||||
|
-- BEST-EFFORT: the 6->4 stage collapse is lossy and cannot be perfectly inverted (the
|
||||||
|
-- pattern other .down files here share -- e.g. 0005 cannot DROP COLUMN on old SQLite). It
|
||||||
|
-- restores VALID legacy 6-stage values, choosing a representative for each collapsed pair:
|
||||||
|
-- engaged was outreach OR meeting -> 'meeting' (representative)
|
||||||
|
-- diligence -> 'due_diligence' (exact)
|
||||||
|
-- commitment was committed OR funded -> 'committed' (representative)
|
||||||
|
-- Opportunities archived from the stray 'lost' value still carry stage = 'lost' but cannot be
|
||||||
|
-- re-identified as "archived by this migration" vs archived for other reasons, so they are
|
||||||
|
-- left archived; un-archive (clear deleted_at) manually if a rollback truly needs them back.
|
||||||
|
-- One-pass form of the per-stage reversal. The three mappings are disjoint and none of the
-- restored values is itself a v2 stage, so a single CASE yields the same rows as running
-- one UPDATE per stage. The WHERE clause leaves every other stage value untouched.
UPDATE opportunities
SET stage = CASE stage
        WHEN 'engaged' THEN 'meeting'
        WHEN 'diligence' THEN 'due_diligence'
        WHEN 'commitment' THEN 'committed'
    END
WHERE stage IN ('engaged', 'diligence', 'commitment');
|
||||||
@@ -0,0 +1,25 @@
|
|||||||
|
-- Pipeline funnel v2 — collapse the inherited 6-stage opportunity funnel into the locked
|
||||||
|
-- 4-stage per-investor funnel: lead -> engaged -> diligence -> commitment, terminal at
|
||||||
|
-- commitment. See ROADMAP "Pipeline stages + investor flags/labels -- LOCKED SPEC" (2026-06-19)
|
||||||
|
-- and server.PIPELINE_STAGES.
|
||||||
|
--
|
||||||
|
-- DATA-ONLY + DEPLOYMENT-STATE-INVARIANT (migrations guide): targets stage values
|
||||||
|
-- structurally, so it is a no-op on a fresh DB (no opportunities) and remaps deterministically
|
||||||
|
-- on a populated one.
|
||||||
|
-- outreach, meeting -> engaged (a two-way conversation has begun; "meeting" was an
|
||||||
|
-- activity, not a position, so it folds in here)
|
||||||
|
-- due_diligence -> diligence
|
||||||
|
-- committed, funded -> commitment (terminal; post-commit $ lives in the grid fund cell,
|
||||||
|
-- and fund admin owns post-commitment -- no "funded" stage)
|
||||||
|
-- One-pass form of the 6 -> 4 collapse. No target value ('engaged', 'diligence',
-- 'commitment') is also a legacy source value, so a single CASE produces the same result
-- as three sequential UPDATEs. Rows in any other stage (e.g. 'lead', 'lost') are skipped.
UPDATE opportunities
SET stage = CASE
        WHEN stage IN ('outreach', 'meeting') THEN 'engaged'
        WHEN stage = 'due_diligence' THEN 'diligence'
        WHEN stage IN ('committed', 'funded') THEN 'commitment'
    END
WHERE stage IN ('outreach', 'meeting', 'due_diligence', 'committed', 'funded');
|
||||||
|
|
||||||
|
-- The stray legacy 'lost' value is not in the new settable enum, and a lost deal is a dead
|
||||||
|
-- deal: ARCHIVE (soft-delete) the opportunity rather than leave an un-settable stage on a live
|
||||||
|
-- row. The grid investor row is left fully intact (the grid is canonical); graveyarding the
|
||||||
|
-- investor stays a human action, never an auto-mutation (human-in-the-loop guardrail). The
|
||||||
|
-- stage text is left as 'lost' on the archived row for provenance -- it is filtered out
|
||||||
|
-- everywhere by deleted_at IS NULL.
|
||||||
|
-- The deleted_at IS NULL guard skips rows that are already archived, so their original
-- archive timestamp is not overwritten (and re-running this statement changes nothing).
UPDATE opportunities SET deleted_at = datetime('now'), updated_at = datetime('now')
WHERE stage = 'lost' AND deleted_at IS NULL;
|
||||||
@@ -0,0 +1,42 @@
|
|||||||
|
-- 0008_drop_retired_tables.down.sql (manual rollback only — never auto-applied)
|
||||||
|
--
|
||||||
|
-- Recreates the two dropped tables as EMPTY shells, matching the schema that existed
|
||||||
|
-- immediately before 0008 (lp_profiles includes the deleted_at column that migration
|
||||||
|
-- 0001 had added). Data is not restored — both tables were empty when dropped.
|
||||||
|
-- Empty shell of lp_profiles. IF NOT EXISTS makes this a no-op when the table is present.
CREATE TABLE IF NOT EXISTS lp_profiles (
    id TEXT PRIMARY KEY,
    -- UNIQUE: at most one profile per contact; the row is removed with its contact (CASCADE).
    contact_id TEXT NOT NULL UNIQUE REFERENCES contacts(id) ON DELETE CASCADE,
    commitment_amount REAL DEFAULT 0,
    funded_amount REAL DEFAULT 0,
    commitment_date TEXT,
    fund_name TEXT,
    investor_type TEXT,
    -- The INTEGER DEFAULT 0 columns below look like 0/1 flags -- confirm before relying on it.
    accredited INTEGER DEFAULT 0,
    legal_docs_signed INTEGER DEFAULT 0,
    signed_date TEXT,
    wire_received INTEGER DEFAULT 0,
    wire_date TEXT,
    k1_sent INTEGER DEFAULT 0,
    preferred_communication TEXT DEFAULT 'email',
    notes TEXT,
    created_at TEXT DEFAULT (datetime('now')),
    updated_at TEXT DEFAULT (datetime('now')),
    deleted_at TEXT
);
CREATE INDEX IF NOT EXISTS idx_lp_profiles_contact ON lp_profiles(contact_id);

-- Empty shell of feature_requests. Unlike lp_profiles it has no deleted_at column.
CREATE TABLE IF NOT EXISTS feature_requests (
    id TEXT PRIMARY KEY,
    title TEXT NOT NULL,
    description TEXT,
    page TEXT,
    category TEXT DEFAULT 'general',
    priority TEXT DEFAULT 'medium',
    status TEXT DEFAULT 'new',
    requested_by TEXT,
    requested_by_user_id TEXT REFERENCES users(id), -- nullable FK, no ON DELETE action declared
    created_at TEXT DEFAULT (datetime('now')),
    updated_at TEXT DEFAULT (datetime('now'))
);
CREATE INDEX IF NOT EXISTS idx_feature_requests_status ON feature_requests(status);
CREATE INDEX IF NOT EXISTS idx_feature_requests_created_at ON feature_requests(created_at);
|
||||||
@@ -0,0 +1,15 @@
|
|||||||
|
-- 0008_drop_retired_tables.sql (v0.1.0:104)
|
||||||
|
--
|
||||||
|
-- ONE-OFF DESTRUCTIVE EXCEPTION to the never-hard-delete rule, explicitly approved.
|
||||||
|
-- Both tables are EMPTY and fully removed from the application code:
|
||||||
|
-- * lp_profiles — the legacy single-fund LP model, retired v0.1.0:78; the
|
||||||
|
-- fundraising_* grid is the canonical commitment record now.
|
||||||
|
-- * feature_requests — backed the in-app Feedback page, which was removed.
|
||||||
|
--
|
||||||
|
-- The never-hard-delete policy STILL STANDS for all real CRM and thesis data — this
|
||||||
|
-- is a deliberate, documented exception for two empty, retired tables so they don't
|
||||||
|
-- linger as dead schema. init_db() no longer creates either table, and migration
|
||||||
|
-- 0001's lp_profiles ALTER was removed, so a fresh DB never creates them and this
|
||||||
|
-- DROP is a harmless no-op there; on the live box it removes the existing empties.
|
||||||
|
-- IF EXISTS turns each DROP into a no-op when the table is absent, so the migration is
-- safe to run on a database that never had these tables.
DROP TABLE IF EXISTS lp_profiles;
DROP TABLE IF EXISTS feature_requests;
|
||||||
@@ -0,0 +1,9 @@
|
|||||||
|
"""nl_query — the safe, read-only natural-language query surface (W2).
|
||||||
|
|
||||||
|
The LLM's job (added later) is only to map a question to a {intent, slots} pair; everything
|
||||||
|
that touches the database lives here behind a strict validator and a fixed, hand-written,
|
||||||
|
parameterized query catalog. See runner.py (the trust boundary) and intents.py (the catalog).
|
||||||
|
"""
|
||||||
|
from .runner import run_query, validate, catalog # noqa: F401
|
||||||
|
from .intents import INTENTS # noqa: F401
|
||||||
|
from .translate import translate, answer, build_system # noqa: F401
|
||||||
@@ -0,0 +1,440 @@
|
|||||||
|
"""NL-query intents — the curated, hand-written query catalog (W2, the safe core).
|
||||||
|
|
||||||
|
Each intent is a FIXED, reviewed, parameterized SQL query with a small set of typed
|
||||||
|
"slots" (the blanks a question fills in: a number of days, a name, a limit). There is NO
|
||||||
|
generic SQL/AST compiler and NO dynamically-built identifiers: every table and column name
|
||||||
|
is hardcoded in the query text, and every value the caller (or an LLM) supplies reaches
|
||||||
|
SQLite only as a bound `?` parameter. That is the whole trust model — a malformed or
|
||||||
|
hostile request can change a bound value, never the query structure. Adding a capability
|
||||||
|
means adding a reviewed entry here, not widening a language.
|
||||||
|
|
||||||
|
Soft-delete discipline (CLAUDE.md standing rule), per table:
|
||||||
|
- reminders / opportunities / communications carry `deleted_at` -> filter `deleted_at IS NULL`.
|
||||||
|
- emails have NO `deleted_at`; "live" means a non-tombstoned per-mailbox sighting exists
|
||||||
|
(`email_account_messages.deleted_at IS NULL`) — mirror the digest / query_email_activity.
|
||||||
|
- fundraising_investors/_contacts/_funds/_commitments are a HARD-REBUILT projection of the
|
||||||
|
grid blob with NO `deleted_at` column; the live/retired axis there is the `graveyard` flag.
|
||||||
|
Do NOT add `deleted_at IS NULL` to those tables — the column does not exist and the clause
|
||||||
|
would raise. Exclude `graveyard = 1` where the question means "live" investors.
|
||||||
|
|
||||||
|
Each run_* returns {columns, rows, summary, truncated}. `summary` is a DETERMINISTIC local
|
||||||
|
one-liner (never an LLM narrative) — results never leave the box to be summarized.
|
||||||
|
"""
|
||||||
|
import sqlite3
|
||||||
|
from datetime import datetime, timedelta
|
||||||
|
|
||||||
|
# Generous ceiling — the Matrix review room is two admins and the web app is internal, so
|
||||||
|
# dumping the full book is acceptable (per Grant); this only guards against an unbounded
|
||||||
|
# scan flooding a response. A list intent past this is reported truncated, never silently cut.
|
||||||
|
MAX_ROWS = 500

# Funnel stages, earliest first, ending at 'commitment' (kept in step with
# server.PIPELINE_STAGES). Declared locally so ranking never needs a server import:
# helpers receive a conn and do not import the server module.
_STAGE_ORDER = [
    'lead',
    'engaged',
    'diligence',
    'commitment',
]
# SQL expression ranking `stage` 1..4 in funnel order; any other value ranks 0.
_STAGE_RANK_SQL = (
    "CASE stage"
    " WHEN 'lead' THEN 1"
    " WHEN 'engaged' THEN 2"
    " WHEN 'diligence' THEN 3"
    " WHEN 'commitment' THEN 4"
    " ELSE 0 END")
|
||||||
|
|
||||||
|
|
||||||
|
# ── helpers ────────────────────────────────────────────────────────────────────────────
|
||||||
|
def _rows(cur):
    """Drain an executed cursor into a list of ``{column_name: value}`` dicts.

    Column names come from ``cur.description`` and values are paired positionally, so the
    result has the same shape whether the connection hands back plain tuples or
    ``sqlite3.Row`` objects.
    """
    names = tuple(column[0] for column in cur.description)
    records = []
    for raw in cur.fetchall():
        records.append(dict(zip(names, raw)))
    return records
|
||||||
|
|
||||||
|
|
||||||
|
def like_contains(value):
    """Return a ``%...%`` LIKE pattern that matches `value` as a literal substring.

    Backslash, ``%`` and ``_`` are each prefixed with a backslash in one pass, so a
    caller-supplied wildcard is matched literally. The SQL must use
    ``LIKE ? ESCAPE '\\'`` for the escaping to take effect.
    """
    escapes = {ord("\\"): "\\\\", ord("%"): "\\%", ord("_"): "\\_"}
    literal = value.translate(escapes)
    return "%" + literal + "%"
|
||||||
|
|
||||||
|
|
||||||
|
def _last_activity_by_investor(conn):
    """Return ``{fundraising_investors.id: latest activity timestamp}``.

    Two sources are merged, keeping the greatest timestamp per investor:

    * logged communications, joined to the investor through ``fundraising_contacts`` and
      filtered to ``cm.deleted_at IS NULL``;
    * investor-linked emails that still have at least one live (non-tombstoned)
      ``email_account_messages`` sighting.

    This is the per-investor recency signal behind the "gone quiet" and "last contact"
    intents.

    NB: this MIRRORS server.last_activity_by_investor() and its soft-delete joins. It is
    duplicated rather than imported only to keep this module free of a server import (the
    main module runs as __main__, so `import server` would re-execute it). Keep the two in
    sync; the soft-delete test guards this copy. If a third caller appears, extract both to
    a shared module.

    Rows are unpacked positionally rather than indexed by column name, so the function
    works with the default tuple row_factory as well as ``sqlite3.Row`` -- the same
    contract `_rows` documents. Name-based indexing raised TypeError on tuple rows, and
    that error was not covered by the OperationalError guards below.
    """
    out = {}

    def _bump(inv_id, ts):
        # Timestamps are compared as strings; rows with no investor id or no timestamp
        # are ignored.
        if inv_id and ts and (out.get(inv_id) is None or str(ts) > str(out[inv_id])):
            out[inv_id] = ts

    # Each leg is guarded: the comms/email tables can be absent on a minimal DB. This is a
    # narrow, intentional tolerance for optional tables -- NOT the broad error-swallowing the
    # runner forbids (a failure in an intent's main query surfaces as query_failed).
    try:
        for inv_id, last_ts in conn.execute(
                "SELECT fc.investor_id AS inv, MAX(cm.communication_date) AS last_ts "
                "FROM communications cm JOIN fundraising_contacts fc ON fc.contact_id = cm.contact_id "
                "WHERE cm.deleted_at IS NULL AND fc.contact_id IS NOT NULL GROUP BY fc.investor_id"):
            _bump(inv_id, last_ts)
    except sqlite3.OperationalError:
        pass
    try:
        for inv_id, last_ts in conn.execute(
                "SELECT eil.fundraising_investor_id AS inv, MAX(e.sent_at) AS last_ts "
                "FROM email_investor_links eil JOIN emails e ON e.id = eil.email_id "
                "WHERE eil.fundraising_investor_id IS NOT NULL AND EXISTS "
                "(SELECT 1 FROM email_account_messages eam WHERE eam.email_id = e.id "
                "AND eam.deleted_at IS NULL) GROUP BY eil.fundraising_investor_id"):
            _bump(inv_id, last_ts)
    except sqlite3.OperationalError:
        pass
    return out
|
||||||
|
|
||||||
|
|
||||||
|
def _today():
    """Return the current calendar date in UTC.

    Uses an aware ``datetime.now(timezone.utc)`` instead of ``datetime.utcnow()``, which is
    deprecated since Python 3.12. The resulting date is the same.
    """
    # Local import: the module-level import only brings in datetime and timedelta.
    from datetime import timezone
    return datetime.now(timezone.utc).date()
|
||||||
|
|
||||||
|
|
||||||
|
def _days_since(ts):
    """Number of whole days from the date in `ts` to today (UTC).

    Only the first ten characters of ``str(ts)`` are parsed, i.e. the date portion of an
    ISO date or datetime. Returns None when `ts` is falsy or that prefix is not a valid ISO
    date. A date in the future yields a negative number.
    """
    if not ts:
        return None
    day_part = str(ts)[:10].replace("Z", "")
    try:
        parsed = datetime.fromisoformat(day_part)
    except ValueError:
        return None
    elapsed = _today() - parsed.date()
    return elapsed.days
|
||||||
|
|
||||||
|
|
||||||
|
def _own_addresses(conn):
    """Return the set of addresses stored in ``email_accounts.email_address``.

    Each address is lower-cased and stripped; NULL and blank values are dropped. If the
    query raises ``sqlite3.OperationalError`` (for example, the table does not exist), an
    empty set is returned instead.
    """
    addresses = set()
    try:
        for record in conn.execute("SELECT email_address FROM email_accounts"):
            cleaned = (record[0] or "").lower().strip()
            if cleaned:
                addresses.add(cleaned)
    except sqlite3.OperationalError:
        return set()
    return addresses
|
||||||
|
|
||||||
|
|
||||||
|
def _truncate(rows):
    """Cap `rows` at MAX_ROWS. Returns ``(rows, truncated)``; `truncated` is True only when
    rows were actually cut."""
    over_limit = len(rows) > MAX_ROWS
    return (rows[:MAX_ROWS], True) if over_limit else (rows, False)
|
||||||
|
|
||||||
|
|
||||||
|
# ── investor intents ─────────────────────────────────────────────────────────────────────
|
||||||
|
def run_investors_cold(conn, slots):
    """List non-graveyard investors with no activity inside the last ``slots["days"]`` days.

    An investor is included when it has no recorded activity at all, or when the date part
    of its latest activity is earlier than today minus `days`. Investors with no activity
    come first, followed by the rest from most to least stale. The summary reports the
    full match count even when the returned rows are capped.
    """
    window = slots["days"]
    threshold = (_today() - timedelta(days=window)).isoformat()
    latest = _last_activity_by_investor(conn)
    live = _rows(conn.execute(
        "SELECT id, investor_name, lead, total_invested FROM fundraising_investors "
        "WHERE graveyard = 0 ORDER BY investor_name"))

    matches = []
    for investor in live:
        stamp = latest.get(investor["id"])
        if stamp is not None and not (str(stamp)[:10] < threshold):
            continue  # contacted inside the window
        matches.append({
            "investor_name": investor["investor_name"],
            "lead": investor["lead"],
            "total_invested": investor["total_invested"],
            "last_activity_at": stamp,
            "days_since": _days_since(stamp),
        })

    def _staleness(entry):
        # (False, ...) sorts first: entries without a day count lead, then largest gap first.
        gap = entry["days_since"]
        return (gap is not None, -(gap or 0))

    matches.sort(key=_staleness)
    shown, was_cut = _truncate(matches)
    return {"columns": ["investor_name", "lead", "total_invested", "last_activity_at", "days_since"],
            "rows": shown, "truncated": was_cut,
            "summary": f"{len(matches)} live investor(s) not contacted in {window}+ days."}
|
||||||
|
|
||||||
|
|
||||||
|
def run_investor_lookup(conn, slots):
    """Profile of up to 25 investors whose name contains ``slots["name"]``.

    Each row carries the investor's own fields plus two nested lists: `contacts`
    (name/email/title/location) and `commitments` (fund name and amount, non-zero amounts
    only). Graveyard investors are included but sorted after live ones. The internal `id`
    is used for the nested lookups and removed from the returned row.
    """
    name = slots["name"]
    found = _rows(conn.execute(
        "SELECT id, investor_name, lead, lead_source, total_invested, follow_up, graveyard "
        "FROM fundraising_investors WHERE investor_name LIKE ? ESCAPE '\\' "
        "ORDER BY graveyard, investor_name LIMIT 25", (like_contains(name),)))

    contacts_sql = (
        "SELECT full_name, email, title, city, state, country FROM fundraising_contacts "
        "WHERE investor_id = ? ORDER BY sort_order, full_name")
    commitments_sql = (
        "SELECT f.fund_name, c.amount FROM fundraising_commitments c "
        "JOIN fundraising_funds f ON f.id = c.fund_id WHERE c.investor_id = ? AND c.amount <> 0 "
        "ORDER BY f.display_order")

    for investor in found:
        key = (investor["id"],)
        investor["contacts"] = _rows(conn.execute(contacts_sql, key))
        investor["commitments"] = _rows(conn.execute(commitments_sql, key))
        investor.pop("id", None)

    return {"columns": ["investor_name", "lead", "lead_source", "total_invested",
                        "follow_up", "graveyard", "contacts", "commitments"],
            "rows": found, "truncated": False,
            "summary": f"{len(found)} investor(s) matching \"{name}\"."}
|
||||||
|
|
||||||
|
|
||||||
|
def run_investors_by_city(conn, slots):
    """Non-graveyard investors that have a contact whose city contains ``slots["city"]``.

    Returns one row per matching contact. The query fetches MAX_ROWS + 1 rows so that
    hitting the ceiling can be reported as `truncated`.
    """
    city = slots["city"]
    query = (
        "SELECT i.investor_name, c.full_name AS contact, c.city, c.state, c.country, i.lead "
        "FROM fundraising_contacts c JOIN fundraising_investors i ON i.id = c.investor_id "
        "WHERE i.graveyard = 0 AND c.city LIKE ? ESCAPE '\\' "
        "ORDER BY i.investor_name, c.full_name LIMIT ?")
    fetched = _rows(conn.execute(query, (like_contains(city), MAX_ROWS + 1)))
    shown, was_cut = _truncate(fetched)
    return {"columns": ["investor_name", "contact", "city", "state", "country", "lead"],
            "rows": shown, "truncated": was_cut,
            "summary": f"{len(shown)} investor contact(s) in \"{city}\"."}
|
||||||
|
|
||||||
|
|
||||||
|
def run_investors_by_lead(conn, slots):
    """Non-graveyard investors whose `lead` field contains ``slots["lead"]``.

    Ordered by total invested (largest first), then name. The query fetches MAX_ROWS + 1
    rows so that hitting the ceiling can be reported as `truncated`.
    """
    lead = slots["lead"]
    query = (
        "SELECT investor_name, lead, total_invested, follow_up FROM fundraising_investors "
        "WHERE graveyard = 0 AND lead LIKE ? ESCAPE '\\' "
        "ORDER BY total_invested DESC, investor_name LIMIT ?")
    fetched = _rows(conn.execute(query, (like_contains(lead), MAX_ROWS + 1)))
    shown, was_cut = _truncate(fetched)
    return {"columns": ["investor_name", "lead", "total_invested", "follow_up"],
            "rows": shown, "truncated": was_cut,
            "summary": f"{len(shown)} live investor(s) led by \"{lead}\"."}
|
||||||
|
|
||||||
|
|
||||||
|
def run_top_investors_committed(conn, slots):
    """The ``slots["limit"]`` non-graveyard investors with the highest positive
    `total_invested`, largest first (ties broken by name)."""
    query = (
        "SELECT investor_name, total_invested, lead FROM fundraising_investors "
        "WHERE graveyard = 0 AND total_invested > 0 "
        "ORDER BY total_invested DESC, investor_name LIMIT ?")
    leaders = _rows(conn.execute(query, (slots["limit"],)))
    return {"columns": ["investor_name", "total_invested", "lead"],
            "rows": leaders,
            "truncated": False,
            "summary": f"Top {len(leaders)} investor(s) by committed capital."}
|
||||||
|
|
||||||
|
|
||||||
|
def run_investors_follow_up(conn, slots):
    """Open, non-deleted reminders that are tied to an investor, earliest due date first.

    Reminders without a due date sort last. The investor name prefers the current grid
    name and falls back to the name stored on the reminder. `overdue` is 1 when the date
    part of `due_date` is before today (UTC), else 0. Each row also carries `assignee_id`,
    which is not listed in `columns`.
    """
    cutoff = _today().isoformat()
    query = (
        "SELECT COALESCE(i.investor_name, r.investor_name) AS investor_name, r.title, "
        "r.due_date, r.status, r.assignee_id, "
        "CASE WHEN r.due_date IS NOT NULL AND substr(r.due_date,1,10) < ? THEN 1 ELSE 0 END AS overdue "
        "FROM reminders r LEFT JOIN fundraising_investors i ON i.id = r.investor_id "
        "WHERE r.deleted_at IS NULL AND r.status = 'open' AND r.investor_id IS NOT NULL "
        "ORDER BY (r.due_date IS NULL), r.due_date ASC LIMIT ?")
    fetched = _rows(conn.execute(query, (cutoff, MAX_ROWS + 1)))
    shown, was_cut = _truncate(fetched)
    return {"columns": ["investor_name", "title", "due_date", "status", "overdue"],
            "rows": shown, "truncated": was_cut,
            "summary": f"{len(shown)} investor(s) with an open follow-up reminder."}
|
||||||
|
|
||||||
|
|
||||||
|
# ── pipeline intents ──────────────────────────────────────────────────────────────────────
|
||||||
|
def run_pipeline_top(conn, slots):
    """The ``slots["limit"]`` non-deleted opportunities that are furthest along the funnel.

    Ordered by stage rank (highest first), then expected amount (largest first). Each row
    shows the linked investor's name (or the opportunity's own name when there is no
    linked investor row), the owner's full name, and the investor's latest activity
    timestamp.
    """
    limit = slots["limit"]
    latest = _last_activity_by_investor(conn)
    query = (
        "SELECT o.fundraising_investor_id AS inv_id, "
        "COALESCE(i.investor_name, o.name) AS investor_name, o.stage, o.expected_amount, "
        "o.probability, u.full_name AS owner FROM opportunities o "
        "LEFT JOIN fundraising_investors i ON i.id = o.fundraising_investor_id "
        "LEFT JOIN users u ON u.id = o.owner_id "
        "WHERE o.deleted_at IS NULL "
        f"ORDER BY {_STAGE_RANK_SQL} DESC, o.expected_amount DESC LIMIT ?")
    deals = _rows(conn.execute(query, (limit,)))
    for deal in deals:
        # inv_id is only a join key for the recency lookup; it is not returned.
        investor_id = deal.pop("inv_id")
        deal["last_activity_at"] = latest.get(investor_id)
    return {"columns": ["investor_name", "stage", "expected_amount", "probability", "owner",
                        "last_activity_at"],
            "rows": deals, "truncated": False,
            "summary": f"Top {len(deals)} live pipeline opportunit(ies) by stage."}
|
||||||
|
|
||||||
|
|
||||||
|
def run_pipeline_totals(conn, slots):
    """Per-stage count, expected total and committed total over non-deleted opportunities.

    Stages are listed in funnel order. The summary adds up the expected totals and counts
    across all returned stages.
    """
    query = (
        "SELECT stage, COUNT(*) AS count, COALESCE(SUM(expected_amount),0) AS expected_total, "
        "COALESCE(SUM(commitment_amount),0) AS committed_total FROM opportunities "
        f"WHERE deleted_at IS NULL GROUP BY stage ORDER BY {_STAGE_RANK_SQL}")
    per_stage = _rows(conn.execute(query))
    total = 0
    for entry in per_stage:
        total += entry["expected_total"]
    count = 0
    for entry in per_stage:
        count += entry["count"]
    return {"columns": ["stage", "count", "expected_total", "committed_total"],
            "rows": per_stage, "truncated": False,
            "summary": f"${total:,.0f} expected across {count} live opportunit(ies) in "
                       f"{len(per_stage)} stage(s)."}
|
||||||
|
|
||||||
|
|
||||||
|
# ── email / communication intents ─────────────────────────────────────────────────────────
|
||||||
|
def run_recent_emails(conn, slots):
    """The ``slots["limit"]`` most recent investor-linked emails, newest first.

    An email qualifies only if it has a live (non-tombstoned) ``email_account_messages``
    sighting and at least one ``email_investor_links`` row. When ``slots["direction"]`` is
    "outbound" or "inbound" and our own mailbox addresses are known, the sender must
    (outbound) or must not (inbound) be one of them. With no known addresses the direction
    filter is not applied.
    """
    limit = slots["limit"]
    direction = slots["direction"]
    conditions = [
        "EXISTS (SELECT 1 FROM email_account_messages eam WHERE eam.email_id = e.id "
        "AND eam.deleted_at IS NULL)",
        "EXISTS (SELECT 1 FROM email_investor_links l WHERE l.email_id = e.id)",
    ]
    bound = []
    own = _own_addresses(conn)
    if direction in ("inbound", "outbound") and own:
        membership = "IN" if direction == "outbound" else "NOT IN"
        # One placeholder per own address; the addresses themselves are bound, never inlined.
        placeholders = ",".join("?" * len(own))
        conditions.append(f"LOWER(e.from_email) {membership} ({placeholders})")
        bound.extend(sorted(own))
    sql = ("SELECT e.subject, e.from_name, e.from_email, e.sent_at, "
           "(SELECT fi.investor_name FROM email_investor_links l "
           " JOIN fundraising_investors fi ON fi.id = l.fundraising_investor_id "
           " WHERE l.email_id = e.id AND l.fundraising_investor_id IS NOT NULL LIMIT 1) AS investor "
           "FROM emails e WHERE " + " AND ".join(conditions) + " ORDER BY e.sent_at DESC LIMIT ?")
    bound.append(limit)
    messages = _rows(conn.execute(sql, bound))
    label = {"inbound": "received", "outbound": "sent"}.get(direction, "")
    return {"columns": ["sent_at", "subject", "from_name", "from_email", "investor"],
            "rows": messages, "truncated": False,
            "summary": f"{len(messages)} most-recent {label + ' ' if label else ''}investor email(s)."}
|
||||||
|
|
||||||
|
|
||||||
|
def run_investor_last_contact(conn, slots):
    """Latest activity timestamp, and days since, for up to 25 investors whose name
    contains ``slots["name"]``. Graveyard investors are included, sorted after live ones."""
    name = slots["name"]
    pattern = like_contains(name)
    latest = _last_activity_by_investor(conn)
    matched = _rows(conn.execute(
        "SELECT id, investor_name FROM fundraising_investors "
        "WHERE investor_name LIKE ? ESCAPE '\\' ORDER BY graveyard, investor_name LIMIT 25",
        (pattern,)))
    report = [
        {"investor_name": investor["investor_name"],
         "last_activity_at": latest.get(investor["id"]),
         "days_since": _days_since(latest.get(investor["id"]))}
        for investor in matched
    ]
    return {"columns": ["investor_name", "last_activity_at", "days_since"],
            "rows": report,
            "truncated": False,
            "summary": f"Last contact for {len(report)} investor(s) "
                       f"matching \"{name}\"."}
|
||||||
|
|
||||||
|
|
||||||
|
def run_comms_by_user(conn, slots):
    """The ``slots["limit"]`` most recent investor-linked emails sent from a mailbox owned
    by a user whose username or full name contains ``slots["user"]``.

    Only sent sightings that are not tombstoned are considered (``eam.is_sent = 1`` and
    ``eam.deleted_at IS NULL``), and only emails that have at least one
    ``email_investor_links`` row -- not the user's whole sent mail.
    """
    who = slots["user"]
    limit = slots["limit"]
    pattern = like_contains(who)
    query = (
        "SELECT e.subject, e.sent_at, u.full_name AS sender, "
        "(SELECT fi.investor_name FROM email_investor_links l "
        " JOIN fundraising_investors fi ON fi.id = l.fundraising_investor_id "
        " WHERE l.email_id = e.id AND l.fundraising_investor_id IS NOT NULL LIMIT 1) AS investor "
        "FROM emails e JOIN email_account_messages eam ON eam.email_id = e.id "
        "AND eam.deleted_at IS NULL AND eam.is_sent = 1 "
        "JOIN email_accounts ea ON ea.id = eam.account_id JOIN users u ON u.id = ea.user_id "
        "WHERE (u.username LIKE ? ESCAPE '\\' OR u.full_name LIKE ? ESCAPE '\\') "
        "AND EXISTS (SELECT 1 FROM email_investor_links l2 WHERE l2.email_id = e.id) "
        "ORDER BY e.sent_at DESC LIMIT ?")
    # The same pattern is bound twice: once for username, once for full_name.
    sent = _rows(conn.execute(query, (pattern, pattern, limit)))
    return {"columns": ["sent_at", "subject", "sender", "investor"],
            "rows": sent,
            "truncated": False,
            "summary": f"{len(sent)} recent email(s) sent by \"{who}\"."}
|
||||||
|
|
||||||
|
|
||||||
|
def run_email_counts_by_user(conn, slots):
    """Per-user counts of sent, investor-linked emails for three calendar windows.

    Windows start on this week's Monday, the 1st of this month, and January 1st of this
    year (all relative to today, UTC). Only sightings with ``is_sent = 1`` that are not
    tombstoned count, and only emails with an ``email_investor_links`` row. Users with a
    year-to-date count of zero are omitted. An optional ``slots["user"]`` narrows the
    result to users whose username or full name contains it.
    """
    today = _today()
    week_start = (today - timedelta(days=today.weekday())).isoformat()
    month_start = today.replace(day=1).isoformat()
    year_start = today.replace(month=1, day=1).isoformat()

    where = ("WHERE eam.deleted_at IS NULL AND eam.is_sent = 1 "
             "AND EXISTS (SELECT 1 FROM email_investor_links l WHERE l.email_id = e.id)")
    # Bind order follows placeholder order: the three window starts, then the user filter.
    bound = [week_start, month_start, year_start]
    user = slots.get("user")
    if user:
        pattern = like_contains(slots["user"])
        where += " AND (u.username LIKE ? ESCAPE '\\' OR u.full_name LIKE ? ESCAPE '\\')"
        bound += [pattern, pattern]

    query = (
        "SELECT u.full_name AS user, u.username, "
        "SUM(CASE WHEN substr(e.sent_at,1,10) >= ? THEN 1 ELSE 0 END) AS this_week, "
        "SUM(CASE WHEN substr(e.sent_at,1,10) >= ? THEN 1 ELSE 0 END) AS this_month, "
        "SUM(CASE WHEN substr(e.sent_at,1,10) >= ? THEN 1 ELSE 0 END) AS ytd "
        "FROM users u JOIN email_accounts ea ON ea.user_id = u.id "
        "JOIN email_account_messages eam ON eam.account_id = ea.id "
        "JOIN emails e ON e.id = eam.email_id " + where +
        " GROUP BY u.id HAVING ytd > 0 ORDER BY ytd DESC")
    tallies = _rows(conn.execute(query, bound))
    return {"columns": ["user", "this_week", "this_month", "ytd"],
            "rows": tallies,
            "truncated": False,
            "summary": f"Outbound email counts for {len(tallies)} user(s)."}
|
||||||
|
|
||||||
|
|
||||||
|
# ── registry ──────────────────────────────────────────────────────────────────────────────
|
||||||
|
# key -> {summary, slots, run, example}. `slots` is consumed by the runner's validator and
|
||||||
|
# (later) surfaced to the local-model translator + the UI as the single source of truth for
|
||||||
|
# what is queryable. SlotSpec: {type: int|enum|text, ...constraints}.
|
||||||
|
# Registry of every queryable intent. Each entry maps an intent key to:
#   summary - one-line description of what the intent answers
#   slots   - {slot_name: spec}; spec keys used here are type (int | enum | text) plus
#             default / min / max / choices / required / maxlen as applicable
#   example - a sample natural-language question for the intent
#   run     - the run_* function in this module that executes it, called as run(conn, slots)
INTENTS = {
    # ── investor intents ──
    "investors_cold": {
        "summary": "Investors we haven't contacted in a while (default 90 days).",
        "slots": {"days": {"type": "int", "default": 90, "min": 1, "max": 3650}},
        "example": "Which investors haven't we reached out to in the last 3 months?",
        "run": run_investors_cold,
    },
    "investor_lookup": {
        "summary": "One investor's contacts, email, committed total and per-fund breakdown.",
        "slots": {"name": {"type": "text", "required": True, "maxlen": 120}},
        "example": "What is Acme Capital's email and how much have they committed across funds?",
        "run": run_investor_lookup,
    },
    "investors_by_city": {
        "summary": "Investors with a contact located in a given city.",
        "slots": {"city": {"type": "text", "required": True, "maxlen": 80}},
        "example": "Who are all the investors located in Austin?",
        "run": run_investors_by_city,
    },
    "investors_by_lead": {
        "summary": "Investors owned by a given lead / team member.",
        "slots": {"lead": {"type": "text", "required": True, "maxlen": 80}},
        "example": "Show me the investors led by Jonathan.",
        "run": run_investors_by_lead,
    },
    "top_investors_committed": {
        "summary": "Top investors by total committed capital.",
        # limit is capped at the module-wide row ceiling
        "slots": {"limit": {"type": "int", "default": 10, "min": 1, "max": MAX_ROWS}},
        "example": "List our top 10 investors by committed capital.",
        "run": run_top_investors_committed,
    },
    "investors_follow_up": {
        "summary": "Investors we owe a follow-up to (have an open reminder), overdue first.",
        "slots": {},  # takes no slots
        "example": "Which investors do we owe follow-ups to?",
        "run": run_investors_follow_up,
    },
    # ── pipeline intents ──
    "pipeline_top": {
        "summary": "Top pipeline opportunities by stage, with investor, owner and last activity.",
        "slots": {"limit": {"type": "int", "default": 10, "min": 1, "max": MAX_ROWS}},
        "example": "List our top 10 pipeline investors by stage and last conversation.",
        "run": run_pipeline_top,
    },
    "pipeline_totals": {
        "summary": "Total pipeline dollars and the split across each stage.",
        "slots": {},  # takes no slots
        "example": "What is our total pipeline in dollars, split by stage?",
        "run": run_pipeline_totals,
    },
    # ── email / communication intents (limit capped at 100 rather than MAX_ROWS) ──
    "recent_emails": {
        "summary": "The most recent investor emails (optionally inbound or outbound only).",
        "slots": {"limit": {"type": "int", "default": 10, "min": 1, "max": 100},
                  "direction": {"type": "enum", "choices": ["any", "inbound", "outbound"],
                                "default": "any"}},
        "example": "What were the last 10 investor emails we sent, and who to?",
        "run": run_recent_emails,
    },
    "investor_last_contact": {
        "summary": "When we last had any activity with a given investor.",
        "slots": {"name": {"type": "text", "required": True, "maxlen": 120}},
        "example": "When did we last reach out to Acme Capital?",
        "run": run_investor_last_contact,
    },
    "comms_by_user": {
        "summary": "Recent investor emails sent by a given team member.",
        "slots": {"user": {"type": "text", "required": True, "maxlen": 80},
                  "limit": {"type": "int", "default": 10, "min": 1, "max": 100}},
        "example": "What were the last investor emails sent by Grant?",
        "run": run_comms_by_user,
    },
    "email_counts_by_user": {
        "summary": "How many investor emails each user sent this week / month / year-to-date.",
        # user is optional: run_email_counts_by_user reads it with slots.get("user")
        "slots": {"user": {"type": "text", "required": False, "maxlen": 80}},
        "example": "How many emails has Grant sent this week, this month, and year to date?",
        "run": run_email_counts_by_user,
    },
}
|
||||||
@@ -0,0 +1,127 @@
|
|||||||
|
"""NL-query runner — validate a {intent, slots} request, run the curated query, return rows.
|
||||||
|
|
||||||
|
This is the trust boundary. Whatever produced the request (a local model in W2, the web UI,
|
||||||
|
or a test) is untrusted: the runner accepts ONLY a known intent key and slot VALUES, coerces
|
||||||
|
each value to its declared type, and rejects anything off-spec — it never lets a caller name
|
||||||
|
a table/column, write SQL, or choose an operator. The intents do the rest with fixed,
|
||||||
|
parameterized SQL (see intents.py). All failure modes return a structured error dict; the
|
||||||
|
runner never raises to the caller (a bad `limit=abc` must not crash the request thread).
|
||||||
|
"""
|
||||||
|
import sqlite3
|
||||||
|
|
||||||
|
from .intents import INTENTS
|
||||||
|
|
||||||
|
|
||||||
|
def _coerce_slot(name, spec, raw):
    """Coerce and validate one raw slot value against its declared spec.

    Args:
        name: Slot name, used only in error messages.
        spec: Slot spec dict. Must carry "type" ("int", "enum" or "text"); may carry
            "default", "required", "min"/"max" (int), "choices" (enum), "maxlen" (text).
        raw: The untrusted value supplied by the caller (may be None / blank).

    Returns:
        A (value, error) pair. error is None on success, otherwise a human-readable
        string and value is None. An optional slot that was not supplied and has no
        default yields (None, None).
    """
    slot_type = spec["type"]
    # None and blank/whitespace-only strings both count as "not supplied".
    provided = raw is not None and not (isinstance(raw, str) and raw.strip() == "")

    if not provided:
        if "default" in spec:
            return spec["default"], None
        if spec.get("required"):
            return None, f"slot '{name}' is required"
        return None, None  # optional, absent

    if slot_type == "int":
        try:
            value = int(raw)
        except (TypeError, ValueError, OverflowError):
            # OverflowError: int(float("inf")) — reachable from JSON such as `Infinity`
            # or `1e999`. It must become a structured error, not a crash.
            return None, f"slot '{name}' must be an integer (got {raw!r})"
        # Out-of-range integers are clamped into [min, max] rather than rejected.
        if "min" in spec:
            value = max(spec["min"], value)
        if "max" in spec:
            value = min(spec["max"], value)
        return value, None

    if slot_type == "enum":
        value = str(raw).strip().lower()
        if value not in spec["choices"]:
            # An unrecognised choice falls back to the default when one is declared.
            if "default" in spec:
                return spec["default"], None
            return None, f"slot '{name}' must be one of {spec['choices']} (got {raw!r})"
        return value, None

    if slot_type == "text":
        value = str(raw).strip()
        maxlen = spec.get("maxlen", 200)
        if len(value) > maxlen:
            value = value[:maxlen]  # over-long text is truncated, not rejected
        return value, None

    return None, f"slot '{name}' has unknown type {slot_type!r}"  # registry bug, fail visibly
|
||||||
|
|
||||||
|
|
||||||
|
def validate(intent_key, raw_slots):
    """Validate an intent key and its raw slots WITHOUT running the query.

    Useful to the translator/UI for a dry-run check.

    Args:
        intent_key: Untrusted intent name; must be a string key of INTENTS.
        raw_slots: Untrusted mapping of slot name -> raw value. Any falsy value
            (None, {}, ...) is treated as "no slots supplied".

    Returns:
        (clean_slots, error_dict). error_dict is None on success; on failure
        clean_slots is None and error_dict is {"error", "intent", "detail"}.
        This function returns an error dict rather than raising for malformed
        input shapes (non-string intent, non-mapping slots).
    """
    from collections.abc import Mapping  # local import keeps this block self-contained

    # A non-string key (e.g. a JSON list) would make `in INTENTS` raise on an
    # unhashable type, so it is rejected up front as an unknown intent.
    if not isinstance(intent_key, str) or intent_key not in INTENTS:
        return None, {"error": "unknown_intent", "intent": intent_key,
                      "detail": f"unknown intent; known: {sorted(INTENTS)}"}
    spec = INTENTS[intent_key]["slots"]
    raw_slots = raw_slots or {}
    # Slots must be a mapping; a bare number/list/string from the caller is a
    # malformed request, reported the same way as any other bad slot.
    if not isinstance(raw_slots, Mapping):
        return None, {"error": "bad_slot", "intent": intent_key,
                      "detail": "slots must be an object mapping slot names to values "
                                f"(got {type(raw_slots).__name__})"}
    # Reject unexpected slot keys rather than ignore them — a request shaped wrong is a
    # misunderstanding worth surfacing, not silently dropping.
    unexpected = [k for k in raw_slots if k not in spec]
    if unexpected:
        return None, {"error": "bad_slot", "intent": intent_key,
                      "detail": f"unexpected slot(s): {unexpected}; allowed: {sorted(spec)}"}
    clean = {}
    for name, slot_spec in spec.items():
        value, err = _coerce_slot(name, slot_spec, raw_slots.get(name))
        if err:
            return None, {"error": "bad_slot", "intent": intent_key, "detail": err}
        # Optional slots that were absent and have no default are left out entirely.
        if value is not None or "default" in slot_spec:
            clean[name] = value
    return clean, None
|
||||||
|
|
||||||
|
|
||||||
|
def run_query(conn, intent_key, raw_slots=None, *, audit_fn=None, actor=None, source="api"):
    """Validate and execute one curated NL query, returning a result or error dict.

    On success the dict is {intent, slots, row_count} merged with whatever the intent's
    `run` callable returned (its keys win on collision). A validation failure returns the
    validator's error dict; a sqlite3.Error raised by the intent returns
    {error: "query_failed", intent, detail}. Exceptions other than sqlite3.Error raised
    by the intent are not caught here.

    If audit_fn is given it is called once per request with
    {actor, source, intent, slots, row_count, error}, so a query made through a
    leaked/automated credential is detectable. Exceptions raised by audit_fn are swallowed.
    """

    def _audit(slots, row_count, error):
        # Best-effort: a failing audit sink must never change the query outcome.
        if not audit_fn:
            return
        try:
            audit_fn({"actor": actor, "source": source, "intent": intent_key,
                      "slots": slots, "row_count": row_count, "error": error})
        except Exception:
            pass

    clean, err = validate(intent_key, raw_slots)
    if err:
        # Rejected requests are audited with the RAW slots as received.
        _audit(raw_slots, 0, err["error"])
        return err

    runner = INTENTS[intent_key]["run"]
    try:
        result = runner(conn, clean)
    except sqlite3.Error as exc:
        # Surface a query failure (e.g. a missing optional table) as a visible error — never
        # swallow it and hand back an empty result that reads as an authoritative "none".
        _audit(clean, 0, "query_failed")
        return {"error": "query_failed", "intent": intent_key, "detail": str(exc)}

    out = {"intent": intent_key, "slots": clean, "row_count": len(result.get("rows", []))}
    out.update(result)
    _audit(clean, out["row_count"], None)
    return out
|
||||||
|
|
||||||
|
|
||||||
|
def catalog():
    """Describe the queryable surface as plain data, one entry per registered intent.

    Each entry carries the intent key, its summary, its slot specs (the registry's own
    dict, not a copy) and its example ("" when the intent declares none). Single source
    of truth for the W2 translator prompt and any UI hint list.
    """
    entries = []
    for key, meta in INTENTS.items():
        entries.append({
            "intent": key,
            "summary": meta["summary"],
            "slots": meta["slots"],
            "example": meta.get("example", ""),
        })
    return entries
|
||||||
@@ -0,0 +1,237 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Tests for the W2 safe NL-query runner (the model-free core).
|
||||||
|
|
||||||
|
Boots the REAL schema (server.init_db against a temp DB — exact columns + all migrations),
|
||||||
|
inserts synthetic fundraising/email/reminder/pipeline data, and exercises every intent plus
|
||||||
|
the trust-boundary behaviour:
|
||||||
|
- each intent returns the right rows over the real schema;
|
||||||
|
- SOFT-DELETE is respected on both recency legs (a tombstoned communication and a tombstoned
|
||||||
|
email sighting never count), on reminders, and on opportunities; graveyard investors are
|
||||||
|
excluded from "live" intents;
|
||||||
|
- the validator rejects bad/unknown/unexpected slots WITHOUT crashing (the `?limit=abc` class);
|
||||||
|
- LIKE wildcards in a free-text slot are escaped (a city of "%" does NOT return everything);
|
||||||
|
- limits clamp to their caps; the audit hook fires with the intent + row count.
|
||||||
|
Synthetic data only — no real LP substance, no network, no model.
|
||||||
|
|
||||||
|
Run: cd backend && python3 nl_query/test_nl_query.py
|
||||||
|
"""
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import tempfile
|
||||||
|
from datetime import datetime, timedelta
|
||||||
|
|
||||||
|
_DATA = tempfile.mkdtemp()
|
||||||
|
os.environ["CRM_DATA_DIR"] = _DATA
|
||||||
|
os.environ["CRM_DB_PATH"] = os.path.join(_DATA, "crm.db")
|
||||||
|
os.environ["CRM_GMAIL_INTEGRATION_ENABLED"] = "1"
|
||||||
|
|
||||||
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) # backend/
|
||||||
|
import server # noqa: E402
|
||||||
|
import nl_query # noqa: E402
|
||||||
|
|
||||||
|
FAILS = []
|
||||||
|
|
||||||
|
|
||||||
|
def check(cond, msg):
    """Print a PASS/FAIL line for one assertion and record failures in FAILS."""
    if cond:
        print(" PASS " + msg)
        return
    print(" FAIL " + msg)
    FAILS.append(msg)
|
||||||
|
|
||||||
|
|
||||||
|
def _ago(days):
    """Return the UTC instant `days` days before now as an ISO-8601 string ending in "Z".

    Uses an aware `datetime.now(timezone.utc)` instead of the deprecated
    `datetime.utcnow()`, then drops tzinfo so the rendered text keeps the same shape as
    before (no "+00:00" offset before the trailing "Z").
    """
    from datetime import timezone  # local import: only needed by this helper

    now_utc = datetime.now(timezone.utc).replace(tzinfo=None)
    return (now_utc - timedelta(days=days)).isoformat() + "Z"
|
||||||
|
|
||||||
|
|
||||||
|
TODAY = datetime.utcnow().date()
|
||||||
|
|
||||||
|
|
||||||
|
def seed(conn):
    """Insert the synthetic fixture data the intent tests run against, then commit.

    Populates users, mailboxes, funds, investors (one in the graveyard), contacts,
    commitments, emails (matched, tombstoned and unmatched), communications, reminders
    and opportunities on `conn`.
    """
    c = conn.execute

    # users + mailboxes
    c("INSERT INTO users (id, username, email, password_hash, full_name, role) VALUES "
      "('u_grant','grant','grant@ten31.xyz','x','Grant Smith','admin'),"
      "('u_jon','jonathan','jon@ten31.xyz','x','Jonathan Lee','member')")
    c("INSERT INTO email_accounts (id, user_id, email_address, auth_method) VALUES "
      "('a_grant','u_grant','grant@ten31.xyz','dwd'),"
      "('a_jon','u_jon','jon@ten31.xyz','dwd')")

    # funds
    c("INSERT INTO fundraising_funds (id, column_id, fund_name, display_order) VALUES "
      "('f1','c_f1','Fund I',1),('f2','c_f2','Fund II',2)")

    # investors (graveyard flag is the live/retired axis; no deleted_at on this table)
    def inv(iid, name, lead, total, grave=0):
        c("INSERT INTO fundraising_investors (id, investor_name, lead, graveyard, "
          "source_row_id, total_invested) VALUES (?,?,?,?,?,?)",
          (iid, name, lead, grave, iid, total))

    inv("i_acme", "Acme Capital", "Jonathan Lee", 5_000_000)
    inv("i_beta", "Beta Partners", "Grant Smith", 2_000_000)
    inv("i_cold", "Cold Co", "Grant Smith", 0)  # never contacted
    inv("i_delta", "Delta LP", "Grant Smith", 1_000_000)  # only a (comms) signal
    inv("i_ghost", "Graveyard Ghost", "Grant Smith", 9_999_999, grave=1)

    # contacts (grid pills) + classic contact rows for the comms leg
    c("INSERT INTO fundraising_contacts (id, investor_id, full_name, email, title, city, "
      "contact_id, sort_order) VALUES "
      "('fc_a','i_acme','Alice Acme','alice@acme.com','GP','Austin','cc_alice',0),"
      "('fc_b','i_beta','Bob Beta','bob@beta.com','LP','Denver',NULL,0),"
      "('fc_d','i_delta','Dana Delta','dana@delta.com','CFO','Miami','cc_dana',0)")
    c("INSERT INTO contacts (id, first_name, last_name, email) VALUES "
      "('cc_alice','Alice','Acme','alice@acme.com'),"
      "('cc_dana','Dana','Delta','dana@delta.com')")

    # commitments — Acme across two funds (3M + 2M = 5M); Beta one fund.
    # Amounts are BOUND, not written into the SQL text: `3_000_000` is a valid Python
    # literal but SQLite only accepts underscore digit separators from 3.46.0, so the
    # inline form is a syntax error on older SQLite libraries.
    c("INSERT INTO fundraising_commitments (id, investor_id, fund_id, amount) VALUES "
      "('cm1','i_acme','f1',?),('cm2','i_acme','f2',?),"
      "('cm3','i_beta','f1',?)",
      (3_000_000, 2_000_000, 2_000_000))

    # emails: matched + a per-mailbox sighting. is_sent + from_email decide direction.
    def email(eid, frm, frm_name, days, inv_id, account, is_sent, deleted=False):
        c("INSERT INTO emails (id, rfc_message_id, from_email, from_name, sent_at, subject, "
          "is_matched, match_status) VALUES (?,?,?,?,?,?,1,'matched')",
          (eid, "rfc_" + eid, frm, frm_name, _ago(days), "Re: " + eid))
        c("INSERT INTO email_account_messages (id, email_id, account_id, gmail_message_id, "
          "gmail_thread_id, is_sent, deleted_at) VALUES (?,?,?,?,?,?,?)",
          ("eam_" + eid, eid, account, "g_" + eid, "t_" + eid, is_sent,
           _ago(days) if deleted else None))
        c("INSERT INTO email_investor_links (id, email_id, fundraising_investor_id, "
          "matched_address, match_kind) VALUES (?,?,?,?, 'exact_email')",
          ("eil_" + eid, eid, inv_id, frm))

    email("ea_recent", "grant@ten31.xyz", "Grant Smith", 0, "i_acme", "a_grant", 1)  # Acme: today
    email("eb_old", "grant@ten31.xyz", "Grant Smith", 40, "i_beta", "a_grant", 1)  # Beta: 40d
    email("edel", "grant@ten31.xyz", "Grant Smith", 0, "i_beta", "a_grant", 1, deleted=True)  # tombstoned
    email("ej", "jon@ten31.xyz", "Jonathan Lee", 0, "i_acme", "a_jon", 1)  # jonathan today
    email("ein", "alice@acme.com", "Alice Acme", 3, "i_acme", "a_grant", 0)  # inbound 3d

    # an UNMATCHED sent email by Grant (NO email_investor_links row) — captured, but not to a
    # known investor. The investor-email intents are matched-only, so it must be EXCLUDED from
    # comms_by_user / email_counts_by_user; without the matched-only filter it would inflate both.
    c("INSERT INTO emails (id, rfc_message_id, from_email, from_name, sent_at, subject, "
      "is_matched, match_status) VALUES ('eunm','rfc_eunm','grant@ten31.xyz','Grant Smith',?,"
      "'Internal: team lunch',0,'unmatched')", (_ago(0),))
    c("INSERT INTO email_account_messages (id, email_id, account_id, gmail_message_id, "
      "gmail_thread_id, is_sent, deleted_at) VALUES "
      "('eam_eunm','eunm','a_grant','g_eunm','t_eunm',1,NULL)")

    # communications (the other recency leg) — Delta has ONLY comms: one live (5d), one tombstoned
    # (today). If the soft-delete filter broke, Delta would read as contacted today.
    c("INSERT INTO communications (id, contact_id, type, communication_date, created_by) VALUES "
      "('cmm_live','cc_dana','email',?,'u_grant')", (_ago(5),))
    c("INSERT INTO communications (id, contact_id, type, communication_date, created_by, deleted_at) "
      "VALUES ('cmm_del','cc_dana','email',?,'u_grant',?)", (_ago(0), _ago(0)))

    # reminders — open(overdue) / open(future) / done / deleted / standalone
    def rem(rid, inv_id, title, due, status="open", deleted=False):
        # NOTE(review): `title` is written into BOTH investor_name and title — confirm the
        # denormalised investor_name column is not read by any intent under test.
        c("INSERT INTO reminders (id, investor_id, investor_name, title, due_date, status, "
          "deleted_at) VALUES (?,?,?,?,?,?,?)",
          (rid, inv_id, title, title, due, status, _ago(0) if deleted else None))

    rem("r_over", "i_beta", "Send deck", (TODAY - timedelta(days=1)).isoformat())  # overdue
    rem("r_future", "i_acme", "Quarterly check-in", (TODAY + timedelta(days=10)).isoformat())
    rem("r_done", "i_acme", "Old task", (TODAY - timedelta(days=2)).isoformat(), status="done")
    rem("r_del", "i_acme", "Tombstoned", (TODAY - timedelta(days=2)).isoformat(), deleted=True)
    rem("r_standalone", None, "Team chore", (TODAY - timedelta(days=1)).isoformat())

    # opportunities — commitment / engaged (live) / two archived (the new model has no 'lost'
    # stage: a dead deal is soft-deleted, so both excluded cases ride deleted_at)
    def opp(oid, inv_id, contact, stage, expected, owner, deleted=False):
        c("INSERT INTO opportunities (id, name, contact_id, stage, expected_amount, owner_id, "
          "fundraising_investor_id, deleted_at) VALUES (?,?,?,?,?,?,?,?)",
          (oid, oid, contact, stage, expected, owner, inv_id, _ago(0) if deleted else None))

    # opp contact_id must reference a real contacts row (FK on); reuse the two we made
    opp("o_acme", "i_acme", "cc_alice", "commitment", 4_000_000, "u_jon")
    opp("o_beta", "i_beta", "cc_dana", "engaged", 1_000_000, "u_grant")
    opp("o_lost", "i_acme", "cc_alice", "diligence", 9_000_000, "u_jon", deleted=True)
    opp("o_del", "i_beta", "cc_dana", "diligence", 7_000_000, "u_grant", deleted=True)
    conn.commit()
|
||||||
|
|
||||||
|
|
||||||
|
def names(res):
    """Return the investor_name of every row in a query result, in row order."""
    collected = []
    for row in res["rows"]:
        collected.append(row["investor_name"])
    return collected
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Boot the schema, seed the fixtures, then exercise every intent and the trust boundary.

    Each assertion goes through check(); if any failed, the failures are listed and the
    process exits with status 1, otherwise "ALL PASS" is printed.
    """
    server.init_db()
    conn = server.get_db()
    seed(conn)

    def run(*args, **kwargs):
        # Every query in this script goes through the public runner on the seeded connection.
        return nl_query.run_query(conn, *args, **kwargs)

    print("investors_cold")
    cold_30 = run("investors_cold", {"days": 30})
    check(names(cold_30) == ["Cold Co", "Beta Partners"],
          f"cold(30) never-first then stale: {names(cold_30)}")
    cold_90 = run("investors_cold", {"days": 90})
    check(cold_90["row_count"] == 1, "cold(90): only never-contacted")
    cold_all = names(run("investors_cold", {"days": 3650}))
    check("Graveyard Ghost" not in cold_all, "cold excludes graveyard investors")
    cold_3 = names(run("investors_cold", {"days": 3}))
    check("Delta LP" in cold_3, "cold(3) sees Delta (comms 5d)")
    cold_7 = names(run("investors_cold", {"days": 7}))
    check("Delta LP" not in cold_7, "cold(7): Delta's tombstoned comm (today) did NOT count")

    print("investor_lookup")
    lookup = run("investor_lookup", {"name": "acme"})
    check(lookup["row_count"] == 1 and lookup["rows"][0]["total_invested"] == 5_000_000,
          "lookup total committed")
    fund_names = {c["fund_name"] for c in lookup["rows"][0]["commitments"]}
    check(fund_names == {"Fund I", "Fund II"}, "lookup per-fund breakdown")
    check(lookup["rows"][0]["contacts"][0]["email"] == "alice@acme.com",
          "lookup surfaces contact email")

    print("investors_by_city / by_lead / top / follow_up")
    by_city = names(run("investors_by_city", {"city": "Austin"}))
    check(by_city == ["Acme Capital"], "by_city")
    by_lead = set(names(run("investors_by_lead", {"lead": "Grant"})))
    check(by_lead == {"Beta Partners", "Cold Co", "Delta LP"},
          "by_lead excludes graveyard + other leads")
    top_two = names(run("top_investors_committed", {"limit": 2}))
    check(top_two == ["Acme Capital", "Beta Partners"],
          "top by committed (graveyard + zero excluded)")
    follow_up = run("investors_follow_up")
    check(names(follow_up) == ["Beta Partners", "Acme Capital"],
          f"follow_up overdue-first, open-only: {names(follow_up)}")
    check(follow_up["rows"][0]["overdue"] == 1 and follow_up["rows"][1]["overdue"] == 0,
          "follow_up overdue flag")

    print("pipeline")
    totals = run("pipeline_totals")
    stages = {row["stage"]: row for row in totals["rows"]}
    check(set(stages) == {"commitment", "engaged"},
          f"pipeline_totals excludes archived/deleted: {set(stages)}")
    check(stages["commitment"]["expected_total"] == 4_000_000, "pipeline_totals stage sum")
    pipeline_top = run("pipeline_top", {"limit": 10})
    check(names(pipeline_top) == ["Acme Capital", "Beta Partners"],
          "pipeline_top furthest-stage first")
    check(pipeline_top["rows"][0]["last_activity_at"] is not None,
          "pipeline_top enriches last activity")

    print("emails")
    outbound = run("recent_emails", {"direction": "outbound"})
    check(outbound["row_count"] == 3,
          "recent_emails(outbound): 3 live (tombstoned sighting excluded)")
    inbound = run("recent_emails", {"direction": "inbound"})
    check(inbound["row_count"] == 1, "recent_emails(inbound)")
    any_direction = run("recent_emails")
    check(any_direction["row_count"] == 4, "recent_emails(any): 4 live")
    last_contact = run("investor_last_contact", {"name": "beta"})
    check(last_contact["rows"][0]["days_since"] >= 39, "investor_last_contact days_since")
    grant_comms = run("comms_by_user", {"user": "Grant"})
    check(grant_comms["row_count"] == 2,
          "comms_by_user: grant's 2 live MATCHED outbound (tombstoned + unmatched excluded)")
    counts = run("email_counts_by_user", {"user": "grant"})
    check(counts["rows"][0]["this_week"] == 1,
          "email_counts this_week = 1 live matched (tombstoned + unmatched excluded)")
    check(counts["rows"][0]["ytd"] >= 1, "email_counts ytd")

    print("trust boundary")
    check(run("investors_cold", {"days": "abc"})["error"] == "bad_slot",
          "bad int slot -> bad_slot, no crash")
    check(run("nope")["error"] == "unknown_intent", "unknown intent rejected")
    check(run("pipeline_totals", {"foo": 1})["error"] == "bad_slot", "unexpected slot rejected")
    check(run("investor_lookup", {})["error"] == "bad_slot", "missing required slot rejected")
    check(run("investors_by_city", {"city": "%"})["row_count"] == 0,
          "LIKE wildcard escaped — '%' does not match every row")
    check(run("investors_cold", {"days": 0})["slots"]["days"] == 1, "int slot clamps to min")
    clamped_limit = run("top_investors_committed", {"limit": 99999})["slots"]["limit"]
    declared_max = nl_query.INTENTS["top_investors_committed"]["slots"]["limit"]["max"]
    check(clamped_limit == declared_max, "int slot clamps to max")

    print("audit hook + catalog")
    seen = []
    run("pipeline_totals", audit_fn=seen.append, actor="tester", source="test")
    check(len(seen) == 1 and seen[0]["intent"] == "pipeline_totals" and seen[0]["error"] is None
          and seen[0]["actor"] == "tester", "audit hook fires with intent/actor/no-error")
    run("nope", audit_fn=seen.append)
    check(seen[-1]["error"] == "unknown_intent", "audit hook fires on rejection too")
    check(len(nl_query.catalog()) == len(nl_query.INTENTS), "catalog covers every intent")

    conn.close()
    print()
    if FAILS:
        print(f"{len(FAILS)} FAILED")
        for failure in FAILS:
            print(" - " + failure)
        sys.exit(1)
    print("ALL PASS")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
@@ -0,0 +1,139 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Endpoint tests for the W2 NL-query HTTP surface (POST /api/query/nl, GET /api/query/catalog).
|
||||||
|
|
||||||
|
Boots the REAL server against a temp DB and exercises the wiring end-to-end: auth gating
|
||||||
|
(bot/admin only), the direct {intent, slots} mode, the soft-error shape, and the status
|
||||||
|
mapping. The local model is forced UNAVAILABLE by pointing SPARK_CONTROL_URL at a dead local
|
||||||
|
port, so the {question} path exercises the 503 path deterministically without any Spark.
|
||||||
|
Synthetic data only.
|
||||||
|
|
||||||
|
Run: cd backend && python3 nl_query/test_nl_query_endpoint.py
|
||||||
|
"""
|
||||||
|
import http.client
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import sqlite3
|
||||||
|
import sys
|
||||||
|
import tempfile
|
||||||
|
import threading
|
||||||
|
from http.server import ThreadingHTTPServer
|
||||||
|
|
||||||
|
_DATA = tempfile.mkdtemp()
|
||||||
|
os.environ["CRM_DATA_DIR"] = _DATA
|
||||||
|
os.environ["CRM_DB_PATH"] = os.path.join(_DATA, "crm.db")
|
||||||
|
os.environ["CRM_GMAIL_INTEGRATION_ENABLED"] = "1"
|
||||||
|
# Dead port -> the local-model leg fails fast, so the {question} path returns 503 deterministically
|
||||||
|
# (set before server/config import; load_env uses setdefault so this wins over any repo .env).
|
||||||
|
os.environ["SPARK_CONTROL_URL"] = "http://127.0.0.1:1"
|
||||||
|
|
||||||
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) # backend/
|
||||||
|
import server # noqa: E402
|
||||||
|
import nl_query # noqa: E402
|
||||||
|
|
||||||
|
FAILS = []
|
||||||
|
|
||||||
|
|
||||||
|
def check(cond, msg):
    """Report one endpoint assertion: print PASS/FAIL and remember the message on failure."""
    passed = bool(cond)
    label = " PASS " if passed else " FAIL "
    print(label + msg)
    if not passed:
        FAILS.append(msg)
|
||||||
|
|
||||||
|
|
||||||
|
class _Quiet(server.CRMHandler):
    """The real CRM request handler with log_message overridden to do nothing."""

    def log_message(self, *a):
        # Deliberately a no-op so the test output only shows PASS/FAIL lines.
        return None
|
||||||
|
|
||||||
|
|
||||||
|
def _req(port, method, path, token=None, body=None):
    """Send one HTTP request to the test server on 127.0.0.1 and decode the JSON reply.

    Args:
        port: TCP port the test server listens on.
        method: HTTP method, e.g. "GET" or "POST".
        path: Request path.
        token: Optional bearer token; sent as an Authorization header when truthy.
        body: Optional JSON-serialisable payload; sent with a JSON Content-Type when
            not None.

    Returns:
        (status, data) where data is the parsed JSON body, or None for an empty body.
    """
    headers = {}
    if token:
        headers["Authorization"] = "Bearer " + token
    payload = None
    if body is not None:
        payload = json.dumps(body)
        headers["Content-Type"] = "application/json"

    conn = http.client.HTTPConnection("127.0.0.1", port, timeout=10)
    try:
        conn.request(method, path, body=payload, headers=headers)
        resp = conn.getresponse()
        raw = resp.read().decode("utf-8", "replace")
    finally:
        # Always release the socket, including when the request or the read raises.
        conn.close()

    data = json.loads(raw) if raw else None
    return resp.status, data
|
||||||
|
|
||||||
|
|
||||||
|
def _data(d):
    """Return the "data" member of a response envelope, or {} when it is missing or falsy."""
    envelope = d if d else {}
    payload = envelope.get("data")
    if payload:
        return payload
    return {}
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Boot the real server on an ephemeral port and exercise the NL-query endpoints.

    Seeds two users and three investors directly into the temp DB, then checks the
    direct {intent, slots} mode, soft errors, auth gating, the catalog, the {question}
    path with the local model unreachable, and the audit trail. Lists failures and exits
    with status 1 if any check failed, otherwise prints "ALL PASS".
    """
    server.init_db()
    db = sqlite3.connect(os.environ["CRM_DB_PATH"])
    try:
        db.execute("INSERT INTO users (id,username,email,password_hash,full_name,role,is_active) VALUES "
                   "('u_admin','grant','g@t.x','x','Grant','admin',1),"
                   "('u_mem','mem','m@t.x','x','Mem','member',1)")
        db.execute("INSERT INTO fundraising_investors (id,investor_name,lead,graveyard,source_row_id,"
                   "total_invested) VALUES ('a','Acme Capital','Jon',0,'a',5000000),"
                   "('b','Beta Partners','Grant',0,'b',2000000),('g','Ghost','Grant',1,'g',9000000)")
        db.commit()
    finally:
        db.close()
    admin = server.create_token("u_admin", "grant", "admin")
    member = server.create_token("u_mem", "mem", "member")

    # Port 0 lets the OS pick a free port; the chosen one is read back from the server.
    httpd = ThreadingHTTPServer(("127.0.0.1", 0), _Quiet)
    port = httpd.server_address[1]
    threading.Thread(target=httpd.serve_forever, daemon=True).start()
    try:
        print("direct {intent, slots} mode")
        st, d = _req(port, "POST", "/api/query/nl", admin,
                     {"intent": "top_investors_committed", "slots": {"limit": 2}})
        rows = _data(d).get("rows", [])
        check(st == 200 and [r["investor_name"] for r in rows] == ["Acme Capital", "Beta Partners"],
              f"admin direct query -> 200 + rows (got {st})")
        check(_data(d).get("intent") == "top_investors_committed", "response echoes interpreted intent")

        print("soft errors + validation")
        st, d = _req(port, "POST", "/api/query/nl", admin, {"intent": "made_up"})
        check(st == 200 and _data(d).get("error") == "unknown_intent",
              f"bad intent -> 200 with data.error=unknown_intent (got {st}, {_data(d).get('error')})")
        st, d = _req(port, "POST", "/api/query/nl", admin, {})
        check(st == 400, f"neither question nor intent -> 400 (got {st})")

        print("auth gating")
        st, _ = _req(port, "POST", "/api/query/nl", member,
                     {"intent": "top_investors_committed"})
        check(st == 403, f"member -> 403 (got {st})")
        st, _ = _req(port, "POST", "/api/query/nl", None, {"intent": "top_investors_committed"})
        check(st == 401, f"unauthenticated -> 401 (got {st})")

        print("catalog")
        st, d = _req(port, "GET", "/api/query/catalog", admin)
        check(st == 200 and isinstance(d.get("data"), list) and len(d["data"]) == len(nl_query.INTENTS),
              f"catalog -> 200 with every intent (got {st})")
        st, _ = _req(port, "GET", "/api/query/catalog", member)
        check(st == 403, f"catalog member -> 403 (got {st})")

        print("question path with the local model down")
        st, d = _req(port, "POST", "/api/query/nl", admin,
                     {"question": "who are our top investors by committed capital?"})
        check(st == 503 and _data(d).get("error") == "model_unavailable",
              f"question + dead model -> 503 model_unavailable (got {st}, {_data(d).get('error')})")
        check(_data(d).get("question"), "question echoed back even on outage")

        print("audit trail")
        db = sqlite3.connect(os.environ["CRM_DB_PATH"])
        try:
            n = db.execute("SELECT COUNT(*) FROM audit_log WHERE entity_type='nl_query'").fetchone()[0]
        finally:
            db.close()
        check(n >= 2, f"executed queries are audited (entity_type=nl_query rows: {n})")
    finally:
        # shutdown() only stops serve_forever(); server_close() releases the listening socket.
        httpd.shutdown()
        httpd.server_close()

    print()
    if FAILS:
        print(f"{len(FAILS)} FAILED")
        for f in FAILS:
            print(" - " + f)
        sys.exit(1)
    print("ALL PASS")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
@@ -0,0 +1,107 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Tests for the W2 NL translator (question -> {intent, slots}) — the local-model leg.
|
||||||
|
|
||||||
|
The model is stubbed via an injected chat_fn, so this runs fully offline (no Spark, no
|
||||||
|
network). Covers:
|
||||||
|
- build_system() exposes the whole intent catalog as the model's closed vocabulary;
|
||||||
|
- translate() returns the parsed {intent, slots} and DROPS slot keys the intent doesn't
|
||||||
|
declare (model noise), while every surviving value is still validated downstream;
|
||||||
|
- the translation failure modes: no intent fit -> no_match; unparseable -> no_match;
|
||||||
|
local model unreachable -> model_unavailable (so the endpoint can 503);
|
||||||
|
- answer() chains translate + the validated runner end-to-end, and a HALLUCINATED intent
|
||||||
|
from the model is still rejected by the validator (the model output is never trusted).
|
||||||
|
|
||||||
|
Run: cd backend && python3 nl_query/test_translate.py
|
||||||
|
"""
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import tempfile
|
||||||
|
|
||||||
|
_DATA = tempfile.mkdtemp()
|
||||||
|
os.environ["CRM_DATA_DIR"] = _DATA
|
||||||
|
os.environ["CRM_DB_PATH"] = os.path.join(_DATA, "crm.db")
|
||||||
|
|
||||||
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) # backend/
|
||||||
|
import server # noqa: E402
|
||||||
|
import nl_query # noqa: E402
|
||||||
|
|
||||||
|
T = nl_query # exercise the public API (translate/answer/build_system are re-exported)
|
||||||
|
|
||||||
|
FAILS = []
|
||||||
|
|
||||||
|
|
||||||
|
def check(cond, msg):
    """Print the outcome of one translator assertion; failed messages are kept in FAILS."""
    if not cond:
        print(" FAIL " + msg)
        FAILS.append(msg)
    else:
        print(" PASS " + msg)
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Exercise build_system(), translate() and answer() with a stubbed chat function.

    No model is contacted: every translate()/answer() call is given its own chat_fn.
    The answer() section boots the schema on the temp DB and seeds three investors.
    Lists failures and exits with status 1 if any check failed, else prints "ALL PASS".
    """
    print("build_system")
    system_prompt = nl_query.build_system()
    every_key_listed = all(key in system_prompt for key in nl_query.INTENTS)
    check(every_key_listed, "system prompt lists every intent key")
    check("days (integer, default 90)" in system_prompt, "system prompt renders int slot + default")
    check("one of any|inbound|outbound" in system_prompt, "system prompt renders enum choices")

    print("translate")
    captured = {}

    def fake(prompt, system):
        # Record what the translator handed to the model, then answer with one noisy slot.
        captured["system"] = system
        captured["prompt"] = prompt
        return {"intent": "investors_cold", "slots": {"days": 90, "bogus": "x"}}

    translated = T.translate("who's gone quiet for 3 months?", chat_fn=fake)
    check(translated == {"intent": "investors_cold", "slots": {"days": 90}},
          f"routes to intent + drops unknown slot 'bogus': {translated}")
    check(nl_query.INTENTS and "investors_cold" in captured["system"], "chat_fn received the catalog")
    check(captured["prompt"] == "who's gone quiet for 3 months?", "chat_fn received the question")

    null_intent = T.translate("x", chat_fn=lambda question, system: {"intent": None})
    check(null_intent["error"] == "no_match", "intent null -> no_match")
    unparseable = T.translate("x", chat_fn=lambda question, system: None)
    check(unparseable["error"] == "no_match", "unparseable model reply -> no_match")
    empty_question = T.translate("", chat_fn=lambda question, system: {"intent": "x"})
    check(empty_question["error"] == "no_match", "empty question -> no_match (no model call needed)")

    def boom(q, s):
        raise RuntimeError("spark down")

    outage = T.translate("x", chat_fn=boom)
    check(outage["error"] == "model_unavailable", "local model unreachable -> model_unavailable")

    print("answer (end-to-end through the validated runner)")
    server.init_db()
    conn = server.get_db()
    conn.execute("INSERT INTO fundraising_investors (id, investor_name, lead, graveyard, "
                 "source_row_id, total_invested) VALUES "
                 "('a','Acme Capital','Jon',0,'a',5000000),"
                 "('b','Beta Partners','Grant',0,'b',2000000),"
                 "('g','Ghost','Grant',1,'g',9000000)")
    conn.commit()

    answered = T.answer(
        conn, "top investors",
        chat_fn=lambda question, system: {"intent": "top_investors_committed", "slots": {"limit": 2}})
    check([row["investor_name"] for row in answered["rows"]] == ["Acme Capital", "Beta Partners"],
          "answer() runs the translated query")
    check(answered["question"] == "top investors", "answer() echoes the original question")

    hallucinated = T.answer(
        conn, "nonsense",
        chat_fn=lambda question, system: {"intent": "made_up_intent", "slots": {}})
    check(hallucinated.get("error") == "unknown_intent",
          "hallucinated intent is rejected by the validator")
    check(hallucinated["question"] == "nonsense", "answer() echoes question on error too")

    down = T.answer(conn, "anything", chat_fn=boom)
    check(down.get("error") == "model_unavailable", "answer() surfaces a model outage")

    conn.close()
    print()
    if FAILS:
        print(f"{len(FAILS)} FAILED")
        for failure in FAILS:
            print(" - " + failure)
        sys.exit(1)
    print("ALL PASS")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
@@ -0,0 +1,108 @@
|
|||||||
|
"""NL-query translator — plain-English question -> {intent, slots} on the LOCAL model.
|
||||||
|
|
||||||
|
The model's ONLY job is to pick one curated intent and fill its typed slots; it never
|
||||||
|
touches the database, never sees a row, and never writes SQL. Its output is untrusted and
|
||||||
|
is handed straight to the runner's validator (runner.validate), which is the trust boundary.
|
||||||
|
|
||||||
|
LOCAL-ONLY BY CONSTRUCTION. Translation runs on the local Qwen via Spark Control
|
||||||
|
(SPARK_CONTROL_URL), the same sanctioned local leg as intake/digest — so the question never
|
||||||
|
leaves the box and there is NO Claude path and NO redaction boundary to manage here (that
|
||||||
|
was the whole point of the W2 simplification: the answer is sensitive and never leaves; the
|
||||||
|
question is generic English and is translated locally). If the local model ever proves too
|
||||||
|
weak, a Claude-behind-redaction translator could be slotted in as an alternative `chat_fn`
|
||||||
|
WITHOUT changing the validator/executor — but it is deliberately not built.
|
||||||
|
|
||||||
|
`chat_fn(prompt, system) -> dict|None` is injectable so the whole translation leg is testable
|
||||||
|
offline without Spark. The default calls the ingest Spark client (lazy import — it ships in
|
||||||
|
the Docker image, not the bare CRM).
|
||||||
|
"""
|
||||||
|
from .intents import INTENTS
|
||||||
|
from .runner import run_query
|
||||||
|
|
||||||
|
|
||||||
|
def _default_chat_json(prompt, system):
    """Default ``chat_fn``: send the prompt to the local model via the ingest client.

    The client module (``llm``) lives in the sibling ``ingest`` directory, so that
    directory is put on ``sys.path`` before the lazy import. The path is added only
    when it is not already present: this function runs on every translation, and an
    unconditional insert would add one duplicate ``sys.path`` entry per call.

    Returns whatever ``llm.chat_json`` returns; exceptions from the import or the
    call propagate to the caller.
    """
    import os
    import sys

    ingest_dir = os.path.join(
        os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "ingest")
    if ingest_dir not in sys.path:
        sys.path.insert(0, ingest_dir)
    import llm  # noqa: E402 (ingest Spark client; raises if Spark is unreachable)
    return llm.chat_json(prompt, system=system, max_tokens=400)
|
||||||
|
|
||||||
|
|
||||||
|
def _render_slot(name, spec):
    """Describe one slot for the system prompt as ``name (kind[, qualifier])``.

    ``int`` and ``enum`` slots advertise their default when the spec carries one;
    any other slot type is presented as text and marked required or optional.
    """
    kind = spec["type"]
    if kind in ("int", "enum"):
        default_note = f", default {spec['default']}" if "default" in spec else ""
        if kind == "int":
            label = "integer"
        else:
            label = f"one of {'|'.join(spec['choices'])}"
        return f"{name} ({label}{default_note})"
    requirement = "required" if spec.get("required") else "optional"
    return f"{name} (text, {requirement})"
|
||||||
|
|
||||||
|
|
||||||
|
def build_system():
    """Assemble the system prompt: fixed instructions, then the intent catalog.

    Every entry of INTENTS contributes its key and summary, its slots rendered by
    _render_slot (or "(none)" when it has no slots), and its example question when
    the spec provides one. The catalog is the model's closed vocabulary.
    """
    header = [
        "You translate a question about a venture fund's investor database into ONE "
        "structured query. Respond with ONLY a JSON object and nothing else:",
        ' {"intent": "<one key below, or null>", "slots": {<slot>: <value>}}',
        "",
        "Rules:",
        "- Choose the single best-fitting intent. If none fits, return {\"intent\": null}.",
        "- Use ONLY the slot names listed for the chosen intent; omit a slot to accept its default.",
        "- Convert natural durations to the integer a slot wants: '3 months'->90, 'a quarter'->90, "
        "'6 weeks'->42, 'a year'/'year to date'->365.",
        "- Copy names, cities and people verbatim from the question into text slots.",
        "- No commentary, no markdown, JSON only.",
        "",
        "Intents:",
    ]
    catalog = []
    for intent_key, intent_spec in INTENTS.items():
        rendered = [_render_slot(slot_name, slot_spec)
                    for slot_name, slot_spec in intent_spec["slots"].items()]
        slot_str = "; ".join(rendered) or "(none)"
        catalog.append(f"- {intent_key}: {intent_spec['summary']}")
        catalog.append(f" slots: {slot_str}")
        example = intent_spec.get("example")
        if example:
            catalog.append(f" e.g. \"{example}\"")
    return "\n".join(header + catalog)
|
||||||
|
|
||||||
|
|
||||||
|
def translate(question, *, chat_fn=None):
    """Map a question to {intent, slots} on the local model.

    Args:
        question: the plain-English question. None or blank short-circuits to
            'no_match' without calling the model.
        chat_fn: callable ``(prompt, system) -> dict | None``; defaults to
            _default_chat_json.

    Returns:
        ``{"intent": str, "slots": dict}`` on success, otherwise an error dict
        ``{"error", "detail"}`` where error is 'model_unavailable' (chat_fn raised;
        the endpoint 503s) or 'no_match' (the model could not map the question to
        any intent, or returned something that is not a usable intent).
    """
    chat_fn = chat_fn or _default_chat_json
    q = (question or "").strip()
    if not q:
        return {"error": "no_match", "detail": "empty question"}
    try:
        data = chat_fn(q, build_system())
    except Exception as exc:  # connection/runtime failure on the LOCAL model
        return {"error": "model_unavailable", "detail": str(exc)}
    if not isinstance(data, dict):
        return {"error": "no_match", "detail": "model returned no parseable JSON"}

    intent = data.get("intent")
    if intent is None:
        return {"error": "no_match", "detail": "no intent fit the question"}
    # The model's output is untrusted: a list/dict here would raise TypeError
    # (unhashable) in the INTENTS lookup below, so anything that is not a string
    # is reported as no_match instead of crashing the translator.
    if not isinstance(intent, str):
        return {"error": "no_match", "detail": "model returned a non-string intent"}
    # Accept the null sentinel regardless of case/padding ("Null", " none ", ...).
    if intent.strip().lower() in ("", "null", "none"):
        return {"error": "no_match", "detail": "no intent fit the question"}

    slots = data.get("slots")
    slots = slots if isinstance(slots, dict) else {}
    # Drop slot KEYS the chosen intent doesn't declare — model noise, not a safety concern
    # (every surviving VALUE still goes through full type validation in the runner). Unknown
    # intents are left as-is so the runner rejects them as unknown_intent.
    if intent in INTENTS:
        allowed = INTENTS[intent]["slots"]
        slots = {k: v for k, v in slots.items() if k in allowed}
    return {"intent": intent, "slots": slots}
|
||||||
|
|
||||||
|
|
||||||
|
def answer(conn, question, *, chat_fn=None, audit_fn=None, actor=None, source="api"):
    """Translate a question locally, then execute it through the validated runner.

    On a translation error the error dict is returned with the original question
    added. Otherwise the runner's result is returned, also tagged with the original
    question so a human can see what was asked alongside how it was interpreted.
    """
    translated = translate(question, chat_fn=chat_fn)
    if not translated.get("error"):
        outcome = run_query(conn, translated["intent"], translated["slots"],
                            audit_fn=audit_fn, actor=actor, source=source)
        outcome["question"] = question
        return outcome
    return {**translated, "question": question}
|
||||||
@@ -0,0 +1,51 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Dev harness — fire questions at the LOCAL model and print how each is translated.
|
||||||
|
|
||||||
|
Lets you eyeball whether the local Qwen maps real questions to the right curated query
|
||||||
|
(intent + slots), against your real Spark, with NO UI, auth, HTTP, or deploy. This is the
|
||||||
|
cheap way to validate translation quality before building the web/Matrix surfaces. It only
|
||||||
|
translates (it does not touch the DB), so no data is needed and nothing leaves the box.
|
||||||
|
|
||||||
|
NOT shipped and NOT a test (no `test_` prefix) — a developer convenience.
|
||||||
|
|
||||||
|
Needs SPARK_CONTROL_URL set (read from the repo .env) and the Spark reachable.
|
||||||
|
Run:
|
||||||
|
python3 backend/nl_query/try_questions.py # the built-in sample set
|
||||||
|
python3 backend/nl_query/try_questions.py "when did we last email Acme?"
|
||||||
|
"""
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) # backend/
|
||||||
|
import nl_query # noqa: E402
|
||||||
|
|
||||||
|
SAMPLES = [
|
||||||
|
"Which investors haven't we reached out to in the last 3 months?",
|
||||||
|
"Which investors do we owe follow-ups to?",
|
||||||
|
"What is Acme Capital's email and how much have they committed across funds?",
|
||||||
|
"When did we last reach out to Acme Capital?",
|
||||||
|
"What were the last 10 investor emails we sent, and who to?",
|
||||||
|
"What were the last 10 investor emails we received?",
|
||||||
|
"Who are all the investors located in Austin?",
|
||||||
|
"List our top 10 investors by committed capital.",
|
||||||
|
"List our top 10 pipeline investors by stage and the most recent conversation.",
|
||||||
|
"What is our total pipeline in dollars, split by stage?",
|
||||||
|
"What were the last investor emails sent by Grant?",
|
||||||
|
"How many emails has Jonathan sent this week, this month, and year to date?",
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Translate each command-line question (or the built-in SAMPLES) and print the result.

    For every question, prints either the error code and detail, or the chosen
    intent with its slots.
    """
    questions = sys.argv[1:] or SAMPLES
    spark_url = os.environ.get('SPARK_CONTROL_URL', '(unset)')
    print(f"Translating {len(questions)} question(s) on the local model "
          f"(SPARK_CONTROL_URL={spark_url})\n")
    for question in questions:
        result = nl_query.translate(question)
        if not result.get("error"):
            print(f" ? {question}\n -> {result['intent']} slots={result['slots']}\n")
            continue
        print(f" ? {question}\n -> [{result['error']}] {result.get('detail', '')}\n")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
@@ -0,0 +1,67 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Aggregate test runner for the backend suite.
|
||||||
|
|
||||||
|
The backend tests are standalone scripts (each with `if __name__ == "__main__"`, no
|
||||||
|
pytest). This discovers every backend/**/test_*.py and runs each in its OWN subprocess
|
||||||
|
(tests set os.environ and import `server` with different configs, so isolation matters),
|
||||||
|
prints a one-line PASS/FAIL per test, dumps output only for failures, and exits non-zero
|
||||||
|
if any test fails.
|
||||||
|
|
||||||
|
Run: python3 backend/run_tests.py (from the repo root)
|
||||||
|
or: cd backend && python3 run_tests.py
|
||||||
|
Filter: python3 backend/run_tests.py soft_delete redaction # substring match on path
|
||||||
|
"""
|
||||||
|
import os
|
||||||
|
import subprocess
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
|
||||||
|
BACKEND = os.path.dirname(os.path.abspath(__file__))
|
||||||
|
|
||||||
|
|
||||||
|
def discover(filters):
    """Return the sorted paths of every test_*.py file found under BACKEND.

    __pycache__ directories are pruned from the walk. When `filters` is non-empty,
    a file is kept only if at least one filter is a substring of its
    BACKEND-relative path.
    """
    matches = []
    for folder, subdirs, filenames in os.walk(BACKEND):
        # Prune in place so os.walk never descends into bytecode caches.
        subdirs[:] = [name for name in subdirs if name != "__pycache__"]
        for filename in filenames:
            if not (filename.startswith("test_") and filename.endswith(".py")):
                continue
            full_path = os.path.join(folder, filename)
            relative = os.path.relpath(full_path, BACKEND)
            if filters and not any(token in relative for token in filters):
                continue
            matches.append(full_path)
    matches.sort()
    return matches
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Run every discovered test in its own subprocess and summarize the results.

    Command-line arguments are path-substring filters passed to discover(). Prints
    one PASS/FAIL line per test, echoes captured output only for failures, and
    exits with status 1 when nothing matched or any test failed.
    """
    tests = discover(sys.argv[1:])
    if not tests:
        print("No tests matched.")
        sys.exit(1)
    print(f"Running {len(tests)} backend test(s)\n")

    passed, failed = [], []
    started = time.time()
    for path in tests:
        rel = os.path.relpath(path, BACKEND)
        # stderr is folded into stdout so a failure's output is shown in order.
        proc = subprocess.run([sys.executable, path], cwd=BACKEND,
                              stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        if proc.returncode != 0:
            failed.append(rel)
            print(f" FAIL {rel}")
            captured = proc.stdout.decode("utf-8", "replace").rstrip()
            sys.stdout.write(captured + "\n")
            continue
        passed.append(rel)
        print(f" PASS {rel}")

    print(f"\n{len(passed)}/{len(tests)} passed in {time.time() - started:.1f}s")
    if not failed:
        print("ALL PASS")
        return
    print("FAILED:")
    for name in failed:
        print(f" - {name}")
    sys.exit(1)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
@@ -0,0 +1,43 @@
|
|||||||
|
-- contacts_census.sql — A/B/C census for the contacts <-> fundraising_contacts consolidation.
|
||||||
|
-- COUNTS ONLY: no names, emails, or amounts — nothing identifying leaves the box.
|
||||||
|
--
|
||||||
|
-- Run wherever you have the DB:
|
||||||
|
-- on the box (container shell): sqlite3 /data/crm.db < contacts_census.sql
|
||||||
|
-- against a local copy: sqlite3 data/crm.db < contacts_census.sql
|
||||||
|
--
|
||||||
|
-- Canonical link: fundraising_contacts.contact_id -> contacts.id (migration 0004).
|
||||||
|
-- A = contact linked to >=1 grid pill (healthy overlap — already done)
|
||||||
|
-- B = live contact with NO grid pill (needs a grid row/pill created)
|
||||||
|
-- C = grid pill with contact_id IS NULL (needs a contacts row created)
|
||||||
|
-- Sanity: A + B should equal "total live contacts".
|
||||||
|
|
||||||
|
.headers on
|
||||||
|
.mode column
|
||||||
|
|
||||||
|
WITH
  live AS (      -- every non-deleted contact
    SELECT id, contact_type
    FROM contacts
    WHERE deleted_at IS NULL
  ),
  linked AS (    -- A: live contacts referenced by at least one grid pill
    SELECT DISTINCT l.id
    FROM live l
    WHERE EXISTS (SELECT 1 FROM fundraising_contacts fc WHERE fc.contact_id = l.id)
  ),
  unlinked AS (  -- B: live contacts with no pill pointing at them
    SELECT l.id, l.contact_type
    FROM live l
    WHERE NOT EXISTS (SELECT 1 FROM fundraising_contacts fc WHERE fc.contact_id = l.id)
  )
SELECT 'total live contacts' AS metric, (SELECT COUNT(*) FROM live) AS n
UNION ALL SELECT 'A: linked (contact <-> grid pill)',
  (SELECT COUNT(*) FROM linked)
UNION ALL SELECT 'B: contacts-only (no grid pill)',
  (SELECT COUNT(*) FROM unlinked)
UNION ALL SELECT ' ...of B, contact_type=investor',
  (SELECT COUNT(*) FROM unlinked WHERE contact_type='investor')
UNION ALL SELECT ' ...of B, contact_type=prospect',
  (SELECT COUNT(*) FROM unlinked WHERE contact_type='prospect')
UNION ALL SELECT ' ...of B, with >=1 live communication',
  (SELECT COUNT(*) FROM unlinked u
   WHERE EXISTS (SELECT 1 FROM communications cm
                 WHERE cm.contact_id=u.id AND cm.deleted_at IS NULL))
UNION ALL SELECT ' ...of B, with >=1 live opportunity',
  (SELECT COUNT(*) FROM unlinked u
   WHERE EXISTS (SELECT 1 FROM opportunities o
                 WHERE o.contact_id=u.id AND o.deleted_at IS NULL))
UNION ALL SELECT 'C: pill-only (contact_id IS NULL)',
  (SELECT COUNT(*) FROM fundraising_contacts WHERE contact_id IS NULL)
-- Pills whose contact_id points at a contact that is missing or soft-deleted.
UNION ALL SELECT ' dangling pills (contact_id set, no live contact)',
  (SELECT COUNT(*) FROM fundraising_contacts fc
   WHERE fc.contact_id IS NOT NULL
     AND NOT EXISTS (SELECT 1 FROM live l WHERE l.id=fc.contact_id))
UNION ALL SELECT 'context: total grid pills (fundraising_contacts)',
  (SELECT COUNT(*) FROM fundraising_contacts)
UNION ALL SELECT 'context: total grid rows (fundraising_investors)',
  (SELECT COUNT(*) FROM fundraising_investors);
|
||||||
@@ -11,8 +11,8 @@ What it builds (into a SEPARATE dev DB, never crm.db):
|
|||||||
core migration (backend/migrations/), so the canonical/interaction/graph
|
core migration (backend/migrations/), so the canonical/interaction/graph
|
||||||
tables exist.
|
tables exist.
|
||||||
* A classic-model dataset: organizations, contacts (investors + prospects),
|
* A classic-model dataset: organizations, contacts (investors + prospects),
|
||||||
opportunities across pipeline stages, communications with entity-rich prose
|
opportunities across pipeline stages, and communications with entity-rich
|
||||||
notes, and lp_profiles.
|
prose notes.
|
||||||
* A fundraising grid (fundraising_state.grid_json) populated via the real
|
* A fundraising grid (fundraising_state.grid_json) populated via the real
|
||||||
sync_fundraising_relational() code path, so the normalized mirror + the
|
sync_fundraising_relational() code path, so the normalized mirror + the
|
||||||
grid->classic bridge behave exactly as in production.
|
grid->classic bridge behave exactly as in production.
|
||||||
@@ -179,7 +179,7 @@ def main():
|
|||||||
f"Prospect sourced via {random.choice(['X DM', 'warm intro', 'podcast'])}.", uid, now()))
|
f"Prospect sourced via {random.choice(['X DM', 'warm intro', 'podcast'])}.", uid, now()))
|
||||||
contacts.append((cid, first, last, org_name, "prospect"))
|
contacts.append((cid, first, last, org_name, "prospect"))
|
||||||
|
|
||||||
# ── opportunities + lp_profiles + communications ──
|
# ── opportunities + communications ──
|
||||||
stages = server.PIPELINE_STAGES
|
stages = server.PIPELINE_STAGES
|
||||||
for idx, (cid, first, last, org_name, ctype) in enumerate(contacts):
|
for idx, (cid, first, last, org_name, ctype) in enumerate(contacts):
|
||||||
person = f"{first} {last}"
|
person = f"{first} {last}"
|
||||||
@@ -193,25 +193,12 @@ def main():
|
|||||||
"expected_amount, probability, fund_name, description, next_step, owner_id, priority, updated_at) "
|
"expected_amount, probability, fund_name, description, next_step, owner_id, priority, updated_at) "
|
||||||
"VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?)",
|
"VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?)",
|
||||||
(gen(), f"{org_name or person} — {fund_label}", cid, org_ids.get(org_name) if org_name else None,
|
(gen(), f"{org_name or person} — {fund_label}", cid, org_ids.get(org_name) if org_name else None,
|
||||||
stage, amt if stage in ("committed", "funded") else 0, amt,
|
stage, amt if stage == "commitment" else 0, amt,
|
||||||
{"lead": 10, "outreach": 25, "meeting": 40, "due_diligence": 60, "committed": 90, "funded": 100}[stage],
|
{"lead": 10, "engaged": 35, "diligence": 60, "commitment": 90}[stage],
|
||||||
fund_label, f"Potential {fund_label} allocation for {person}.",
|
fund_label, f"Potential {fund_label} allocation for {person}.",
|
||||||
random.choice(["Send deck", "Schedule call", "Await IC", "Send subdocs"]),
|
random.choice(["Send deck", "Schedule call", "Await IC", "Send subdocs"]),
|
||||||
uid, random.choice(["low", "medium", "high"]), now()))
|
uid, random.choice(["low", "medium", "high"]), now()))
|
||||||
|
|
||||||
# lp_profile for ~closed investors
|
|
||||||
if ctype == "investor" and idx % 2 == 0:
|
|
||||||
amt = random.choice(AMOUNTS)
|
|
||||||
conn.execute(
|
|
||||||
"INSERT INTO lp_profiles (id, contact_id, commitment_amount, funded_amount, commitment_date, "
|
|
||||||
"fund_name, investor_type, accredited, legal_docs_signed, wire_received, k1_sent, notes, updated_at) "
|
|
||||||
"VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?)",
|
|
||||||
(gen(), cid, amt, amt if idx % 4 == 0 else 0, past(120),
|
|
||||||
random.choice(list(FUND_LABELS.values())),
|
|
||||||
random.choice(["family_office", "institutional", "endowment", "individual"]),
|
|
||||||
1, 1 if idx % 3 else 0, 1 if idx % 4 == 0 else 0, 0,
|
|
||||||
f"Closed LP. Accreditation on file. Primary contact {person}.", now()))
|
|
||||||
|
|
||||||
# 2-4 communications each, entity-rich prose
|
# 2-4 communications each, entity-rich prose
|
||||||
for k in range(random.randint(2, 4)):
|
for k in range(random.randint(2, 4)):
|
||||||
ctype_comm, subj, body = random.choice(COMM_TEMPLATES)
|
ctype_comm, subj, body = random.choice(COMM_TEMPLATES)
|
||||||
@@ -275,7 +262,7 @@ def main():
|
|||||||
|
|
||||||
print(f"\nSynthetic dev DB written to: {db}")
|
print(f"\nSynthetic dev DB written to: {db}")
|
||||||
print(" Classic model:")
|
print(" Classic model:")
|
||||||
for t in ("organizations", "contacts", "opportunities", "communications", "lp_profiles"):
|
for t in ("organizations", "contacts", "opportunities", "communications"):
|
||||||
print(f" {t:<24} {count(t)}")
|
print(f" {t:<24} {count(t)}")
|
||||||
print(" Fundraising grid (after real sync):")
|
print(" Fundraising grid (after real sync):")
|
||||||
for t in ("fundraising_investors", "fundraising_contacts", "fundraising_funds",
|
for t in ("fundraising_investors", "fundraising_contacts", "fundraising_funds",
|
||||||
|
|||||||
+2024
-390
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,99 @@
|
|||||||
|
"""Minimal outbound SMTP sender for the CRM (daily digest, test sends).
|
||||||
|
|
||||||
|
Config comes ONLY from SMTP_* environment variables. Two ways those get set:
|
||||||
|
* dev / bare run: a local .env (see .env.example).
|
||||||
|
* Start9 box: docker_entrypoint.sh reads the files the "Configure Digest SMTP"
|
||||||
|
StartOS action writes under /data/secrets/smtp/ and exports them as env.
|
||||||
|
The backend never reads those files directly, so dev and prod share one path.
|
||||||
|
|
||||||
|
This is the package's OWN dedicated mailbox (per-package custom SMTP) — it is
|
||||||
|
independent of any StartOS system-wide SMTP account; nothing here calls into the
|
||||||
|
platform. Stdlib only (smtplib/ssl/email), consistent with the rest of runtime.
|
||||||
|
"""
|
||||||
|
import os
|
||||||
|
import smtplib
|
||||||
|
import ssl
|
||||||
|
from email.message import EmailMessage
|
||||||
|
|
||||||
|
|
||||||
|
class SMTPNotConfigured(Exception):
    """Signals that the SMTP_* environment is missing.

    Callers are expected to turn this into a clear 'not configured' response
    rather than a 500.
    """
|
||||||
|
|
||||||
|
|
||||||
|
def smtp_configured():
    """Report whether SMTP_HOST is set to a non-blank value."""
    host = os.environ.get("SMTP_HOST", "")
    return host.strip() != ""
|
||||||
|
|
||||||
|
|
||||||
|
def load_smtp_config():
    """Read the SMTP_* environment variables into a connection config dict.

    Returns:
        dict with keys ``host``, ``port`` (int), ``from_addr``, ``username``,
        ``password`` and ``security`` (one of 'starttls', 'tls', 'none').
        Unrecognized security values fall back to 'starttls'. A missing,
        non-numeric or out-of-range port falls back to the default for the
        chosen security mode: 465 for 'tls', otherwise 587.

    Raises:
        SMTPNotConfigured: SMTP_HOST is unset or blank.
    """
    host = os.environ.get("SMTP_HOST", "").strip()
    if not host:
        raise SMTPNotConfigured("SMTP_HOST is not set")
    # Port/security come from a free-text action field; normalize defensively.
    security = (os.environ.get("SMTP_SECURITY", "") or "starttls").strip().lower()
    if security not in ("starttls", "tls", "none"):
        security = "starttls"
    # Implicit TLS conventionally listens on 465; 587 expects a plaintext greeting
    # followed by STARTTLS, so it is the wrong fallback when security is 'tls'.
    default_port = 465 if security == "tls" else 587
    try:
        port = int(str(os.environ.get("SMTP_PORT", "") or default_port).strip())
    except ValueError:
        port = default_port
    if not 1 <= port <= 65535:  # int() also accepts "0", "-1", "70000"
        port = default_port
    return {
        "host": host,
        "port": port,
        "from_addr": os.environ.get("SMTP_FROM", "").strip(),
        "username": os.environ.get("SMTP_USERNAME", "").strip(),
        # Not stripped: leading/trailing whitespace may be part of the password.
        "password": os.environ.get("SMTP_PASSWORD", ""),
        "security": security,
    }
|
||||||
|
|
||||||
|
|
||||||
|
def _connect(cfg, timeout):
    """Open an authenticated SMTP connection per the configured security mode.

    'tls' = implicit TLS (SMTPS, usually 465); 'starttls' = upgrade on 587;
    'none' = plaintext (for a LAN relay that does its own transport security).

    Args:
        cfg: config dict as produced by load_smtp_config (reads host, port,
            security, username, password).
        timeout: socket timeout in seconds, passed to smtplib.

    Returns:
        The connected smtplib.SMTP / SMTP_SSL object; the caller must quit() it.
        Login is attempted only when a username is configured.

    Raises:
        Whatever smtplib/ssl/socket raise while connecting, upgrading or logging
        in. If the failure happens after the socket is open, the connection is
        closed before the exception propagates.
    """
    if cfg["security"] == "tls":
        ctx = ssl.create_default_context()
        server = smtplib.SMTP_SSL(cfg["host"], cfg["port"], timeout=timeout, context=ctx)
    else:
        server = smtplib.SMTP(cfg["host"], cfg["port"], timeout=timeout)
    try:
        server.ehlo()
        if cfg["security"] == "starttls":
            server.starttls(context=ssl.create_default_context())
            server.ehlo()  # re-identify over the now-encrypted channel
        if cfg["username"]:
            server.login(cfg["username"], cfg["password"])
    except BaseException:
        # The caller never receives `server` on failure, so nobody else can close it.
        server.close()
        raise
    return server
|
||||||
|
|
||||||
|
|
||||||
|
def send_email(to_addrs, subject, body, *, html=None, cfg=None, timeout=30):
    """Send one message through the configured SMTP server.

    Args:
        to_addrs: a single address (str) or an iterable of addresses. Entries are
            stringified and stripped; blank and None entries are ignored.
        subject: the Subject header.
        body: the plain-text body.
        html: optional HTML alternative attached alongside the plain text.
        cfg: config dict as returned by load_smtp_config; loaded from the
            environment when omitted.
        timeout: socket timeout in seconds for the SMTP connection.

    Returns:
        ``{'sent_to': [addresses], 'from': sender}`` on success.

    Raises:
        SMTPNotConfigured: no SMTP host, or neither SMTP_FROM nor SMTP_USERNAME.
        ValueError: no usable recipient (including ``to_addrs=None``).
        smtplib.SMTPException: the server rejected the connection or message.
    """
    cfg = cfg or load_smtp_config()
    if to_addrs is None:
        to_addrs = []  # report the documented ValueError, not a TypeError
    elif isinstance(to_addrs, str):
        to_addrs = [to_addrs]
    # Skip None entries: str(None) would otherwise become the recipient "None".
    to_addrs = [a for a in (str(x).strip() for x in to_addrs if x is not None) if a]
    if not to_addrs:
        raise ValueError("no recipients")

    from_addr = cfg["from_addr"] or cfg["username"]
    if not from_addr:
        raise SMTPNotConfigured("no SMTP_FROM or SMTP_USERNAME to use as sender")

    msg = EmailMessage()
    msg["From"] = from_addr
    msg["To"] = ", ".join(to_addrs)
    msg["Subject"] = subject
    msg.set_content(body)
    if html:
        msg.add_alternative(html, subtype="html")

    server = _connect(cfg, timeout)
    try:
        server.send_message(msg, from_addr=from_addr, to_addrs=to_addrs)
    finally:
        # Best-effort goodbye: a failure to QUIT must not mask the send outcome.
        try:
            server.quit()
        except Exception:
            pass
    return {"sent_to": to_addrs, "from": from_addr}
|
||||||
@@ -0,0 +1,126 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Regression test for the /assets/ path-traversal containment fix (v0.1.0:74).
|
||||||
|
|
||||||
|
Before the fix, get_path()/urlparse did NOT normalize '..', so an unauthenticated
|
||||||
|
GET /assets/../../data/crm.db (raw client, no client-side normalization) escaped the
|
||||||
|
frontend root and read any file the process could — the LP DB, the JWT secret, the
|
||||||
|
Gmail key. The fix resolves the target with os.path.realpath and 404s anything that
|
||||||
|
does not stay under FRONTEND_ROOT (server.py, the `/assets/` branch of do_GET).
|
||||||
|
|
||||||
|
This boots the REAL server in-process against a throwaway frontend root, plants a
|
||||||
|
decoy "secret" OUTSIDE that root, and proves: (1) traversal vectors that resolve to a
|
||||||
|
real readable file outside the root still 404 and leak no bytes; (2) the live crm.db
|
||||||
|
path is 404'd; (3) URL-encoded separators don't help; (4) a legit in-bounds asset
|
||||||
|
still serves 200 (the fix isn't over-broad). Synthetic only (guardrail #9).
|
||||||
|
|
||||||
|
Run: cd backend && python3 test_assets_traversal.py
|
||||||
|
"""
|
||||||
|
import http.client
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import tempfile
|
||||||
|
import threading
|
||||||
|
from http.server import ThreadingHTTPServer
|
||||||
|
|
||||||
|
# Lay out a throwaway tree BEFORE importing server (FRONTEND_DIR/ROOT resolve at import):
|
||||||
|
# base/frontend/{index.html,assets/app.css} <- the served root
|
||||||
|
# base/secret.txt <- a real file a traversal would target
|
||||||
|
# base/data/crm.db <- the live DB, created by init_db()
|
||||||
|
_BASE = tempfile.mkdtemp()
|
||||||
|
_FRONTEND = os.path.join(_BASE, "frontend")
|
||||||
|
os.makedirs(os.path.join(_FRONTEND, "assets"))
|
||||||
|
_DATA = os.path.join(_BASE, "data")
|
||||||
|
os.makedirs(_DATA)
|
||||||
|
with open(os.path.join(_FRONTEND, "index.html"), "w") as f:
|
||||||
|
f.write("<!doctype html><title>crm</title>")
|
||||||
|
_CSS_MARKER = "/* legit-asset-marker-7f3a */"
|
||||||
|
with open(os.path.join(_FRONTEND, "assets", "app.css"), "w") as f:
|
||||||
|
f.write(_CSS_MARKER)
|
||||||
|
_SECRET_MARKER = "TOPSECRET-JWT-zq19"
|
||||||
|
with open(os.path.join(_BASE, "secret.txt"), "w") as f:
|
||||||
|
f.write(_SECRET_MARKER)
|
||||||
|
|
||||||
|
os.environ["CRM_FRONTEND_DIR"] = _FRONTEND
|
||||||
|
os.environ["CRM_DATA_DIR"] = _DATA
|
||||||
|
os.environ["CRM_DB_PATH"] = os.path.join(_DATA, "crm.db")
|
||||||
|
|
||||||
|
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||||
|
import server # noqa: E402
|
||||||
|
|
||||||
|
FAILS = []
|
||||||
|
|
||||||
|
|
||||||
|
def check(cond, msg):
    """Print a PASS or FAIL line for `msg`; record failing messages in FAILS."""
    if cond:
        print(" PASS " + msg)
        return
    print(" FAIL " + msg)
    FAILS.append(msg)
|
||||||
|
|
||||||
|
|
||||||
|
class _Quiet(server.CRMHandler):
    """CRMHandler whose log_message is a no-op, keeping the test output clean."""

    def log_message(self, *_args):
        return None
|
||||||
|
|
||||||
|
|
||||||
|
def _get(port, path):
    """Raw GET with the path sent verbatim — http.client does NOT normalize '..',
    which is exactly the unauthenticated raw-client threat the fix defends against.

    Returns (status, body) with the body decoded as UTF-8 (undecodable bytes are
    replaced). The connection is closed even when the request or read fails, so a
    failing vector cannot leak a socket into the rest of the run.
    """
    conn = http.client.HTTPConnection("127.0.0.1", port, timeout=10)
    try:
        conn.request("GET", path)
        resp = conn.getresponse()
        body = resp.read().decode("utf-8", "replace")
        return resp.status, body
    finally:
        conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Boot the real handler on an ephemeral local port and assert /assets/ containment.

    Checks that an in-bounds asset still serves 200, and that traversal vectors
    aimed at the decoy secret, the live crm.db and /etc/passwd all return 404
    without leaking content. Exits with status 1 if any check failed.
    """
    server.init_db()  # creates base/data/crm.db and the full schema
    check(os.path.exists(os.environ["CRM_DB_PATH"]), "init_db created the live crm.db (a real traversal target)")

    httpd = ThreadingHTTPServer(("127.0.0.1", 0), _Quiet)  # port 0 = any free port
    port = httpd.server_address[1]
    threading.Thread(target=httpd.serve_forever, daemon=True).start()
    try:
        # ── legit in-bounds asset still serves (containment is not over-broad) ──
        print("\n[legit asset]")
        st, body = _get(port, "/assets/app.css")
        check(st == 200, f"in-bounds /assets/app.css serves 200 (got {st})")
        check(_CSS_MARKER in body, "in-bounds asset body is served intact")

        # ── traversal to a REAL file outside the root: 404, zero bytes leaked ──
        print("\n[traversal -> decoy secret outside the root]")
        for vec in ["/assets/../../secret.txt",
                    "/assets/../../../secret.txt",
                    "/assets/..%2f..%2fsecret.txt",  # urlparse won't decode %2f
                    "/assets/..%2F..%2Fsecret.txt"]:  # …nor uppercase %2F (some clients send it)
            st, body = _get(port, vec)
            check(st == 404, f"{vec} -> 404 (got {st})")
            check(_SECRET_MARKER not in body, f"{vec} leaks no secret bytes")

        # ── traversal to the live crm.db (the headline vector from the eval) ──
        print("\n[traversal -> live crm.db]")
        for vec in ["/assets/../../data/crm.db",
                    "/assets/../data/crm.db",
                    "/assets/..%2f..%2fdata%2fcrm.db"]:
            st, body = _get(port, vec)
            check(st == 404, f"{vec} -> 404 (got {st})")
            check("SQLite format 3" not in body, f"{vec} leaks no DB header")

        # ── deep absolute-style escape ──
        print("\n[deep escape]")
        vec = "/assets/../../../../../../../../etc/passwd"
        st, body = _get(port, vec)
        # Report the vector that was actually sent, not an abbreviated one.
        check(st == 404, f"{vec} -> 404 (got {st})")
        check("root:" not in body, "/etc/passwd not leaked")
    finally:
        httpd.shutdown()
        # shutdown() only stops serve_forever(); server_close() releases the socket.
        httpd.server_close()

    print()
    if FAILS:
        print(f"FAILED ({len(FAILS)}):")
        for f in FAILS:
            print(f" - {f}")
        sys.exit(1)
    print("ALL PASS (assets path-traversal containment)")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
@@ -0,0 +1,224 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Tests for the mobile Contacts card's grid-derived signals (Phase 8a).
|
||||||
|
|
||||||
|
GET /api/contacts enriches each classic contact with two read-only, live-derived fields
|
||||||
|
sourced from the fundraising grid (the canonical investor model), for the mobile card:
|
||||||
|
- `committed` -> the linked investor's total_invested (>0 drives the existing-LP avatar ring),
|
||||||
|
mirroring existing_investor_by_source_row (committed capital, not graveyard);
|
||||||
|
- `pipeline_stage` -> that investor's live derived stage (drives the card's stage pill),
|
||||||
|
or null when the investor isn't in the pipeline.
|
||||||
|
- `priority` -> that investor's priority flag (drives the mobile Contacts Priority sort, 8d).
|
||||||
|
- `source_row_id` -> that investor's grid row id (the "Open investor in Grid" deep-link target, 8h),
|
||||||
|
present for ANY grid-linked contact (even a zero-commit prospect), null otherwise.
|
||||||
|
A contact with no grid link (pure classic/legacy contact) gets committed 0 / stage null / priority false
|
||||||
|
/ source_row_id null.
|
||||||
|
Signals are derived fresh on read and never stored on the contact. Synthetic data only.
|
||||||
|
|
||||||
|
Run: cd backend && python3 test_contacts_grid_signals.py
|
||||||
|
"""
|
||||||
|
import http.client
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import sqlite3
|
||||||
|
import sys
|
||||||
|
import tempfile
|
||||||
|
import threading
|
||||||
|
from http.server import ThreadingHTTPServer
|
||||||
|
|
||||||
|
_DATA = tempfile.mkdtemp()
|
||||||
|
os.environ["CRM_DATA_DIR"] = _DATA
|
||||||
|
os.environ["CRM_DB_PATH"] = os.path.join(_DATA, "crm.db")
|
||||||
|
|
||||||
|
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||||
|
import server # noqa: E402
|
||||||
|
|
||||||
|
FAILS = []
|
||||||
|
|
||||||
|
|
||||||
|
def check(cond, msg):
    """Report one test assertion.

    Prints a PASS or FAIL line for ``msg`` and, when ``cond`` is falsy,
    appends ``msg`` to the module-level ``FAILS`` list.
    """
    verdict = " PASS " if cond else " FAIL "
    print(verdict + msg)
    if cond:
        return
    FAILS.append(msg)
|
||||||
|
|
||||||
|
|
||||||
|
class _Quiet(server.CRMHandler):
    """The real CRM request handler with per-request logging switched off."""

    def log_message(self, *a):
        # Deliberate no-op so the test output stays limited to PASS/FAIL lines.
        return None
|
||||||
|
|
||||||
|
|
||||||
|
def _req(port, method, path, token=None, body=None):
    """Send one HTTP request to the local test server and decode the reply.

    Args:
        port: TCP port on 127.0.0.1 to connect to.
        method: HTTP verb, e.g. "GET" or "PUT".
        path: Request target, including any query string.
        token: Optional bearer token; sent as an Authorization header when truthy.
        body: Optional JSON-serializable payload; sent as application/json
            when it is not None.

    Returns:
        A ``(status, data)`` tuple. ``data`` is the parsed JSON body, or None
        when the body is empty or is not valid JSON.
    """
    headers = {}
    if token:
        headers["Authorization"] = "Bearer " + token
    payload = None
    if body is not None:
        payload = json.dumps(body)
        headers["Content-Type"] = "application/json"

    conn = http.client.HTTPConnection("127.0.0.1", port, timeout=10)
    try:
        conn.request(method, path, body=payload, headers=headers)
        resp = conn.getresponse()
        raw = resp.read().decode("utf-8", "replace")
    finally:
        # Release the socket even when the request or the read raises.
        conn.close()

    if not raw:
        return resp.status, None
    try:
        return resp.status, json.loads(raw)
    except ValueError:
        # Body is not JSON: callers only get the status code.
        return resp.status, None
|
||||||
|
|
||||||
|
|
||||||
|
# A single fund column: any non-zero value in it rolls up into total_invested,
# which is the "existing LP" signal.
COLUMNS = [
    {"id": "fund1", "label": "Fund III", "isFund": True},
]
ROW_ACME = {
    "id": "rowAcme",
    "investor_name": "Acme Capital",
    "priority": True,
    "fund1": 250000,
    "contacts": [
        {"name": "Jane Doe", "email": "jane@acme.com", "title": "GP"},
    ],
}
ROW_BETA = {
    "id": "rowBeta",
    "investor_name": "Beta Capital",
    "fund1": 0,
    "contacts": [
        {"name": "Pat Roe", "email": "pat@beta.com", "title": ""},
    ],
}
|
||||||
|
|
||||||
|
|
||||||
|
def _db():
    """Open a new sqlite3 connection to the file named by the CRM_DB_PATH env var."""
    db_path = os.environ["CRM_DB_PATH"]
    return sqlite3.connect(db_path)
|
||||||
|
|
||||||
|
|
||||||
|
def seed():
    """Insert the baseline rows this test needs straight into the database.

    Adds one admin user (id ``u1``, username ``grant``) and one classic
    contact (id ``cLegacy``) that has no fundraising-grid link.
    """
    c = _db()
    try:
        c.execute("INSERT INTO users (id,username,email,password_hash,full_name,role,is_active) "
                  "VALUES ('u1','grant','grant@ten31.example','x','Grant','admin',1)")
        # A pure classic contact with NO fundraising-grid link (not an investor).
        c.execute("INSERT INTO contacts (id,first_name,last_name,email,contact_type,status) "
                  "VALUES ('cLegacy','Vendor','Vince','vince@vendor.com','other','active')")
        c.commit()
    finally:
        # Close the handle even when an INSERT fails so the DB file is not left open.
        c.close()
|
||||||
|
|
||||||
|
|
||||||
|
def _by_email(contacts, email):
    """Return the first contact whose lower-cased email equals ``email``, else None.

    A missing or null ``email`` field on a contact is treated as the empty string.
    """
    for entry in contacts:
        address = entry.get("email") or ""
        if address.lower() == email:
            return entry
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Boot the real server on an ephemeral port and verify the contact grid signals.

    Seeds the grid through PUT /api/fundraising/state, links Acme to the
    pipeline, then checks the ``committed`` / ``pipeline_stage`` / ``priority`` /
    ``source_row_id`` fields on GET /api/contacts and GET /api/contacts/{id},
    including re-derivation after a stage change and the multi-link case.
    Every assertion goes through ``check``; the process exits with status 1
    when any of them failed.
    """
    server.init_db()
    seed()
    token = server.create_token("u1", "grant", "admin")

    httpd = ThreadingHTTPServer(("127.0.0.1", 0), _Quiet)
    port = httpd.server_address[1]
    threading.Thread(target=httpd.serve_forever, daemon=True).start()
    try:
        st, _ = _req(port, "PUT", "/api/fundraising/state", token,
                     {"grid": {"columns": COLUMNS, "rows": [ROW_ACME, ROW_BETA]}, "views": []})
        check(st == 200, f"seed grid via PUT /state (got {st})")

        # Put Acme into the pipeline at 'engaged' so its contact's card shows a stage pill.
        st, d = _req(port, "POST", "/api/fundraising/pipeline/link", token,
                     {"source_row_id": "rowAcme", "stage": "engaged"})
        check(st in (200, 201), f"link Acme to pipeline @engaged (got {st}, {d})")

        st, d = _req(port, "GET", "/api/contacts?limit=500", token)
        contacts = (d or {}).get("data") or []
        check(st == 200 and contacts, f"GET /api/contacts (got {st}, {len(contacts)} contacts)")

        jane = _by_email(contacts, "jane@acme.com")
        pat = _by_email(contacts, "pat@beta.com")
        vince = _by_email(contacts, "vince@vendor.com")
        check(jane is not None, "Acme's synced contact Jane Doe is in the directory")
        check(pat is not None, "Beta's synced contact Pat Roe is in the directory")
        check(vince is not None, "the pure classic contact Vince is in the directory")

        # Null-safe views: a missing contact is already recorded as a failure above,
        # so later lookups must keep reporting failures rather than raise TypeError.
        jane_d, pat_d, vince_d = jane or {}, pat or {}, vince or {}
        jane_id = jane_d.get("id")
        vince_id = vince_d.get("id")

        # ── existing-LP ring signal: committed reflects the linked investor's rollup ──
        print("\n[committed: existing-LP ring driven by the linked investor's total_invested]")
        check(jane_d.get("committed") == 250000,
              f"Jane.committed == 250000 (existing LP) (got {jane_d.get('committed')})")
        check(pat_d.get("committed") == 0,
              f"Pat.committed == 0 (zero-commit prospect, no ring) (got {pat_d.get('committed')})")
        check(vince_d.get("committed") == 0,
              f"Vince.committed == 0 (no grid link) (got {vince_d.get('committed')})")

        # ── stage-pill signal: pipeline_stage is the investor's live derived stage ──
        print("\n[pipeline_stage: stage pill driven by the investor's live opp stage]")
        check(jane_d.get("pipeline_stage") == "engaged",
              f"Jane.pipeline_stage == 'engaged' (got {jane_d.get('pipeline_stage')!r})")
        check(pat_d.get("pipeline_stage") is None,
              f"Pat.pipeline_stage is None (not in pipeline) (got {pat_d.get('pipeline_stage')!r})")
        check(vince_d.get("pipeline_stage") is None,
              f"Vince.pipeline_stage is None (no grid link) (got {vince_d.get('pipeline_stage')!r})")

        # ── priority signal: flagged investor → contact's Priority-sort key (8d) ──
        print("\n[priority: Contacts Priority sort driven by the investor's priority flag]")
        check(jane_d.get("priority") is True,
              f"Jane.priority is True (Acme flagged) (got {jane_d.get('priority')!r})")
        check(pat_d.get("priority") is False,
              f"Pat.priority is False (Beta not flagged) (got {pat_d.get('priority')!r})")
        check(vince_d.get("priority") is False,
              f"Vince.priority is False (no grid link) (got {vince_d.get('priority')!r})")

        # ── source_row_id signal: the "Open investor in Grid" deep-link target (8h) ──
        print("\n[source_row_id: Open-in-Grid deep-link target = the linked investor's grid row id]")
        check(jane_d.get("source_row_id") == "rowAcme",
              f"Jane.source_row_id == 'rowAcme' (got {jane_d.get('source_row_id')!r})")
        check(pat_d.get("source_row_id") == "rowBeta",
              f"Pat.source_row_id == 'rowBeta' (present for a zero-commit linked contact) (got {pat_d.get('source_row_id')!r})")
        check(vince_d.get("source_row_id") is None,
              f"Vince.source_row_id is None (no grid link) (got {vince_d.get('source_row_id')!r})")

        # ── the get-by-id endpoint carries the same signals (mobile detail sheet, 8b) ──
        print("\n[get-by-id: /api/contacts/{id} also injects committed + pipeline_stage]")
        st, d = _req(port, "GET", f"/api/contacts/{jane_id}", token)
        detail = (d or {}).get("data") or {}
        check(st == 200 and detail.get("committed") == 250000 and detail.get("pipeline_stage") == "engaged"
              and detail.get("priority") is True,
              f"detail carries committed/pipeline_stage/priority (got committed={detail.get('committed')}, stage={detail.get('pipeline_stage')!r}, priority={detail.get('priority')!r})")
        st, d = _req(port, "GET", f"/api/contacts/{vince_id}", token)
        vdetail = (d or {}).get("data") or {}
        check(st == 200 and vdetail.get("committed") == 0 and vdetail.get("pipeline_stage") is None
              and vdetail.get("priority") is False,
              f"unlinked contact detail has committed 0 / stage None / priority False (got {vdetail.get('committed')}, {vdetail.get('pipeline_stage')!r}, {vdetail.get('priority')!r})")

        # ── stage tracks the board: advancing the opp re-derives the contact's stage ──
        print("\n[derived-live: advancing the board stage re-derives the contact's pill]")
        opp_id = None
        st, d = _req(port, "GET", "/api/fundraising/state", token)
        # `or {}` / `or []` at each level: a JSON null must not raise AttributeError.
        state = (d or {}).get("data") or {}
        for r in (state.get("grid") or {}).get("rows") or []:
            if r.get("id") == "rowAcme":
                opp_id = r.get("opportunity_id")
        st, _ = _req(port, "PATCH", f"/api/opportunities/{opp_id}/stage", token, {"stage": "diligence"})
        check(st == 200, f"advance Acme's opp -> diligence (got {st})")
        st, d = _req(port, "GET", "/api/contacts?limit=500", token)
        jane2 = _by_email((d or {}).get("data") or [], "jane@acme.com")
        check((jane2 or {}).get("pipeline_stage") == "diligence",
              f"Jane.pipeline_stage re-derives to 'diligence' (got {(jane2 or {}).get('pipeline_stage')!r})")

        # ── dedup: a contact linked to two investors exposes the highest-committed one ──
        print("\n[dedup: highest-committed linked investor wins for a multi-linked contact]")
        c = _db()
        try:
            # Link Jane's classic contact to a SECOND, richer investor (direct rows — the grid sync
            # makes one link per pill; this exercises the multi-link branch in contact_grid_signals).
            c.execute("INSERT INTO fundraising_investors (id, investor_name, source_row_id, total_invested) "
                      "VALUES ('inv2','Mega Fund LP','rowMega',500000)")
            c.execute("INSERT INTO fundraising_contacts (id, investor_id, full_name, contact_id) "
                      "VALUES ('fc2','inv2','Jane Doe',?)", (jane_id,))
            c.commit()
        finally:
            c.close()
        st, d = _req(port, "GET", f"/api/contacts/{jane_id}", token)
        jd = (d or {}).get("data") or {}
        check(jd.get("committed") == 500000,
              f"multi-linked contact exposes the higher committed (500000 > 250000) (got {jd.get('committed')})")
        # The winning (higher-committed) link is Mega Fund LP, which is not flagged → priority follows it.
        check(jd.get("priority") is False,
              f"multi-linked contact's priority follows the higher-committed investor (Mega, unflagged) (got {jd.get('priority')!r})")
        # The deep-link target also follows the winning link → Mega's grid row (rowMega), not rowAcme.
        check(jd.get("source_row_id") == "rowMega",
              f"multi-linked contact's source_row_id follows the higher-committed investor (rowMega) (got {jd.get('source_row_id')!r})")
    finally:
        httpd.shutdown()
        # shutdown() only stops serve_forever(); server_close() releases the listening socket.
        httpd.server_close()

    print()
    if FAILS:
        print(f"FAILED ({len(FAILS)}):")
        for m in FAILS:
            print(" - " + m)
        sys.exit(1)
    print("All contacts-grid-signals tests passed.")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
@@ -0,0 +1,135 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Regression test for the dashboard KPI repoint + lp_profiles retirement (2026-06-16).
|
||||||
|
|
||||||
|
"Total Committed" used to SUM lp_profiles.commitment_amount — an orphaned table with no
|
||||||
|
reachable input path, so the dashboard read ~$0 while the real commitments lived in the
|
||||||
|
fundraising grid. It now sums fundraising_investors.total_invested (the canonical grid
|
||||||
|
rollup) with graveyarded (written-off) investors excluded, "Total Funded" is dropped
|
||||||
|
(the grid has no funded-vs-committed concept), and the /api/lp-profiles* + lp-breakdown
|
||||||
|
endpoints are gone.
|
||||||
|
|
||||||
|
v0.1.0:106 repointed "Total LPs" / "Prospects" off the retired contacts.contact_type onto
|
||||||
|
the canonical grid (investor entities): an LP = a grid investor with total_invested > 0
|
||||||
|
(graveyard excluded); a prospect = a live grid row with $0 committed (graveyard + the
|
||||||
|
'Untitled Investor' blank-row placeholder excluded).
|
||||||
|
|
||||||
|
This boots the REAL server against a temp DB, seeds grid investors (live LP, graveyarded,
|
||||||
|
live prospect, blank placeholder), and asserts: total_committed reflects the live grid
|
||||||
|
rollup only, total_lps / total_prospects use the grid-entity definitions, the metrics no
|
||||||
|
longer carry a total_funded key, and the retired routes 404. Synthetic only.
|
||||||
|
|
||||||
|
Run: cd backend && python3 test_dashboard_report.py
|
||||||
|
"""
|
||||||
|
import http.client
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import sqlite3
|
||||||
|
import sys
|
||||||
|
import tempfile
|
||||||
|
import threading
|
||||||
|
from http.server import ThreadingHTTPServer
|
||||||
|
|
||||||
|
_DATA = tempfile.mkdtemp()
|
||||||
|
os.environ["CRM_DATA_DIR"] = _DATA
|
||||||
|
os.environ["CRM_DB_PATH"] = os.path.join(_DATA, "crm.db")
|
||||||
|
|
||||||
|
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||||
|
import server # noqa: E402
|
||||||
|
|
||||||
|
FAILS = []
|
||||||
|
|
||||||
|
|
||||||
|
def check(cond, msg):
    """Print a PASS/FAIL line for ``msg``; on a falsy ``cond`` also record it in ``FAILS``."""
    if cond:
        print(" PASS " + msg)
        return
    print(" FAIL " + msg)
    FAILS.append(msg)
|
||||||
|
|
||||||
|
|
||||||
|
class _Quiet(server.CRMHandler):
    """CRM request handler whose ``log_message`` does nothing."""

    def log_message(self, *a):
        # Intentionally empty: suppresses per-request log lines during the test run.
        return None
|
||||||
|
|
||||||
|
|
||||||
|
def _get(port, path, token):
    """Issue an authenticated GET against the local test server.

    Args:
        port: TCP port on 127.0.0.1 to connect to.
        path: Request target.
        token: Bearer token placed in the Authorization header.

    Returns:
        A ``(status, data)`` tuple. ``data`` is the parsed JSON body, or None
        when the body is empty or is not valid JSON.
    """
    conn = http.client.HTTPConnection("127.0.0.1", port, timeout=10)
    try:
        conn.request("GET", path, headers={"Authorization": "Bearer " + token})
        resp = conn.getresponse()
        body = resp.read().decode("utf-8", "replace")
    finally:
        # Release the socket even when the request or the read raises.
        conn.close()

    if not body:
        return resp.status, None
    try:
        return resp.status, json.loads(body)
    except ValueError:
        # Body is not JSON (e.g. a plain 404 page): only the status is useful.
        return resp.status, None
|
||||||
|
|
||||||
|
|
||||||
|
def seed():
    """Insert the synthetic rows the dashboard assertions depend on.

    Writes one admin user, four fundraising_investors rows (live LP,
    graveyarded LP, live $0 prospect, blank 'Untitled Investor' placeholder)
    and two contacts (one live, one soft-deleted) into the DB at CRM_DB_PATH.
    """
    investors = [
        # live investor committed 3,000,000; graveyarded investor committed 500,000 (must be excluded)
        ("fiLive", "Harbor LP", "rowLive", 3000000, 0),
        ("fiDead", "Passed LP", "rowDead", 500000, 1),
        # a live prospect (in the grid, $0 committed) and a blank placeholder row — the prospect
        # count includes the former and excludes the latter ('Untitled Investor' = a blank grid row)
        ("fiProspect", "Prospect Co", "rowProspect", 0, 0),
        ("fiBlank", "Untitled Investor", "rowBlank", 0, 0),
    ]
    c = sqlite3.connect(os.environ["CRM_DB_PATH"])
    try:
        c.execute("INSERT INTO users (id,username,email,password_hash,full_name,role,is_active) "
                  "VALUES ('u1','grant','grant@ten31.example','x','Grant','admin',1)")
        c.executemany(
            "INSERT INTO fundraising_investors (id,investor_name,source_row_id,total_invested,graveyard) "
            "VALUES (?,?,?,?,?)", investors)
        # one live + one soft-deleted contact: total_contacts must count only the live one
        # (guards the deleted_at filter added alongside the contact_type repoint)
        c.execute("INSERT INTO contacts (id,first_name,last_name) VALUES ('ctLive','Ann','Live')")
        c.execute("INSERT INTO contacts (id,first_name,last_name,deleted_at) "
                  "VALUES ('ctGone','Bob','Gone','2026-06-01T00:00:00Z')")
        c.commit()
    finally:
        # Close the handle even when an INSERT fails so the DB file is not left open.
        c.close()
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Boot the real server on an ephemeral port and check the dashboard KPIs.

    Asserts that GET /api/reports/dashboard reports the expected
    total_committed / total_lps / total_prospects / total_contacts for the
    seeded rows, that ``total_funded`` is absent, and that the retired
    lp-profiles routes answer 404. Exits with status 1 if any check failed.
    """
    server.init_db()
    seed()
    token = server.create_token("u1", "grant", "admin")

    httpd = ThreadingHTTPServer(("127.0.0.1", 0), _Quiet)
    port = httpd.server_address[1]
    threading.Thread(target=httpd.serve_forever, daemon=True).start()
    try:
        print("\n[dashboard total_committed comes from the grid, graveyard excluded]")
        st, dash = _get(port, "/api/reports/dashboard", token)
        check(st == 200, f"GET dashboard -> 200 (got {st})")
        # `or {}` at each level: a JSON null for "data"/"metrics" must surface as
        # failed checks below, not as an AttributeError.
        metrics = ((dash or {}).get("data") or {}).get("metrics") or {}
        check(metrics.get("total_committed") == 3000000,
              f"total_committed = live grid rollup only (3,000,000; got {metrics.get('total_committed')})")
        check("total_funded" not in metrics,
              f"total_funded key dropped from metrics (got keys {sorted(metrics)})")

        print("\n[Total LPs / Prospects derived from the grid, not the retired contacts.contact_type]")
        check(metrics.get("total_lps") == 1,
              f"total_lps = grid investors committed>0, graveyard excluded (1; got {metrics.get('total_lps')})")
        check(metrics.get("total_prospects") == 1,
              f"total_prospects = grid rows with $0 committed; graveyard + 'Untitled Investor' excluded (1; got {metrics.get('total_prospects')})")
        check(metrics.get("total_contacts") == 1,
              f"total_contacts excludes soft-deleted contacts (1; got {metrics.get('total_contacts')})")

        print("\n[retired lp_profiles endpoints 404]")
        for path in ("/api/lp-profiles", "/api/lp-profiles/anything", "/api/reports/lp-breakdown"):
            st, _ = _get(port, path, token)
            check(st == 404, f"GET {path} -> 404 (got {st})")
    finally:
        httpd.shutdown()
        # shutdown() only stops serve_forever(); server_close() releases the listening socket.
        httpd.server_close()

    print()
    if FAILS:
        print(f"FAILED ({len(FAILS)}):")
        for f in FAILS:
            print(f" - {f}")
        sys.exit(1)
    print("ALL PASS (dashboard KPI repoint + lp_profiles retirement)")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
@@ -0,0 +1,419 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Tests for the daily activity digest (Phase B): the per-user + per-investor
|
||||||
|
activity queries (soft-delete filtered), inbound dedup, the two-section body, the
|
||||||
|
local-summary fallback, the DB-backed policy resolver, and the scheduler's
|
||||||
|
once-per-day / send-hour / policy / force guards.
|
||||||
|
|
||||||
|
The local Spark model and the mail transport are stubbed — no network. Synthetic
|
||||||
|
data only (guardrail #9).
|
||||||
|
Run: cd backend && python3 test_digest_builder.py
|
||||||
|
"""
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import sqlite3
|
||||||
|
import sys
|
||||||
|
import tempfile
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
|
||||||
|
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||||
|
os.environ["CRM_DB_PATH"] = os.path.join(tempfile.mkdtemp(), "crm.db")
|
||||||
|
os.environ.setdefault("CRM_DATA_DIR", os.path.dirname(os.environ["CRM_DB_PATH"]))
|
||||||
|
os.environ["CRM_DIGEST_ENABLED"] = "1" # so the non-force scheduler path is live
|
||||||
|
|
||||||
|
import digest_builder # noqa: E402
|
||||||
|
from email_integration import digest_scheduler # noqa: E402
|
||||||
|
|
||||||
|
FAILS = []
|
||||||
|
SINCE = "2026-06-17T00:00:00Z"
|
||||||
|
UNTIL = "2026-06-18T00:00:00Z"
|
||||||
|
|
||||||
|
|
||||||
|
def check(cond, msg):
    """Print a PASS or FAIL line for ``msg`` and collect failed messages in ``FAILS``."""
    prefix = " FAIL " if not cond else " PASS "
    print(prefix + msg)
    if not cond:
        FAILS.append(msg)
|
||||||
|
|
||||||
|
|
||||||
|
def _conn():
    """Open a sqlite3 connection to CRM_DB_PATH that yields ``sqlite3.Row`` rows."""
    handle = sqlite3.connect(os.environ["CRM_DB_PATH"])
    handle.row_factory = sqlite3.Row
    return handle
|
||||||
|
|
||||||
|
|
||||||
|
def setup():
    """Create the minimal schema and load the synthetic fixture rows.

    Builds the tables the digest queries read (users, email_accounts, emails,
    email_account_messages, email_investor_links, fundraising_investors,
    organizations, contacts, app_settings) and fills them with three users,
    ten emails, eleven mailbox sightings and ten investor links.
    """
    schema = """
        CREATE TABLE users (id TEXT PRIMARY KEY, username TEXT, full_name TEXT,
        email TEXT, role TEXT, is_active INT DEFAULT 1);
        CREATE TABLE email_accounts (id TEXT PRIMARY KEY, user_id TEXT, email_address TEXT);
        CREATE TABLE emails (id TEXT PRIMARY KEY, subject TEXT, body_text TEXT, snippet TEXT,
        from_email TEXT, sent_at TEXT, is_matched INT DEFAULT 1);
        CREATE TABLE email_account_messages (id TEXT PRIMARY KEY, email_id TEXT, account_id TEXT,
        gmail_message_id TEXT, gmail_thread_id TEXT, is_sent INT DEFAULT 0, deleted_at TEXT);
        CREATE TABLE email_investor_links (id TEXT PRIMARY KEY, email_id TEXT,
        fundraising_investor_id TEXT, organization_id TEXT, contact_id TEXT, matched_address TEXT);
        CREATE TABLE fundraising_investors (id TEXT PRIMARY KEY, investor_name TEXT);
        CREATE TABLE organizations (id TEXT PRIMARY KEY, name TEXT, deleted_at TEXT);
        CREATE TABLE contacts (id TEXT PRIMARY KEY, first_name TEXT, last_name TEXT, deleted_at TEXT);
        CREATE TABLE app_settings (key TEXT PRIMARY KEY, value_json TEXT, updated_at TEXT);
        """

    user_rows = [
        ("u1", "grant", "Grant Gilliam", "grant@ten31.xyz", "admin", 1),
        ("u2", "jk", "Jonathan K", "jk@ten31.xyz", "member", 1),
        ("u3", "retired", "Old Admin", "old@ten31.xyz", "admin", 0),  # inactive -> excluded
    ]
    account_rows = [
        ("a1", "u1", "grant@ten31.xyz"),
        ("a2", "u2", "jk@ten31.xyz"),
        ("a3", "u3", "old@ten31.xyz"),
    ]
    investor_rows = [
        ("inv1", "Harbor & Vine"),
        ("inv2", "Brightwater Capital"),
        ("inv3", "Vela Partners"),
    ]
    org_rows = [
        ("org1", "Summit Fund", None),
        ("org2", "Deleted Org", "2026-06-01T00:00:00Z"),
    ]
    contact_rows = [
        ("c1", "Jane", "Roe", None),
    ]
    # emails: id, subject, body, from_email, sent_at, is_matched. Outbound = from one
    # of our own mailboxes (grant@/jk@/old@ ten31.xyz); inbound = from outside.
    email_rows = [
        ("e1", "Fund III terms", "Discussing Fund III terms", "grant@ten31.xyz", "2026-06-17T14:00:00Z", 1),
        ("e2", "Re: allocation", "Question about allocation", "lp@brightwater.example", "2026-06-17T09:00:00Z", 1),
        ("e3", "Intro", "Summit intro thread", "jk@ten31.xyz", "2026-06-17T11:00:00Z", 1),
        ("e4", "Coffee", "Catch up note", "jane@roe.example", "2026-06-17T16:00:00Z", 1),
        ("e5", "Wire", "Wire instructions", "ir@summitcap.example", "2026-06-17T17:00:00Z", 1),
        ("e6", "Old", "ancient", "grant@ten31.xyz", "2026-06-10T10:00:00Z", 1),  # out of window
        ("e7", "Tombstoned", "deleted sighting", "lp@harborvine.example", "2026-06-17T08:00:00Z", 1),
        ("e8", "Inactive", "from retired user", "old@ten31.xyz", "2026-06-17T12:00:00Z", 1),
        ("e9", "Unmatched", "not matched", "lp@harborvine.example", "2026-06-17T13:00:00Z", 0),  # is_matched=0
        ("e10", "Group update", "inbound to two of us", "lp@vela.example", "2026-06-17T15:00:00Z", 1),
    ]
    # sightings: id, email_id, account_id, gmail ids, is_sent, deleted_at
    sighting_rows = [
        ("s1", "e1", "a1", "g1", "t1", 1, None),  # grant SENT
        ("s2", "e2", "a1", "g2", "t2", 0, None),  # grant RECEIVED
        ("s3", "e3", "a2", "g3", "t3", 1, None),  # jk SENT
        ("s4", "e4", "a1", "g4", "t4", 0, None),  # grant RECEIVED (contact)
        ("s5", "e5", "a1", "g5", "t5", 0, None),  # grant RECEIVED (deleted org)
        ("s6", "e6", "a1", "g6", "t6", 0, None),  # out of window
        ("s7", "e7", "a1", "g7", "t7", 0, "2026-06-17T09:00:00Z"),  # tombstoned
        ("s8", "e8", "a3", "g8", "t8", 1, None),  # inactive user
        ("s9", "e9", "a1", "g9", "t9", 0, None),  # unmatched email
        ("s10a", "e10", "a1", "g10a", "t10", 0, None),  # e10 received by grant ...
        ("s10b", "e10", "a2", "g10b", "t10", 0, None),  # ... and by jk (dedup target)
    ]
    # investor links: id, email_id, fr_investor, org, contact, matched_address
    link_rows = [
        ("l1", "e1", "inv1", None, None, "lp@harborvine.example"),
        ("l2", "e2", "inv2", None, None, "lp@brightwater.example"),
        ("l3", "e3", None, "org1", None, "ir@summitfund.example"),  # org name
        ("l4", "e4", None, None, "c1", "jane@roe.example"),  # contact name
        ("l5", "e5", None, "org2", None, "ir@summitcap.example"),  # deleted org -> address
        ("l6", "e6", "inv1", None, None, "lp@harborvine.example"),
        ("l7", "e7", "inv1", None, None, "lp@harborvine.example"),
        ("l8", "e8", "inv1", None, None, "lp@harborvine.example"),
        ("l9", "e9", "inv1", None, None, "lp@harborvine.example"),
        ("l10", "e10", "inv3", None, None, "lp@vela.example"),
    ]

    # Statement/rows pairs, executed in the listed order.
    inserts = [
        ("INSERT INTO users (id,username,full_name,email,role,is_active) VALUES (?,?,?,?,?,?)", user_rows),
        ("INSERT INTO email_accounts (id,user_id,email_address) VALUES (?,?,?)", account_rows),
        ("INSERT INTO fundraising_investors (id,investor_name) VALUES (?,?)", investor_rows),
        ("INSERT INTO organizations (id,name,deleted_at) VALUES (?,?,?)", org_rows),
        ("INSERT INTO contacts (id,first_name,last_name,deleted_at) VALUES (?,?,?,?)", contact_rows),
        ("INSERT INTO emails (id,subject,body_text,from_email,sent_at,is_matched) VALUES (?,?,?,?,?,?)", email_rows),
        ("INSERT INTO email_account_messages (id,email_id,account_id,gmail_message_id,gmail_thread_id,is_sent,deleted_at) "
         "VALUES (?,?,?,?,?,?,?)", sighting_rows),
        ("INSERT INTO email_investor_links (id,email_id,fundraising_investor_id,organization_id,contact_id,matched_address) "
         "VALUES (?,?,?,?,?,?)", link_rows),
    ]

    conn = _conn()
    conn.executescript(schema)
    for statement, rows in inserts:
        conn.executemany(statement, rows)
    conn.commit()
    conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
def test_collect():
    """Check collect_user_activity over the fixture window.

    Expects exactly two groups (u1 and u2), with the inactive user u3 absent;
    then checks grant's email ids, sent/received counts, and the investor
    label resolved for e1, e4 and e5, plus the label on jk's first email.
    Missing groups or emails are recorded as failed checks instead of raising.
    """
    conn = _conn()
    try:
        groups = digest_builder.collect_user_activity(conn, SINCE, UNTIL)
    finally:
        conn.close()
    check(len(groups) == 2, f"two active users with activity (grant, jk), got {len(groups)}")
    by_user = {g["user_id"]: g for g in groups}
    check("u3" not in by_user, "inactive user (u3) excluded")

    grant = by_user.get("u1")
    if not grant:
        # Route through check() so the failure is printed as well as recorded.
        check(False, "grant group missing")
        return

    def _email(email_id):
        # First matching email in grant's group, or {} so the checks below fail
        # cleanly rather than raising StopIteration.
        return next((e for e in grant["emails"] if e["email_id"] == email_id), {})

    ids = set(e["email_id"] for e in grant["emails"])
    check(ids == {"e1", "e2", "e4", "e5", "e10"},
          f"grant has e1,e2,e4,e5,e10 (e6 out-of-window, e7 tombstoned, e9 unmatched excluded), got {sorted(ids)}")
    check(grant["sent"] == 1 and grant["received"] == 4, f"grant 1 sent / 4 received, got {grant['sent']}/{grant['received']}")
    e1 = _email("e1")
    check(e1.get("direction") == "sent", "e1 direction sent")
    check(e1.get("investors") == ["Harbor & Vine"], f"e1 investor = grid name, got {e1.get('investors')}")
    e4 = _email("e4")
    check(e4.get("investors") == ["Jane Roe"], f"e4 investor = contact fallback name, got {e4.get('investors')}")
    e5 = _email("e5")
    check(e5.get("investors") == ["ir@summitcap.example"], f"e5 investor = address (deleted org skipped), got {e5.get('investors')}")

    jk = by_user.get("u2")
    # Guard the [0] lookup: an empty email list must fail the check, not raise IndexError.
    jk_emails = (jk or {}).get("emails") or []
    check(bool(jk_emails) and jk_emails[0]["investors"] == ["Summit Fund"], "jk e3 investor = org name")
|
||||||
|
|
||||||
|
|
||||||
|
def test_investor():
    """Check collect_investor_activity over the fixture window.

    Expects six investor groups, then checks the inbound/outbound counts for
    Harbor & Vine and Brightwater Capital, the member attribution on Harbor &
    Vine's first email, and that Vela Partners counts one inbound email.
    """
    conn = _conn()
    activity = digest_builder.collect_investor_activity(conn, SINCE, UNTIL)
    conn.close()

    by_name = {}
    for group in activity:
        by_name[group["name"]] = group
    # Harbor & Vine, Brightwater, Vela Partners, Summit Fund, Jane Roe, ir@summitcap.example
    check(len(activity) == 6, f"six investors with activity, got {len(activity)}: {sorted(by_name)}")

    hv = by_name.get("Harbor & Vine")
    check(hv and hv["outbound"] == 1 and hv["inbound"] == 0, f"Harbor & Vine = 1 out / 0 in, got {hv}")
    hv_members = hv and hv["emails"][0]["members"]
    check(hv and hv_members == ["Grant Gilliam"], f"outbound attributed to sender, got {hv_members}")

    bw = by_name.get("Brightwater Capital")
    check(bw and bw["inbound"] == 1 and bw["outbound"] == 0, f"Brightwater = 1 in / 0 out, got {bw}")

    # e10 was received by TWO mailboxes (grant + jk) -> dedup to one inbound email
    vela = by_name.get("Vela Partners")
    check(vela and vela["total"] == 1 and vela["inbound"] == 1,
          f"Vela inbound deduped across 2 sightings -> 1, got {vela}")
|
||||||
|
|
||||||
|
|
||||||
|
def test_build_and_empty():
    """Check build_digest for the fixture window and for a window with no mail.

    The chat function is replaced by a stub that returns a fixed sentence.
    """
    narrative = "Grant worked with Harbor & Vine on Fund III."

    def stub(prompt, system=None, max_tokens=220):
        return narrative

    conn = _conn()
    d = digest_builder.build_digest(conn, SINCE, UNTIL, chat_fn=stub)
    check(d["has_activity"] is True, "build_digest has_activity True when there is activity")
    counts_ok = d["user_count"] == 2 and d["email_count"] == 7 and d["investor_count"] == 6
    check(counts_ok,
          f"counts: 2 users / 7 emails / 6 investors, got {d['user_count']}/{d['email_count']}/{d['investor_count']}")
    body = d["body"]
    check("Daily Activity Digest" in body, "body has title")
    check(all(header in body for header in ("BY TEAM MEMBER", "BY INVESTOR")), "body has both sections")
    check(all(person in body for person in ("Grant Gilliam", "Jonathan K")), "body names both active users")
    check(all(name in body for name in ("Harbor & Vine", "Brightwater Capital", "Vela Partners")),
          "investor section lists investors")
    check(narrative in body, "body includes the local narrative")

    empty = digest_builder.build_digest(conn, "2030-01-01T00:00:00Z", "2030-01-02T00:00:00Z", chat_fn=stub)
    empty_body = empty["body"]
    check(empty["has_activity"] is False, "empty window -> has_activity False")
    check("No tracked email activity" in empty_body, "empty window -> 'no activity' note (always-send)")
    check("BY INVESTOR" not in empty_body, "empty window -> no section headers")
    conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
def test_reminders_due():
    """The reminders-due section: overdue + due-today only (future / done / soft-deleted
    excluded), rendered even on an empty email window. Creates + drops the reminders table
    so the rest of the suite still exercises the table-absent path.

    The table is dropped and the connection closed in a ``finally`` block, so
    an exception partway through cannot leave the table behind for later tests.
    """
    from datetime import date, timedelta

    conn = _conn()
    try:
        conn.execute("""CREATE TABLE reminders (id TEXT PRIMARY KEY, investor_id TEXT,
            investor_name TEXT, contact_id TEXT, title TEXT, details TEXT, due_date TEXT,
            status TEXT DEFAULT 'open', snoozed_until TEXT, assignee_id TEXT, created_by TEXT,
            source TEXT, completed_at TEXT, created_at TEXT, updated_at TEXT, deleted_at TEXT)""")
        # Read the clock once so the three dates stay consistent across midnight.
        base = date.today()
        today = base.isoformat()
        yest = (base - timedelta(days=1)).isoformat()
        future = (base + timedelta(days=30)).isoformat()
        conn.executemany(
            "INSERT INTO reminders (id,investor_name,title,due_date,status,assignee_id,deleted_at) "
            "VALUES (?,?,?,?,?,?,?)", [
                ("r1", "Harbor & Vine", "Send wire instructions", yest, "open", "u1", None),  # overdue
                ("r2", "Brightwater Capital", "Call about allocation", today, "open", None, None),  # due today
                ("r3", "Vela Partners", "Quarterly touch", future, "open", "u1", None),  # future -> hidden
                ("r4", "Gone LP", "Done already", yest, "done", "u1", None),  # done -> hidden
                ("r5", "Deleted LP", "Tombstoned", yest, "open", "u1", "2026-06-01T00:00:00Z"),  # deleted -> hidden
            ])
        conn.commit()

        due = digest_builder.collect_due_reminders(conn, today)
        titles = {r["title"] for r in due}
        check(titles == {"Send wire instructions", "Call about allocation"},
              f"due collector = overdue + due-today only (got {titles})")
        overdue = [r for r in due if r["overdue"]]
        check(len(overdue) == 1 and overdue[0]["title"] == "Send wire instructions", "overdue flagged")

        def stub(prompt, system=None, max_tokens=220):
            return "narrative"

        d = digest_builder.build_digest(conn, SINCE, UNTIL, chat_fn=stub)
        check(d["reminder_count"] == 2, f"reminder_count = 2 (got {d['reminder_count']})")
        check("REMINDERS DUE (2)" in d["body"], "body has reminders section header")
        check("Overdue (1):" in d["body"] and "Due today (1):" in d["body"], "body splits overdue / due today")
        check("Harbor & Vine — Send wire instructions" in d["body"]
              and "[Grant Gilliam]" in d["body"], "reminder line shows investor + title + resolved assignee")
        check("Quarterly touch" not in d["body"], "future reminder excluded from due section")

        empty = digest_builder.build_digest(conn, "2030-01-01T00:00:00Z", "2030-01-02T00:00:00Z", chat_fn=stub)
        check("No tracked email activity" in empty["body"] and "REMINDERS DUE (2)" in empty["body"],
              "reminders render even on an empty email window (current-state addendum)")
    finally:
        try:
            # IF EXISTS: the CREATE itself may have been the statement that failed.
            conn.execute("DROP TABLE IF EXISTS reminders")
            conn.commit()
        finally:
            conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
def test_policy():
    """Check digest-policy resolution: the env seed applies first, a stored row overrides it.

    Loads the policy with no stored row, writes a row under
    ``digest_builder.DIGEST_POLICY_KEY``, loads again, then removes the row so the
    shared ``app_settings`` table is left as it was found.
    """
    db = _conn()
    policy_key = digest_builder.DIGEST_POLICY_KEY

    # No stored row yet: CRM_DIGEST_ENABLED=1 (set at import) is expected to seed
    # enabled=True, with the send hour defaulting to 18.
    seeded = digest_builder.load_digest_policy(db)
    seeded_ok = seeded["enabled"] is True and seeded["send_hour"] == 18
    check(seeded_ok, f"env seed -> enabled, hour 18, got {seeded}")

    # A stored row (the admin-panel control) must win over the env seed.
    stored_value = json.dumps({"enabled": False, "send_hour": 9})
    db.execute(
        "INSERT OR REPLACE INTO app_settings (key,value_json,updated_at) VALUES (?,?,?)",
        (policy_key, stored_value, "x"),
    )
    db.commit()
    overridden = digest_builder.load_digest_policy(db)
    overridden_ok = overridden["enabled"] is False and overridden["send_hour"] == 9
    check(overridden_ok, f"DB policy wins over env, got {overridden}")

    # Clean up the row written above.
    db.execute("DELETE FROM app_settings WHERE key = ?", (policy_key,))
    db.commit()
    db.close()
|
||||||
|
|
||||||
|
|
||||||
|
def test_summary_fallback():
    """Check that ``summarize_user_day`` still returns a narrative when the chat call fails.

    The chat function passed in always raises; the returned text is expected to name
    the user, include the sent count, and mention that the summary is unavailable.
    """
    def boom(*a, **k):
        # Stand-in chat function that fails on every call.
        raise RuntimeError("spark down")

    only_email = {"direction": "sent", "subject": "x",
                  "investors": ["Harbor & Vine"], "text": "hi"}
    grp = {
        "user_id": "u1",
        "full_name": "Grant Gilliam",
        "username": "grant",
        "emails": [only_email],
        "investors": ["Harbor & Vine"],
        "sent": 1,
        "received": 0,
        "total": 1,
    }

    narrative = digest_builder.summarize_user_day(grp, chat_fn=boom)
    looks_like_fallback = (
        "Grant Gilliam" in narrative
        and "1 sent" in narrative
        and "unavailable" in narrative.lower()
    )
    check(looks_like_fallback, f"fallback narrative on chat error, got: {narrative}")
|
||||||
|
|
||||||
|
|
||||||
|
def test_scheduler_guards():
    """Exercise the guards around ``digest_scheduler.maybe_send_digest``.

    Covers, in order: the send-hour gate, the single daily send and its recipient
    list, the window cursor written on send, the once-per-day suppression, the
    ``force=True`` bypass, and a stored ``enabled: False`` policy. The digest builder
    and the sender are stubs; ``sent_calls`` records the recipient list of each send.
    """
    sent_calls = []

    def build_fn(conn, since, until):
        # Fixed payload: the scheduler's decisions are under test, not digest content.
        return {"subject": "S", "body": "B",
                "has_activity": True, "user_count": 1, "email_count": 2}

    def send_fn(conn, to_addrs, subject, body, sender_email=None):
        sent_calls.append(list(to_addrs))
        return {"transport": "stub"}

    factory = _conn
    utc = datetime(2026, 6, 18, 1, 0, tzinfo=timezone.utc)

    def run(now_local, **extra):
        # One scheduler tick at the given local time, always with the same stubs.
        return digest_scheduler.maybe_send_digest(
            factory, now_local=now_local, now_utc=utc,
            build_fn=build_fn, send_fn=send_fn, **extra)

    def read_setting(key):
        # Read one stored setting on a fresh connection; close it even on failure.
        conn = _conn()
        try:
            return digest_scheduler._get_setting(conn, key)
        finally:
            conn.close()

    # Before the send hour (09:00 local < 18:00) -> no send
    r = run(datetime(2026, 6, 18, 9, 0))
    check(r["status"] == "before_send_hour" and not sent_calls, f"before send hour -> skip, got {r}")

    # At/after the send hour -> sends once, only to the active admin (grant)
    r = run(datetime(2026, 6, 18, 19, 0))
    check(r["status"] == "sent" and len(sent_calls) == 1, f"after send hour -> sends, got {r}")
    # Guard the lookup: when nothing was sent this must be recorded as a FAIL, not
    # raise IndexError while formatting the message and abort the whole run.
    last_recipients = sent_calls[-1] if sent_calls else None
    check(last_recipients == ["grant@ten31.xyz"],
          f"recipients = active admins only, got {last_recipients}")

    # The window cursor must advance to the send time so a missed day rolls forward
    # (since, now] — not be left unset/stale.
    cursor_at = read_setting(digest_scheduler._LAST_AT_KEY)
    check(cursor_at == digest_scheduler._utc_iso(utc),
          f"window cursor advanced to send time, got {cursor_at}")

    # Same local day again -> suppressed (once-per-day guard)
    r = run(datetime(2026, 6, 18, 20, 0))
    check(r["status"] == "already_sent_today" and len(sent_calls) == 1, f"second send same day -> skip, got {r}")

    # force=True ignores the hour + once-per-day guard, and does NOT touch the cursor
    before = read_setting(digest_scheduler._LAST_DATE_KEY)
    r = run(datetime(2026, 6, 18, 3, 0), force=True)
    check(r["status"] == "sent" and len(sent_calls) == 2, f"force sends regardless of guards, got {r}")
    after = read_setting(digest_scheduler._LAST_DATE_KEY)
    check(before == after, "force send does not advance the daily cursor")

    # DB policy disabled -> daily path skips even past the hour; force still sends.
    conn = _conn()
    try:
        digest_scheduler._set_setting(conn, digest_builder.DIGEST_POLICY_KEY, {"enabled": False, "send_hour": 18})
        conn.commit()
    finally:
        conn.close()
    r = run(datetime(2026, 6, 19, 19, 0))
    check(r["status"] == "disabled" and len(sent_calls) == 2, f"DB-disabled policy skips daily send, got {r}")
    r = run(datetime(2026, 6, 19, 2, 0), force=True)
    check(r["status"] == "sent" and len(sent_calls) == 3, f"force overrides disabled policy, got {r}")

    # Remove the policy row written above.
    conn = _conn()
    try:
        conn.execute("DELETE FROM app_settings WHERE key = ?", (digest_builder.DIGEST_POLICY_KEY,))
        conn.commit()
    finally:
        conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
def test_window_resolver():
    """Check ``digest_builder.resolve_digest_window`` against a fixed "now".

    "Now" is 2026-06-16 15:00 UTC, which is 08:00 at UTC-7. Covers the default
    lookback, an explicit ``hours`` lookback, a ``since`` calendar date, clamping to
    ``MAX_WINDOW_DAYS``, ``since`` taking precedence over ``hours``, the same-day
    boundary, and inputs that must raise ``ValueError``.
    """
    from datetime import timedelta

    pdt = timezone(timedelta(hours=-7))  # PDT
    now_utc = datetime(2026, 6, 16, 15, 0, tzinfo=timezone.utc)
    now_local = datetime(2026, 6, 16, 8, 0, tzinfo=pdt)

    def resolve(**options):
        # Every case shares the same pinned clock; only the window options vary.
        return digest_builder.resolve_digest_window(now_utc=now_utc, now_local=now_local, **options)

    start, end = resolve()
    check((start, end) == ("2026-06-15T15:00:00Z", "2026-06-16T15:00:00Z"),
          f"default = last 24h, got {(start, end)}")

    start, end = resolve(hours=48)
    check(start == "2026-06-14T15:00:00Z", f"hours=48 lookback, got {start}")

    # since = a local calendar date -> that day's LOCAL midnight, expressed in UTC
    start, end = resolve(since="2026-06-01")
    check(start == "2026-06-01T07:00:00Z", f"since-date -> local midnight in UTC, got {start}")

    # a since older than the cap clamps to MAX_WINDOW_DAYS (response echoes real window)
    start, end = resolve(since="2025-01-01")
    clamped = (now_utc - timedelta(days=digest_builder.MAX_WINDOW_DAYS)).strftime("%Y-%m-%dT%H:%M:%SZ")
    check(start == clamped,
          f"over-cap since clamps to {digest_builder.MAX_WINDOW_DAYS}d, got {start}")

    # since wins over hours when both supplied
    start, end = resolve(hours=1, since="2026-06-10")
    check(start.startswith("2026-06-10"), f"since wins over hours, got {start}")

    # same-day boundary: since = today's local date, now later in the day -> valid
    # window (local midnight is strictly before now), not a "start must be before now" raise
    start, end = resolve(since="2026-06-16")
    check(start == "2026-06-16T07:00:00Z" and end == "2026-06-16T15:00:00Z",
          f"since=today -> [local midnight, now], got {(start, end)}")

    # Each of these option sets must be rejected with ValueError.
    rejected_inputs = [{"hours": 0}, {"hours": "abc"}, {"since": "nope"}, {"since": "2027-01-01"}]
    for bad in rejected_inputs:
        try:
            resolve(**bad)
        except ValueError:
            check(True, f"bad input rejected: {bad}")
        else:
            check(False, f"bad input {bad} should raise")
|
||||||
|
|
||||||
|
|
||||||
|
def test_send_digest_window():
    """Check a manual windowed send: it echoes its window, goes to the expected
    recipient list, and leaves the stored ``_LAST_AT_KEY`` cursor unchanged.
    """
    sent = []

    def build_fn(conn, since, until):
        # The body carries the window so a send can be traced to its bounds.
        return {"subject": "S", "body": f"{since}|{until}",
                "has_activity": True, "user_count": 1,
                "email_count": 2, "investor_count": 1}

    def send_fn(conn, to_addrs, subject, body, sender_email=None):
        sent.append((list(to_addrs), body))
        return {"transport": "stub"}

    def cursor_value():
        # Read the daily cursor on its own short-lived connection.
        handle = _conn()
        value = digest_scheduler._get_setting(handle, digest_scheduler._LAST_AT_KEY)
        handle.close()
        return value

    before = cursor_value()

    window_start = "2026-05-01T00:00:00Z"
    window_end = "2026-06-16T00:00:00Z"
    outcome = digest_scheduler.send_digest_window(
        _conn, since_iso=window_start, until_iso=window_end,
        build_fn=build_fn, send_fn=send_fn)
    check(outcome["status"] == "sent" and outcome["window"] == [window_start, window_end],
          f"windowed send returns its window, got {outcome}")
    check(sent and sent[-1][0] == ["grant@ten31.xyz"], f"windowed send -> active admins only, got {sent}")

    after = cursor_value()
    check(before == after, "windowed manual send does not advance the daily cursor")
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Seed the database, run every digest test stage in order, and report.

    Prints a banner before each stage. Exits with status 1 after listing the
    recorded failures when ``FAILS`` is non-empty; otherwise prints the all-pass line.
    """
    setup()

    stages = (
        ("collect_user_activity:", test_collect),
        ("collect_investor_activity:", test_investor),
        ("build_digest + empty:", test_build_and_empty),
        ("reminders due:", test_reminders_due),
        ("summary fallback:", test_summary_fallback),
        ("digest policy:", test_policy),
        ("window resolver:", test_window_resolver),
        ("windowed manual send:", test_send_digest_window),
        ("scheduler guards:", test_scheduler_guards),
    )
    for banner, run_stage in stages:
        print(banner)
        run_stage()

    if not FAILS:
        print("\nALL PASS (digest builder + scheduler)")
        return

    print(f"\nFAILED ({len(FAILS)})")
    for failure in FAILS:
        print(" - " + failure)
    sys.exit(1)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
@@ -33,7 +33,7 @@ def setup():
|
|||||||
conn.executescript("""
|
conn.executescript("""
|
||||||
CREATE TABLE app_settings (key TEXT PRIMARY KEY, value_json TEXT, updated_at TEXT);
|
CREATE TABLE app_settings (key TEXT PRIMARY KEY, value_json TEXT, updated_at TEXT);
|
||||||
CREATE TABLE email_accounts (id TEXT, email_address TEXT, sync_enabled INT DEFAULT 1, sync_status TEXT, backfill_complete INT);
|
CREATE TABLE email_accounts (id TEXT, email_address TEXT, sync_enabled INT DEFAULT 1, sync_status TEXT, backfill_complete INT);
|
||||||
CREATE TABLE emails (id TEXT PRIMARY KEY, subject TEXT, body_text TEXT, snippet TEXT, from_email TEXT, sent_at TEXT, is_matched INT, match_status TEXT);
|
CREATE TABLE emails (id TEXT PRIMARY KEY, subject TEXT, body_text TEXT, snippet TEXT, from_name TEXT, from_email TEXT, sent_at TEXT, is_matched INT, match_status TEXT);
|
||||||
CREATE TABLE email_investor_links (id TEXT, email_id TEXT, fundraising_investor_id TEXT, organization_id TEXT, contact_id TEXT, match_confidence REAL);
|
CREATE TABLE email_investor_links (id TEXT, email_id TEXT, fundraising_investor_id TEXT, organization_id TEXT, contact_id TEXT, match_confidence REAL);
|
||||||
CREATE TABLE email_activity_proposals (id TEXT PRIMARY KEY, email_id TEXT UNIQUE, investor_id TEXT, investor_name TEXT,
|
CREATE TABLE email_activity_proposals (id TEXT PRIMARY KEY, email_id TEXT UNIQUE, investor_id TEXT, investor_name TEXT,
|
||||||
direction TEXT, summary TEXT, proposed_note TEXT, email_subject TEXT, email_date TEXT, status TEXT DEFAULT 'pending',
|
direction TEXT, summary TEXT, proposed_note TEXT, email_subject TEXT, email_date TEXT, status TEXT DEFAULT 'pending',
|
||||||
@@ -51,10 +51,10 @@ def setup():
|
|||||||
grid = {"columns": [], "rows": [{"id": "inv1", "investor_name": "Harbor & Vine", "notes": "existing note"}]}
|
grid = {"columns": [], "rows": [{"id": "inv1", "investor_name": "Harbor & Vine", "notes": "existing note"}]}
|
||||||
conn.execute("INSERT INTO fundraising_state (id,grid_json,views_json,version) VALUES ('main',?,?,1)", (json.dumps(grid), "[]"))
|
conn.execute("INSERT INTO fundraising_state (id,grid_json,views_json,version) VALUES ('main',?,?,1)", (json.dumps(grid), "[]"))
|
||||||
# e1 sent (from us), e2 received, both after cutoff; e3 before cutoff (excluded)
|
# e1 sent (from us), e2 received, both after cutoff; e3 before cutoff (excluded)
|
||||||
conn.executemany("INSERT INTO emails (id,subject,body_text,from_email,sent_at,is_matched,match_status) VALUES (?,?,?,?,?,1,'matched')", [
|
conn.executemany("INSERT INTO emails (id,subject,body_text,from_name,from_email,sent_at,is_matched,match_status) VALUES (?,?,?,?,?,?,1,'matched')", [
|
||||||
("e1", "Fund III", "Here is the update", "grant@ten31.xyz", "2026-06-01T10:00:00"),
|
("e1", "Fund III", "Here is the update", "Grant", "grant@ten31.xyz", "2026-06-01T10:00:00"),
|
||||||
("e2", "Re: Fund III", "Thanks, a question", "lp@harborvine.example", "2026-06-02T10:00:00"),
|
("e2", "Re: Fund III", "Thanks, a question", "Harbor LP", "lp@harborvine.example", "2026-06-02T10:00:00"),
|
||||||
("e3", "Old", "ancient", "lp@harborvine.example", "2025-01-01T10:00:00"),
|
("e3", "Old", "ancient", "Harbor LP", "lp@harborvine.example", "2025-01-01T10:00:00"),
|
||||||
])
|
])
|
||||||
conn.executemany("INSERT INTO email_investor_links (id,email_id,fundraising_investor_id,match_confidence) VALUES (?,?, 'inv1', 1.0)",
|
conn.executemany("INSERT INTO email_investor_links (id,email_id,fundraising_investor_id,match_confidence) VALUES (?,?, 'inv1', 1.0)",
|
||||||
[("l1", "e1"), ("l2", "e2"), ("l3", "e3")])
|
[("l1", "e1"), ("l2", "e2"), ("l3", "e3")])
|
||||||
@@ -77,7 +77,9 @@ def main():
|
|||||||
dirs = sorted(p["direction"] for p in props)
|
dirs = sorted(p["direction"] for p in props)
|
||||||
check(dirs == ["received", "sent"], f"directions sent+received, got {dirs}")
|
check(dirs == ["received", "sent"], f"directions sent+received, got {dirs}")
|
||||||
e1 = next(p for p in props if p["email_id"] == "e1")
|
e1 = next(p for p in props if p["email_id"] == "e1")
|
||||||
check(e1["direction"] == "sent" and "Sent" in e1["proposed_note"], "e1 (from us) is 'sent'")
|
check(e1["direction"] == "sent" and "Grant emailed Harbor & Vine" in e1["proposed_note"], "e1 (from us) names sender + investor")
|
||||||
|
e2 = next(p for p in props if p["email_id"] == "e2")
|
||||||
|
check(e2["direction"] == "received" and "emailed the team" in e2["proposed_note"], "e2 (inbound) reads '<sender> emailed the team'")
|
||||||
check("✉" in e1["proposed_note"] and "fundraising update" in e1["proposed_note"], "proposed note marked + has gist")
|
check("✉" in e1["proposed_note"] and "fundraising update" in e1["proposed_note"], "proposed note marked + has gist")
|
||||||
|
|
||||||
# grid must be UNTOUCHED before approval
|
# grid must be UNTOUCHED before approval
|
||||||
|
|||||||
@@ -0,0 +1,129 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Test the Matrix review-bot bridge for email-activity proposals (Features 2/3):
|
||||||
|
the bot work-lists (to_post / open / to_close), the Matrix side-row mark helpers, and an
|
||||||
|
in-thread (source='matrix') decision that closes the thread — plus the bot-or-admin role gate.
|
||||||
|
Synthetic data only (guardrail #9). The local model is stubbed.
|
||||||
|
Run: cd backend && python3 test_email_proposal_matrix.py
|
||||||
|
"""
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import sqlite3
|
||||||
|
import sys
|
||||||
|
import tempfile
|
||||||
|
|
||||||
|
os.environ["CRM_DB_PATH"] = os.path.join(tempfile.mkdtemp(), "crm.db")
|
||||||
|
os.environ.setdefault("CRM_DATA_DIR", os.path.dirname(os.environ["CRM_DB_PATH"]))
|
||||||
|
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||||
|
import server # noqa: E402
|
||||||
|
|
||||||
|
server._summarize_email_gist = lambda subject, body: "fundraising update; proposed a call"
|
||||||
|
|
||||||
|
FAILS = []
|
||||||
|
|
||||||
|
|
||||||
|
def check(cond, msg):
    """Print a PASS/FAIL line for ``msg`` and record ``msg`` in ``FAILS`` when ``cond`` is falsy."""
    verdict = " PASS " if cond else " FAIL "
    print(verdict + msg)
    if cond:
        return
    FAILS.append(msg)
|
||||||
|
|
||||||
|
|
||||||
|
def setup():
|
||||||
|
conn = sqlite3.connect(os.environ["CRM_DB_PATH"])
|
||||||
|
conn.row_factory = sqlite3.Row
|
||||||
|
conn.executescript("""
|
||||||
|
CREATE TABLE app_settings (key TEXT PRIMARY KEY, value_json TEXT, updated_at TEXT);
|
||||||
|
CREATE TABLE email_accounts (id TEXT, email_address TEXT, sync_enabled INT DEFAULT 1, sync_status TEXT, backfill_complete INT);
|
||||||
|
CREATE TABLE emails (id TEXT PRIMARY KEY, subject TEXT, body_text TEXT, snippet TEXT, from_name TEXT, from_email TEXT, sent_at TEXT, is_matched INT, match_status TEXT);
|
||||||
|
CREATE TABLE email_investor_links (id TEXT, email_id TEXT, fundraising_investor_id TEXT, organization_id TEXT, contact_id TEXT, match_confidence REAL);
|
||||||
|
CREATE TABLE email_activity_proposals (id TEXT PRIMARY KEY, email_id TEXT UNIQUE, investor_id TEXT, investor_name TEXT,
|
||||||
|
direction TEXT, summary TEXT, proposed_note TEXT, email_subject TEXT, email_date TEXT, status TEXT DEFAULT 'pending',
|
||||||
|
decided_by TEXT, decided_at TEXT, final_note TEXT, created_at TEXT);
|
||||||
|
CREATE TABLE email_proposal_matrix (proposal_id TEXT PRIMARY KEY, event_id TEXT, posted_at TEXT, closed_at TEXT, created_at TEXT);
|
||||||
|
CREATE TABLE users (id TEXT PRIMARY KEY, username TEXT);
|
||||||
|
CREATE TABLE fundraising_investors (id TEXT PRIMARY KEY, investor_name TEXT, notes TEXT);
|
||||||
|
CREATE TABLE fundraising_state (id TEXT PRIMARY KEY, grid_json TEXT, views_json TEXT, version INT,
|
||||||
|
updated_by TEXT REFERENCES users(id), updated_at TEXT);
|
||||||
|
CREATE TABLE interaction_log (id TEXT PRIMARY KEY, ts TEXT, actor_type TEXT, actor_id TEXT, action TEXT, target_type TEXT, target_id TEXT, payload TEXT, source TEXT, created_at TEXT);
|
||||||
|
""")
|
||||||
|
conn.execute("INSERT INTO users (id,username) VALUES ('user-1','grant')")
|
||||||
|
conn.execute("INSERT INTO app_settings VALUES ('email_activity_since', ?, ?)", (json.dumps("2026-01-01T00:00:00"), "x"))
|
||||||
|
conn.execute("INSERT INTO email_accounts (id,email_address) VALUES ('a','grant@ten31.xyz')")
|
||||||
|
conn.execute("INSERT INTO fundraising_investors (id,investor_name,notes) VALUES ('inv1','Harbor & Vine','existing note')")
|
||||||
|
grid = {"columns": [], "rows": [{"id": "inv1", "investor_name": "Harbor & Vine", "notes": "existing note"}]}
|
||||||
|
conn.execute("INSERT INTO fundraising_state (id,grid_json,views_json,version) VALUES ('main',?,?,1)", (json.dumps(grid), "[]"))
|
||||||
|
conn.executemany("INSERT INTO emails (id,subject,body_text,snippet,from_name,from_email,sent_at,is_matched,match_status) VALUES (?,?,?,?,?,?,?,1,'matched')", [
|
||||||
|
("e1", "Fund III", "Here is the update", "the quarterly update is attached", "Grant", "grant@ten31.xyz", "2026-06-01T10:00:00"),
|
||||||
|
("e2", "Re: Fund III", "Thanks, a question", "thanks — one question on terms", "LP Contact", "lp@harborvine.example", "2026-06-02T10:00:00"),
|
||||||
|
])
|
||||||
|
conn.executemany("INSERT INTO email_investor_links (id,email_id,fundraising_investor_id,match_confidence) VALUES (?,?, 'inv1', 1.0)",
|
||||||
|
[("l1", "e1"), ("l2", "e2")])
|
||||||
|
conn.commit()
|
||||||
|
conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Run the Matrix review-bot bridge scenario end to end and report the result.

    Steps: role-gate checks, drafting two proposals, the bot work-lists before and
    after posting, an in-thread (``source="matrix"``) approval that also closes the
    thread, a web-path dismissal that must surface in ``to_close``, closing that
    thread, and marking an unknown proposal id. Exits with status 1 when any check
    was recorded in ``FAILS``.
    """
    setup()

    # role gate: bot passes the agent gate but is NOT an admin; member passes neither.
    bot_or_admin = server.require_bot_or_admin
    check(bot_or_admin({"role": "bot"}), "bot passes require_bot_or_admin")
    check(bot_or_admin({"role": "admin"}), "admin passes require_bot_or_admin")
    check(not bot_or_admin({"role": "member"}), "member does NOT pass require_bot_or_admin")
    check(not server.require_admin({"role": "bot"}), "bot is NOT an admin (no user-mgmt/settings reach)")

    drafted = server.propose_email_activity_notes()
    check(drafted.get("proposed") == 2, "drafts 2 proposals")

    db = server.get_db()

    def worklists():
        # Fresh snapshot of the bot's to_post / open / to_close lists.
        return server.list_bot_email_proposals(db)

    pending_by_email = {}
    for proposal in server.list_email_activity_proposals(db, status="pending"):
        pending_by_email[proposal["email_id"]] = proposal
    first, second = pending_by_email["e1"], pending_by_email["e2"]

    # Both are pending + un-posted → both in to_post; card carries from/snippet/note context.
    snapshot = worklists()
    check(len(snapshot["to_post"]) == 2 and not snapshot["open"] and not snapshot["to_close"],
          "both proposals queued to_post")
    card = next(item for item in snapshot["to_post"] if item["id"] == first["id"])
    snippet_text = card.get("snippet") or ""
    check(card.get("from_name") == "Grant" and "quarterly update" in snippet_text,
          "card carries from_name + snippet")
    check("✉" in (card.get("proposed_note") or ""), "card carries the drafted note")

    # Post the first proposal to Matrix → it leaves to_post and becomes an open
    # thread (event id recorded).
    server.mark_proposal_matrix_posted(db, first["id"], "evtA")
    snapshot = worklists()
    check(len(snapshot["to_post"]) == 1 and snapshot["to_post"][0]["id"] == second["id"],
          "posting p_a leaves only p_b to_post")
    check(len(snapshot["open"]) == 1 and snapshot["open"][0]["id"] == first["id"] and snapshot["open"][0]["event_id"] == "evtA",
          "posted p_a is an open thread carrying its event id")

    # Decide the first proposal IN-THREAD on Matrix (approve + close in one transaction).
    decision = server.decide_email_activity_proposal(
        db, first["id"], "approve", "user-1", source="matrix", close_matrix=True)
    check(decision.get("status") == "approved" and decision.get("placed_in_grid") is True,
          "matrix approve appends to the grid")
    snapshot = worklists()
    still_listed = any(item["id"] == first["id"] for item in snapshot["open"] + snapshot["to_close"])
    check(not still_listed, "matrix-decided proposal is closed (not re-announced via to_close)")
    approved_row = db.execute("SELECT source FROM interaction_log WHERE action='email.activity_approved'").fetchone()
    check(approved_row["source"] == "matrix", "matrix decision is audited source='matrix'")

    # Web-decide path: post the second proposal, then dismiss it on the WEB (default
    # source, no close) → the bot must see it in to_close so it can announce the web
    # decision in-thread, then close.
    server.mark_proposal_matrix_posted(db, second["id"], "evtB")
    server.decide_email_activity_proposal(db, second["id"], "dismiss", "user-1")  # web path: source crm_ui, no close
    snapshot = worklists()
    check(len(snapshot["to_close"]) == 1 and snapshot["to_close"][0]["id"] == second["id"] and snapshot["to_close"][0]["status"] == "dismissed",
          "web-decided open thread surfaces in to_close")
    dismissed_row = db.execute("SELECT source FROM interaction_log WHERE action='email.activity_dismissed'").fetchone()
    check(dismissed_row["source"] == "crm_ui", "web decision is audited source='crm_ui'")

    server.mark_proposal_matrix_closed(db, second["id"])
    snapshot = worklists()
    check(not snapshot["to_close"] and not snapshot["open"], "closing the thread clears the work-lists")

    # Marking a non-existent proposal is a clean not_found, not a crash.
    unknown = server.mark_proposal_matrix_posted(db, "nope", "evtX")
    check(unknown.get("error") == "not_found", "mark posted on unknown id -> not_found")
    db.close()

    if not FAILS:
        print("\nALL PASS (email-proposal Matrix bridge)")
        return

    print(f"\nFAILED ({len(FAILS)})")
    for failure in FAILS:
        print(" - " + failure)
    sys.exit(1)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
@@ -0,0 +1,227 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Integration test for the version-safe single-row update endpoint (P3b).
|
||||||
|
|
||||||
|
POST /api/fundraising/update-row edits ONE investor row's name and/or contact pills by
|
||||||
|
reading the canonical grid blob fresh server-side and mutating only the target row — never
|
||||||
|
accepting a whole-grid payload (BRIEF §3a), so it can't clobber concurrent edits to other
|
||||||
|
rows. This boots the REAL server against a throwaway DB, seeds a two-row grid, then drives
|
||||||
|
the live HTTP endpoint to assert:
|
||||||
|
* rename + pill add/edit persist into the blob and bump the version;
|
||||||
|
* removing a pill drops it from the row + fundraising_contacts, but the classic contacts
|
||||||
|
directory entry is NOT hard-deleted (soft-delete-only convention);
|
||||||
|
* preserved pill fields (title/city/linkedin) survive an edit that only touches the name;
|
||||||
|
* the OTHER grid row is untouched (no whole-grid clobber);
|
||||||
|
* guards: missing row_id -> 400, unknown row_id -> 404, blank name -> 400,
|
||||||
|
no-op body (neither field) -> 400.
|
||||||
|
Synthetic data only (guardrail #9).
|
||||||
|
|
||||||
|
Run: cd backend && python3 test_fundraising_update_row.py
|
||||||
|
"""
|
||||||
|
import http.client
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import sqlite3
|
||||||
|
import sys
|
||||||
|
import tempfile
|
||||||
|
import threading
|
||||||
|
from http.server import ThreadingHTTPServer
|
||||||
|
|
||||||
|
_DATA = tempfile.mkdtemp()
|
||||||
|
os.environ["CRM_DATA_DIR"] = _DATA
|
||||||
|
os.environ["CRM_DB_PATH"] = os.path.join(_DATA, "crm.db")
|
||||||
|
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||||
|
import server # noqa: E402
|
||||||
|
|
||||||
|
FAILS = []
|
||||||
|
|
||||||
|
|
||||||
|
def check(cond, msg):
    """Report one assertion: print PASS or FAIL with ``msg``; on FAIL also append ``msg`` to ``FAILS``."""
    label = " PASS " if cond else " FAIL "
    print(label + msg)
    if cond:
        return
    FAILS.append(msg)
|
||||||
|
|
||||||
|
|
||||||
|
class _Quiet(server.CRMHandler):
    """``server.CRMHandler`` subclass whose ``log_message`` hook does nothing."""

    def log_message(self, *args):
        """Accept any arguments and produce no output."""
        return None
|
||||||
|
|
||||||
|
|
||||||
|
def _post(port, path, token, payload):
    """POST ``payload`` as JSON to ``127.0.0.1:port`` and return ``(status, data)``.

    Args:
        port: TCP port of the local server under test.
        path: Request path, e.g. ``"/api/fundraising/update-row"``.
        token: Bearer token sent in the ``Authorization`` header.
        payload: JSON-serializable request body.

    Returns:
        A ``(status, data)`` tuple: the HTTP status code and the decoded JSON body,
        or ``None`` for ``data`` when the response body is empty.

    The connection is closed in a ``finally`` so a failed request or read does not
    leave a socket open for the rest of the test run.
    """
    conn = http.client.HTTPConnection("127.0.0.1", port, timeout=10)
    try:
        conn.request("POST", path, body=json.dumps(payload),
                     headers={"Authorization": "Bearer " + token, "Content-Type": "application/json"})
        resp = conn.getresponse()
        status = resp.status
        # Undecodable bytes are replaced rather than raising, so the status still comes back.
        raw = resp.read().decode("utf-8", "replace")
    finally:
        conn.close()
    data = json.loads(raw) if raw else None
    return status, data
|
||||||
|
|
||||||
|
|
||||||
|
# Two-row seed grid for the update-row tests. row-1 is the row the tests edit;
# row-2 is there so the tests can confirm that other rows are left alone.
GRID = {
    "columns": [
        {"id": "investor_name", "label": "Investor Name", "type": "text"},
        {"id": "contacts", "label": "Contacts", "type": "contacts"},
        {"id": "notes", "label": "Notes", "type": "text"},
    ],
    "rows": [
        {"id": "row-1", "investor_name": "Acme Capital", "notes": "",
         "contacts": [
             # Jane carries the full set of pill fields (title, location, LinkedIn).
             {"name": "Jane Doe", "email": "jane@acme.com", "title": "Partner",
              "city": "Austin", "state": "TX", "country": "USA", "location_query": "Austin",
              "linkedin_url": "https://linkedin.com/in/janedoe"},
             # Bob starts with an empty title.
             {"name": "Bob Roe", "email": "bob@acme.com", "title": ""},
         ]},
        {"id": "row-2", "investor_name": "Beacon Fund", "notes": "untouched",
         "contacts": [{"name": "Carl Vane", "email": "carl@beacon.com"}]},
    ],
}
|
||||||
|
|
||||||
|
|
||||||
|
def seed():
    """Insert the admin user ``u1`` and store ``GRID`` as the ``main`` fundraising state.

    Works on the SQLite file named by ``os.environ["CRM_DB_PATH"]``. The grid is
    written at version 1 (an existing ``main`` row is overwritten and reset to
    version 1), then passed through ``server.sanitize_fundraising_grid`` into
    ``server.sync_fundraising_relational``.

    The connection is closed in a ``finally`` so a failing insert or sync does not
    leave the database file open behind the test.
    """
    c = sqlite3.connect(os.environ["CRM_DB_PATH"])
    try:
        c.row_factory = sqlite3.Row
        c.execute("INSERT INTO users (id,username,email,password_hash,full_name,role,is_active) "
                  "VALUES ('u1','grant','grant@ten31.example','x','Grant','admin',1)")
        # Upsert so the seed also works when a 'main' row already exists.
        c.execute("INSERT INTO fundraising_state (id, grid_json, views_json, version, updated_by) "
                  "VALUES ('main', ?, '[]', 1, 'u1') "
                  "ON CONFLICT(id) DO UPDATE SET grid_json=excluded.grid_json, views_json='[]', version=1",
                  (json.dumps(GRID),))
        server.sync_fundraising_relational(c, server.sanitize_fundraising_grid(GRID), [], actor_user_id="u1")
        c.commit()
    finally:
        c.close()
|
||||||
|
|
||||||
|
|
||||||
|
def blob_rows():
    """Read the stored ``main`` grid blob and return ``(rows_by_id, version)``.

    Returns:
        A tuple of a dict mapping each grid row's ``"id"`` to the row dict, and the
        stored version as an ``int``.

    The connection is closed in a ``finally`` so a failing query does not leave the
    database file open behind the test.
    """
    c = sqlite3.connect(os.environ["CRM_DB_PATH"])
    try:
        c.row_factory = sqlite3.Row
        row = c.execute("SELECT grid_json, version FROM fundraising_state WHERE id='main'").fetchone()
    finally:
        c.close()
    grid = json.loads(row["grid_json"])
    by_id = {r["id"]: r for r in grid["rows"]}
    return by_id, int(row["version"])
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Exercise ``POST /api/fundraising/update-row`` against a live handler.

    Initialises the database, seeds it, serves ``_Quiet`` on an ephemeral localhost port
    from a daemon thread, posts a series of update-row bodies and inspects both the stored
    grid blob and the relational tables. Every outcome is recorded through ``check``; the
    process exits with status 1 when any check failed.
    """
    server.init_db()
    seed()
    token = server.create_token("u1", "grant", "admin")

    httpd = ThreadingHTTPServer(("127.0.0.1", 0), _Quiet)
    port = httpd.server_address[1]
    threading.Thread(target=httpd.serve_forever, daemon=True).start()
    try:
        # ── rename + add a third contact, edit Bob's title, keep Jane unchanged ──
        print("\n[rename + pill add/edit]")
        new_contacts = [
            # Jane is re-sent with every field she was seeded with, so nothing about her changes.
            {"name": "Jane Doe", "email": "jane@acme.com", "title": "Partner",
             "city": "Austin", "state": "TX", "country": "USA", "location_query": "Austin",
             "linkedin_url": "https://linkedin.com/in/janedoe"},
            {"name": "Bob Roe", "email": "bob@acme.com", "title": "Principal"},  # edited title
            {"name": "Dana Fox", "email": "dana@acme.com", "title": "Analyst"},  # added
        ]
        st, _ = _post(port, "/api/fundraising/update-row", token,
                      {"row_id": "row-1", "investor_name": "Acme Capital LLC", "contacts": new_contacts})
        check(st == 200, f"update-row -> 200 (got {st})")
        by_id, version = blob_rows()
        check(version == 2, f"version bumped 1 -> 2 (got {version})")
        r1 = by_id.get("row-1", {})
        # .get() throughout: a missing row/key must surface as failed checks, not a KeyError.
        r1_contacts = r1.get("contacts", [])
        check(r1.get("investor_name") == "Acme Capital LLC", f"name renamed in blob (got {r1.get('investor_name')!r})")
        names = [c.get("name") for c in r1_contacts]
        check(names == ["Jane Doe", "Bob Roe", "Dana Fox"], f"three pills in order (got {names})")
        bob = next((c for c in r1_contacts if c.get("name") == "Bob Roe"), {})
        check(bob.get("title") == "Principal", f"Bob's title edited (got {bob.get('title')!r})")
        jane = next((c for c in r1_contacts if c.get("name") == "Jane Doe"), {})
        check(jane.get("linkedin_url") == "https://linkedin.com/in/janedoe" and jane.get("city") == "Austin",
              f"Jane's preserved fields survived (got {jane})")

        # ── the OTHER row is byte-for-byte untouched (no whole-grid clobber) ──
        print("\n[other row untouched]")
        r2 = by_id.get("row-2", {})
        check(r2.get("investor_name") == "Beacon Fund" and r2.get("notes") == "untouched"
              and [c.get("name") for c in r2.get("contacts", [])] == ["Carl Vane"],
              f"row-2 unchanged (got {r2})")

        # ── relational sync: classic contacts directory now has Dana; Dana also in fundraising_contacts ──
        print("\n[relational + classic-contacts sync]")
        c = sqlite3.connect(os.environ["CRM_DB_PATH"])
        try:
            c.row_factory = sqlite3.Row
            dana = c.execute("SELECT id, deleted_at FROM contacts WHERE lower(email)='dana@acme.com'").fetchone()
            check(bool(dana), "added contact Dana propagated to classic contacts directory")
            inv = c.execute("SELECT id FROM fundraising_investors WHERE source_row_id='row-1'").fetchone()
            # A missing investor row binds NULL, which matches nothing, so the check below fails cleanly.
            fc_names = {r["full_name"] for r in c.execute(
                "SELECT full_name FROM fundraising_contacts WHERE investor_id=?",
                (inv["id"] if inv else None,)).fetchall()}
            check(fc_names == {"Jane Doe", "Bob Roe", "Dana Fox"}, f"fundraising_contacts mirrors the three pills (got {fc_names})")
        finally:
            c.close()

        # ── remove Bob: pill drops + fundraising_contacts drops, but classic contact NOT hard-deleted ──
        print("\n[remove pill is soft on the classic directory]")
        st, _ = _post(port, "/api/fundraising/update-row", token, {"row_id": "row-1", "contacts": [
            {"name": "Jane Doe", "email": "jane@acme.com", "title": "Partner"},
            {"name": "Dana Fox", "email": "dana@acme.com", "title": "Analyst"},
        ]})
        check(st == 200, f"remove-pill update -> 200 (got {st})")
        by_id, version = blob_rows()
        names = [c.get("name") for c in by_id.get("row-1", {}).get("contacts", [])]
        check(names == ["Jane Doe", "Dana Fox"], f"Bob removed from the row (got {names})")
        check(version == 3, f"version bumped 2 -> 3 (got {version})")
        c = sqlite3.connect(os.environ["CRM_DB_PATH"])
        try:
            c.row_factory = sqlite3.Row
            inv = c.execute("SELECT id FROM fundraising_investors WHERE source_row_id='row-1'").fetchone()
            fc_names = {r["full_name"] for r in c.execute(
                "SELECT full_name FROM fundraising_contacts WHERE investor_id=?",
                (inv["id"] if inv else None,)).fetchall()}
            check("Bob Roe" not in fc_names, f"Bob dropped from fundraising_contacts (got {fc_names})")
            bob_classic = c.execute("SELECT id FROM contacts WHERE lower(email)='bob@acme.com'").fetchone()
            check(bool(bob_classic), "removing a pill does NOT hard-delete the classic contacts row (soft-delete only)")
        finally:
            c.close()

        # ── name-only update (no contacts key) leaves the pill list intact ──
        print("\n[name-only update preserves contacts]")
        st, _ = _post(port, "/api/fundraising/update-row", token, {"row_id": "row-1", "investor_name": "Acme Capital"})
        check(st == 200, f"name-only update -> 200 (got {st})")
        by_id, _ = blob_rows()
        r1 = by_id.get("row-1", {})
        names = [c.get("name") for c in r1.get("contacts", [])]
        check(r1.get("investor_name") == "Acme Capital", "name updated again")
        check(names == ["Jane Doe", "Dana Fox"], f"contacts untouched by a name-only edit (got {names})")

        # ── a name-only pill (no email) is KEPT; a fully-blank pill is dropped ──
        # (locks _sanitize_fundraising_contacts's emptiness rule = name OR email, not AND.)
        print("\n[name-only pill kept, blank pill dropped]")
        st, _ = _post(port, "/api/fundraising/update-row", token, {"row_id": "row-1", "contacts": [
            {"name": "Jane Doe", "email": "jane@acme.com"},
            {"name": "Erin Pope", "email": ""},  # name only -> kept
            {"name": "", "email": ""},  # fully blank -> dropped
            {"name": " ", "title": "Ghost"},  # whitespace-only name, no email -> dropped
        ]})
        check(st == 200, f"name-only-pill update -> 200 (got {st})")
        by_id, _ = blob_rows()
        names = [c.get("name") for c in by_id.get("row-1", {}).get("contacts", [])]
        check(names == ["Jane Doe", "Erin Pope"],
              f"name-only pill kept, blank/whitespace pills dropped (got {names})")

        # ── guards ──
        print("\n[validation guards]")
        st, _ = _post(port, "/api/fundraising/update-row", token, {"investor_name": "No Id"})
        check(st == 400, f"missing row_id -> 400 (got {st})")
        st, _ = _post(port, "/api/fundraising/update-row", token, {"row_id": "row-nope", "investor_name": "Ghost"})
        check(st == 404, f"unknown row_id -> 404 (got {st})")
        st, _ = _post(port, "/api/fundraising/update-row", token, {"row_id": "row-1", "investor_name": " "})
        check(st == 400, f"blank name -> 400 (got {st})")
        st, _ = _post(port, "/api/fundraising/update-row", token, {"row_id": "row-1"})
        check(st == 400, f"no-op body (neither name nor contacts) -> 400 (got {st})")
        st, _ = _post(port, "/api/fundraising/update-row", token, {"row_id": "row-1", "contacts": "nope"})
        check(st == 400, f"contacts wrong type -> 400 (got {st})")
    finally:
        httpd.shutdown()
        # shutdown() only stops serve_forever(); server_close() releases the listening socket.
        httpd.server_close()

    print()
    if FAILS:
        print(f"FAILED ({len(FAILS)}):")
        for f in FAILS:
            print(f" - {f}")
        sys.exit(1)
    print("ALL PASS (fundraising update-row: version-safe single-row name/pill edit)")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
@@ -0,0 +1,230 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Tests for gmail_send (DWD Gmail-API send) + digest_mailer (transport routing).
|
||||||
|
|
||||||
|
No network: the credential provider and urllib.request.urlopen are monkeypatched.
|
||||||
|
Run directly or via backend/run_tests.py.
|
||||||
|
"""
|
||||||
|
import base64
|
||||||
|
import io
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import urllib.error
|
||||||
|
import urllib.request
|
||||||
|
|
||||||
|
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||||
|
|
||||||
|
from email_integration import gmail_send
|
||||||
|
import digest_mailer
|
||||||
|
import smtp_send
|
||||||
|
|
||||||
|
FAILS = []
|
||||||
|
|
||||||
|
|
||||||
|
def check(cond, msg):
    """Print ``msg`` behind an ok/XX marker and remember it in ``FAILS`` when ``cond`` is falsy."""
    marker = " ok " if cond else " XX "
    print(marker + msg)
    if cond:
        return
    FAILS.append(msg)
|
||||||
|
|
||||||
|
|
||||||
|
class _Tok:
    """Minimal token stand-in: an object exposing the value it was built with as ``.token``."""

    def __init__(self, t):
        # The only state is the raw token value, read back through ``.token``.
        self.token = t
|
||||||
|
|
||||||
|
|
||||||
|
class _Provider:
    """Fake credential provider that records each token request it receives."""

    def __init__(self):
        # One ``(email, scope)`` tuple per ``access_token_for`` call, in call order.
        self.calls = []

    def access_token_for(self, email, scope):
        """Record the request and hand back a ``_Tok`` whose token is ``"tok-" + email``."""
        request = (email, scope)
        self.calls.append(request)
        token_value = "tok-" + email
        return _Tok(token_value)
|
||||||
|
|
||||||
|
|
||||||
|
class _Resp:
    """Fake response object: a context manager whose ``read()`` yields a JSON-encoded payload."""

    def __init__(self, payload):
        self._payload = payload

    def read(self):
        """Return the payload serialized with ``json.dumps`` and encoded to bytes."""
        text = json.dumps(self._payload)
        return text.encode()

    def __enter__(self):
        return self

    def __exit__(self, *a):
        # Returning False means an exception raised inside the ``with`` body is not suppressed.
        return False
|
||||||
|
|
||||||
|
|
||||||
|
def _raises(exc_type, fn, *args):
    """Return True when ``fn(*args)`` raises ``exc_type``, False when it returns normally."""
    try:
        fn(*args)
    except exc_type:
        return True
    return False


def _check_build_raw():
    """1. ``_build_raw`` output base64-decodes back to the From/To/Subject/body it was given."""
    raw = gmail_send._build_raw("a@x", ["b@x", "c@x"], "Subj", "Hello body")
    decoded = base64.urlsafe_b64decode(raw).decode()
    check("From: a@x" in decoded and "Subject: Subj" in decoded
          and "b@x, c@x" in decoded and "Hello body" in decoded,
          "_build_raw round-trips From/To/Subject/body")


def _check_send_posts_correctly():
    """2. ``send_via_gmail`` posts correctly (fake provider + fake ``urlopen``)."""
    captured = {}
    prov = _Provider()

    def fake_open(req, timeout=None):
        captured["url"] = req.full_url
        captured["auth"] = req.headers.get("Authorization")
        captured["body"] = json.loads(req.data.decode())
        return _Resp({"id": "msg123", "threadId": "thr123"})

    orig_open = urllib.request.urlopen
    orig_build = gmail_send._creds.build_provider
    gmail_send._creds.build_provider = lambda factory: prov
    urllib.request.urlopen = fake_open
    try:
        res = gmail_send.send_via_gmail("grant@ten31.xyz",
                                        ["a@ten31.xyz", "b@ten31.xyz"], "S", "B")
    finally:
        urllib.request.urlopen = orig_open
        gmail_send._creds.build_provider = orig_build

    check(res["message_id"] == "msg123", "send returns message_id")
    check(res["from"] == "grant@ten31.xyz", "send reports from")
    check(res["sent_to"] == ["a@ten31.xyz", "b@ten31.xyz"], "send reports recipients")
    check(captured["url"].endswith("/users/grant%40ten31.xyz/messages/send"),
          "posts to messages/send for the impersonated user")
    check(captured["auth"] == "Bearer tok-grant@ten31.xyz", "uses the compose-scoped token")
    check("raw" in captured["body"] and "message" not in captured["body"],
          "send body is {raw:...} (not nested under message, unlike drafts)")
    check(prov.calls and prov.calls[0][1] == gmail_send._creds.GMAIL_COMPOSE_SCOPE,
          "requests the gmail.compose scope")


def _check_send_validation():
    """3. ``send_via_gmail`` rejects empty recipients and a missing sender with ValueError."""
    check(_raises(ValueError, gmail_send.send_via_gmail, "x@x", [], "s", "b"),
          "empty recipients -> ValueError")
    check(_raises(ValueError, gmail_send.send_via_gmail, "", ["a@x"], "s", "b"),
          "missing sender -> ValueError")


def _check_transport_selection():
    """4. ``transport()`` prefers gmail-dwd, falls back to smtp, else returns None."""
    orig_avail = gmail_send.gmail_available
    orig_smtp_cfg = smtp_send.smtp_configured
    try:
        gmail_send.gmail_available = lambda: True
        check(digest_mailer.transport() == "gmail-dwd", "transport prefers gmail-dwd")
        gmail_send.gmail_available = lambda: False
        smtp_send.smtp_configured = lambda: True
        check(digest_mailer.transport() == "smtp", "transport falls back to smtp")
        smtp_send.smtp_configured = lambda: False
        check(digest_mailer.transport() is None, "transport None when neither configured")
    finally:
        gmail_send.gmail_available = orig_avail
        smtp_send.smtp_configured = orig_smtp_cfg


def _check_send_digest_gmail_route():
    """5. ``send_digest`` routes to gmail and honors ``CRM_DIGEST_SENDER``."""
    orig_send = gmail_send.send_via_gmail
    orig_avail = gmail_send.gmail_available
    sent = {}

    def fake_send(sender, to, subj, body, conn=None):
        sent.update(sender=sender, to=to)
        return {"sent_to": to, "from": sender, "message_id": "m"}

    try:
        gmail_send.gmail_available = lambda: True
        gmail_send.send_via_gmail = fake_send
        os.environ["CRM_DIGEST_SENDER"] = "digest@ten31.xyz"
        res = digest_mailer.send_digest(None, ["a@ten31.xyz"], "S", "B")
        check(res["transport"] == "gmail-dwd", "send_digest tags transport gmail-dwd")
        check(sent["sender"] == "digest@ten31.xyz", "send_digest uses CRM_DIGEST_SENDER")
    finally:
        gmail_send.send_via_gmail = orig_send
        gmail_send.gmail_available = orig_avail
        os.environ.pop("CRM_DIGEST_SENDER", None)


def _check_send_digest_no_transport():
    """6. ``send_digest`` raises ``NoTransport`` when neither transport is available."""
    orig_avail = gmail_send.gmail_available
    orig_smtp_cfg = smtp_send.smtp_configured
    try:
        gmail_send.gmail_available = lambda: False
        smtp_send.smtp_configured = lambda: False
        check(_raises(digest_mailer.NoTransport, digest_mailer.send_digest, None, ["a@x"], "S", "B"),
              "send_digest raises NoTransport when no transport")
    finally:
        gmail_send.gmail_available = orig_avail
        smtp_send.smtp_configured = orig_smtp_cfg


def _check_http_error_wrapped():
    """7. An HTTPError from Gmail is wrapped as RuntimeError (not raised raw)."""
    def raise_http(req, timeout=None):
        raise urllib.error.HTTPError(req.full_url, 403, "Forbidden", {},
                                     io.BytesIO(b'{"error":"denied"}'))

    orig_build = gmail_send._creds.build_provider
    orig_open = urllib.request.urlopen
    gmail_send._creds.build_provider = lambda factory: _Provider()
    urllib.request.urlopen = raise_http
    try:
        try:
            gmail_send.send_via_gmail("g@x", ["a@x"], "s", "b")
        except RuntimeError as e:
            ok = "403" in str(e)
        except urllib.error.HTTPError:
            ok = False  # should have been wrapped, not raised raw
        else:
            ok = False  # no exception at all is also a failure
    finally:
        urllib.request.urlopen = orig_open
        gmail_send._creds.build_provider = orig_build
    check(ok, "Gmail HTTPError wrapped as RuntimeError carrying the status")


def _check_default_sender():
    """8. ``default_sender``: env wins; else the row the connection returns; else None."""
    class _FakeCur:
        def __init__(self, row):
            self._row = row

        def fetchone(self):
            return self._row

    class _Conn:
        def __init__(self, row):
            self._row = row

        def execute(self, *a, **k):
            return _FakeCur(self._row)

    os.environ.pop("CRM_DIGEST_SENDER", None)
    try:
        check(digest_mailer.default_sender(_Conn({"email": "first@ten31.xyz"})) == "first@ten31.xyz",
              "default_sender falls back to first active admin")
        check(digest_mailer.default_sender(_Conn(None)) is None,
              "default_sender returns None when no admin has an email")
        os.environ["CRM_DIGEST_SENDER"] = "env@ten31.xyz"
        check(digest_mailer.default_sender(_Conn({"email": "first@ten31.xyz"})) == "env@ten31.xyz",
              "CRM_DIGEST_SENDER overrides the DB lookup")
    finally:
        # Never leave the override set, even when default_sender raises mid-scenario.
        os.environ.pop("CRM_DIGEST_SENDER", None)


def _check_send_digest_smtp_fallback():
    """9. ``send_digest`` tags transport 'smtp' on the fallback path."""
    orig_avail = gmail_send.gmail_available
    orig_smtp_cfg = smtp_send.smtp_configured
    orig_smtp_send = smtp_send.send_email
    try:
        gmail_send.gmail_available = lambda: False
        smtp_send.smtp_configured = lambda: True
        smtp_send.send_email = lambda to, subj, body, **k: {"sent_to": to, "from": "f@x"}
        res = digest_mailer.send_digest(None, ["a@x"], "S", "B")
        check(res["transport"] == "smtp", "send_digest tags transport smtp on fallback")
    finally:
        gmail_send.gmail_available = orig_avail
        smtp_send.smtp_configured = orig_smtp_cfg
        smtp_send.send_email = orig_smtp_send


def main():
    """Run every gmail_send / digest_mailer scenario in order and report the outcome.

    Each scenario patches module attributes and restores them itself. A
    ``CRM_DIGEST_SENDER`` value that was set before the run is put back afterwards, so an
    in-process runner does not see its environment changed. Raises ``SystemExit(1)`` when
    any check failed.
    """
    saved_sender = os.environ.get("CRM_DIGEST_SENDER")
    try:
        _check_build_raw()
        _check_send_posts_correctly()
        _check_send_validation()
        _check_transport_selection()
        _check_send_digest_gmail_route()
        _check_send_digest_no_transport()
        _check_http_error_wrapped()
        _check_default_sender()
        _check_send_digest_smtp_fallback()
    finally:
        if saved_sender is None:
            os.environ.pop("CRM_DIGEST_SENDER", None)
        else:
            os.environ["CRM_DIGEST_SENDER"] = saved_sender

    print()
    if FAILS:
        print(f"FAILED ({len(FAILS)}):")
        for f in FAILS:
            print(" -", f)
        raise SystemExit(1)
    print("ALL PASS (gmail_send + digest_mailer)")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
@@ -0,0 +1,202 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Tests for the mobile add-investor flow (Phase 8g).
|
||||||
|
|
||||||
|
Boots the REAL server against a temp DB and exercises the create path the mobile
|
||||||
|
"New investor" sheet drives:
|
||||||
|
- POST /api/fundraising/log-communication with create_investor_if_missing honors an
|
||||||
|
optional initial `priority` flag on the NEW row (and defaults it to False when omitted);
|
||||||
|
- the brand-new row's source_row_id resolves immediately for the follow-on
|
||||||
|
POST /api/fundraising/pipeline/link (the relational sync runs inside the create), so the
|
||||||
|
create -> link-at-stage handshake the UI does works end to end;
|
||||||
|
- a follow-on POST /api/reminders with the new row's source_row_id resolves to the synced
|
||||||
|
investor (the create -> reminder handshake).
|
||||||
|
Synthetic data only.
|
||||||
|
|
||||||
|
Run: cd backend && python3 test_grid_add_investor.py
|
||||||
|
"""
|
||||||
|
import http.client
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import sqlite3
|
||||||
|
import sys
|
||||||
|
import tempfile
|
||||||
|
import threading
|
||||||
|
from http.server import ThreadingHTTPServer
|
||||||
|
|
||||||
|
_DATA = tempfile.mkdtemp()
|
||||||
|
os.environ["CRM_DATA_DIR"] = _DATA
|
||||||
|
os.environ["CRM_DB_PATH"] = os.path.join(_DATA, "crm.db")
|
||||||
|
|
||||||
|
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||||
|
import server # noqa: E402
|
||||||
|
|
||||||
|
FAILS = []
|
||||||
|
|
||||||
|
|
||||||
|
def check(cond, msg):
    """Print ``msg`` behind a PASS/FAIL marker and remember it in ``FAILS`` when ``cond`` is falsy."""
    marker = " PASS " if cond else " FAIL "
    print(marker + msg)
    if cond:
        return
    FAILS.append(msg)
|
||||||
|
|
||||||
|
|
||||||
|
class _Quiet(server.CRMHandler):
    """``server.CRMHandler`` variant whose ``log_message`` discards whatever it is given."""

    def log_message(self, *a):
        # Intentionally a no-op: nothing is written for any logged message.
        return None
|
||||||
|
|
||||||
|
|
||||||
|
def _req(port, method, path, token=None, body=None):
    """Send one HTTP request to ``127.0.0.1:port`` and return ``(status, parsed_json)``.

    Args:
        port: TCP port of the server under test.
        method: HTTP method, e.g. ``"GET"`` or ``"POST"``.
        path: Request path.
        token: When truthy, sent as ``Authorization: Bearer <token>``.
        body: When not None, JSON-encoded and sent with ``Content-Type: application/json``.

    Returns:
        ``(status, data)`` where ``data`` is the decoded JSON response, or None when the
        response body is empty or is not valid JSON.
    """
    headers = {}
    if token:
        headers["Authorization"] = "Bearer " + token
    payload = None
    if body is not None:
        payload = json.dumps(body)
        headers["Content-Type"] = "application/json"

    conn = http.client.HTTPConnection("127.0.0.1", port, timeout=10)
    try:
        conn.request(method, path, body=payload, headers=headers)
        resp = conn.getresponse()
        raw = resp.read().decode("utf-8", "replace")
    finally:
        # Close on every path so a refused or timed-out request does not leak the socket.
        conn.close()

    data = None
    if raw:
        try:
            data = json.loads(raw)
        except ValueError:
            # Deliberate best-effort: a non-JSON body is reported as data=None.
            pass
    return resp.status, data
|
||||||
|
|
||||||
|
|
||||||
|
def _db():
    """Open and return a new sqlite3 connection to the file named by ``CRM_DB_PATH``."""
    db_path = os.environ["CRM_DB_PATH"]
    connection = sqlite3.connect(db_path)
    return connection
|
||||||
|
|
||||||
|
|
||||||
|
def seed():
    """Insert the single admin user ``u1`` that the test's token is minted for.

    The connection is closed even when the insert or commit raises.
    """
    c = _db()
    try:
        c.execute("INSERT INTO users (id,username,email,password_hash,full_name,role,is_active) "
                  "VALUES ('u1','grant','grant@ten31.example','x','Grant','admin',1)")
        c.commit()
    finally:
        c.close()
|
||||||
|
|
||||||
|
|
||||||
|
def _create(port, token, name, contact_name, **extra):
    """POST a create-if-missing note to ``/api/fundraising/log-communication``.

    The contact email is derived as ``<lowercased first word of contact_name>@firm.com``.
    ``note`` and ``note_append`` are consumed from ``extra`` to fill ``body`` and
    ``append_note``; every remaining keyword is merged into the request body as-is.
    Returns whatever ``_req`` returns for the request.
    """
    first_word = contact_name.split(" ")[0]
    note_text = extra.pop("note", "")
    append_flag = bool(extra.pop("note_append", False))
    body = {
        "investor_name": name,
        "create_investor_if_missing": True,
        "contact": {"name": contact_name, "email": first_word.lower() + "@firm.com"},
        "type": "note",
        "body": note_text,
        "append_note": append_flag,
    }
    # Whatever is left in ``extra`` (e.g. priority) rides along verbatim.
    body.update(extra)
    return _req(port, "POST", "/api/fundraising/log-communication", token, body)
|
||||||
|
|
||||||
|
|
||||||
|
def _grid_rows(port, token):
    """GET ``/api/fundraising/state`` and return its grid rows keyed by row ``id``.

    An empty or missing response yields an empty dict; the HTTP status is ignored.
    """
    _status, d = _req(port, "GET", "/api/fundraising/state", token)
    rows = (d or {}).get("data", {}).get("grid", {}).get("rows", [])
    indexed = {}
    for r in rows:
        indexed[r["id"]] = r
    return indexed
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Exercise the add-investor create path and its follow-on handshakes against a live handler.

    Initialises the database, seeds the admin user, serves ``_Quiet`` on an ephemeral
    localhost port from a daemon thread, then drives create / log / pipeline-link /
    reminder requests and inspects the results. Every outcome is recorded through
    ``check``; the process exits with status 1 when any check failed, else 0.
    """
    server.init_db()
    seed()
    token = server.create_token("u1", "grant", "admin")

    httpd = ThreadingHTTPServer(("127.0.0.1", 0), _Quiet)
    port = httpd.server_address[1]
    threading.Thread(target=httpd.serve_forever, daemon=True).start()
    try:
        # ── create with priority:true seeds the row's Priority flag ──
        print("\n[create: optional initial priority flag honored]")
        st, d = _create(port, token, "Acme Capital", "Jane Doe", priority=True, note="Intro call", note_append=True)
        row = (d or {}).get("data", {}).get("row") or {}
        acme_id = row.get("id")
        check(st == 201, f"create -> 201 (got {st})")
        check(row.get("priority") is True, f"returned row carries priority=true (got {row.get('priority')!r})")
        rows = _grid_rows(port, token)
        check(rows.get(acme_id, {}).get("priority") is True,
              f"GET /state shows the new row priority=true (got {rows.get(acme_id, {}).get('priority')!r})")
        check(len(rows.get(acme_id, {}).get("contacts", [])) == 1,
              f"new row has its first contact (got {rows.get(acme_id, {}).get('contacts')})")

        # ── create without priority defaults to False (no accidental flag) ──
        print("\n[create: priority defaults False when omitted]")
        st, d = _create(port, token, "Beta Partners", "Pat Roe")  # no note, no priority
        beta = (d or {}).get("data", {}).get("row") or {}
        beta_id = beta.get("id")
        check(st == 201, f"no-note create -> 201 (got {st})")
        check(beta.get("priority") is False, f"omitted priority -> False (got {beta.get('priority')!r})")

        # ── priority is honored ONLY on the create branch: logging against an EXISTING row
        # with priority:true must not flip its flag (Beta was created without priority) ──
        print("\n[invariant: priority on an existing-row log does NOT change its flag]")
        st, _ = _req(port, "POST", "/api/fundraising/log-communication", token, {
            "row_id": beta_id, "type": "note", "body": "follow-up", "append_note": True, "priority": True,
        })
        check(st in (200, 201), f"log against existing Beta -> ok (got {st})")
        rows = _grid_rows(port, token)
        check(rows.get(beta_id, {}).get("priority") is False,
              f"existing-row priority untouched by the log's priority flag (got {rows.get(beta_id, {}).get('priority')!r})")

        # ── create -> link handshake: the brand-new row links at the chosen stage ──
        print("\n[create -> link: freshly-created row resolves for pipeline link at stage]")
        st, d = _req(port, "POST", "/api/fundraising/pipeline/link", token, {
            "source_row_id": acme_id, "contact_index": 0, "name": "Acme Capital — Pipeline",
            "stage": "engaged", "expected_amount": 0, "probability": 55, "fund_name": "",
        })
        opp = (d or {}).get("data") or {}
        check(st == 201 and opp.get("stage") == "engaged",
              f"link new row @engaged -> 201 (got {st}, stage={opp.get('stage')})")
        rows = _grid_rows(port, token)
        check(rows.get(acme_id, {}).get("pipeline") is True
              and rows.get(acme_id, {}).get("pipeline_stage") == "engaged",
              f"new row now in pipeline @engaged (got {rows.get(acme_id, {}).get('pipeline')}, "
              f"{rows.get(acme_id, {}).get('pipeline_stage')})")

        # ── create -> reminder handshake: source_row_id resolves to the synced investor ──
        print("\n[create -> reminder: source_row_id resolves to the new investor]")
        st, d = _req(port, "POST", "/api/reminders", token, {
            "source_row_id": acme_id, "investor_name": "Acme Capital",
            "title": "Send Fund III deck", "due_date": "2026-07-01", "details": "",
        })
        rem = (d or {}).get("data") or {}
        check(st == 201, f"reminder create -> 201 (got {st})")
        check(bool(rem.get("investor_id")) and rem.get("investor_name") == "Acme Capital",
              f"reminder linked to the new investor (got id={rem.get('investor_id')!r}, "
              f"name={rem.get('investor_name')!r})")

        # ── card-intake contact fields land on the canonical contact (phone/city/linkedin) ──
        # The Matrix card flow sends these on the contact dict; the upsert must persist them.
        print("\n[contact fields: phone + city + linkedin persist on the contact]")
        st, d = _req(port, "POST", "/api/fundraising/log-communication", token, {
            "investor_name": "Fortitude Investment Group", "create_investor_if_missing": True,
            "contact": {"name": "Daniel Raupp", "email": "draupp@fortitude.example",
                        "phone": "631-474-5610", "mobile": "631-922-1195", "city": "Setauket, NY",
                        "linkedin_url": "linkedin.com/in/danielraupp"},
            "type": "note", "body": "from a business card", "append_note": True,
        })
        check(st == 201, f"create with contact fields -> 201 (got {st})")
        c = _db()
        try:
            crow = c.execute("SELECT phone, mobile, city, linkedin_url FROM contacts WHERE lower(email) = ?",
                             ("draupp@fortitude.example",)).fetchone()
        finally:
            # Close even when the query raises (e.g. a missing column).
            c.close()
        check(crow is not None, "contact row exists")
        check(bool(crow) and crow[0] == "631-474-5610", f"phone (office) persisted (got {crow[0] if crow else None!r})")
        check(bool(crow) and crow[1] == "631-922-1195", f"mobile (cell) persisted (got {crow[1] if crow else None!r})")
        check(bool(crow) and crow[2] == "Setauket, NY", f"city persisted (got {crow[2] if crow else None!r})")
        check(bool(crow) and crow[3] == "linkedin.com/in/danielraupp",
              f"linkedin persisted (got {crow[3] if crow else None!r})")

        # ── unknown source_row_id is refused (guard) ──
        print("\n[guard: reminder on an unknown source_row_id -> 404]")
        st, _ = _req(port, "POST", "/api/reminders", token, {
            "source_row_id": "nope", "title": "x",
        })
        check(st == 404, f"unknown source_row_id -> 404 (got {st})")
    finally:
        httpd.shutdown()
        # shutdown() only stops serve_forever(); server_close() releases the listening socket.
        httpd.server_close()

    print("\n" + ("ALL PASS" if not FAILS else f"{len(FAILS)} FAILURE(S):"))
    for f in FAILS:
        print(" - " + f)
    sys.exit(1 if FAILS else 0)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
@@ -0,0 +1,164 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Regression test for the Grid-detail communications timeline filter (Phase 8c, G6).
|
||||||
|
|
||||||
|
The mobile Grid detail's notes timeline pulls an investor-level communication stream
|
||||||
|
via GET /api/communications?source_row_id=<grid row id>. That filter (added to
|
||||||
|
handle_list_communications) maps the grid JSON row id → fundraising_investors.source_row_id
|
||||||
|
→ fundraising_contacts.contact_id → communications, so it must:
|
||||||
|
- return every communication across ALL the investor's contacts,
|
||||||
|
- stay isolated (one investor's row id never returns another's comms),
|
||||||
|
- respect soft-delete (cm.deleted_at IS NULL) through the join.
|
||||||
|
|
||||||
|
Boots the REAL server, seeds investors by driving the one-row log path (which creates the
|
||||||
|
grid row + contact + communication AND syncs the relational mirror the filter joins on),
|
||||||
|
then drives the live read path with a real token. Synthetic only (guardrail #9).
|
||||||
|
|
||||||
|
Run: cd backend && python3 test_grid_comm_timeline.py
|
||||||
|
"""
|
||||||
|
import http.client
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import sqlite3
|
||||||
|
import sys
|
||||||
|
import tempfile
|
||||||
|
import threading
|
||||||
|
from http.server import ThreadingHTTPServer
|
||||||
|
|
||||||
|
_DATA = tempfile.mkdtemp()
|
||||||
|
os.environ["CRM_DATA_DIR"] = _DATA
|
||||||
|
os.environ["CRM_DB_PATH"] = os.path.join(_DATA, "crm.db")
|
||||||
|
|
||||||
|
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||||
|
import server # noqa: E402
|
||||||
|
|
||||||
|
FAILS = []
|
||||||
|
DEL = "2026-06-01T00:00:00"
|
||||||
|
|
||||||
|
|
||||||
|
def check(cond, msg):
    """Print ``msg`` behind a PASS/FAIL marker and remember it in ``FAILS`` when ``cond`` is falsy."""
    marker = " PASS " if cond else " FAIL "
    print(marker + msg)
    if cond:
        return
    FAILS.append(msg)
|
||||||
|
|
||||||
|
|
||||||
|
class _Quiet(server.CRMHandler):
    """``server.CRMHandler`` variant whose ``log_message`` discards whatever it is given."""

    def log_message(self, *a):
        # Intentionally a no-op: nothing is written for any logged message.
        return None
|
||||||
|
|
||||||
|
|
||||||
|
def _req(method, port, path, token, body=None):
    """Send one authenticated HTTP request to ``127.0.0.1:port``; return ``(status, parsed_json)``.

    Args:
        method: HTTP method, e.g. ``"GET"`` or ``"POST"``.
        port: TCP port of the server under test.
        path: Request path.
        token: Sent as ``Authorization: Bearer <token>``.
        body: When not None, JSON-encoded and sent with ``Content-Type: application/json``.

    Returns:
        ``(status, data)`` where ``data`` is the decoded JSON response, or None when the
        response body is empty or is not valid JSON.
    """
    headers = {"Authorization": "Bearer " + token}
    payload = None
    if body is not None:
        payload = json.dumps(body)
        headers["Content-Type"] = "application/json"

    conn = http.client.HTTPConnection("127.0.0.1", port, timeout=10)
    try:
        conn.request(method, path, body=payload, headers=headers)
        resp = conn.getresponse()
        raw = resp.read().decode("utf-8", "replace")
    finally:
        # Close on every path so a refused or timed-out request does not leak the socket.
        conn.close()

    data = None
    if raw:
        try:
            data = json.loads(raw)
        except ValueError:
            # Deliberate best-effort: a non-JSON body is reported as data=None.
            pass
    return resp.status, data
|
||||||
|
|
||||||
|
|
||||||
|
def _log_comm(port, token, investor_name, contact, subject, create=False):
    """Drive the one-row log path; returns (status, grid_row_id).

    Posts a ``note`` to ``/api/fundraising/log-communication`` with ``subject`` used as both
    subject and body. ``grid_row_id`` is ``data.row.id`` from the response, or None when the
    response carries no row.
    """
    request_body = {
        "investor_name": investor_name,
        "create_investor_if_missing": create,
        "contact": contact,
        "type": "note",
        "subject": subject,
        "body": subject,
        "append_note": True,
    }
    st, data = _req("POST", port, "/api/fundraising/log-communication", token, request_body)
    envelope = (data or {}).get("data", {})
    row = envelope.get("row") or {}
    return st, row.get("id")
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Run the source_row_id communication-timeline regression end to end.

    Initialises the temp database, inserts one admin user, mints a token, boots
    the real handler on an ephemeral port, then checks that the timeline for a
    grid row spans all of its contacts, excludes other investors, hides
    soft-deleted communications, and returns an empty list for an unknown row.
    Exits with status 1 if any check failed.
    """
    server.init_db()
    conn = sqlite3.connect(os.environ["CRM_DB_PATH"])
    try:
        # password_hash is intentionally a non-bcrypt placeholder — we mint the token directly via
        # create_token(), so the password-verify path is never exercised.
        conn.execute("INSERT INTO users (id,username,email,password_hash,full_name,role,is_active) "
                     "VALUES ('u1','grant','grant@ten31.example','x','Grant','admin',1)")
        conn.commit()
    finally:
        conn.close()
    token = server.create_token("u1", "grant", "admin")

    def subjects(resp):
        # Tolerate a null/missing "data" list so a bad response becomes a FAIL line, not a traceback.
        return {item.get("subject") for item in ((resp or {}).get("data") or [])}

    httpd = ThreadingHTTPServer(("127.0.0.1", 0), _Quiet)
    port = httpd.server_address[1]
    threading.Thread(target=httpd.serve_forever, daemon=True).start()
    try:
        # Investor A: two contacts, one communication per contact. Create seeds the row with
        # Jane + logs "Intro call"; update-row adds John as a second pill (so the relational mirror
        # links BOTH contacts to A's row); then a comm is logged against John. The timeline must
        # aggregate across both contacts — the point of the source_row_id join over a single contact.
        st, rowA = _log_comm(port, token, "Acme Capital",
                             {"name": "Jane Doe", "email": "jane@acme.example"}, "Intro call", create=True)
        check(st == 201 and bool(rowA), f"create investor A via log path -> 201 + row id (got {st}, {rowA})")
        st, _ = _req("POST", port, "/api/fundraising/update-row", token, {
            "row_id": rowA, "investor_name": "Acme Capital",
            "contacts": [{"name": "Jane Doe", "email": "jane@acme.example"},
                         {"name": "John Roe", "email": "john@acme.example"}],
        })
        check(st == 200, f"add John as a second contact on A via update-row (got {st})")
        st, _ = _log_comm(port, token, "Acme Capital",
                          {"name": "John Roe", "email": "john@acme.example"}, "Follow-up email")
        check(st == 201, f"second contact's comm logged on A (got {st})")

        # Investor B: a separate investor, one communication (isolation control).
        st, rowB = _log_comm(port, token, "Beacon Ventures",
                             {"name": "Sam Poe", "email": "sam@beacon.example"}, "Beacon note", create=True)
        check(st == 201 and bool(rowB), f"create investor B via log path -> 201 + row id (got {st}, {rowB})")

        # ── source_row_id returns the whole investor (across contacts) ──
        print("\n[source_row_id timeline]")
        st, data = _req("GET", port, f"/api/communications?source_row_id={rowA}", token)
        subsA = subjects(data)
        check(st == 200, f"GET timeline for A -> 200 (got {st})")
        check(subsA == {"Intro call", "Follow-up email"},
              f"A's timeline spans both contacts' comms (got {subsA})")

        # ── isolation: A's row id never returns B's comms ──
        print("\n[isolation]")
        check("Beacon note" not in subsA, "A's timeline excludes investor B's comm")
        st, dataB = _req("GET", port, f"/api/communications?source_row_id={rowB}", token)
        subsB = subjects(dataB)
        check(subsB == {"Beacon note"}, f"B's timeline is its own comm only (got {subsB})")

        # ── soft-delete respected through the join ──
        print("\n[soft-delete]")
        db = sqlite3.connect(os.environ["CRM_DB_PATH"])
        try:
            db.execute("UPDATE communications SET deleted_at=? WHERE subject='Intro call'", (DEL,))
            db.commit()
        finally:
            db.close()
        st, data2 = _req("GET", port, f"/api/communications?source_row_id={rowA}", token)
        subsA2 = subjects(data2)
        check(subsA2 == {"Follow-up email"},
              f"soft-deleted comm filtered from A's timeline (got {subsA2})")

        # ── unknown row id returns empty, not an error ──
        st, data3 = _req("GET", port, "/api/communications?source_row_id=does-not-exist", token)
        check(st == 200 and (data3 or {}).get("data") == [],
              f"unknown source_row_id -> 200 + empty (got {st}, {(data3 or {}).get('data')})")
    finally:
        httpd.shutdown()
        # shutdown() only stops serve_forever(); server_close() releases the listening socket.
        httpd.server_close()

    print()
    if FAILS:
        print(f"FAILED ({len(FAILS)}):")
        for f in FAILS:
            print(f" - {f}")
        sys.exit(1)
    print("ALL PASS (grid comm timeline source_row_id filter)")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
@@ -0,0 +1,145 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Regression: GET /api/fundraising/state heals blank grid-pill emails from the relational mirror.
|
||||||
|
|
||||||
|
The grid blob is canonical for the mobile "Edit investor" sheet, but an email can reach a linked
|
||||||
|
classic contact (email capture / a contact edit) without ever being written back into the blob pill
|
||||||
|
— so the edit form showed an empty email for a contact the directory clearly had (Grant, 2026-06-20).
|
||||||
|
The state handler now fills a blank pill email from fundraising_contacts.email, else the linked
|
||||||
|
contacts.email, matched by pill order then name. This asserts:
|
||||||
|
- a blank pill whose linked contact has an email is HEALED on read;
|
||||||
|
- a blank pill whose linked contact is also blank stays blank;
|
||||||
|
- a pill that already carries an email in the blob is NEVER overwritten (fill-only).
|
||||||
|
Synthetic data only.
|
||||||
|
|
||||||
|
Run: cd backend && python3 test_grid_email_heal.py
|
||||||
|
"""
|
||||||
|
import http.client
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import sqlite3
|
||||||
|
import sys
|
||||||
|
import tempfile
|
||||||
|
import threading
|
||||||
|
from http.server import ThreadingHTTPServer
|
||||||
|
|
||||||
|
_DATA = tempfile.mkdtemp()
|
||||||
|
os.environ["CRM_DATA_DIR"] = _DATA
|
||||||
|
os.environ["CRM_DB_PATH"] = os.path.join(_DATA, "crm.db")
|
||||||
|
|
||||||
|
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||||
|
import server # noqa: E402
|
||||||
|
|
||||||
|
FAILS = []
|
||||||
|
|
||||||
|
|
||||||
|
def check(cond, msg):
    """Report one assertion: print PASS or FAIL with *msg*, recording failures in FAILS."""
    if cond:
        print(" PASS " + msg)
        return
    print(" FAIL " + msg)
    FAILS.append(msg)
|
||||||
|
|
||||||
|
|
||||||
|
class _Quiet(server.CRMHandler):
    """Request handler identical to CRMHandler except that log_message does nothing."""

    def log_message(self, *_ignored):
        # Swallow per-request log lines; only the check() output should reach stdout.
        return None
|
||||||
|
|
||||||
|
|
||||||
|
def _get_state(port, token):
    """GET /api/fundraising/state from the local test server.

    Args:
        port: TCP port of the server on 127.0.0.1.
        token: Bearer token placed in the Authorization header.

    Returns:
        A ``(status, data)`` tuple. ``data`` is the decoded JSON body, or None
        when the body is empty or is not valid JSON (so an error page surfaces
        as a failed check on the status rather than an uncaught exception).
    """
    conn = http.client.HTTPConnection("127.0.0.1", port, timeout=10)
    try:
        conn.request("GET", "/api/fundraising/state", headers={"Authorization": "Bearer " + token})
        resp = conn.getresponse()
        status = resp.status
        raw = resp.read().decode("utf-8", "replace")
    finally:
        # Always release the socket, including on timeout or a failed read.
        conn.close()

    if not raw:
        return status, None
    try:
        return status, json.loads(raw)
    except ValueError:
        return status, None
|
||||||
|
|
||||||
|
|
||||||
|
# Synthetic grid blob written into fundraising_state by seed(): two blank-email
# pills on rowW, one pill with an email already set on rowA, and one blank pill
# on rowO.
GRID = {
    "columns": [
        {"id": "investor_name", "label": "Investor", "type": "text"},
        {"id": "contacts", "label": "Contacts", "type": "contacts"},
    ],
    "rows": [
        {
            "id": "rowW",
            "investor_name": "Wyoming",
            "notes": "",
            "contacts": [
                {"name": "Philip Treick", "email": "", "title": ""},
                {"name": "Jose Briones", "email": "", "title": ""},
            ],
        },
        {
            "id": "rowA",
            "investor_name": "Acme Capital",
            "notes": "",
            "contacts": [
                {"name": "Jane Doe", "email": "keep@acme.com", "title": ""},
            ],
        },
        {
            "id": "rowO",
            "investor_name": "Orphan LP",
            "notes": "",
            "contacts": [
                {"name": "No Link", "email": "", "title": ""},
            ],
        },
    ],
}
|
||||||
|
|
||||||
|
|
||||||
|
def seed():
    """Insert the synthetic fixture rows straight into the temp SQLite database.

    Writes one admin user, the GRID blob as the 'main' fundraising_state row,
    three classic contacts, three fundraising_investors keyed by source_row_id,
    and four fundraising_contacts rows. The connection is closed even if an
    INSERT fails, so a schema mismatch does not leak the handle.
    """
    c = sqlite3.connect(os.environ["CRM_DB_PATH"])
    try:
        c.execute("INSERT INTO users (id,username,email,password_hash,full_name,role,is_active) "
                  "VALUES ('u1','grant','grant@ten31.example','x','Grant','admin',1)")
        c.execute("INSERT INTO fundraising_state (id, grid_json, views_json, version) "
                  "VALUES ('main', ?, '[]', 1) "
                  "ON CONFLICT(id) DO UPDATE SET grid_json = excluded.grid_json", (json.dumps(GRID),))
        # Classic contacts directory: Jose has the captured email the blob never got; Philip is blank.
        c.execute("INSERT INTO contacts (id,first_name,last_name,email) VALUES "
                  "('c-phil','Philip','Treick',''),"
                  "('c-jose','Jose','Briones','jbriones@uwyo.edu'),"
                  "('c-jane','Jane','Doe','other@acme.com')")  # differs from the blob's keep@acme.com
        # Relational mirror (what sync_fundraising_relational would build): blank fc.email, linked contact_id.
        c.execute("INSERT INTO fundraising_investors (id,investor_name,source_row_id,total_invested) VALUES "
                  "('inv-w','Wyoming','rowW',0),('inv-a','Acme Capital','rowA',0),('inv-o','Orphan LP','rowO',0)")
        # fc-orphan has contact_id NULL (pre-0004 orphan) and blank email — nothing to heal from.
        c.execute("INSERT INTO fundraising_contacts (id,investor_id,full_name,email,sort_order,contact_id) VALUES "
                  "('fc-phil','inv-w','Philip Treick','',0,'c-phil'),"
                  "('fc-jose','inv-w','Jose Briones','',1,'c-jose'),"
                  "('fc-jane','inv-a','Jane Doe','',0,'c-jane'),"
                  "('fc-orphan','inv-o','No Link','',0,NULL)")
        c.commit()
    finally:
        c.close()
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Run the grid email-heal regression against a real handler on an ephemeral port.

    Seeds the temp database, fetches /api/fundraising/state once, and checks
    four pills: Jose is healed from the linked contact, Philip stays blank,
    Jane keeps her blob email, and the orphan pill stays blank. Exits with
    status 1 if any check failed.
    """
    server.init_db()
    seed()
    token = server.create_token("u1", "grant", "admin")

    httpd = ThreadingHTTPServer(("127.0.0.1", 0), _Quiet)
    port = httpd.server_address[1]
    threading.Thread(target=httpd.serve_forever, daemon=True).start()
    try:
        st, d = _get_state(port, token)
        # Each level may be missing or null on an error response; fall back to empty
        # containers so the checks below report FAIL instead of raising.
        grid = ((d or {}).get("data") or {}).get("grid") or {}
        rows = grid.get("rows") or []
        by_id = {r.get("id"): r for r in rows}
        w = by_id.get("rowW", {})
        a = by_id.get("rowA", {})
        wc = w.get("contacts", [])
        ac = a.get("contacts", [])

        print("\n[heal: blank pill email filled from the linked contact (Jose)]")
        jose = next((c for c in wc if c.get("name") == "Jose Briones"), {})
        check(st == 200 and jose.get("email") == "jbriones@uwyo.edu",
              f"Jose pill healed to jbriones@uwyo.edu (got {jose.get('email')!r})")

        print("\n[heal: blank pill whose contact is also blank stays blank (Philip)]")
        phil = next((c for c in wc if c.get("name") == "Philip Treick"), {})
        check(phil.get("email", "") == "",
              f"Philip pill stays blank (got {phil.get('email')!r})")

        print("\n[heal: a pill that already has an email is never overwritten (Jane)]")
        jane = next((c for c in ac if c.get("name") == "Jane Doe"), {})
        check(jane.get("email") == "keep@acme.com",
              f"Jane pill keeps its blob email, not the contact's (got {jane.get('email')!r})")

        print("\n[heal: a pill whose fundraising_contacts row has contact_id NULL stays blank (orphan)]")
        o = by_id.get("rowO", {})
        orphan = next((c for c in o.get("contacts", []) if c.get("name") == "No Link"), {})
        check(orphan.get("email", "") == "",
              f"orphan pill (no contact_id, no email source) stays blank (got {orphan.get('email')!r})")
    finally:
        httpd.shutdown()
        # shutdown() only stops serve_forever(); server_close() releases the listening socket.
        httpd.server_close()

    print()
    if FAILS:
        print(f"FAILED ({len(FAILS)}):")
        for f in FAILS:
            print(f" - {f}")
        sys.exit(1)
    print("ALL PASS (grid email heal)")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
@@ -0,0 +1,312 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Tests for the grid → Pipeline link ("Adopt the Pipeline", v0.1.0:87).
|
||||||
|
|
||||||
|
Boots the REAL server against a temp DB and exercises the new endpoints end-to-end:
|
||||||
|
- POST /api/fundraising/pipeline/link creates exactly ONE opportunity, linked via
|
||||||
|
opportunities.fundraising_investor_id, reusing the grid's synced contact (no
|
||||||
|
POST /api/contacts side-door) and mapping the grid 'lead' -> owner;
|
||||||
|
- the link is idempotent: a re-link returns the existing opp and NEVER reseeds its
|
||||||
|
Pipeline-owned funnel fields (stage/probability) — the board owns those;
|
||||||
|
- GET /api/fundraising/state injects read-only pipeline / pipeline_stage row values
|
||||||
|
derived from the live opp;
|
||||||
|
- linking a contactless row, or an unknown row, is refused;
|
||||||
|
- POST .../unlink soft-deletes the opp (off the board, recoverable) while leaving the
|
||||||
|
grid investor row fully intact;
|
||||||
|
- deleting an investor from the grid archives its orphaned opp on the next save;
|
||||||
|
- the pipeline report + dashboard aggregates exclude archived (soft-deleted) opps.
|
||||||
|
Synthetic data only.
|
||||||
|
|
||||||
|
Run: cd backend && python3 test_grid_pipeline_link.py
|
||||||
|
"""
|
||||||
|
import http.client
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import sqlite3
|
||||||
|
import sys
|
||||||
|
import tempfile
|
||||||
|
import threading
|
||||||
|
from http.server import ThreadingHTTPServer
|
||||||
|
|
||||||
|
_DATA = tempfile.mkdtemp()
|
||||||
|
os.environ["CRM_DATA_DIR"] = _DATA
|
||||||
|
os.environ["CRM_DB_PATH"] = os.path.join(_DATA, "crm.db")
|
||||||
|
|
||||||
|
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||||
|
import server # noqa: E402
|
||||||
|
|
||||||
|
FAILS = []
|
||||||
|
|
||||||
|
|
||||||
|
def check(cond, msg):
    """Emit a PASS or FAIL line for *msg*; a falsy *cond* also appends *msg* to FAILS."""
    passed = bool(cond)
    label = " PASS " if passed else " FAIL "
    print(label + msg)
    if not passed:
        FAILS.append(msg)
|
||||||
|
|
||||||
|
|
||||||
|
class _Quiet(server.CRMHandler):
    """CRMHandler subclass with request logging turned off."""

    def log_message(self, *_unused):
        # No-op override: keeps the run output limited to the test's own prints.
        return None
|
||||||
|
|
||||||
|
|
||||||
|
def _req(port, method, path, token=None, body=None):
    """Send one HTTP request to the local test server on 127.0.0.1:*port*.

    Args:
        port: TCP port of the server.
        method: HTTP verb, e.g. "GET", "POST", "PUT", "PATCH".
        path: Request path including any query string.
        token: Optional bearer token; when falsy no Authorization header is
            sent (used to exercise the unauthenticated path).
        body: Optional JSON-serialisable payload, sent as application/json.

    Returns:
        A ``(status, data)`` tuple. ``data`` is the decoded JSON body, or None
        when the body is empty or is not valid JSON.
    """
    headers = {}
    if token:
        headers["Authorization"] = "Bearer " + token
    payload = None
    if body is not None:
        payload = json.dumps(body)
        headers["Content-Type"] = "application/json"

    conn = http.client.HTTPConnection("127.0.0.1", port, timeout=10)
    try:
        conn.request(method, path, body=payload, headers=headers)
        resp = conn.getresponse()
        status = resp.status
        raw = resp.read().decode("utf-8", "replace")
    finally:
        # Close on every path so a timeout or reset does not leak the socket.
        conn.close()

    data = None
    if raw:
        try:
            data = json.loads(raw)
        except ValueError:
            # Non-JSON body: the caller still gets the status code.
            pass
    return status, data
|
||||||
|
|
||||||
|
|
||||||
|
def _put_grid(port, token, rows):
    """PUT *rows* to /api/fundraising/state with empty columns and views; returns _req's result."""
    state = {
        "grid": {"columns": [], "rows": rows},
        "views": [],
    }
    return _req(port, "PUT", "/api/fundraising/state", token, state)
|
||||||
|
|
||||||
|
|
||||||
|
# Synthetic grid rows: one with a lead and a contact, one with a contact but no
# lead, and one with no contacts at all.
ROW_ACME = {
    "id": "rowAcme",
    "investor_name": "Acme Capital",
    "notes": "",
    "lead": "Grant",
    "contacts": [
        {"name": "Jane Doe", "email": "jane@acme.com", "title": "GP"},
    ],
}
ROW_BETA = {
    "id": "rowBeta",
    "investor_name": "Beta Capital LLC",
    "notes": "",
    "lead": "",
    "contacts": [
        {"name": "Pat Roe", "email": "pat@beta.com", "title": ""},
    ],
}
ROW_EMPTY = {
    "id": "rowEmpty",
    "investor_name": "Empty LP",
    "notes": "",
    "contacts": [],
}
|
||||||
|
|
||||||
|
|
||||||
|
def _db():
    """Open and return a new sqlite3 connection to the database named by CRM_DB_PATH."""
    db_path = os.environ["CRM_DB_PATH"]
    return sqlite3.connect(db_path)
|
||||||
|
|
||||||
|
|
||||||
|
def seed():
    """Insert the single admin user ('u1' / grant) the test authenticates as.

    The connection is closed even when the INSERT fails, so a schema mismatch
    does not leave the database handle open.
    """
    c = _db()
    try:
        c.execute("INSERT INTO users (id,username,email,password_hash,full_name,role,is_active) "
                  "VALUES ('u1','grant','grant@ten31.example','x','Grant','admin',1)")
        c.commit()
    finally:
        c.close()
|
||||||
|
|
||||||
|
|
||||||
|
def _opp_count_live(fr_investor_id=None):
    """Count opportunities whose deleted_at is NULL.

    Args:
        fr_investor_id: When truthy, count only rows whose
            fundraising_investor_id equals it. When falsy (the default None,
            but also "" or a None passed through from a failed link), count
            every live opportunity — callers should assert the id is truthy
            before relying on a per-investor count.

    Returns:
        The integer row count.
    """
    c = _db()
    try:
        if fr_investor_id:
            n = c.execute("SELECT COUNT(*) FROM opportunities WHERE fundraising_investor_id = ? "
                          "AND deleted_at IS NULL", (fr_investor_id,)).fetchone()[0]
        else:
            n = c.execute("SELECT COUNT(*) FROM opportunities WHERE deleted_at IS NULL").fetchone()[0]
    finally:
        # Release the handle even if the query raises (e.g. missing table/column).
        c.close()
    return n
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Run the grid -> Pipeline link test suite end to end.

    Initialises and seeds the temp database, boots the real handler on an
    ephemeral port, and walks the link / re-link / validation / read-injection
    / round-trip / guard / unlink / aggregate / orphan / manual-deal scenarios
    in order (later steps reuse ids captured by earlier ones). Every assertion
    goes through check(); the process exits 1 if any failed, else 0.
    """
    server.init_db()
    seed()
    token = server.create_token("u1", "grant", "admin")

    httpd = ThreadingHTTPServer(("127.0.0.1", 0), _Quiet)
    port = httpd.server_address[1]
    threading.Thread(target=httpd.serve_forever, daemon=True).start()
    try:
        st, _ = _put_grid(port, token, [ROW_ACME, ROW_BETA, ROW_EMPTY])
        check(st == 200, f"seed grid via PUT /state (got {st})")

        # ── link creates one linked opp with the seeds + resolved contact + mapped owner ──
        print("\n[link: creates one linked opportunity with seeds]")
        st, d = _req(port, "POST", "/api/fundraising/pipeline/link", token, {
            "source_row_id": "rowAcme", "fund_name": "Fund III",
            "expected_amount": 250000, "probability": 40, "stage": "engaged",
        })
        opp = (d or {}).get("data") or {}
        check(st == 201 and (d or {}).get("already_linked") is False, f"link -> 201 new (got {st}, {d})")
        check(opp.get("stage") == "engaged" and opp.get("expected_amount") == 250000
              and opp.get("probability") == 40 and opp.get("fund_name") == "Fund III",
              f"seeds applied (got {{stage:{opp.get('stage')}, amt:{opp.get('expected_amount')}, "
              f"prob:{opp.get('probability')}, fund:{opp.get('fund_name')}}})")
        check(opp.get("first_name") == "Jane", f"reused synced contact Jane Doe (got {opp.get('first_name')})")
        check(opp.get("owner_name") == "Grant", f"grid lead 'Grant' -> owner Grant (got {opp.get('owner_name')})")
        fr_id = opp.get("fundraising_investor_id")
        check(bool(fr_id), f"opportunity carries fundraising_investor_id (got {fr_id})")
        check(_opp_count_live(fr_id) == 1, "exactly one live opp linked to the investor")
        opp_id = opp.get("id")
        jane_contact_id = opp.get("contact_id")

        # ── idempotent re-link: returns existing, board-owned stage NOT reseeded ──
        print("\n[idempotent: re-link returns existing opp without reseeding funnel fields]")
        st, _ = _req(port, "PATCH", f"/api/opportunities/{opp_id}/stage", token, {"stage": "diligence"})
        check(st == 200, f"advance stage on the board -> diligence (got {st})")
        st, d = _req(port, "POST", "/api/fundraising/pipeline/link", token, {
            "source_row_id": "rowAcme", "stage": "lead", "expected_amount": 999, "probability": 5,
        })
        opp2 = (d or {}).get("data") or {}
        check(st == 200 and (d or {}).get("already_linked") is True, f"re-link -> already_linked (got {st}, {d})")
        check(opp2.get("stage") == "diligence" and opp2.get("expected_amount") == 250000,
              f"funnel fields preserved, not reseeded (got stage={opp2.get('stage')}, amt={opp2.get('expected_amount')})")
        check(_opp_count_live(fr_id) == 1, "still exactly one live opp (no duplicate)")

        # ── stage validation: legacy/invalid values rejected (4-stage enum guard) ──
        # The stage check precedes the contact lookup in handle_create_opportunity, so a fake
        # contact_id still surfaces the stage error first.
        print("\n[validation: legacy stage values rejected by stage + create endpoints]")
        st, _ = _req(port, "PATCH", f"/api/opportunities/{opp_id}/stage", token, {"stage": "outreach"})
        check(st >= 400, f"PATCH legacy stage 'outreach' rejected (got {st})")
        st, _ = _req(port, "POST", "/api/opportunities", token,
                     {"name": "X", "contact_id": "x", "stage": "due_diligence"})
        check(st >= 400, f"POST opportunity with legacy stage 'due_diligence' rejected (got {st})")

        # ── read-injection: GET state shows pipeline flag + stage, derived live ──
        print("\n[read-injection: GET /state exposes read-only pipeline + pipeline_stage]")
        st, d = _req(port, "GET", "/api/fundraising/state", token)
        rows = {r["id"]: r for r in (d or {}).get("data", {}).get("grid", {}).get("rows", [])}
        check(rows.get("rowAcme", {}).get("pipeline") is True
              and rows.get("rowAcme", {}).get("pipeline_stage") == "diligence",
              f"rowAcme pipeline true @diligence (got {rows.get('rowAcme', {}).get('pipeline')}, "
              f"{rows.get('rowAcme', {}).get('pipeline_stage')})")
        # Read-only opportunity_id is injected for a linked row so the mobile grid detail can
        # PATCH the stage on the opportunities endpoint (the grid row carries no opp id otherwise).
        check(rows.get("rowAcme", {}).get("opportunity_id") == opp_id,
              f"rowAcme carries the live opportunity_id (got {rows.get('rowAcme', {}).get('opportunity_id')}, want {opp_id})")
        # Phase 0 derived signals are injected read-only on every row (values depend on seed;
        # assert the keys are present so the strip/inject round-trip below is meaningful).
        check(all(k in rows.get("rowAcme", {}) for k in ("existing_investor", "staleness", "last_activity_at")),
              f"rowAcme carries derived existing_investor/staleness/last_activity (keys: {sorted(rows.get('rowAcme', {}).keys())})")
        check(rows.get("rowBeta", {}).get("pipeline") is False
              and rows.get("rowBeta", {}).get("pipeline_stage") == ""
              and rows.get("rowBeta", {}).get("opportunity_id") == "",
              f"rowBeta not in pipeline (got {rows.get('rowBeta', {}).get('pipeline')}, "
              f"opp_id={rows.get('rowBeta', {}).get('opportunity_id')!r})")

        # ── round-trip: a save echoing the injected read-only values is lossless ──
        print("\n[round-trip: PUT carrying injected pipeline values strips them, link intact]")
        st, d = _req(port, "GET", "/api/fundraising/state", token)
        echoed = (d or {}).get("data", {}).get("grid", {}).get("rows", [])
        st, _ = _put_grid(port, token, echoed)  # as the frontend autosave would, rows still carry pipeline*
        check(st == 200, f"echo-back save -> 200 (got {st})")
        check(_opp_count_live(fr_id) == 1, "link survives the round-trip (no dup, not archived)")
        c = _db()
        try:
            blob = json.loads(c.execute("SELECT grid_json FROM fundraising_state WHERE id='main'").fetchone()[0])
        finally:
            # Close the handle even if the state row is missing or the blob is malformed.
            c.close()
        stored_acme = {r["id"]: r for r in blob.get("rows", [])}.get("rowAcme", {})
        check(not any(k in stored_acme for k in ("pipeline", "pipeline_stage", "opportunity_id",
                                                 "existing_investor", "staleness", "last_activity_at")),
              "computed keys (pipeline + opportunity_id + existing_investor/staleness/last_activity) NOT persisted into the grid blob")
        st, d = _req(port, "GET", "/api/fundraising/state", token)
        rt = {r["id"]: r for r in (d or {}).get("data", {}).get("grid", {}).get("rows", [])}.get("rowAcme", {})
        check(rt.get("pipeline") is True and rt.get("pipeline_stage") == "diligence"
              and rt.get("opportunity_id") == opp_id,
              f"pipeline values re-injected after round-trip (got {rt.get('pipeline')}, {rt.get('pipeline_stage')}, opp_id={rt.get('opportunity_id')!r})")
        check(all(k in rt for k in ("existing_investor", "staleness", "last_activity_at")),
              "derived signals re-injected after round-trip")

        # ── guards ──
        print("\n[guard: a contactless row cannot be added to the pipeline]")
        st, d = _req(port, "POST", "/api/fundraising/pipeline/link", token, {"source_row_id": "rowEmpty"})
        check(st == 400, f"no contact -> 400 (got {st}, {d})")
        check(_opp_count_live() == 1, "no stray opp created for the contactless row")

        print("\n[guard: unknown grid row -> 404]")
        st, _ = _req(port, "POST", "/api/fundraising/pipeline/link", token, {"source_row_id": "nope"})
        check(st == 404, f"unknown row -> 404 (got {st})")

        print("\n[guard: unauthenticated -> 401]")
        st, _ = _req(port, "POST", "/api/fundraising/pipeline/link", None, {"source_row_id": "rowAcme"})
        check(st == 401, f"no token -> 401 (got {st})")

        # ── the opp loads on the board + counts in the dashboard while live ──
        print("\n[board + dashboard count the live opp]")
        st, d = _req(port, "GET", "/api/opportunities?limit=1000", token)
        ids = [o["id"] for o in (d or {}).get("data", [])]
        check(opp_id in ids, "linked opp appears on the board")
        # 8h deep-link: the opp list injects source_row_id from the durable fundraising_investor_id
        # (the mobile Pipeline detail's "Open investor in Grid" target).
        acme_opp = next((o for o in (d or {}).get("data", []) if o["id"] == opp_id), {})
        check(acme_opp.get("source_row_id") == "rowAcme",
              f"opp list injects source_row_id == 'rowAcme' (got {acme_opp.get('source_row_id')!r})")
        st, d = _req(port, "GET", "/api/reports/dashboard", token)
        active = (d or {}).get("data", {}).get("metrics", {}).get("active_opportunities")
        check(active == 1, f"dashboard active_opportunities == 1 (got {active})")

        # ── unlink soft-deletes the opp; the GRID ROW stays fully intact ──
        print("\n[unlink: archives the opp, leaves the grid investor intact]")
        st, d = _req(port, "POST", "/api/fundraising/pipeline/unlink", token, {"source_row_id": "rowAcme"})
        check(st == 200 and (d or {}).get("data", {}).get("archived") == 1, f"unlink -> archived 1 (got {st}, {d})")
        check(_opp_count_live(fr_id) == 0, "opp is no longer live (soft-deleted)")
        c = _db()
        try:
            gone = c.execute("SELECT deleted_at FROM opportunities WHERE id = ?", (opp_id,)).fetchone()[0]
            inv_still = c.execute("SELECT investor_name FROM fundraising_investors WHERE source_row_id = 'rowAcme'").fetchone()
            contact_still = c.execute("SELECT COUNT(*) FROM fundraising_contacts WHERE investor_id = ?", (fr_id,)).fetchone()[0]
        finally:
            # Close the handle even if one of the lookups raises.
            c.close()
        check(gone is not None, "opp row tombstoned (deleted_at set), not hard-deleted")
        check(inv_still and inv_still[0] == "Acme Capital", "grid investor row untouched by unlink")
        check(contact_still >= 1, "grid investor's contacts untouched by unlink")
        st, d = _req(port, "GET", "/api/opportunities?limit=1000", token)
        check(opp_id not in [o["id"] for o in (d or {}).get("data", [])], "archived opp left the board")
        st, d = _req(port, "GET", "/api/fundraising/state", token)
        rows = {r["id"]: r for r in (d or {}).get("data", {}).get("grid", {}).get("rows", [])}
        check(rows.get("rowAcme", {}).get("pipeline") is False, "grid no longer flags rowAcme as in-pipeline")

        # ── aggregates exclude the archived opp ──
        print("\n[aggregates exclude archived opps]")
        st, d = _req(port, "GET", "/api/reports/dashboard", token)
        active = (d or {}).get("data", {}).get("metrics", {}).get("active_opportunities")
        check(active == 0, f"dashboard active_opportunities back to 0 (got {active})")
        st, d = _req(port, "GET", "/api/reports/pipeline", token)
        by_stage = (d or {}).get("data", {}).get("by_stage", [])
        total = sum(s.get("count", 0) for s in by_stage)
        check(total == 0, f"pipeline report by_stage excludes archived (got total {total})")

        # ── re-link after unlink: a fresh opp is created (the archived one stays archived) ──
        print("\n[re-link after unlink: creates a new opp, flag reappears]")
        st, d = _req(port, "POST", "/api/fundraising/pipeline/link", token, {
            "source_row_id": "rowAcme", "stage": "engaged", "expected_amount": 50000,
        })
        relinked = (d or {}).get("data") or {}
        check(st == 201 and (d or {}).get("already_linked") is False and relinked.get("id") != opp_id,
              f"re-link -> a NEW opp distinct from the archived one (got {st}, {relinked.get('id')} vs {opp_id})")
        check(_opp_count_live(fr_id) == 1, "exactly one live opp again after re-link")
        st, _ = _req(port, "POST", "/api/fundraising/pipeline/unlink", token, {"source_row_id": "rowAcme"})
        check(st == 200, "reset: unlink the re-linked opp")

        # ── orphan reconciler: deleting the investor from the grid archives its opp ──
        print("\n[orphan: deleting the grid investor archives its linked opp on next save]")
        st, d = _req(port, "POST", "/api/fundraising/pipeline/link", token, {
            "source_row_id": "rowBeta", "stage": "lead", "expected_amount": 100000,
        })
        beta = (d or {}).get("data") or {}
        beta_opp_id, beta_fr = beta.get("id"), beta.get("fundraising_investor_id")
        check(st == 201 and _opp_count_live(beta_fr) == 1, f"beta linked (got {st})")
        # drop rowBeta from the grid (keep the others)
        st, _ = _put_grid(port, token, [ROW_ACME, ROW_EMPTY])
        check(st == 200, f"save grid without rowBeta (got {st})")
        check(_opp_count_live(beta_fr) == 0, "beta's orphaned opp archived by the reconciler")
        st, d = _req(port, "GET", "/api/opportunities?limit=1000", token)
        check(beta_opp_id not in [o["id"] for o in (d or {}).get("data", [])], "orphaned opp left the board")

        # ── 8h: a manually-created deal (no fundraising_investor_id) has a null source_row_id, so
        # the mobile Pipeline detail hides "Open investor in Grid" for it ──
        print("\n[8h: opp source_row_id is null for a deal with no grid link]")
        st, d = _req(port, "POST", "/api/opportunities", token,
                     {"name": "Manual deal", "contact_id": jane_contact_id, "stage": "lead"})
        manual_id = (d or {}).get("data", {}).get("id")
        check(st in (200, 201) and bool(manual_id), f"create a manual (non-grid) opp (got {st})")
        st, d = _req(port, "GET", "/api/opportunities?limit=1000", token)
        manual = next((o for o in (d or {}).get("data", []) if o["id"] == manual_id), {})
        check(manual.get("source_row_id") in (None, ""),
              f"manual opp has null source_row_id (got {manual.get('source_row_id')!r})")
    finally:
        httpd.shutdown()
        # shutdown() only stops serve_forever(); server_close() releases the listening socket.
        httpd.server_close()

    print("\n" + ("ALL PASS" if not FAILS else f"{len(FAILS)} FAILURE(S):"))
    for f in FAILS:
        print(" - " + f)
    sys.exit(1 if FAILS else 0)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
@@ -0,0 +1,275 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Tests for the in-app business-card intake endpoint (#7): POST /api/intake/card.
|
||||||
|
|
||||||
|
The endpoint reuses the Matrix card flow's nio-free core — vision-transcribe (spark) -> text
|
||||||
|
parse (parse) -> the same fuzzy matcher (find_intake_match / find_intake_candidates) — minus
|
||||||
|
Matrix, surfaced for a mobile sheet. The real vision/OCR path is live-smoke only (same as the
|
||||||
|
Matrix M3 path), so here we STUB the two network legs and assert the wiring + contract:
|
||||||
|
- happy path: transcribe -> parse -> proposal + match/candidates, status 200 ok:true;
|
||||||
|
- the email-integrity rule rides along (a model-minted address NOT in the transcription is
|
||||||
|
dropped in favor of the one literally present), exactly as on the text/Matrix path;
|
||||||
|
- new-vs-existing: an exact firm name returns `match`; a near-spelling returns `candidates`;
|
||||||
|
- soft-fails: an unreadable image -> ok:false/unreadable; vision down -> 502/vision_unavailable;
|
||||||
|
- guards: missing/invalid image -> 400; unauthenticated -> 401;
|
||||||
|
- provenance: the approve write reuses log-communication tagged source="app_card".
|
||||||
|
Synthetic data only.
|
||||||
|
|
||||||
|
Run: cd backend && python3 test_intake_card.py
|
||||||
|
"""
|
||||||
|
import base64
|
||||||
|
import http.client
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import sqlite3
|
||||||
|
import sys
|
||||||
|
import tempfile
|
||||||
|
import threading
|
||||||
|
from http.server import ThreadingHTTPServer
|
||||||
|
|
||||||
|
_DATA = tempfile.mkdtemp()
|
||||||
|
os.environ["CRM_DATA_DIR"] = _DATA
|
||||||
|
os.environ["CRM_DB_PATH"] = os.path.join(_DATA, "crm.db")
|
||||||
|
|
||||||
|
_BACKEND = os.path.dirname(os.path.abspath(__file__))
|
||||||
|
sys.path.insert(0, _BACKEND)
|
||||||
|
sys.path.insert(0, os.path.join(_BACKEND, "ingest")) # llm
|
||||||
|
sys.path.insert(0, os.path.join(_BACKEND, "matrix_intake")) # spark, parse
|
||||||
|
|
||||||
|
import server # noqa: E402
|
||||||
|
import llm # noqa: E402 (ingest/llm.py — patched so spark.parse_json hits no network)
|
||||||
|
import spark # noqa: E402 (matrix_intake/spark.py — transcribe_card stubbed)
|
||||||
|
import parse # noqa: E402 (matrix_intake/parse.py — parse_message defaults to spark.parse_json)
|
||||||
|
|
||||||
|
FAILS = []
|
||||||
|
|
||||||
|
# The handler imports `spark`/`parse` lazily and looks up transcribe_card on the module at call
|
||||||
|
# time, so patching the module attribute here takes effect. parse.parse_message binds its default
|
||||||
|
# parse_fn=spark.parse_json at import, and spark.parse_json calls llm.chat_json dynamically — so
|
||||||
|
# patching llm.chat_json (not spark.parse_json) is what reaches the parse leg.
|
||||||
|
_STATE = {"transcription": "", "raw": {}, "boom": False}
|
||||||
|
|
||||||
|
|
||||||
|
def _fake_transcribe(image_b64, mime="image/jpeg", chat_fn=None):
    """Stub for ``spark.transcribe_card`` driven entirely by ``_STATE``.

    All three arguments are accepted and ignored. Returns the canned
    ``_STATE["transcription"]`` string, or raises ``RuntimeError`` when
    ``_STATE["boom"]`` is truthy (the test's "vision down" switch).
    """
    if not _STATE["boom"]:
        return _STATE["transcription"]
    raise RuntimeError("spark control unreachable")
|
||||||
|
|
||||||
|
|
||||||
|
def _fake_chat_json(prompt, system=None, max_tokens=200):
    """Stub for ``llm.chat_json``: ignore the prompt and return the canned parse.

    Returns a fresh shallow copy of ``_STATE["raw"]`` so the caller can mutate
    the result without altering the fixture.
    """
    snapshot = dict(_STATE["raw"])
    return snapshot
|
||||||
|
|
||||||
|
|
||||||
|
# Install the stubs on the imported modules: the transcribe leg goes through
# spark.transcribe_card and the parse leg ends in llm.chat_json, so replacing these two
# attributes is what keeps the test off the network.
spark.transcribe_card = _fake_transcribe
llm.chat_json = _fake_chat_json
|
||||||
|
|
||||||
|
|
||||||
|
def check(cond, msg):
    """Print a PASS/FAIL line for *msg*; on failure also record it in ``FAILS``."""
    if cond:
        print(" PASS " + msg)
        return
    print(" FAIL " + msg)
    FAILS.append(msg)
|
||||||
|
|
||||||
|
|
||||||
|
class _Quiet(server.CRMHandler):
    """``server.CRMHandler`` with ``log_message`` turned into a no-op."""

    def log_message(self, *_ignored):
        """Discard the log line so it does not interleave with PASS/FAIL output."""
        return None
|
||||||
|
|
||||||
|
|
||||||
|
def _req(port, method, path, token=None, body=None):
    """Send one HTTP request to the test server on 127.0.0.1 and decode the reply.

    Args:
        port: TCP port the server is listening on.
        method: HTTP verb, e.g. ``"GET"`` or ``"POST"``.
        path: Request target, including any query string.
        token: Optional bearer token; when truthy it is sent as ``Authorization``.
        body: Optional JSON-serialisable payload; when not ``None`` it is sent as
            ``application/json``.

    Returns:
        ``(status, data)`` where ``data`` is the parsed JSON body, or ``None`` when
        the body is empty or is not valid JSON.
    """
    headers = {}
    if token:
        headers["Authorization"] = "Bearer " + token
    payload = None
    if body is not None:
        payload = json.dumps(body)
        headers["Content-Type"] = "application/json"

    conn = http.client.HTTPConnection("127.0.0.1", port, timeout=10)
    try:
        conn.request(method, path, body=payload, headers=headers)
        resp = conn.getresponse()
        status = resp.status
        raw = resp.read().decode("utf-8", "replace")
    finally:
        # Close on the error path too, so a refused/timed-out call does not leak the socket.
        conn.close()

    if not raw:
        return status, None
    try:
        return status, json.loads(raw)
    except ValueError:
        # Non-JSON body (e.g. a plain-text error page): report the status alone.
        return status, None
|
||||||
|
|
||||||
|
|
||||||
|
GRID = {
|
||||||
|
"columns": [],
|
||||||
|
"rows": [
|
||||||
|
{"id": "rowAcme", "investor_name": "Acme Capital", "notes": "",
|
||||||
|
"contacts": [{"name": "Jane Doe", "email": "jane@acme.com", "title": "GP"}]},
|
||||||
|
],
|
||||||
|
}
|
||||||
|
|
||||||
|
# Valid base64 wrapping arbitrary bytes (not a real image). Transcription is stubbed by
# _fake_transcribe, which ignores the payload, so only the base64 well-formedness matters.
_IMG = base64.b64encode(b"not-a-real-image-just-valid-base64").decode()
|
||||||
|
|
||||||
|
|
||||||
|
def seed():
    """Insert the admin user ``u1`` and the ``GRID`` fixture into the temp database.

    The grid is upserted into the ``fundraising_state`` row with id ``'main'``: an
    existing row keeps its other columns and only ``grid_json`` is replaced.

    Raises:
        sqlite3.Error: if either statement fails (e.g. the schema is missing).
    """
    conn = sqlite3.connect(os.environ["CRM_DB_PATH"])
    try:
        conn.execute("INSERT INTO users (id,username,email,password_hash,full_name,role,is_active) "
                     "VALUES ('u1','grant','grant@ten31.example','x','Grant','admin',1)")
        conn.execute("INSERT INTO fundraising_state (id, grid_json, views_json, version) "
                     "VALUES ('main', ?, '[]', 1) "
                     "ON CONFLICT(id) DO UPDATE SET grid_json = excluded.grid_json", (json.dumps(GRID),))
        conn.commit()
    finally:
        # Release the handle even when an INSERT raises, so the DB file is not left open.
        conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Boot the real server on an ephemeral port and run every card-intake check.

    Initialises and seeds the temp database, mints an admin token, serves
    ``_Quiet`` from a daemon thread, then drives POST /api/intake/card (and one
    log-communication write) through ``_req`` while steering the stubbed
    transcribe/parse legs via ``_STATE``. Each assertion is recorded by ``check``.

    Exits the process with status 1 when any check failed; otherwise prints the
    "ALL PASS" line and returns.
    """
    server.init_db()
    seed()
    token = server.create_token("u1", "grant", "admin")

    httpd = ThreadingHTTPServer(("127.0.0.1", 0), _Quiet)
    port = httpd.server_address[1]
    threading.Thread(target=httpd.serve_forever, daemon=True).start()
    try:
        print("\n[happy path: transcribe -> parse -> proposal, new investor, no match]")
        _STATE["transcription"] = ("Sam Lee\nPartner\nBeacon Ventures\n"
                                   "sam@beacon.vc\nMobile: +1 555 987 6543")
        _STATE["raw"] = {"intent": "new_investor", "investor_name": "Beacon Ventures",
                         "contact_name": "Sam Lee", "contact_title": "Partner",
                         "mobile": "+1 555 987 6543", "contact_email": "sam@beacon.vc"}
        _STATE["boom"] = False
        st, d = _req(port, "POST", "/api/intake/card", token, {"image_b64": _IMG})
        # `or {}` also covers a literal `"data": null` envelope, so a bad reply is
        # recorded as FAILs instead of aborting the run with AttributeError.
        data = (d or {}).get("data") or {}
        p = data.get("proposal") or {}
        check(st == 200 and data.get("ok") is True, f"200 ok:true (got {st}, {data})")
        check(p.get("investor_name") == "Beacon Ventures" and p.get("contact_name") == "Sam Lee",
              f"proposal carries firm + person (got {p})")
        check(p.get("contact_email") == "sam@beacon.vc", f"email kept (got {p.get('contact_email')})")
        check(p.get("mobile") == "+1 555 987 6543", f"mobile kept (got {p.get('mobile')})")
        # .get() rather than [] so a missing key is a recorded FAIL, not a KeyError.
        check("transcription" in data and data.get("match") is None and data.get("candidates") == [],
              f"transcription returned, unknown firm -> no match/candidates (got {data})")
        check(not any(k.startswith("_") for k in p), f"internal control keys stripped (got {list(p)})")

        print("\n[email integrity: a model-minted address NOT in the card is dropped]")
        _STATE["transcription"] = "Ann Roe\nDir\nOmega LP\nann@omega.fund"  # the only address present
        _STATE["raw"] = {"intent": "new_investor", "investor_name": "Omega LP",
                         "contact_name": "Ann Roe", "contact_email": "evil@phish.example"}
        st, d = _req(port, "POST", "/api/intake/card", token, {"image_b64": _IMG})
        p = ((d or {}).get("data") or {}).get("proposal") or {}
        check(p.get("contact_email") == "ann@omega.fund",
              f"source address wins over the minted one (got {p.get('contact_email')})")

        print("\n[match: exact firm name returns the grid row id]")
        _STATE["transcription"] = "Jane Doe\nGP\nAcme Capital"  # no email -> match on name
        _STATE["raw"] = {"intent": "new_investor", "investor_name": "Acme Capital",
                         "contact_name": "Jane Doe", "contact_title": "GP"}
        st, d = _req(port, "POST", "/api/intake/card", token, {"image_b64": _IMG})
        m = ((d or {}).get("data") or {}).get("match")
        check(m and m.get("id") == "rowAcme", f"exact firm -> match rowAcme (got {m})")

        print("\n[match by card email: exact contact email returns the grid row id]")
        _STATE["transcription"] = "Jane Doe\nGP\nAcme Capital Group\njane@acme.com"
        _STATE["raw"] = {"intent": "new_investor", "investor_name": "Acme Capital Group",
                         "contact_name": "Jane Doe"}
        st, d = _req(port, "POST", "/api/intake/card", token, {"image_b64": _IMG})
        m = ((d or {}).get("data") or {}).get("match")
        check(m and m.get("id") == "rowAcme" and m.get("matched_on") == "email",
              f"card email -> exact match rowAcme on email (got {m})")

        print("\n[fuzzy: a near-spelling returns a candidate, no exact match]")
        # Typo in the DISTINCTIVE token ('Acme'->'Acne') so the fuzzy matcher surfaces it; a typo
        # in a generic descriptor (e.g. 'Capitol') wouldn't, since those are stripped first.
        _STATE["transcription"] = "Jane Doe\nGP\nAcne Capital"  # no email -> name-only fuzzy
        _STATE["raw"] = {"intent": "new_investor", "investor_name": "Acne Capital",
                         "contact_name": "Jane Doe"}
        st, d = _req(port, "POST", "/api/intake/card", token, {"image_b64": _IMG})
        data = (d or {}).get("data") or {}
        cids = [c["id"] for c in data.get("candidates", [])]
        check(data.get("match") is None and "rowAcme" in cids,
              f"near-spelling -> candidate rowAcme, no exact (got {data})")

        print("\n[no firm and no person: readable but unactionable -> ok:true, no DB lookup, no 500]")
        _STATE["transcription"] = "some faded scribbles, no usable fields"  # >=5 chars, no email/firm
        _STATE["raw"] = {"intent": "unclear"}
        st, d = _req(port, "POST", "/api/intake/card", token, {"image_b64": _IMG})
        data = (d or {}).get("data") or {}
        check(st == 200 and data.get("ok") is True
              and data.get("match") is None and data.get("candidates") == [],
              f"unclear proposal -> ok:true, no match/candidates, not 500 (got {st}, {data})")

        print("\n[parse leg down: parse_message raises -> 502/vision_unavailable]")

        def _parse_down(*args, **kwargs):
            """Stand-in for parse.parse_message that always fails."""
            raise RuntimeError("qwen down")

        _orig_pm = parse.parse_message
        parse.parse_message = _parse_down
        try:
            _STATE["transcription"] = "Jane Doe\nGP\nAcme Capital"
            st, d = _req(port, "POST", "/api/intake/card", token, {"image_b64": _IMG})
            data = (d or {}).get("data") or {}
            check(st == 502 and data.get("reason") == "vision_unavailable",
                  f"parse error -> 502 vision_unavailable (got {st}, {data})")
        finally:
            # Always restore the real parser so the remaining sections are unaffected.
            parse.parse_message = _orig_pm

        print("\n[unreadable: model saw no card -> ok:false/unreadable, 200]")
        _STATE["transcription"] = ""  # transcribe_card returns '' on the NONE sentinel
        st, d = _req(port, "POST", "/api/intake/card", token, {"image_b64": _IMG})
        data = (d or {}).get("data") or {}
        check(st == 200 and data.get("ok") is False and data.get("reason") == "unreadable",
              f"empty transcription -> unreadable (got {st}, {data})")

        print("\n[vision down: transcribe raises -> 502/vision_unavailable]")
        _STATE["boom"] = True
        st, d = _req(port, "POST", "/api/intake/card", token, {"image_b64": _IMG})
        data = (d or {}).get("data") or {}
        check(st == 502 and data.get("reason") == "vision_unavailable",
              f"spark error -> 502 vision_unavailable (got {st}, {data})")
        _STATE["boom"] = False

        print("\n[data-URI tolerated: a full data: prefix is stripped to raw base64]")
        _STATE["transcription"] = "Sam Lee\nPartner\nBeacon Ventures"
        _STATE["raw"] = {"intent": "new_investor", "investor_name": "Beacon Ventures",
                         "contact_name": "Sam Lee"}
        st, d = _req(port, "POST", "/api/intake/card", token,
                     {"image_b64": "data:image/jpeg;base64," + _IMG})
        check(st == 200 and ((d or {}).get("data") or {}).get("ok") is True,
              f"data-URI accepted (got {st})")

        print("\n[guard: missing image -> 400]")
        st, _ = _req(port, "POST", "/api/intake/card", token, {})
        check(st == 400, f"no image_b64 -> 400 (got {st})")

        print("\n[guard: malformed base64 -> 400]")
        st, _ = _req(port, "POST", "/api/intake/card", token, {"image_b64": "%%%not base64%%%"})
        check(st == 400, f"invalid base64 -> 400 (got {st})")

        print("\n[guard: oversized image -> 413 (size check runs before decode)]")
        st, _ = _req(port, "POST", "/api/intake/card", token, {"image_b64": "A" * 12_000_001})
        check(st == 413, f"over the 12 MB b64 cap -> 413 (got {st})")

        print("\n[guard: unauthenticated -> 401]")
        st, _ = _req(port, "POST", "/api/intake/card", None, {"image_b64": _IMG})
        check(st == 401, f"no token -> 401 (got {st})")

        print("\n[provenance: the approve write reuses log-communication tagged source=app_card]")
        st, d = _req(port, "POST", "/api/fundraising/log-communication", token, {
            "investor_name": "Beacon Ventures",
            "contact": {"name": "Sam Lee", "email": "sam@beacon.vc", "title": "Partner"},
            "create_investor_if_missing": True,
            "type": "note", "subject": "", "body": "scanned business card",
            "source": "app_card",
        })
        check(st in (200, 201), f"app_card create -> 201 (got {st})")
        c = sqlite3.connect(os.environ["CRM_DB_PATH"])
        try:
            rows = c.execute("SELECT changes FROM audit_log WHERE entity_type='communication' AND action='create'").fetchall()
        finally:
            c.close()
        sources = [json.loads(r[0]).get("source") for r in rows if r[0]]
        check("app_card" in sources, f"audit carries source=app_card (got {sources})")
    finally:
        httpd.shutdown()
        # shutdown() only stops serve_forever(); server_close() releases the listening socket.
        httpd.server_close()

    print()
    if FAILS:
        print(f"FAILED ({len(FAILS)}):")
        for f in FAILS:
            print(f" - {f}")
        sys.exit(1)
    print("ALL PASS (in-app card intake endpoint)")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
@@ -0,0 +1,259 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Tests for the Matrix-intake CRM surface (v0.1.0 Matrix-intake M2).
|
||||||
|
|
||||||
|
The bot adds no parallel write path — it reuses /api/fundraising/log-communication and adds
|
||||||
|
one read-only lookup, GET /api/intake/match. This boots the REAL server against a temp DB and
|
||||||
|
asserts:
|
||||||
|
- match by normalized name and by contact email, returning the GRID ROW id;
|
||||||
|
- the new-vs-existing contract: a bot-style create (log-communication +
|
||||||
|
create_investor_if_missing) then matches by name — so an approved note lands on that same
|
||||||
|
investor instead of duplicating it;
|
||||||
|
- provenance: an intake-sourced communication is audited with source="matrix_intake";
|
||||||
|
- guards: missing q/email -> 400, unauthenticated -> 401.
|
||||||
|
Synthetic data only.
|
||||||
|
|
||||||
|
Run: cd backend && python3 test_intake_endpoints.py
|
||||||
|
"""
|
||||||
|
import http.client
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import sqlite3
|
||||||
|
import sys
|
||||||
|
import tempfile
|
||||||
|
import threading
|
||||||
|
from http.server import ThreadingHTTPServer
|
||||||
|
|
||||||
|
_DATA = tempfile.mkdtemp()
|
||||||
|
os.environ["CRM_DATA_DIR"] = _DATA
|
||||||
|
os.environ["CRM_DB_PATH"] = os.path.join(_DATA, "crm.db")
|
||||||
|
|
||||||
|
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||||
|
import server # noqa: E402
|
||||||
|
|
||||||
|
FAILS = []
|
||||||
|
|
||||||
|
|
||||||
|
def check(cond, msg):
    """Report one assertion: print its verdict and remember *msg* if it failed."""
    verdict = " PASS " if cond else " FAIL "
    print(verdict + msg)
    if cond:
        return
    FAILS.append(msg)
|
||||||
|
|
||||||
|
|
||||||
|
class _Quiet(server.CRMHandler):
    """Handler used by the test server; identical to ``server.CRMHandler`` except
    that ``log_message`` does nothing."""

    def log_message(self, *_unused):
        """Swallow the log line instead of emitting it."""
        return None
|
||||||
|
|
||||||
|
|
||||||
|
def _req(port, method, path, token=None, body=None):
    """Issue a single request against the local test server and parse the response.

    Args:
        port: Port of the server bound to 127.0.0.1.
        method: HTTP verb.
        path: Request target (path plus optional query string).
        token: Bearer token; omitted from the request when falsy.
        body: JSON-serialisable payload; no body is sent when ``None``.

    Returns:
        A ``(status, data)`` tuple. ``data`` is the decoded JSON document, or
        ``None`` if the response body is empty or cannot be parsed as JSON.
    """
    headers = {}
    if token:
        headers["Authorization"] = "Bearer " + token
    payload = None
    if body is not None:
        payload = json.dumps(body)
        headers["Content-Type"] = "application/json"

    conn = http.client.HTTPConnection("127.0.0.1", port, timeout=10)
    try:
        conn.request(method, path, body=payload, headers=headers)
        resp = conn.getresponse()
        status = resp.status
        raw = resp.read().decode("utf-8", "replace")
    finally:
        # Guarantee the socket is released when request/getresponse/read raises.
        conn.close()

    data = None
    if raw:
        try:
            data = json.loads(raw)
        except ValueError:
            # Leave data as None for a body that is not JSON.
            pass
    return status, data
|
||||||
|
|
||||||
|
|
||||||
|
GRID = {
|
||||||
|
"columns": [],
|
||||||
|
"rows": [
|
||||||
|
{"id": "rowAcme", "investor_name": "Acme Capital", "notes": "",
|
||||||
|
"contacts": [{"name": "Jane Doe", "email": "jane@acme.com", "title": "GP"}]},
|
||||||
|
{"id": "rowCharlie", "investor_name": "Charlie Brown", "notes": "",
|
||||||
|
"contacts": [{"name": "Charlie Brown", "email": "cb@brown.fund", "title": ""}]},
|
||||||
|
{"id": "rowBeta", "investor_name": "Beta Capital LLC", "notes": "",
|
||||||
|
"contacts": [{"name": "Pat Roe", "email": "pat@beta.com", "title": ""}]},
|
||||||
|
# Generic-descriptor decoys: share only "investment group" / "investments" with the
|
||||||
|
# Fortitude card below — must NOT surface as look-alikes (the 2026-06-20 false-positive fix).
|
||||||
|
{"id": "rowAether", "investor_name": "Aether Investment Group", "notes": "",
|
||||||
|
"contacts": [{"name": "Ada Ng", "email": "ada@aether.com", "title": ""}]},
|
||||||
|
{"id": "rowRussell", "investor_name": "Russell Investments", "notes": "",
|
||||||
|
"contacts": [{"name": "Russ Lee", "email": "russ@russell.com", "title": ""}]},
|
||||||
|
],
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def seed():
    """Insert the admin user ``u1`` and store ``GRID`` as the ``'main'`` grid state.

    Raises:
        sqlite3.Error: if either statement fails.
    """
    conn = sqlite3.connect(os.environ["CRM_DB_PATH"])
    try:
        conn.execute("INSERT INTO users (id,username,email,password_hash,full_name,role,is_active) "
                     "VALUES ('u1','grant','grant@ten31.example','x','Grant','admin',1)")
        # init_db doesn't create the 'main' state row (it's created lazily on first write), so
        # upsert rather than UPDATE — a plain UPDATE would silently match zero rows.
        conn.execute("INSERT INTO fundraising_state (id, grid_json, views_json, version) "
                     "VALUES ('main', ?, '[]', 1) "
                     "ON CONFLICT(id) DO UPDATE SET grid_json = excluded.grid_json", (json.dumps(GRID),))
        conn.commit()
    finally:
        # Close on the failure path as well so the connection is never leaked.
        conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Boot the real server on an ephemeral port and run the Matrix-intake checks.

    Runs the ``server._name_similarity`` unit checks first, then serves ``_Quiet``
    from a daemon thread and exercises GET /api/intake/match and
    POST /api/fundraising/log-communication through ``_req``, recording each
    assertion with ``check``.

    Exits the process with status 1 when any check failed; otherwise prints the
    "ALL PASS" line and returns.
    """
    server.init_db()
    seed()
    token = server.create_token("u1", "grant", "admin")

    # Unit: the distinctive-token similarity edges (the all-generic fallback path the endpoint
    # seed can't naturally reach — no real investor is named purely with generic descriptors).
    print("\n[unit: _name_similarity distinctive-token edges]")
    sim = server._name_similarity
    check(sim("Fortitude Investment Group", "Aether Investment Group") < 0.62,
          f"generic-only overlap stays below threshold (got {sim('Fortitude Investment Group', 'Aether Investment Group'):.2f})")
    check(sim("Aether Capital", "Aether Capital Partners") == 1.0,
          f"distinctive 'aether' (generic descriptors stripped) scores 1.0 (got {sim('Aether Capital', 'Aether Capital Partners'):.2f})")
    # Both sides all-generic → fallback compares full tokens on BOTH sides; shared generic word
    # alone must not clear the bar.
    check(sim("Capital Group", "Global Capital") < 0.62,
          f"all-generic both sides stays below threshold (got {sim('Capital Group', 'Global Capital'):.2f})")
    check(sim("Family Office", "Family Office") == 1.0,
          "identical all-generic names still score 1.0 (early-out)")

    httpd = ThreadingHTTPServer(("127.0.0.1", 0), _Quiet)
    port = httpd.server_address[1]
    threading.Thread(target=httpd.serve_forever, daemon=True).start()
    try:
        print("\n[match: existing investor by name returns the grid row id]")
        st, d = _req(port, "GET", "/api/intake/match?q=Acme%20Capital", token)
        # `or {}` also covers a literal `"data": null` envelope, so a bad reply is
        # recorded as a FAIL instead of aborting the run with AttributeError.
        m = ((d or {}).get("data") or {}).get("match")
        check(st == 200 and m and m["id"] == "rowAcme" and m["matched_on"] == "name",
              f"name match -> rowAcme (got {st}, {m})")

        print("\n[match: case-insensitive name]")
        st, d = _req(port, "GET", "/api/intake/match?q=acme%20capital", token)
        m = ((d or {}).get("data") or {}).get("match")
        check(m and m["id"] == "rowAcme", f"normalized name match (got {m})")

        print("\n[match: by contact email]")
        st, d = _req(port, "GET", "/api/intake/match?email=jane@acme.com", token)
        m = ((d or {}).get("data") or {}).get("match")
        check(m and m["id"] == "rowAcme" and m["matched_on"] == "email",
              f"email match -> rowAcme (got {m})")

        print("\n[match: unknown -> null]")
        st, d = _req(port, "GET", "/api/intake/match?q=Nobody%20LP", token)
        check(st == 200 and ((d or {}).get("data") or {}).get("match") is None,
              f"no match -> null (got {st}, {d})")

        print("\n[fuzzy: exact match returns no candidates (bot auto-attaches)]")
        st, d = _req(port, "GET", "/api/intake/match?q=Acme%20Capital", token)
        data = (d or {}).get("data") or {}
        check(st == 200 and data.get("match") and data.get("candidates") == [],
              f"exact match -> match set, candidates empty (got {data})")

        print("\n[fuzzy: near-spelling surfaces a candidate (Charles Brown ~ Charlie Brown)]")
        st, d = _req(port, "GET", "/api/intake/match?q=Charles%20Brown", token)
        data = (d or {}).get("data") or {}
        cids = [c["id"] for c in data.get("candidates", [])]
        check(data.get("match") is None and "rowCharlie" in cids,
              f"near-spelling -> candidate rowCharlie, no exact (got {data})")

        print("\n[fuzzy: legal-suffix difference surfaces a candidate (Beta Capital ~ Beta Capital LLC)]")
        st, d = _req(port, "GET", "/api/intake/match?q=Beta%20Capital", token)
        data = (d or {}).get("data") or {}
        cids = [c["id"] for c in data.get("candidates", [])]
        check(data.get("match") is None and "rowBeta" in cids,
              f"legal-suffix -> candidate rowBeta, no exact (got {data})")

        print("\n[fuzzy: legal-suffix-only difference ranks as a top candidate (Acme Capital LLC ~ Acme Capital)]")
        st, d = _req(port, "GET", "/api/intake/match?q=Acme%20Capital%20LLC", token)
        data = (d or {}).get("data") or {}
        top = (data.get("candidates") or [None])[0]
        check(data.get("match") is None and top and top["id"] == "rowAcme" and top["score"] == 1.0,
              f"legal-suffix-only -> rowAcme top candidate @1.0, no exact (got {data})")

        print("\n[fuzzy: one-character email typo surfaces a candidate by email]")
        st, d = _req(port, "GET", "/api/intake/match?email=jhane@acme.com", token)
        data = (d or {}).get("data") or {}
        cands = data.get("candidates", [])
        hit = next((c for c in cands if c["id"] == "rowAcme"), None)
        check(data.get("match") is None and hit and hit["matched_on"] == "email",
              f"email typo -> candidate rowAcme matched_on email (got {data})")

        print("\n[fuzzy: two-character email typo (distance 2) still surfaces]")
        st, d = _req(port, "GET", "/api/intake/match?email=jane@acne.con", token)  # acme->acne, com->con
        data = (d or {}).get("data") or {}
        hit = next((c for c in data.get("candidates", []) if c["id"] == "rowAcme"), None)
        check(data.get("match") is None and hit and hit["matched_on"] == "email" and hit["score"] == 0.8,
              f"dist-2 email -> rowAcme @0.8 (got {data})")

        print("\n[fuzzy: a row matching on BOTH name and email appears once (deduped)]")
        st, d = _req(port, "GET", "/api/intake/match?q=Acme%20Capitol&email=jhane@acme.com", token)
        data = (d or {}).get("data") or {}
        acme_hits = [c for c in data.get("candidates", []) if c["id"] == "rowAcme"]
        check(data.get("match") is None and len(acme_hits) == 1,
              f"name+email both match rowAcme -> single deduped entry (got {data})")

        print("\n[fuzzy: nothing close -> empty candidates]")
        st, d = _req(port, "GET", "/api/intake/match?q=Zphq%20Nobody%20LP", token)
        data = (d or {}).get("data") or {}
        check(st == 200 and data.get("match") is None and data.get("candidates") == [],
              f"unrelated query -> no match, no candidates (got {data})")

        print("\n[fuzzy: shared generic words alone do NOT surface look-alikes (Fortitude vs Aether/Russell)]")
        st, d = _req(port, "GET", "/api/intake/match?q=Fortitude%20Investment%20Group", token)
        data = (d or {}).get("data") or {}
        cids = [c["id"] for c in data.get("candidates", [])]
        check(data.get("match") is None and "rowAether" not in cids and "rowRussell" not in cids,
              f"generic-only overlap -> no decoy candidates (got {data})")

        print("\n[fuzzy: a shared DISTINCTIVE word still surfaces (Aether Capital ~ Aether Investment Group)]")
        st, d = _req(port, "GET", "/api/intake/match?q=Aether%20Capital", token)
        data = (d or {}).get("data") or {}
        cids = [c["id"] for c in data.get("candidates", [])]
        check(data.get("match") is None and "rowAether" in cids,
              f"distinctive overlap -> rowAether candidate (got {data})")

        print("\n[match: missing q and email -> 400]")
        st, _ = _req(port, "GET", "/api/intake/match", token)
        check(st == 400, f"no params -> 400 (got {st})")

        print("\n[match: unauthenticated -> 401]")
        st, _ = _req(port, "GET", "/api/intake/match?q=Acme", None)
        check(st == 401, f"no token -> 401 (got {st})")

        print("\n[bot create: log-communication + create_investor_if_missing, source tagged]")
        st, d = _req(port, "POST", "/api/fundraising/log-communication", token, {
            "investor_name": "Beacon Ventures",
            "contact": {"name": "Sam Lee", "email": "sam@beacon.vc", "title": "Partner"},
            "create_investor_if_missing": True,
            "type": "note", "subject": "Intake (Matrix)", "body": "met at the Austin conf",
            "source": "matrix_intake",
        })
        check(st in (200, 201), f"create new investor -> 201 (got {st})")

        print("\n[new-vs-existing contract: the just-created investor now matches by name]")
        st, d = _req(port, "GET", "/api/intake/match?q=Beacon%20Ventures", token)
        m = ((d or {}).get("data") or {}).get("match")
        check(m and m.get("investor_name") == "Beacon Ventures",
              f"created investor is matchable (no duplicate on next note) (got {m})")

        print("\n[provenance: the intake communication is audited as source=matrix_intake]")
        c = sqlite3.connect(os.environ["CRM_DB_PATH"])
        try:
            rows = c.execute("SELECT changes FROM audit_log WHERE entity_type='communication' AND action='create'").fetchall()
        finally:
            c.close()
        sources = [json.loads(r[0]).get("source") for r in rows if r[0]]
        check("matrix_intake" in sources, f"audit carries source=matrix_intake (got {sources})")
    finally:
        httpd.shutdown()
        # shutdown() only stops serve_forever(); server_close() releases the listening socket.
        httpd.server_close()

    print()
    if FAILS:
        print(f"FAILED ({len(FAILS)}):")
        for f in FAILS:
            print(f" - {f}")
        sys.exit(1)
    print("ALL PASS (matrix-intake endpoints)")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
@@ -0,0 +1,149 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Regression test for Phase 0 — the 4-stage pipeline funnel + the derived grid signals
|
||||||
|
(ROADMAP "Pipeline stages + investor flags/labels -- LOCKED SPEC", 2026-06-19).
|
||||||
|
|
||||||
|
Covers the parts the round-trip test in test_grid_pipeline_link.py only checks structurally:
|
||||||
|
|
||||||
|
* migration 0007 stage remap — outreach/meeting -> engaged, due_diligence -> diligence,
|
||||||
|
committed/funded -> commitment, and the stray 'lost' value archived (soft-deleted), with
|
||||||
|
'lead' left untouched;
|
||||||
|
* existing_investor_by_source_row — total_invested > 0 is the auto-derived "Existing Investor";
|
||||||
|
* staleness_by_source_row — last-contact age maps to '' / 'aging' (>= 30d) / 'stale' (>= 60d),
|
||||||
|
boundaries inclusive, and NO recorded activity -> '' (no false "stale" on a fresh lead).
|
||||||
|
|
||||||
|
Synthetic only (guardrail #9). Run: cd backend && python3 test_pipeline_stages_v2.py
|
||||||
|
"""
|
||||||
|
import os
|
||||||
|
import sqlite3
|
||||||
|
import sys
|
||||||
|
import tempfile
|
||||||
|
from datetime import datetime, timedelta
|
||||||
|
|
||||||
|
_DATA = tempfile.mkdtemp()
|
||||||
|
os.environ["CRM_DATA_DIR"] = _DATA
|
||||||
|
os.environ["CRM_DB_PATH"] = os.path.join(_DATA, "crm.db")
|
||||||
|
|
||||||
|
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||||
|
import server # noqa: E402
|
||||||
|
|
||||||
|
FAILS = []
|
||||||
|
_HERE = os.path.dirname(os.path.abspath(__file__))
|
||||||
|
|
||||||
|
|
||||||
|
def check(cond, msg):
    """Emit a PASS or FAIL line for *msg*, appending failures to ``FAILS``."""
    if not cond:
        print(" FAIL " + msg)
        FAILS.append(msg)
    else:
        print(" PASS " + msg)
|
||||||
|
|
||||||
|
|
||||||
|
def _days_ago(n):
    """Return the UTC calendar date *n* days before now, formatted ``YYYY-MM-DD``.

    Args:
        n: Number of days to subtract; anything ``timedelta(days=...)`` accepts.
    """
    # Function-scope import: the file's top-level import only brings in datetime/timedelta.
    from datetime import timezone

    # datetime.utcnow() is deprecated since Python 3.12; an aware UTC "now" yields the
    # same formatted date.
    return (datetime.now(timezone.utc) - timedelta(days=n)).strftime("%Y-%m-%d")
|
||||||
|
|
||||||
|
|
||||||
|
def test_migration_remap(conn):
    """Check migration 0007's stage remap against rows seeded with legacy stage values.

    Inserts one opportunity per legacy stage, re-runs the migration script from
    ``migrations/0007_pipeline_stages_v2.sql`` on *conn*, records the expected
    remaps via ``check``, then deletes the seeded rows again.
    """
    print("\n[migration 0007: stage remap + lost archived]")
    cur = conn.cursor()
    legacy = [("m_out", "outreach"), ("m_meet", "meeting"), ("m_dd", "due_diligence"),
              ("m_comm", "committed"), ("m_fund", "funded"), ("m_lost", "lost"),
              ("m_lead", "lead")]
    cur.executemany(
        "INSERT INTO opportunities (id, name, stage, contact_id, owner_id) VALUES (?,?,?,?,?)",
        [(opp_id, opp_id, old_stage, "c0", "u1") for opp_id, old_stage in legacy],
    )
    conn.commit()

    # init_db already applied 0007; re-run its SQL against the rows we just inserted with
    # legacy values (the UPDATEs target by old stage value, so this is exactly the remap).
    script_path = os.path.join(_HERE, "migrations", "0007_pipeline_stages_v2.sql")
    with open(script_path) as fh:
        conn.executescript(fh.read())
    conn.commit()

    def stage_of(opp_id):
        """Current stage of one opportunity."""
        row = cur.execute("SELECT stage FROM opportunities WHERE id=?", (opp_id,)).fetchone()
        return row[0]

    def archived(opp_id):
        """True when the opportunity has a non-NULL deleted_at."""
        row = cur.execute("SELECT deleted_at FROM opportunities WHERE id=?", (opp_id,)).fetchone()
        return row[0] is not None

    check(stage_of("m_out") == "engaged" and stage_of("m_meet") == "engaged",
          "outreach + meeting -> engaged")
    check(stage_of("m_dd") == "diligence", "due_diligence -> diligence")
    check(stage_of("m_comm") == "commitment" and stage_of("m_fund") == "commitment",
          "committed + funded -> commitment")
    check(stage_of("m_lead") == "lead", "lead unchanged")
    check(archived("m_lost"), "lost opp archived (deleted_at set)")
    check(not (archived("m_lead") or archived("m_comm")),
          "non-lost opps NOT archived by the migration")

    # Cleanup so these rows don't perturb the derivation seed below.
    cur.execute("DELETE FROM opportunities WHERE id IN ('m_out','m_meet','m_dd','m_comm','m_fund','m_lost','m_lead')")
    conn.commit()
|
||||||
|
|
||||||
|
|
||||||
|
def _investor(conn, srid, total, contact_id=None, comm_days_ago=None):
    """Seed a grid investor (+ optional linked contact & dated communication).

    conn:          open database connection (not committed here).
    srid:          source_row_id; also used as the investor name and as the
                   suffix of every generated row id.
    total:         value stored in total_invested.
    contact_id:    when truthy, a contact and a fundraising_contacts link
                   row are inserted as well.
    comm_days_ago: when not None (and a contact was given), one communication
                   dated that many days ago is inserted for the contact.
    """
    investor_id = "i_" + srid
    conn.execute(
        "INSERT INTO fundraising_investors (id, investor_name, source_row_id, total_invested) "
        "VALUES (?,?,?,?)",
        (investor_id, srid, srid, total),
    )
    if not contact_id:
        return

    conn.execute(
        "INSERT INTO contacts (id, first_name, last_name) VALUES (?,?,?)",
        (contact_id, srid, "Person"),
    )
    conn.execute(
        "INSERT INTO fundraising_contacts (id, investor_id, full_name, contact_id) "
        "VALUES (?,?,?,?)",
        ("fc_" + srid, investor_id, srid + " Person", contact_id),
    )
    # NOTE(review): the communication insert is taken to be nested under the
    # contact branch (it needs contact_id) — confirm against the original file.
    if comm_days_ago is None:
        return

    conn.execute(
        "INSERT INTO communications (id, contact_id, communication_date, created_by, subject) "
        "VALUES (?,?,?,?,?)",
        ("cm_" + srid, contact_id, _days_ago(comm_days_ago), "u1", "note"),
    )
|
||||||
|
|
||||||
|
|
||||||
|
def test_derivations(conn):
    """Seed investors and assert the existing-investor and staleness derivations.

    conn: open database connection; rows are inserted through _investor and
          committed before the server helpers are queried.
    """
    print("\n[existing_investor + staleness derivations]")
    # (source_row_id, total_invested, contact_id, comm_days_ago)
    seeds = [
        # Existing flag: only total_invested > 0.
        ("rowExist", 5_000_000, "c_exist", 100),
        ("rowProspect", 0, None, None),
        # Staleness ramp + boundaries (>=30 aging, >=60 stale; inclusive).
        ("rowStale", 0, "c_stale", 70),
        ("rowAging", 0, "c_aging", 45),
        ("rowFresh", 0, "c_fresh", 5),
        ("rowNoAct", 0, None, None),
        ("rowB60", 0, "c_b60", 60),  # boundary -> stale
        ("rowB59", 0, "c_b59", 59),  # -> aging
        ("rowB30", 0, "c_b30", 30),  # boundary -> aging
        ("rowB29", 0, "c_b29", 29),  # -> fresh
        # Empty source_row_id with committed capital — must be EXCLUDED by the `if not srid` guard
        # (would otherwise key the injection under '' and clobber a real row).
        ("", 9_999, "c_empty", 100),
    ]
    for srid, total, contact_id, days in seeds:
        _investor(conn, srid, total, contact_id=contact_id, comm_days_ago=days)
    conn.commit()

    existing = server.existing_investor_by_source_row(conn)
    check(existing == {"rowExist"},
          f"existing_investor = total_invested>0 with a non-empty source_row_id only (got {sorted(existing)})")

    st = server.staleness_by_source_row(conn)

    def level_of(srid):
        # Second element of the per-row tuple; "MISSING" flags an absent row.
        return st.get(srid, (None, "MISSING"))[1]

    check(level_of("rowStale") == "stale", f"70d -> stale (got {level_of('rowStale')})")
    check(level_of("rowAging") == "aging", f"45d -> aging (got {level_of('rowAging')})")
    check(level_of("rowFresh") == "", f"5d -> fresh/'' (got {level_of('rowFresh')!r})")
    check(level_of("rowNoAct") == "", f"no activity -> '' (got {level_of('rowNoAct')!r})")
    check(level_of("rowExist") == "stale", "existing + stale coexist (orthogonal axes)")
    check(level_of("rowB60") == "stale" and level_of("rowB59") == "aging",
          f"60d boundary inclusive -> stale; 59d -> aging (got {level_of('rowB60')}, {level_of('rowB59')})")
    check(level_of("rowB30") == "aging" and level_of("rowB29") == "",
          f"30d boundary inclusive -> aging; 29d -> '' (got {level_of('rowB30')}, {level_of('rowB29')!r})")
    # last_activity_at is carried through alongside the level for the recency display.
    check(st.get("rowStale", (None, ""))[0] is not None, "stale row carries a last_activity_at value")
    check(st.get("rowNoAct", ("X", ""))[0] is None, "no-activity row has last_activity_at None")
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Initialise the temp database, seed the shared rows, run both suites.

    Exits with status 1 when any check failed, 0 otherwise.
    """
    server.init_db()
    conn = server.get_db()
    # Shared fixtures: the owner user and the contact the opportunity rows point at.
    conn.execute("INSERT INTO users (id,username,email,password_hash,full_name,role,is_active) "
                 "VALUES ('u1','grant','grant@ten31.example','x','Grant','admin',1)")
    conn.execute("INSERT INTO contacts (id, first_name, last_name) VALUES ('c0','Seed','Contact')")
    conn.commit()

    for suite in (test_migration_remap, test_derivations):
        suite(conn)
    conn.close()

    if FAILS:
        summary = f"{len(FAILS)} FAILED"
        exit_code = 1
    else:
        summary = "ALL PASS (pipeline stages v2)"
        exit_code = 0
    print("\n" + summary)
    sys.exit(exit_code)


if __name__ == "__main__":
    main()
|
||||||
@@ -0,0 +1,157 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Test for the admin soft-deleted purge (v0.1.0:104).
|
||||||
|
|
||||||
|
The purge is a deliberate, admin-only, type-to-confirm exception to never-hard-delete, for
|
||||||
|
clearing dummy/test data. It must be SAFE: only ever touch a soft-deleted row, and never
|
||||||
|
remove or mutate LIVE data via a cascade/SET-NULL. This boots the real server, seeds live +
|
||||||
|
soft-deleted graphs, and drives /api/admin/soft-deleted[/purge] over HTTP. Synthetic only.
|
||||||
|
|
||||||
|
Run: cd backend && python3 test_purge_soft_deleted.py
|
||||||
|
"""
|
||||||
|
import http.client
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import sqlite3
|
||||||
|
import sys
|
||||||
|
import tempfile
|
||||||
|
import threading
|
||||||
|
from http.server import ThreadingHTTPServer
|
||||||
|
|
||||||
|
_DATA = tempfile.mkdtemp()
|
||||||
|
os.environ["CRM_DATA_DIR"] = _DATA
|
||||||
|
os.environ["CRM_DB_PATH"] = os.path.join(_DATA, "crm.db")
|
||||||
|
|
||||||
|
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||||
|
import server # noqa: E402
|
||||||
|
|
||||||
|
FAILS = []
|
||||||
|
DEL = "2026-06-01T00:00:00"
|
||||||
|
|
||||||
|
|
||||||
|
def check(cond, msg):
    """Print a PASS/FAIL line for `msg`; record `msg` in FAILS when `cond` is falsy."""
    tag = " PASS " if cond else " FAIL "
    print(tag + msg)
    if cond:
        return
    FAILS.append(msg)
|
||||||
|
|
||||||
|
|
||||||
|
class _Quiet(server.CRMHandler):
    """server.CRMHandler with its log_message hook turned into a no-op."""

    def log_message(self, *args):
        # Deliberately empty so request logging does not interleave with
        # the PASS/FAIL output.
        return None
|
||||||
|
|
||||||
|
|
||||||
|
def _post(port, path, token, payload):
|
||||||
|
conn = http.client.HTTPConnection("127.0.0.1", port, timeout=10)
|
||||||
|
conn.request("POST", path, body=json.dumps(payload),
|
||||||
|
headers={"Authorization": "Bearer " + token, "Content-Type": "application/json"})
|
||||||
|
resp = conn.getresponse()
|
||||||
|
body = resp.read().decode("utf-8", "replace")
|
||||||
|
conn.close()
|
||||||
|
try:
|
||||||
|
return resp.status, (json.loads(body) if body else None)
|
||||||
|
except ValueError:
|
||||||
|
return resp.status, None
|
||||||
|
|
||||||
|
|
||||||
|
def _get(port, path, token):
|
||||||
|
conn = http.client.HTTPConnection("127.0.0.1", port, timeout=10)
|
||||||
|
conn.request("GET", path, headers={"Authorization": "Bearer " + token})
|
||||||
|
resp = conn.getresponse()
|
||||||
|
body = resp.read().decode("utf-8", "replace")
|
||||||
|
conn.close()
|
||||||
|
try:
|
||||||
|
return resp.status, (json.loads(body) if body else None)
|
||||||
|
except ValueError:
|
||||||
|
return resp.status, None
|
||||||
|
|
||||||
|
|
||||||
|
def exists(table, rid):
    """Return True when `table` in the test database has a row with id `rid`.

    table: table name, interpolated directly into the SQL text — pass only
           trusted literals, as the checks in this script do.
    rid:   id value, bound as a parameter.

    Opens its own connection to the file named by CRM_DB_PATH and always
    closes it, including when the query raises.
    """
    c = sqlite3.connect(os.environ["CRM_DB_PATH"])
    try:
        n = c.execute(f"SELECT COUNT(*) FROM {table} WHERE id = ?", (rid,)).fetchone()[0]
    finally:
        c.close()
    return n > 0
|
||||||
|
|
||||||
|
|
||||||
|
def seed():
    """Insert the live and soft-deleted fixture graph the purge checks rely on.

    Writes directly to the database named by CRM_DB_PATH. The connection is
    closed even when an insert fails; the rows are committed only when every
    insert succeeded.
    """
    c = sqlite3.connect(os.environ["CRM_DB_PATH"])
    try:
        c.execute("INSERT INTO users (id,username,email,password_hash,full_name,role,is_active) "
                  "VALUES ('u1','grant','grant@ten31.example','x','Grant','admin',1)")
        # Soft-deleted contact with ONLY soft-deleted children -> purgeable; cascade should remove them.
        c.execute("INSERT INTO contacts (id,first_name,last_name,deleted_at) VALUES ('cClean','Dummy','Clean',?)", (DEL,))
        c.execute("INSERT INTO opportunities (id,name,contact_id,owner_id,deleted_at) VALUES ('opC','Opp','cClean','u1',?)", (DEL,))
        c.execute("INSERT INTO communications (id,contact_id,communication_date,created_by,subject,deleted_at) VALUES ('cmC','cClean','2026-05-01','u1','note',?)", (DEL,))
        # A reminder pointing at the purge target (reminders.contact_id is a bare logical FK, no ON DELETE):
        # the purge must NULL it, not leave it dangling and not delete the reminder.
        c.execute("INSERT INTO reminders (id,contact_id,investor_id,title) VALUES ('remC','cClean','inv-x','Follow up dummy')")
        # Soft-deleted contact WITH a live child -> must refuse (cascade would kill live data).
        c.execute("INSERT INTO contacts (id,first_name,last_name,deleted_at) VALUES ('cLiveKid','Has','Livekid',?)", (DEL,))
        c.execute("INSERT INTO communications (id,contact_id,communication_date,created_by,subject) VALUES ('cmLive','cLiveKid','2026-05-02','u1','live note')")
        # A live contact -> must refuse (not soft-deleted).
        c.execute("INSERT INTO contacts (id,first_name,last_name) VALUES ('cLive','Real','Person')")
        # Soft-deleted org with no live refs -> purgeable.
        c.execute("INSERT INTO organizations (id,name,deleted_at) VALUES ('orgClean','Dead Org',?)", (DEL,))
        # Soft-deleted org referenced by a LIVE contact -> must refuse (SET NULL would mutate live data).
        c.execute("INSERT INTO organizations (id,name,deleted_at) VALUES ('orgRef','Ref Org',?)", (DEL,))
        c.execute("INSERT INTO contacts (id,first_name,last_name,organization_id) VALUES ('cRef','Org','Member','orgRef')")
        c.commit()
    finally:
        c.close()
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Boot the server in-process and drive the soft-deleted list/purge endpoints.

    Initialises the database, seeds the fixtures, serves _Quiet on an
    ephemeral 127.0.0.1 port from a daemon thread, runs the checks over
    HTTP, then stops the server and releases its listening socket.
    Exits with status 1 when any check failed.
    """
    server.init_db()
    seed()
    token = server.create_token("u1", "grant", "admin")
    httpd = ThreadingHTTPServer(("127.0.0.1", 0), _Quiet)
    port = httpd.server_address[1]
    threading.Thread(target=httpd.serve_forever, daemon=True).start()
    try:
        print("\n[list soft-deleted]")
        st, body = _get(port, "/api/admin/soft-deleted", token)
        groups = (body or {}).get("groups", {})
        cids = {x["id"] for x in groups.get("contacts", [])}
        oids = {x["id"] for x in groups.get("organizations", [])}
        check(st == 200, f"GET soft-deleted -> 200 (got {st})")
        check({"cClean", "cLiveKid"} <= cids and "cLive" not in cids, f"lists soft-deleted contacts only (got {cids})")
        check({"orgClean", "orgRef"} <= oids, f"lists soft-deleted orgs (got {oids})")
        check("opC" in {x["id"] for x in groups.get("opportunities", [])}, "lists the soft-deleted opportunity")

        print("\n[purge guards]")
        st, _ = _post(port, "/api/admin/soft-deleted/purge", token, {"table": "contacts", "id": "cLive"})
        check(st == 400, f"purge a LIVE contact -> 400 (got {st})")
        check(exists("contacts", "cLive"), "live contact still present after refused purge")
        st, _ = _post(port, "/api/admin/soft-deleted/purge", token, {"table": "contacts", "id": "cLiveKid"})
        check(st == 409, f"purge contact with a LIVE child -> 409 (got {st})")
        check(exists("contacts", "cLiveKid") and exists("communications", "cmLive"), "contact + its live child preserved")
        st, _ = _post(port, "/api/admin/soft-deleted/purge", token, {"table": "organizations", "id": "orgRef"})
        check(st == 409, f"purge org referenced by a LIVE contact -> 409 (got {st})")
        check(exists("organizations", "orgRef") and exists("contacts", "cRef"), "org + its live referencing contact preserved")
        st, _ = _post(port, "/api/admin/soft-deleted/purge", token, {"table": "bogus", "id": "x"})
        check(st == 400, f"unknown table -> 400 (got {st})")
        st, _ = _post(port, "/api/admin/soft-deleted/purge", token, {"table": "contacts", "id": "nope"})
        check(st == 404, f"missing id -> 404 (got {st})")

        print("\n[purge happy path + cascade]")
        st, _ = _post(port, "/api/admin/soft-deleted/purge", token, {"table": "contacts", "id": "cClean"})
        check(st == 200, f"purge a clean soft-deleted contact -> 200 (got {st})")
        check(not exists("contacts", "cClean"), "purged contact is gone")
        check(not exists("opportunities", "opC") and not exists("communications", "cmC"),
              "its soft-deleted children were cascade-removed")
        _rc = sqlite3.connect(os.environ["CRM_DB_PATH"])
        _rem = _rc.execute("SELECT contact_id FROM reminders WHERE id = 'remC'").fetchone()
        _rc.close()
        check(_rem is not None and _rem[0] is None,
              "a reminder on the purged contact is KEPT but its contact_id is NULL'd (no dangling ref)")
        st, _ = _post(port, "/api/admin/soft-deleted/purge", token, {"table": "organizations", "id": "orgClean"})
        check(st == 200, f"purge a clean soft-deleted org -> 200 (got {st})")
        check(not exists("organizations", "orgClean"), "purged org is gone")
    finally:
        httpd.shutdown()
        # shutdown() only stops serve_forever(); server_close() releases the listening socket.
        httpd.server_close()

    print()
    if FAILS:
        print(f"{len(FAILS)} FAILED")
        sys.exit(1)
    print("ALL PASS (soft-deleted purge)")


if __name__ == "__main__":
    main()
|
||||||
@@ -0,0 +1,303 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Tests for reminders / follow-ups (W1).
|
||||||
|
|
||||||
|
Boots the REAL server against a temp DB and exercises the new endpoints end-to-end:
|
||||||
|
- POST /api/reminders creates an open reminder tied to a grid investor (denormalized
|
||||||
|
investor_name resolved from the grid), or a standalone task (no investor_id);
|
||||||
|
- GET /api/reminders lists + filters by status (active/open/done/...), overdue, investor_id,
|
||||||
|
assignee=me; every read is soft-delete filtered;
|
||||||
|
- PATCH completes (stamps completed_at) / snoozes / edits a reminder; status is validated;
|
||||||
|
- DELETE soft-deletes (gone from every list, never hard-deleted);
|
||||||
|
- GET /api/fundraising/state injects a read-only reminder_status (overdue/due_soon/open/'')
|
||||||
|
derived live from open reminders, and strips it on save (never persisted to the blob);
|
||||||
|
- deleting an investor from the grid cancels its orphaned reminders (reconcile twin),
|
||||||
|
while a standalone reminder is untouched;
|
||||||
|
- the usual guards (missing title -> 400, bad status -> 400, unknown investor -> 404,
|
||||||
|
unauthenticated -> 401).
|
||||||
|
Synthetic data only.
|
||||||
|
|
||||||
|
Run: cd backend && python3 test_reminders.py
|
||||||
|
"""
|
||||||
|
import http.client
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import sqlite3
|
||||||
|
import sys
|
||||||
|
import tempfile
|
||||||
|
import threading
|
||||||
|
from datetime import datetime, timedelta
|
||||||
|
from http.server import ThreadingHTTPServer
|
||||||
|
|
||||||
|
_DATA = tempfile.mkdtemp()
|
||||||
|
os.environ["CRM_DATA_DIR"] = _DATA
|
||||||
|
os.environ["CRM_DB_PATH"] = os.path.join(_DATA, "crm.db")
|
||||||
|
|
||||||
|
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||||
|
import server # noqa: E402
|
||||||
|
|
||||||
|
FAILS = []
|
||||||
|
|
||||||
|
# Date anchors for the reminder due-date scenarios, all relative to the current
# UTC date. An aware UTC clock replaces the deprecated datetime.utcnow(); the
# resulting date is the same.
from datetime import timezone  # noqa: E402

TODAY = datetime.now(timezone.utc).date()
TOMORROW = (TODAY + timedelta(days=1)).isoformat()
YESTERDAY = (TODAY - timedelta(days=1)).isoformat()
FAR = (TODAY + timedelta(days=30)).isoformat()
|
||||||
|
|
||||||
|
|
||||||
|
def check(cond, msg):
    """Emit a PASS or FAIL line for `msg` and collect failed messages in FAILS.

    Only the truthiness of `cond` matters.
    """
    if not cond:
        print(" FAIL " + msg)
        FAILS.append(msg)
    else:
        print(" PASS " + msg)
|
||||||
|
|
||||||
|
|
||||||
|
class _Quiet(server.CRMHandler):
    """Handler used by the in-process test server; its log_message does nothing."""

    def log_message(self, *args):
        # No-op on purpose: keeps the test output limited to PASS/FAIL lines.
        return None
|
||||||
|
|
||||||
|
|
||||||
|
def _req(port, method, path, token=None, body=None):
|
||||||
|
conn = http.client.HTTPConnection("127.0.0.1", port, timeout=10)
|
||||||
|
headers = {}
|
||||||
|
if token:
|
||||||
|
headers["Authorization"] = "Bearer " + token
|
||||||
|
payload = None
|
||||||
|
if body is not None:
|
||||||
|
payload = json.dumps(body)
|
||||||
|
headers["Content-Type"] = "application/json"
|
||||||
|
conn.request(method, path, body=payload, headers=headers)
|
||||||
|
resp = conn.getresponse()
|
||||||
|
raw = resp.read().decode("utf-8", "replace")
|
||||||
|
conn.close()
|
||||||
|
data = None
|
||||||
|
if raw:
|
||||||
|
try:
|
||||||
|
data = json.loads(raw)
|
||||||
|
except ValueError:
|
||||||
|
pass
|
||||||
|
return resp.status, data
|
||||||
|
|
||||||
|
|
||||||
|
def _put_grid(port, token, rows):
    """PUT a fundraising state whose grid holds `rows` (no columns, no views).

    Returns the (status, data) pair produced by _req.
    """
    state = {"grid": {"columns": [], "rows": rows}, "views": []}
    return _req(port, "PUT", "/api/fundraising/state", token, state)
|
||||||
|
|
||||||
|
|
||||||
|
ROW_ACME = {"id": "rowAcme", "investor_name": "Acme Capital", "notes": "", "lead": "Grant",
|
||||||
|
"contacts": [{"name": "Jane Doe", "email": "jane@acme.com", "title": "GP"}]}
|
||||||
|
ROW_BETA = {"id": "rowBeta", "investor_name": "Beta Capital", "notes": "",
|
||||||
|
"contacts": [{"name": "Pat Roe", "email": "pat@beta.com"}]}
|
||||||
|
ROW_GAMMA = {"id": "rowGamma", "investor_name": "Gamma Partners", "notes": "",
|
||||||
|
"contacts": [{"name": "Sam Lee", "email": "sam@gamma.com"}]}
|
||||||
|
|
||||||
|
|
||||||
|
def _db():
|
||||||
|
return sqlite3.connect(os.environ["CRM_DB_PATH"])
|
||||||
|
|
||||||
|
|
||||||
|
def seed():
    """Insert the admin user ('u1') the test token is issued for.

    The connection is closed even when the insert fails.
    """
    c = _db()
    try:
        c.execute("INSERT INTO users (id,username,email,password_hash,full_name,role,is_active) "
                  "VALUES ('u1','grant','grant@ten31.example','x','Grant','admin',1)")
        c.commit()
    finally:
        c.close()
|
||||||
|
|
||||||
|
|
||||||
|
def _investor_id(source_row_id):
    """Return the fundraising_investors.id for a grid row id, or None if absent.

    The connection is closed even when the query raises.
    """
    c = _db()
    try:
        r = c.execute("SELECT id FROM fundraising_investors WHERE source_row_id = ?", (source_row_id,)).fetchone()
    finally:
        c.close()
    return r[0] if r else None
|
||||||
|
|
||||||
|
|
||||||
|
def _grid_reminder_status(port, token):
    """Fetch the fundraising state and map each grid row id to its reminder_status.

    Rows without a reminder_status key map to None; a missing or non-JSON
    response yields an empty dict.
    """
    _status, payload = _req(port, "GET", "/api/fundraising/state", token)
    grid_rows = (payload or {}).get("data", {}).get("grid", {}).get("rows", [])
    statuses = {}
    for row in grid_rows:
        statuses[row["id"]] = row.get("reminder_status")
    return statuses
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Boot the server in-process and exercise the reminders endpoints end-to-end.

    Initialises the database, seeds the admin user, serves _Quiet on an
    ephemeral 127.0.0.1 port from a daemon thread, runs every scenario over
    HTTP (with direct sqlite reads/writes where the checks need them), then
    stops the server and releases its listening socket. Prints each failed
    message and exits with status 1 when any check failed, 0 otherwise.
    """
    server.init_db()
    seed()
    token = server.create_token("u1", "grant", "admin")

    httpd = ThreadingHTTPServer(("127.0.0.1", 0), _Quiet)
    port = httpd.server_address[1]
    threading.Thread(target=httpd.serve_forever, daemon=True).start()
    try:
        st, _ = _put_grid(port, token, [ROW_ACME, ROW_BETA, ROW_GAMMA])
        check(st == 200, f"seed grid via PUT /state (got {st})")
        acme_id = _investor_id("rowAcme")
        beta_id = _investor_id("rowBeta")
        gamma_id = _investor_id("rowGamma")
        check(bool(acme_id and beta_id and gamma_id), "investor ids resolved from the grid")

        # ── create: investor-linked + denormalized name resolved from the grid ──
        print("\n[create: investor-linked reminder resolves the denormalized name]")
        st, d = _req(port, "POST", "/api/reminders", token,
                     {"investor_id": acme_id, "title": "Send Fund III deck", "due_date": TOMORROW})
        rem = (d or {}).get("data") or {}
        acme_rem_id = rem.get("id")
        check(st == 201 and rem.get("status") == "open", f"create -> 201 open (got {st}, {d})")
        check(rem.get("investor_name") == "Acme Capital", f"name denormalized from grid (got {rem.get('investor_name')})")

        # overdue reminder on Beta; far-future + standalone for filter coverage
        st, d = _req(port, "POST", "/api/reminders", token,
                     {"investor_id": beta_id, "title": "Call Pat", "due_date": YESTERDAY})
        beta_rem_id = (d or {}).get("data", {}).get("id")
        check(st == 201, f"create overdue beta reminder (got {st})")
        st, d = _req(port, "POST", "/api/reminders", token,
                     {"investor_id": gamma_id, "title": "Quarterly touch", "due_date": FAR})
        gamma_rem_id = (d or {}).get("data", {}).get("id")
        st, d = _req(port, "POST", "/api/reminders", token, {"title": "Team: refresh pipeline view"})
        standalone_id = (d or {}).get("data", {}).get("id")
        check(st == 201 and (d or {}).get("data", {}).get("investor_id") in (None, ""),
              f"standalone reminder (no investor) created (got {st})")

        # ── list + filters ──
        print("\n[list + filters]")
        st, d = _req(port, "GET", "/api/reminders", token)
        items = (d or {}).get("data", [])
        check(st == 200 and len(items) == 4, f"list returns all 4 open (got {st}, {len(items)})")
        check(all("last_activity_at" in it for it in items), "each row carries last_activity_at")
        # 8h deep-link: each row injects source_row_id (joined via investor_id) — the grid row id
        # the mobile Reminders edit sheet uses for "Open investor in Grid"; null for a team task.
        by_id = {it.get("id"): it for it in items}
        check(by_id.get(acme_rem_id, {}).get("source_row_id") == "rowAcme",
              f"investor reminder injects source_row_id == 'rowAcme' (got {by_id.get(acme_rem_id, {}).get('source_row_id')!r})")
        check(by_id.get(standalone_id, {}).get("source_row_id") in (None, ""),
              f"team task has null source_row_id -> Open-in-Grid hidden (got {by_id.get(standalone_id, {}).get('source_row_id')!r})")
        # dated reminders sort before undated, soonest first -> YESTERDAY (beta) leads
        check(items and items[0].get("id") == beta_rem_id, f"overdue sorts first (got {items[0].get('id') if items else None})")

        st, d = _req(port, "GET", "/api/reminders?overdue=1", token)
        ids = [it["id"] for it in (d or {}).get("data", [])]
        check(ids == [beta_rem_id], f"overdue=1 -> only the past-due one (got {ids})")
        # overdue owns the status constraint: a conflicting status= must not silently zero out
        st, d = _req(port, "GET", "/api/reminders?overdue=1&status=done", token)
        ids = [it["id"] for it in (d or {}).get("data", [])]
        check(ids == [beta_rem_id], f"overdue wins over a conflicting status= (got {ids})")

        st, d = _req(port, "GET", f"/api/reminders?investor_id={acme_id}", token)
        ids = [it["id"] for it in (d or {}).get("data", [])]
        check(ids == [acme_rem_id], f"investor_id filter (got {ids})")

        st, d = _req(port, "GET", "/api/reminders?assignee=me", token)
        # Same (d or {}) guard as every other read: a non-JSON response must
        # not raise here and abort the rest of the run.
        check(len((d or {}).get("data", [])) == 0, "assignee=me -> none (reminders created unassigned)")

        # ── grid injection: reminder_status derived live, never persisted ──
        print("\n[grid injection: read-only reminder_status]")
        s = _grid_reminder_status(port, token)
        check(s.get("rowAcme") == "due_soon", f"due-tomorrow -> due_soon (got {s.get('rowAcme')})")
        check(s.get("rowBeta") == "overdue", f"past-due -> overdue (got {s.get('rowBeta')})")
        check(s.get("rowGamma") == "open", f"far-future -> open (got {s.get('rowGamma')})")
        # echo the injected value back on save; it must NOT persist into the blob
        st, d = _req(port, "GET", "/api/fundraising/state", token)
        echoed = (d or {}).get("data", {}).get("grid", {}).get("rows", [])
        st, _ = _put_grid(port, token, echoed)
        c = _db()
        blob = json.loads(c.execute("SELECT grid_json FROM fundraising_state WHERE id='main'").fetchone()[0])
        c.close()
        acme_stored = {r["id"]: r for r in blob.get("rows", [])}.get("rowAcme", {})
        check("reminder_status" not in acme_stored, "reminder_status not persisted into the grid blob")

        # ── complete / reopen stamps completed_at ──
        print("\n[complete + reopen]")
        st, d = _req(port, "PATCH", f"/api/reminders/{acme_rem_id}", token, {"status": "done"})
        check(st == 200 and (d or {}).get("data", {}).get("status") == "done"
              and (d or {}).get("data", {}).get("completed_at"), f"done stamps completed_at (got {d})")
        st, d = _req(port, "GET", "/api/reminders?status=open", token)
        check(acme_rem_id not in [it["id"] for it in (d or {}).get("data", [])], "done reminder drops from status=open")
        st, d = _req(port, "GET", "/api/reminders?status=done", token)
        check(acme_rem_id in [it["id"] for it in (d or {}).get("data", [])], "done reminder shows under status=done")
        check(_grid_reminder_status(port, token).get("rowAcme") in (None, ""), "completed reminder clears the grid chip")
        st, d = _req(port, "PATCH", f"/api/reminders/{acme_rem_id}", token, {"status": "open"})
        check((d or {}).get("data", {}).get("completed_at") in (None, ""), "reopen clears completed_at")

        # ── snooze: out of 'open', still in 'active' ──
        print("\n[snooze]")
        st, d = _req(port, "PATCH", f"/api/reminders/{beta_rem_id}", token,
                     {"status": "snoozed", "snoozed_until": FAR})
        check(st == 200 and (d or {}).get("data", {}).get("status") == "snoozed", f"snooze (got {st})")
        st, d = _req(port, "GET", "/api/reminders?status=open", token)
        check(beta_rem_id not in [it["id"] for it in (d or {}).get("data", [])], "snoozed drops from status=open")
        st, d = _req(port, "GET", "/api/reminders?status=active", token)
        check(beta_rem_id in [it["id"] for it in (d or {}).get("data", [])], "snoozed stays in status=active")

        # ── edit title + due date ──
        print("\n[edit]")
        st, d = _req(port, "PATCH", f"/api/reminders/{gamma_rem_id}", token,
                     {"title": "Quarterly touch — Q3", "due_date": TOMORROW})
        check((d or {}).get("data", {}).get("title") == "Quarterly touch — Q3"
              and (d or {}).get("data", {}).get("due_date") == TOMORROW, f"title+due edited (got {d})")

        # ── soft-delete: gone from every list, tombstoned not hard-deleted ──
        print("\n[soft-delete]")
        st, d = _req(port, "DELETE", f"/api/reminders/{standalone_id}", token)
        check(st == 200 and (d or {}).get("data", {}).get("deleted") is True, f"delete -> 200 (got {st})")
        st, d = _req(port, "GET", "/api/reminders", token)
        check(standalone_id not in [it["id"] for it in (d or {}).get("data", [])], "deleted reminder hidden from list")
        c = _db()
        gone = c.execute("SELECT deleted_at FROM reminders WHERE id = ?", (standalone_id,)).fetchone()[0]
        c.close()
        check(gone is not None, "deleted reminder tombstoned (deleted_at set), not hard-deleted")

        # ── orphan reconcile: drop the investor from the grid -> its reminders cancelled ──
        print("\n[orphan reconcile: deleting the grid investor cancels its reminders]")
        # create a fresh standalone task to confirm it is NOT cancelled by the reconciler
        st, d = _req(port, "POST", "/api/reminders", token, {"title": "Standalone keeps living"})
        keep_id = (d or {}).get("data", {}).get("id")
        st, _ = _put_grid(port, token, [ROW_ACME, ROW_BETA])  # drop rowGamma
        c = _db()
        gamma_status = c.execute("SELECT status FROM reminders WHERE id = ?", (gamma_rem_id,)).fetchone()[0]
        keep_status = c.execute("SELECT status FROM reminders WHERE id = ?", (keep_id,)).fetchone()[0]
        gamma_name = c.execute("SELECT investor_name FROM reminders WHERE id = ?", (gamma_rem_id,)).fetchone()[0]
        c.close()
        check(gamma_status == "cancelled", f"orphaned investor's reminder cancelled (got {gamma_status})")
        check(gamma_name == "Gamma Partners", "cancelled reminder keeps denormalized investor_name for history")
        check(keep_status == "open", f"standalone reminder untouched by reconcile (got {keep_status})")

        # ── recency rollup: a tombstoned email sighting must not inflate last_activity_at ──
        print("\n[recency: soft-deleted email sighting excluded from last_activity_at]")
        c = _db()
        c.execute("INSERT INTO emails (id, rfc_message_id, from_email, sent_at, is_matched, match_status) "
                  "VALUES ('em1','<em1@x>','lp@acme.com','2026-06-10T00:00:00Z',1,'matched')")
        c.execute("INSERT INTO email_investor_links (id, email_id, fundraising_investor_id, match_kind, match_confidence, matched_address) "
                  "VALUES ('eil1','em1',?, 'exact_email',1.0,'lp@acme.com')", (acme_id,))
        c.execute("INSERT INTO email_account_messages (id, email_id, account_id, gmail_message_id, gmail_thread_id, deleted_at) "
                  "VALUES ('eam1','em1','acct1','g1','t1','2026-06-11T00:00:00Z')")  # tombstoned sighting only
        c.commit()
        c.close()
        st, d = _req(port, "GET", f"/api/reminders?investor_id={acme_id}", token)
        items = (d or {}).get("data", [])
        la = items[0].get("last_activity_at") if items else "MISSING"
        check(bool(items) and la is None, f"tombstoned-only email -> no last_activity (got {la})")
        c = _db()
        c.execute("INSERT INTO email_account_messages (id, email_id, account_id, gmail_message_id, gmail_thread_id, deleted_at) "
                  "VALUES ('eam2','em1','acct2','g2','t2',NULL)")
        c.commit()
        c.close()
        st, d = _req(port, "GET", f"/api/reminders?investor_id={acme_id}", token)
        items = (d or {}).get("data", [])
        la = items[0].get("last_activity_at") if items else "MISSING"
        check(bool(items) and la == '2026-06-10T00:00:00Z', f"live sighting -> last_activity set (got {la})")

        # ── guards ──
        print("\n[guards]")
        st, _ = _req(port, "POST", "/api/reminders", token, {"investor_id": acme_id})
        check(st == 400, f"missing title -> 400 (got {st})")
        st, _ = _req(port, "POST", "/api/reminders", token, {"title": "x", "investor_id": "nope"})
        check(st == 404, f"unknown investor_id -> 404 (got {st})")
        st, _ = _req(port, "PATCH", f"/api/reminders/{gamma_rem_id}", token, {"status": "bogus"})
        check(st == 400, f"invalid status -> 400 (got {st})")
        st, _ = _req(port, "DELETE", "/api/reminders/doesnotexist", token)
        check(st == 404, f"delete unknown -> 404 (got {st})")
        st, _ = _req(port, "GET", "/api/reminders", None)
        check(st == 401, f"unauthenticated list -> 401 (got {st})")
    finally:
        httpd.shutdown()
        # shutdown() only stops serve_forever(); server_close() releases the listening socket.
        httpd.server_close()

    print("\n" + ("ALL PASS" if not FAILS else f"{len(FAILS)} FAILURE(S):"))
    for f in FAILS:
        print(" - " + f)
    sys.exit(1 if FAILS else 0)


if __name__ == "__main__":
    main()
|
||||||
@@ -0,0 +1,171 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Endpoint tests for POST /api/admin/digest/test-email (handle_admin_send_test_email).
|
||||||
|
|
||||||
|
Boots the REAL server in-process against a throwaway DB, monkeypatches the SMTP
|
||||||
|
sender (no network), and proves the security-relevant contract:
|
||||||
|
* admin-gated: 401 without a token, 403 for a non-admin.
|
||||||
|
* recipients are restricted to the active-admin set — an arbitrary `to` is
|
||||||
|
rejected (the endpoint is NOT an open relay), an admin `to` is allowed,
|
||||||
|
and the default audience is every active admin.
|
||||||
|
* a send failure does not echo the exception (which could carry the SMTP
|
||||||
|
credential) back to the caller.
|
||||||
|
|
||||||
|
Run: cd backend && python3 test_smtp_endpoint.py
|
||||||
|
"""
|
||||||
|
import http.client
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import tempfile
|
||||||
|
import threading
|
||||||
|
from http.server import ThreadingHTTPServer
|
||||||
|
|
||||||
|
# Build a throwaway frontend/data tree in a fresh temp directory and publish its
# locations through the CRM_* environment variables. This happens before
# `server` is imported further down (presumably it reads them at import time —
# confirm against server.py).
_BASE = tempfile.mkdtemp()
_FRONTEND = os.path.join(_BASE, "frontend")
os.makedirs(os.path.join(_FRONTEND, "assets"))
_DATA = os.path.join(_BASE, "data")
os.makedirs(_DATA)
# Explicit encoding keeps the stub page independent of the platform's locale default.
with open(os.path.join(_FRONTEND, "index.html"), "w", encoding="utf-8") as f:
    f.write("<!doctype html><title>crm</title>")
os.environ["CRM_FRONTEND_DIR"] = _FRONTEND
os.environ["CRM_DATA_DIR"] = _DATA
os.environ["CRM_DB_PATH"] = os.path.join(_DATA, "crm.db")
|
||||||
|
|
||||||
|
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||||
|
import server # noqa: E402
|
||||||
|
import smtp_send # noqa: E402
|
||||||
|
|
||||||
|
# Messages of every failed check(); main() exits with status 1 when this is non-empty.
FAILS = []
|
||||||
|
|
||||||
|
# ── fake sender: record calls, optionally raise (no network) ──
# One {"to": [...], "subject": ...} entry is appended per successful fake delivery.
SENT = []
# Store an exception instance under "exc" to make fake_send() raise it; None disarms it.
RAISE = {"exc": None}


def fake_send(to_addrs, subject, body, **kw):
    """Record a delivery in SENT, or raise the exception armed in RAISE.

    Args:
        to_addrs: Iterable of recipient addresses.
        subject: Message subject; stored alongside the recipients.
        body: Message body; accepted but not recorded.
        **kw: Any extra keyword arguments; ignored.

    Returns:
        A dict with the recipients under "sent_to" and a fixed "from" address.

    Raises:
        Whatever exception instance is currently stored in RAISE["exc"].
    """
    armed = RAISE["exc"]
    if armed is not None:
        raise armed
    record = {"to": list(to_addrs), "subject": subject}
    SENT.append(record)
    receipt = {"sent_to": list(to_addrs), "from": "digest@ten31.test"}
    return receipt
|
||||||
|
|
||||||
|
|
||||||
|
# Monkeypatch the SMTP module: report it as configured and swap the real sender
# for the recording fake, so no test ever opens a network connection.
smtp_send.smtp_configured = lambda: True
smtp_send.send_email = fake_send
|
||||||
|
|
||||||
|
|
||||||
|
def check(cond, msg):
    """Print a PASS or FAIL line for *msg*; on a falsy *cond* also record *msg* in FAILS."""
    if cond:
        print(" PASS " + msg)
        return
    print(" FAIL " + msg)
    FAILS.append(msg)
|
||||||
|
|
||||||
|
|
||||||
|
class _Quiet(server.CRMHandler):
    """CRM handler variant whose log_message does nothing, keeping test output clean."""

    def log_message(self, *a):
        # Intentionally a no-op: swallow whatever would have been logged.
        return None
|
||||||
|
|
||||||
|
|
||||||
|
def _post(port, path, token=None, body=None):
    """POST a JSON payload to the server on 127.0.0.1 and return its response.

    Args:
        port: TCP port the server is listening on.
        path: Request path, e.g. "/api/auth/register".
        token: Optional bearer token; when truthy it is sent in the
            Authorization header.
        body: JSON-serialisable payload; a falsy value is sent as ``{}``.

    Returns:
        A ``(status, raw, parsed)`` tuple: the HTTP status code, the response
        body decoded as UTF-8 (undecodable bytes replaced), and that body
        parsed as JSON, or ``None`` when it is not valid JSON.
    """
    headers = {"Content-Type": "application/json"}
    if token:
        headers["Authorization"] = "Bearer " + token

    conn = http.client.HTTPConnection("127.0.0.1", port, timeout=10)
    try:
        conn.request("POST", path, body=json.dumps(body or {}), headers=headers)
        resp = conn.getresponse()
        status = resp.status
        raw = resp.read().decode("utf-8", "replace")
    finally:
        # Close even when the request or read fails so a broken test never leaks the socket.
        conn.close()

    try:
        parsed = json.loads(raw)
    except ValueError:
        # json.JSONDecodeError is a ValueError: a non-JSON body is reported as None.
        parsed = None
    return status, raw, parsed
|
||||||
|
|
||||||
|
|
||||||
|
def _exercise_endpoint(port):
    """Run every test-email endpoint check against the server listening on *port*.

    Outcomes are reported through check(); nothing is returned. The run stops
    after a failed admin registration, because every later step needs the
    admin token and e-mail from that response.
    """
    st, raw, j = _post(port, "/api/auth/register", body={
        "username": "admin", "email": "admin@ten31.test",
        "password": "password123", "full_name": "Admin User"})
    registered = bool(st == 201 and j and j.get("token"))
    check(registered, f"register first user as admin (got {st})")
    if not registered:
        # Record the failure and bail out instead of crashing on j["token"] below.
        return
    admin_token = j["token"]
    admin_email = j["user"]["email"]

    # Admin = first registered user. A member is inserted directly (self-register
    # is disabled after the first user) so get_user resolves a real active row.
    conn = server.get_db()
    try:
        conn.execute(
            "INSERT INTO users (id, username, email, password_hash, full_name, role, is_active) "
            "VALUES (?,?,?,?,?,?,1)",
            ("member-1", "member1", "member1@ten31.test",
             server.hash_password("password123"), "Member One", "member"))
        conn.commit()
    finally:
        # Release the handle even if the insert fails.
        conn.close()
    member_token = server.create_token("member-1", "member1", "member")

    path = "/api/admin/digest/test-email"

    # 1. unauthenticated -> 401, sender untouched
    SENT.clear()
    st, raw, j = _post(port, path)
    check(st == 401, f"no token -> 401 (got {st})")
    check(not SENT, "no token: sender not called")

    # 2. non-admin -> 403
    SENT.clear()
    st, raw, j = _post(port, path, token=member_token)
    check(st == 403, f"member -> 403 (got {st})")
    check(not SENT, "member: sender not called")

    # 3. admin, no `to` -> 200, default audience = the admin set
    SENT.clear()
    st, raw, j = _post(port, path, token=admin_token)
    check(st == 200, f"admin default -> 200 (got {st})")
    check(len(SENT) == 1 and SENT[0]["to"] == [admin_email],
          f"default recipients = active admins ({admin_email}); got {SENT}")

    # 4. admin, arbitrary outside `to` -> 400, NOT an open relay
    SENT.clear()
    st, raw, j = _post(port, path, token=admin_token, body={"to": "attacker@evil.com"})
    check(st == 400, f"outside `to` -> 400 (got {st})")
    check(not SENT, "outside `to`: sender not called (no relay)")

    # 5. admin, `to` an admin address (case-insensitive) -> 200
    SENT.clear()
    st, raw, j = _post(port, path, token=admin_token, body={"to": admin_email.upper()})
    check(st == 200, f"admin `to` -> 200 (got {st})")
    check(len(SENT) == 1 and SENT[0]["to"] == [admin_email.upper()],
          "admin `to` is delivered as given")

    # 6. mixed list with one outside address -> 400, all rejected
    SENT.clear()
    st, raw, j = _post(port, path, token=admin_token,
                       body={"to": [admin_email, "outsider@evil.com"]})
    check(st == 400, f"mixed list with outsider -> 400 (got {st})")
    check(not SENT, "mixed list: sender not called")

    # 7. send failure must NOT leak the exception text (could carry the credential)
    SENT.clear()
    secret = "PWLEAK_SsHh99"
    RAISE["exc"] = Exception(f"535 auth failed for user with password {secret}")
    try:
        st, raw, j = _post(port, path, token=admin_token)
    finally:
        # Always disarm the fake sender, even if the request itself blows up.
        RAISE["exc"] = None
    check(st == 502, f"send failure -> 502 (got {st})")
    check(secret not in raw, "send-failure response does not leak the exception/credential text")


def main():
    """Boot the server in-process, run the endpoint checks, and print a summary.

    Exits with status 1 through sys.exit when any check failed; otherwise
    prints the all-pass line and returns normally.
    """
    server.init_db()

    # Port 0 lets the OS pick a free port; the chosen one is read back below.
    httpd = ThreadingHTTPServer(("127.0.0.1", 0), _Quiet)
    port = httpd.server_address[1]
    threading.Thread(target=httpd.serve_forever, daemon=True).start()
    try:
        _exercise_endpoint(port)
    finally:
        httpd.shutdown()
        # shutdown() only stops serve_forever(); server_close() releases the listening socket.
        httpd.server_close()

    print()
    if FAILS:
        print(f"FAILED ({len(FAILS)}):")
        for f in FAILS:
            print(f" - {f}")
        sys.exit(1)
    print("ALL PASS (digest test-email endpoint)")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Script entry point: run the endpoint checks only on direct execution.
    main()
|
||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user