diff --git a/backend/matrix_intake/bot.py b/backend/matrix_intake/bot.py index 9699de1..54f63ce 100644 --- a/backend/matrix_intake/bot.py +++ b/backend/matrix_intake/bot.py @@ -254,10 +254,14 @@ async def main(): for ev in chunk: rel = ((getattr(ev, "source", None) or {}).get("content", {}) or {}).get("m.relates_to") or {} in_reply = (rel.get("m.in_reply_to") or {}).get("event_id") - # A thread child carries event_id==root; the un-threaded nudge carries only - # m.in_reply_to.event_id==root. Catch both so the thread AND its main-timeline - # pointer clear together. - if rel.get("event_id") == root or in_reply == root: + # A thread child carries rel_type=m.thread + event_id==root (the cards/acks + + # the human's yes/no replies — any sender). The un-threaded nudge is the BOT's + # own plain reply to root (only m.in_reply_to==root, no rel_type); gate that + # clause to our sender so we don't also redact an unrelated human plain-reply + # to the same root (root itself is already redacted above). + is_thread_child = rel.get("rel_type") == "m.thread" and rel.get("event_id") == root + is_own_nudge = in_reply == root and getattr(ev, "sender", None) == mx["user_id"] + if is_thread_child or is_own_nudge: await redact_card(room_id, ev.event_id) token = getattr(resp, "end", None) scanned += len(chunk) diff --git a/backend/test_grid_email_heal.py b/backend/test_grid_email_heal.py index a59923a..5e952fb 100644 --- a/backend/test_grid_email_heal.py +++ b/backend/test_grid_email_heal.py @@ -61,6 +61,8 @@ GRID = { {"name": "Jose Briones", "email": "", "title": ""}]}, {"id": "rowA", "investor_name": "Acme Capital", "notes": "", "contacts": [{"name": "Jane Doe", "email": "keep@acme.com", "title": ""}]}, + {"id": "rowO", "investor_name": "Orphan LP", "notes": "", + "contacts": [{"name": "No Link", "email": "", "title": ""}]}, ], } @@ -79,11 +81,13 @@ def seed(): "('c-jane','Jane','Doe','other@acme.com')") # differs from the blob's keep@acme.com # Relational mirror (what sync_fundraising_relational would build): blank fc.email, linked contact_id. c.execute("INSERT INTO fundraising_investors (id,investor_name,source_row_id,total_invested) VALUES " - "('inv-w','Wyoming','rowW',0),('inv-a','Acme Capital','rowA',0)") + "('inv-w','Wyoming','rowW',0),('inv-a','Acme Capital','rowA',0),('inv-o','Orphan LP','rowO',0)") + # fc-orphan has contact_id NULL (pre-0004 orphan) and blank email — nothing to heal from. c.execute("INSERT INTO fundraising_contacts (id,investor_id,full_name,email,sort_order,contact_id) VALUES " "('fc-phil','inv-w','Philip Treick','',0,'c-phil')," "('fc-jose','inv-w','Jose Briones','',1,'c-jose')," - "('fc-jane','inv-a','Jane Doe','',0,'c-jane')") + "('fc-jane','inv-a','Jane Doe','',0,'c-jane')," + "('fc-orphan','inv-o','No Link','',0,NULL)") c.commit() c.close() @@ -119,6 +123,12 @@ def main(): jane = next((c for c in ac if c.get("name") == "Jane Doe"), {}) check(jane.get("email") == "keep@acme.com", f"Jane pill keeps its blob email, not the contact's (got {jane.get('email')!r})") + + print("\n[heal: a pill whose fundraising_contacts row has contact_id NULL stays blank (orphan)]") + o = by_id.get("rowO", {}) + orphan = next((c for c in o.get("contacts", []) if c.get("name") == "No Link"), {}) + check(orphan.get("email", "") == "", + f"orphan pill (no contact_id, no email source) stays blank (got {orphan.get('email')!r})") finally: httpd.shutdown() diff --git a/backend/test_intake_endpoints.py b/backend/test_intake_endpoints.py index 8d0bcf6..c07a1eb 100644 --- a/backend/test_intake_endpoints.py +++ b/backend/test_intake_endpoints.py @@ -103,6 +103,21 @@ def main(): seed() token = server.create_token("u1", "grant", "admin") + # Unit: the distinctive-token similarity edges (the all-generic fallback path the endpoint + # seed can't naturally reach — no real investor is named purely with generic descriptors). + print("\n[unit: _name_similarity distinctive-token edges]") + sim = server._name_similarity + check(sim("Fortitude Investment Group", "Aether Investment Group") < 0.62, + f"generic-only overlap stays below threshold (got {sim('Fortitude Investment Group', 'Aether Investment Group'):.2f})") + check(sim("Aether Capital", "Aether Capital Partners") == 1.0, + f"distinctive 'aether' (generic descriptors stripped) scores 1.0 (got {sim('Aether Capital', 'Aether Capital Partners'):.2f})") + # Both sides all-generic → fallback compares full tokens on BOTH sides; shared generic word + # alone must not clear the bar. + check(sim("Capital Group", "Global Capital") < 0.62, + f"all-generic both sides stays below threshold (got {sim('Capital Group', 'Global Capital'):.2f})") + check(sim("Family Office", "Family Office") == 1.0, + "identical all-generic names still score 1.0 (early-out)") + httpd = ThreadingHTTPServer(("127.0.0.1", 0), _Quiet) port = httpd.server_address[1] threading.Thread(target=httpd.serve_forever, daemon=True).start()