Review fixes: narrow intake redact predicate to the bot's own nudge + edge tests

The reviewer agent flagged the broadened redact_thread predicate (rel event_id == root OR in_reply == root)
as over-matching: it would also redact any plain reply to a thread root. Gate the bare in_reply clause to the
bot's own sender (the nudge is always ours); thread children (cards/acks/human yes/no replies) still match by
rel_type=m.thread with event_id == root, regardless of sender. Add unit edge-case tests for _name_similarity's
all-generic fallback, and a contact_id NULL orphan case for the grid-blob email heal.
This commit is contained in:
Keysat
2026-06-20 13:05:13 -05:00
parent 8c9b8b8cc1
commit acd316ead4
3 changed files with 35 additions and 6 deletions
+8 -4
View File
@@ -254,10 +254,14 @@ async def main():
for ev in chunk:
rel = ((getattr(ev, "source", None) or {}).get("content", {}) or {}).get("m.relates_to") or {}
in_reply = (rel.get("m.in_reply_to") or {}).get("event_id")
# A thread child carries event_id==root; the un-threaded nudge carries only
# m.in_reply_to.event_id==root. Catch both so the thread AND its main-timeline
# pointer clear together.
if rel.get("event_id") == root or in_reply == root:
# A thread child carries rel_type=m.thread + event_id==root (the cards/acks +
# the human's yes/no replies — any sender). The un-threaded nudge is the BOT's
# own plain reply to root (only m.in_reply_to==root, no rel_type); gate that
# clause to our sender so we don't also redact an unrelated human plain-reply
# to the same root (root itself is already redacted above).
is_thread_child = rel.get("rel_type") == "m.thread" and rel.get("event_id") == root
is_own_nudge = in_reply == root and getattr(ev, "sender", None) == mx["user_id"]
if is_thread_child or is_own_nudge:
await redact_card(room_id, ev.event_id)
token = getattr(resp, "end", None)
scanned += len(chunk)
+12 -2
View File
@@ -61,6 +61,8 @@ GRID = {
{"name": "Jose Briones", "email": "", "title": ""}]},
{"id": "rowA", "investor_name": "Acme Capital", "notes": "",
"contacts": [{"name": "Jane Doe", "email": "keep@acme.com", "title": ""}]},
{"id": "rowO", "investor_name": "Orphan LP", "notes": "",
"contacts": [{"name": "No Link", "email": "", "title": ""}]},
],
}
@@ -79,11 +81,13 @@ def seed():
"('c-jane','Jane','Doe','other@acme.com')") # differs from the blob's keep@acme.com
# Relational mirror (what sync_fundraising_relational would build): blank fc.email, linked contact_id.
c.execute("INSERT INTO fundraising_investors (id,investor_name,source_row_id,total_invested) VALUES "
"('inv-w','Wyoming','rowW',0),('inv-a','Acme Capital','rowA',0)")
"('inv-w','Wyoming','rowW',0),('inv-a','Acme Capital','rowA',0),('inv-o','Orphan LP','rowO',0)")
# fc-orphan has contact_id NULL (pre-0004 orphan) and blank email — nothing to heal from.
c.execute("INSERT INTO fundraising_contacts (id,investor_id,full_name,email,sort_order,contact_id) VALUES "
"('fc-phil','inv-w','Philip Treick','',0,'c-phil'),"
"('fc-jose','inv-w','Jose Briones','',1,'c-jose'),"
"('fc-jane','inv-a','Jane Doe','',0,'c-jane')")
"('fc-jane','inv-a','Jane Doe','',0,'c-jane'),"
"('fc-orphan','inv-o','No Link','',0,NULL)")
c.commit()
c.close()
@@ -119,6 +123,12 @@ def main():
jane = next((c for c in ac if c.get("name") == "Jane Doe"), {})
check(jane.get("email") == "keep@acme.com",
f"Jane pill keeps its blob email, not the contact's (got {jane.get('email')!r})")
print("\n[heal: a pill whose fundraising_contacts row has contact_id NULL stays blank (orphan)]")
o = by_id.get("rowO", {})
orphan = next((c for c in o.get("contacts", []) if c.get("name") == "No Link"), {})
check(orphan.get("email", "") == "",
f"orphan pill (no contact_id, no email source) stays blank (got {orphan.get('email')!r})")
finally:
httpd.shutdown()
+15
View File
@@ -103,6 +103,21 @@ def main():
seed()
token = server.create_token("u1", "grant", "admin")
# Unit: the distinctive-token similarity edges (the all-generic fallback path the endpoint
# seed can't naturally reach — no real investor is named purely with generic descriptors).
print("\n[unit: _name_similarity distinctive-token edges]")
sim = server._name_similarity
check(sim("Fortitude Investment Group", "Aether Investment Group") < 0.62,
f"generic-only overlap stays below threshold (got {sim('Fortitude Investment Group', 'Aether Investment Group'):.2f})")
check(sim("Aether Capital", "Aether Capital Partners") == 1.0,
f"distinctive 'aether' (generic descriptors stripped) scores 1.0 (got {sim('Aether Capital', 'Aether Capital Partners'):.2f})")
# Both sides all-generic → fallback compares full tokens on BOTH sides; shared generic word
# alone must not clear the bar.
check(sim("Capital Group", "Global Capital") < 0.62,
f"all-generic both sides stays below threshold (got {sim('Capital Group', 'Global Capital'):.2f})")
check(sim("Family Office", "Family Office") == 1.0,
"identical all-generic names still score 1.0 (early-out)")
httpd = ThreadingHTTPServer(("127.0.0.1", 0), _Quiet)
port = httpd.server_address[1]
threading.Thread(target=httpd.serve_forever, daemon=True).start()