Review fixes: narrow intake redact predicate to the bot's own nudge + edge tests
The reviewer agent flagged the broadened redact_thread predicate (event_id==root OR in_reply==root) as over-matching: it would redact any plain reply to a thread root, regardless of sender. Gate the bare in_reply clause on the bot's own sender (the nudge is always ours); thread children (cards/acks/human yes/no replies) still match via rel_type=m.thread with event_id==root. Add unit edge cases for _name_similarity's all-generic fallback, and a contact_id NULL orphan case for the grid-blob email heal.
This commit is contained in:
@@ -254,10 +254,14 @@ async def main():
|
|||||||
for ev in chunk:
|
for ev in chunk:
|
||||||
rel = ((getattr(ev, "source", None) or {}).get("content", {}) or {}).get("m.relates_to") or {}
|
rel = ((getattr(ev, "source", None) or {}).get("content", {}) or {}).get("m.relates_to") or {}
|
||||||
in_reply = (rel.get("m.in_reply_to") or {}).get("event_id")
|
in_reply = (rel.get("m.in_reply_to") or {}).get("event_id")
|
||||||
# A thread child carries event_id==root; the un-threaded nudge carries only
|
# A thread child carries rel_type=m.thread + event_id==root (the cards/acks +
|
||||||
# m.in_reply_to.event_id==root. Catch both so the thread AND its main-timeline
|
# the human's yes/no replies — any sender). The un-threaded nudge is the BOT's
|
||||||
# pointer clear together.
|
# own plain reply to root (only m.in_reply_to==root, no rel_type); gate that
|
||||||
if rel.get("event_id") == root or in_reply == root:
|
# clause to our sender so we don't also redact an unrelated human plain-reply
|
||||||
|
# to the same root (root itself is already redacted above).
|
||||||
|
is_thread_child = rel.get("rel_type") == "m.thread" and rel.get("event_id") == root
|
||||||
|
is_own_nudge = in_reply == root and getattr(ev, "sender", None) == mx["user_id"]
|
||||||
|
if is_thread_child or is_own_nudge:
|
||||||
await redact_card(room_id, ev.event_id)
|
await redact_card(room_id, ev.event_id)
|
||||||
token = getattr(resp, "end", None)
|
token = getattr(resp, "end", None)
|
||||||
scanned += len(chunk)
|
scanned += len(chunk)
|
||||||
|
|||||||
@@ -61,6 +61,8 @@ GRID = {
|
|||||||
{"name": "Jose Briones", "email": "", "title": ""}]},
|
{"name": "Jose Briones", "email": "", "title": ""}]},
|
||||||
{"id": "rowA", "investor_name": "Acme Capital", "notes": "",
|
{"id": "rowA", "investor_name": "Acme Capital", "notes": "",
|
||||||
"contacts": [{"name": "Jane Doe", "email": "keep@acme.com", "title": ""}]},
|
"contacts": [{"name": "Jane Doe", "email": "keep@acme.com", "title": ""}]},
|
||||||
|
{"id": "rowO", "investor_name": "Orphan LP", "notes": "",
|
||||||
|
"contacts": [{"name": "No Link", "email": "", "title": ""}]},
|
||||||
],
|
],
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -79,11 +81,13 @@ def seed():
|
|||||||
"('c-jane','Jane','Doe','other@acme.com')") # differs from the blob's keep@acme.com
|
"('c-jane','Jane','Doe','other@acme.com')") # differs from the blob's keep@acme.com
|
||||||
# Relational mirror (what sync_fundraising_relational would build): blank fc.email, linked contact_id.
|
# Relational mirror (what sync_fundraising_relational would build): blank fc.email, linked contact_id.
|
||||||
c.execute("INSERT INTO fundraising_investors (id,investor_name,source_row_id,total_invested) VALUES "
|
c.execute("INSERT INTO fundraising_investors (id,investor_name,source_row_id,total_invested) VALUES "
|
||||||
"('inv-w','Wyoming','rowW',0),('inv-a','Acme Capital','rowA',0)")
|
"('inv-w','Wyoming','rowW',0),('inv-a','Acme Capital','rowA',0),('inv-o','Orphan LP','rowO',0)")
|
||||||
|
# fc-orphan has contact_id NULL (pre-0004 orphan) and blank email — nothing to heal from.
|
||||||
c.execute("INSERT INTO fundraising_contacts (id,investor_id,full_name,email,sort_order,contact_id) VALUES "
|
c.execute("INSERT INTO fundraising_contacts (id,investor_id,full_name,email,sort_order,contact_id) VALUES "
|
||||||
"('fc-phil','inv-w','Philip Treick','',0,'c-phil'),"
|
"('fc-phil','inv-w','Philip Treick','',0,'c-phil'),"
|
||||||
"('fc-jose','inv-w','Jose Briones','',1,'c-jose'),"
|
"('fc-jose','inv-w','Jose Briones','',1,'c-jose'),"
|
||||||
"('fc-jane','inv-a','Jane Doe','',0,'c-jane')")
|
"('fc-jane','inv-a','Jane Doe','',0,'c-jane'),"
|
||||||
|
"('fc-orphan','inv-o','No Link','',0,NULL)")
|
||||||
c.commit()
|
c.commit()
|
||||||
c.close()
|
c.close()
|
||||||
|
|
||||||
@@ -119,6 +123,12 @@ def main():
|
|||||||
jane = next((c for c in ac if c.get("name") == "Jane Doe"), {})
|
jane = next((c for c in ac if c.get("name") == "Jane Doe"), {})
|
||||||
check(jane.get("email") == "keep@acme.com",
|
check(jane.get("email") == "keep@acme.com",
|
||||||
f"Jane pill keeps its blob email, not the contact's (got {jane.get('email')!r})")
|
f"Jane pill keeps its blob email, not the contact's (got {jane.get('email')!r})")
|
||||||
|
|
||||||
|
print("\n[heal: a pill whose fundraising_contacts row has contact_id NULL stays blank (orphan)]")
|
||||||
|
o = by_id.get("rowO", {})
|
||||||
|
orphan = next((c for c in o.get("contacts", []) if c.get("name") == "No Link"), {})
|
||||||
|
check(orphan.get("email", "") == "",
|
||||||
|
f"orphan pill (no contact_id, no email source) stays blank (got {orphan.get('email')!r})")
|
||||||
finally:
|
finally:
|
||||||
httpd.shutdown()
|
httpd.shutdown()
|
||||||
|
|
||||||
|
|||||||
@@ -103,6 +103,21 @@ def main():
|
|||||||
seed()
|
seed()
|
||||||
token = server.create_token("u1", "grant", "admin")
|
token = server.create_token("u1", "grant", "admin")
|
||||||
|
|
||||||
|
# Unit: the distinctive-token similarity edges (the all-generic fallback path the endpoint
|
||||||
|
# seed can't naturally reach — no real investor is named purely with generic descriptors).
|
||||||
|
print("\n[unit: _name_similarity distinctive-token edges]")
|
||||||
|
sim = server._name_similarity
|
||||||
|
check(sim("Fortitude Investment Group", "Aether Investment Group") < 0.62,
|
||||||
|
f"generic-only overlap stays below threshold (got {sim('Fortitude Investment Group', 'Aether Investment Group'):.2f})")
|
||||||
|
check(sim("Aether Capital", "Aether Capital Partners") == 1.0,
|
||||||
|
f"distinctive 'aether' (generic descriptors stripped) scores 1.0 (got {sim('Aether Capital', 'Aether Capital Partners'):.2f})")
|
||||||
|
# Both sides all-generic → fallback compares full tokens on BOTH sides; shared generic word
|
||||||
|
# alone must not clear the bar.
|
||||||
|
check(sim("Capital Group", "Global Capital") < 0.62,
|
||||||
|
f"all-generic both sides stays below threshold (got {sim('Capital Group', 'Global Capital'):.2f})")
|
||||||
|
check(sim("Family Office", "Family Office") == 1.0,
|
||||||
|
"identical all-generic names still score 1.0 (early-out)")
|
||||||
|
|
||||||
httpd = ThreadingHTTPServer(("127.0.0.1", 0), _Quiet)
|
httpd = ThreadingHTTPServer(("127.0.0.1", 0), _Quiet)
|
||||||
port = httpd.server_address[1]
|
port = httpd.server_address[1]
|
||||||
threading.Thread(target=httpd.serve_forever, daemon=True).start()
|
threading.Thread(target=httpd.serve_forever, daemon=True).start()
|
||||||
|
|||||||
Reference in New Issue
Block a user