ten31-database/backend/matrix_intake/redact_intake.py

#!/usr/bin/env python3
"""One-time maintenance: clear the intake room's backlog of resolved/stale messages.

Going forward the bot redacts each intake thread when it's approved/rejected (bot card + ack +
nudge + the user's own note/photo). This clears the messages that piled up BEFORE that shipped.

The intake room is single-purpose and the bot keeps **no durable pending state** (its proposal
store is in-memory and is lost on every restart), so nothing in the room is "still live" after a
restart — every message in it is safe to redact. This walks the room history and redacts every
m.room.message event (text + business-card images), bot's and humans' alike.

Redacting another user's message (the humans' notes/photos) needs the bot to hold a **redact /
moderator power level** in the intake room — without it those redactions fail (each one is
reported as "redact failed") and the messages linger, while the bot's own messages still clear.
Make the bot a moderator of the intake room in Element first.

Safe by default: prints what it WOULD redact and does nothing. Pass --apply to actually redact.
Run on the Spark via the bot's own creds/image:
    docker compose run --rm matrix-intake python -u backend/matrix_intake/redact_intake.py
    docker compose run --rm matrix-intake python -u backend/matrix_intake/redact_intake.py --apply
"""
import asyncio
import sys

from nio import AsyncClient, MessageDirection

import settings

# Upper bound on how many history pages main() requests (it asks for 100 events per page).
MAX_PAGES = 50  # 50 * 100 events is far more history than this room holds


async def _collect_targets(client, intake_room, token, bot_user_id):
    """Page backwards through the intake room from *token* and list the messages to redact.

    Returns a list of ``(event_id, label)`` tuples in the order the homeserver returned the
    events. Only m.room.message events that still have content are listed; the label is a
    short one-line summary (sender kind, msgtype, first 60 chars of the body) for the report.
    """
    targets = []
    seen = set()
    for _ in range(MAX_PAGES):
        resp = await client.room_messages(intake_room, start=token,
                                          direction=MessageDirection.back, limit=100)
        if not hasattr(resp, "chunk"):
            # No chunk attribute means this is not a normal page of events (e.g. an error
            # response). Report it rather than silently treating it as the end of history.
            print(f"   ! could not read room history (list may be incomplete): {resp}")
            break
        if not resp.chunk:
            break
        for ev in resp.chunk:
            src = getattr(ev, "source", None) or {}
            if src.get("type") != "m.room.message":
                continue  # only chat messages + images; leave membership/state events alone
            eid = getattr(ev, "event_id", None)
            if not eid or eid in seen:
                continue
            seen.add(eid)
            content = src.get("content") or {}
            if not content:
                continue  # already redacted (content stripped) — skip
            msgtype = content.get("msgtype") or "?"
            body = (content.get("body", "") or "").replace("\n", " ")
            who = "bot " if getattr(ev, "sender", None) == bot_user_id else "user"
            targets.append((eid, f"{who} [{msgtype}] {body[:60]}"))
        token = getattr(resp, "end", None)
        if not token:
            break
    else:
        # Every page was full and a continuation token was still available: the walk was cut
        # short by the page cap, not by reaching the start of the room.
        print(f"   ! stopped after {MAX_PAGES} pages — older messages may remain; "
              "raise MAX_PAGES to reach them")
    return targets


async def main(apply: bool) -> None:
    """List (and, when *apply* is true, redact) every message in the intake room.

    Reads the Matrix connection details from ``settings.matrix_settings()``. If no intake room
    is configured it prints a notice and returns. Otherwise it restores the bot's login, syncs
    once to obtain a pagination token, walks the room history backwards and prints one line per
    m.room.message event found. With ``apply=False`` nothing is changed (dry run); with
    ``apply=True`` each listed event is redacted and failures are counted and reported.
    The client is always closed before returning.
    """
    mx = settings.matrix_settings()
    intake_room = mx.get("intake_room")
    if not intake_room:
        print("MATRIX_INTAKE_ROOM is not set — nothing to do.")
        return
    client = AsyncClient(mx["homeserver"], mx["user_id"])
    client.restore_login(user_id=mx["user_id"], device_id=mx["device_id"], access_token=mx["token"])
    try:
        sync = await client.sync(timeout=10000, full_state=False)
        # A failed sync yields a response without next_batch; stop with a readable message
        # instead of crashing on the attribute access.
        token = getattr(sync, "next_batch", None)
        if not token:
            print(f"sync failed — cannot walk the intake room history: {sync}")
            return

        targets = await _collect_targets(client, intake_room, token, mx["user_id"])

        print(f"messages to clear in the intake room: {len(targets)}")
        fails = 0
        for eid, label in targets:
            print(("APPLY redact " if apply else "WOULD redact ") + eid + "  ::  " + label)
            if apply:
                r = await client.room_redact(intake_room, eid, reason="retroactive intake-room cleanup")
                if not hasattr(r, "event_id"):
                    # A response without an event_id is treated as a failed redaction.
                    fails += 1
                    print(f"   ! redact failed (need mod power for others' messages?): {r}")
        # fails stays 0 on a dry run, so this is the full count there.
        succeeded = len(targets) - fails
        print(("done — redacted " if apply else "dry run — would redact ")
              + f"{succeeded}/{len(targets)} event(s)"
              + (f"; {fails} failed" if fails else "") + ".")
    finally:
        await client.close()


if __name__ == "__main__":
    # Dry run unless --apply is present among the command-line arguments.
    apply_requested = "--apply" in sys.argv[1:]
    asyncio.run(main(apply=apply_requested))