Compare commits
7 Commits
phase-0
...
ff0dada0d5
| Author | SHA1 | Date | |
|---|---|---|---|
| ff0dada0d5 | |||
| e5a751d4f4 | |||
| ee8408d182 | |||
| 8ad1cd8465 | |||
| a7529eb0b7 | |||
| 7a39fec229 | |||
| 76d8a001b1 |
@@ -0,0 +1,21 @@
|
|||||||
|
# Keep the build context minimal and the image generic/secret-free.
# .env, config.toml, and the SSH key arrive via read-only mounts at runtime — never baked in.
#
# NOTE: unlike .gitignore, .dockerignore patterns are anchored at the root of the build
# context. A bare `__pycache__/` or `*.md` only matches at the top level, so anything that can
# appear in a subdirectory (src/__pycache__, docs/*.md, ...) needs a `**/` prefix.
.env
.env.*
!.env.example
config.toml

.git
.venv/
venv/
**/__pycache__
**/*.py[cod]
**/*.egg-info

# Mac-side launch scripts run on the Mac, not in this container.
scripts/

# Docs / OS cruft — not needed in the image.
**/*.md
.claude/
**/.DS_Store
|
||||||
@@ -8,3 +8,9 @@ MATRIX_ACCESS_TOKEN=
|
|||||||
# Optional — kept for recovery / re-minting a token. The bot authenticates with the access token,
|
# Optional — kept for recovery / re-minting a token. The bot authenticates with the access token,
|
||||||
# not the password (logging in every start would spawn a new device each time).
|
# not the password (logging in every start would spawn a new device each time).
|
||||||
MATRIX_PASSWORD=
|
MATRIX_PASSWORD=
|
||||||
|
|
||||||
|
# Headless "ask" mode (the `?`-prefix path). Used MAC-SIDE by scripts/ask-claude.sh, NOT by the
|
||||||
|
# bot — a non-GUI SSH session can't reach the login Keychain, so `claude -p` needs this token to
|
||||||
|
# authenticate. Mint once on the Mac: `claude setup-token` (requires a Claude subscription), then
|
||||||
|
# paste the value here. Lives on the Mac; the Spark never runs claude, so it needs no copy.
|
||||||
|
CLAUDE_CODE_OAUTH_TOKEN=
|
||||||
|
|||||||
@@ -16,14 +16,15 @@ phone to drive interactively. Single user, private home network, no multi-user/p
|
|||||||
Matrix message in a project room
|
Matrix message in a project room
|
||||||
→ bot (matrix-nio, on the DGX Spark) receives it
|
→ bot (matrix-nio, on the DGX Spark) receives it
|
||||||
→ looks up which repo that room maps to (explicit config — no classification)
|
→ looks up which repo that room maps to (explicit config — no classification)
|
||||||
→ SSHes to the Mac and runs scripts/launch-claude.sh with (repo_dir, message_text)
|
→ SSHes to the Mac and runs scripts/gui-launch.sh → launch-claude.sh (repo_dir, message_text)
|
||||||
→ wrapper cd's into the repo and launches `claude` with the message as the prompt
|
→ wrapper cd's into the repo, opens a desktop Terminal, and launches `claude` on the message
|
||||||
→ Claude Code Remote Control (auto-enabled) pushes a notification to the phone
|
→ Claude Code Remote Control (auto-enabled) pushes a notification to the phone
|
||||||
→ tap in and drive the session from the Claude app
|
→ tap in and drive the session from the Claude app
|
||||||
```
|
```
|
||||||
|
|
||||||
Room determines the repo; the message text becomes the initial prompt. That is the entire
|
Room determines the repo; the message text becomes the initial prompt — the v1 trigger surface.
|
||||||
v1 decision surface.
|
*Variant:* a `?`-prefixed message instead runs `ask-claude.sh` (headless `claude -p`) and posts
|
||||||
|
the full answer back into the room (ask mode, D12).
|
||||||
|
|
||||||
## Stack
|
## Stack
|
||||||
|
|
||||||
@@ -51,19 +52,45 @@ v1 decision surface.
|
|||||||
|
|
||||||
- `scripts/launch-claude.sh <repo_dir> <prompt>` — the Mac wrapper (Phase 0 deliverable;
|
- `scripts/launch-claude.sh <repo_dir> <prompt>` — the Mac wrapper (Phase 0 deliverable;
|
||||||
validate by hand before any bot code).
|
validate by hand before any bot code).
|
||||||
- _TODO (Phase 1+):_ bot build/run (`docker build` / `docker compose up` on the Spark) once
|
- **Bot (Phase 1), containerized on the Spark — preferred:** from `~/matrix-bridge`,
|
||||||
`src/` exists.
|
`docker compose up -d --build` (host networking, `restart: unless-stopped` so it survives
|
||||||
|
reboots; read-only mounts of `.env`/`config.toml`/SSH key). Logs: `docker compose logs -f`.
|
||||||
|
The entrypoint generates `~/.ssh/config` for the `mac-bridge` alias from `config.toml [mac]`
|
||||||
|
(`hostname`/`user`), so the alias resolves inside the container. Override the host-side path of the SSH private key with
|
||||||
|
`MB_SSH_KEY_HOST` if it isn't `/home/modelo/.ssh/id_ed25519`.
|
||||||
|
- **Bot — venv (dev/fallback):** `python3 -m venv .venv && .venv/bin/pip install -r requirements.txt`,
|
||||||
|
then `.venv/bin/python src/bot.py` — uses modelo's host `~/.ssh/config` for the alias.
|
||||||
|
`MB_SSH_ALIAS` overrides the SSH target for testing.
|
||||||
|
- **Deploy:** pull the bot files from the Mac (no Gitea needed) —
|
||||||
|
`scp mac-bridge:/Users/macpro/Projects/matrix-bridge/{Dockerfile,docker-compose.yml,docker-entrypoint.sh,requirements.txt,config.toml,.env} .`
|
||||||
|
and `scp -r mac-bridge:/Users/macpro/Projects/matrix-bridge/src .`, then rebuild.
|
||||||
|
|
||||||
## Layout
|
## Layout
|
||||||
|
|
||||||
- `AGENTS.md` — this file (canonical; `CLAUDE.md` is a relative symlink to it).
|
- `AGENTS.md` — this file (canonical; `CLAUDE.md` is a relative symlink to it).
|
||||||
- `ROADMAP.md` — Phases 1–4+ with falsifiable exits, plus deferred/future directions.
|
- `ROADMAP.md` — Phases 1–4+ with falsifiable exits, plus deferred/future directions.
|
||||||
- `README.md` — human-facing intro.
|
- `README.md` — human-facing intro.
|
||||||
|
- `docs/spark-control-integration.md` — Phase 3 spec for the Spark Control dev: the SSH
|
||||||
|
command contract (status / restart / git-pull update) the dashboard drives, plus the one-time
|
||||||
|
conversion of the Spark's `~/matrix-bridge` to a Gitea clone. matrix-bridge needs no code change.
|
||||||
- `scripts/launch-claude.sh` — the Mac-side launch wrapper (the only seam that knows the
|
- `scripts/launch-claude.sh` — the Mac-side launch wrapper (the only seam that knows the
|
||||||
Mac's environment).
|
Mac's environment).
|
||||||
- `config.example.toml` — room→repo mapping template; the real `config.toml` is gitignored.
|
- `config.example.toml` — room→repo mapping template; the real `config.toml` is gitignored.
|
||||||
|
- `scripts/gui-launch.sh` — opens the desktop Terminal via `osascript` (Approach B, D11); calls
|
||||||
|
`launch-claude.sh` inside it. The bot invokes this over SSH.
|
||||||
|
- `scripts/ask-claude.sh` — headless `?`-ask wrapper (`#!/bin/zsh -l`): runs `claude -p` in the repo
|
||||||
|
and prints the answer to stdout for the bot to capture and post back. Uses `CLAUDE_CODE_OAUTH_TOKEN`
|
||||||
|
(Mac-side `.env`) because a non-GUI SSH session can't reach the login Keychain (D12).
|
||||||
|
- `src/bot.py` — the matrix-nio bot (Phase 1): listens in mapped rooms; a plain message runs
|
||||||
|
`ssh mac-bridge gui-launch.sh` (interactive, to the phone), a `?`-prefixed message runs
|
||||||
|
`ask-claude.sh` (headless, answer posted back); fans out for all-projects; reports failures back.
|
||||||
|
- `requirements.txt` (matrix-nio) · `.env.example` (credential schema; real `.env` gitignored).
|
||||||
- `.claude/` — Claude wiring (dir only for now).
|
- `.claude/` — Claude wiring (dir only for now).
|
||||||
- _Future:_ `src/` (the matrix-nio bot), `Dockerfile`, dependency manifest — Phase 1.
|
- `Dockerfile` · `docker-compose.yml` · `docker-entrypoint.sh` · `.dockerignore` — the Phase 1
|
||||||
|
container (Spark). Generic image (no secrets/deployment specifics baked in); host networking;
|
||||||
|
read-only mounts of `.env`/`config.toml`/SSH key. The entrypoint generates `~/.ssh/config` for
|
||||||
|
the `mac-bridge` alias from `config.toml [mac]` — the container's environment seam (D4 analog
|
||||||
|
of `launch-claude.sh`).
|
||||||
|
|
||||||
## Decisions (already made — don't relitigate without new information)
|
## Decisions (already made — don't relitigate without new information)
|
||||||
|
|
||||||
@@ -106,6 +133,13 @@ Condensed from the scoping workshop. Each: the call, why, what it beat.
|
|||||||
and is fully unattended, but adds a credential to manage; kept as the documented fallback if the
|
and is fully unattended, but adds a credential to manage; kept as the documented fallback if the
|
||||||
Mac is ever driven headless (logged out). *Cost:* requires the Mac logged in + a one-time
|
Mac is ever driven headless (logged out). *Cost:* requires the Mac logged in + a one-time
|
||||||
Terminal Automation grant.
|
Terminal Automation grant.
|
||||||
|
- **D12 — Headless "ask" mode uses the long-lived token; interactive stays GUI-Terminal (2026-06-16).**
|
||||||
|
A `?`-prefixed message runs `claude -p` headlessly over plain SSH and posts the answer back, so its
|
||||||
|
stdout must be captured over the SSH pipe — which rules out the GUI-Terminal path (D11), and a
|
||||||
|
non-GUI session can't reach the login Keychain, so a token-less `claude -p` reports "Not logged in." Ask mode therefore deliberately adopts the long-lived
|
||||||
|
`claude setup-token` (`CLAUDE_CODE_OAUTH_TOKEN`) that D11 deferred — kept **Mac-side only** (in
|
||||||
|
`.env`; the Spark never runs claude). Interactive launches keep the token-free GUI-Terminal path.
|
||||||
|
*Sovereignty unchanged:* `claude -p` uses the subscription, no frontier API touches message payloads.
|
||||||
|
|
||||||
## Sovereignty constraint
|
## Sovereignty constraint
|
||||||
|
|
||||||
@@ -131,56 +165,50 @@ Substance threshold **N = 3** real uses, defined per phase in `ROADMAP.md`. "Don
|
|||||||
falsifiable, scaled substance (it worked 3 real times), never a checkbox. A phase that "works
|
falsifiable, scaled substance (it worked 3 real times), never a checkbox. A phase that "works
|
||||||
once" is not done.
|
once" is not done.
|
||||||
|
|
||||||
|
## Infra facts (proven — stable reference)
|
||||||
|
|
||||||
|
- **WireGuard (`starttunnel`) for Mac↔Spark:** Mac `10.59.211.5`; Spark (`spark-32d0`, user `modelo`)
|
||||||
|
`10.59.211.6`. The Mac↔Spark seam runs over WireGuard (not the Mac's LAN subnet). The Spark *is*
|
||||||
|
on the LAN, same as the Start9 host (`immense-voyage`) — so Spark→Gitea (`immense-voyage.local:59916`)
|
||||||
|
resolves and works directly.
|
||||||
|
- **Spark → Mac:** SSH alias `mac-bridge` → the Mac as user `macpro`, dedicated key
|
||||||
|
(`~/.ssh/id_ed25519` on the Spark, in the Mac's `authorized_keys`). The Spark host's `~/.ssh/config` needs `IdentitiesOnly yes` because a
|
||||||
|
`Host *` rule shadows the default key; the container regenerates a clean config from `config.toml [mac]`.
|
||||||
|
- **Mac → Spark:** no authorized key — direct Mac-initiated Spark ops stay owner-run. (This is *not*
|
||||||
|
what Phase 3 closes: Spark Control already has its own SSH channel into `spark-32d0`, so its
|
||||||
|
status/update/restart buttons ride that, not a Mac→Spark key.)
|
||||||
|
- **Matrix:** homeserver `https://matrix.gilliam.ai` (StartOS Synapse), bot `@agent:matrix.gilliam.ai`,
|
||||||
|
device `matrix-bridge-bot`. The bot reuses the stored access token (`.env`) — never re-logs in
|
||||||
|
(avoids device churn). No E2EE (D9); bot↔Synapse is clearnet TLS, softening D9's WireGuard-only rationale.
|
||||||
|
- **Mac env:** `claude` lives in `~/.local/bin`, on PATH only via `~/.zprofile` — so every wrapper is
|
||||||
|
`#!/bin/zsh -l` (a non-login SSH shell loads neither `.zprofile` nor `.zshrc`).
|
||||||
|
- **Interactive-launch prereqs:** Mac logged into its desktop + a one-time Terminal Automation grant
|
||||||
|
(TCC). If the grant resets, a launch stalls — the bot reports it fail-loud rather than hanging.
|
||||||
|
- **Folder-trust gate:** the first `claude` run in a repo it has never been opened in stalls on the
|
||||||
|
trust prompt; already-used repos are trusted. Affects unattended interactive launches and ask mode.
|
||||||
|
|
||||||
## Current state
|
## Current state
|
||||||
|
|
||||||
- **Scaffolded 2026-06-15** from a prior scoping package (SPEC/DECISIONS/CLAUDE/KICKOFF),
|
- **Working & proven live on the Spark (Phases 0–1 + ask mode, 2026-06-16).** The bot runs as a Docker
|
||||||
folded into this AGENTS.md (decisions + placement), `ROADMAP.md` (phases), and the wrapper +
|
container on the Spark (`~/matrix-bridge`, `docker compose up -d --build`): generic image, host
|
||||||
config skeleton. No bot code yet — by design.
|
networking, `restart: unless-stopped`, read-only mounts of `.env`/`config.toml`/SSH key. Listens as
|
||||||
- **Phase 0 — SSH leg proven (2026-06-15).** Mac Remote Login is on. The Spark `spark-32d0`
|
`@agent` in 11 project rooms + an all-projects fan-out room (each fan-out session named `<repo> - <date>`).
|
||||||
(user `modelo`) reaches the Mac over `starttunnel`/WireGuard at `10.59.211.5` — *not* the
|
- **Interactive** (plain message): `ssh mac-bridge → gui-launch.sh → launch-claude.sh → claude` →
|
||||||
LAN (the Spark isn't on the Mac's LAN subnet). A dedicated per-machine key
|
drivable session on the phone via Remote Control.
|
||||||
(`spark-control@spark-32d0` = `~/.ssh/id_ed25519` on the Spark) is in the Mac's
|
- **Ask mode** (`?`-prefixed message): `ssh mac-bridge → ask-claude.sh → claude -p`, full answer posted
|
||||||
`authorized_keys`. SSH alias **`mac-bridge`** in the Spark's `~/.ssh/config` selects that key
|
back into the room (chunked, no truncation). See D12.
|
||||||
(`IdentityFile ~/.ssh/id_ed25519` + `IdentitiesOnly yes`) — required because the pre-existing
|
- **Phase 2 (multi-room routing) — DONE.** Owner confirmed the N=3 pass: routes by `room_id`,
|
||||||
`Host * → id_ed25519_shared` rule otherwise shadows the default key. The bot's entire Mac hop
|
correct repo, zero wrong-directory launches.
|
||||||
is therefore `ssh mac-bridge '<command>'`. *Phase 1:* bake the dedicated key + an equivalent
|
- **Phase 3 (Spark Control integration) — spec drafted, handed to the Spark Control dev (2026-06-15).**
|
||||||
alias/config into the bot's Docker image (modelo's `~/.ssh/config` won't exist in the
|
See `docs/spark-control-integration.md`: the SSH command contract (status via `docker inspect`;
|
||||||
container).
|
restart via `docker restart`; update via `git fetch && git reset --hard origin/master &&
|
||||||
- **Phase 0 — launch chain proven end-to-end (2026-06-15).** `ssh mac-bridge → gui-launch.sh
|
docker compose up -d --build`) plus a one-time conversion of the Spark's `~/matrix-bridge` from
|
||||||
→ launch-claude.sh → authenticated claude → phone via Remote Control` works against a real
|
scp'd loose files to a Gitea clone (secrets are gitignored, so `reset --hard` preserves them).
|
||||||
repo (`premier-gunner`). Chose **Approach B (desktop Terminal)** over a headless token — see
|
Decisions this session: update source = git-pull-from-Gitea (not scp-from-Mac); Spark Control
|
||||||
**D11**. Two pieces it took: (1) `~/.local/bin` (where `claude` lives) had to be added to
|
already SSHes into `spark-32d0`, so no new key. **matrix-bridge needs no code change** — the work
|
||||||
`~/.zprofile`, because a non-interactive login shell skips `.zshrc`; (2) `scripts/gui-launch.sh`
|
is now Spark Control-side (status tile + buttons) + the one-time Spark migration. Awaiting the dev.
|
||||||
opens a Terminal.app window via `osascript` so `claude` runs inside the GUI session (login
|
- **Open / risks:** a `?`-ask in a repo `claude` has never opened may stall on the folder-trust gate
|
||||||
Keychain + real TTY) — needed a one-time "Allow ssh to control Terminal" Automation grant.
|
— add a trust flag to `ask-claude.sh` if/when hit, not preemptively. (Resolved this session: the
|
||||||
*Known caveats for the bot:* (a) a never-trusted repo stalls at Claude's first-run folder-trust
|
accidental MacBook docker deploy was cleaned up by the owner.)
|
||||||
gate — unattended launches must target already-trusted repos or pass a skip flag; (b) if the
|
- **Repo:** `master` == `phase-1`, pushed to Gitea; the Phase 3 spec + doc updates landed as `e5a751d`.
|
||||||
TCC Automation grant ever resets, a launch stalls until someone clicks Allow — the bot should
|
No test suite (pre-existing); the doc is a spec, no code changed.
|
||||||
detect a failed launch and report it back to the room, not hang.
|
|
||||||
- **Phase 0 — Matrix bot user live (2026-06-15).** Homeserver is the StartOS Synapse exposed on
|
|
||||||
**clearnet at `https://matrix.gilliam.ai`** (`server_name` = `matrix.gilliam.ai`, Synapse
|
|
||||||
1.154.0) — *not* the stale `@gilliam:<onion>` account found in Element. Created a dedicated
|
|
||||||
non-admin bot **`@agent:matrix.gilliam.ai`** (type `bot`) via the Synapse Admin Dashboard
|
|
||||||
(StartOS "Create Bot User" is appservice-only/greyed out). Minted a long-lived access token
|
|
||||||
(fixed `device_id` `matrix-bridge-bot`), verified via `whoami`, and stored
|
|
||||||
homeserver/user/token/device_id (+ password for recovery) in the gitignored **`.env`** (chmod
|
|
||||||
600). `config.toml` holds homeserver+user; `.env.example` documents the schema. Bot reuses the
|
|
||||||
stored token — never re-login per start (avoids device churn); no E2EE (D9). *Note:* the
|
|
||||||
bot↔Synapse hop is now public-internet TLS, which softens D9's "transport already WireGuard-
|
|
||||||
private" rationale (still TLS to the user's own server, single-user content) — revisit if it matters.
|
|
||||||
- **Phase 0 — rooms mapped (2026-06-15).** 9 project rooms in `config.toml` (premier-gunner,
|
|
||||||
recap, recap-relay, spark-control, ten31-transcripts, ten31-signal-engine, keysat, proof-of-work,
|
|
||||||
ten31-database), each `room_id → /Users/macpro/Projects/<repo>`. `@agent` is **joined to all 9**
|
|
||||||
(via its token), so the Phase-1 bot will see messages in each. *Manual by-hand launches must keep
|
|
||||||
message text free of `'`/`"`* — the typed SSH command line breaks on them (PS2 `>` hang); the
|
|
||||||
Phase-1 bot avoids this via `shlex.quote`.
|
|
||||||
- **Phase 0 — PROVEN / DONE (2026-06-15).** N=3 by-hand runs succeeded across multiple rooms
|
|
||||||
(recap, spark-control, premier-gunner): each opened a Terminal in the right repo, started `claude`
|
|
||||||
on the message, and pushed a drivable session to the phone. The deterministic core holds.
|
|
||||||
Added session naming: `launch-claude.sh` now runs `claude -n "<repo> - <topic>"` (topic from the
|
|
||||||
message, overridable via `$MB_SESSION_NAME`) so Remote Control's phone index is readable —
|
|
||||||
confirmed `-n` drives the phone app's conversation label.
|
|
||||||
- **Next: Phase 1 — the matrix-nio bot.** Container on the Spark, logged in as `@agent` (token in
|
|
||||||
`.env`), listening in the 9 mapped rooms; on a message it runs `ssh mac-bridge gui-launch.sh
|
|
||||||
<repo_dir> <message>` (built with `shlex.quote`) and reports failures back to the room. See
|
|
||||||
ROADMAP Phase 1 (also: bake key+config into the image, curated `$MB_SESSION_NAME` topic, fail-loud).
|
|
||||||
|
|||||||
+27
@@ -0,0 +1,27 @@
|
|||||||
|
# matrix-bridge bot image (Phase 1) — built and run on the Spark (always-on Linux + Docker).
#
# Networking is supplied by docker-compose (host mode), which is what lets the bot reach BOTH
# Synapse (clearnet TLS) and the Mac (WireGuard, via the `mac-bridge` SSH alias).
#
# This image is GENERIC: it bakes in no deployment specifics and no secrets. docker-compose
# mounts .env, config.toml, and the SSH key read-only at runtime, and the entrypoint writes
# ~/.ssh/config for the alias from config.toml's [mac] section before starting the bot.
FROM python:3.12-slim

# The bot shells out to `ssh mac-bridge ...` (the proven Phase 0 seam), so it needs an SSH client.
RUN apt-get update \
    && apt-get install -y --no-install-recommends openssh-client \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Install dependencies before copying the source so this layer is reused when only src/ changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY src/ ./src/
COPY docker-entrypoint.sh /usr/local/bin/docker-entrypoint.sh
RUN chmod +x /usr/local/bin/docker-entrypoint.sh

# Nothing secret is copied above: .env and config.toml are mounted read-only at runtime.
# The entrypoint prepares the SSH client config, then execs CMD (`-u` = unbuffered output).
ENTRYPOINT ["/usr/local/bin/docker-entrypoint.sh"]
CMD ["python", "-u", "src/bot.py"]
|
||||||
+17
-3
@@ -26,19 +26,23 @@ after it.
|
|||||||
- **Exit (falsifiable):** 3 consecutive real messages each correctly launch a drivable
|
- **Exit (falsifiable):** 3 consecutive real messages each correctly launch a drivable
|
||||||
session on the phone.
|
session on the phone.
|
||||||
|
|
||||||
## Phase 2 — Multi-room routing
|
## Phase 2 — Multi-room routing — DONE (2026-06-15)
|
||||||
|
|
||||||
- Room → repo mapping table; the bot routes by `room_id` (config over code).
|
- Room → repo mapping table; the bot routes by `room_id` (config over code).
|
||||||
- **Exit (falsifiable):** 3 real uses across ≥2 rooms, correct repo every time, zero
|
- **Exit (falsifiable):** 3 real uses across ≥2 rooms, correct repo every time, zero
|
||||||
wrong-directory launches.
|
wrong-directory launches. *Met — owner-confirmed N=3 pass.*
|
||||||
|
|
||||||
## Phase 3 — Spark Control integration
|
## Phase 3 — Spark Control integration — SPEC DRAFTED (2026-06-15), awaiting Spark Control dev
|
||||||
|
|
||||||
- Bot container status surfaced on the Spark Control dashboard.
|
- Bot container status surfaced on the Spark Control dashboard.
|
||||||
- One-click update (pull + restart) wired the same way Spark Control drives the Sparks today
|
- One-click update (pull + restart) wired the same way Spark Control drives the Sparks today
|
||||||
(SSH/commands behind a button).
|
(SSH/commands behind a button).
|
||||||
- **Exit (falsifiable):** bot status is visible and the bot can be updated/restarted from the
|
- **Exit (falsifiable):** bot status is visible and the bot can be updated/restarted from the
|
||||||
panel.
|
panel.
|
||||||
|
- **Spec:** `docs/spark-control-integration.md` — the SSH command contract + one-time Spark
|
||||||
|
migration to a Gitea clone. Decided: update = git-pull-from-Gitea; Spark Control's existing
|
||||||
|
SSH into `spark-32d0` carries the buttons (no new key). matrix-bridge needs no code change;
|
||||||
|
remaining work is Spark Control-side + the one-time migration.
|
||||||
|
|
||||||
## Phase 4+ — Future direction (documented, not yet scoped to build)
|
## Phase 4+ — Future direction (documented, not yet scoped to build)
|
||||||
|
|
||||||
@@ -54,3 +58,13 @@ after it.
|
|||||||
is actually in use.
|
is actually in use.
|
||||||
- **E2EE (D9).** Add matrix-nio end-to-end encryption (libolm) if the bot ever handles
|
- **E2EE (D9).** Add matrix-nio end-to-end encryption (libolm) if the bot ever handles
|
||||||
sensitive content over untrusted transport. Low priority while everything is WireGuard-local.
|
sensitive content over untrusted transport. Low priority while everything is WireGuard-local.
|
||||||
|
- **Headless "ask" mode — SHIPPED 2026-06-16.** A `?`-prefixed message runs `claude -p "<rest>"`
|
||||||
|
one-shot in the room's repo and posts the **full** answer back into the room — Matrix as a
|
||||||
|
request/response interface, not just a trigger. Built via `scripts/ask-claude.sh` (login-shell
|
||||||
|
wrapper) + the bot's `?`-dispatch (`run_ask`/`ask`). Resolved design choices: selector = `?` prefix
|
||||||
|
(per-message; the room still picks the repo); output posted in full, chunked under Matrix's event
|
||||||
|
cap (no truncation — chosen explicitly); auth = the long-lived `claude setup-token`
|
||||||
|
(`CLAUDE_CODE_OAUTH_TOKEN`, Approach A / D12) because a non-GUI SSH session can't reach the
|
||||||
|
Keychain; sovereignty unchanged (`claude -p` uses the subscription, no frontier API on payloads).
|
||||||
|
*Remaining open Qs:* very-long-output handling beyond chunking (thread / attach file); the
|
||||||
|
first-run folder-trust gate for a repo `claude` has never been opened in.
|
||||||
|
|||||||
@@ -10,6 +10,17 @@ user = "@matrix-bridge-bot:<your-domain>" # a dedicated bot Matrix account (not
|
|||||||
# Credentials (access token or password) come from the environment or a gitignored secret —
|
# Credentials (access token or password) come from the environment or a gitignored secret —
|
||||||
# never commit them. The bot reads the homeserver URL + bot creds at startup.
|
# never commit them. The bot reads the homeserver URL + bot creds at startup.
|
||||||
|
|
||||||
|
# How the bot reaches the Mac (the proven Phase 0 seam). The bot runs on the Spark,
|
||||||
|
# where `ssh_alias` resolves; `launcher` is the absolute path to gui-launch.sh on the Mac.
|
||||||
|
[mac]
|
||||||
|
ssh_alias = "mac-bridge"
|
||||||
|
launcher = "/Users/macpro/Projects/<your-repo>/scripts/gui-launch.sh"
|
||||||
|
ask_launcher = "/Users/macpro/Projects/<your-repo>/scripts/ask-claude.sh" # headless `?`-prefix ask mode
|
||||||
|
# Container only: docker-entrypoint.sh generates ~/.ssh/config for `ssh_alias` from these.
|
||||||
|
# (On a host with `ssh_alias` already in ~/.ssh/config these are ignored.)
|
||||||
|
hostname = "10.0.0.0" # the Mac's address reachable from the Spark (e.g. WireGuard IP)
|
||||||
|
user = "<mac-username>"
|
||||||
|
|
||||||
# One [[room]] block per project.
|
# One [[room]] block per project.
|
||||||
# room_id — the internal Matrix room ID (starts with '!'), NOT the human alias (#name:domain)
|
# room_id — the internal Matrix room ID (starts with '!'), NOT the human alias (#name:domain)
|
||||||
# repo_dir — an absolute path on the Mac (note: ~/Projects uses a capital P)
|
# repo_dir — an absolute path on the Mac (note: ~/Projects uses a capital P)
|
||||||
|
|||||||
@@ -0,0 +1,19 @@
|
|||||||
|
# matrix-bridge bot — Phase 1 deployment on the Spark.
#
# `docker compose up -d` runs the bot detached; `restart: unless-stopped` brings it back after
# a Spark reboot. Host networking lets it reach BOTH Synapse (clearnet TLS) and the Mac
# (WireGuard, via the mac-bridge alias the entrypoint generates). The image stays generic — all
# deployment specifics and secrets arrive through the read-only mounts below.
services:
  bot:
    build: .
    image: matrix-bridge-bot
    container_name: matrix-bridge
    network_mode: host
    restart: unless-stopped
    # Long-form bind mounts with `create_host_path: false`. The short `src:dst:ro` form silently
    # creates an empty root-owned DIRECTORY on the host when the source file is missing, and the
    # bot then fails later in a confusing way. With this form `docker compose up` fails
    # immediately and names the missing file.
    volumes:
      - type: bind
        source: ./.env
        target: /app/.env
        read_only: true
        bind:
          create_host_path: false
      - type: bind
        source: ./config.toml
        target: /app/config.toml
        read_only: true
        bind:
          create_host_path: false
      # Dedicated Phase 0 key (spark-control@spark-32d0). Must be chmod 600 on the host.
      # Override the host path with MB_SSH_KEY_HOST if the key lives elsewhere.
      - type: bind
        source: ${MB_SSH_KEY_HOST:-/home/modelo/.ssh/id_ed25519}
        target: /root/.ssh/id_ed25519
        read_only: true
        bind:
          create_host_path: false
|
||||||
@@ -0,0 +1,40 @@
|
|||||||
|
#!/bin/sh
# matrix-bridge container entrypoint — the container's "environment seam".
#
# Generates ~/.ssh/config for the `mac-bridge` alias from config.toml's [mac] section, then
# execs the bot. This mirrors the Mac side, where launch-claude.sh owns environment setup and
# the bot stays dumb (AGENTS.md D4): SSH-client wiring lives here, not in bot.py. On the Spark
# HOST the bot uses modelo's existing ~/.ssh/config; in the container we recreate just the one
# alias we need, pointing at the mounted key.
#
# Fails loud: a missing/invalid [mac] setting aborts startup with a one-line message on stderr
# (visible in `docker compose logs`) instead of a Python traceback or a silently broken config.
set -e

SSH_DIR="$HOME/.ssh"
mkdir -p "$SSH_DIR"
chmod 700 "$SSH_DIR"

MB_SSH_KEY="${MB_SSH_KEY:-$SSH_DIR/id_ed25519}"

# The key is a bind mount; if the host path was wrong it may be absent or a directory.
# Warn rather than exit, so the bot still starts and can report SSH failures.
if [ ! -f "$MB_SSH_KEY" ]; then
    echo "docker-entrypoint: warning: SSH key '$MB_SSH_KEY' is missing or not a regular file;" \
         "ssh to the Mac will fail (check the key mount / MB_SSH_KEY_HOST)" >&2
fi

# Write ~/.ssh/config straight from config.toml [mac] (no eval; values never hit a shell).
# IdentityFile is the in-container mount target (a container constant, see docker-compose.yml).
# StrictHostKeyChecking=accept-new auto-trusts the Mac's host key on first connect — acceptable
# on the private WireGuard network (same transport-trust reasoning as D9) and avoids an
# interactive prompt that would otherwise hang the bot.
MB_SSH_KEY="$MB_SSH_KEY" \
SSH_CONFIG="$SSH_DIR/config" \
KNOWN_HOSTS="$SSH_DIR/known_hosts" \
python - <<'PY'
import os
import sys
import tomllib

CONFIG_PATH = "/app/config.toml"


def die(message):
    """Abort with a single-line error on stderr (exit status 1 trips `set -e`)."""
    sys.exit(f"docker-entrypoint: {message}")


try:
    with open(CONFIG_PATH, "rb") as f:
        mac = tomllib.load(f).get("mac")
except (OSError, tomllib.TOMLDecodeError) as err:
    die(f"cannot read {CONFIG_PATH}: {err}")

if not isinstance(mac, dict):
    die(f"{CONFIG_PATH} has no [mac] table")


def setting(key, default=None):
    """Return [mac].<key> as a single ssh_config token, or abort with a clear message.

    Whitespace (including newlines) is rejected: it would split the value into several
    ssh_config arguments or smuggle in extra directives.
    """
    value = mac.get(key, default)
    if not isinstance(value, str) or not value:
        die(f"{CONFIG_PATH} [mac] is missing a non-empty string '{key}'")
    if any(ch.isspace() for ch in value):
        die(f"{CONFIG_PATH} [mac].{key} must not contain whitespace: {value!r}")
    return value


config = f"""Host {setting('ssh_alias', 'mac-bridge')}
    HostName {setting('hostname')}
    User {setting('user')}
    IdentityFile {os.environ['MB_SSH_KEY']}
    IdentitiesOnly yes
    StrictHostKeyChecking accept-new
    UserKnownHostsFile {os.environ['KNOWN_HOSTS']}
"""

# Create the file owner-only from the start instead of relying on the later chmod.
fd = os.open(os.environ['SSH_CONFIG'], os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
with os.fdopen(fd, "w") as f:
    f.write(config)
PY
# Also covers a pre-existing config file, whose mode os.open() would not change.
chmod 600 "$SSH_DIR/config"

exec "$@"
|
||||||
@@ -0,0 +1,186 @@
|
|||||||
|
# Phase 3 — Spark Control integration (spec for the Spark Control dev)
|
||||||
|
|
||||||
|
**Goal (ROADMAP Phase 3):** surface the matrix-bridge bot's container status on the Spark
|
||||||
|
Control dashboard, and add one-click **update** (pull + rebuild + restart) and **restart**,
|
||||||
|
wired the same SSH-behind-buttons way Spark Control already drives the Sparks.
|
||||||
|
|
||||||
|
**Exit (falsifiable):** bot status is visible on the panel, and the bot can be
|
||||||
|
updated/restarted from the panel.
|
||||||
|
|
||||||
|
This document is the **contract**: what to run, where, and what the output means. The
|
||||||
|
matrix-bridge side is fixed below; map the buttons onto Spark Control's existing
|
||||||
|
managed-service pattern however that codebase already models a Spark/service. No changes to
|
||||||
|
matrix-bridge are required for this.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## What the bot is
|
||||||
|
|
||||||
|
A single Docker container on the DGX Spark.
|
||||||
|
|
||||||
|
| Fact | Value |
|
||||||
|
|---|---|
|
||||||
|
| Host | `spark-32d0` (`10.59.211.6` on WireGuard), user **`modelo`** |
|
||||||
|
| Project dir | `/home/modelo/matrix-bridge` (`~/matrix-bridge` for modelo) |
|
||||||
|
| Compose service | `bot` |
|
||||||
|
| Container name | `matrix-bridge` (fixed via `container_name:`) |
|
||||||
|
| Image | `matrix-bridge-bot` |
|
||||||
|
| Lifecycle | host networking, `restart: unless-stopped` (survives Spark reboot) |
|
||||||
|
| Secrets | `.env`, `config.toml` — **gitignored**, live only on the Spark, never in git |
|
||||||
|
|
||||||
|
Spark Control already SSHes into `spark-32d0`, so these ride the existing channel — **no new
|
||||||
|
key needed.** All commands below assume they run **as `modelo`** (owner of the dir, member of
|
||||||
|
the `docker` group). If Spark Control's channel connects as a different user, wrap each command
|
||||||
|
in `sudo -iu modelo bash -lc '<command>'` — running `git` in modelo's repo as root trips git's
|
||||||
|
"dubious ownership" guard, so don't skip this.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## One-time prerequisites (owner, not Spark Control dev)
|
||||||
|
|
||||||
|
The bot dir on the Spark was originally populated by `scp` of loose files. To make
|
||||||
|
git-pull-based updates work it must become a git clone of the Gitea repo **without disturbing
|
||||||
|
the gitignored secrets** (`.env`, `config.toml`). Because those two files are gitignored,
|
||||||
|
`git reset --hard` never touches them — so we can convert the existing dir in place.
|
||||||
|
|
||||||
|
**0a. Confirm the Spark can reach + authenticate to Gitea (fail loud here, not at first button press):**
|
||||||
|
|
||||||
|
```sh
|
||||||
|
git ls-remote ssh://git@immense-voyage.local:59916/grant/matrix-bridge.git >/dev/null \
|
||||||
|
&& echo "gitea reachable" || echo "FIX gitea access first"
|
||||||
|
```
|
||||||
|
|
||||||
|
The Spark is on the same LAN as the Start9 host running Gitea, so `immense-voyage.local`
|
||||||
|
resolves directly — this should just work. If it doesn't, the most likely gap is that `modelo`
|
||||||
|
has no SSH key authorized to read the Gitea repo (add a deploy key, or authorize an existing key).
|
||||||
|
Don't proceed until `git ls-remote` succeeds.
|
||||||
|
|
||||||
|
**0b. Convert `~/matrix-bridge` to a clone tracking `master` (run as `modelo`):**
|
||||||
|
|
||||||
|
```sh
|
||||||
|
cd /home/modelo/matrix-bridge
|
||||||
|
git init -b master
|
||||||
|
git remote add origin ssh://git@immense-voyage.local:59916/grant/matrix-bridge.git
|
||||||
|
git fetch origin
|
||||||
|
git reset --hard origin/master # secrets are gitignored → untouched
|
||||||
|
git branch --set-upstream-to=origin/master master
|
||||||
|
```
|
||||||
|
|
||||||
|
Verify the secrets survived and the container still comes up clean:
|
||||||
|
|
||||||
|
```sh
|
||||||
|
ls -la /home/modelo/matrix-bridge/.env /home/modelo/matrix-bridge/config.toml # both present
|
||||||
|
git -C /home/modelo/matrix-bridge status # .env/config.toml show as ignored, tree clean
|
||||||
|
cd /home/modelo/matrix-bridge && docker compose up -d --build && docker ps --filter name=^/matrix-bridge$
|
||||||
|
```
|
||||||
|
|
||||||
|
`master` is the release branch (today `master == phase-1`). Track whatever you treat as the
|
||||||
|
release line; the commands below assume `origin/master`.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## The contract — commands behind each control
|
||||||
|
|
||||||
|
Run from `/home/modelo/matrix-bridge` as `modelo`. Each is idempotent and fail-loud
|
||||||
|
(non-zero exit ⇒ surface it on the panel; don't swallow).
|
||||||
|
|
||||||
|
### Status (poll for the badge)
|
||||||
|
|
||||||
|
```sh
|
||||||
|
docker inspect -f '{{.State.Status}}|{{.State.StartedAt}}|{{.RestartCount}}' matrix-bridge
|
||||||
|
```
|
||||||
|
|
||||||
|
- Output e.g. `running|2026-06-15T18:02:11.4Z|0`. Parse field 1 for the badge:
|
||||||
|
- `running` → green/up. Field 3 (`RestartCount`) climbing while status flips to
|
||||||
|
`restarting` ⇒ **crash loop** — show it; that's the most useful signal a dashboard gives here.
|
||||||
|
- `exited` → stopped/crashed.
|
||||||
|
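- `restarting` → unhealthy / boot-looping.
|
||||||
|
- Any other value (`created`, `paused`, `removing`, `dead`) → rare for this container; show the raw status string rather than guessing.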
|
||||||
|
- **Non-zero exit** (`No such object: matrix-bridge`) ⇒ **not deployed** — distinct from
|
||||||
|
"stopped". Show that state rather than erroring out.
|
||||||
|
|
||||||
|
Friendlier one-liner for a human-readable badge (empty string when not running):
|
||||||
|
|
||||||
|
```sh
|
||||||
|
docker ps --filter name=^/matrix-bridge$ --format '{{.Status}}' # e.g. "Up 2 hours"
|
||||||
|
```
|
||||||
|
|
||||||
|
### Logs (optional "view logs" action — handy for diagnosing a red badge)
|
||||||
|
|
||||||
|
```sh
|
||||||
|
docker logs --tail 100 matrix-bridge
|
||||||
|
```
|
||||||
|
|
||||||
|
### Restart (no code change)
|
||||||
|
|
||||||
|
```sh
|
||||||
|
docker restart matrix-bridge
|
||||||
|
```
|
||||||
|
|
||||||
|
### Update (pull latest code + rebuild + recreate) — the headline button
|
||||||
|
|
||||||
|
```sh
|
||||||
|
cd /home/modelo/matrix-bridge \
|
||||||
|
&& git fetch origin \
|
||||||
|
&& git reset --hard origin/master \
|
||||||
|
&& docker compose up -d --build
|
||||||
|
```
|
||||||
|
|
||||||
|
- `git reset --hard origin/master` is the deploy-box "always match remote" semantic: never gets
|
||||||
|
stuck on divergence, and gitignored secrets are preserved. (If you'd rather detect divergence,
|
||||||
|
`git pull --ff-only` is the gentler alternative — but then a wedged tree needs manual help.)
|
||||||
|
- `docker compose up -d --build` always runs the build (cached layers make a no-op build fast) and
|
||||||
|
recreates the container only if the resulting image or the compose config changed. First build after a base-image bump is slow (minutes); subsequent builds hit
|
||||||
|
the layer cache. **Treat update as long-running**: stream/await output, set a generous
|
||||||
|
timeout (≥10 min), and don't block the dashboard on it.
|
||||||
|
|
||||||
|
### Stop / Start (optional)
|
||||||
|
|
||||||
|
```sh
|
||||||
|
docker stop matrix-bridge # stop
|
||||||
|
cd /home/modelo/matrix-bridge && docker compose up -d # start (recreates if needed)
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Spark Control-side wiring (for the dev)
|
||||||
|
|
||||||
|
Map the above onto however Spark Control already registers a managed Spark/service:
|
||||||
|
|
||||||
|
1. **Register `matrix-bridge`** as a managed service (a tile), targeting `spark-32d0` over the
|
||||||
|
existing SSH channel, commands run as `modelo`.
|
||||||
|
2. **Status badge** ← poll the *Status* command on the panel's normal refresh cadence; map the
|
||||||
|
four states above (running / exited / restarting / not-deployed) to your existing badge
|
||||||
|
vocabulary. Surface `RestartCount` if your tile can show a secondary metric — a climbing
|
||||||
|
count is the crash-loop tell.
|
||||||
|
3. **Buttons:** `Update`, `Restart` (required for the exit criterion); `Logs`, `Stop`/`Start`
|
||||||
|
(optional, nice-to-have).
|
||||||
|
4. **Fail-loud, surfaced.** Every command's non-zero exit + stderr must reach the panel, not a
|
||||||
|
silent failure — this mirrors matrix-bridge's own discipline (a bad launch reports back into
|
||||||
|
the room rather than hanging). Especially: a failed `git fetch` (Gitea unreachable) or a
|
||||||
|
failed build should show the error, not a stuck spinner.
|
||||||
|
5. **`Update` is long-running** — see the timeout/streaming note above.
|
||||||
|
|
||||||
|
What I deliberately left generic: the tile's exact place in Spark Control's code, its UI, and
|
||||||
|
its config schema — that's yours to fit to the existing pattern. If a precise drop-in matters,
|
||||||
|
share how a Spark is currently registered (config entry + the command-runner seam) and I'll
|
||||||
|
tailor steps 1–5 to it.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Acceptance (maps to the ROADMAP exit)
|
||||||
|
|
||||||
|
- [ ] Status tile shows the bot's live state and flips correctly across a manual
|
||||||
|
`docker stop` / `docker start` on the Spark.
|
||||||
|
- [ ] `Restart` from the panel cycles the container (status returns to `running`).
|
||||||
|
- [ ] `Update` from the panel pulls a new commit, rebuilds, and recreates the container — and
|
||||||
|
surfaces a clear error if Gitea is unreachable or the build fails.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Note — optional future enhancement (not required for Phase 3)
|
||||||
|
|
||||||
|
The *Status* command reports container liveness (process up), not Matrix connectivity — the bot
|
||||||
|
can be `running` yet disconnected from Synapse. A truer signal would need a Docker `HEALTHCHECK`
|
||||||
|
backed by a bot-side liveness signal (e.g. the bot touches a file or exposes a tiny endpoint on
|
||||||
|
each successful sync loop), after which Status could read `{{.State.Health.Status}}`. That's a
|
||||||
|
matrix-bridge-side change, out of scope here — flag it if/when "running but silent" actually bites.
|
||||||
@@ -0,0 +1,2 @@
|
|||||||
|
matrix-nio>=0.24
|
||||||
|
tomli>=2.0; python_version < "3.11"
|
||||||
Executable
+45
@@ -0,0 +1,45 @@
|
|||||||
|
#!/bin/zsh -l
|
||||||
|
# ask-claude.sh — matrix-bridge headless "ask" wrapper.
|
||||||
|
#
|
||||||
|
# Invoked over SSH by the bot: ask-claude.sh <repo_dir> <prompt...>
|
||||||
|
# Runs `claude -p` one-shot in the repo and prints the answer to STDOUT, which the bot
|
||||||
|
# captures over the SSH pipe and posts back into the Matrix room. Unlike launch-claude.sh /
|
||||||
|
# gui-launch.sh (interactive, surfaced to the phone), this NEVER opens a GUI Terminal.
|
||||||
|
#
|
||||||
|
# Two seams it owns, both proven the hard way in Phase 0:
|
||||||
|
# - LOGIN shell (-l): a non-login SSH shell loads neither ~/.zprofile nor ~/.zshrc, so
|
||||||
|
# ~/.local/bin isn't on PATH and `claude` isn't found. Same reason as launch-claude.sh.
|
||||||
|
# - Headless auth via CLAUDE_CODE_OAUTH_TOKEN (from `claude setup-token`, stored in ../.env):
|
||||||
|
# a non-GUI SSH session can't reach the login Keychain, so plain `claude -p` reports
|
||||||
|
# "Not logged in" (D11 / Approach A). We export the token to bypass the Keychain.
|
||||||
|
|
||||||
|
set -e

script_dir="${0:A:h}"

# Pull just the token out of ../.env (don't `source` the whole file — other values, e.g. a
# password, may not be shell-safe). Absent token => claude reports "Not logged in", reported
# back to the room by the bot.
env_file="$script_dir/../.env"
if [[ -f "$env_file" ]]; then
  token_line="$(grep -E '^CLAUDE_CODE_OAUTH_TOKEN=' "$env_file" | head -1)"
  token="${token_line#*=}"
  token="${token#\"}"   # strip one surrounding quote pair if present (KEY="value")
  token="${token%\"}"
  # Export only a real value: an empty `CLAUDE_CODE_OAUTH_TOKEN=` placeholder line must not
  # overwrite a token the login environment may already provide.
  if [[ -n "$token" ]]; then
    export CLAUDE_CODE_OAUTH_TOKEN="$token"
  fi
fi

# Check the argument count BEFORE shifting: with no arguments at all, `shift` itself fails and
# `set -e` would abort the script before the usage message below could be printed.
if (( $# < 2 )); then
  print -u2 "usage: ask-claude.sh <repo_dir> <prompt>"
  exit 2
fi

repo_dir="$1"
shift
prompt="$*"

# Arguments can still be present but empty (e.g. ask-claude.sh "" "").
if [[ -z "$repo_dir" || -z "$prompt" ]]; then
  print -u2 "usage: ask-claude.sh <repo_dir> <prompt>"
  exit 2
fi

# Fail loud on a bad directory — never run Claude in the wrong place.
cd "$repo_dir" || { print -u2 "ask-claude: no such repo dir: $repo_dir"; exit 1; }

# < /dev/null: print mode reads stdin by default and otherwise stalls ~3s waiting for it.
exec claude -p "$prompt" < /dev/null
|
||||||
@@ -36,6 +36,8 @@ fi
|
|||||||
launch_script="$(mktemp -t mb-launch)"
|
launch_script="$(mktemp -t mb-launch)"
|
||||||
{
|
{
|
||||||
print -r -- '#!/bin/zsh -l'
|
print -r -- '#!/bin/zsh -l'
|
||||||
|
# Propagate a caller-supplied session name (the bot sets this for all-projects launches).
|
||||||
|
[[ -n "$MB_SESSION_NAME" ]] && printf 'export MB_SESSION_NAME=%q\n' "$MB_SESSION_NAME"
|
||||||
printf 'exec %q %q %q\n' "$inner" "$repo_dir" "$prompt"
|
printf 'exec %q %q %q\n' "$inner" "$repo_dir" "$prompt"
|
||||||
} >| "$launch_script"
|
} >| "$launch_script"
|
||||||
chmod +x "$launch_script"
|
chmod +x "$launch_script"
|
||||||
|
|||||||
+201
@@ -0,0 +1,201 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""matrix-bridge bot — Phase 1.
|
||||||
|
|
||||||
|
A text message in a mapped room launches a Claude Code session in that repo on the Mac
|
||||||
|
(ssh -> gui-launch.sh -> launch-claude.sh -> claude), surfaced to the phone by Remote
|
||||||
|
Control. A message in the all-projects room fans out to every mapped repo (each session
|
||||||
|
named "<repo> - <date>"). Launch failures are reported back into the room (fail loud).
|
||||||
|
|
||||||
|
Runs on the Spark, where the SSH alias resolves. Config: ../config.toml Creds: ../.env
|
||||||
|
"""
|
||||||
|
import asyncio
|
||||||
|
import datetime
|
||||||
|
import os
|
||||||
|
import shlex
|
||||||
|
|
||||||
|
try:
|
||||||
|
import tomllib # py >= 3.11
|
||||||
|
except ModuleNotFoundError:
|
||||||
|
import tomli as tomllib # py < 3.11
|
||||||
|
|
||||||
|
from nio import AsyncClient, MatrixRoom, RoomMessageText
|
||||||
|
|
||||||
|
REPO_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||||
|
|
||||||
|
# Headless "ask" mode tunables.
|
||||||
|
ASK_TIMEOUT = 300 # seconds to wait for `claude -p` before giving up
|
||||||
|
MAX_MSG_CHARS = 30000 # split answers into chunks well under Matrix's ~64KB event cap
|
||||||
|
|
||||||
|
|
||||||
|
def load_env(path):
    """Parse a dotenv-style file into a ``{key: value}`` dict of strings.

    Blank lines, ``#`` comment lines and lines without ``=`` are ignored. Each
    remaining line is split on the first ``=``; whitespace around the key and
    the value is trimmed, and one matching pair of surrounding quotes
    (``KEY="value"`` or ``KEY='value'``) is removed, mirroring how
    ask-claude.sh reads the same file. Values are otherwise taken literally:
    no escape processing and no variable expansion.

    Raises:
        OSError: if *path* cannot be opened.
    """
    env = {}
    with open(path, encoding="utf-8") as f:
        for raw in f:
            line = raw.strip()
            if not line or line.startswith("#") or "=" not in line:
                continue
            key, value = line.split("=", 1)
            key = key.strip()
            value = value.strip()
            if not key:
                continue  # "=value" carries no usable name
            # Strip exactly one balanced quote pair; a lone or mismatched quote is kept as-is.
            if len(value) >= 2 and value[0] == value[-1] and value[0] in "\"'":
                value = value[1:-1]
            env[key] = value
    return env
|
||||||
|
|
||||||
|
|
||||||
|
def load_config(path):
|
||||||
|
with open(path, "rb") as f:
|
||||||
|
return tomllib.load(f)
|
||||||
|
|
||||||
|
|
||||||
|
def split_message(text, limit=MAX_MSG_CHARS):
    """Split *text* into chunks of at most *limit* characters, dropping nothing.

    Chunks break on line boundaries where possible (each line keeps its own
    line ending); a single line longer than *limit* is hard-split into
    *limit*-sized pieces. Joining the returned chunks reproduces *text* exactly.

    NOTE: *limit* counts characters (``len``), not encoded bytes, so text heavy
    in multi-byte characters produces chunks whose UTF-8 size exceeds *limit*.

    Raises:
        ValueError: if *limit* is less than 1 (the hard-split loop could never
            make progress).
    """
    if limit < 1:
        raise ValueError(f"limit must be >= 1, got {limit!r}")
    if len(text) <= limit:
        return [text]
    chunks, buf = [], ""
    for line in text.splitlines(keepends=True):
        while len(line) > limit:  # one oversized line: hard-split it
            if buf:
                # Flush what is pending first so chunk order matches text order.
                chunks.append(buf)
                buf = ""
            chunks.append(line[:limit])
            line = line[limit:]
        if len(buf) + len(line) > limit:
            chunks.append(buf)
            buf = ""
        buf += line
    if buf:
        chunks.append(buf)
    return chunks
|
||||||
|
|
||||||
|
|
||||||
|
async def main():
    """Run the bot until cancelled.

    Loads ``.env`` and ``config.toml`` from REPO_ROOT, restores the Matrix login
    from the stored access token, primes the sync token so old history is not
    replayed, then dispatches each new text message:

    * all-projects room -> launch a session in every mapped repo (never "ask");
    * mapped room, ``?``-prefixed message -> headless ask, answer posted back;
    * mapped room, any other message -> launch a session in that repo.

    Raises:
        KeyError: if a required ``.env`` / ``config.toml`` key is missing.
        RuntimeError: if the homeserver rejects the ``whoami`` check.
    """
    env = load_env(os.path.join(REPO_ROOT, ".env"))
    cfg = load_config(os.path.join(REPO_ROOT, "config.toml"))

    homeserver = env["MATRIX_HOMESERVER"]
    user_id = env["MATRIX_USER"]
    token = env["MATRIX_ACCESS_TOKEN"]
    device_id = env.get("MATRIX_DEVICE_ID", "matrix-bridge-bot")

    rooms = {r["room_id"]: r for r in cfg.get("room", [])}
    all_projects_room = cfg.get("all_projects", {}).get("room_id")
    # MB_SSH_ALIAS (environment) wins, then config.toml, then the same "mac-bridge" default
    # the container entrypoint uses when it writes ~/.ssh/config.
    ssh_alias = os.environ.get("MB_SSH_ALIAS") or cfg["mac"].get("ssh_alias", "mac-bridge")
    launcher = cfg["mac"]["launcher"]
    ask_launcher = cfg["mac"].get("ask_launcher")

    # BatchMode=yes: nobody can answer a password / host-key prompt here, so make ssh fail
    # (and get reported to the room) instead of waiting on one.
    ssh_cmd = ("ssh", "-o", "BatchMode=yes", ssh_alias)

    client = AsyncClient(homeserver, user_id)
    client.restore_login(user_id=user_id, device_id=device_id, access_token=token)

    async def launch(repo_dir, prompt, session_name=None):
        """Run gui-launch.sh on the Mac over SSH. Returns (returncode, combined_output).

        All user text is passed through shlex.quote so it survives the remote shell —
        this is where the cross-shell quoting footgun is actually solved.
        """
        remote = f"{shlex.quote(launcher)} {shlex.quote(repo_dir)} {shlex.quote(prompt)}"
        if session_name:
            remote = f"MB_SESSION_NAME={shlex.quote(session_name)} " + remote
        proc = await asyncio.create_subprocess_exec(
            *ssh_cmd, remote,
            stdin=asyncio.subprocess.DEVNULL,  # never let ssh read the bot's own stdin
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.STDOUT,
        )
        out, _ = await proc.communicate()
        return proc.returncode, out.decode(errors="replace").strip()

    async def run_ask(repo_dir, prompt):
        """Run ask-claude.sh on the Mac over SSH; return (rc, stdout, stderr).

        Headless `claude -p`: its stdout is the answer (captured here), stderr is diagnostics.
        This path never opens a GUI Terminal and is not surfaced to the phone.
        On timeout the ssh client is killed and rc is None.
        """
        remote = f"{shlex.quote(ask_launcher)} {shlex.quote(repo_dir)} {shlex.quote(prompt)}"
        proc = await asyncio.create_subprocess_exec(
            *ssh_cmd, remote,
            stdin=asyncio.subprocess.DEVNULL,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
        try:
            out, err = await asyncio.wait_for(proc.communicate(), timeout=ASK_TIMEOUT)
        except asyncio.TimeoutError:
            proc.kill()
            await proc.wait()  # reap the killed ssh client (no zombie)
            return None, "", f"timed out after {ASK_TIMEOUT}s"
        return (proc.returncode,
                out.decode(errors="replace").strip(),
                err.decode(errors="replace").strip())

    async def say(room_id, text):
        """Post *text* to *room_id* as a plain m.text message."""
        await client.room_send(
            room_id, "m.room.message", {"msgtype": "m.text", "body": text}
        )

    async def launch_one(report_room, repo, prompt, session_name=None):
        """Launch one repo's session; on failure report into *report_room*. Returns success."""
        rc, out = await launch(repo["repo_dir"], prompt, session_name)
        if rc == 0:
            print(f"launched {repo['label']} -> {repo['repo_dir']}", flush=True)
            return True
        print(f"FAILED {repo['label']}: rc={rc} {out[:300]}", flush=True)
        await say(report_room, f"⚠️ matrix-bridge: failed to launch {repo['label']} "
                               f"(rc={rc}): {out[:300] or 'no output'}")
        return False

    async def ask(report_room, repo, prompt):
        """Headless ask: run `claude -p` in the repo and post the full answer back."""
        if not ask_launcher:
            await say(report_room,
                      "⚠️ matrix-bridge: ask mode not configured ([mac].ask_launcher missing).")
            return
        await say(report_room, f"🤔 asking claude in {repo['label']}…")
        rc, out, err = await run_ask(repo["repo_dir"], prompt)
        if rc == 0:  # success — even an empty answer is not a failure
            print(f"ask {repo['label']}: {len(out)} chars", flush=True)
            for chunk in split_message(out or "(claude returned no output)"):
                await say(report_room, chunk)
            return
        detail = err or out or "no output"
        print(f"ASK FAILED {repo['label']}: rc={rc} {detail[:300]}", flush=True)
        await say(report_room, f"⚠️ matrix-bridge: ask failed in {repo['label']} "
                               f"(rc={rc}): {detail[:500]}")

    async def on_message(room: MatrixRoom, event: RoomMessageText):
        """Route one incoming text event to fan-out, ask, or single launch."""
        if event.sender == user_id:
            return  # never react to our own messages
        prompt = event.body.strip()
        if not prompt:
            return

        if room.room_id == all_projects_room:  # fan-out room always launches, never asks
            date = datetime.date.today().isoformat()
            print(f"[all-projects] fan-out to {len(rooms)} repos: {prompt!r}", flush=True)
            results = await asyncio.gather(*[
                launch_one(room.room_id, r, prompt, f"{r['label']} - {date}")
                for r in rooms.values()
            ])
            await say(room.room_id,
                      f"matrix-bridge: launched {sum(results)}/{len(rooms)} sessions ({date}).")
        elif room.room_id in rooms:
            r = rooms[room.room_id]
            if prompt.startswith("?"):  # headless ask mode
                ask_prompt = prompt[1:].strip()
                if ask_prompt:  # a bare "?" is ignored
                    await ask(room.room_id, r, ask_prompt)
            elif await launch_one(room.room_id, r, prompt):
                await say(room.room_id,
                          f"matrix-bridge: launched {r['label']} — drive it on your phone.")

    # Everything that talks to the homeserver sits inside try/finally so the client is
    # closed on a startup failure too, not only when sync_forever ends.
    try:
        # Prime the sync token past existing history, THEN register the callback, so the bot
        # only reacts to messages that arrive after startup (no backlog replay).
        print("priming sync (skipping backlog)...", flush=True)
        await client.sync(timeout=30000, full_state=False)
        client.add_event_callback(on_message, RoomMessageText)
        who = await client.whoami()
        # An error response carries no user_id; fail loud with the server's answer instead of
        # an opaque AttributeError.
        if getattr(who, "user_id", None) is None:
            raise RuntimeError(
                f"matrix-bridge: whoami failed (check MATRIX_ACCESS_TOKEN / homeserver): {who}"
            )
        print(f"listening as {who.user_id}; {len(rooms)} rooms + all-projects={all_projects_room}",
              flush=True)
        await client.sync_forever(timeout=30000)
    finally:
        await client.close()
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
try:
|
||||||
|
asyncio.run(main())
|
||||||
|
except KeyboardInterrupt:
|
||||||
|
pass
|
||||||
Reference in New Issue
Block a user