From 8ad1cd8465dfe07710004e0d4f2958cc60e1da0f Mon Sep 17 00:00:00 2001 From: Keysat Date: Mon, 15 Jun 2026 19:50:36 -0500 Subject: [PATCH] Add headless "ask" mode: ?-prefixed message runs claude -p, answer posted back MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A message starting with `?` in a mapped room runs `claude -p` one-shot in that repo on the Mac and posts the full answer back into the room — Matrix as a request/response interface, not just a trigger. Non-`?` messages keep launching interactive sessions as before. New scripts/ask-claude.sh is a login-shell wrapper (so ~/.zprofile puts claude on PATH) that exports CLAUDE_CODE_OAUTH_TOKEN from the Mac's .env and runs `claude -p "$prompt" < /dev/null`, printing the answer to stdout. The bot adds a `?`-dispatch with run_ask/ask: SSH stdout captured, 300s timeout, fail-loud, output chunked under Matrix's event cap (no truncation). Headless claude -p needs the long-lived token because a non-GUI SSH session can't reach the login Keychain (reports "Not logged in") — the deliberate Approach A that the interactive GUI-Terminal path (D11) avoided. Token is kept Mac-side only; the Spark never runs claude. Sovereignty unchanged: claude -p uses the subscription, no frontier API touches message payloads. Proven live on the Spark; fresh-eyes reviewed before commit. --- .env.example | 6 ++++ AGENTS.md | 37 ++++++++++++++++++---- ROADMAP.md | 20 ++++++------ config.example.toml | 1 + scripts/ask-claude.sh | 45 ++++++++++++++++++++++++++ src/bot.py | 74 +++++++++++++++++++++++++++++++++++++++++-- 6 files changed, 164 insertions(+), 19 deletions(-) create mode 100755 scripts/ask-claude.sh diff --git a/.env.example b/.env.example index 775afad..3846d4b 100644 --- a/.env.example +++ b/.env.example @@ -8,3 +8,9 @@ MATRIX_ACCESS_TOKEN= # Optional — kept for recovery / re-minting a token. 
The bot authenticates with the access token, # not the password (logging in every start would spawn a new device each time). MATRIX_PASSWORD= + +# Headless "ask" mode (the `?`-prefix path). Used MAC-SIDE by scripts/ask-claude.sh, NOT by the +# bot — a non-GUI SSH session can't reach the login Keychain, so `claude -p` needs this token to +# authenticate. Mint once on the Mac: `claude setup-token` (requires a Claude subscription), then +# paste the value here. Lives on the Mac; the Spark never runs claude, so it needs no copy. +CLAUDE_CODE_OAUTH_TOKEN= diff --git a/AGENTS.md b/AGENTS.md index b6382c6..8bf9571 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -74,8 +74,12 @@ v1 decision surface. - `config.example.toml` — room→repo mapping template; the real `config.toml` is gitignored. - `scripts/gui-launch.sh` — opens the desktop Terminal via `osascript` (Approach B, D11); calls `launch-claude.sh` inside it. The bot invokes this over SSH. -- `src/bot.py` — the matrix-nio bot (Phase 1): listens in mapped rooms; on a message runs - `ssh mac-bridge gui-launch.sh`; fans out for all-projects; reports failures back to the room. +- `scripts/ask-claude.sh` — headless `?`-ask wrapper (`#!/bin/zsh -l`): runs `claude -p` in the repo + and prints the answer to stdout for the bot to capture and post back. Uses `CLAUDE_CODE_OAUTH_TOKEN` + (Mac-side `.env`) because a non-GUI SSH session can't reach the login Keychain (D12). +- `src/bot.py` — the matrix-nio bot (Phase 1): listens in mapped rooms; a plain message runs + `ssh mac-bridge gui-launch.sh` (interactive, to the phone), a `?`-prefixed message runs + `ask-claude.sh` (headless, answer posted back); fans out for all-projects; reports failures back. - `requirements.txt` (matrix-nio) · `.env.example` (credential schema; real `.env` gitignored). - `.claude/` — Claude wiring (dir only for now). 
- `Dockerfile` · `docker-compose.yml` · `docker-entrypoint.sh` · `.dockerignore` — the Phase 1 @@ -125,6 +129,13 @@ Condensed from the scoping workshop. Each: the call, why, what it beat. and is fully unattended, but adds a credential to manage; kept as the documented fallback if the Mac is ever driven headless (logged out). *Cost:* requires the Mac logged in + a one-time Terminal Automation grant. +- **D12 — Headless "ask" mode uses the long-lived token; interactive stays GUI-Terminal (2026-06-16).** + A `?`-prefixed message runs `claude -p` headlessly over plain SSH and posts the answer back, so its + stdout must be captured over the SSH pipe — which rules out the GUI-Terminal path (D11), and a + non-GUI session reports "Not logged in." Ask mode therefore deliberately adopts the long-lived + `claude setup-token` (`CLAUDE_CODE_OAUTH_TOKEN`) that D11 deferred — kept **Mac-side only** (in + `.env`; the Spark never runs claude). Interactive launches keep the token-free GUI-Terminal path. + *Sovereignty unchanged:* `claude -p` uses the subscription, no frontier API touches message payloads. ## Sovereignty constraint @@ -223,9 +234,21 @@ once" is not done. (`modelo@10.59.211.6` — reachable over WireGuard but not authenticated; Phase 0 only set up the reverse, `mac-bridge`). So deploys/restarts on the Spark are run by the owner from the Spark, not driven from the Mac — until Phase 3 wires it behind Spark Control. +- **Headless "ask" mode — SHIPPED + proven on the Spark (2026-06-16).** A `?`-prefixed message in a + mapped room runs `claude -p` one-shot in that repo on the Mac and posts the **full** answer back + into the room (Matrix as request/response, not just a trigger); non-`?` messages launch + interactively as before. 
New `scripts/ask-claude.sh` (login-shell wrapper: extracts + `CLAUDE_CODE_OAUTH_TOKEN` from the Mac's `.env`, runs `claude -p "$prompt" < /dev/null`); `bot.py` + gained the `?`-dispatch + `run_ask`/`ask` (SSH stdout captured, 300s timeout, fail-loud, output + chunked under Matrix's ~64KB cap). *Why a token (D12):* a non-GUI SSH session can't reach the login + Keychain, so headless `claude -p` reports "Not logged in" — Approach A, kept Mac-side only (the + Spark never runs claude). Fresh-eyes reviewed before commit; P1 nits fixed (reap killed ssh on + timeout; treat rc=0 + empty output as success, not failure). *Proven:* a real `?`-ask in an + already-trusted repo returned the answer into the room. *Open edge:* a `?`-ask in a repo `claude` + has **never** been opened in may stall on the first-run folder-trust gate (Phase 0 caveat) — add a + trust flag to the wrapper if/when hit, not preemptively. - **Next (open — discuss before building):** Phase 2 (multi-room routing) is effectively already - satisfied — the bot was built multi-room (11 rooms + all-projects) and routed correctly across 2 - rooms in the Phase 1 proof; only a formal confirmation pass remains. Live candidates: **Phase 3** - (Spark Control: bot status + one-click update/restart on the dashboard, the SSH-behind-buttons - pattern — also closes the owner-run-ops gap above) or the **headless "ask" mode** from - `ROADMAP.md` (a message runs `claude -p` and posts the answer back into the room). + satisfied (built multi-room; routed correctly across rooms in the Phase 1 proof) — only a formal + confirmation pass remains. Main remaining candidate: **Phase 3** (Spark Control: bot status + + one-click update/restart on the dashboard, the SSH-behind-buttons pattern — also closes the + owner-run-ops gap above). Other backlog in `ROADMAP.md`. diff --git a/ROADMAP.md b/ROADMAP.md index 961d76c..31ffdf8 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -54,13 +54,13 @@ after it. is actually in use. 
- **E2EE (D9).** Add matrix-nio end-to-end encryption (libolm) if the bot ever handles sensitive content over untrusted transport. Low priority while everything is WireGuard-local. -- **Headless "ask" mode — return output into the chat (no interactive session).** Today a message - opens an interactive session surfaced to the phone. Add a mode where a message instead runs - `claude -p ""` headlessly in the repo (full Claude Code context, but one-shot), captures - stdout, and posts the result back into the Matrix room — Matrix as a request/response interface, - not just a trigger. *Design notes:* `claude -p` (print mode) is exactly this capability. Likely - uses the long-lived OAuth token (Approach A / D11) so it runs over plain SSH with no GUI Terminal - and stdout is captured directly. *Open Qs:* how to select interactive-vs-ask (per-room? a prefix - like `?` / `/ask`? a dedicated room?); output-length handling (truncate / thread / attach file); - same local-only sovereignty constraints apply (output is the user's own; `claude -p` uses the - subscription, no frontier API on message payloads). +- **Headless "ask" mode — SHIPPED 2026-06-16.** A `?`-prefixed message runs `claude -p ""` + one-shot in the room's repo and posts the **full** answer back into the room — Matrix as a + request/response interface, not just a trigger. Built via `scripts/ask-claude.sh` (login-shell + wrapper) + the bot's `?`-dispatch (`run_ask`/`ask`). Resolved design choices: selector = `?` prefix + (per-message; the room still picks the repo); output posted in full, chunked under Matrix's event + cap (no truncation — chosen explicitly); auth = the long-lived `claude setup-token` + (`CLAUDE_CODE_OAUTH_TOKEN`, Approach A / D12) because a non-GUI SSH session can't reach the + Keychain; sovereignty unchanged (`claude -p` uses the subscription, no frontier API on payloads). 
#!/bin/zsh -l
# ask-claude.sh — matrix-bridge headless "ask" wrapper.
#
# Invoked over SSH by the bot:  ask-claude.sh <repo_dir> <prompt...>
# Runs `claude -p` one-shot in the repo and prints the answer to STDOUT, which the bot
# captures over the SSH pipe and posts back into the Matrix room. Unlike launch-claude.sh /
# gui-launch.sh (interactive, surfaced to the phone), this NEVER opens a GUI Terminal.
#
# Two seams it owns, both proven the hard way in Phase 0:
#   - LOGIN shell (-l): a non-login SSH shell loads neither ~/.zprofile nor ~/.zshrc, so
#     ~/.local/bin isn't on PATH and `claude` isn't found. Same reason as launch-claude.sh.
#   - Headless auth via CLAUDE_CODE_OAUTH_TOKEN (from `claude setup-token`, stored in ../.env):
#     a non-GUI SSH session can't reach the login Keychain, so plain `claude -p` reports
#     "Not logged in" (D12 / Approach A). We export the token to bypass the Keychain.
#
# Exit codes: 2 = bad usage, 1 = repo dir missing, otherwise whatever `claude` returns.

set -e

usage() {
  print -u2 "usage: ask-claude.sh <repo_dir> <prompt...>"
  exit 2
}

# Validate BEFORE shifting: under `set -e` a `shift` with no positional parameters fails
# and would abort the script before the usage message is ever printed.
(( $# >= 2 )) || usage

repo_dir="$1"
shift
prompt="$*"

# Arguments can be present but empty (e.g. quoted ""), so check the values as well.
if [[ -z "$repo_dir" || -z "$prompt" ]]; then
  usage
fi

script_dir="${0:A:h}"

# Pull just the token out of ../.env (don't `source` the whole file — other values, e.g. a
# password, may not be shell-safe). Absent token => claude reports "Not logged in", reported
# back to the room by the bot.
env_file="$script_dir/../.env"
if [[ -f "$env_file" ]]; then
  # `| head -1` also keeps the pipeline's exit status 0 when grep finds nothing, so a
  # missing entry does not trip `set -e`.
  token_line="$(grep -E '^CLAUDE_CODE_OAUTH_TOKEN=' "$env_file" | head -1)"
  token="${token_line#*=}"
  token="${token#\"}"   # strip one surrounding quote pair if present (KEY="value")
  token="${token%\"}"
  # Only export a real value: an empty entry must not clobber a token that is already
  # present in the environment.
  if [[ -n "$token" ]]; then
    export CLAUDE_CODE_OAUTH_TOKEN="$token"
  fi
fi

# Fail loud on a bad directory — never run Claude in the wrong place.
cd "$repo_dir" || { print -u2 "ask-claude: no such repo dir: $repo_dir"; exit 1; }

# NOTE(review): a prompt that begins with "-" may be parsed by `claude` as an option rather
# than as the prompt — confirm whether the CLI accepts `--` before the prompt and add it if so.
# < /dev/null: print mode reads stdin by default and otherwise stalls ~3s waiting for it.
exec claude -p "$prompt" < /dev/null
ASK_TIMEOUT = 300      # seconds to wait for `claude -p` before giving up
MAX_MSG_CHARS = 30000  # split answers into chunks well under Matrix's ~64KB event cap
# The event cap is measured in bytes, not characters, so a character count alone is not
# enough for non-ASCII answers. This byte budget leaves headroom for the event envelope.
# NOTE(review): 48000 is a conservative choice, not a value taken from the spec — tune if needed.
MAX_MSG_BYTES = 48000


def _wire_len(s):
    """Return the number of bytes `s` occupies inside a JSON string literal.

    Counts UTF-8 bytes plus JSON escapes (a newline costs two bytes as ``\\n``), which
    approximates the body's contribution to the size of the event that is sent.
    """
    import json  # local import: this block cannot assume the file already imports json

    encoded = json.dumps(s, ensure_ascii=False).encode("utf-8", "surrogatepass")
    return len(encoded) - 2  # drop the two surrounding quote characters


def _fitting_prefix(line, limit, max_bytes):
    """Return the longest prefix of `line` within both caps (never empty, so callers progress)."""
    head = line[:limit]
    if _wire_len(head) <= max_bytes:
        return head
    # `head` itself is too big in bytes: binary-search the largest prefix that fits.
    lo, hi = 1, len(head) - 1
    while lo < hi:
        mid = (lo + hi + 1) // 2
        if _wire_len(head[:mid]) <= max_bytes:
            lo = mid
        else:
            hi = mid - 1
    return head[:lo]


def split_message(text, limit=MAX_MSG_CHARS, max_bytes=MAX_MSG_BYTES):
    """Split text into chunks on newline boundaries without dropping any content.

    Every chunk is at most `limit` characters AND at most `max_bytes` bytes when
    JSON-encoded as UTF-8. The byte cap is what keeps multi-byte text (CJK, emoji) under
    the event size limit; for plain ASCII the character cap is normally the one that
    applies, so such text is chunked by `limit` alone.

    Args:
        text: the full answer to post.
        limit: maximum characters per chunk (>= 1).
        max_bytes: maximum JSON-encoded UTF-8 bytes per chunk (>= 1).

    Returns:
        A list of chunks whose concatenation equals `text`. Text that already fits is
        returned as a single-element list (including the empty string).

    Raises:
        ValueError: if `limit` or `max_bytes` is below 1 (a non-positive cap could never
            make progress when hard-splitting an oversized line).
    """
    if limit < 1 or max_bytes < 1:
        raise ValueError("limit and max_bytes must both be >= 1")
    if len(text) <= limit and _wire_len(text) <= max_bytes:
        return [text]

    chunks = []
    pending = []        # lines accumulated for the chunk being built
    pending_chars = 0   # running character count of `pending`
    pending_bytes = 0   # running wire size of `pending` (escape costs are additive per char)

    def flush():
        nonlocal pending_chars, pending_bytes
        if pending:
            chunks.append("".join(pending))
            pending.clear()
            pending_chars = pending_bytes = 0

    for line in text.splitlines(keepends=True):
        cost = _wire_len(line)
        # One oversized line: hard-split it into the largest pieces that fit.
        while len(line) > limit or cost > max_bytes:
            flush()
            head = _fitting_prefix(line, limit, max_bytes)
            chunks.append(head)
            line = line[len(head):]
            cost = _wire_len(line)
        if not line:
            continue  # the hard-split consumed the whole line
        if pending_chars + len(line) > limit or pending_bytes + cost > max_bytes:
            flush()
        pending.append(line)
        pending_chars += len(line)
        pending_bytes += cost
    flush()
    return chunks
async def ask(report_room, repo, prompt):
    """Run a headless ask for one repo and post the outcome into `report_room`.

    Uses `ask_launcher`, `say` and `run_ask` from the enclosing scope. On exit code 0 the
    complete answer is posted, split into chunks by `split_message`; an empty answer is
    still treated as success and reported with a placeholder. Any other result (including
    the `None` exit code that `run_ask` returns on timeout) is logged and reported to the
    room as a failure, preferring stderr over stdout for the detail text.
    """
    # Ask mode is optional configuration: say so in the room rather than failing silently.
    if not ask_launcher:
        await say(report_room,
                  "⚠️ matrix-bridge: ask mode not configured ([mac].ask_launcher missing).")
        return

    label = repo["label"]
    await say(report_room, f"🤔 asking claude in {label}…")
    rc, answer, diagnostics = await run_ask(repo["repo_dir"], prompt)

    if rc != 0:
        detail = diagnostics or answer or "no output"
        # The log line and the room message are truncated to different lengths.
        print(f"ASK FAILED {label}: rc={rc} {detail[:300]}", flush=True)
        await say(report_room,
                  f"⚠️ matrix-bridge: ask failed in {label} (rc={rc}): {detail[:500]}")
        return

    print(f"ask {label}: {len(answer)} chars", flush=True)
    body = answer if answer else "(claude returned no output)"
    for piece in split_message(body):
        await say(report_room, piece)
print(f"[all-projects] fan-out to {len(rooms)} repos: {prompt!r}", flush=True) results = await asyncio.gather(*[ @@ -106,7 +172,11 @@ async def main(): f"matrix-bridge: launched {sum(results)}/{len(rooms)} sessions ({date}).") elif room.room_id in rooms: r = rooms[room.room_id] - if await launch_one(room.room_id, r, prompt): + if prompt.startswith("?"): # headless ask mode + ask_prompt = prompt[1:].strip() + if ask_prompt: + await ask(room.room_id, r, ask_prompt) + elif await launch_one(room.room_id, r, prompt): await say(room.room_id, f"matrix-bridge: launched {r['label']} — drive it on your phone.")