email: fix backfill crash on emails with no Reply-To; Sync now retries errored mailboxes (v0.1.0:62)
insert_email's recipients loop did `for a in parsed.get(kind, [])`, but the parser sets reply_to=None when there is no Reply-To header, so .get returns None (key present) and the loop raised 'NoneType' object is not iterable — aborting the entire Gmail backfill on the first such email (i.e. almost immediately). Fixed with `or []`. Regression test test_insert_email.py (reply_to=None, all-None recipients, happy path). Because the scheduler intentionally skips error-status accounts (no retry storms), an errored mailbox would never resume on its own. "Sync now" now clears error status first, so it is an explicit retry; backfill resumes from its saved cursor and dedups by Message-ID, so nothing is re-captured. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -188,7 +188,10 @@ def insert_email(conn: sqlite3.Connection, *, parsed: dict, match_status: str) -
|
||||
elif kind == "reply_to" and parsed.get("reply_to"):
|
||||
addrs = [(parsed["reply_to"], None)]
|
||||
else:
|
||||
for a in parsed.get(kind, []):
|
||||
# `or []` (not get(kind, [])): the key is often present with value None
|
||||
# (e.g. reply_to when there is no Reply-To header), and `for a in None`
|
||||
# would raise TypeError and abort the whole backfill.
|
||||
for a in (parsed.get(kind) or []):
|
||||
if isinstance(a, dict):
|
||||
addrs.append((a.get("email"), a.get("name")))
|
||||
else:
|
||||
|
||||
@@ -377,6 +377,17 @@ def _h_run_now(handler):
|
||||
# Reuse existing rate limit so admins can't hammer this.
|
||||
if handler.rate_limited("email-sync-now", 6):
|
||||
return handler.send_error_json("Too many requests", 429)
|
||||
# A manual sync is an explicit retry. The scheduler intentionally skips
|
||||
# error-status accounts (no retry storms), so clear that status here so a
|
||||
# mailbox that previously errored is re-attempted. Backfill resumes from its
|
||||
# saved cursor and dedups by Message-ID, so nothing is captured twice.
|
||||
conn = _conn()
|
||||
try:
|
||||
conn.execute("UPDATE email_accounts SET sync_status = 'pending', sync_error = NULL "
|
||||
"WHERE sync_enabled = 1 AND sync_status = 'error'")
|
||||
conn.commit()
|
||||
finally:
|
||||
conn.close()
|
||||
result = _sched.trigger_run_now()
|
||||
handler.send_json(result)
|
||||
|
||||
|
||||
@@ -0,0 +1,86 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Regression test for insert_email: a parsed email with no Reply-To header (reply_to=None)
|
||||
must not crash the recipients loop. This bug (`for a in parsed.get('reply_to', [])` returning
|
||||
None because the key is present with value None) aborted the whole Gmail backfill on the first
|
||||
email lacking a Reply-To header. Synthetic data only (guardrail #9).
|
||||
Run: cd backend && python3 email_integration/test_insert_email.py
|
||||
"""
|
||||
import os
|
||||
import sqlite3
|
||||
import sys
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
from email_integration import db as edb # noqa: E402
|
||||
|
||||
FAILS = []
|
||||
|
||||
|
||||
def check(cond, msg):
    """Print a PASS/FAIL line for *msg*; on a falsy *cond* also record *msg* in FAILS."""
    if cond:
        print(" PASS " + msg)
        return
    print(" FAIL " + msg)
    FAILS.append(msg)
|
||||
|
||||
|
||||
def fresh_conn():
    """Return a new in-memory SQLite connection after running edb.apply_migrations on it.

    Rows come back as sqlite3.Row so callers can index columns by name.
    """
    db = sqlite3.connect(":memory:")
    db.row_factory = sqlite3.Row
    cursor = db.cursor()
    edb.apply_migrations(cursor)
    return db
|
||||
|
||||
|
||||
def main():
    """Run the insert_email regression scenarios; exit with status 1 if any check failed.

    Scenarios:
      1. reply_to key present with value None (the original crash case).
      2. every recipient-style field present with value None.
      3. a real Reply-To address is still stored as a 'reply_to' recipient.
    """

    def try_insert(db, payload, status):
        # Only TypeError is trapped: it is the exact failure the regression
        # guards against. Anything else propagates with its traceback.
        try:
            new_id = edb.insert_email(db, parsed=payload, match_status=status)
        except TypeError as exc:
            print(" (raised)", exc)
            return False, None
        return bool(new_id), new_id

    # 1) the exact crash case: no Reply-To header -> reply_to is None
    db1 = fresh_conn()
    base = {
        "rfc_message_id": "<m1@example.com>",
        "from_email": "lp@example.com",
        "from_name": "An LP",
        "sent_at": "2026-05-01T10:00:00Z",
        "subject": "Re: the fund",
        "to": [{"email": "grant@ten31.xyz", "name": "Grant"}],
        "cc": [],
        "bcc": [],
        "references": [],
        "reply_to": None,  # <-- previously crashed: 'NoneType' object is not iterable
        "body_text": "Some concern about lock-up.",
    }
    inserted, email_id = try_insert(db1, base, "unmatched")
    check(inserted, "insert_email with reply_to=None does not raise")
    if inserted:
        rows = db1.execute("SELECT kind FROM email_recipients WHERE email_id=?", (email_id,))
        kinds = sorted(row["kind"] for row in rows)
        check(kinds == ["from", "to"], f"recipients are from+to, reply_to skipped (got {kinds})")

    # 2) defensive: every address field present-but-None must not crash either
    db2 = fresh_conn()
    all_none = {
        "rfc_message_id": "<m2@example.com>",
        "from_email": "x@example.com",
        "sent_at": "2026-05-02T10:00:00Z",
        "to": None,
        "cc": None,
        "bcc": None,
        "references": None,
        "reply_to": None,
        "body_text": "no recipients parsed",
    }
    inserted2, _ = try_insert(db2, all_none, "unmatched")
    check(inserted2, "insert_email with all recipient fields None does not raise")

    # 3) the happy path still records a real Reply-To (not guarded: any error here propagates)
    db3 = fresh_conn()
    with_reply_to = dict(base, rfc_message_id="<m3@example.com>", reply_to="replies@example.com")
    id3 = edb.insert_email(db3, parsed=with_reply_to, match_status="matched")
    reply_row = db3.execute(
        "SELECT address FROM email_recipients WHERE email_id=? AND kind='reply_to'",
        (id3,),
    ).fetchone()
    check(
        reply_row and reply_row["address"] == "replies@example.com",
        "a present Reply-To is still recorded",
    )

    # Summary: success prints one line and returns; failure lists each message and exits 1.
    if not FAILS:
        print("\nALL PASS (insert_email reply_to/None regression)")
        return
    print(f"\nFAILED ({len(FAILS)})")
    for failure in FAILS:
        print(" - " + failure)
    sys.exit(1)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -26,8 +26,9 @@ export const PACKAGE_TITLE = 'Ten31 Database'
|
||||
// * 0.1.0:58 (seed 5 Architect positioning framings into the Workshop as candidate options)
|
||||
// * 0.1.0:59 (Email Capture admin panel + matched email into the grounding corpus)
|
||||
// * 0.1.0:60 (Email Capture: single-mailbox enroll field for testing)
|
||||
// * Current: 0.1.0:61 (Email Capture: live backfill progress + auto-refresh)
|
||||
export const PACKAGE_VERSION = '0.1.0:61'
|
||||
// * 0.1.0:61 (Email Capture: live backfill progress + auto-refresh)
|
||||
// * Current: 0.1.0:62 (fix backfill crash on no-Reply-To emails; Sync now retries errored mailboxes)
|
||||
export const PACKAGE_VERSION = '0.1.0:62'
|
||||
|
||||
export const DATA_MOUNT_PATH = '/data'
|
||||
export const WEB_PORT = 8080
|
||||
|
||||
@@ -22,8 +22,9 @@ import { v_0_1_0_58 } from './v0.1.0.58'
|
||||
import { v_0_1_0_59 } from './v0.1.0.59'
|
||||
import { v_0_1_0_60 } from './v0.1.0.60'
|
||||
import { v_0_1_0_61 } from './v0.1.0.61'
|
||||
import { v_0_1_0_62 } from './v0.1.0.62'
|
||||
|
||||
export const versionGraph = VersionGraph.of({
|
||||
current: v_0_1_0_61,
|
||||
other: [v_0_1_0_39, v_0_1_0_40, v_0_1_0_41, v_0_1_0_42, v_0_1_0_43, v_0_1_0_44, v_0_1_0_45, v_0_1_0_46, v_0_1_0_47, v_0_1_0_48, v_0_1_0_49, v_0_1_0_50, v_0_1_0_51, v_0_1_0_52, v_0_1_0_53, v_0_1_0_54, v_0_1_0_55, v_0_1_0_56, v_0_1_0_57, v_0_1_0_58, v_0_1_0_59, v_0_1_0_60],
|
||||
current: v_0_1_0_62,
|
||||
other: [v_0_1_0_39, v_0_1_0_40, v_0_1_0_41, v_0_1_0_42, v_0_1_0_43, v_0_1_0_44, v_0_1_0_45, v_0_1_0_46, v_0_1_0_47, v_0_1_0_48, v_0_1_0_49, v_0_1_0_50, v_0_1_0_51, v_0_1_0_52, v_0_1_0_53, v_0_1_0_54, v_0_1_0_55, v_0_1_0_56, v_0_1_0_57, v_0_1_0_58, v_0_1_0_59, v_0_1_0_60, v_0_1_0_61],
|
||||
})
|
||||
|
||||
@@ -0,0 +1,19 @@
|
||||
import { VersionInfo } from '@start9labs/start-sdk'
|
||||
|
||||
// Bugfix: the Gmail backfill crashed with "'NoneType' object is not iterable" on any
|
||||
// email with no Reply-To header (parsed reply_to=None hit `for a in parsed.get(kind, [])`,
|
||||
// which returns None when the key is present-but-None) — aborting the whole backfill.
|
||||
// Fixed with `or []`. Also: "Sync now" now clears error status so a previously-errored
|
||||
// mailbox is retried (the scheduler deliberately skips error accounts); backfill resumes
|
||||
// from its saved cursor and dedups by Message-ID. No schema migration.
|
||||
// English release notes, written as fragments and joined with single spaces.
const releaseNotesEnUS = [
  'Fixes a bug that stopped the Gmail backfill on any email without a Reply-To header',
  '(most of them), which is why capture stalled. After updating, open Email Capture and',
  'click Sync now to resume — it picks up where it left off and will not double-capture.',
].join(' ')

// Version descriptor for 0.1.0:62. Both migration hooks are no-ops.
export const v_0_1_0_62 = VersionInfo.of({
  version: '0.1.0:62',
  releaseNotes: { en_US: releaseNotesEnUS },
  migrations: {
    up: async () => {},
    down: async () => {},
  },
})
|
||||
Reference in New Issue
Block a user