a02f4db850
wol.py:
- build_magic_packet(): standard magic-packet layout (6 bytes of 0xFF followed by the MAC repeated 16 times)
- send_local_broadcast(): direct from container (ports 9 + 7 for safety)
- send_via_peer(): preferred path; SSHes to the OTHER Spark and runs a Python one-liner there so the packet originates on the target's LAN segment (most reliable)
- MAC validation + normalization
connectivity.py:
- /data/connectivity.json persistence (thread-safe, atomic rename)
- Stores per-Spark current state + last_change timestamp + rolling 200-event log
- Records up/down transitions; computes down_seconds / up_seconds durations
- MAC cache populated lazily during hardware probes
hardware.py:
- Probe now reads MAC via /sys/class/net/<default-route-iface>/address
- After each probe, record_state() emits a transition event if state changed
- record_mac() caches the address so WoL works when the Spark next goes down
Endpoints:
- GET /api/connectivity: macs, current state, last_change, events[]
- POST /api/spark/{name}/wake: tries via-peer first, falls back to direct broadcast
UI:
- Unreachable hardware card shows the cached MAC + 'Wake (WoL)' button (only if MAC known)
- New 'Connectivity log' button opens a modal with per-Spark transition history (last 25 each), including duration of each prior up/down period
- pollHardware also pulls /api/connectivity so WoL buttons appear without an extra fetch
Package: bump 0.5.0:0; main.ts sets CONNECTIVITY_LOG=/data/connectivity.json
70 lines
2.4 KiB
Python
70 lines
2.4 KiB
Python
"""Wake-on-LAN.
|
|
|
|
Two delivery paths, tried in order:

1. SSH into the other Spark and have IT broadcast — most reliable because the
   packet originates from the same LAN subnet as the sleeping Spark.
2. Direct UDP broadcast from this container. May or may not work depending
   on the StartOS container's network namespace.

The DGX Spark's NIC must have WoL enabled in firmware/OS for either path to
actually wake the box; this module just delivers the magic packet correctly.
|
|
"""
|
|
from __future__ import annotations
|
|
import asyncio
|
|
import re
|
|
import socket
|
|
|
|
from .config import Settings
|
|
from .ssh import ssh_run
|
|
|
|
|
|
# Six hex octets; each pair may be separated by ":" or "-" or by nothing at all.
_MAC_RE = re.compile(r"^[0-9a-fA-F]{2}([:-]?[0-9a-fA-F]{2}){5}$")


def normalize_mac(mac: str) -> str:
    """Validate *mac* and return it in canonical ``aa:bb:cc:dd:ee:ff`` form.

    Surrounding whitespace is ignored and hex digits are lower-cased. The
    accepted input forms are colon-separated, dash-separated, and bare
    12-digit hex; all of them normalize to the same colon-separated string,
    so equal addresses always compare (and cache) equal.

    Raises:
        ValueError: if *mac* is not six hex octets.
    """
    mac = mac.strip().lower()
    if not _MAC_RE.match(mac):
        raise ValueError(f"invalid MAC address: {mac!r}")
    # Drop whatever separators were supplied, then re-insert ":" between
    # octets. A plain "-" -> ":" replacement would leave bare-hex input
    # (which the pattern accepts) without any separators.
    digits = mac.replace(":", "").replace("-", "")
    return ":".join(digits[i:i + 2] for i in range(0, len(digits), 2))
|
|
|
|
|
|
def build_magic_packet(mac: str) -> bytes:
    """Return the Wake-on-LAN magic packet for *mac*.

    The packet is six ``0xFF`` bytes followed by the target's MAC address
    repeated sixteen times. *mac* is validated via ``normalize_mac``, which
    raises ``ValueError`` for a malformed address.
    """
    hex_digits = normalize_mac(mac).replace(":", "")
    target = bytes.fromhex(hex_digits)
    preamble = b"\xff" * 6
    return preamble + target * 16
|
|
|
|
|
|
def send_local_broadcast(mac: str, broadcast: str = "255.255.255.255", port: int = 9) -> None:
    """Broadcast the magic packet for *mac* from THIS container.

    May not reach the LAN in some topologies. The packet is sent over UDP to
    ``(broadcast, port)`` and then again to port 7 on the same address.

    Raises whatever ``build_magic_packet`` raises for an invalid *mac*; socket
    errors propagate to the caller.
    """
    payload = build_magic_packet(mac)
    # The context manager closes the socket even if a send fails.
    with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as sock:
        sock.setsockopt(socket.SOL_SOCKET, socket.SO_BROADCAST, 1)
        # Requested port first, then port 7 (alternate WoL convention) for safety.
        for dest_port in (port, 7):
            sock.sendto(payload, (broadcast, dest_port))
|
|
|
|
|
|
async def send_via_peer(host: str, user: str, mac: str, settings: Settings) -> tuple[bool, str]:
    """Ask a different (reachable) Spark to broadcast the WoL packet for *mac*.

    Runs a ``python3 -c`` one-liner on *host* over SSH, so the peer does not
    need wakeonlan / etherwake installed (the original author notes Python 3 is
    always present on the Sparks for vLLM). The one-liner broadcasts the magic
    packet to UDP ports 9 and 7 and prints ``sent``.

    Returns:
        ``(ok, detail)``. ``ok`` is true only when the remote command exited
        with status 0 and its stdout contains ``sent``. ``detail`` is the first
        non-empty value among stripped stderr, stripped stdout, and
        ``"rc=<status>"``.

    Raises:
        ValueError: if *mac* is not a valid MAC address (from ``normalize_mac``).
    """
    # Validation guarantees mac_hex is hex digits only, so it is safe to embed
    # in the remote command line.
    mac_hex = normalize_mac(mac).replace(":", "")
    statements = [
        "import socket",
        f"m=bytes.fromhex('{mac_hex}')",
        "s=socket.socket(socket.AF_INET, socket.SOCK_DGRAM)",
        "s.setsockopt(socket.SOL_SOCKET, socket.SO_BROADCAST, 1)",
        "s.sendto(b'\\xff'*6 + m*16, ('255.255.255.255', 9))",
        "s.sendto(b'\\xff'*6 + m*16, ('255.255.255.255', 7))",
        "print('sent')",
    ]
    command = 'python3 -c "' + "; ".join(statements) + '"'
    # NOTE(review): timeout is presumably in seconds — confirm against ssh_run.
    rc, out, err = await ssh_run(host, user, command, settings, timeout=8)
    delivered = rc == 0 and "sent" in out
    detail = err.strip() or out.strip() or f"rc={rc}"
    return delivered, detail
|