# spark-control/image/app/wol.py

"""Wake-on-LAN.

Two delivery paths, tried in order:

  1. SSH into the other Spark and have IT broadcast — most reliable because the
     packet originates from the same LAN subnet as the sleeping Spark.
  2. Direct UDP broadcast from this container. May or may not work depending
     on the StartOS container's network namespace.

The DGX Spark's NIC must have WoL enabled in firmware/OS for either path to
actually wake the box; this module just delivers the magic packet correctly.
"""
from __future__ import annotations
import asyncio
import re
import socket

from .config import Settings
from .ssh import ssh_run


# Six hex octets, each optionally separated by ":" or "-". Because the
# separator is optional per octet, bare ("aabbccddeeff") and mixed-separator
# spellings are accepted as well as the usual colon/dash forms.
_MAC_RE = re.compile(r"^[0-9a-fA-F]{2}([:-]?[0-9a-fA-F]{2}){5}$")


def normalize_mac(mac: str) -> str:
    """Return *mac* in canonical lowercase, colon-separated form.

    Surrounding whitespace is ignored and hex digits are lower-cased. Every
    spelling the validator accepts (colon-separated, dash-separated, bare
    hex, or a mix) is returned as ``aa:bb:cc:dd:ee:ff``.

    Raises:
        ValueError: if *mac* is not six hex octets with optional ``:``/``-``
            separators.
    """
    candidate = mac.strip().lower()
    if not _MAC_RE.match(candidate):
        raise ValueError(f"invalid MAC address: {candidate!r}")
    # Drop whatever separators were used, then re-insert colons between
    # octets. Replacing only "-" with ":" would leave bare or mixed input
    # un-normalized even though it passed validation.
    digits = candidate.replace(":", "").replace("-", "")
    return ":".join(digits[i:i + 2] for i in range(0, len(digits), 2))


def build_magic_packet(mac: str) -> bytes:
    """Build the Wake-on-LAN magic packet for *mac*.

    The payload is six ``0xFF`` bytes followed by the 6-byte hardware
    address repeated sixteen times. *mac* is validated via ``normalize_mac``,
    so an invalid address raises ``ValueError``.
    """
    hex_digits = normalize_mac(mac).replace(":", "")
    target = bytes.fromhex(hex_digits)
    sync_stream = b"\xff" * 6
    return sync_stream + target * 16


def send_local_broadcast(mac: str, broadcast: str = "255.255.255.255", port: int = 9) -> None:
    """Broadcast the magic packet for *mac* from THIS container.

    The packet is sent to ``broadcast`` on ``port`` and then again on port 7
    (the alternate WoL convention). May not reach the LAN in some topologies.

    Raises:
        ValueError: if *mac* is not a valid MAC address (raised while the
            packet is built, before any socket is opened).
    """
    payload = build_magic_packet(mac)
    with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as sock:
        # Without SO_BROADCAST the kernel rejects sends to a broadcast address.
        sock.setsockopt(socket.SOL_SOCKET, socket.SO_BROADCAST, 1)
        # Requested port first, then port 7 as the alternate convention.
        for dest_port in (port, 7):
            sock.sendto(payload, (broadcast, dest_port))


async def send_via_peer(host: str, user: str, mac: str, settings: Settings) -> tuple[bool, str]:
    """Ask a reachable Spark to broadcast the WoL packet for its sleeping peer.

    Runs a ``python3 -c`` one-liner on *host* through ``ssh_run``. The
    one-liner broadcasts the magic packet to UDP ports 9 and 7 and prints
    ``sent``. Python 3 is used (always present on the Sparks for vLLM) so the
    remote side does not need wakeonlan / etherwake installed.

    Returns:
        ``(ok, detail)``. ``ok`` is true only when the remote command exits
        with status 0 and its stdout contains ``sent``. ``detail`` is the
        stripped stderr if non-empty, otherwise the stripped stdout,
        otherwise ``rc=<code>``.

    Raises:
        ValueError: if *mac* is not a valid MAC address.
    """
    # Validation guarantees only hex digits are interpolated into the
    # remote command line below.
    mac_hex = normalize_mac(mac).replace(":", "")
    remote_statements = (
        "import socket",
        f"m=bytes.fromhex('{mac_hex}')",
        "s=socket.socket(socket.AF_INET, socket.SOCK_DGRAM)",
        "s.setsockopt(socket.SOL_SOCKET, socket.SO_BROADCAST, 1)",
        "s.sendto(b'\\xff'*6 + m*16, ('255.255.255.255', 9))",
        "s.sendto(b'\\xff'*6 + m*16, ('255.255.255.255', 7))",
        "print('sent')",
    )
    command = 'python3 -c "' + "; ".join(remote_statements) + '"'

    # timeout=8 is passed straight to ssh_run (presumably seconds — confirm
    # against ssh_run).
    rc, out, err = await ssh_run(host, user, command, settings, timeout=8)

    delivered = rc == 0 and "sent" in out
    detail = err.strip() or out.strip() or f"rc={rc}"
    return delivered, detail