From 5e23e0ab4ef9f4ee65a3138cf6038b664309f21e Mon Sep 17 00:00:00 2001 From: Honza Novak Date: Mon, 13 Apr 2026 11:58:59 +0000 Subject: [PATCH] feat: add guest network autoconfiguration via Firecracker MMDS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduces optional per-clone IP assignment using the Firecracker Microvm Metadata Service (MMDS). A background daemon (fc-net-init) is baked into the rootfs during init and captured in the golden snapshot — on clone resume it polls 169.254.169.254 and applies the IP/GW/DNS config injected by the orchestrator immediately after snapshot restore. - config.go: add AutoNetConfig bool (FC_AUTO_NET_CONFIG=1) - orchestrator.go: embed fc-net-init daemon + MMDS link-local route in init script; set AllowMMDS: true on golden NIC; spawnOne/SpawnSingle accept net bool and propagate it via FC_AUTO_NET_CONFIG in proxy env - console.go: set AllowMMDS: true on clone NIC; call configureMmds() after m.Start() when AutoNetConfig is enabled - network.go: add configureMmds() — PUT /mmds with ip/gw/dns over the clone's Firecracker Unix socket - serve.go: POST /clones accepts optional {"net": bool} body to override the global AutoNetConfig default per-request - web/terminal.html: spawn button always sends {"net": true} - docs/commands.md: document manual config + MMDS autoconfiguration Co-Authored-By: Claude Sonnet 4.6 --- docs/commands.md | 64 +++++++++++++++++++++++++++++++--- orchestrator/config.go | 18 +++++----- orchestrator/console.go | 12 +++++++ orchestrator/network.go | 54 ++++++++++++++++++++++++++++ orchestrator/orchestrator.go | 48 ++++++++++++++++++++++--- orchestrator/serve.go | 14 +++++++- orchestrator/web/terminal.html | 6 +++- 7 files changed, 197 insertions(+), 19 deletions(-) diff --git a/docs/commands.md b/docs/commands.md index 46f19aa..82d05e6 100644 --- a/docs/commands.md +++ b/docs/commands.md @@ -35,8 +35,9 @@ All tunables are set via environment variables. 
Every variable has a default; no | `FC_MEM_MIB` | `128` | Memory per VM in MiB | | `FC_BRIDGE` | `fcbr0` | Host bridge name. Set to `none` to disable all networking | | `FC_BRIDGE_CIDR` | `172.30.0.1/24` | IP address and prefix assigned to the host bridge | -| `FC_GUEST_PREFIX` | `172.30.0` | IP prefix for guest address allocation | -| `FC_GUEST_GW` | `172.30.0.1` | Default gateway advertised to guests | +| `FC_GUEST_PREFIX` | `172.30.0` | IP prefix for guest address allocation (used with `FC_AUTO_NET_CONFIG`) | +| `FC_GUEST_GW` | `172.30.0.1` | Default gateway advertised to guests (used with `FC_AUTO_NET_CONFIG`) | +| `FC_AUTO_NET_CONFIG` | _(unset)_ | Set to `1` to automatically assign guest IPs via MMDS on clone start | Kernel boot arguments are hardcoded and not user-configurable: @@ -95,10 +96,49 @@ When `FC_BRIDGE` is not `none` (the default), a Linux bridge and per-VM TAP devi └── fctapN (clone N) ``` -Each clone receives a unique TAP device and MAC address (`AA:FC:00:00:XX:XX`). IP assignment inside the guest is the guest OS's responsibility (the rootfs init script only brings `eth0` up; no DHCP server is included). +Each clone receives a unique TAP device and MAC address (`AA:FC:00:00:XX:XX`). The host-side bridge has NAT masquerading enabled so guests can reach the internet through the host's default route. Set `FC_BRIDGE=none` to skip all network configuration. VMs will boot without a network interface. +### Guest IP assignment + +The rootfs init script brings `eth0` up at the link layer only. Guests have no IP address by default. There are two ways to configure networking inside a VM: + +#### Manual configuration (inside the VM console) + +```sh +# Pick an unused IP in the bridge subnet — e.g. .11 for clone 1, .12 for clone 2 +ip addr add 172.30.0.11/24 dev eth0 +ip route add default via 172.30.0.1 +echo "nameserver 1.1.1.1" > /etc/resolv.conf +ping 1.1.1.1 # verify +``` + +Manual config is ephemeral — it is lost when the clone is stopped. 
Use the automatic option below to have the config applied on every clone start. + +#### Automatic configuration via MMDS (`FC_AUTO_NET_CONFIG=1`) + +When `FC_AUTO_NET_CONFIG=1` is set, the orchestrator uses the Firecracker **Microvm Metadata Service (MMDS)** to inject per-clone network config immediately after the VM starts. A small background daemon embedded in the rootfs (`/sbin/fc-net-init`) polls `169.254.169.254` and applies the config automatically — no manual steps needed. + +IPs are assigned deterministically from `FC_GUEST_PREFIX`: + +``` +clone 1 → 172.30.0.11/24 +clone 2 → 172.30.0.12/24 +… +clone N → 172.30.0.(10+N)/24 +``` + +Usage: + +```sh +sudo FC_AUTO_NET_CONFIG=1 ./fc-orch start +``` + +Within ~1–2 seconds of clone start, `eth0` inside the VM will have the assigned IP, default route, and DNS (`1.1.1.1`) configured. + +> **Note:** `FC_AUTO_NET_CONFIG` requires `fc-orch init` and `fc-orch golden` to have been run (or re-run) after this feature was added, so that the `fc-net-init` daemon is present in the golden snapshot. + --- ## `init` @@ -511,13 +551,27 @@ The following steps are performed once for each requested clone. Let `{id}` be t Restoration time (from `m.Start` call to return) is measured and logged. -11. **Record PID** +11. **Inject network config via MMDS** (only when `FC_AUTO_NET_CONFIG=1` and networking is enabled) + + Immediately after the snapshot is restored, the orchestrator writes this clone's network config to the MMDS with a single `PUT /mmds` call on the clone's Firecracker socket. The `PUT /mmds/config` interface association shown first is not sent by the orchestrator; it is handled by the Firecracker SDK through `AllowMMDS: true` on the NIC: + + ``` + PUT /mmds/config + {"version": "V1", "network_interfaces": ["1"]} + + PUT /mmds + {"ip": "172.30.0.{10+id}/24", "gw": "172.30.0.1", "dns": "1.1.1.1"} + ``` + + The `fc-net-init` daemon already running inside the guest (started during golden VM boot, captured in the snapshot) polls `169.254.169.254` via a link-local route and applies the config to `eth0` within ~1 second of clone resume. + +12. **Record PID** ```sh echo {pid} > /tmp/fc-orch/pids/clone-{id}.pid ``` -12.
**Register clone in memory** +13. **Register clone in memory** The running clone is tracked in an in-process map keyed by clone ID, holding the Firecracker SDK handle, context cancel function, and TAP device name. This allows `kill` to cleanly terminate clones started in the same process invocation. diff --git a/orchestrator/config.go b/orchestrator/config.go index f28fea2..44d8b8c 100644 --- a/orchestrator/config.go +++ b/orchestrator/config.go @@ -14,11 +14,12 @@ type Config struct { Rootfs string // path to base rootfs.ext4 VCPUs int64 MemMiB int64 - Bridge string // host bridge name, or "none" to skip networking - BridgeCIDR string // e.g. "172.30.0.1/24" - GuestPrefix string // e.g. "172.30.0" — clones get .10, .11, ... - GuestGW string - BootArgs string + Bridge string // host bridge name, or "none" to skip networking + BridgeCIDR string // e.g. "172.30.0.1/24" + GuestPrefix string // e.g. "172.30.0" — clones get .11, .12, ... + GuestGW string // default gateway for guest VMs + AutoNetConfig bool // inject guest IP/GW/DNS via MMDS on clone start + BootArgs string } func DefaultConfig() Config { @@ -29,9 +30,10 @@ func DefaultConfig() Config { MemMiB: envOrInt("FC_MEM_MIB", 128), Bridge: envOr("FC_BRIDGE", "fcbr0"), BridgeCIDR: envOr("FC_BRIDGE_CIDR", "172.30.0.1/24"), - GuestPrefix: envOr("FC_GUEST_PREFIX", "172.30.0"), - GuestGW: envOr("FC_GUEST_GW", "172.30.0.1"), - BootArgs: "console=ttyS0 reboot=k panic=1 pci=off i8042.noaux quiet loglevel=0", + GuestPrefix: envOr("FC_GUEST_PREFIX", "172.30.0"), + GuestGW: envOr("FC_GUEST_GW", "172.30.0.1"), + AutoNetConfig: envOr("FC_AUTO_NET_CONFIG", "") == "1", + BootArgs: "console=ttyS0 reboot=k panic=1 pci=off i8042.noaux quiet loglevel=0", } c.Kernel = envOr("FC_KERNEL", c.BaseDir+"/vmlinux") c.KernelURL = envOr("FC_KERNEL_URL", diff --git a/orchestrator/console.go b/orchestrator/console.go index 82780d7..5004b29 100644 --- a/orchestrator/console.go +++ b/orchestrator/console.go @@ -88,6 +88,7 @@ func 
RunConsoleProxy(cfg Config, id int, tapName string) error { MacAddress: mac, HostDevName: tapName, }, + AllowMMDS: true, }, } } @@ -127,6 +128,17 @@ func RunConsoleProxy(cfg Config, id int, tapName string) error { } elapsed := time.Since(start) + // Inject per-clone IP config via MMDS so the fc-net-init guest daemon + // can configure eth0 without any manual steps inside the VM. + if cfg.AutoNetConfig && cfg.Bridge != "none" { + guestIP := fmt.Sprintf("%s.%d/24", cfg.GuestPrefix, 10+id) + if err := configureMmds(ctx, sockPath, guestIP, cfg.GuestGW, "1.1.1.1"); err != nil { + logger.Warnf("MMDS config failed (guest network will be unconfigured): %v", err) + } else { + logger.Infof("MMDS: assigned %s gw %s to clone %d", guestIP, cfg.GuestGW, id) + } + } + // Release our copy of the slave — firecracker holds its own fd now. // Closing here ensures we get EOF on ptm when firecracker exits. pts.Close() diff --git a/orchestrator/network.go b/orchestrator/network.go index d5d25bd..df02c7d 100644 --- a/orchestrator/network.go +++ b/orchestrator/network.go @@ -1,7 +1,13 @@ package orchestrator import ( + "bytes" + "context" + "encoding/json" "fmt" + "io" + "net" + "net/http" "os/exec" "strings" ) @@ -66,6 +72,54 @@ func destroyTap(name string) { _ = run("ip", "link", "del", name) } +// configureMmds writes per-clone IP config to the Firecracker MMDS so that +// the fc-net-init daemon running inside the guest can read and apply it. +// It makes a single API call over the Firecracker Unix socket at sockPath: +// +// PUT /mmds — stores the ip/gw/dns values the guest daemon will read.
+// Any response other than 204 No Content is returned as an error. +func configureMmds(ctx context.Context, sockPath, ip, gw, dns string) error { + httpClient := &http.Client{ + Transport: &http.Transport{ + DialContext: func(ctx context.Context, _, _ string) (net.Conn, error) { + return (&net.Dialer{}).DialContext(ctx, "unix", sockPath) // honor ctx cancellation while dialing + }, + }, + } + + doJSON := func(method, path string, body any) error { + data, err := json.Marshal(body) + if err != nil { + return fmt.Errorf("marshal %s: %w", path, err) + } + req, err := http.NewRequestWithContext(ctx, method, + "http://localhost"+path, bytes.NewReader(data)) + if err != nil { + return fmt.Errorf("build request %s: %w", path, err) + } + req.Header.Set("Content-Type", "application/json") + resp, err := httpClient.Do(req) + if err != nil { + return fmt.Errorf("%s %s: %w", method, path, err) + } + defer resp.Body.Close() + if resp.StatusCode != http.StatusNoContent { + b, _ := io.ReadAll(resp.Body) + return fmt.Errorf("%s %s failed (%d): %s", method, path, resp.StatusCode, b) + } + return nil + } + + // Store the network config the guest daemon will poll for. + // PUT /mmds/config (interface association) was already handled by the SDK + // via AllowMMDS: true on the NetworkInterface before the VM started. + return doJSON(http.MethodPut, "/mmds", map[string]string{ + "ip": ip, + "gw": gw, + "dns": dns, + }) +} + // run executes a command, returning an error if it fails. func run(name string, args ...string) error { return exec.Command(name, args...).Run() diff --git a/orchestrator/orchestrator.go b/orchestrator/orchestrator.go index c17ed9f..487a9d1 100644 --- a/orchestrator/orchestrator.go +++ b/orchestrator/orchestrator.go @@ -115,12 +115,36 @@ func (o *Orchestrator) buildRootfs() error { return err } + // write fc-net-init daemon: polls MMDS for IP config and applies it. + // Always embedded — harmless if MMDS is never populated (sleeps 1 s/loop). + // Captured in the golden snapshot so it runs on every clone resume too.
+ netInitScript := `#!/bin/sh +# Poll Firecracker MMDS for network config, apply it, then exit. +# Runs in background; loops until MMDS responds (survives snapshot resume). +while true; do + ip=$(wget -q -T1 -O- http://169.254.169.254/ip 2>/dev/null) + [ -n "$ip" ] || { sleep 1; continue; } + gw=$(wget -q -T1 -O- http://169.254.169.254/gw 2>/dev/null) + dns=$(wget -q -T1 -O- http://169.254.169.254/dns 2>/dev/null) + ip addr add "$ip" dev eth0 2>/dev/null + ip route add default via "$gw" dev eth0 2>/dev/null + printf "nameserver %s\n" "$dns" > /etc/resolv.conf + break +done +` + os.MkdirAll(filepath.Join(mnt, "sbin"), 0o755) + if err := os.WriteFile(filepath.Join(mnt, "sbin", "fc-net-init"), []byte(netInitScript), 0o755); err != nil { + return err + } + // write init script initScript := `#!/bin/sh mount -t proc proc /proc mount -t sysfs sys /sys mount -t devtmpfs devtmpfs /dev ip link set eth0 up 2>/dev/null +ip route add 169.254.169.254 dev eth0 2>/dev/null +/sbin/fc-net-init & ` initPath := filepath.Join(mnt, "etc", "init.d", "rcS") os.MkdirAll(filepath.Dir(initPath), 0o755) @@ -175,6 +199,7 @@ func (o *Orchestrator) Golden() error { MacAddress: "AA:FC:00:00:00:01", HostDevName: tap, }, + AllowMMDS: true, }, } } @@ -294,7 +319,7 @@ func (o *Orchestrator) Spawn(count int) error { for i := 0; i < count; i++ { id := o.nextCloneID() - if err := o.spawnOne(id); err != nil { + if err := o.spawnOne(id, o.cfg.AutoNetConfig); err != nil { o.log.Errorf("clone %d failed: %v", id, err) continue } @@ -308,7 +333,10 @@ func (o *Orchestrator) Spawn(count int) error { // SpawnSingle spawns exactly one new clone and returns its ID. // It is safe to call from multiple goroutines (nextCloneID is serialised by the // filesystem scan, and each clone gets its own directory/tap). -func (o *Orchestrator) SpawnSingle() (int, error) { +// SpawnSingle spawns one clone. 
net controls whether the guest receives +// automatic IP configuration via MMDS (overrides FC_AUTO_NET_CONFIG for this +// clone). Pass cfg.AutoNetConfig to preserve the global default. +func (o *Orchestrator) SpawnSingle(net bool) (int, error) { goldenDir := o.goldenDir() for _, f := range []string{"vmstate", "mem"} { if _, err := os.Stat(filepath.Join(goldenDir, f)); err != nil { @@ -323,7 +351,7 @@ func (o *Orchestrator) SpawnSingle() (int, error) { } } id := o.nextCloneID() - if err := o.spawnOne(id); err != nil { + if err := o.spawnOne(id, net); err != nil { return 0, err } return id, nil @@ -349,7 +377,7 @@ func (o *Orchestrator) KillClone(id int) error { return nil } -func (o *Orchestrator) spawnOne(id int) error { +func (o *Orchestrator) spawnOne(id int, net bool) error { goldenDir := o.goldenDir() cloneDir := filepath.Join(o.clonesDir(), strconv.Itoa(id)) os.MkdirAll(cloneDir, 0o755) @@ -399,6 +427,18 @@ func (o *Orchestrator) spawnOne(id int) error { proxyCmd.Stdin = nil proxyCmd.Stdout = nil proxyCmd.Stderr = nil + // Build proxy env: inherit parent env, then force FC_AUTO_NET_CONFIG to + // match the per-clone net flag so the proxy picks it up via DefaultConfig(). 
+ proxyEnv := make([]string, 0, len(os.Environ())+1) + for _, kv := range os.Environ() { + if !strings.HasPrefix(kv, "FC_AUTO_NET_CONFIG=") { + proxyEnv = append(proxyEnv, kv) + } + } + if net { + proxyEnv = append(proxyEnv, "FC_AUTO_NET_CONFIG=1") + } + proxyCmd.Env = proxyEnv if err := proxyCmd.Start(); err != nil { return fmt.Errorf("start console proxy: %w", err) diff --git a/orchestrator/serve.go b/orchestrator/serve.go index 5610513..5ee6e8a 100644 --- a/orchestrator/serve.go +++ b/orchestrator/serve.go @@ -53,7 +53,19 @@ func Serve(orch *Orchestrator, addr string) error { w.Header().Set("Content-Type", "application/json") json.NewEncoder(w).Encode(ids) //nolint:errcheck case http.MethodPost: - id, err := orch.SpawnSingle() + // Optional JSON body: {"net": bool} + // Defaults to the server's FC_AUTO_NET_CONFIG setting. + var req struct { + Net *bool `json:"net"` + } + if r.ContentLength > 0 { + json.NewDecoder(r.Body).Decode(&req) //nolint:errcheck + } + net := orch.cfg.AutoNetConfig + if req.Net != nil { + net = *req.Net + } + id, err := orch.SpawnSingle(net) if err != nil { http.Error(w, err.Error(), http.StatusInternalServerError) return diff --git a/orchestrator/web/terminal.html b/orchestrator/web/terminal.html index 5db7e60..b665b9f 100644 --- a/orchestrator/web/terminal.html +++ b/orchestrator/web/terminal.html @@ -200,7 +200,11 @@ spawnBtn.disabled = true; spawnBtn.textContent = 'Spawning…'; clearError(); - fetch('/clones', { method: 'POST' }) + fetch('/clones', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ net: true }), + }) .then(r => { if (!r.ok) return r.text().then(t => { throw new Error(t); }); return r.json();