diff --git a/orchestrator/console.go b/orchestrator/console.go index 5004b29..5514d5b 100644 --- a/orchestrator/console.go +++ b/orchestrator/console.go @@ -118,7 +118,7 @@ func RunConsoleProxy(cfg Config, id int, tapName string) error { }) } - // --- Start VM (blocks until snapshot is loaded and VM is running) --- + // --- Start VM (blocks until snapshot is loaded and VM is PAUSED) --- start := time.Now() logger.Infof("restoring clone %d from snapshot ...", id) if err := m.Start(ctx); err != nil { @@ -126,10 +126,10 @@ func RunConsoleProxy(cfg Config, id int, tapName string) error { ptm.Close() return fmt.Errorf("restore clone %d: %w", id, err) } - elapsed := time.Since(start) // Inject per-clone IP config via MMDS so the fc-net-init guest daemon // can configure eth0 without any manual steps inside the VM. + // This must happen while the VM is PAUSED (ResumeVM: false in snapshot load). if cfg.AutoNetConfig && cfg.Bridge != "none" { guestIP := fmt.Sprintf("%s.%d/24", cfg.GuestPrefix, 10+id) if err := configureMmds(ctx, sockPath, guestIP, cfg.GuestGW, "1.1.1.1"); err != nil { @@ -139,6 +139,14 @@ func RunConsoleProxy(cfg Config, id int, tapName string) error { } } + // Now RESUME the VM to start execution! + if err := m.ResumeVM(ctx); err != nil { + pts.Close() + ptm.Close() + return fmt.Errorf("resume clone %d: %w", id, err) + } + elapsed := time.Since(start) + // Release our copy of the slave — firecracker holds its own fd now. // Closing here ensures we get EOF on ptm when firecracker exits. pts.Close() diff --git a/orchestrator/network.go b/orchestrator/network.go index df02c7d..2700cd1 100644 --- a/orchestrator/network.go +++ b/orchestrator/network.go @@ -110,9 +110,12 @@ func configureMmds(ctx context.Context, sockPath, ip, gw, dns string) error { return nil } - // Store the network config the guest daemon will poll for. - // PUT /mmds/config (interface association) was already handled by the SDK - // via AllowMMDS: true on the NetworkInterface before the VM started. + // 1. MMDS configuration (version, network_interfaces binding, etc.) is + // persisted in the golden snapshot, so we don't need to configure it here. + // In fact, Firecracker will reject PUT /mmds/config with a 400 error + // on a restored VM, which previously caused this function to abort early. + + // 2. Store the network config the guest daemon will poll for. return doJSON(http.MethodPut, "/mmds", map[string]string{ "ip": ip, "gw": gw, diff --git a/orchestrator/orchestrator.go b/orchestrator/orchestrator.go index 487a9d1..41cbef2 100644 --- a/orchestrator/orchestrator.go +++ b/orchestrator/orchestrator.go @@ -121,11 +121,13 @@ func (o *Orchestrator) buildRootfs() error { netInitScript := `#!/bin/sh # Poll Firecracker MMDS for network config, apply it, then exit. # Runs in background; loops until MMDS responds (survives snapshot resume). +ip addr add 169.254.169.2/32 dev eth0 2>/dev/null while true; do - ip=$(wget -q -T1 -O- http://169.254.169.254/ip 2>/dev/null) + ip=$(wget -q -T1 -O- http://169.254.169.254/ip 2>/dev/null | tr -d '"') [ -n "$ip" ] || { sleep 1; continue; } - gw=$(wget -q -T1 -O- http://169.254.169.254/gw 2>/dev/null) - dns=$(wget -q -T1 -O- http://169.254.169.254/dns 2>/dev/null) + gw=$(wget -q -T1 -O- http://169.254.169.254/gw 2>/dev/null | tr -d '"') + dns=$(wget -q -T1 -O- http://169.254.169.254/dns 2>/dev/null | tr -d '"') + ip addr flush dev eth0 2>/dev/null ip addr add "$ip" dev eth0 2>/dev/null ip route add default via "$gw" dev eth0 2>/dev/null printf "nameserver %s\n" "$dns" > /etc/resolv.conf diff --git a/orchestrator/snapshot.go b/orchestrator/snapshot.go index f851b82..5f0b42c 100644 --- a/orchestrator/snapshot.go +++ b/orchestrator/snapshot.go @@ -30,7 +30,7 @@ func loadSnapshotWithNetworkOverride(ctx context.Context, sockPath, memPath, vms payload := snapshotLoadRequest{ MemFilePath: memPath, SnapshotPath: vmstatePath, - ResumeVM: true, + ResumeVM: false, // Changed: We pause here so MMDS can be configured BEFORE Resume. NetworkOverrides: []networkOverride{ {IfaceID: "1", HostDevName: tapName}, },