fix: pause VM before MMDS injection, resume after to ensure config is applied
- Load snapshot with ResumeVM: false so MMDS data can be written while VM is paused
- Call ResumeVM explicitly after configureMmds succeeds
- Skip PUT /mmds/config on restored VMs (Firecracker rejects it with 400)
- Strip JSON quotes from MMDS values with tr -d '"' in net-init script
- Add 169.254.169.2/32 link-local addr and flush eth0 before applying new IP

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -118,7 +118,7 @@ func RunConsoleProxy(cfg Config, id int, tapName string) error {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
// --- Start VM (blocks until snapshot is loaded and VM is running) ---
|
// --- Start VM (blocks until snapshot is loaded and VM is PAUSED) ---
|
||||||
start := time.Now()
|
start := time.Now()
|
||||||
logger.Infof("restoring clone %d from snapshot ...", id)
|
logger.Infof("restoring clone %d from snapshot ...", id)
|
||||||
if err := m.Start(ctx); err != nil {
|
if err := m.Start(ctx); err != nil {
|
||||||
@@ -126,10 +126,10 @@ func RunConsoleProxy(cfg Config, id int, tapName string) error {
|
|||||||
ptm.Close()
|
ptm.Close()
|
||||||
return fmt.Errorf("restore clone %d: %w", id, err)
|
return fmt.Errorf("restore clone %d: %w", id, err)
|
||||||
}
|
}
|
||||||
elapsed := time.Since(start)
|
|
||||||
|
|
||||||
// Inject per-clone IP config via MMDS so the fc-net-init guest daemon
|
// Inject per-clone IP config via MMDS so the fc-net-init guest daemon
|
||||||
// can configure eth0 without any manual steps inside the VM.
|
// can configure eth0 without any manual steps inside the VM.
|
||||||
|
// This must happen while the VM is PAUSED (ResumeVM: false in snapshot load).
|
||||||
if cfg.AutoNetConfig && cfg.Bridge != "none" {
|
if cfg.AutoNetConfig && cfg.Bridge != "none" {
|
||||||
guestIP := fmt.Sprintf("%s.%d/24", cfg.GuestPrefix, 10+id)
|
guestIP := fmt.Sprintf("%s.%d/24", cfg.GuestPrefix, 10+id)
|
||||||
if err := configureMmds(ctx, sockPath, guestIP, cfg.GuestGW, "1.1.1.1"); err != nil {
|
if err := configureMmds(ctx, sockPath, guestIP, cfg.GuestGW, "1.1.1.1"); err != nil {
|
||||||
@@ -139,6 +139,14 @@ func RunConsoleProxy(cfg Config, id int, tapName string) error {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Now RESUME the VM to start execution!
|
||||||
|
if err := m.ResumeVM(ctx); err != nil {
|
||||||
|
pts.Close()
|
||||||
|
ptm.Close()
|
||||||
|
return fmt.Errorf("resume clone %d: %w", id, err)
|
||||||
|
}
|
||||||
|
elapsed := time.Since(start)
|
||||||
|
|
||||||
// Release our copy of the slave — firecracker holds its own fd now.
|
// Release our copy of the slave — firecracker holds its own fd now.
|
||||||
// Closing here ensures we get EOF on ptm when firecracker exits.
|
// Closing here ensures we get EOF on ptm when firecracker exits.
|
||||||
pts.Close()
|
pts.Close()
|
||||||
|
|||||||
@@ -110,9 +110,12 @@ func configureMmds(ctx context.Context, sockPath, ip, gw, dns string) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// Store the network config the guest daemon will poll for.
|
// 1. MMDS configuration (version, network_interfaces binding, etc.) is
|
||||||
// PUT /mmds/config (interface association) was already handled by the SDK
|
// persisted in the golden snapshot, so we don't need to configure it here.
|
||||||
// via AllowMMDS: true on the NetworkInterface before the VM started.
|
// In fact, Firecracker will reject PUT /mmds/config with a 400 error
|
||||||
|
// on a restored VM, which previously caused this function to abort early.
|
||||||
|
|
||||||
|
// 2. Store the network config the guest daemon will poll for.
|
||||||
return doJSON(http.MethodPut, "/mmds", map[string]string{
|
return doJSON(http.MethodPut, "/mmds", map[string]string{
|
||||||
"ip": ip,
|
"ip": ip,
|
||||||
"gw": gw,
|
"gw": gw,
|
||||||
|
|||||||
@@ -121,11 +121,13 @@ func (o *Orchestrator) buildRootfs() error {
|
|||||||
netInitScript := `#!/bin/sh
|
netInitScript := `#!/bin/sh
|
||||||
# Poll Firecracker MMDS for network config, apply it, then exit.
|
# Poll Firecracker MMDS for network config, apply it, then exit.
|
||||||
# Runs in background; loops until MMDS responds (survives snapshot resume).
|
# Runs in background; loops until MMDS responds (survives snapshot resume).
|
||||||
|
ip addr add 169.254.169.2/32 dev eth0 2>/dev/null
|
||||||
while true; do
|
while true; do
|
||||||
ip=$(wget -q -T1 -O- http://169.254.169.254/ip 2>/dev/null)
|
ip=$(wget -q -T1 -O- http://169.254.169.254/ip 2>/dev/null | tr -d '"')
|
||||||
[ -n "$ip" ] || { sleep 1; continue; }
|
[ -n "$ip" ] || { sleep 1; continue; }
|
||||||
gw=$(wget -q -T1 -O- http://169.254.169.254/gw 2>/dev/null)
|
gw=$(wget -q -T1 -O- http://169.254.169.254/gw 2>/dev/null | tr -d '"')
|
||||||
dns=$(wget -q -T1 -O- http://169.254.169.254/dns 2>/dev/null)
|
dns=$(wget -q -T1 -O- http://169.254.169.254/dns 2>/dev/null | tr -d '"')
|
||||||
|
ip addr flush dev eth0 2>/dev/null
|
||||||
ip addr add "$ip" dev eth0 2>/dev/null
|
ip addr add "$ip" dev eth0 2>/dev/null
|
||||||
ip route add default via "$gw" dev eth0 2>/dev/null
|
ip route add default via "$gw" dev eth0 2>/dev/null
|
||||||
printf "nameserver %s\n" "$dns" > /etc/resolv.conf
|
printf "nameserver %s\n" "$dns" > /etc/resolv.conf
|
||||||
|
|||||||
@@ -30,7 +30,7 @@ func loadSnapshotWithNetworkOverride(ctx context.Context, sockPath, memPath, vms
|
|||||||
payload := snapshotLoadRequest{
|
payload := snapshotLoadRequest{
|
||||||
MemFilePath: memPath,
|
MemFilePath: memPath,
|
||||||
SnapshotPath: vmstatePath,
|
SnapshotPath: vmstatePath,
|
||||||
ResumeVM: true,
|
ResumeVM: false, // Changed: We pause here so MMDS can be configured BEFORE Resume.
|
||||||
NetworkOverrides: []networkOverride{
|
NetworkOverrides: []networkOverride{
|
||||||
{IfaceID: "1", HostDevName: tapName},
|
{IfaceID: "1", HostDevName: tapName},
|
||||||
},
|
},
|
||||||
|
|||||||
Reference in New Issue
Block a user