fix: pause VM before MMDS injection, then resume it so the config is applied

- Load snapshot with ResumeVM: false so MMDS data can be written while VM is paused
- Call ResumeVM explicitly after configureMmds succeeds
- Skip PUT /mmds/config on restored VMs (Firecracker rejects it with 400)
- Strip JSON quotes from MMDS values with tr -d '"' in net-init script
- Add 169.254.169.2/32 link-local addr and flush eth0 before applying new IP

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-04-14 15:11:14 +00:00
parent 5e23e0ab4e
commit bfc1f47287
4 changed files with 22 additions and 9 deletions

View File

@@ -118,7 +118,7 @@ func RunConsoleProxy(cfg Config, id int, tapName string) error {
})
}
// --- Start VM (blocks until snapshot is loaded and VM is running) ---
// --- Start VM (blocks until snapshot is loaded and VM is PAUSED) ---
start := time.Now()
logger.Infof("restoring clone %d from snapshot ...", id)
if err := m.Start(ctx); err != nil {
@@ -126,10 +126,10 @@ func RunConsoleProxy(cfg Config, id int, tapName string) error {
ptm.Close()
return fmt.Errorf("restore clone %d: %w", id, err)
}
elapsed := time.Since(start)
// Inject per-clone IP config via MMDS so the fc-net-init guest daemon
// can configure eth0 without any manual steps inside the VM.
// This must happen while the VM is PAUSED (ResumeVM: false in snapshot load).
if cfg.AutoNetConfig && cfg.Bridge != "none" {
guestIP := fmt.Sprintf("%s.%d/24", cfg.GuestPrefix, 10+id)
if err := configureMmds(ctx, sockPath, guestIP, cfg.GuestGW, "1.1.1.1"); err != nil {
@@ -139,6 +139,14 @@ func RunConsoleProxy(cfg Config, id int, tapName string) error {
}
}
// Now RESUME the VM to start execution!
if err := m.ResumeVM(ctx); err != nil {
pts.Close()
ptm.Close()
return fmt.Errorf("resume clone %d: %w", id, err)
}
elapsed := time.Since(start)
// Release our copy of the slave — firecracker holds its own fd now.
// Closing here ensures we get EOF on ptm when firecracker exits.
pts.Close()