feat: add structured logging, --dev flag, and snapshot network_overrides

- Add --dev flag to main that enables logrus caller info (file:line) for
  easier debugging without changing production log output
- Wire firecracker SDK logger (WithLogger) and FIFO log file to both the
  golden VM and each clone machine so Firecracker's own logs are surfaced
- Log the exact shell commands being run (cp --reflink, ip tuntap, ip link,
  firecracker binary) at Info level before each syscall/exec, making it
  straightforward to reproduce steps manually
- Extract snapshot.go with loadSnapshotWithNetworkOverride: a direct PUT
  /snapshot/load call over the Unix socket that includes network_overrides,
  remapping the stored tap to the per-clone tap name (Firecracker v1.15+
  feature not yet exposed by SDK v1.0.0)
- Use firecracker.WithSnapshot plus a Handlers.FcInit.Swap call to replace the
  SDK's LoadSnapshotHandler with loadSnapshotWithNetworkOverride when
  Bridge != "none"

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-04-13 09:07:35 +00:00
parent b46d510cb7
commit 04067f7e6b
3 changed files with 141 additions and 9 deletions

View File

@@ -203,6 +203,9 @@ func (o *Orchestrator) Golden() error {
},
},
NetworkInterfaces: netIfaces,
LogPath: sockPath + ".log",
LogLevel: "Debug",
FifoLogWriter: o.log.Writer(),
}
ctx, cancel := context.WithCancel(context.Background())
@@ -218,7 +221,10 @@ func (o *Orchestrator) Golden() error {
WithSocketPath(sockPath).
Build(ctx)
m, err := firecracker.NewMachine(ctx, fcCfg, firecracker.WithProcessRunner(cmd))
m, err := firecracker.NewMachine(ctx, fcCfg,
firecracker.WithProcessRunner(cmd),
firecracker.WithLogger(o.log),
)
if err != nil {
return fmt.Errorf("new machine: %w", err)
}
@@ -308,6 +314,7 @@ func (o *Orchestrator) spawnOne(id int) error {
// --- COW rootfs ---
cloneRootfs := filepath.Join(cloneDir, "rootfs.ext4")
o.log.Infof("clone %d: running: cp --reflink=always %s %s", id, filepath.Join(goldenDir, "rootfs.ext4"), cloneRootfs)
if err := reflinkCopy(filepath.Join(goldenDir, "rootfs.ext4"), cloneRootfs); err != nil {
return fmt.Errorf("copy rootfs: %w", err)
}
@@ -326,6 +333,9 @@ func (o *Orchestrator) spawnOne(id int) error {
tapName := fmt.Sprintf("fctap%d", id)
var netIfaces firecracker.NetworkInterfaces
if o.cfg.Bridge != "none" {
o.log.Infof("clone %d: running: ip tuntap add dev %s mode tap", id, tapName)
o.log.Infof("clone %d: running: ip link set %s up", id, tapName)
o.log.Infof("clone %d: running: ip link set %s master %s", id, tapName, o.cfg.Bridge)
if err := o.createTap(tapName); err != nil {
return err
}
@@ -354,6 +364,8 @@ func (o *Orchestrator) spawnOne(id int) error {
WithSocketPath(sockPath).
Build(ctx)
o.log.Infof("clone %d: running: %s", id, strings.Join(cmd.Args, " "))
vcpus := o.cfg.VCPUs
mem := o.cfg.MemMiB
@@ -364,20 +376,41 @@ func (o *Orchestrator) spawnOne(id int) error {
MemSizeMib: &mem,
},
NetworkInterfaces: netIfaces,
// Snapshot config: tells the SDK to restore instead of fresh boot.
Snapshot: firecracker.SnapshotConfig{
MemFilePath: sharedMem,
SnapshotPath: cloneVmstate,
ResumeVM: true,
},
LogPath: sockPath + ".log",
LogLevel: "Debug",
FifoLogWriter: o.log.Writer(),
}
m, err := firecracker.NewMachine(ctx, fcCfg, firecracker.WithProcessRunner(cmd))
m, err := firecracker.NewMachine(ctx, fcCfg,
firecracker.WithProcessRunner(cmd),
firecracker.WithLogger(o.log),
// WithSnapshot replaces the default handler set with snapshot-specific
// handlers: skips validate.Cfg (no KernelImagePath needed) and uses
// LoadSnapshotHandler instead of CreateBootSourceHandler.
firecracker.WithSnapshot(sharedMem, cloneVmstate, func(sc *firecracker.SnapshotConfig) {
sc.ResumeVM = true
}),
)
if err != nil {
cancel()
return fmt.Errorf("new machine: %w", err)
}
// Firecracker v1.15+ supports network_overrides in PUT /snapshot/load to
// remap the tap backend stored in the snapshot. The SDK v1.0.0 doesn't
// expose this field, so we replace the SDK's LoadSnapshotHandler with a
// direct HTTP call that includes the per-clone tap name.
if o.cfg.Bridge != "none" {
m.Handlers.FcInit = m.Handlers.FcInit.Swap(firecracker.Handler{
Name: firecracker.LoadSnapshotHandlerName,
Fn: func(ctx context.Context, m *firecracker.Machine) error {
return loadSnapshotWithNetworkOverride(
ctx, sockPath, sharedMem, cloneVmstate, tapName,
)
},
})
}
start := time.Now()
if err := m.Start(ctx); err != nil {
cancel()