feat: add structured logging, --dev flag, and snapshot network_overrides
- Add a --dev flag to main that enables logrus caller info (file:line) for easier debugging without changing production log output.
- Wire the Firecracker SDK logger (WithLogger) and the FIFO log file to both the golden VM and each clone machine so that Firecracker's own logs are surfaced.
- Log the exact shell commands being run (cp --reflink, ip tuntap, ip link, firecracker binary) at Info level before each syscall/exec, making it straightforward to reproduce the steps manually.
- Extract snapshot.go with loadSnapshotWithNetworkOverride: a direct PUT /snapshot/load call over the Unix socket that includes network_overrides, remapping the stored tap to the per-clone tap name (a Firecracker v1.15+ feature not yet exposed by SDK v1.0.0).
- Use firecracker.WithSnapshot plus a Handlers.FcInit.Swap to replace the SDK's LoadSnapshotHandler with the above when Bridge != "none".

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -203,6 +203,9 @@ func (o *Orchestrator) Golden() error {
|
||||
},
|
||||
},
|
||||
NetworkInterfaces: netIfaces,
|
||||
LogPath: sockPath + ".log",
|
||||
LogLevel: "Debug",
|
||||
FifoLogWriter: o.log.Writer(),
|
||||
}
|
||||
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
@@ -218,7 +221,10 @@ func (o *Orchestrator) Golden() error {
|
||||
WithSocketPath(sockPath).
|
||||
Build(ctx)
|
||||
|
||||
m, err := firecracker.NewMachine(ctx, fcCfg, firecracker.WithProcessRunner(cmd))
|
||||
m, err := firecracker.NewMachine(ctx, fcCfg,
|
||||
firecracker.WithProcessRunner(cmd),
|
||||
firecracker.WithLogger(o.log),
|
||||
)
|
||||
if err != nil {
|
||||
return fmt.Errorf("new machine: %w", err)
|
||||
}
|
||||
@@ -308,6 +314,7 @@ func (o *Orchestrator) spawnOne(id int) error {
|
||||
|
||||
// --- COW rootfs ---
|
||||
cloneRootfs := filepath.Join(cloneDir, "rootfs.ext4")
|
||||
o.log.Infof("clone %d: running: cp --reflink=always %s %s", id, filepath.Join(goldenDir, "rootfs.ext4"), cloneRootfs)
|
||||
if err := reflinkCopy(filepath.Join(goldenDir, "rootfs.ext4"), cloneRootfs); err != nil {
|
||||
return fmt.Errorf("copy rootfs: %w", err)
|
||||
}
|
||||
@@ -326,6 +333,9 @@ func (o *Orchestrator) spawnOne(id int) error {
|
||||
tapName := fmt.Sprintf("fctap%d", id)
|
||||
var netIfaces firecracker.NetworkInterfaces
|
||||
if o.cfg.Bridge != "none" {
|
||||
o.log.Infof("clone %d: running: ip tuntap add dev %s mode tap", id, tapName)
|
||||
o.log.Infof("clone %d: running: ip link set %s up", id, tapName)
|
||||
o.log.Infof("clone %d: running: ip link set %s master %s", id, tapName, o.cfg.Bridge)
|
||||
if err := o.createTap(tapName); err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -354,6 +364,8 @@ func (o *Orchestrator) spawnOne(id int) error {
|
||||
WithSocketPath(sockPath).
|
||||
Build(ctx)
|
||||
|
||||
o.log.Infof("clone %d: running: %s", id, strings.Join(cmd.Args, " "))
|
||||
|
||||
vcpus := o.cfg.VCPUs
|
||||
mem := o.cfg.MemMiB
|
||||
|
||||
@@ -364,20 +376,41 @@ func (o *Orchestrator) spawnOne(id int) error {
|
||||
MemSizeMib: &mem,
|
||||
},
|
||||
NetworkInterfaces: netIfaces,
|
||||
// Snapshot config: tells the SDK to restore instead of fresh boot.
|
||||
Snapshot: firecracker.SnapshotConfig{
|
||||
MemFilePath: sharedMem,
|
||||
SnapshotPath: cloneVmstate,
|
||||
ResumeVM: true,
|
||||
},
|
||||
LogPath: sockPath + ".log",
|
||||
LogLevel: "Debug",
|
||||
FifoLogWriter: o.log.Writer(),
|
||||
}
|
||||
|
||||
m, err := firecracker.NewMachine(ctx, fcCfg, firecracker.WithProcessRunner(cmd))
|
||||
m, err := firecracker.NewMachine(ctx, fcCfg,
|
||||
firecracker.WithProcessRunner(cmd),
|
||||
firecracker.WithLogger(o.log),
|
||||
// WithSnapshot replaces the default handler set with snapshot-specific
|
||||
// handlers: skips validate.Cfg (no KernelImagePath needed) and uses
|
||||
// LoadSnapshotHandler instead of CreateBootSourceHandler.
|
||||
firecracker.WithSnapshot(sharedMem, cloneVmstate, func(sc *firecracker.SnapshotConfig) {
|
||||
sc.ResumeVM = true
|
||||
}),
|
||||
)
|
||||
if err != nil {
|
||||
cancel()
|
||||
return fmt.Errorf("new machine: %w", err)
|
||||
}
|
||||
|
||||
// Firecracker v1.15+ supports network_overrides in PUT /snapshot/load to
|
||||
// remap the tap backend stored in the snapshot. The SDK v1.0.0 doesn't
|
||||
// expose this field, so we replace the SDK's LoadSnapshotHandler with a
|
||||
// direct HTTP call that includes the per-clone tap name.
|
||||
if o.cfg.Bridge != "none" {
|
||||
m.Handlers.FcInit = m.Handlers.FcInit.Swap(firecracker.Handler{
|
||||
Name: firecracker.LoadSnapshotHandlerName,
|
||||
Fn: func(ctx context.Context, m *firecracker.Machine) error {
|
||||
return loadSnapshotWithNetworkOverride(
|
||||
ctx, sockPath, sharedMem, cloneVmstate, tapName,
|
||||
)
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
start := time.Now()
|
||||
if err := m.Start(ctx); err != nil {
|
||||
cancel()
|
||||
|
||||
Reference in New Issue
Block a user