feat: add structured logging, --dev flag, and snapshot network_overrides
- Add --dev flag to main that enables logrus caller info (file:line) for easier debugging without changing production log output - Wire firecracker SDK logger (WithLogger) and FIFO log file to both the golden VM and each clone machine so Firecracker's own logs are surfaced - Log the exact shell commands being run (cp --reflink, ip tuntap, ip link, firecracker binary) at Info level before each syscall/exec, making it straightforward to reproduce steps manually - Extract snapshot.go with loadSnapshotWithNetworkOverride: a direct PUT /snapshot/load call over the Unix socket that includes network_overrides, remapping the stored tap to the per-clone tap name (Firecracker v1.15+ feature not yet exposed by SDK v1.0.0) - Use firecracker.WithSnapshot + a Handlers.FcInit.Swap to replace the SDK's LoadSnapshotHandler with the above when Bridge != "none" Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
30
main.go
30
main.go
@@ -18,11 +18,36 @@ package main
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
|
||||
"github.com/kacerr/fc-orchestrator/orchestrator"
|
||||
)
|
||||
|
||||
func main() {
|
||||
// strip --dev flag before subcommand routing
|
||||
dev := false
|
||||
filtered := os.Args[:1]
|
||||
for _, a := range os.Args[1:] {
|
||||
if a == "--dev" {
|
||||
dev = true
|
||||
} else {
|
||||
filtered = append(filtered, a)
|
||||
}
|
||||
}
|
||||
os.Args = filtered
|
||||
|
||||
if dev {
|
||||
log.SetReportCaller(true)
|
||||
log.SetFormatter(&log.TextFormatter{
|
||||
CallerPrettyfier: func(f *runtime.Frame) (string, string) {
|
||||
return "", fmt.Sprintf("%s:%d", filepath.Base(f.File), f.Line)
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
// figure out if we are running as root
|
||||
if os.Geteuid() == 0 {
|
||||
fmt.Println("Running with root/sudo privileges!")
|
||||
@@ -61,7 +86,10 @@ func main() {
|
||||
}
|
||||
|
||||
func usage() {
|
||||
fmt.Fprintf(os.Stderr, `Usage: %s <command> [args]
|
||||
fmt.Fprintf(os.Stderr, `Usage: %s [--dev] <command> [args]
|
||||
|
||||
Flags:
|
||||
--dev log format with source file:line (e.g. file="orchestrator.go:123")
|
||||
|
||||
Commands:
|
||||
init Download kernel + create Alpine rootfs
|
||||
|
||||
@@ -203,6 +203,9 @@ func (o *Orchestrator) Golden() error {
|
||||
},
|
||||
},
|
||||
NetworkInterfaces: netIfaces,
|
||||
LogPath: sockPath + ".log",
|
||||
LogLevel: "Debug",
|
||||
FifoLogWriter: o.log.Writer(),
|
||||
}
|
||||
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
@@ -218,7 +221,10 @@ func (o *Orchestrator) Golden() error {
|
||||
WithSocketPath(sockPath).
|
||||
Build(ctx)
|
||||
|
||||
m, err := firecracker.NewMachine(ctx, fcCfg, firecracker.WithProcessRunner(cmd))
|
||||
m, err := firecracker.NewMachine(ctx, fcCfg,
|
||||
firecracker.WithProcessRunner(cmd),
|
||||
firecracker.WithLogger(o.log),
|
||||
)
|
||||
if err != nil {
|
||||
return fmt.Errorf("new machine: %w", err)
|
||||
}
|
||||
@@ -308,6 +314,7 @@ func (o *Orchestrator) spawnOne(id int) error {
|
||||
|
||||
// --- COW rootfs ---
|
||||
cloneRootfs := filepath.Join(cloneDir, "rootfs.ext4")
|
||||
o.log.Infof("clone %d: running: cp --reflink=always %s %s", id, filepath.Join(goldenDir, "rootfs.ext4"), cloneRootfs)
|
||||
if err := reflinkCopy(filepath.Join(goldenDir, "rootfs.ext4"), cloneRootfs); err != nil {
|
||||
return fmt.Errorf("copy rootfs: %w", err)
|
||||
}
|
||||
@@ -326,6 +333,9 @@ func (o *Orchestrator) spawnOne(id int) error {
|
||||
tapName := fmt.Sprintf("fctap%d", id)
|
||||
var netIfaces firecracker.NetworkInterfaces
|
||||
if o.cfg.Bridge != "none" {
|
||||
o.log.Infof("clone %d: running: ip tuntap add dev %s mode tap", id, tapName)
|
||||
o.log.Infof("clone %d: running: ip link set %s up", id, tapName)
|
||||
o.log.Infof("clone %d: running: ip link set %s master %s", id, tapName, o.cfg.Bridge)
|
||||
if err := o.createTap(tapName); err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -354,6 +364,8 @@ func (o *Orchestrator) spawnOne(id int) error {
|
||||
WithSocketPath(sockPath).
|
||||
Build(ctx)
|
||||
|
||||
o.log.Infof("clone %d: running: %s", id, strings.Join(cmd.Args, " "))
|
||||
|
||||
vcpus := o.cfg.VCPUs
|
||||
mem := o.cfg.MemMiB
|
||||
|
||||
@@ -364,20 +376,41 @@ func (o *Orchestrator) spawnOne(id int) error {
|
||||
MemSizeMib: &mem,
|
||||
},
|
||||
NetworkInterfaces: netIfaces,
|
||||
// Snapshot config: tells the SDK to restore instead of fresh boot.
|
||||
Snapshot: firecracker.SnapshotConfig{
|
||||
MemFilePath: sharedMem,
|
||||
SnapshotPath: cloneVmstate,
|
||||
ResumeVM: true,
|
||||
},
|
||||
LogPath: sockPath + ".log",
|
||||
LogLevel: "Debug",
|
||||
FifoLogWriter: o.log.Writer(),
|
||||
}
|
||||
|
||||
m, err := firecracker.NewMachine(ctx, fcCfg, firecracker.WithProcessRunner(cmd))
|
||||
m, err := firecracker.NewMachine(ctx, fcCfg,
|
||||
firecracker.WithProcessRunner(cmd),
|
||||
firecracker.WithLogger(o.log),
|
||||
// WithSnapshot replaces the default handler set with snapshot-specific
|
||||
// handlers: skips validate.Cfg (no KernelImagePath needed) and uses
|
||||
// LoadSnapshotHandler instead of CreateBootSourceHandler.
|
||||
firecracker.WithSnapshot(sharedMem, cloneVmstate, func(sc *firecracker.SnapshotConfig) {
|
||||
sc.ResumeVM = true
|
||||
}),
|
||||
)
|
||||
if err != nil {
|
||||
cancel()
|
||||
return fmt.Errorf("new machine: %w", err)
|
||||
}
|
||||
|
||||
// Firecracker v1.15+ supports network_overrides in PUT /snapshot/load to
|
||||
// remap the tap backend stored in the snapshot. The SDK v1.0.0 doesn't
|
||||
// expose this field, so we replace the SDK's LoadSnapshotHandler with a
|
||||
// direct HTTP call that includes the per-clone tap name.
|
||||
if o.cfg.Bridge != "none" {
|
||||
m.Handlers.FcInit = m.Handlers.FcInit.Swap(firecracker.Handler{
|
||||
Name: firecracker.LoadSnapshotHandlerName,
|
||||
Fn: func(ctx context.Context, m *firecracker.Machine) error {
|
||||
return loadSnapshotWithNetworkOverride(
|
||||
ctx, sockPath, sharedMem, cloneVmstate, tapName,
|
||||
)
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
start := time.Now()
|
||||
if err := m.Start(ctx); err != nil {
|
||||
cancel()
|
||||
|
||||
71
orchestrator/snapshot.go
Normal file
71
orchestrator/snapshot.go
Normal file
@@ -0,0 +1,71 @@
|
||||
package orchestrator
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net"
|
||||
"net/http"
|
||||
)
|
||||
|
||||
type networkOverride struct {
|
||||
IfaceID string `json:"iface_id"`
|
||||
HostDevName string `json:"host_dev_name"`
|
||||
}
|
||||
|
||||
type snapshotLoadRequest struct {
|
||||
MemFilePath string `json:"mem_file_path"`
|
||||
SnapshotPath string `json:"snapshot_path"`
|
||||
ResumeVM bool `json:"resume_vm,omitempty"`
|
||||
NetworkOverrides []networkOverride `json:"network_overrides,omitempty"`
|
||||
}
|
||||
|
||||
// loadSnapshotWithNetworkOverride calls PUT /snapshot/load on the Firecracker
|
||||
// Unix socket, remapping the first network interface to tapName.
|
||||
// This bypasses the SDK's LoadSnapshotHandler which doesn't expose
|
||||
// network_overrides (added in Firecracker v1.15, SDK v1.0.0 omits it).
|
||||
func loadSnapshotWithNetworkOverride(ctx context.Context, sockPath, memPath, vmstatePath, tapName string) error {
|
||||
payload := snapshotLoadRequest{
|
||||
MemFilePath: memPath,
|
||||
SnapshotPath: vmstatePath,
|
||||
ResumeVM: true,
|
||||
NetworkOverrides: []networkOverride{
|
||||
{IfaceID: "1", HostDevName: tapName},
|
||||
},
|
||||
}
|
||||
|
||||
data, err := json.Marshal(payload)
|
||||
if err != nil {
|
||||
return fmt.Errorf("marshal snapshot load params: %w", err)
|
||||
}
|
||||
|
||||
httpClient := &http.Client{
|
||||
Transport: &http.Transport{
|
||||
DialContext: func(ctx context.Context, _, _ string) (net.Conn, error) {
|
||||
return net.Dial("unix", sockPath)
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodPut,
|
||||
"http://localhost/snapshot/load", bytes.NewReader(data))
|
||||
if err != nil {
|
||||
return fmt.Errorf("build snapshot load request: %w", err)
|
||||
}
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
req.Header.Set("Accept", "application/json")
|
||||
|
||||
resp, err := httpClient.Do(req)
|
||||
if err != nil {
|
||||
return fmt.Errorf("snapshot load request: %w", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusNoContent {
|
||||
body, _ := io.ReadAll(resp.Body)
|
||||
return fmt.Errorf("snapshot load failed (%d): %s", resp.StatusCode, body)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
Reference in New Issue
Block a user