feat: add structured logging, --dev flag, and snapshot network_overrides
- Add --dev flag to main that enables logrus caller info (file:line) for easier debugging without changing production log output - Wire firecracker SDK logger (WithLogger) and FIFO log file to both the golden VM and each clone machine so Firecracker's own logs are surfaced - Log the exact shell commands being run (cp --reflink, ip tuntap, ip link, firecracker binary) at Info level before each syscall/exec, making it straightforward to reproduce steps manually - Extract snapshot.go with loadSnapshotWithNetworkOverride: a direct PUT /snapshot/load call over the Unix socket that includes network_overrides, remapping the stored tap to the per-clone tap name (Firecracker v1.15+ feature not yet exposed by SDK v1.0.0) - Use firecracker.WithSnapshot + a Handlers.FcInit.Swap to replace the SDK's LoadSnapshotHandler with the above when Bridge != "none" Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
30
main.go
30
main.go
@@ -18,11 +18,36 @@ package main
|
|||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"os"
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"runtime"
|
||||||
|
|
||||||
|
log "github.com/sirupsen/logrus"
|
||||||
|
|
||||||
"github.com/kacerr/fc-orchestrator/orchestrator"
|
"github.com/kacerr/fc-orchestrator/orchestrator"
|
||||||
)
|
)
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
|
// strip --dev flag before subcommand routing
|
||||||
|
dev := false
|
||||||
|
filtered := os.Args[:1]
|
||||||
|
for _, a := range os.Args[1:] {
|
||||||
|
if a == "--dev" {
|
||||||
|
dev = true
|
||||||
|
} else {
|
||||||
|
filtered = append(filtered, a)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
os.Args = filtered
|
||||||
|
|
||||||
|
if dev {
|
||||||
|
log.SetReportCaller(true)
|
||||||
|
log.SetFormatter(&log.TextFormatter{
|
||||||
|
CallerPrettyfier: func(f *runtime.Frame) (string, string) {
|
||||||
|
return "", fmt.Sprintf("%s:%d", filepath.Base(f.File), f.Line)
|
||||||
|
},
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
// figure out if we are running as root
|
// figure out if we are running as root
|
||||||
if os.Geteuid() == 0 {
|
if os.Geteuid() == 0 {
|
||||||
fmt.Println("Running with root/sudo privileges!")
|
fmt.Println("Running with root/sudo privileges!")
|
||||||
@@ -61,7 +86,10 @@ func main() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func usage() {
|
func usage() {
|
||||||
fmt.Fprintf(os.Stderr, `Usage: %s <command> [args]
|
fmt.Fprintf(os.Stderr, `Usage: %s [--dev] <command> [args]
|
||||||
|
|
||||||
|
Flags:
|
||||||
|
--dev log format with source file:line (e.g. file="orchestrator.go:123")
|
||||||
|
|
||||||
Commands:
|
Commands:
|
||||||
init Download kernel + create Alpine rootfs
|
init Download kernel + create Alpine rootfs
|
||||||
|
|||||||
@@ -203,6 +203,9 @@ func (o *Orchestrator) Golden() error {
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
NetworkInterfaces: netIfaces,
|
NetworkInterfaces: netIfaces,
|
||||||
|
LogPath: sockPath + ".log",
|
||||||
|
LogLevel: "Debug",
|
||||||
|
FifoLogWriter: o.log.Writer(),
|
||||||
}
|
}
|
||||||
|
|
||||||
ctx, cancel := context.WithCancel(context.Background())
|
ctx, cancel := context.WithCancel(context.Background())
|
||||||
@@ -218,7 +221,10 @@ func (o *Orchestrator) Golden() error {
|
|||||||
WithSocketPath(sockPath).
|
WithSocketPath(sockPath).
|
||||||
Build(ctx)
|
Build(ctx)
|
||||||
|
|
||||||
m, err := firecracker.NewMachine(ctx, fcCfg, firecracker.WithProcessRunner(cmd))
|
m, err := firecracker.NewMachine(ctx, fcCfg,
|
||||||
|
firecracker.WithProcessRunner(cmd),
|
||||||
|
firecracker.WithLogger(o.log),
|
||||||
|
)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("new machine: %w", err)
|
return fmt.Errorf("new machine: %w", err)
|
||||||
}
|
}
|
||||||
@@ -308,6 +314,7 @@ func (o *Orchestrator) spawnOne(id int) error {
|
|||||||
|
|
||||||
// --- COW rootfs ---
|
// --- COW rootfs ---
|
||||||
cloneRootfs := filepath.Join(cloneDir, "rootfs.ext4")
|
cloneRootfs := filepath.Join(cloneDir, "rootfs.ext4")
|
||||||
|
o.log.Infof("clone %d: running: cp --reflink=always %s %s", id, filepath.Join(goldenDir, "rootfs.ext4"), cloneRootfs)
|
||||||
if err := reflinkCopy(filepath.Join(goldenDir, "rootfs.ext4"), cloneRootfs); err != nil {
|
if err := reflinkCopy(filepath.Join(goldenDir, "rootfs.ext4"), cloneRootfs); err != nil {
|
||||||
return fmt.Errorf("copy rootfs: %w", err)
|
return fmt.Errorf("copy rootfs: %w", err)
|
||||||
}
|
}
|
||||||
@@ -326,6 +333,9 @@ func (o *Orchestrator) spawnOne(id int) error {
|
|||||||
tapName := fmt.Sprintf("fctap%d", id)
|
tapName := fmt.Sprintf("fctap%d", id)
|
||||||
var netIfaces firecracker.NetworkInterfaces
|
var netIfaces firecracker.NetworkInterfaces
|
||||||
if o.cfg.Bridge != "none" {
|
if o.cfg.Bridge != "none" {
|
||||||
|
o.log.Infof("clone %d: running: ip tuntap add dev %s mode tap", id, tapName)
|
||||||
|
o.log.Infof("clone %d: running: ip link set %s up", id, tapName)
|
||||||
|
o.log.Infof("clone %d: running: ip link set %s master %s", id, tapName, o.cfg.Bridge)
|
||||||
if err := o.createTap(tapName); err != nil {
|
if err := o.createTap(tapName); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@@ -354,6 +364,8 @@ func (o *Orchestrator) spawnOne(id int) error {
|
|||||||
WithSocketPath(sockPath).
|
WithSocketPath(sockPath).
|
||||||
Build(ctx)
|
Build(ctx)
|
||||||
|
|
||||||
|
o.log.Infof("clone %d: running: %s", id, strings.Join(cmd.Args, " "))
|
||||||
|
|
||||||
vcpus := o.cfg.VCPUs
|
vcpus := o.cfg.VCPUs
|
||||||
mem := o.cfg.MemMiB
|
mem := o.cfg.MemMiB
|
||||||
|
|
||||||
@@ -364,20 +376,41 @@ func (o *Orchestrator) spawnOne(id int) error {
|
|||||||
MemSizeMib: &mem,
|
MemSizeMib: &mem,
|
||||||
},
|
},
|
||||||
NetworkInterfaces: netIfaces,
|
NetworkInterfaces: netIfaces,
|
||||||
// Snapshot config: tells the SDK to restore instead of fresh boot.
|
LogPath: sockPath + ".log",
|
||||||
Snapshot: firecracker.SnapshotConfig{
|
LogLevel: "Debug",
|
||||||
MemFilePath: sharedMem,
|
FifoLogWriter: o.log.Writer(),
|
||||||
SnapshotPath: cloneVmstate,
|
|
||||||
ResumeVM: true,
|
|
||||||
},
|
|
||||||
}
|
}
|
||||||
|
|
||||||
m, err := firecracker.NewMachine(ctx, fcCfg, firecracker.WithProcessRunner(cmd))
|
m, err := firecracker.NewMachine(ctx, fcCfg,
|
||||||
|
firecracker.WithProcessRunner(cmd),
|
||||||
|
firecracker.WithLogger(o.log),
|
||||||
|
// WithSnapshot replaces the default handler set with snapshot-specific
|
||||||
|
// handlers: skips validate.Cfg (no KernelImagePath needed) and uses
|
||||||
|
// LoadSnapshotHandler instead of CreateBootSourceHandler.
|
||||||
|
firecracker.WithSnapshot(sharedMem, cloneVmstate, func(sc *firecracker.SnapshotConfig) {
|
||||||
|
sc.ResumeVM = true
|
||||||
|
}),
|
||||||
|
)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
cancel()
|
cancel()
|
||||||
return fmt.Errorf("new machine: %w", err)
|
return fmt.Errorf("new machine: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Firecracker v1.15+ supports network_overrides in PUT /snapshot/load to
|
||||||
|
// remap the tap backend stored in the snapshot. The SDK v1.0.0 doesn't
|
||||||
|
// expose this field, so we replace the SDK's LoadSnapshotHandler with a
|
||||||
|
// direct HTTP call that includes the per-clone tap name.
|
||||||
|
if o.cfg.Bridge != "none" {
|
||||||
|
m.Handlers.FcInit = m.Handlers.FcInit.Swap(firecracker.Handler{
|
||||||
|
Name: firecracker.LoadSnapshotHandlerName,
|
||||||
|
Fn: func(ctx context.Context, m *firecracker.Machine) error {
|
||||||
|
return loadSnapshotWithNetworkOverride(
|
||||||
|
ctx, sockPath, sharedMem, cloneVmstate, tapName,
|
||||||
|
)
|
||||||
|
},
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
start := time.Now()
|
start := time.Now()
|
||||||
if err := m.Start(ctx); err != nil {
|
if err := m.Start(ctx); err != nil {
|
||||||
cancel()
|
cancel()
|
||||||
|
|||||||
71
orchestrator/snapshot.go
Normal file
71
orchestrator/snapshot.go
Normal file
@@ -0,0 +1,71 @@
|
|||||||
|
package orchestrator
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"net"
|
||||||
|
"net/http"
|
||||||
|
)
|
||||||
|
|
||||||
|
type networkOverride struct {
|
||||||
|
IfaceID string `json:"iface_id"`
|
||||||
|
HostDevName string `json:"host_dev_name"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type snapshotLoadRequest struct {
|
||||||
|
MemFilePath string `json:"mem_file_path"`
|
||||||
|
SnapshotPath string `json:"snapshot_path"`
|
||||||
|
ResumeVM bool `json:"resume_vm,omitempty"`
|
||||||
|
NetworkOverrides []networkOverride `json:"network_overrides,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// loadSnapshotWithNetworkOverride calls PUT /snapshot/load on the Firecracker
|
||||||
|
// Unix socket, remapping the first network interface to tapName.
|
||||||
|
// This bypasses the SDK's LoadSnapshotHandler which doesn't expose
|
||||||
|
// network_overrides (added in Firecracker v1.15, SDK v1.0.0 omits it).
|
||||||
|
func loadSnapshotWithNetworkOverride(ctx context.Context, sockPath, memPath, vmstatePath, tapName string) error {
|
||||||
|
payload := snapshotLoadRequest{
|
||||||
|
MemFilePath: memPath,
|
||||||
|
SnapshotPath: vmstatePath,
|
||||||
|
ResumeVM: true,
|
||||||
|
NetworkOverrides: []networkOverride{
|
||||||
|
{IfaceID: "1", HostDevName: tapName},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
data, err := json.Marshal(payload)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("marshal snapshot load params: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
httpClient := &http.Client{
|
||||||
|
Transport: &http.Transport{
|
||||||
|
DialContext: func(ctx context.Context, _, _ string) (net.Conn, error) {
|
||||||
|
return net.Dial("unix", sockPath)
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
req, err := http.NewRequestWithContext(ctx, http.MethodPut,
|
||||||
|
"http://localhost/snapshot/load", bytes.NewReader(data))
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("build snapshot load request: %w", err)
|
||||||
|
}
|
||||||
|
req.Header.Set("Content-Type", "application/json")
|
||||||
|
req.Header.Set("Accept", "application/json")
|
||||||
|
|
||||||
|
resp, err := httpClient.Do(req)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("snapshot load request: %w", err)
|
||||||
|
}
|
||||||
|
defer resp.Body.Close()
|
||||||
|
|
||||||
|
if resp.StatusCode != http.StatusNoContent {
|
||||||
|
body, _ := io.ReadAll(resp.Body)
|
||||||
|
return fmt.Errorf("snapshot load failed (%d): %s", resp.StatusCode, body)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user