feat: initial Firecracker snapshot orchestrator (fc-orch)
A "poor man's" Firecracker VM orchestrator that boots a single golden VM,
snapshots it, then restores N clone VMs from that snapshot with minimal
per-clone overhead.
How it works:
- `init` — downloads a Linux 6.1 kernel and builds a minimal Alpine 3.20
rootfs (512 MiB ext4) with a basic init script
- `golden` — boots the golden VM, lets it settle, then pauses and snapshots
it (vmstate + memory file); the golden VMM is then terminated
since only the artifacts are needed
- `spawn N` — restores N clone VMs concurrently from the golden snapshot:
* rootfs: filesystem-level COW copy via `cp --reflink` (falls
back to a plain copy if reflinks are not supported)
* memory: shared golden `mem` file; Firecracker's MAP_PRIVATE
lets the kernel handle COW page-by-page at no up-front cost
* vmstate: small file, cheap regular copy per clone
* networking: per-clone TAP device (fctapN) bridged to fcbr0
with iptables MASQUERADE NAT on the default route interface
- `status` — reads PID files and checks /proc to report alive/dead clones
- `kill` — stops in-memory clones, kills any stragglers via PID files,
and tears down all fctap* devices
- `cleanup` — kill + remove all state dirs and the bridge
All tunables (binary path, base dir, kernel/rootfs paths, vCPUs, memory,
bridge name/CIDR) are configurable via environment variables.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
54
orchestrator/config.go
Normal file
54
orchestrator/config.go
Normal file
@@ -0,0 +1,54 @@
|
||||
package orchestrator
|
||||
|
||||
import (
|
||||
"os"
|
||||
"strconv"
|
||||
)
|
||||
|
||||
// Config holds all tunables for the orchestrator.
|
||||
type Config struct {
|
||||
FCBin string // path to firecracker binary
|
||||
BaseDir string // working directory for all state
|
||||
Kernel string // path to vmlinux
|
||||
Rootfs string // path to base rootfs.ext4
|
||||
VCPUs int64
|
||||
MemMiB int64
|
||||
Bridge string // host bridge name, or "none" to skip networking
|
||||
BridgeCIDR string // e.g. "172.30.0.1/24"
|
||||
GuestPrefix string // e.g. "172.30.0" — clones get .10, .11, ...
|
||||
GuestGW string
|
||||
BootArgs string
|
||||
}
|
||||
|
||||
func DefaultConfig() Config {
|
||||
c := Config{
|
||||
FCBin: envOr("FC_BIN", "firecracker"),
|
||||
BaseDir: envOr("FC_BASE_DIR", "/tmp/fc-orch"),
|
||||
VCPUs: envOrInt("FC_VCPUS", 1),
|
||||
MemMiB: envOrInt("FC_MEM_MIB", 128),
|
||||
Bridge: envOr("FC_BRIDGE", "fcbr0"),
|
||||
BridgeCIDR: envOr("FC_BRIDGE_CIDR", "172.30.0.1/24"),
|
||||
GuestPrefix: envOr("FC_GUEST_PREFIX", "172.30.0"),
|
||||
GuestGW: envOr("FC_GUEST_GW", "172.30.0.1"),
|
||||
BootArgs: "console=ttyS0 reboot=k panic=1 pci=off i8042.noaux quiet loglevel=0",
|
||||
}
|
||||
c.Kernel = envOr("FC_KERNEL", c.BaseDir+"/vmlinux")
|
||||
c.Rootfs = envOr("FC_ROOTFS", c.BaseDir+"/rootfs.ext4")
|
||||
return c
|
||||
}
|
||||
|
||||
func envOr(key, fallback string) string {
|
||||
if v := os.Getenv(key); v != "" {
|
||||
return v
|
||||
}
|
||||
return fallback
|
||||
}
|
||||
|
||||
func envOrInt(key string, fallback int64) int64 {
|
||||
if v := os.Getenv(key); v != "" {
|
||||
if n, err := strconv.ParseInt(v, 10, 64); err == nil {
|
||||
return n
|
||||
}
|
||||
}
|
||||
return fallback
|
||||
}
|
||||
72
orchestrator/network.go
Normal file
72
orchestrator/network.go
Normal file
@@ -0,0 +1,72 @@
|
||||
package orchestrator
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os/exec"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// setupBridge creates the host bridge + enables NAT if it doesn't exist.
|
||||
func (o *Orchestrator) setupBridge() error {
|
||||
if o.cfg.Bridge == "none" {
|
||||
return nil
|
||||
}
|
||||
|
||||
// check if bridge already exists
|
||||
if err := run("ip", "link", "show", o.cfg.Bridge); err == nil {
|
||||
return nil // already up
|
||||
}
|
||||
|
||||
if err := run("ip", "link", "add", o.cfg.Bridge, "type", "bridge"); err != nil {
|
||||
return fmt.Errorf("create bridge: %w", err)
|
||||
}
|
||||
if err := run("ip", "addr", "add", o.cfg.BridgeCIDR, "dev", o.cfg.Bridge); err != nil {
|
||||
return fmt.Errorf("add bridge addr: %w", err)
|
||||
}
|
||||
if err := run("ip", "link", "set", o.cfg.Bridge, "up"); err != nil {
|
||||
return fmt.Errorf("bring bridge up: %w", err)
|
||||
}
|
||||
|
||||
// find default route interface for NAT
|
||||
out, err := exec.Command("ip", "-4", "route", "show", "default").Output()
|
||||
if err == nil {
|
||||
fields := strings.Fields(string(out))
|
||||
for i, f := range fields {
|
||||
if f == "dev" && i+1 < len(fields) {
|
||||
iface := fields[i+1]
|
||||
_ = run("sysctl", "-qw", "net.ipv4.ip_forward=1")
|
||||
// idempotent: ignore error if rule exists
|
||||
_ = run("iptables", "-t", "nat", "-A", "POSTROUTING",
|
||||
"-o", iface, "-j", "MASQUERADE")
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
o.log.Infof("bridge %s up on %s", o.cfg.Bridge, o.cfg.BridgeCIDR)
|
||||
return nil
|
||||
}
|
||||
|
||||
// createTap creates a tap device and attaches it to the bridge.
|
||||
func (o *Orchestrator) createTap(name string) error {
|
||||
if err := run("ip", "tuntap", "add", "dev", name, "mode", "tap"); err != nil {
|
||||
return fmt.Errorf("create tap %s: %w", name, err)
|
||||
}
|
||||
if err := run("ip", "link", "set", name, "up"); err != nil {
|
||||
return fmt.Errorf("bring tap %s up: %w", name, err)
|
||||
}
|
||||
if o.cfg.Bridge != "none" {
|
||||
if err := run("ip", "link", "set", name, "master", o.cfg.Bridge); err != nil {
|
||||
return fmt.Errorf("attach tap %s to bridge: %w", name, err)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func destroyTap(name string) {
|
||||
_ = run("ip", "link", "del", name)
|
||||
}
|
||||
|
||||
// run executes a command, returning an error if it fails.
|
||||
func run(name string, args ...string) error {
|
||||
return exec.Command(name, args...).Run()
|
||||
}
|
||||
545
orchestrator/orchestrator.go
Normal file
545
orchestrator/orchestrator.go
Normal file
@@ -0,0 +1,545 @@
|
||||
package orchestrator
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
firecracker "github.com/firecracker-microvm/firecracker-go-sdk"
|
||||
"github.com/firecracker-microvm/firecracker-go-sdk/client/models"
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// Orchestrator manages golden snapshots and clone VMs.
|
||||
type Orchestrator struct {
|
||||
cfg Config
|
||||
log *log.Entry
|
||||
|
||||
mu sync.Mutex
|
||||
clones map[int]*cloneInfo
|
||||
}
|
||||
|
||||
type cloneInfo struct {
|
||||
ID int
|
||||
Machine *firecracker.Machine
|
||||
Cancel context.CancelFunc
|
||||
Tap string
|
||||
}
|
||||
|
||||
func New(cfg Config) *Orchestrator {
|
||||
return &Orchestrator{
|
||||
cfg: cfg,
|
||||
log: log.WithField("component", "fc-orch"),
|
||||
clones: make(map[int]*cloneInfo),
|
||||
}
|
||||
}
|
||||
|
||||
func (o *Orchestrator) goldenDir() string { return filepath.Join(o.cfg.BaseDir, "golden") }
|
||||
func (o *Orchestrator) clonesDir() string { return filepath.Join(o.cfg.BaseDir, "clones") }
|
||||
func (o *Orchestrator) pidsDir() string { return filepath.Join(o.cfg.BaseDir, "pids") }
|
||||
|
||||
// ——— Init ————————————————————————————————————————————————————————————————
|
||||
|
||||
func (o *Orchestrator) Init() error {
|
||||
if err := os.MkdirAll(o.cfg.BaseDir, 0o755); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Download kernel if missing
|
||||
if _, err := os.Stat(o.cfg.Kernel); os.IsNotExist(err) {
|
||||
url := "https://s3.amazonaws.com/spec.ccfc.min/ci-artifacts/kernels/x86_64/vmlinux-6.1.bin"
|
||||
o.log.Infof("downloading kernel from %s ...", url)
|
||||
if err := downloadFile(url, o.cfg.Kernel); err != nil {
|
||||
return fmt.Errorf("download kernel: %w", err)
|
||||
}
|
||||
o.log.Infof("kernel saved to %s", o.cfg.Kernel)
|
||||
}
|
||||
|
||||
// Build rootfs if missing
|
||||
if _, err := os.Stat(o.cfg.Rootfs); os.IsNotExist(err) {
|
||||
o.log.Info("building minimal Alpine rootfs ...")
|
||||
if err := o.buildRootfs(); err != nil {
|
||||
return fmt.Errorf("build rootfs: %w", err)
|
||||
}
|
||||
o.log.Infof("rootfs saved to %s", o.cfg.Rootfs)
|
||||
}
|
||||
|
||||
o.log.Info("init complete")
|
||||
return nil
|
||||
}
|
||||
|
||||
func (o *Orchestrator) buildRootfs() error {
|
||||
sizeMB := 512
|
||||
mnt := filepath.Join(o.cfg.BaseDir, "mnt")
|
||||
|
||||
// create empty ext4 image
|
||||
if err := run("dd", "if=/dev/zero", "of="+o.cfg.Rootfs,
|
||||
"bs=1M", fmt.Sprintf("count=%d", sizeMB), "status=none"); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := run("mkfs.ext4", "-qF", o.cfg.Rootfs); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
os.MkdirAll(mnt, 0o755)
|
||||
if err := run("mount", "-o", "loop", o.cfg.Rootfs, mnt); err != nil {
|
||||
return err
|
||||
}
|
||||
defer run("umount", mnt)
|
||||
|
||||
// download and extract Alpine minirootfs
|
||||
alpineVer := "3.20"
|
||||
arch := "x86_64"
|
||||
tarball := fmt.Sprintf("alpine-minirootfs-%s.0-%s.tar.gz", alpineVer, arch)
|
||||
url := fmt.Sprintf("https://dl-cdn.alpinelinux.org/alpine/v%s/releases/%s/%s",
|
||||
alpineVer, arch, tarball)
|
||||
|
||||
tarPath := filepath.Join(o.cfg.BaseDir, tarball)
|
||||
if err := downloadFile(url, tarPath); err != nil {
|
||||
return fmt.Errorf("download alpine: %w", err)
|
||||
}
|
||||
if err := run("tar", "xzf", tarPath, "-C", mnt); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// write init script
|
||||
initScript := `#!/bin/sh
|
||||
mount -t proc proc /proc
|
||||
mount -t sysfs sys /sys
|
||||
mount -t devtmpfs devtmpfs /dev
|
||||
ip link set eth0 up 2>/dev/null
|
||||
`
|
||||
initPath := filepath.Join(mnt, "etc", "init.d", "rcS")
|
||||
os.MkdirAll(filepath.Dir(initPath), 0o755)
|
||||
if err := os.WriteFile(initPath, []byte(initScript), 0o755); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// write inittab
|
||||
inittab := "::sysinit:/etc/init.d/rcS\nttyS0::respawn:/bin/sh\n"
|
||||
return os.WriteFile(filepath.Join(mnt, "etc", "inittab"), []byte(inittab), 0o644)
|
||||
}
|
||||
|
||||
// ——— Golden VM ——————————————————————————————————————————————————————————
|
||||
|
||||
func (o *Orchestrator) Golden() error {
|
||||
if _, err := os.Stat(o.cfg.Kernel); err != nil {
|
||||
return fmt.Errorf("kernel not found — run init first: %w", err)
|
||||
}
|
||||
if _, err := os.Stat(o.cfg.Rootfs); err != nil {
|
||||
return fmt.Errorf("rootfs not found — run init first: %w", err)
|
||||
}
|
||||
|
||||
goldenDir := o.goldenDir()
|
||||
os.RemoveAll(goldenDir)
|
||||
os.MkdirAll(goldenDir, 0o755)
|
||||
os.MkdirAll(o.pidsDir(), 0o755)
|
||||
|
||||
// COW copy of rootfs for golden VM
|
||||
goldenRootfs := filepath.Join(goldenDir, "rootfs.ext4")
|
||||
if err := reflinkCopy(o.cfg.Rootfs, goldenRootfs); err != nil {
|
||||
return fmt.Errorf("copy rootfs: %w", err)
|
||||
}
|
||||
|
||||
sockPath := filepath.Join(goldenDir, "api.sock")
|
||||
os.Remove(sockPath) // remove stale socket
|
||||
|
||||
// prepare network
|
||||
tap := "fctap0"
|
||||
var netIfaces firecracker.NetworkInterfaces
|
||||
if o.cfg.Bridge != "none" {
|
||||
if err := o.setupBridge(); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := o.createTap(tap); err != nil {
|
||||
return err
|
||||
}
|
||||
defer destroyTap(tap)
|
||||
|
||||
netIfaces = firecracker.NetworkInterfaces{
|
||||
firecracker.NetworkInterface{
|
||||
StaticConfiguration: &firecracker.StaticNetworkConfiguration{
|
||||
MacAddress: "AA:FC:00:00:00:01",
|
||||
HostDevName: tap,
|
||||
},
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
vcpus := o.cfg.VCPUs
|
||||
mem := o.cfg.MemMiB
|
||||
rootDriveID := "rootfs"
|
||||
isRoot := true
|
||||
isRO := false
|
||||
trackDirty := true
|
||||
|
||||
fcCfg := firecracker.Config{
|
||||
SocketPath: sockPath,
|
||||
KernelImagePath: o.cfg.Kernel,
|
||||
KernelArgs: o.cfg.BootArgs,
|
||||
MachineCfg: models.MachineConfiguration{
|
||||
VcpuCount: &vcpus,
|
||||
MemSizeMib: &mem,
|
||||
TrackDirtyPages: trackDirty,
|
||||
},
|
||||
Drives: []models.Drive{
|
||||
{
|
||||
DriveID: &rootDriveID,
|
||||
PathOnHost: &goldenRootfs,
|
||||
IsRootDevice: &isRoot,
|
||||
IsReadOnly: &isRO,
|
||||
},
|
||||
},
|
||||
NetworkInterfaces: netIfaces,
|
||||
}
|
||||
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
defer cancel()
|
||||
|
||||
// find firecracker binary
|
||||
fcBin, err := exec.LookPath(o.cfg.FCBin)
|
||||
if err != nil {
|
||||
return fmt.Errorf("firecracker binary not found: %w", err)
|
||||
}
|
||||
cmd := firecracker.VMCommandBuilder{}.
|
||||
WithBin(fcBin).
|
||||
WithSocketPath(sockPath).
|
||||
Build(ctx)
|
||||
|
||||
m, err := firecracker.NewMachine(ctx, fcCfg, firecracker.WithProcessRunner(cmd))
|
||||
if err != nil {
|
||||
return fmt.Errorf("new machine: %w", err)
|
||||
}
|
||||
|
||||
o.log.Info("starting golden VM ...")
|
||||
if err := m.Start(ctx); err != nil {
|
||||
return fmt.Errorf("start golden VM: %w", err)
|
||||
}
|
||||
|
||||
// write PID for tracking
|
||||
if m.Cfg.VMID != "" {
|
||||
os.WriteFile(filepath.Join(o.pidsDir(), "golden.pid"),
|
||||
[]byte(fmt.Sprintf("%d", cmd.Process.Pid)), 0o644)
|
||||
}
|
||||
|
||||
o.log.Info("golden VM booted, letting it settle ...")
|
||||
time.Sleep(3 * time.Second)
|
||||
|
||||
// pause
|
||||
o.log.Info("pausing golden VM ...")
|
||||
if err := m.PauseVM(ctx); err != nil {
|
||||
return fmt.Errorf("pause VM: %w", err)
|
||||
}
|
||||
|
||||
// snapshot
|
||||
memPath := filepath.Join(goldenDir, "mem")
|
||||
vmstatePath := filepath.Join(goldenDir, "vmstate")
|
||||
|
||||
o.log.Info("creating snapshot ...")
|
||||
if err := m.CreateSnapshot(ctx, memPath, vmstatePath); err != nil {
|
||||
return fmt.Errorf("create snapshot: %w", err)
|
||||
}
|
||||
|
||||
// log sizes
|
||||
for _, f := range []string{memPath, vmstatePath} {
|
||||
if info, err := os.Stat(f); err == nil {
|
||||
o.log.Infof(" %s: %d MiB", filepath.Base(f), info.Size()/(1024*1024))
|
||||
}
|
||||
}
|
||||
|
||||
// kill golden VM — we only need the artifacts
|
||||
m.StopVMM()
|
||||
cancel()
|
||||
|
||||
o.log.Infof("golden snapshot ready at %s/{vmstate,mem}", goldenDir)
|
||||
return nil
|
||||
}
|
||||
|
||||
// ——— Spawn clones ——————————————————————————————————————————————————————
|
||||
|
||||
func (o *Orchestrator) Spawn(count int) error {
|
||||
goldenDir := o.goldenDir()
|
||||
for _, f := range []string{"vmstate", "mem"} {
|
||||
if _, err := os.Stat(filepath.Join(goldenDir, f)); err != nil {
|
||||
return fmt.Errorf("golden %s not found — run golden first", f)
|
||||
}
|
||||
}
|
||||
|
||||
os.MkdirAll(o.clonesDir(), 0o755)
|
||||
os.MkdirAll(o.pidsDir(), 0o755)
|
||||
if o.cfg.Bridge != "none" {
|
||||
if err := o.setupBridge(); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
for i := 0; i < count; i++ {
|
||||
id := o.nextCloneID()
|
||||
if err := o.spawnOne(id); err != nil {
|
||||
o.log.Errorf("clone %d failed: %v", id, err)
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
o.log.Infof("spawned %d clone(s) from golden snapshot", count)
|
||||
o.Status()
|
||||
return nil
|
||||
}
|
||||
|
||||
func (o *Orchestrator) spawnOne(id int) error {
|
||||
goldenDir := o.goldenDir()
|
||||
cloneDir := filepath.Join(o.clonesDir(), strconv.Itoa(id))
|
||||
os.MkdirAll(cloneDir, 0o755)
|
||||
|
||||
sockPath := filepath.Join(cloneDir, "api.sock")
|
||||
os.Remove(sockPath)
|
||||
|
||||
// --- COW rootfs ---
|
||||
cloneRootfs := filepath.Join(cloneDir, "rootfs.ext4")
|
||||
if err := reflinkCopy(filepath.Join(goldenDir, "rootfs.ext4"), cloneRootfs); err != nil {
|
||||
return fmt.Errorf("copy rootfs: %w", err)
|
||||
}
|
||||
|
||||
// --- Memory: point at the shared golden mem file ---
|
||||
// Firecracker uses MAP_PRIVATE → kernel COW. No copy needed.
|
||||
sharedMem := filepath.Join(goldenDir, "mem")
|
||||
|
||||
// --- vmstate: small, cheap copy ---
|
||||
cloneVmstate := filepath.Join(cloneDir, "vmstate")
|
||||
if err := copyFile(filepath.Join(goldenDir, "vmstate"), cloneVmstate); err != nil {
|
||||
return fmt.Errorf("copy vmstate: %w", err)
|
||||
}
|
||||
|
||||
// --- Networking ---
|
||||
tapName := fmt.Sprintf("fctap%d", id)
|
||||
var netIfaces firecracker.NetworkInterfaces
|
||||
if o.cfg.Bridge != "none" {
|
||||
if err := o.createTap(tapName); err != nil {
|
||||
return err
|
||||
}
|
||||
mac := fmt.Sprintf("AA:FC:00:00:%02X:%02X", id/256, id%256)
|
||||
netIfaces = firecracker.NetworkInterfaces{
|
||||
firecracker.NetworkInterface{
|
||||
StaticConfiguration: &firecracker.StaticNetworkConfiguration{
|
||||
MacAddress: mac,
|
||||
HostDevName: tapName,
|
||||
},
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// --- Restore from snapshot ---
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
|
||||
fcBin, err := exec.LookPath(o.cfg.FCBin)
|
||||
if err != nil {
|
||||
cancel()
|
||||
return fmt.Errorf("firecracker not found: %w", err)
|
||||
}
|
||||
|
||||
cmd := firecracker.VMCommandBuilder{}.
|
||||
WithBin(fcBin).
|
||||
WithSocketPath(sockPath).
|
||||
Build(ctx)
|
||||
|
||||
vcpus := o.cfg.VCPUs
|
||||
mem := o.cfg.MemMiB
|
||||
|
||||
fcCfg := firecracker.Config{
|
||||
SocketPath: sockPath,
|
||||
MachineCfg: models.MachineConfiguration{
|
||||
VcpuCount: &vcpus,
|
||||
MemSizeMib: &mem,
|
||||
},
|
||||
NetworkInterfaces: netIfaces,
|
||||
// Snapshot config: tells the SDK to restore instead of fresh boot.
|
||||
Snapshot: firecracker.SnapshotConfig{
|
||||
MemFilePath: sharedMem,
|
||||
SnapshotPath: cloneVmstate,
|
||||
ResumeVM: true,
|
||||
},
|
||||
}
|
||||
|
||||
m, err := firecracker.NewMachine(ctx, fcCfg, firecracker.WithProcessRunner(cmd))
|
||||
if err != nil {
|
||||
cancel()
|
||||
return fmt.Errorf("new machine: %w", err)
|
||||
}
|
||||
|
||||
start := time.Now()
|
||||
if err := m.Start(ctx); err != nil {
|
||||
cancel()
|
||||
return fmt.Errorf("restore clone %d: %w", id, err)
|
||||
}
|
||||
elapsed := time.Since(start)
|
||||
|
||||
// store PID
|
||||
if cmd.Process != nil {
|
||||
os.WriteFile(filepath.Join(o.pidsDir(), fmt.Sprintf("clone-%d.pid", id)),
|
||||
[]byte(strconv.Itoa(cmd.Process.Pid)), 0o644)
|
||||
}
|
||||
|
||||
o.mu.Lock()
|
||||
o.clones[id] = &cloneInfo{
|
||||
ID: id,
|
||||
Machine: m,
|
||||
Cancel: cancel,
|
||||
Tap: tapName,
|
||||
}
|
||||
o.mu.Unlock()
|
||||
|
||||
o.log.Infof("clone %d: restored in %s (pid=%d, tap=%s)",
|
||||
id, elapsed.Round(time.Millisecond), cmd.Process.Pid, tapName)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// ——— Status ————————————————————————————————————————————————————————————
|
||||
|
||||
func (o *Orchestrator) Status() {
|
||||
entries, _ := os.ReadDir(o.pidsDir())
|
||||
fmt.Println("=== Running clones ===")
|
||||
for _, e := range entries {
|
||||
if !strings.HasPrefix(e.Name(), "clone-") {
|
||||
continue
|
||||
}
|
||||
data, _ := os.ReadFile(filepath.Join(o.pidsDir(), e.Name()))
|
||||
pid := strings.TrimSpace(string(data))
|
||||
alive := "DEAD"
|
||||
if _, err := os.Stat(fmt.Sprintf("/proc/%s", pid)); err == nil {
|
||||
alive = "alive"
|
||||
}
|
||||
name := strings.TrimSuffix(e.Name(), ".pid")
|
||||
fmt.Printf(" %-12s pid=%-8s %s\n", name, pid, alive)
|
||||
}
|
||||
}
|
||||
|
||||
// ——— Kill ——————————————————————————————————————————————————————————————
|
||||
|
||||
func (o *Orchestrator) Kill() error {
|
||||
// kill in-memory clones
|
||||
o.mu.Lock()
|
||||
for id, c := range o.clones {
|
||||
c.Machine.StopVMM()
|
||||
c.Cancel()
|
||||
destroyTap(c.Tap)
|
||||
delete(o.clones, id)
|
||||
}
|
||||
o.mu.Unlock()
|
||||
|
||||
// also kill any from PID files (from a previous run)
|
||||
entries, _ := os.ReadDir(o.pidsDir())
|
||||
for _, e := range entries {
|
||||
data, err := os.ReadFile(filepath.Join(o.pidsDir(), e.Name()))
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
pid, err := strconv.Atoi(strings.TrimSpace(string(data)))
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
if p, err := os.FindProcess(pid); err == nil {
|
||||
p.Kill()
|
||||
o.log.Infof("killed pid %d", pid)
|
||||
}
|
||||
os.Remove(filepath.Join(o.pidsDir(), e.Name()))
|
||||
}
|
||||
|
||||
// destroy stale tap devices
|
||||
out, _ := exec.Command("ip", "-o", "link", "show").Output()
|
||||
for _, line := range strings.Split(string(out), "\n") {
|
||||
if idx := strings.Index(line, "fctap"); idx >= 0 {
|
||||
fields := strings.SplitN(line[idx:], ":", 2)
|
||||
if len(fields) > 0 {
|
||||
tap := strings.TrimSpace(fields[0])
|
||||
destroyTap(tap)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
o.log.Info("all VMs killed")
|
||||
return nil
|
||||
}
|
||||
|
||||
// ——— Cleanup ——————————————————————————————————————————————————————————
|
||||
|
||||
func (o *Orchestrator) Cleanup() error {
|
||||
o.Kill()
|
||||
os.RemoveAll(o.clonesDir())
|
||||
os.RemoveAll(o.goldenDir())
|
||||
os.RemoveAll(o.pidsDir())
|
||||
|
||||
if o.cfg.Bridge != "none" {
|
||||
_ = run("ip", "link", "del", o.cfg.Bridge)
|
||||
o.log.Infof("removed bridge %s", o.cfg.Bridge)
|
||||
}
|
||||
|
||||
o.log.Infof("cleaned up %s", o.cfg.BaseDir)
|
||||
return nil
|
||||
}
|
||||
|
||||
// ——— Helpers ——————————————————————————————————————————————————————————
|
||||
|
||||
func (o *Orchestrator) nextCloneID() int {
|
||||
max := 0
|
||||
entries, _ := os.ReadDir(o.clonesDir())
|
||||
for _, e := range entries {
|
||||
if n, err := strconv.Atoi(e.Name()); err == nil && n > max {
|
||||
max = n
|
||||
}
|
||||
}
|
||||
return max + 1
|
||||
}
|
||||
|
||||
// reflinkCopy tries cp --reflink=auto, falling back to plain copy.
|
||||
func reflinkCopy(src, dst string) error {
|
||||
err := exec.Command("cp", "--reflink=always", src, dst).Run()
|
||||
if err != nil {
|
||||
// fallback: regular copy
|
||||
return copyFile(src, dst)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func copyFile(src, dst string) error {
|
||||
in, err := os.Open(src)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer in.Close()
|
||||
|
||||
out, err := os.Create(dst)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer out.Close()
|
||||
|
||||
_, err = io.Copy(out, in)
|
||||
return err
|
||||
}
|
||||
|
||||
func downloadFile(url, dest string) error {
|
||||
resp, err := http.Get(url)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
if resp.StatusCode != 200 {
|
||||
return fmt.Errorf("HTTP %d for %s", resp.StatusCode, url)
|
||||
}
|
||||
f, err := os.Create(dest)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer f.Close()
|
||||
_, err = io.Copy(f, resp.Body)
|
||||
return err
|
||||
}
|
||||
Reference in New Issue
Block a user