package orchestrator import ( "context" "fmt" "io" "net/http" "os" "os/exec" "path/filepath" "strconv" "strings" "sync" "time" firecracker "github.com/firecracker-microvm/firecracker-go-sdk" "github.com/firecracker-microvm/firecracker-go-sdk/client/models" log "github.com/sirupsen/logrus" ) // Orchestrator manages golden snapshots and clone VMs. type Orchestrator struct { cfg Config log *log.Entry mu sync.Mutex clones map[int]*cloneInfo } type cloneInfo struct { ID int Machine *firecracker.Machine Cancel context.CancelFunc Tap string } func New(cfg Config) *Orchestrator { return &Orchestrator{ cfg: cfg, log: log.WithField("component", "fc-orch"), clones: make(map[int]*cloneInfo), } } func (o *Orchestrator) goldenDir() string { return filepath.Join(o.cfg.BaseDir, "golden") } func (o *Orchestrator) clonesDir() string { return filepath.Join(o.cfg.BaseDir, "clones") } func (o *Orchestrator) pidsDir() string { return filepath.Join(o.cfg.BaseDir, "pids") } // ——— Init ———————————————————————————————————————————————————————————————— func (o *Orchestrator) Init() error { if err := os.MkdirAll(o.cfg.BaseDir, 0o755); err != nil { return err } // Download kernel if missing if _, err := os.Stat(o.cfg.Kernel); os.IsNotExist(err) { url := "https://s3.amazonaws.com/spec.ccfc.min/ci-artifacts/kernels/x86_64/vmlinux-6.1.bin" o.log.Infof("downloading kernel from %s ...", url) if err := downloadFile(url, o.cfg.Kernel); err != nil { return fmt.Errorf("download kernel: %w", err) } o.log.Infof("kernel saved to %s", o.cfg.Kernel) } // Build rootfs if missing if _, err := os.Stat(o.cfg.Rootfs); os.IsNotExist(err) { o.log.Info("building minimal Alpine rootfs ...") if err := o.buildRootfs(); err != nil { return fmt.Errorf("build rootfs: %w", err) } o.log.Infof("rootfs saved to %s", o.cfg.Rootfs) } o.log.Info("init complete") return nil } func (o *Orchestrator) buildRootfs() error { sizeMB := 512 mnt := filepath.Join(o.cfg.BaseDir, "mnt") // create empty ext4 image if err := run("dd", "if=/dev/zero", "of="+o.cfg.Rootfs, "bs=1M", fmt.Sprintf("count=%d", sizeMB), "status=none"); err != nil { return err } if err := run("mkfs.ext4", "-qF", o.cfg.Rootfs); err != nil { return err } os.MkdirAll(mnt, 0o755) if err := run("mount", "-o", "loop", o.cfg.Rootfs, mnt); err != nil { return err } defer run("umount", mnt) // download and extract Alpine minirootfs alpineVer := "3.20" arch := "x86_64" tarball := fmt.Sprintf("alpine-minirootfs-%s.0-%s.tar.gz", alpineVer, arch) url := fmt.Sprintf("https://dl-cdn.alpinelinux.org/alpine/v%s/releases/%s/%s", alpineVer, arch, tarball) tarPath := filepath.Join(o.cfg.BaseDir, tarball) if err := downloadFile(url, tarPath); err != nil { return fmt.Errorf("download alpine: %w", err) } if err := run("tar", "xzf", tarPath, "-C", mnt); err != nil { return err } // write init script initScript := `#!/bin/sh mount -t proc proc /proc mount -t sysfs sys /sys mount -t devtmpfs devtmpfs /dev ip link set eth0 up 2>/dev/null ` initPath := filepath.Join(mnt, "etc", "init.d", "rcS") os.MkdirAll(filepath.Dir(initPath), 0o755) if err := os.WriteFile(initPath, []byte(initScript), 0o755); err != nil { return err } // write inittab inittab := "::sysinit:/etc/init.d/rcS\nttyS0::respawn:/bin/sh\n" return os.WriteFile(filepath.Join(mnt, "etc", "inittab"), []byte(inittab), 0o644) } // ——— Golden VM —————————————————————————————————————————————————————————— func (o *Orchestrator) Golden() error { if _, err := os.Stat(o.cfg.Kernel); err != nil { return fmt.Errorf("kernel not found — run init first: %w", err) } if _, err := os.Stat(o.cfg.Rootfs); err != nil { return fmt.Errorf("rootfs not found — run init first: %w", err) } goldenDir := o.goldenDir() os.RemoveAll(goldenDir) os.MkdirAll(goldenDir, 0o755) os.MkdirAll(o.pidsDir(), 0o755) // COW copy of rootfs for golden VM goldenRootfs := filepath.Join(goldenDir, "rootfs.ext4") if err := reflinkCopy(o.cfg.Rootfs, goldenRootfs); err != nil { return fmt.Errorf("copy rootfs: %w", err) } sockPath := filepath.Join(goldenDir, "api.sock") os.Remove(sockPath) // remove stale socket // prepare network tap := "fctap0" var netIfaces firecracker.NetworkInterfaces if o.cfg.Bridge != "none" { if err := o.setupBridge(); err != nil { return err } if err := o.createTap(tap); err != nil { return err } defer destroyTap(tap) netIfaces = firecracker.NetworkInterfaces{ firecracker.NetworkInterface{ StaticConfiguration: &firecracker.StaticNetworkConfiguration{ MacAddress: "AA:FC:00:00:00:01", HostDevName: tap, }, }, } } vcpus := o.cfg.VCPUs mem := o.cfg.MemMiB rootDriveID := "rootfs" isRoot := true isRO := false trackDirty := true fcCfg := firecracker.Config{ SocketPath: sockPath, KernelImagePath: o.cfg.Kernel, KernelArgs: o.cfg.BootArgs, MachineCfg: models.MachineConfiguration{ VcpuCount: &vcpus, MemSizeMib: &mem, TrackDirtyPages: trackDirty, }, Drives: []models.Drive{ { DriveID: &rootDriveID, PathOnHost: &goldenRootfs, IsRootDevice: &isRoot, IsReadOnly: &isRO, }, }, NetworkInterfaces: netIfaces, } ctx, cancel := context.WithCancel(context.Background()) defer cancel() // find firecracker binary fcBin, err := exec.LookPath(o.cfg.FCBin) if err != nil { return fmt.Errorf("firecracker binary not found: %w", err) } cmd := firecracker.VMCommandBuilder{}. WithBin(fcBin). WithSocketPath(sockPath). Build(ctx) m, err := firecracker.NewMachine(ctx, fcCfg, firecracker.WithProcessRunner(cmd)) if err != nil { return fmt.Errorf("new machine: %w", err) } o.log.Info("starting golden VM ...") if err := m.Start(ctx); err != nil { return fmt.Errorf("start golden VM: %w", err) } // write PID for tracking if m.Cfg.VMID != "" { os.WriteFile(filepath.Join(o.pidsDir(), "golden.pid"), []byte(fmt.Sprintf("%d", cmd.Process.Pid)), 0o644) } o.log.Info("golden VM booted, letting it settle ...") time.Sleep(3 * time.Second) // pause o.log.Info("pausing golden VM ...") if err := m.PauseVM(ctx); err != nil { return fmt.Errorf("pause VM: %w", err) } // snapshot memPath := filepath.Join(goldenDir, "mem") vmstatePath := filepath.Join(goldenDir, "vmstate") o.log.Info("creating snapshot ...") if err := m.CreateSnapshot(ctx, memPath, vmstatePath); err != nil { return fmt.Errorf("create snapshot: %w", err) } // log sizes for _, f := range []string{memPath, vmstatePath} { if info, err := os.Stat(f); err == nil { o.log.Infof(" %s: %d MiB", filepath.Base(f), info.Size()/(1024*1024)) } } // kill golden VM — we only need the artifacts m.StopVMM() cancel() o.log.Infof("golden snapshot ready at %s/{vmstate,mem}", goldenDir) return nil } // ——— Spawn clones —————————————————————————————————————————————————————— func (o *Orchestrator) Spawn(count int) error { goldenDir := o.goldenDir() for _, f := range []string{"vmstate", "mem"} { if _, err := os.Stat(filepath.Join(goldenDir, f)); err != nil { return fmt.Errorf("golden %s not found — run golden first", f) } } os.MkdirAll(o.clonesDir(), 0o755) os.MkdirAll(o.pidsDir(), 0o755) if o.cfg.Bridge != "none" { if err := o.setupBridge(); err != nil { return err } } for i := 0; i < count; i++ { id := o.nextCloneID() if err := o.spawnOne(id); err != nil { o.log.Errorf("clone %d failed: %v", id, err) continue } } o.log.Infof("spawned %d clone(s) from golden snapshot", count) o.Status() return nil } func (o *Orchestrator) spawnOne(id int) error { goldenDir := o.goldenDir() cloneDir := filepath.Join(o.clonesDir(), strconv.Itoa(id)) os.MkdirAll(cloneDir, 0o755) sockPath := filepath.Join(cloneDir, "api.sock") os.Remove(sockPath) // --- COW rootfs --- cloneRootfs := filepath.Join(cloneDir, "rootfs.ext4") if err := reflinkCopy(filepath.Join(goldenDir, "rootfs.ext4"), cloneRootfs); err != nil { return fmt.Errorf("copy rootfs: %w", err) } // --- Memory: point at the shared golden mem file --- // Firecracker uses MAP_PRIVATE → kernel COW. No copy needed. sharedMem := filepath.Join(goldenDir, "mem") // --- vmstate: small, cheap copy --- cloneVmstate := filepath.Join(cloneDir, "vmstate") if err := copyFile(filepath.Join(goldenDir, "vmstate"), cloneVmstate); err != nil { return fmt.Errorf("copy vmstate: %w", err) } // --- Networking --- tapName := fmt.Sprintf("fctap%d", id) var netIfaces firecracker.NetworkInterfaces if o.cfg.Bridge != "none" { if err := o.createTap(tapName); err != nil { return err } mac := fmt.Sprintf("AA:FC:00:00:%02X:%02X", id/256, id%256) netIfaces = firecracker.NetworkInterfaces{ firecracker.NetworkInterface{ StaticConfiguration: &firecracker.StaticNetworkConfiguration{ MacAddress: mac, HostDevName: tapName, }, }, } } // --- Restore from snapshot --- ctx, cancel := context.WithCancel(context.Background()) fcBin, err := exec.LookPath(o.cfg.FCBin) if err != nil { cancel() return fmt.Errorf("firecracker not found: %w", err) } cmd := firecracker.VMCommandBuilder{}. WithBin(fcBin). WithSocketPath(sockPath). Build(ctx) vcpus := o.cfg.VCPUs mem := o.cfg.MemMiB fcCfg := firecracker.Config{ SocketPath: sockPath, MachineCfg: models.MachineConfiguration{ VcpuCount: &vcpus, MemSizeMib: &mem, }, NetworkInterfaces: netIfaces, // Snapshot config: tells the SDK to restore instead of fresh boot. Snapshot: firecracker.SnapshotConfig{ MemFilePath: sharedMem, SnapshotPath: cloneVmstate, ResumeVM: true, }, } m, err := firecracker.NewMachine(ctx, fcCfg, firecracker.WithProcessRunner(cmd)) if err != nil { cancel() return fmt.Errorf("new machine: %w", err) } start := time.Now() if err := m.Start(ctx); err != nil { cancel() return fmt.Errorf("restore clone %d: %w", id, err) } elapsed := time.Since(start) // store PID if cmd.Process != nil { os.WriteFile(filepath.Join(o.pidsDir(), fmt.Sprintf("clone-%d.pid", id)), []byte(strconv.Itoa(cmd.Process.Pid)), 0o644) } o.mu.Lock() o.clones[id] = &cloneInfo{ ID: id, Machine: m, Cancel: cancel, Tap: tapName, } o.mu.Unlock() o.log.Infof("clone %d: restored in %s (pid=%d, tap=%s)", id, elapsed.Round(time.Millisecond), cmd.Process.Pid, tapName) return nil } // ——— Status ———————————————————————————————————————————————————————————— func (o *Orchestrator) Status() { entries, _ := os.ReadDir(o.pidsDir()) fmt.Println("=== Running clones ===") for _, e := range entries { if !strings.HasPrefix(e.Name(), "clone-") { continue } data, _ := os.ReadFile(filepath.Join(o.pidsDir(), e.Name())) pid := strings.TrimSpace(string(data)) alive := "DEAD" if _, err := os.Stat(fmt.Sprintf("/proc/%s", pid)); err == nil { alive = "alive" } name := strings.TrimSuffix(e.Name(), ".pid") fmt.Printf(" %-12s pid=%-8s %s\n", name, pid, alive) } } // ——— Kill —————————————————————————————————————————————————————————————— func (o *Orchestrator) Kill() error { // kill in-memory clones o.mu.Lock() for id, c := range o.clones { c.Machine.StopVMM() c.Cancel() destroyTap(c.Tap) delete(o.clones, id) } o.mu.Unlock() // also kill any from PID files (from a previous run) entries, _ := os.ReadDir(o.pidsDir()) for _, e := range entries { data, err := os.ReadFile(filepath.Join(o.pidsDir(), e.Name())) if err != nil { continue } pid, err := strconv.Atoi(strings.TrimSpace(string(data))) if err != nil { continue } if p, err := os.FindProcess(pid); err == nil { p.Kill() o.log.Infof("killed pid %d", pid) } os.Remove(filepath.Join(o.pidsDir(), e.Name())) } // destroy stale tap devices out, _ := exec.Command("ip", "-o", "link", "show").Output() for _, line := range strings.Split(string(out), "\n") { if idx := strings.Index(line, "fctap"); idx >= 0 { fields := strings.SplitN(line[idx:], ":", 2) if len(fields) > 0 { tap := strings.TrimSpace(fields[0]) destroyTap(tap) } } } o.log.Info("all VMs killed") return nil } // ——— Cleanup —————————————————————————————————————————————————————————— func (o *Orchestrator) Cleanup() error { o.Kill() os.RemoveAll(o.clonesDir()) os.RemoveAll(o.goldenDir()) os.RemoveAll(o.pidsDir()) if o.cfg.Bridge != "none" { _ = run("ip", "link", "del", o.cfg.Bridge) o.log.Infof("removed bridge %s", o.cfg.Bridge) } o.log.Infof("cleaned up %s", o.cfg.BaseDir) return nil } // ——— Helpers —————————————————————————————————————————————————————————— func (o *Orchestrator) nextCloneID() int { max := 0 entries, _ := os.ReadDir(o.clonesDir()) for _, e := range entries { if n, err := strconv.Atoi(e.Name()); err == nil && n > max { max = n } } return max + 1 } // reflinkCopy tries cp --reflink=auto, falling back to plain copy. func reflinkCopy(src, dst string) error { err := exec.Command("cp", "--reflink=always", src, dst).Run() if err != nil { // fallback: regular copy return copyFile(src, dst) } return nil } func copyFile(src, dst string) error { in, err := os.Open(src) if err != nil { return err } defer in.Close() out, err := os.Create(dst) if err != nil { return err } defer out.Close() _, err = io.Copy(out, in) return err } func downloadFile(url, dest string) error { resp, err := http.Get(url) if err != nil { return err } defer resp.Body.Close() if resp.StatusCode != 200 { return fmt.Errorf("HTTP %d for %s", resp.StatusCode, url) } f, err := os.Create(dest) if err != nil { return err } defer f.Close() _, err = io.Copy(f, resp.Body) return err }