package orchestrator

import (
	"context"
	"fmt"
	"io"
	"net/http"
	"os"
	"os/exec"
	"path/filepath"
	"strconv"
	"strings"
	"sync"
	"syscall"
	"time"

	firecracker "github.com/firecracker-microvm/firecracker-go-sdk"
	"github.com/firecracker-microvm/firecracker-go-sdk/client/models"
	log "github.com/sirupsen/logrus"
)

// Orchestrator manages golden snapshots and clone VMs.
type Orchestrator struct {
	cfg    Config
	log    *log.Entry
	mu     sync.Mutex
	clones map[int]*cloneInfo
}

// cloneInfo tracks an in-process clone VM so Kill can tear it down.
type cloneInfo struct {
	ID      int
	Machine *firecracker.Machine
	Cancel  context.CancelFunc
	Tap     string
}

// New returns an Orchestrator for the given configuration.
func New(cfg Config) *Orchestrator {
	return &Orchestrator{
		cfg:    cfg,
		log:    log.WithField("component", "fc-orch"),
		clones: make(map[int]*cloneInfo),
	}
}

// goldenDir is where the snapshot artifacts for a given tag live.
func (o *Orchestrator) goldenDir(tag string) string {
	return filepath.Join(o.cfg.BaseDir, "golden", tag)
}

// clonesDir holds one numbered subdirectory per clone.
func (o *Orchestrator) clonesDir() string {
	return filepath.Join(o.cfg.BaseDir, "clones")
}

// pidsDir holds PID files for golden and clone proxy processes.
func (o *Orchestrator) pidsDir() string {
	return filepath.Join(o.cfg.BaseDir, "pids")
}

// ——— Init ————————————————————————————————————————————————————————————————

// Init downloads the kernel (if missing) and builds a minimal rootfs for the
// given distro (if missing). Both artifacts are cached under cfg.BaseDir.
func (o *Orchestrator) Init(distro string) error {
	if err := os.MkdirAll(o.cfg.BaseDir, 0o755); err != nil {
		return err
	}
	// Download kernel if missing
	if _, err := os.Stat(o.cfg.Kernel); os.IsNotExist(err) {
		url := o.cfg.KernelURL
		o.log.Infof("downloading kernel from %s ...", url)
		if err := downloadFile(url, o.cfg.Kernel); err != nil {
			return fmt.Errorf("download kernel: %w", err)
		}
		o.log.Infof("kernel saved to %s", o.cfg.Kernel)
	}
	// Build rootfs if missing
	rootfsPath := o.cfg.RootfsPath(distro)
	if _, err := os.Stat(rootfsPath); os.IsNotExist(err) {
		o.log.Infof("building minimal %s rootfs ...", distro)
		if err := o.buildRootfs(distro, rootfsPath); err != nil {
			return fmt.Errorf("build rootfs: %w", err)
		}
		o.log.Infof("rootfs saved to %s", rootfsPath)
	}
	o.log.Info("init complete")
	return nil
}

// buildRootfs creates an ext4 image at rootfsPath, loop-mounts it, extracts a
// distro minirootfs into it, and installs the init/network plumbing needed to
// boot under Firecracker. Requires root (mount/mkfs). The loop mount is always
// unmounted on return, regardless of success.
func (o *Orchestrator) buildRootfs(distro, rootfsPath string) error {
	sizeMB := 512
	if distro == "debian" || distro == "ubuntu" {
		sizeMB = 2048
	}
	mnt := filepath.Join(o.cfg.BaseDir, "mnt")
	// create empty ext4 image
	o.log.Infof("running: dd if=/dev/zero of=%s bs=1M count=%d status=none", rootfsPath, sizeMB)
	if err := run("dd", "if=/dev/zero", "of="+rootfsPath, "bs=1M", fmt.Sprintf("count=%d", sizeMB), "status=none"); err != nil {
		return err
	}
	o.log.Infof("running: mkfs.ext4 -qF %s", rootfsPath)
	if err := run("mkfs.ext4", "-qF", rootfsPath); err != nil {
		return err
	}
	// FIX: mkdir failure previously ignored — the subsequent mount would fail
	// with a less informative error.
	if err := os.MkdirAll(mnt, 0o755); err != nil {
		return err
	}
	o.log.Infof("running: mount -o loop %s %s", rootfsPath, mnt)
	if err := run("mount", "-o", "loop", rootfsPath, mnt); err != nil {
		return err
	}
	defer func() {
		o.log.Infof("running: umount %s", mnt)
		run("umount", mnt) //nolint:errcheck // best-effort cleanup
	}()
	// download and extract minirootfs
	switch distro {
	case "alpine":
		alpineVer := "3.20"
		arch := "x86_64"
		tarball := fmt.Sprintf("alpine-minirootfs-%s.0-%s.tar.gz", alpineVer, arch)
		url := fmt.Sprintf("https://dl-cdn.alpinelinux.org/alpine/v%s/releases/%s/%s", alpineVer, arch, tarball)
		tarPath := filepath.Join(o.cfg.BaseDir, tarball)
		o.log.Infof("downloading http request: GET %s to %s", url, tarPath)
		if err := downloadFile(url, tarPath); err != nil {
			return fmt.Errorf("download alpine: %w", err)
		}
		o.log.Infof("running: tar xzf %s -C %s", tarPath, mnt)
		if err := run("tar", "xzf", tarPath, "-C", mnt); err != nil {
			return err
		}
	case "debian":
		tarball := "debian-12-nocloud-amd64.tar.xz"
		url := "https://cloud.debian.org/images/cloud/bookworm/latest/" + tarball
		tarPath := filepath.Join(o.cfg.BaseDir, tarball)
		o.log.Infof("downloading http request: GET %s to %s", url, tarPath)
		if err := downloadFile(url, tarPath); err != nil {
			return fmt.Errorf("download debian: %w", err)
		}
		o.log.Infof("running: tar xJf %s -C %s", tarPath, mnt)
		if err := run("tar", "xJf", tarPath, "-C", mnt); err != nil {
			return err
		}
	case "ubuntu":
		tarball := "ubuntu-base-24.04.4-base-amd64.tar.gz"
		url := "https://cdimage.ubuntu.com/ubuntu-base/releases/24.04/release/" + tarball
		tarPath := filepath.Join(o.cfg.BaseDir, tarball)
		o.log.Infof("downloading http request: GET %s to %s", url, tarPath)
		if err := downloadFile(url, tarPath); err != nil {
			return fmt.Errorf("download ubuntu: %w", err)
		}
		o.log.Infof("running: tar xzf %s -C %s", tarPath, mnt)
		if err := run("tar", "xzf", tarPath, "-C", mnt); err != nil {
			return err
		}
		o.log.Info("installing essential packages in ubuntu chroot ...")
		if err := installUbuntuPackages(mnt, o.log); err != nil {
			return fmt.Errorf("install ubuntu packages: %w", err)
		}
	default:
		return fmt.Errorf("unsupported distro: %s", distro)
	}
	// write fc-net-init daemon: polls MMDS for IP config and applies it.
	// Always embedded — harmless if MMDS is never populated (sleeps 1 s/loop).
	// Captured in the golden snapshot so it runs on every clone resume too.
	netInitScript := `#!/bin/sh
# Poll Firecracker MMDS for network config, apply it, then exit.
# Runs in background; loops until MMDS responds (survives snapshot resume).
ip link set eth0 up 2>/dev/null
ip route add 169.254.169.254 dev eth0 2>/dev/null
ip addr add 169.254.169.2/32 dev eth0 2>/dev/null
while true; do
  ip=$(wget -q -T1 -O- http://169.254.169.254/ip 2>/dev/null | tr -d '"')
  [ -n "$ip" ] || { sleep 1; continue; }
  gw=$(wget -q -T1 -O- http://169.254.169.254/gw 2>/dev/null | tr -d '"')
  dns=$(wget -q -T1 -O- http://169.254.169.254/dns 2>/dev/null | tr -d '"')
  ip addr flush dev eth0 2>/dev/null
  ip addr add "$ip" dev eth0 2>/dev/null
  ip route add default via "$gw" dev eth0 2>/dev/null
  printf "nameserver %s\n" "$dns" > /etc/resolv.conf
  break
done
`
	os.MkdirAll(filepath.Join(mnt, "sbin"), 0o755) //nolint:errcheck // WriteFile below surfaces any failure
	if err := os.WriteFile(filepath.Join(mnt, "sbin", "fc-net-init"), []byte(netInitScript), 0o755); err != nil {
		return err
	}
	if distro == "alpine" {
		// write init script
		initScript := `#!/bin/sh
mount -t proc proc /proc
mount -t sysfs sys /sys
mount -t devtmpfs devtmpfs /dev
ip link set eth0 up 2>/dev/null
ip route add 169.254.169.254 dev eth0 2>/dev/null
/sbin/fc-net-init &
`
		initPath := filepath.Join(mnt, "etc", "init.d", "rcS")
		os.MkdirAll(filepath.Dir(initPath), 0o755) //nolint:errcheck
		if err := os.WriteFile(initPath, []byte(initScript), 0o755); err != nil {
			return err
		}
		// write inittab
		inittab := "::sysinit:/etc/init.d/rcS\nttyS0::respawn:/bin/sh\n"
		return os.WriteFile(filepath.Join(mnt, "etc", "inittab"), []byte(inittab), 0o644)
	}
	// systemd-based distributions (Debian, Ubuntu)
	svc := `[Unit]
Description=Firecracker Network Init
After=basic.target

[Service]
Type=simple
ExecStart=/sbin/fc-net-init
RemainAfterExit=yes

[Install]
WantedBy=multi-user.target
`
	svcPath := filepath.Join(mnt, "etc", "systemd", "system", "fc-net-init.service")
	os.MkdirAll(filepath.Dir(svcPath), 0o755) //nolint:errcheck
	if err := os.WriteFile(svcPath, []byte(svc), 0o644); err != nil {
		return err
	}
	// Enable service dynamically
	wantsDir := filepath.Join(mnt, "etc", "systemd", "system", "multi-user.target.wants")
	os.MkdirAll(wantsDir, 0o755)                                                                          //nolint:errcheck
	os.Symlink("/etc/systemd/system/fc-net-init.service", filepath.Join(wantsDir, "fc-net-init.service")) //nolint:errcheck
	// Mask serial-getty@ttyS0.service and the udev device unit it depends on.
	// In Firecracker, udev never runs so dev-ttyS0.device never activates,
	// causing a 90-second systemd timeout. We replace it entirely with a
	// custom service that uses ConditionPathExists (filesystem check) instead.
	systemdDir := filepath.Join(mnt, "etc", "systemd", "system")
	os.Symlink("/dev/null", filepath.Join(systemdDir, "serial-getty@ttyS0.service")) //nolint:errcheck
	os.Symlink("/dev/null", filepath.Join(systemdDir, "dev-ttyS0.device"))           //nolint:errcheck
	// Custom console service: no udev dependency, autologin as root.
	consoleSvc := `[Unit]
Description=Serial Console (ttyS0)
After=basic.target
ConditionPathExists=/dev/ttyS0

[Service]
ExecStart=/sbin/agetty --autologin root --noclear ttyS0 vt220
Restart=always
RestartSec=1

[Install]
WantedBy=multi-user.target
`
	consoleSvcPath := filepath.Join(systemdDir, "fc-console.service")
	os.WriteFile(consoleSvcPath, []byte(consoleSvc), 0o644) //nolint:errcheck
	wantsDir2 := filepath.Join(systemdDir, "multi-user.target.wants")
	os.MkdirAll(wantsDir2, 0o755)                                                                       //nolint:errcheck
	os.Symlink("/etc/systemd/system/fc-console.service", filepath.Join(wantsDir2, "fc-console.service")) //nolint:errcheck
	// Clear root password for auto-login on console
	shadowPath := filepath.Join(mnt, "etc", "shadow")
	if shadowBytes, err := os.ReadFile(shadowPath); err == nil {
		lines := strings.Split(string(shadowBytes), "\n")
		for i, line := range lines {
			if strings.HasPrefix(line, "root:") {
				parts := strings.Split(line, ":")
				if len(parts) > 1 {
					parts[1] = ""
					lines[i] = strings.Join(parts, ":")
				}
			}
		}
		os.WriteFile(shadowPath, []byte(strings.Join(lines, "\n")), 0o640) //nolint:errcheck
	}
	// Write fstab so systemd mounts virtual filesystems at boot.
	// Minimal tarball rootfs has no fstab; without it /proc, /sys, /dev are not mounted.
	fstab := "proc\t/proc\tproc\tdefaults\t0 0\nsysfs\t/sys\tsysfs\tdefaults\t0 0\ndevtmpfs\t/dev\tdevtmpfs\tdefaults\t0 0\n"
	os.WriteFile(filepath.Join(mnt, "etc", "fstab"), []byte(fstab), 0o644) //nolint:errcheck
	return nil
}

// ——— Golden VM ——————————————————————————————————————————————————————————

// Golden boots a VM from the distro rootfs, lets it settle, pauses it, and
// writes a {vmstate,mem} snapshot pair under goldenDir(tag). Clones are later
// resumed from these artifacts. Any previous golden for the tag is replaced.
func (o *Orchestrator) Golden(tag string, distro string) error {
	if _, err := os.Stat(o.cfg.Kernel); err != nil {
		return fmt.Errorf("kernel not found — run init first: %w", err)
	}
	rootfsPath := o.cfg.RootfsPath(distro)
	if _, err := os.Stat(rootfsPath); err != nil {
		return fmt.Errorf("rootfs not found — run init first: %w", err)
	}
	goldenDir := o.goldenDir(tag)
	os.RemoveAll(goldenDir)         //nolint:errcheck
	os.MkdirAll(goldenDir, 0o755)   //nolint:errcheck
	os.MkdirAll(o.pidsDir(), 0o755) //nolint:errcheck
	// COW copy of rootfs for golden VM
	goldenRootfs := filepath.Join(goldenDir, "rootfs.ext4")
	if err := reflinkCopy(rootfsPath, goldenRootfs); err != nil {
		return fmt.Errorf("copy rootfs: %w", err)
	}
	sockPath := filepath.Join(goldenDir, "api.sock")
	os.Remove(sockPath) // remove stale socket
	// prepare network
	tap := "fctap0"
	var netIfaces firecracker.NetworkInterfaces
	if o.cfg.Bridge != "none" {
		if err := o.setupBridge(); err != nil {
			return err
		}
		if err := o.createTap(tap); err != nil {
			return err
		}
		defer destroyTap(tap)
		netIfaces = firecracker.NetworkInterfaces{
			firecracker.NetworkInterface{
				StaticConfiguration: &firecracker.StaticNetworkConfiguration{
					MacAddress:  "AA:FC:00:00:00:01",
					HostDevName: tap,
				},
				AllowMMDS: true,
			},
		}
	}
	vcpus := o.cfg.VCPUs
	mem := o.cfg.MemMiB
	rootDriveID := "rootfs"
	isRoot := true
	isRO := false
	trackDirty := true
	fcCfg := firecracker.Config{
		SocketPath:      sockPath,
		KernelImagePath: o.cfg.Kernel,
		KernelArgs:      o.cfg.BootArgs,
		MachineCfg: models.MachineConfiguration{
			VcpuCount:       &vcpus,
			MemSizeMib:      &mem,
			TrackDirtyPages: trackDirty,
		},
		Drives: []models.Drive{
			{
				DriveID:      &rootDriveID,
				PathOnHost:   &goldenRootfs,
				IsRootDevice: &isRoot,
				IsReadOnly:   &isRO,
			},
		},
		NetworkInterfaces: netIfaces,
		LogPath:           sockPath + ".log",
		LogLevel:          "Debug",
		FifoLogWriter:     o.log.Writer(),
	}
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()
	// find firecracker binary
	fcBin, err := exec.LookPath(o.cfg.FCBin)
	if err != nil {
		return fmt.Errorf("firecracker binary not found: %w", err)
	}
	cmd := firecracker.VMCommandBuilder{}.
		WithBin(fcBin).
		WithSocketPath(sockPath).
		Build(ctx)
	m, err := firecracker.NewMachine(ctx, fcCfg,
		firecracker.WithProcessRunner(cmd),
		firecracker.WithLogger(o.log),
	)
	if err != nil {
		return fmt.Errorf("new machine: %w", err)
	}
	o.log.Info("starting golden VM ...")
	if err := m.Start(ctx); err != nil {
		return fmt.Errorf("start golden VM: %w", err)
	}
	// write PID for tracking
	// NOTE(review): gated on VMID being non-empty — when no VMID is configured
	// no PID file is written and Status/Kill won't see the golden VM; confirm
	// this is intentional.
	if m.Cfg.VMID != "" {
		os.WriteFile(filepath.Join(o.pidsDir(), "golden.pid"), []byte(fmt.Sprintf("%d", cmd.Process.Pid)), 0o644) //nolint:errcheck
	}
	settleTime := 3 * time.Second
	if distro == "debian" || distro == "ubuntu" {
		// systemd takes significantly longer to reach multi-user.target than
		// Alpine's busybox init. Snapshot too early and serial-getty@ttyS0
		// won't have started yet, leaving the console unresponsive on resume.
		settleTime = 20 * time.Second
	}
	o.log.Infof("golden VM booted, letting it settle (%s) ...", settleTime)
	time.Sleep(settleTime)
	// pause
	o.log.Info("pausing golden VM ...")
	if err := m.PauseVM(ctx); err != nil {
		return fmt.Errorf("pause VM: %w", err)
	}
	// snapshot
	memPath := filepath.Join(goldenDir, "mem")
	vmstatePath := filepath.Join(goldenDir, "vmstate")
	o.log.Info("creating snapshot ...")
	if err := m.CreateSnapshot(ctx, memPath, vmstatePath); err != nil {
		return fmt.Errorf("create snapshot: %w", err)
	}
	// log sizes
	for _, f := range []string{memPath, vmstatePath} {
		if info, err := os.Stat(f); err == nil {
			o.log.Infof("  %s: %d MiB", filepath.Base(f), info.Size()/(1024*1024))
		}
	}
	// kill golden VM — we only need the artifacts
	m.StopVMM() //nolint:errcheck
	cancel()
	o.log.Infof("golden snapshot ready at %s/{vmstate,mem}", goldenDir)
	return nil
}

// GoldenTags returns a list of all existing golden VM tags.
func (o *Orchestrator) GoldenTags() []string {
	goldenDir := filepath.Join(o.cfg.BaseDir, "golden")
	entries, err := os.ReadDir(goldenDir)
	if err != nil {
		return nil
	}
	var tags []string
	for _, e := range entries {
		if e.IsDir() {
			tags = append(tags, e.Name())
		}
	}
	return tags
}

// ——— Spawn clones ——————————————————————————————————————————————————————

// Spawn creates count clones from the golden snapshot for tag. Individual
// clone failures are logged and skipped rather than aborting the batch.
func (o *Orchestrator) Spawn(count int, tag string) error {
	goldenDir := o.goldenDir(tag)
	for _, f := range []string{"vmstate", "mem"} {
		if _, err := os.Stat(filepath.Join(goldenDir, f)); err != nil {
			return fmt.Errorf("golden %s not found for tag %s — run golden first", f, tag)
		}
	}
	os.MkdirAll(o.clonesDir(), 0o755) //nolint:errcheck
	os.MkdirAll(o.pidsDir(), 0o755)   //nolint:errcheck
	if o.cfg.Bridge != "none" {
		if err := o.setupBridge(); err != nil {
			return err
		}
	}
	for i := 0; i < count; i++ {
		id := o.nextCloneID()
		if err := o.spawnOne(id, o.cfg.AutoNetConfig, tag); err != nil {
			o.log.Errorf("clone %d failed: %v", id, err)
			continue
		}
	}
	o.log.Infof("spawned %d clone(s) from golden snapshot", count)
	o.Status()
	return nil
}

// SpawnSingle spawns exactly one new clone and returns its ID.
// It is safe to call from multiple goroutines (nextCloneID is serialised by the
// filesystem scan, and each clone gets its own directory/tap).
// net controls whether the guest receives automatic IP configuration via MMDS
// (overrides FC_AUTO_NET_CONFIG for this clone). Pass cfg.AutoNetConfig to
// preserve the global default.
func (o *Orchestrator) SpawnSingle(net bool, tag string) (int, error) {
	goldenDir := o.goldenDir(tag)
	for _, f := range []string{"vmstate", "mem"} {
		if _, err := os.Stat(filepath.Join(goldenDir, f)); err != nil {
			return 0, fmt.Errorf("golden %s not found for tag %s — run golden first", f, tag)
		}
	}
	os.MkdirAll(o.clonesDir(), 0o755) //nolint:errcheck
	os.MkdirAll(o.pidsDir(), 0o755)   //nolint:errcheck
	if o.cfg.Bridge != "none" {
		if err := o.setupBridge(); err != nil {
			return 0, err
		}
	}
	id := o.nextCloneID()
	if err := o.spawnOne(id, net, tag); err != nil {
		return 0, err
	}
	return id, nil
}

// KillClone kills a single clone by ID: terminates its proxy process,
// destroys its tap device, and removes its working directory.
func (o *Orchestrator) KillClone(id int) error {
	pidFile := filepath.Join(o.pidsDir(), fmt.Sprintf("clone-%d.proxy.pid", id))
	if data, err := os.ReadFile(pidFile); err == nil {
		if pid, err := strconv.Atoi(strings.TrimSpace(string(data))); err == nil {
			if p, err := os.FindProcess(pid); err == nil {
				_ = p.Kill()
				o.log.Infof("clone %d: killed proxy pid %d", id, pid)
			}
		}
		os.Remove(pidFile) //nolint:errcheck
	}
	tapName := fmt.Sprintf("fctap%d", id)
	destroyTap(tapName)
	os.RemoveAll(filepath.Join(o.clonesDir(), strconv.Itoa(id))) //nolint:errcheck
	o.log.Infof("clone %d: destroyed", id)
	return nil
}

// spawnOne materialises clone id from the golden snapshot for tag: COW-copies
// the rootfs, copies vmstate, creates the tap (when bridged), and launches a
// detached console-proxy daemon that owns the VM lifecycle. Returns once the
// proxy's console socket appears (i.e. the snapshot restore succeeded).
func (o *Orchestrator) spawnOne(id int, net bool, tag string) error {
	goldenDir := o.goldenDir(tag)
	cloneDir := filepath.Join(o.clonesDir(), strconv.Itoa(id))
	os.MkdirAll(cloneDir, 0o755)                                       //nolint:errcheck
	os.WriteFile(filepath.Join(cloneDir, "tag"), []byte(tag), 0o644)   //nolint:errcheck
	sockPath := filepath.Join(cloneDir, "api.sock")
	os.Remove(sockPath) //nolint:errcheck
	// --- COW rootfs ---
	cloneRootfs := filepath.Join(cloneDir, "rootfs.ext4")
	o.log.Infof("clone %d: running: cp --reflink=always %s %s", id, filepath.Join(goldenDir, "rootfs.ext4"), cloneRootfs)
	if err := reflinkCopy(filepath.Join(goldenDir, "rootfs.ext4"), cloneRootfs); err != nil {
		return fmt.Errorf("copy rootfs: %w", err)
	}
	// --- vmstate: small, cheap copy ---
	cloneVmstate := filepath.Join(cloneDir, "vmstate")
	if err := copyFile(filepath.Join(goldenDir, "vmstate"), cloneVmstate); err != nil {
		return fmt.Errorf("copy vmstate: %w", err)
	}
	// --- Networking ---
	tapName := fmt.Sprintf("fctap%d", id)
	if o.cfg.Bridge != "none" {
		o.log.Infof("clone %d: running: ip tuntap add dev %s mode tap", id, tapName)
		o.log.Infof("clone %d: running: ip link set %s up", id, tapName)
		o.log.Infof("clone %d: running: ip link set %s master %s", id, tapName, o.cfg.Bridge)
		if err := o.createTap(tapName); err != nil {
			return err
		}
	}
	// --- Launch console proxy (detached daemon) ---
	// The proxy owns the full VM lifecycle: it starts firecracker with a PTY,
	// loads the snapshot, and serves cloneDir/console.sock until the VM exits.
	selfExe, err := os.Executable()
	if err != nil {
		return fmt.Errorf("resolve self path: %w", err)
	}
	proxyArgs := []string{"_console-proxy", "--id", strconv.Itoa(id), "--tag", tag}
	if o.cfg.Bridge != "none" {
		proxyArgs = append(proxyArgs, "--tap", tapName)
	}
	proxyCmd := exec.Command(selfExe, proxyArgs...)
	// New session: proxy is detached from our terminal and survives our exit.
	proxyCmd.SysProcAttr = &syscall.SysProcAttr{Setsid: true}
	proxyCmd.Stdin = nil
	proxyCmd.Stdout = nil
	proxyCmd.Stderr = nil
	// Build proxy env: inherit parent env, then force FC_AUTO_NET_CONFIG to
	// match the per-clone net flag so the proxy picks it up via DefaultConfig().
	proxyEnv := make([]string, 0, len(os.Environ())+1)
	for _, kv := range os.Environ() {
		if !strings.HasPrefix(kv, "FC_AUTO_NET_CONFIG=") {
			proxyEnv = append(proxyEnv, kv)
		}
	}
	if net {
		proxyEnv = append(proxyEnv, "FC_AUTO_NET_CONFIG=1")
	}
	proxyCmd.Env = proxyEnv
	if err := proxyCmd.Start(); err != nil {
		return fmt.Errorf("start console proxy: %w", err)
	}
	os.WriteFile(filepath.Join(o.pidsDir(), fmt.Sprintf("clone-%d.proxy.pid", id)), []byte(strconv.Itoa(proxyCmd.Process.Pid)), 0o644) //nolint:errcheck
	// Wait for the console socket to appear — it is created by the proxy once
	// the VM is running, so this also gates on successful snapshot restore.
	consoleSockPath := filepath.Join(cloneDir, "console.sock")
	if err := waitForSocket(consoleSockPath, 15*time.Second); err != nil {
		return fmt.Errorf("clone %d: %w", id, err)
	}
	o.log.Infof("clone %d: ready (proxy pid=%d, tap=%s, console=%s)", id, proxyCmd.Process.Pid, tapName, consoleSockPath)
	return nil
}

// waitForSocket polls path every 50 ms until it appears or timeout elapses.
func waitForSocket(path string, timeout time.Duration) error {
	deadline := time.Now().Add(timeout)
	for time.Now().Before(deadline) {
		if _, err := os.Stat(path); err == nil {
			return nil
		}
		time.Sleep(50 * time.Millisecond)
	}
	return fmt.Errorf("timed out waiting for %s", path)
}

// ——— Status ————————————————————————————————————————————————————————————

// Status prints a table of known clones and whether their recorded PID is
// still alive (checked via /proc, so Linux-only).
func (o *Orchestrator) Status() {
	entries, _ := os.ReadDir(o.pidsDir()) // best-effort: empty dir prints an empty table
	fmt.Println("=== Running clones ===")
	for _, e := range entries {
		if !strings.HasPrefix(e.Name(), "clone-") {
			continue
		}
		data, _ := os.ReadFile(filepath.Join(o.pidsDir(), e.Name()))
		pid := strings.TrimSpace(string(data))
		alive := "DEAD"
		if _, err := os.Stat(fmt.Sprintf("/proc/%s", pid)); err == nil {
			alive = "alive"
		}
		name := strings.TrimSuffix(e.Name(), ".pid")
		fmt.Printf("  %-12s pid=%-8s %s\n", name, pid, alive)
	}
}

// ——— Kill ——————————————————————————————————————————————————————————————

// Kill tears down all VMs: in-memory clones, processes recorded in PID files
// from previous runs, and any stale fctap* devices left on the host.
func (o *Orchestrator) Kill() error {
	// kill in-memory clones
	o.mu.Lock()
	for id, c := range o.clones {
		c.Machine.StopVMM() //nolint:errcheck
		c.Cancel()
		destroyTap(c.Tap)
		delete(o.clones, id)
	}
	o.mu.Unlock()
	// also kill any from PID files (from a previous run)
	entries, _ := os.ReadDir(o.pidsDir())
	for _, e := range entries {
		data, err := os.ReadFile(filepath.Join(o.pidsDir(), e.Name()))
		if err != nil {
			continue
		}
		pid, err := strconv.Atoi(strings.TrimSpace(string(data)))
		if err != nil {
			continue
		}
		if p, err := os.FindProcess(pid); err == nil {
			p.Kill() //nolint:errcheck
			o.log.Infof("killed pid %d", pid)
		}
		os.Remove(filepath.Join(o.pidsDir(), e.Name())) //nolint:errcheck
	}
	// destroy stale tap devices
	out, _ := exec.Command("ip", "-o", "link", "show").Output()
	for _, line := range strings.Split(string(out), "\n") {
		if idx := strings.Index(line, "fctap"); idx >= 0 {
			fields := strings.SplitN(line[idx:], ":", 2)
			if len(fields) > 0 {
				tap := strings.TrimSpace(fields[0])
				// FIX: `ip -o link` prints enslaved taps as "fctapN@bridge";
				// keep only the device name or the delete targets a
				// nonexistent interface.
				tap, _, _ = strings.Cut(tap, "@")
				destroyTap(tap)
			}
		}
	}
	o.log.Info("all VMs killed")
	return nil
}

// ——— Cleanup ——————————————————————————————————————————————————————————

// Cleanup kills everything and removes all on-disk state (clones, golden
// snapshots, PID files) plus the bridge device when one was configured.
func (o *Orchestrator) Cleanup() error {
	o.Kill()                                             //nolint:errcheck
	os.RemoveAll(o.clonesDir())                          //nolint:errcheck
	os.RemoveAll(filepath.Join(o.cfg.BaseDir, "golden")) //nolint:errcheck
	os.RemoveAll(o.pidsDir())                            //nolint:errcheck
	if o.cfg.Bridge != "none" {
		_ = run("ip", "link", "del", o.cfg.Bridge)
		o.log.Infof("removed bridge %s", o.cfg.Bridge)
	}
	o.log.Infof("cleaned up %s", o.cfg.BaseDir)
	return nil
}

// ——— Helpers ——————————————————————————————————————————————————————————

// installUbuntuPackages bind-mounts the virtual filesystems into mnt, then
// runs apt-get inside the chroot to install the minimal toolset required for
// network operation and general use. Bind mounts are always cleaned up on
// return regardless of whether apt-get succeeds.
func installUbuntuPackages(mnt string, logger *log.Entry) error {
	type bm struct{ fstype, src, dst string }
	mounts := []bm{
		{"proc", "proc", "proc"},
		{"sysfs", "sysfs", "sys"},
		{"devtmpfs", "devtmpfs", "dev"},
		{"devpts", "devpts", "dev/pts"},
	}
	// mount in order; on any failure unmount whatever succeeded and return.
	for i, m := range mounts {
		dst := filepath.Join(mnt, m.dst)
		os.MkdirAll(dst, 0o755) //nolint:errcheck
		logger.Infof("running: mount -t %s %s %s", m.fstype, m.src, dst)
		if err := run("mount", "-t", m.fstype, m.src, dst); err != nil {
			for j := i - 1; j >= 0; j-- {
				logger.Infof("running: umount %s", filepath.Join(mnt, mounts[j].dst))
				run("umount", filepath.Join(mnt, mounts[j].dst)) //nolint:errcheck
			}
			return fmt.Errorf("mount %s: %w", m.dst, err)
		}
	}
	defer func() {
		for i := len(mounts) - 1; i >= 0; i-- {
			logger.Infof("running: umount %s", filepath.Join(mnt, mounts[i].dst))
			run("umount", filepath.Join(mnt, mounts[i].dst)) //nolint:errcheck
		}
	}()
	// Provide DNS resolution inside the chroot so apt-get can reach the network.
	if data, err := os.ReadFile("/etc/resolv.conf"); err == nil {
		os.WriteFile(filepath.Join(mnt, "etc/resolv.conf"), data, 0o644) //nolint:errcheck
	}
	pkgs := "bash curl iproute2 wget ca-certificates systemd systemd-sysv util-linux"
	script := "DEBIAN_FRONTEND=noninteractive apt-get update -q && " +
		"DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends " + pkgs + " && " +
		"apt-get clean && rm -rf /var/lib/apt/lists/*"
	logger.Infof("running: chroot %s /bin/sh -c %q", mnt, script)
	cmd := exec.Command("chroot", mnt, "/bin/sh", "-c", script)
	cmd.Stdout = logger.Writer()
	cmd.Stderr = logger.Writer()
	return cmd.Run()
}

// nextCloneID scans the clones directory and returns one greater than the
// highest numeric entry (1 when the directory is empty or unreadable).
func (o *Orchestrator) nextCloneID() int {
	maxID := 0 // renamed from `max`: avoid shadowing the Go 1.21 builtin
	entries, _ := os.ReadDir(o.clonesDir())
	for _, e := range entries {
		if n, err := strconv.Atoi(e.Name()); err == nil && n > maxID {
			maxID = n
		}
	}
	return maxID + 1
}

// reflinkCopy tries cp --reflink=always (COW clone on supporting filesystems),
// falling back to a plain byte copy when reflinks are unavailable.
func reflinkCopy(src, dst string) error {
	if err := exec.Command("cp", "--reflink=always", src, dst).Run(); err != nil {
		// fallback: regular copy
		return copyFile(src, dst)
	}
	return nil
}

// copyFile copies src to dst, truncating dst if it exists. The destination's
// Close error is propagated: a short flush must not be reported as success.
func copyFile(src, dst string) error {
	in, err := os.Open(src)
	if err != nil {
		return err
	}
	defer in.Close()
	out, err := os.Create(dst)
	if err != nil {
		return err
	}
	if _, err := io.Copy(out, in); err != nil {
		out.Close() //nolint:errcheck // the copy error takes precedence
		return err
	}
	// FIX: previously deferred and discarded — a failed flush-on-close would
	// silently produce a truncated file.
	return out.Close()
}

// downloadFile streams url to dest, failing on any non-200 response. The
// destination's Close error is propagated for the same reason as copyFile.
func downloadFile(url, dest string) error {
	resp, err := http.Get(url)
	if err != nil {
		return err
	}
	defer resp.Body.Close()
	if resp.StatusCode != 200 {
		return fmt.Errorf("HTTP %d for %s", resp.StatusCode, url)
	}
	f, err := os.Create(dest)
	if err != nil {
		return err
	}
	if _, err := io.Copy(f, resp.Body); err != nil {
		f.Close() //nolint:errcheck // the copy error takes precedence
		return err
	}
	return f.Close()
}