diff --git a/orchestrator/config.go b/orchestrator/config.go index b6323fb..c739d2a 100644 --- a/orchestrator/config.go +++ b/orchestrator/config.go @@ -34,7 +34,7 @@ func DefaultConfig() Config { GuestPrefix: envOr("FC_GUEST_PREFIX", "172.30.0"), GuestGW: envOr("FC_GUEST_GW", "172.30.0.1"), AutoNetConfig: envOr("FC_AUTO_NET_CONFIG", "") == "1", - BootArgs: "console=ttyS0 reboot=k panic=1 pci=off i8042.noaux quiet loglevel=0", + BootArgs: "console=ttyS0 reboot=k panic=1 pci=off i8042.noaux", } c.Kernel = envOr("FC_KERNEL", c.BaseDir+"/vmlinux") c.KernelURL = envOr("FC_KERNEL_URL", diff --git a/orchestrator/console.go b/orchestrator/console.go index 2a1eeac..46be0d9 100644 --- a/orchestrator/console.go +++ b/orchestrator/console.go @@ -164,6 +164,17 @@ func RunConsoleProxy(cfg Config, id int, tapName, tag string) error { logger.Infof("clone %d: restored in %s (pid=%d, tap=%s)", id, elapsed.Round(time.Millisecond), cmd.Process.Pid, tapName) + // --- Open console log (captures all serial output from boot) --- + consoleLogPath := filepath.Join(cloneDir, "console.log") + consoleLog, err := os.OpenFile(consoleLogPath, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0o644) + if err != nil { + logger.Warnf("could not open console log: %v", err) + consoleLog = nil + } + if consoleLog != nil { + defer consoleLog.Close() + } + // --- Create console socket --- os.Remove(consoleSockPath) //nolint:errcheck listener, err := net.Listen("unix", consoleSockPath) @@ -191,7 +202,7 @@ func RunConsoleProxy(cfg Config, id int, tapName, tag string) error { if resizeListener != nil { go serveResize(resizeListener, ptm, vmDone, logger) } - serveConsole(listener, ptm, vmDone, logger) + serveConsole(listener, ptm, consoleLog, vmDone, logger) listener.Close() if resizeListener != nil { @@ -274,16 +285,20 @@ func (a *atomicWriter) Write(p []byte) (int, error) { // A background goroutine reads from the PTY master continuously (discarding // output when no client is connected so the VM never blocks on a full 
buffer). // Only one client is served at a time; sessions are serialised. -func serveConsole(listener net.Listener, ptm *os.File, vmDone <-chan struct{}, logger *log.Entry) { +func serveConsole(listener net.Listener, ptm *os.File, logFile *os.File, vmDone <-chan struct{}, logger *log.Entry) { aw := &atomicWriter{w: io.Discard} // Background PTY reader — runs for the full VM lifetime. + // All output is tee'd to logFile (if set) so boot messages are never lost. go func() { buf := make([]byte, 4096) for { n, err := ptm.Read(buf) if n > 0 { aw.Write(buf[:n]) //nolint:errcheck + if logFile != nil { + logFile.Write(buf[:n]) //nolint:errcheck + } } if err != nil { return // PTY closed (VM exited) diff --git a/orchestrator/network.go b/orchestrator/network.go index 2700cd1..ed094ca 100644 --- a/orchestrator/network.go +++ b/orchestrator/network.go @@ -54,6 +54,8 @@ func (o *Orchestrator) setupBridge() error { // createTap creates a tap device and attaches it to the bridge. func (o *Orchestrator) createTap(name string) error { + // Destroy any stale tap with this name before (re)creating it. + _ = run("ip", "link", "del", name) if err := run("ip", "tuntap", "add", "dev", name, "mode", "tap"); err != nil { return fmt.Errorf("create tap %s: %w", name, err) } diff --git a/orchestrator/orchestrator.go b/orchestrator/orchestrator.go index 86045df..08206c5 100644 --- a/orchestrator/orchestrator.go +++ b/orchestrator/orchestrator.go @@ -80,7 +80,7 @@ func (o *Orchestrator) Init(distro string) error { func (o *Orchestrator) buildRootfs(distro, rootfsPath string) error { sizeMB := 512 - if distro == "debian" { + if distro == "debian" || distro == "ubuntu" { sizeMB = 2048 } mnt := filepath.Join(o.cfg.BaseDir, "mnt") @@ -161,6 +161,8 @@ func (o *Orchestrator) buildRootfs(distro, rootfsPath string) error { netInitScript := `#!/bin/sh # Poll Firecracker MMDS for network config, apply it, then exit. # Runs in background; loops until MMDS responds (survives snapshot resume). 
+ip link set eth0 up 2>/dev/null +ip route add 169.254.169.254 dev eth0 2>/dev/null ip addr add 169.254.169.2/32 dev eth0 2>/dev/null while true; do ip=$(wget -q -T1 -O- http://169.254.169.254/ip 2>/dev/null | tr -d '"') @@ -179,12 +181,6 @@ done return err } - // write fc-net-init - os.MkdirAll(filepath.Join(mnt, "sbin"), 0o755) - if err := os.WriteFile(filepath.Join(mnt, "sbin", "fc-net-init"), []byte(netInitScript), 0o755); err != nil { - return err - } - if distro == "alpine" { // write init script initScript := `#!/bin/sh @@ -208,10 +204,10 @@ ip route add 169.254.169.254 dev eth0 2>/dev/null // systemd-based distributions (Debian, Ubuntu) svc := `[Unit] Description=Firecracker Network Init -After=network.target +After=basic.target [Service] -Type=oneshot +Type=simple ExecStart=/sbin/fc-net-init RemainAfterExit=yes @@ -229,10 +225,33 @@ WantedBy=multi-user.target os.MkdirAll(wantsDir, 0o755) os.Symlink("/etc/systemd/system/fc-net-init.service", filepath.Join(wantsDir, "fc-net-init.service")) //nolint:errcheck - // Ensure serial console is active - gettyWantsDir := filepath.Join(mnt, "etc", "systemd", "system", "getty.target.wants") - os.MkdirAll(gettyWantsDir, 0o755) - os.Symlink("/lib/systemd/system/serial-getty@.service", filepath.Join(gettyWantsDir, "serial-getty@ttyS0.service")) //nolint:errcheck + // Mask serial-getty@ttyS0.service and the udev device unit it depends on. + // In Firecracker, udev never runs so dev-ttyS0.device never activates, + // causing a 90-second systemd timeout. We replace it entirely with a + // custom service that uses ConditionPathExists (filesystem check) instead. + systemdDir := filepath.Join(mnt, "etc", "systemd", "system") + os.Symlink("/dev/null", filepath.Join(systemdDir, "serial-getty@ttyS0.service")) //nolint:errcheck + os.Symlink("/dev/null", filepath.Join(systemdDir, "dev-ttyS0.device")) //nolint:errcheck + + // Custom console service: no udev dependency, autologin as root. 
+ consoleSvc := `[Unit] +Description=Serial Console (ttyS0) +After=basic.target +ConditionPathExists=/dev/ttyS0 + +[Service] +ExecStart=/sbin/agetty --autologin root --noclear ttyS0 vt220 +Restart=always +RestartSec=1 + +[Install] +WantedBy=multi-user.target +` + consoleSvcPath := filepath.Join(systemdDir, "fc-console.service") + os.WriteFile(consoleSvcPath, []byte(consoleSvc), 0o644) //nolint:errcheck + wantsDir2 := filepath.Join(systemdDir, "multi-user.target.wants") + os.MkdirAll(wantsDir2, 0o755) + os.Symlink("/etc/systemd/system/fc-console.service", filepath.Join(wantsDir2, "fc-console.service")) //nolint:errcheck // Clear root password for auto-login on console shadowPath := filepath.Join(mnt, "etc", "shadow") @@ -249,6 +268,11 @@ WantedBy=multi-user.target } os.WriteFile(shadowPath, []byte(strings.Join(lines, "\n")), 0o640) //nolint:errcheck } + + // Write fstab so systemd mounts virtual filesystems at boot. + // Minimal tarball rootfs has no fstab; without it /proc, /sys, /dev are not mounted. + fstab := "proc\t/proc\tproc\tdefaults\t0 0\nsysfs\t/sys\tsysfs\tdefaults\t0 0\ndevtmpfs\t/dev\tdevtmpfs\tdefaults\t0 0\n" + os.WriteFile(filepath.Join(mnt, "etc", "fstab"), []byte(fstab), 0o644) //nolint:errcheck } return nil } @@ -363,8 +387,15 @@ func (o *Orchestrator) Golden(tag string, distro string) error { []byte(fmt.Sprintf("%d", cmd.Process.Pid)), 0o644) } - o.log.Info("golden VM booted, letting it settle ...") - time.Sleep(3 * time.Second) + settleTime := 3 * time.Second + if distro == "debian" || distro == "ubuntu" { + // systemd takes significantly longer to reach multi-user.target than + // Alpine's busybox init. Snapshot too early and fc-console.service + // won't have started yet, leaving the console unresponsive on resume. 
+ settleTime = 20 * time.Second + } + o.log.Infof("golden VM booted, letting it settle (%s) ...", settleTime) + time.Sleep(settleTime) // pause o.log.Info("pausing golden VM ...") @@ -494,6 +525,7 @@ func (o *Orchestrator) spawnOne(id int, net bool, tag string) error { goldenDir := o.goldenDir(tag) cloneDir := filepath.Join(o.clonesDir(), strconv.Itoa(id)) os.MkdirAll(cloneDir, 0o755) + os.WriteFile(filepath.Join(cloneDir, "tag"), []byte(tag), 0o644) //nolint:errcheck sockPath := filepath.Join(cloneDir, "api.sock") os.Remove(sockPath) @@ -707,7 +739,7 @@ func installUbuntuPackages(mnt string, logger *log.Entry) error { os.WriteFile(filepath.Join(mnt, "etc/resolv.conf"), data, 0o644) //nolint:errcheck } - pkgs := "bash curl iproute2 wget ca-certificates" + pkgs := "bash curl iproute2 wget ca-certificates systemd systemd-sysv util-linux" script := "DEBIAN_FRONTEND=noninteractive apt-get update -q && " + "DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends " + pkgs + " && " + "apt-get clean && rm -rf /var/lib/apt/lists/*" diff --git a/orchestrator/serve.go b/orchestrator/serve.go index aecafbd..670475c 100644 --- a/orchestrator/serve.go +++ b/orchestrator/serve.go @@ -49,9 +49,9 @@ func Serve(orch *Orchestrator, addr string) error { mux.HandleFunc("/clones", func(w http.ResponseWriter, r *http.Request) { switch r.Method { case http.MethodGet, "": - ids := runningCloneIDs(orch.cfg) + clones := runningClones(orch.cfg) w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(ids) //nolint:errcheck + json.NewEncoder(w).Encode(clones) //nolint:errcheck case http.MethodPost: // Optional JSON body: {"net": bool, "tag": string} // Defaults to the server's FC_AUTO_NET_CONFIG setting. @@ -215,14 +215,19 @@ func bridgeWS(ws *websocket.Conn, consoleConn net.Conn, resizeConn net.Conn) { <-sockDone } -// runningCloneIDs returns clone IDs that have a live console socket. 
-func runningCloneIDs(cfg Config) []int { +type cloneEntry struct { + ID int `json:"id"` + Tag string `json:"tag"` +} + +// runningClones returns entries for clones that have a live console socket. +func runningClones(cfg Config) []cloneEntry { clonesDir := filepath.Join(cfg.BaseDir, "clones") entries, err := os.ReadDir(clonesDir) if err != nil { return nil } - var ids []int + var clones []cloneEntry for _, e := range entries { if !e.IsDir() { continue @@ -232,11 +237,16 @@ func runningCloneIDs(cfg Config) []int { continue } sock := filepath.Join(clonesDir, e.Name(), "console.sock") - if _, err := os.Stat(sock); err == nil { - ids = append(ids, id) + if _, err := os.Stat(sock); err != nil { + continue } + tag := "unknown" + if raw, err := os.ReadFile(filepath.Join(clonesDir, e.Name(), "tag")); err == nil { + tag = strings.TrimSpace(string(raw)) + } + clones = append(clones, cloneEntry{ID: id, Tag: tag}) } - return ids + return clones } func writeWSError(ws *websocket.Conn, msg string) { diff --git a/orchestrator/web/terminal.html b/orchestrator/web/terminal.html index b534d08..09983e4 100644 --- a/orchestrator/web/terminal.html +++ b/orchestrator/web/terminal.html @@ -62,6 +62,11 @@ } .clone-entry button.destroy:hover { background: #2a1a1a; } .clone-entry button.destroy:disabled { color: #555; cursor: default; } + .clone-tag { + font-size: .72rem; + color: #666; + margin-left: .4rem; + } #index .none { color: #666; font-size: .9rem; } @@ -183,11 +188,11 @@ noneEl.style.display = 'none'; const li = document.createElement('li'); li.className = 'clone-entry'; - li.dataset.id = c; + li.dataset.id = c.id; li.innerHTML = - `clone ${c}` + - ``; - li.querySelector('.destroy').addEventListener('click', () => destroyClone(c, li)); + `clone ${c.id}${c.tag}` + + ``; + li.querySelector('.destroy').addEventListener('click', () => destroyClone(c.id, li)); ul.appendChild(li); }