- Load snapshot with ResumeVM: false so MMDS data can be written while VM is paused - Call ResumeVM explicitly after configureMmds succeeds - Skip PUT /mmds/config on restored VMs (Firecracker rejects it with 400) - Strip JSON quotes from MMDS values with tr -d '"' in net-init script - Add 169.254.169.2/32 link-local addr and flush eth0 before applying new IP Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
339 lines
9.1 KiB
Go
339 lines
9.1 KiB
Go
package orchestrator
|
|
|
|
import (
|
|
"bufio"
|
|
"context"
|
|
"encoding/json"
|
|
"fmt"
|
|
"io"
|
|
"net"
|
|
"os"
|
|
"os/exec"
|
|
"path/filepath"
|
|
"strconv"
|
|
"sync"
|
|
"syscall"
|
|
"time"
|
|
|
|
"github.com/creack/pty"
|
|
firecracker "github.com/firecracker-microvm/firecracker-go-sdk"
|
|
"github.com/firecracker-microvm/firecracker-go-sdk/client/models"
|
|
log "github.com/sirupsen/logrus"
|
|
)
|
|
|
|
// RunConsoleProxy is the entrypoint for the "_console-proxy" internal subcommand.
|
|
// It restores a Firecracker clone from the golden snapshot, connecting its serial
|
|
// console (ttyS0) to a PTY, then serves the PTY master on a Unix socket at
|
|
// {cloneDir}/console.sock for the lifetime of the VM.
|
|
func RunConsoleProxy(cfg Config, id int, tapName string) error {
|
|
logger := log.WithField("component", fmt.Sprintf("console-proxy[%d]", id))
|
|
|
|
cloneDir := filepath.Join(cfg.BaseDir, "clones", strconv.Itoa(id))
|
|
goldenDir := filepath.Join(cfg.BaseDir, "golden")
|
|
sockPath := filepath.Join(cloneDir, "api.sock")
|
|
consoleSockPath := filepath.Join(cloneDir, "console.sock")
|
|
sharedMem := filepath.Join(goldenDir, "mem")
|
|
cloneVmstate := filepath.Join(cloneDir, "vmstate")
|
|
|
|
// --- Create PTY ---
|
|
// ptm = master (we hold and proxy), pts = slave (firecracker's stdio)
|
|
ptm, pts, err := pty.Open()
|
|
if err != nil {
|
|
return fmt.Errorf("open pty: %w", err)
|
|
}
|
|
pty.Setsize(ptm, &pty.Winsize{Rows: 24, Cols: 80}) //nolint:errcheck
|
|
|
|
fcBin, err := exec.LookPath(cfg.FCBin)
|
|
if err != nil {
|
|
pts.Close()
|
|
ptm.Close()
|
|
return fmt.Errorf("firecracker not found: %w", err)
|
|
}
|
|
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
defer cancel()
|
|
|
|
// --- Build firecracker command with slave PTY as stdin/stdout/stderr ---
|
|
// Setsid makes the slave PTY the controlling terminal for firecracker,
|
|
// which is required for job control (Ctrl+C, SIGWINCH, etc.) to work.
|
|
cmd := firecracker.VMCommandBuilder{}.
|
|
WithBin(fcBin).
|
|
WithSocketPath(sockPath).
|
|
Build(ctx)
|
|
cmd.Stdin = pts
|
|
cmd.Stdout = pts
|
|
cmd.Stderr = pts
|
|
cmd.SysProcAttr = &syscall.SysProcAttr{Setsid: true}
|
|
|
|
// --- Build Machine config (mirrors spawnOne) ---
|
|
vcpus := cfg.VCPUs
|
|
mem := cfg.MemMiB
|
|
|
|
fcCfg := firecracker.Config{
|
|
SocketPath: sockPath,
|
|
MachineCfg: models.MachineConfiguration{
|
|
VcpuCount: &vcpus,
|
|
MemSizeMib: &mem,
|
|
},
|
|
LogPath: sockPath + ".log",
|
|
LogLevel: "Debug",
|
|
FifoLogWriter: logger.Writer(),
|
|
}
|
|
|
|
if cfg.Bridge != "none" && tapName != "" {
|
|
mac := fmt.Sprintf("AA:FC:00:00:%02X:%02X", id/256, id%256)
|
|
fcCfg.NetworkInterfaces = firecracker.NetworkInterfaces{
|
|
{
|
|
StaticConfiguration: &firecracker.StaticNetworkConfiguration{
|
|
MacAddress: mac,
|
|
HostDevName: tapName,
|
|
},
|
|
AllowMMDS: true,
|
|
},
|
|
}
|
|
}
|
|
|
|
m, err := firecracker.NewMachine(ctx, fcCfg,
|
|
firecracker.WithProcessRunner(cmd),
|
|
firecracker.WithLogger(logger),
|
|
firecracker.WithSnapshot(sharedMem, cloneVmstate, func(sc *firecracker.SnapshotConfig) {
|
|
sc.ResumeVM = true
|
|
}),
|
|
)
|
|
if err != nil {
|
|
pts.Close()
|
|
ptm.Close()
|
|
return fmt.Errorf("new machine: %w", err)
|
|
}
|
|
|
|
// Swap in network-override snapshot handler (same as spawnOne)
|
|
if cfg.Bridge != "none" && tapName != "" {
|
|
m.Handlers.FcInit = m.Handlers.FcInit.Swap(firecracker.Handler{
|
|
Name: firecracker.LoadSnapshotHandlerName,
|
|
Fn: func(ctx context.Context, m *firecracker.Machine) error {
|
|
return loadSnapshotWithNetworkOverride(
|
|
ctx, sockPath, sharedMem, cloneVmstate, tapName,
|
|
)
|
|
},
|
|
})
|
|
}
|
|
|
|
// --- Start VM (blocks until snapshot is loaded and VM is PAUSED) ---
|
|
start := time.Now()
|
|
logger.Infof("restoring clone %d from snapshot ...", id)
|
|
if err := m.Start(ctx); err != nil {
|
|
pts.Close()
|
|
ptm.Close()
|
|
return fmt.Errorf("restore clone %d: %w", id, err)
|
|
}
|
|
|
|
// Inject per-clone IP config via MMDS so the fc-net-init guest daemon
|
|
// can configure eth0 without any manual steps inside the VM.
|
|
// This must happen while the VM is PAUSED (ResumeVM: false in snapshot load).
|
|
if cfg.AutoNetConfig && cfg.Bridge != "none" {
|
|
guestIP := fmt.Sprintf("%s.%d/24", cfg.GuestPrefix, 10+id)
|
|
if err := configureMmds(ctx, sockPath, guestIP, cfg.GuestGW, "1.1.1.1"); err != nil {
|
|
logger.Warnf("MMDS config failed (guest network will be unconfigured): %v", err)
|
|
} else {
|
|
logger.Infof("MMDS: assigned %s gw %s to clone %d", guestIP, cfg.GuestGW, id)
|
|
}
|
|
}
|
|
|
|
// Now RESUME the VM to start execution!
|
|
if err := m.ResumeVM(ctx); err != nil {
|
|
pts.Close()
|
|
ptm.Close()
|
|
return fmt.Errorf("resume clone %d: %w", id, err)
|
|
}
|
|
elapsed := time.Since(start)
|
|
|
|
// Release our copy of the slave — firecracker holds its own fd now.
|
|
// Closing here ensures we get EOF on ptm when firecracker exits.
|
|
pts.Close()
|
|
|
|
// --- Write PID file ---
|
|
pidsDir := filepath.Join(cfg.BaseDir, "pids")
|
|
os.MkdirAll(pidsDir, 0o755) //nolint:errcheck
|
|
if cmd.Process != nil {
|
|
os.WriteFile( //nolint:errcheck
|
|
filepath.Join(pidsDir, fmt.Sprintf("clone-%d.pid", id)),
|
|
[]byte(strconv.Itoa(cmd.Process.Pid)),
|
|
0o644,
|
|
)
|
|
}
|
|
logger.Infof("clone %d: restored in %s (pid=%d, tap=%s)",
|
|
id, elapsed.Round(time.Millisecond), cmd.Process.Pid, tapName)
|
|
|
|
// --- Create console socket ---
|
|
os.Remove(consoleSockPath) //nolint:errcheck
|
|
listener, err := net.Listen("unix", consoleSockPath)
|
|
if err != nil {
|
|
ptm.Close()
|
|
return fmt.Errorf("listen on console socket: %w", err)
|
|
}
|
|
|
|
// --- Create resize sideband socket ---
|
|
resizeSockPath := filepath.Join(cloneDir, "console-resize.sock")
|
|
os.Remove(resizeSockPath) //nolint:errcheck
|
|
resizeListener, err := net.Listen("unix", resizeSockPath)
|
|
if err != nil {
|
|
logger.Warnf("could not create resize socket, terminal resize will be unavailable: %v", err)
|
|
resizeListener = nil
|
|
}
|
|
|
|
// --- Serve until VM exits ---
|
|
vmDone := make(chan struct{})
|
|
go func() {
|
|
m.Wait(ctx) //nolint:errcheck
|
|
close(vmDone)
|
|
}()
|
|
|
|
if resizeListener != nil {
|
|
go serveResize(resizeListener, ptm, vmDone, logger)
|
|
}
|
|
serveConsole(listener, ptm, vmDone, logger)
|
|
|
|
listener.Close()
|
|
if resizeListener != nil {
|
|
resizeListener.Close()
|
|
}
|
|
ptm.Close()
|
|
os.Remove(consoleSockPath) //nolint:errcheck
|
|
os.Remove(resizeSockPath) //nolint:errcheck
|
|
|
|
if tapName != "" {
|
|
destroyTap(tapName)
|
|
}
|
|
|
|
logger.Infof("clone %d: exiting", id)
|
|
return nil
|
|
}
|
|
|
|
// resizeMsg is the JSON payload sent over the resize sideband socket.
// Messages with zero Rows or Cols are ignored by handleResize.
type resizeMsg struct {
	// Rows is the new terminal height in character cells.
	Rows uint16 `json:"rows"`
	// Cols is the new terminal width in character cells.
	Cols uint16 `json:"cols"`
}
|
|
|
|
// serveResize accepts connections on the resize sideband listener and applies
|
|
// PTY window size changes as JSON resize messages arrive.
|
|
func serveResize(listener net.Listener, ptm *os.File, vmDone <-chan struct{}, logger *log.Entry) {
|
|
go func() {
|
|
<-vmDone
|
|
listener.Close()
|
|
}()
|
|
for {
|
|
conn, err := listener.Accept()
|
|
if err != nil {
|
|
return
|
|
}
|
|
go handleResize(conn, ptm, logger)
|
|
}
|
|
}
|
|
|
|
// handleResize reads newline-delimited JSON resize messages from conn and
|
|
// applies each one to the PTY master.
|
|
func handleResize(conn net.Conn, ptm *os.File, logger *log.Entry) {
|
|
defer conn.Close()
|
|
scanner := bufio.NewScanner(conn)
|
|
for scanner.Scan() {
|
|
var msg resizeMsg
|
|
if err := json.Unmarshal(scanner.Bytes(), &msg); err != nil {
|
|
logger.Warnf("resize: bad message: %v", err)
|
|
continue
|
|
}
|
|
if msg.Rows == 0 || msg.Cols == 0 {
|
|
continue
|
|
}
|
|
if err := pty.Setsize(ptm, &pty.Winsize{Rows: msg.Rows, Cols: msg.Cols}); err != nil {
|
|
logger.Warnf("resize pty: %v", err)
|
|
}
|
|
}
|
|
}
|
|
|
|
// atomicWriter wraps an io.Writer behind a read/write lock so it can be
|
|
// swapped between connections without holding the lock during writes.
|
|
type atomicWriter struct {
|
|
mu sync.RWMutex
|
|
w io.Writer
|
|
}
|
|
|
|
func (a *atomicWriter) set(w io.Writer) {
|
|
a.mu.Lock()
|
|
defer a.mu.Unlock()
|
|
a.w = w
|
|
}
|
|
|
|
func (a *atomicWriter) Write(p []byte) (int, error) {
|
|
a.mu.RLock()
|
|
defer a.mu.RUnlock()
|
|
return a.w.Write(p)
|
|
}
|
|
|
|
// serveConsole accepts connections on listener and proxies console I/O via ptm.
// A background goroutine reads from the PTY master continuously (discarding
// output when no client is connected so the VM never blocks on a full buffer).
// Only one client is served at a time; sessions are serialised.
// Returns when the listener closes, which a helper goroutine triggers on vmDone.
func serveConsole(listener net.Listener, ptm *os.File, vmDone <-chan struct{}, logger *log.Entry) {
	// Output sink starts as io.Discard; each client session swaps itself in.
	aw := &atomicWriter{w: io.Discard}

	// Background PTY reader — runs for the full VM lifetime.
	// Draining unconditionally (even with no client) keeps the guest's
	// serial output from backing up and stalling the VM.
	go func() {
		buf := make([]byte, 4096)
		for {
			n, err := ptm.Read(buf)
			if n > 0 {
				aw.Write(buf[:n]) //nolint:errcheck
			}
			if err != nil {
				return // PTY closed (VM exited)
			}
		}
	}()

	// Unblock Accept() when the VM exits.
	go func() {
		<-vmDone
		listener.Close()
	}()

	for {
		conn, err := listener.Accept()
		if err != nil {
			return // listener closed
		}

		logger.Info("console client connected")

		// Route PTY output to this connection.
		aw.set(conn)

		// Forward client input to the PTY master.
		// clientGone is closed when conn.Read fails (client disconnect, or
		// the conn.Close() below after vmDone fires).
		clientGone := make(chan struct{})
		go func() {
			defer close(clientGone)
			buf := make([]byte, 256)
			for {
				n, err := conn.Read(buf)
				if n > 0 {
					ptm.Write(buf[:n]) //nolint:errcheck
				}
				if err != nil {
					return
				}
			}
		}()

		// Block until client disconnects or VM dies.
		select {
		case <-clientGone:
			logger.Info("console client disconnected")
		case <-vmDone:
			logger.Info("VM exited while console client was connected")
		}

		// Detach the session: discard further output, then close the conn
		// (which also unblocks the input goroutine's Read on the vmDone path).
		aw.set(io.Discard)
		conn.Close()
	}
}
|