feat: add serial console access via PTY + Unix socket proxy

Each spawned clone now runs under a _console-proxy daemon that connects
firecracker's ttyS0 (stdin/stdout) to a PTY and serves it on a Unix
socket at clones/<id>/console.sock for the VM's lifetime.

  sudo ./fc-orch spawn 1
  sudo ./fc-orch console 1   # Ctrl+] to detach

spawnOne delegates VM startup to the proxy process (Setsid, detached)
and waits for console.sock to appear before returning. Kill continues
to work via PID files — proxy and firecracker PIDs are both recorded.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-04-13 10:24:09 +00:00
parent 04067f7e6b
commit 9089cbdbe9
6 changed files with 437 additions and 97 deletions

6
go.mod
View File

@@ -1,6 +1,6 @@
module github.com/kacerr/fc-orchestrator
go 1.23
go 1.25.0
require (
github.com/firecracker-microvm/firecracker-go-sdk v1.0.0
@@ -14,6 +14,7 @@ require (
github.com/containerd/fifo v1.0.0 // indirect
github.com/containernetworking/cni v1.0.1 // indirect
github.com/containernetworking/plugins v1.0.1 // indirect
github.com/creack/pty v1.1.24 // indirect
github.com/go-openapi/analysis v0.21.2 // indirect
github.com/go-openapi/errors v0.20.2 // indirect
github.com/go-openapi/jsonpointer v0.19.5 // indirect
@@ -38,7 +39,8 @@ require (
github.com/vishvananda/netns v0.0.0-20210104183010-2eb08e3e575f // indirect
go.mongodb.org/mongo-driver v1.8.3 // indirect
golang.org/x/net v0.0.0-20220127200216-cd36cc0744dd // indirect
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8 // indirect
golang.org/x/sys v0.43.0 // indirect
golang.org/x/term v0.42.0 // indirect
golang.org/x/text v0.3.7 // indirect
gopkg.in/yaml.v2 v2.4.0 // indirect
)

9
go.sum
View File

@@ -208,6 +208,8 @@ github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:ma
github.com/cpuguy83/go-md2man/v2 v2.0.0/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU=
github.com/creack/pty v1.1.7/go.mod h1:lj5s0c3V2DBrqTV7llrYr5NG6My20zk30Fl46Y7DoTY=
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
github.com/creack/pty v1.1.24 h1:bJrF4RRfyJnbTJqzRLHzcGaZK1NeM5kTC9jGgovnR1s=
github.com/creack/pty v1.1.24/go.mod h1:08sCNb52WyoAwi2QDyzUCTgcvVFhUzewun7wtTfvcwE=
github.com/cyphar/filepath-securejoin v0.2.2/go.mod h1:FpkQEhXnPnOthhzymB7CGsFk2G9VLXONKD9G7QGMM+4=
github.com/d2g/dhcp4 v0.0.0-20170904100407-a1d1b6c41b1c/go.mod h1:Ct2BUK8SB0YC1SMSibvLzxjeJLnrYEVLULFNiHY9YfQ=
github.com/d2g/dhcp4client v1.0.0/go.mod h1:j0hNfjhrt2SxUOw55nL0ATM/z4Yt3t2Kd1mW34z5W5s=
@@ -444,6 +446,7 @@ github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1
github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/XSXhF0NWZEnDohbsk=
github.com/jtolds/gls v4.20.0+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU=
github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w=
github.com/kacerrmark/pkcs8 v0.0.0-20181117223130-1be2e3e5546d/go.mod h1:rHwXgn7JulP+udvsHwJoVG1YGAP6VLg4y9I5dyZdqmA=
github.com/karrick/godirwalk v1.8.0/go.mod h1:H5KPZjojv4lE+QYImBI8xVtrBRgYrIVsaRPx4tDPEn4=
github.com/karrick/godirwalk v1.10.3/go.mod h1:RoGL9dQei4vP9ilrpETWE8CLOZ1kiN0LhBygSwrAsHA=
github.com/kisielk/errcheck v1.1.0/go.mod h1:EZBBE59ingxPouuu3KfxchcWSUPOHkagtvWXihfKN4Q=
@@ -686,7 +689,7 @@ github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415/go.mod h1:
github.com/xeipuuv/gojsonschema v0.0.0-20180618132009-1d523034197f/go.mod h1:5yf86TLmAcydyeJq5YvxkGPE2fm/u4myDekKRoLuqhs=
github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2/go.mod h1:UETIi67q53MR2AWcXfiuqkDkRtnGDLqkBTpCHuJHxtU=
github.com/xordataexchange/crypt v0.0.3-0.20170626215501-b2862e3d0a77/go.mod h1:aYKd//L2LvnjZzWKhF00oedf4jCCReLcmhLdhm1A27Q=
github.com/kacerrmark/pkcs8 v0.0.0-20181117223130-1be2e3e5546d/go.mod h1:rHwXgn7JulP+udvsHwJoVG1YGAP6VLg4y9I5dyZdqmA=
github.com/youmark/pkcs8 v0.0.0-20181117223130-1be2e3e5546d/go.mod h1:rHwXgn7JulP+udvsHwJoVG1YGAP6VLg4y9I5dyZdqmA=
github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yvasiyarov/go-metrics v0.0.0-20140926110328-57bccd1ccd43/go.mod h1:aX5oPXxHm3bOH+xeAttToC8pqch2ScQN/JoXYupl6xs=
@@ -889,9 +892,13 @@ golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e/go.mod h1:oPkhp1MJrh7nUepCBc
golang.org/x/sys v0.0.0-20220204135822-1c1b9b1eba6a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8 h1:0A+M6Uqn+Eje4kHMK80dtF3JCXC4ykBgQG4Fe06QRhQ=
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.43.0 h1:Rlag2XtaFTxp19wS8MXlJwTvoh8ArU6ezoyFsMyCTNI=
golang.org/x/sys v0.43.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw=
golang.org/x/term v0.0.0-20201117132131-f5c789dd3221/go.mod h1:Nr5EML6q2oocZ2LXRh80K7BxOlk5/8JxuGnuhpl+muw=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
golang.org/x/term v0.42.0 h1:UiKe+zDFmJobeJ5ggPwOshJIVt6/Ft0rcfrXZDLWAWY=
golang.org/x/term v0.42.0/go.mod h1:Dq/D+snpsbazcBG5+F9Q1n2rXV8Ma+71xEjTRufARgY=
golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=

22
main.go
View File

@@ -16,6 +16,7 @@
package main
import (
"flag"
"fmt"
"os"
"path/filepath"
@@ -79,6 +80,26 @@ func main() {
fatal(orch.Kill())
case "cleanup":
fatal(orch.Cleanup())
case "console":
if len(os.Args) < 3 {
fmt.Fprintf(os.Stderr, "usage: %s console <id>\n", os.Args[0])
os.Exit(1)
}
var id int
fmt.Sscanf(os.Args[2], "%d", &id)
fatal(orchestrator.ConnectConsole(orchestrator.DefaultConfig(), id))
case "_console-proxy":
// Internal subcommand: started by spawnOne, runs as a background daemon.
fs := flag.NewFlagSet("_console-proxy", flag.ContinueOnError)
var id int
var tap string
fs.IntVar(&id, "id", 0, "clone ID")
fs.StringVar(&tap, "tap", "", "TAP device name")
if err := fs.Parse(os.Args[2:]); err != nil {
fmt.Fprintf(os.Stderr, "console-proxy: %v\n", err)
os.Exit(1)
}
fatal(orchestrator.RunConsoleProxy(orchestrator.DefaultConfig(), id, tap))
default:
usage()
os.Exit(1)
@@ -95,6 +116,7 @@ Commands:
init Download kernel + create Alpine rootfs
golden Boot golden VM → pause → snapshot
spawn [N] Restore N clones from golden snapshot (default: 1)
console <id> Attach to the serial console of a running clone (Ctrl+] to detach)
status Show running clones
kill Kill all running VMs
cleanup Kill VMs + remove all state

258
orchestrator/console.go Normal file
View File

@@ -0,0 +1,258 @@
package orchestrator
import (
"context"
"fmt"
"io"
"net"
"os"
"os/exec"
"path/filepath"
"strconv"
"sync"
"syscall"
"time"
"github.com/creack/pty"
firecracker "github.com/firecracker-microvm/firecracker-go-sdk"
"github.com/firecracker-microvm/firecracker-go-sdk/client/models"
log "github.com/sirupsen/logrus"
)
// RunConsoleProxy is the entrypoint for the "_console-proxy" internal subcommand.
// It restores a Firecracker clone from the golden snapshot, connecting its serial
// console (ttyS0) to a PTY, then serves the PTY master on a Unix socket at
// {cloneDir}/console.sock for the lifetime of the VM.
func RunConsoleProxy(cfg Config, id int, tapName string) error {
logger := log.WithField("component", fmt.Sprintf("console-proxy[%d]", id))
cloneDir := filepath.Join(cfg.BaseDir, "clones", strconv.Itoa(id))
goldenDir := filepath.Join(cfg.BaseDir, "golden")
sockPath := filepath.Join(cloneDir, "api.sock")
consoleSockPath := filepath.Join(cloneDir, "console.sock")
sharedMem := filepath.Join(goldenDir, "mem")
cloneVmstate := filepath.Join(cloneDir, "vmstate")
// --- Create PTY ---
// ptm = master (we hold and proxy), pts = slave (firecracker's stdio)
ptm, pts, err := pty.Open()
if err != nil {
return fmt.Errorf("open pty: %w", err)
}
pty.Setsize(ptm, &pty.Winsize{Rows: 24, Cols: 80}) //nolint:errcheck
fcBin, err := exec.LookPath(cfg.FCBin)
if err != nil {
pts.Close()
ptm.Close()
return fmt.Errorf("firecracker not found: %w", err)
}
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
// --- Build firecracker command with slave PTY as stdin/stdout/stderr ---
// Setsid makes the slave PTY the controlling terminal for firecracker,
// which is required for job control (Ctrl+C, SIGWINCH, etc.) to work.
cmd := firecracker.VMCommandBuilder{}.
WithBin(fcBin).
WithSocketPath(sockPath).
Build(ctx)
cmd.Stdin = pts
cmd.Stdout = pts
cmd.Stderr = pts
cmd.SysProcAttr = &syscall.SysProcAttr{Setsid: true}
// --- Build Machine config (mirrors spawnOne) ---
vcpus := cfg.VCPUs
mem := cfg.MemMiB
fcCfg := firecracker.Config{
SocketPath: sockPath,
MachineCfg: models.MachineConfiguration{
VcpuCount: &vcpus,
MemSizeMib: &mem,
},
LogPath: sockPath + ".log",
LogLevel: "Debug",
FifoLogWriter: logger.Writer(),
}
if cfg.Bridge != "none" && tapName != "" {
mac := fmt.Sprintf("AA:FC:00:00:%02X:%02X", id/256, id%256)
fcCfg.NetworkInterfaces = firecracker.NetworkInterfaces{
{
StaticConfiguration: &firecracker.StaticNetworkConfiguration{
MacAddress: mac,
HostDevName: tapName,
},
},
}
}
m, err := firecracker.NewMachine(ctx, fcCfg,
firecracker.WithProcessRunner(cmd),
firecracker.WithLogger(logger),
firecracker.WithSnapshot(sharedMem, cloneVmstate, func(sc *firecracker.SnapshotConfig) {
sc.ResumeVM = true
}),
)
if err != nil {
pts.Close()
ptm.Close()
return fmt.Errorf("new machine: %w", err)
}
// Swap in network-override snapshot handler (same as spawnOne)
if cfg.Bridge != "none" && tapName != "" {
m.Handlers.FcInit = m.Handlers.FcInit.Swap(firecracker.Handler{
Name: firecracker.LoadSnapshotHandlerName,
Fn: func(ctx context.Context, m *firecracker.Machine) error {
return loadSnapshotWithNetworkOverride(
ctx, sockPath, sharedMem, cloneVmstate, tapName,
)
},
})
}
// --- Start VM (blocks until snapshot is loaded and VM is running) ---
start := time.Now()
logger.Infof("restoring clone %d from snapshot ...", id)
if err := m.Start(ctx); err != nil {
pts.Close()
ptm.Close()
return fmt.Errorf("restore clone %d: %w", id, err)
}
elapsed := time.Since(start)
// Release our copy of the slave — firecracker holds its own fd now.
// Closing here ensures we get EOF on ptm when firecracker exits.
pts.Close()
// --- Write PID file ---
pidsDir := filepath.Join(cfg.BaseDir, "pids")
os.MkdirAll(pidsDir, 0o755) //nolint:errcheck
if cmd.Process != nil {
os.WriteFile( //nolint:errcheck
filepath.Join(pidsDir, fmt.Sprintf("clone-%d.pid", id)),
[]byte(strconv.Itoa(cmd.Process.Pid)),
0o644,
)
}
logger.Infof("clone %d: restored in %s (pid=%d, tap=%s)",
id, elapsed.Round(time.Millisecond), cmd.Process.Pid, tapName)
// --- Create console socket ---
os.Remove(consoleSockPath) //nolint:errcheck
listener, err := net.Listen("unix", consoleSockPath)
if err != nil {
ptm.Close()
return fmt.Errorf("listen on console socket: %w", err)
}
// --- Serve until VM exits ---
vmDone := make(chan struct{})
go func() {
m.Wait(ctx) //nolint:errcheck
close(vmDone)
}()
serveConsole(listener, ptm, vmDone, logger)
listener.Close()
ptm.Close()
os.Remove(consoleSockPath) //nolint:errcheck
if tapName != "" {
destroyTap(tapName)
}
logger.Infof("clone %d: exiting", id)
return nil
}
// atomicWriter wraps an io.Writer behind a read/write lock so it can be
// swapped between connections without holding the lock during writes.
type atomicWriter struct {
mu sync.RWMutex
w io.Writer
}
func (a *atomicWriter) set(w io.Writer) {
a.mu.Lock()
defer a.mu.Unlock()
a.w = w
}
func (a *atomicWriter) Write(p []byte) (int, error) {
a.mu.RLock()
defer a.mu.RUnlock()
return a.w.Write(p)
}
// serveConsole accepts connections on listener and proxies console I/O via ptm.
// A background goroutine reads from the PTY master continuously (discarding
// output when no client is connected so the VM never blocks on a full buffer).
// Only one client is served at a time; sessions are serialised.
func serveConsole(listener net.Listener, ptm *os.File, vmDone <-chan struct{}, logger *log.Entry) {
aw := &atomicWriter{w: io.Discard}
// Background PTY reader — runs for the full VM lifetime.
go func() {
buf := make([]byte, 4096)
for {
n, err := ptm.Read(buf)
if n > 0 {
aw.Write(buf[:n]) //nolint:errcheck
}
if err != nil {
return // PTY closed (VM exited)
}
}
}()
// Unblock Accept() when the VM exits.
go func() {
<-vmDone
listener.Close()
}()
for {
conn, err := listener.Accept()
if err != nil {
return // listener closed
}
logger.Info("console client connected")
// Route PTY output to this connection.
aw.set(conn)
// Forward client input to the PTY master.
clientGone := make(chan struct{})
go func() {
defer close(clientGone)
buf := make([]byte, 256)
for {
n, err := conn.Read(buf)
if n > 0 {
ptm.Write(buf[:n]) //nolint:errcheck
}
if err != nil {
return
}
}
}()
// Block until client disconnects or VM dies.
select {
case <-clientGone:
logger.Info("console client disconnected")
case <-vmDone:
logger.Info("VM exited while console client was connected")
}
aw.set(io.Discard)
conn.Close()
}
}

View File

@@ -0,0 +1,104 @@
package orchestrator
import (
"context"
"fmt"
"io"
"net"
"os"
"os/signal"
"path/filepath"
"strconv"
"sync"
"syscall"
"golang.org/x/term"
)
// ConnectConsole attaches the calling terminal to the serial console of the
// clone with the given ID. The connection is made over the Unix socket at
// {cloneDir}/console.sock served by the console-proxy process.
//
// Escape sequence: Ctrl+] (byte 0x1D) detaches without stopping the VM.
func ConnectConsole(cfg Config, id int) error {
sockPath := filepath.Join(cfg.BaseDir, "clones", strconv.Itoa(id), "console.sock")
if _, err := os.Stat(sockPath); err != nil {
return fmt.Errorf("clone %d has no console socket — is it running?", id)
}
conn, err := net.Dial("unix", sockPath)
if err != nil {
return fmt.Errorf("connect to console: %w", err)
}
defer conn.Close()
if !term.IsTerminal(int(os.Stdin.Fd())) {
return fmt.Errorf("console requires an interactive terminal")
}
oldState, err := term.MakeRaw(int(os.Stdin.Fd()))
if err != nil {
return fmt.Errorf("set raw terminal: %w", err)
}
defer term.Restore(int(os.Stdin.Fd()), oldState) //nolint:errcheck
// Ensure the terminal is restored even on SIGTERM / SIGHUP.
sigCh := make(chan os.Signal, 1)
signal.Notify(sigCh, syscall.SIGTERM, syscall.SIGHUP)
go func() {
<-sigCh
term.Restore(int(os.Stdin.Fd()), oldState) //nolint:errcheck
os.Exit(0)
}()
fmt.Fprintf(os.Stderr, "Connected to clone %d console. Escape: Ctrl+]\r\n", id)
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
var wg sync.WaitGroup
wg.Add(2)
// stdin → VM (with Ctrl+] escape detection)
go func() {
defer wg.Done()
io.Copy(conn, &escapeReader{r: os.Stdin, cancel: cancel}) //nolint:errcheck
// Half-close so the proxy knows we're done sending.
if uc, ok := conn.(*net.UnixConn); ok {
uc.CloseWrite() //nolint:errcheck
}
}()
// VM → stdout
go func() {
defer wg.Done()
io.Copy(os.Stdout, conn) //nolint:errcheck
cancel() // VM exited or proxy closed the connection
}()
<-ctx.Done()
conn.Close() // unblock both goroutines
wg.Wait()
fmt.Fprintf(os.Stderr, "\r\nDetached from clone %d.\r\n", id)
return nil
}
// escapeReader wraps an io.Reader and intercepts Ctrl+] (0x1D), calling cancel
// and returning io.EOF when the escape byte is seen.
type escapeReader struct {
r io.Reader
cancel context.CancelFunc
}
func (e *escapeReader) Read(p []byte) (int, error) {
n, err := e.r.Read(p)
for i := 0; i < n; i++ {
if p[i] == 0x1D { // Ctrl+]
e.cancel()
// Return only the bytes before the escape so nothing leaks to the VM.
return i, io.EOF
}
}
return n, err
}

View File

@@ -11,6 +11,7 @@ import (
"strconv"
"strings"
"sync"
"syscall"
"time"
firecracker "github.com/firecracker-microvm/firecracker-go-sdk"
@@ -319,10 +320,6 @@ func (o *Orchestrator) spawnOne(id int) error {
return fmt.Errorf("copy rootfs: %w", err)
}
// --- Memory: point at the shared golden mem file ---
// Firecracker uses MAP_PRIVATE → kernel COW. No copy needed.
sharedMem := filepath.Join(goldenDir, "mem")
// --- vmstate: small, cheap copy ---
cloneVmstate := filepath.Join(cloneDir, "vmstate")
if err := copyFile(filepath.Join(goldenDir, "vmstate"), cloneVmstate); err != nil {
@@ -331,7 +328,6 @@ func (o *Orchestrator) spawnOne(id int) error {
// --- Networking ---
tapName := fmt.Sprintf("fctap%d", id)
var netIfaces firecracker.NetworkInterfaces
if o.cfg.Bridge != "none" {
o.log.Infof("clone %d: running: ip tuntap add dev %s mode tap", id, tapName)
o.log.Infof("clone %d: running: ip link set %s up", id, tapName)
@@ -339,106 +335,57 @@ func (o *Orchestrator) spawnOne(id int) error {
if err := o.createTap(tapName); err != nil {
return err
}
mac := fmt.Sprintf("AA:FC:00:00:%02X:%02X", id/256, id%256)
netIfaces = firecracker.NetworkInterfaces{
firecracker.NetworkInterface{
StaticConfiguration: &firecracker.StaticNetworkConfiguration{
MacAddress: mac,
HostDevName: tapName,
},
},
}
}
// --- Restore from snapshot ---
ctx, cancel := context.WithCancel(context.Background())
fcBin, err := exec.LookPath(o.cfg.FCBin)
// --- Launch console proxy (detached daemon) ---
// The proxy owns the full VM lifecycle: it starts firecracker with a PTY,
// loads the snapshot, and serves cloneDir/console.sock until the VM exits.
selfExe, err := os.Executable()
if err != nil {
cancel()
return fmt.Errorf("firecracker not found: %w", err)
return fmt.Errorf("resolve self path: %w", err)
}
cmd := firecracker.VMCommandBuilder{}.
WithBin(fcBin).
WithSocketPath(sockPath).
Build(ctx)
o.log.Infof("clone %d: running: %s", id, strings.Join(cmd.Args, " "))
vcpus := o.cfg.VCPUs
mem := o.cfg.MemMiB
fcCfg := firecracker.Config{
SocketPath: sockPath,
MachineCfg: models.MachineConfiguration{
VcpuCount: &vcpus,
MemSizeMib: &mem,
},
NetworkInterfaces: netIfaces,
LogPath: sockPath + ".log",
LogLevel: "Debug",
FifoLogWriter: o.log.Writer(),
}
m, err := firecracker.NewMachine(ctx, fcCfg,
firecracker.WithProcessRunner(cmd),
firecracker.WithLogger(o.log),
// WithSnapshot replaces the default handler set with snapshot-specific
// handlers: skips validate.Cfg (no KernelImagePath needed) and uses
// LoadSnapshotHandler instead of CreateBootSourceHandler.
firecracker.WithSnapshot(sharedMem, cloneVmstate, func(sc *firecracker.SnapshotConfig) {
sc.ResumeVM = true
}),
)
if err != nil {
cancel()
return fmt.Errorf("new machine: %w", err)
}
// Firecracker v1.15+ supports network_overrides in PUT /snapshot/load to
// remap the tap backend stored in the snapshot. The SDK v1.0.0 doesn't
// expose this field, so we replace the SDK's LoadSnapshotHandler with a
// direct HTTP call that includes the per-clone tap name.
proxyArgs := []string{"_console-proxy", "--id", strconv.Itoa(id)}
if o.cfg.Bridge != "none" {
m.Handlers.FcInit = m.Handlers.FcInit.Swap(firecracker.Handler{
Name: firecracker.LoadSnapshotHandlerName,
Fn: func(ctx context.Context, m *firecracker.Machine) error {
return loadSnapshotWithNetworkOverride(
ctx, sockPath, sharedMem, cloneVmstate, tapName,
)
},
})
proxyArgs = append(proxyArgs, "--tap", tapName)
}
proxyCmd := exec.Command(selfExe, proxyArgs...)
// New session: proxy is detached from our terminal and survives our exit.
proxyCmd.SysProcAttr = &syscall.SysProcAttr{Setsid: true}
proxyCmd.Stdin = nil
proxyCmd.Stdout = nil
proxyCmd.Stderr = nil
if err := proxyCmd.Start(); err != nil {
return fmt.Errorf("start console proxy: %w", err)
}
os.WriteFile(filepath.Join(o.pidsDir(), fmt.Sprintf("clone-%d.proxy.pid", id)),
[]byte(strconv.Itoa(proxyCmd.Process.Pid)), 0o644) //nolint:errcheck
// Wait for the console socket to appear — it is created by the proxy once
// the VM is running, so this also gates on successful snapshot restore.
consoleSockPath := filepath.Join(cloneDir, "console.sock")
if err := waitForSocket(consoleSockPath, 15*time.Second); err != nil {
return fmt.Errorf("clone %d: %w", id, err)
}
start := time.Now()
if err := m.Start(ctx); err != nil {
cancel()
return fmt.Errorf("restore clone %d: %w", id, err)
}
elapsed := time.Since(start)
// store PID
if cmd.Process != nil {
os.WriteFile(filepath.Join(o.pidsDir(), fmt.Sprintf("clone-%d.pid", id)),
[]byte(strconv.Itoa(cmd.Process.Pid)), 0o644)
}
o.mu.Lock()
o.clones[id] = &cloneInfo{
ID: id,
Machine: m,
Cancel: cancel,
Tap: tapName,
}
o.mu.Unlock()
o.log.Infof("clone %d: restored in %s (pid=%d, tap=%s)",
id, elapsed.Round(time.Millisecond), cmd.Process.Pid, tapName)
o.log.Infof("clone %d: ready (proxy pid=%d, tap=%s, console=%s)",
id, proxyCmd.Process.Pid, tapName, consoleSockPath)
return nil
}
// waitForSocket polls path every 50 ms until it appears or timeout elapses.
func waitForSocket(path string, timeout time.Duration) error {
deadline := time.Now().Add(timeout)
for time.Now().Before(deadline) {
if _, err := os.Stat(path); err == nil {
return nil
}
time.Sleep(50 * time.Millisecond)
}
return fmt.Errorf("timed out waiting for %s", path)
}
// ——— Status ————————————————————————————————————————————————————————————
func (o *Orchestrator) Status() {