feat: add serial console access via PTY + Unix socket proxy

Each spawned clone now runs under a _console-proxy daemon that connects
firecracker's ttyS0 (stdin/stdout) to a PTY and serves it on a Unix
socket at clones/<id>/console.sock for the VM's lifetime.

  sudo ./fc-orch spawn 1
  sudo ./fc-orch console 1   # Ctrl+] to detach

spawnOne delegates VM startup to the proxy process (Setsid, detached)
and waits for console.sock to appear before returning. Kill continues
to work via PID files — proxy and firecracker PIDs are both recorded.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-04-13 10:24:09 +00:00
parent 04067f7e6b
commit 9089cbdbe9
6 changed files with 437 additions and 97 deletions

View File

@@ -11,6 +11,7 @@ import (
"strconv"
"strings"
"sync"
"syscall"
"time"
firecracker "github.com/firecracker-microvm/firecracker-go-sdk"
@@ -319,10 +320,6 @@ func (o *Orchestrator) spawnOne(id int) error {
return fmt.Errorf("copy rootfs: %w", err)
}
// --- Memory: point at the shared golden mem file ---
// Firecracker uses MAP_PRIVATE → kernel COW. No copy needed.
sharedMem := filepath.Join(goldenDir, "mem")
// --- vmstate: small, cheap copy ---
cloneVmstate := filepath.Join(cloneDir, "vmstate")
if err := copyFile(filepath.Join(goldenDir, "vmstate"), cloneVmstate); err != nil {
@@ -331,7 +328,6 @@ func (o *Orchestrator) spawnOne(id int) error {
// --- Networking ---
tapName := fmt.Sprintf("fctap%d", id)
var netIfaces firecracker.NetworkInterfaces
if o.cfg.Bridge != "none" {
o.log.Infof("clone %d: running: ip tuntap add dev %s mode tap", id, tapName)
o.log.Infof("clone %d: running: ip link set %s up", id, tapName)
@@ -339,106 +335,57 @@ func (o *Orchestrator) spawnOne(id int) error {
if err := o.createTap(tapName); err != nil {
return err
}
mac := fmt.Sprintf("AA:FC:00:00:%02X:%02X", id/256, id%256)
netIfaces = firecracker.NetworkInterfaces{
firecracker.NetworkInterface{
StaticConfiguration: &firecracker.StaticNetworkConfiguration{
MacAddress: mac,
HostDevName: tapName,
},
},
}
}
// --- Restore from snapshot ---
ctx, cancel := context.WithCancel(context.Background())
fcBin, err := exec.LookPath(o.cfg.FCBin)
// --- Launch console proxy (detached daemon) ---
// The proxy owns the full VM lifecycle: it starts firecracker with a PTY,
// loads the snapshot, and serves cloneDir/console.sock until the VM exits.
selfExe, err := os.Executable()
if err != nil {
cancel()
return fmt.Errorf("firecracker not found: %w", err)
return fmt.Errorf("resolve self path: %w", err)
}
cmd := firecracker.VMCommandBuilder{}.
WithBin(fcBin).
WithSocketPath(sockPath).
Build(ctx)
o.log.Infof("clone %d: running: %s", id, strings.Join(cmd.Args, " "))
vcpus := o.cfg.VCPUs
mem := o.cfg.MemMiB
fcCfg := firecracker.Config{
SocketPath: sockPath,
MachineCfg: models.MachineConfiguration{
VcpuCount: &vcpus,
MemSizeMib: &mem,
},
NetworkInterfaces: netIfaces,
LogPath: sockPath + ".log",
LogLevel: "Debug",
FifoLogWriter: o.log.Writer(),
}
m, err := firecracker.NewMachine(ctx, fcCfg,
firecracker.WithProcessRunner(cmd),
firecracker.WithLogger(o.log),
// WithSnapshot replaces the default handler set with snapshot-specific
// handlers: skips validate.Cfg (no KernelImagePath needed) and uses
// LoadSnapshotHandler instead of CreateBootSourceHandler.
firecracker.WithSnapshot(sharedMem, cloneVmstate, func(sc *firecracker.SnapshotConfig) {
sc.ResumeVM = true
}),
)
if err != nil {
cancel()
return fmt.Errorf("new machine: %w", err)
}
// Firecracker v1.15+ supports network_overrides in PUT /snapshot/load to
// remap the tap backend stored in the snapshot. The SDK v1.0.0 doesn't
// expose this field, so we replace the SDK's LoadSnapshotHandler with a
// direct HTTP call that includes the per-clone tap name.
proxyArgs := []string{"_console-proxy", "--id", strconv.Itoa(id)}
if o.cfg.Bridge != "none" {
m.Handlers.FcInit = m.Handlers.FcInit.Swap(firecracker.Handler{
Name: firecracker.LoadSnapshotHandlerName,
Fn: func(ctx context.Context, m *firecracker.Machine) error {
return loadSnapshotWithNetworkOverride(
ctx, sockPath, sharedMem, cloneVmstate, tapName,
)
},
})
proxyArgs = append(proxyArgs, "--tap", tapName)
}
proxyCmd := exec.Command(selfExe, proxyArgs...)
// New session: proxy is detached from our terminal and survives our exit.
proxyCmd.SysProcAttr = &syscall.SysProcAttr{Setsid: true}
proxyCmd.Stdin = nil
proxyCmd.Stdout = nil
proxyCmd.Stderr = nil
if err := proxyCmd.Start(); err != nil {
return fmt.Errorf("start console proxy: %w", err)
}
os.WriteFile(filepath.Join(o.pidsDir(), fmt.Sprintf("clone-%d.proxy.pid", id)),
[]byte(strconv.Itoa(proxyCmd.Process.Pid)), 0o644) //nolint:errcheck
// Wait for the console socket to appear — it is created by the proxy once
// the VM is running, so this also gates on successful snapshot restore.
consoleSockPath := filepath.Join(cloneDir, "console.sock")
if err := waitForSocket(consoleSockPath, 15*time.Second); err != nil {
return fmt.Errorf("clone %d: %w", id, err)
}
start := time.Now()
if err := m.Start(ctx); err != nil {
cancel()
return fmt.Errorf("restore clone %d: %w", id, err)
}
elapsed := time.Since(start)
// store PID
if cmd.Process != nil {
os.WriteFile(filepath.Join(o.pidsDir(), fmt.Sprintf("clone-%d.pid", id)),
[]byte(strconv.Itoa(cmd.Process.Pid)), 0o644)
}
o.mu.Lock()
o.clones[id] = &cloneInfo{
ID: id,
Machine: m,
Cancel: cancel,
Tap: tapName,
}
o.mu.Unlock()
o.log.Infof("clone %d: restored in %s (pid=%d, tap=%s)",
id, elapsed.Round(time.Millisecond), cmd.Process.Pid, tapName)
o.log.Infof("clone %d: ready (proxy pid=%d, tap=%s, console=%s)",
id, proxyCmd.Process.Pid, tapName, consoleSockPath)
return nil
}
// waitForSocket blocks until path exists on disk or timeout elapses.
//
// The path is probed with os.Stat roughly every 50 ms. It is always probed at
// least once, even when timeout is zero or negative, and once more after the
// deadline has passed, so a file that appears during the final sleep is not
// missed. Any os.Stat error is treated as "not there yet". Only existence is
// checked; the file type is not inspected.
//
// It returns nil as soon as os.Stat succeeds, or an error naming path once
// the deadline has passed without a successful probe.
func waitForSocket(path string, timeout time.Duration) error {
	const pollInterval = 50 * time.Millisecond

	deadline := time.Now().Add(timeout)
	for {
		if _, err := os.Stat(path); err == nil {
			return nil
		}

		remaining := time.Until(deadline)
		if remaining <= 0 {
			return fmt.Errorf("timed out waiting for %s", path)
		}

		// Never sleep past the deadline: the caller's timeout is an upper
		// bound, not a hint.
		pause := pollInterval
		if remaining < pause {
			pause = remaining
		}
		time.Sleep(pause)
	}
}
// ——— Status ————————————————————————————————————————————————————————————
func (o *Orchestrator) Status() {