Files
firecracker-orchestrator/orchestrator/console.go
Honza Novak 82c11dd2f8 feat: add web terminal UI with WebSocket console and clone management API
Introduces a browser-based terminal interface backed by xterm.js, served
directly from the binary via an embedded HTML asset.

New HTTP server (`serve [addr]`, default :8080):
  GET  /              — xterm.js terminal UI; ?id=N selects a clone
  GET  /clones        — JSON list of running clone IDs
  POST /clones        — spawn a new clone; returns {"id": N}
  DELETE /clones/{id} — destroy a clone by ID
  GET  /ws/{id}       — WebSocket console for clone {id}
                        binary frames = raw PTY I/O
                        text frames   = JSON resize {"rows":N,"cols":M}

Supporting changes:
- orchestrator: add SpawnSingle() and KillClone(id) for per-clone lifecycle
  management from the HTTP layer
- console: add a resize sideband Unix socket (console-resize.sock) that
  accepts newline-delimited JSON {"rows","cols"} messages and applies them
  to the PTY master via pty.Setsize; the WebSocket handler writes to this
  socket on text frames so browser window resizes propagate into the VM
- deps: add gorilla/websocket v1.5.3

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-13 10:53:41 +00:00

319 lines
8.3 KiB
Go

package orchestrator
import (
"bufio"
"context"
"encoding/json"
"fmt"
"io"
"net"
"os"
"os/exec"
"path/filepath"
"strconv"
"sync"
"syscall"
"time"
"github.com/creack/pty"
firecracker "github.com/firecracker-microvm/firecracker-go-sdk"
"github.com/firecracker-microvm/firecracker-go-sdk/client/models"
log "github.com/sirupsen/logrus"
)
// RunConsoleProxy is the entrypoint for the "_console-proxy" internal subcommand.
// It restores a Firecracker clone from the golden snapshot, connecting its serial
// console (ttyS0) to a PTY, then serves the PTY master on a Unix socket at
// {cloneDir}/console.sock for the lifetime of the VM.
//
// A second, best-effort Unix socket at {cloneDir}/console-resize.sock accepts
// newline-delimited JSON {"rows","cols"} messages and applies them to the PTY;
// if it cannot be created the console still works, just without resize support.
//
// Returns an error if the PTY, firecracker binary lookup, machine construction,
// or snapshot restore fails; returns nil after the VM exits and cleanup is done.
func RunConsoleProxy(cfg Config, id int, tapName string) error {
	logger := log.WithField("component", fmt.Sprintf("console-proxy[%d]", id))

	// Per-clone and golden-snapshot paths under cfg.BaseDir.
	cloneDir := filepath.Join(cfg.BaseDir, "clones", strconv.Itoa(id))
	goldenDir := filepath.Join(cfg.BaseDir, "golden")
	sockPath := filepath.Join(cloneDir, "api.sock")            // firecracker API socket
	consoleSockPath := filepath.Join(cloneDir, "console.sock") // PTY proxy socket
	sharedMem := filepath.Join(goldenDir, "mem")               // golden snapshot memory file
	cloneVmstate := filepath.Join(cloneDir, "vmstate")         // per-clone vmstate file

	// --- Create PTY ---
	// ptm = master (we hold and proxy), pts = slave (firecracker's stdio)
	ptm, pts, err := pty.Open()
	if err != nil {
		return fmt.Errorf("open pty: %w", err)
	}
	// Default terminal size; clients may change it later via the resize socket.
	pty.Setsize(ptm, &pty.Winsize{Rows: 24, Cols: 80}) //nolint:errcheck

	fcBin, err := exec.LookPath(cfg.FCBin)
	if err != nil {
		pts.Close()
		ptm.Close()
		return fmt.Errorf("firecracker not found: %w", err)
	}

	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	// --- Build firecracker command with slave PTY as stdin/stdout/stderr ---
	// Setsid makes the slave PTY the controlling terminal for firecracker,
	// which is required for job control (Ctrl+C, SIGWINCH, etc.) to work.
	cmd := firecracker.VMCommandBuilder{}.
		WithBin(fcBin).
		WithSocketPath(sockPath).
		Build(ctx)
	cmd.Stdin = pts
	cmd.Stdout = pts
	cmd.Stderr = pts
	cmd.SysProcAttr = &syscall.SysProcAttr{Setsid: true}

	// --- Build Machine config (mirrors spawnOne) ---
	// Local copies because MachineConfiguration takes pointers.
	vcpus := cfg.VCPUs
	mem := cfg.MemMiB
	fcCfg := firecracker.Config{
		SocketPath: sockPath,
		MachineCfg: models.MachineConfiguration{
			VcpuCount:  &vcpus,
			MemSizeMib: &mem,
		},
		LogPath:       sockPath + ".log",
		LogLevel:      "Debug",
		FifoLogWriter: logger.Writer(),
	}
	// Attach a network interface only when bridging is enabled and a tap device
	// was provided. The MAC encodes the clone ID in its low two bytes so each
	// clone's guest-side interface is distinguishable on the bridge.
	if cfg.Bridge != "none" && tapName != "" {
		mac := fmt.Sprintf("AA:FC:00:00:%02X:%02X", id/256, id%256)
		fcCfg.NetworkInterfaces = firecracker.NetworkInterfaces{
			{
				StaticConfiguration: &firecracker.StaticNetworkConfiguration{
					MacAddress:  mac,
					HostDevName: tapName,
				},
			},
		}
	}

	m, err := firecracker.NewMachine(ctx, fcCfg,
		firecracker.WithProcessRunner(cmd),
		firecracker.WithLogger(logger),
		firecracker.WithSnapshot(sharedMem, cloneVmstate, func(sc *firecracker.SnapshotConfig) {
			sc.ResumeVM = true
		}),
	)
	if err != nil {
		pts.Close()
		ptm.Close()
		return fmt.Errorf("new machine: %w", err)
	}

	// Swap in network-override snapshot handler (same as spawnOne) so the
	// restored VM is re-pointed at this clone's own tap device.
	if cfg.Bridge != "none" && tapName != "" {
		m.Handlers.FcInit = m.Handlers.FcInit.Swap(firecracker.Handler{
			Name: firecracker.LoadSnapshotHandlerName,
			Fn: func(ctx context.Context, m *firecracker.Machine) error {
				return loadSnapshotWithNetworkOverride(
					ctx, sockPath, sharedMem, cloneVmstate, tapName,
				)
			},
		})
	}

	// --- Start VM (blocks until snapshot is loaded and VM is running) ---
	start := time.Now()
	logger.Infof("restoring clone %d from snapshot ...", id)
	if err := m.Start(ctx); err != nil {
		pts.Close()
		ptm.Close()
		return fmt.Errorf("restore clone %d: %w", id, err)
	}
	elapsed := time.Since(start)

	// Release our copy of the slave — firecracker holds its own fd now.
	// Closing here ensures we get EOF on ptm when firecracker exits.
	pts.Close()

	// --- Write PID file ---
	// Best-effort; presumably consumed by KillClone for teardown — confirm
	// against the HTTP layer.
	pidsDir := filepath.Join(cfg.BaseDir, "pids")
	os.MkdirAll(pidsDir, 0o755) //nolint:errcheck
	if cmd.Process != nil {
		os.WriteFile( //nolint:errcheck
			filepath.Join(pidsDir, fmt.Sprintf("clone-%d.pid", id)),
			[]byte(strconv.Itoa(cmd.Process.Pid)),
			0o644,
		)
	}
	// NOTE(review): cmd.Process is dereferenced unconditionally here, unlike the
	// guarded PID-file write above. After a successful Start it should be
	// non-nil, but the asymmetry is worth confirming.
	logger.Infof("clone %d: restored in %s (pid=%d, tap=%s)",
		id, elapsed.Round(time.Millisecond), cmd.Process.Pid, tapName)

	// --- Create console socket ---
	// Remove any stale socket file left over from a previous run first.
	os.Remove(consoleSockPath) //nolint:errcheck
	listener, err := net.Listen("unix", consoleSockPath)
	if err != nil {
		ptm.Close()
		return fmt.Errorf("listen on console socket: %w", err)
	}

	// --- Create resize sideband socket ---
	// Optional: on failure we degrade to a fixed-size terminal rather than abort.
	resizeSockPath := filepath.Join(cloneDir, "console-resize.sock")
	os.Remove(resizeSockPath) //nolint:errcheck
	resizeListener, err := net.Listen("unix", resizeSockPath)
	if err != nil {
		logger.Warnf("could not create resize socket, terminal resize will be unavailable: %v", err)
		resizeListener = nil
	}

	// --- Serve until VM exits ---
	// vmDone is closed once the firecracker process terminates; both serve
	// loops use it to unblock their Accept calls.
	vmDone := make(chan struct{})
	go func() {
		m.Wait(ctx) //nolint:errcheck
		close(vmDone)
	}()
	if resizeListener != nil {
		go serveResize(resizeListener, ptm, vmDone, logger)
	}
	// Blocks until the VM exits (or the listener is otherwise closed).
	serveConsole(listener, ptm, vmDone, logger)

	// Cleanup: listeners, PTY master, socket files, and this clone's tap device.
	listener.Close()
	if resizeListener != nil {
		resizeListener.Close()
	}
	ptm.Close()
	os.Remove(consoleSockPath) //nolint:errcheck
	os.Remove(resizeSockPath)  //nolint:errcheck
	if tapName != "" {
		destroyTap(tapName)
	}
	logger.Infof("clone %d: exiting", id)
	return nil
}
// resizeMsg is the JSON payload sent over the resize sideband socket
// (console-resize.sock), one newline-delimited message per resize event.
// Zero values are treated as invalid by the receiver and skipped.
type resizeMsg struct {
	Rows uint16 `json:"rows"` // terminal height in character cells
	Cols uint16 `json:"cols"` // terminal width in character cells
}
// serveResize runs the accept loop for the resize sideband listener. Each
// incoming connection is handed to handleResize on its own goroutine, so
// multiple resize clients can coexist. The loop ends when Accept fails,
// which happens once the listener is closed — either by the caller or by
// the watcher goroutine below when the VM exits.
func serveResize(ln net.Listener, ptm *os.File, vmDone <-chan struct{}, logger *log.Entry) {
	// Watcher: closing the listener on VM exit unblocks Accept below.
	go func() {
		<-vmDone
		ln.Close()
	}()
	for {
		c, acceptErr := ln.Accept()
		if acceptErr != nil {
			// Listener closed (VM gone) or otherwise unusable — stop serving.
			return
		}
		go handleResize(c, ptm, logger)
	}
}
// handleResize consumes newline-delimited JSON resize messages from conn,
// applying each valid one to the PTY master via pty.Setsize. Malformed
// messages and zero-sized windows are skipped; the connection is closed
// when the peer disconnects (or on a scan error).
func handleResize(conn net.Conn, ptm *os.File, logger *log.Entry) {
	defer conn.Close()
	lines := bufio.NewScanner(conn)
	for lines.Scan() {
		var req resizeMsg
		if unmarshalErr := json.Unmarshal(lines.Bytes(), &req); unmarshalErr != nil {
			logger.Warnf("resize: bad message: %v", unmarshalErr)
			continue
		}
		// A zero dimension is never a meaningful terminal size — ignore it.
		if req.Rows == 0 || req.Cols == 0 {
			continue
		}
		if sizeErr := pty.Setsize(ptm, &pty.Winsize{Rows: req.Rows, Cols: req.Cols}); sizeErr != nil {
			logger.Warnf("resize pty: %v", sizeErr)
		}
	}
}
// atomicWriter wraps an io.Writer behind a read/write lock so it can be
// swapped between connections without holding the lock during writes.
type atomicWriter struct {
mu sync.RWMutex
w io.Writer
}
func (a *atomicWriter) set(w io.Writer) {
a.mu.Lock()
defer a.mu.Unlock()
a.w = w
}
func (a *atomicWriter) Write(p []byte) (int, error) {
a.mu.RLock()
defer a.mu.RUnlock()
return a.w.Write(p)
}
// serveConsole accepts connections on listener and proxies console I/O via ptm.
// A background goroutine reads from the PTY master continuously (discarding
// output when no client is connected so the VM never blocks on a full buffer).
// Only one client is served at a time; sessions are serialised — a second
// connection is not accepted until the current one ends.
// Returns when Accept fails, i.e. after the listener is closed (on VM exit
// via vmDone, or by the caller).
func serveConsole(listener net.Listener, ptm *os.File, vmDone <-chan struct{}, logger *log.Entry) {
	// Output sink: starts as io.Discard and is swapped to the live connection
	// while a client is attached, then back to io.Discard afterwards.
	aw := &atomicWriter{w: io.Discard}

	// Background PTY reader — runs for the full VM lifetime.
	go func() {
		buf := make([]byte, 4096)
		for {
			n, err := ptm.Read(buf)
			if n > 0 {
				aw.Write(buf[:n]) //nolint:errcheck
			}
			if err != nil {
				return // PTY closed (VM exited)
			}
		}
	}()

	// Unblock Accept() when the VM exits.
	go func() {
		<-vmDone
		listener.Close()
	}()

	for {
		conn, err := listener.Accept()
		if err != nil {
			return // listener closed
		}
		logger.Info("console client connected")

		// Route PTY output to this connection.
		aw.set(conn)

		// Forward client input to the PTY master. clientGone is closed when
		// the client side of the connection stops being readable.
		clientGone := make(chan struct{})
		go func() {
			defer close(clientGone)
			buf := make([]byte, 256)
			for {
				n, err := conn.Read(buf)
				if n > 0 {
					ptm.Write(buf[:n]) //nolint:errcheck
				}
				if err != nil {
					return
				}
			}
		}()

		// Block until client disconnects or VM dies.
		select {
		case <-clientGone:
			logger.Info("console client disconnected")
		case <-vmDone:
			logger.Info("VM exited while console client was connected")
		}
		// Detach output first, then close the connection; in the vmDone case
		// the Close also unblocks the input goroutine's pending Read.
		aw.set(io.Discard)
		conn.Close()
	}
}