Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,11 @@ jobs:
# ~/go/pkg/mod and ~/.cache/go-build stay on disk between runs automatically.
cache: false
go-version: '1.25.4'

# Shallow-fetches main, then runs scripts/check-uffd-version.sh against origin/main.
- name: Check UFFD pager version
run: |
git fetch origin main --depth=1
bash scripts/check-uffd-version.sh origin/main

- name: Install dependencies
run: |
Expand Down
13 changes: 13 additions & 0 deletions .goreleaser.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,19 @@ builds:
ldflags:
- -s -w

# Release build of the UFFD pager from ./cmd/uffd-pager: Linux only, amd64 and
# arm64, with cgo disabled.
- id: hypeman-uffd-pager
main: ./cmd/uffd-pager
binary: hypeman-uffd-pager
env:
- CGO_ENABLED=0
goos:
- linux
goarch:
- amd64
- arm64
ldflags:
- -s -w

archives:
- id: default
formats:
Expand Down
1 change: 1 addition & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,7 @@ endif

# Build the Linux API binary (hypeman) and the UFFD pager binary
# (hypeman-uffd-pager) into $(BIN_DIR) once the prerequisite targets are done.
build-linux: ensure-ch-binaries ensure-firecracker-binaries ensure-caddy-binaries build-embedded | $(BIN_DIR)
go build -tags containers_image_openpgp -o $(BIN_DIR)/hypeman ./cmd/api
go build -o $(BIN_DIR)/hypeman-uffd-pager ./cmd/uffd-pager

# Build all binaries
build-all: build
Expand Down
31 changes: 24 additions & 7 deletions cmd/api/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -193,10 +193,12 @@ type CapacityConfig struct {

// HypervisorConfig holds hypervisor settings.
type HypervisorConfig struct {
Default string `koanf:"default"`
CloudHypervisorDefaultVersion string `koanf:"cloud_hypervisor_default_version"`
FirecrackerBinaryPath string `koanf:"firecracker_binary_path"`
Memory HypervisorMemoryConfig `koanf:"memory"`
Default string `koanf:"default"`
CloudHypervisorDefaultVersion string `koanf:"cloud_hypervisor_default_version"`
FirecrackerBinaryPath string `koanf:"firecracker_binary_path"`
FirecrackerSnapshotMemoryBackend string `koanf:"firecracker_snapshot_memory_backend"`
FirecrackerUFFDCacheMaxBytes string `koanf:"firecracker_uffd_cache_max_bytes"`
Memory HypervisorMemoryConfig `koanf:"memory"`
}

// HypervisorMemoryConfig holds guest memory management settings.
Expand Down Expand Up @@ -404,9 +406,11 @@ func defaultConfig() *Config {
},

Hypervisor: HypervisorConfig{
Default: "cloud-hypervisor",
CloudHypervisorDefaultVersion: "",
FirecrackerBinaryPath: "",
Default: "cloud-hypervisor",
CloudHypervisorDefaultVersion: "",
FirecrackerBinaryPath: "",
FirecrackerSnapshotMemoryBackend: "file",
FirecrackerUFFDCacheMaxBytes: "4294967296",
Comment thread
sjmiller609 marked this conversation as resolved.
Comment thread
sjmiller609 marked this conversation as resolved.
Memory: HypervisorMemoryConfig{
Enabled: false,
KernelPageInitMode: "hardened",
Expand Down Expand Up @@ -618,6 +622,19 @@ func (c *Config) Validate() error {
if c.Hypervisor.Memory.KernelPageInitMode != "performance" && c.Hypervisor.Memory.KernelPageInitMode != "hardened" {
return fmt.Errorf("hypervisor.memory.kernel_page_init_mode must be one of {performance,hardened}, got %q", c.Hypervisor.Memory.KernelPageInitMode)
}
backend := strings.ToLower(strings.TrimSpace(c.Hypervisor.FirecrackerSnapshotMemoryBackend))
if backend == "" {
backend = "file"
}
switch backend {
case "file", "uffd":
c.Hypervisor.FirecrackerSnapshotMemoryBackend = backend
Comment thread
sjmiller609 marked this conversation as resolved.
default:
return fmt.Errorf("hypervisor.firecracker_snapshot_memory_backend must be one of {file,uffd}, got %q", c.Hypervisor.FirecrackerSnapshotMemoryBackend)
}
if err := validateByteSize("hypervisor.firecracker_uffd_cache_max_bytes", c.Hypervisor.FirecrackerUFFDCacheMaxBytes); err != nil {
return err
}
if err := validateDuration("hypervisor.memory.active_ballooning.poll_interval", c.Hypervisor.Memory.ActiveBallooning.PollInterval); err != nil {
return err
}
Expand Down
29 changes: 29 additions & 0 deletions cmd/api/config/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,35 @@ func TestDefaultConfigIncludesMetricsSettings(t *testing.T) {
if cfg.Instances.LifecycleEventBufferSize != 256 {
t.Fatalf("expected default instances.lifecycle_event_buffer_size to be 256, got %d", cfg.Instances.LifecycleEventBufferSize)
}
if cfg.Hypervisor.FirecrackerSnapshotMemoryBackend != "file" {
t.Fatalf("expected default firecracker snapshot backend to be file, got %q", cfg.Hypervisor.FirecrackerSnapshotMemoryBackend)
}
if cfg.Hypervisor.FirecrackerUFFDCacheMaxBytes != "4294967296" {
t.Fatalf("expected default firecracker uffd cache size to be 4294967296, got %q", cfg.Hypervisor.FirecrackerUFFDCacheMaxBytes)
}
}

// TestValidateFirecrackerSnapshotMemoryBackend checks that Validate accepts a
// mixed-case "UFFD" backend and lower-cases it in the config, rejects an
// unknown backend, and rejects an unparsable UFFD cache size.
func TestValidateFirecrackerSnapshotMemoryBackend(t *testing.T) {
	// Upper-case input must pass validation and be rewritten to lower case.
	upper := defaultConfig()
	upper.Hypervisor.FirecrackerSnapshotMemoryBackend = "UFFD"
	err := upper.Validate()
	if err != nil {
		t.Fatalf("expected UFFD backend to validate, got %v", err)
	}
	if got := upper.Hypervisor.FirecrackerSnapshotMemoryBackend; got != "uffd" {
		t.Fatalf("expected backend to normalize to uffd, got %q", got)
	}

	// A backend outside {file,uffd} must be refused.
	unknown := defaultConfig()
	unknown.Hypervisor.FirecrackerSnapshotMemoryBackend = "bad"
	if unknown.Validate() == nil {
		t.Fatalf("expected invalid firecracker snapshot backend validation error")
	}

	// The cache size string must parse as a byte size.
	badSize := defaultConfig()
	badSize.Hypervisor.FirecrackerUFFDCacheMaxBytes = "not-a-size"
	if badSize.Validate() == nil {
		t.Fatalf("expected invalid firecracker uffd cache size validation error")
	}
}

func TestLoadEnvOverridesMetricsAndOtelInterval(t *testing.T) {
Expand Down
15 changes: 15 additions & 0 deletions cmd/uffd-pager/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
package main

import (
"log/slog"
"os"

"github.com/kernel/hypeman/lib/uffdpager"
)

// main hands the command-line arguments (without the program name) to
// uffdpager.Main. A returned error is logged and the process exits with
// status 1.
func main() {
	err := uffdpager.Main(os.Args[1:])
	if err == nil {
		return
	}
	slog.Error("uffd pager terminated", "error", err)
	os.Exit(1)
}
2 changes: 1 addition & 1 deletion lib/hypervisor/cloudhypervisor/process.go
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ func (s *Starter) StartVM(ctx context.Context, p *paths.Paths, version string, s

// RestoreVM starts Cloud Hypervisor and restores VM state from a snapshot.
// The VM is in paused state after restore; caller should call Resume() to continue execution.
func (s *Starter) RestoreVM(ctx context.Context, p *paths.Paths, version string, socketPath string, snapshotPath string) (int, hypervisor.Hypervisor, error) {
func (s *Starter) RestoreVM(ctx context.Context, p *paths.Paths, version string, socketPath string, snapshotPath string, _ hypervisor.RestoreOptions) (int, hypervisor.Hypervisor, error) {
log := logger.FromContext(ctx)
startTime := time.Now()

Expand Down
26 changes: 18 additions & 8 deletions lib/hypervisor/firecracker/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,11 +74,15 @@ type snapshotCreateParams struct {
}

type snapshotLoadParams struct {
MemFilePath string `json:"mem_file_path,omitempty"`
SnapshotPath string `json:"snapshot_path"`
EnableDiffSnapshots bool `json:"enable_diff_snapshots,omitempty"`
ResumeVM bool `json:"resume_vm,omitempty"`
NetworkOverrides []networkOverride `json:"network_overrides,omitempty"`
MemBackend *snapshotMemBackend `json:"mem_backend,omitempty"`
SnapshotPath string `json:"snapshot_path"`
EnableDiffSnapshots bool `json:"enable_diff_snapshots,omitempty"`
NetworkOverrides []networkOverride `json:"network_overrides,omitempty"`
}

// snapshotMemBackend is the "mem_backend" object sent in a snapshot load request.
type snapshotMemBackend struct {
// BackendType selects the backend; this package sends "File" or "Uffd".
BackendType string `json:"backend_type"`
// BackendPath is the snapshot memory file path for "File", or the pager's
// socket path for "Uffd".
BackendPath string `json:"backend_path"`
}

type networkOverride struct {
Expand Down Expand Up @@ -213,16 +217,22 @@ func toSnapshotCreateParams(snapshotDir string) snapshotCreateParams {
}
}

func toSnapshotLoadParams(snapshotDir string, networkOverrides []networkOverride) snapshotLoadParams {
func toSnapshotLoadParams(snapshotDir string, networkOverrides []networkOverride, backend snapshotMemBackend) snapshotLoadParams {
return snapshotLoadParams{
MemFilePath: snapshotMemoryPath(snapshotDir),
MemBackend: &backend,
SnapshotPath: snapshotStatePath(snapshotDir),
EnableDiffSnapshots: true,
ResumeVM: false,
NetworkOverrides: networkOverrides,
}
}

// fileSnapshotMemBackend returns the "File" memory backend whose path is the
// memory file inside snapshotDir.
func fileSnapshotMemBackend(snapshotDir string) snapshotMemBackend {
	var backend snapshotMemBackend
	backend.BackendType = "File"
	backend.BackendPath = snapshotMemoryPath(snapshotDir)
	return backend
}

// snapshotStatePath returns the location of the snapshot state file inside
// snapshotDir.
func snapshotStatePath(snapshotDir string) string {
	statePath := filepath.Join(snapshotDir, snapshotStateFile)
	return statePath
}
Expand Down
17 changes: 14 additions & 3 deletions lib/hypervisor/firecracker/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,14 +82,25 @@ func TestSnapshotParamPaths(t *testing.T) {

load := toSnapshotLoadParams("/tmp/snapshot-latest", []networkOverride{
{IfaceID: "eth0", HostDevName: "hype-abc123"},
})
}, fileSnapshotMemBackend("/tmp/snapshot-latest"))
assert.Equal(t, "/tmp/snapshot-latest/state", load.SnapshotPath)
assert.Equal(t, "/tmp/snapshot-latest/memory", load.MemFilePath)
require.NotNil(t, load.MemBackend)
assert.Equal(t, "File", load.MemBackend.BackendType)
assert.Equal(t, "/tmp/snapshot-latest/memory", load.MemBackend.BackendPath)
assert.True(t, load.EnableDiffSnapshots)
assert.False(t, load.ResumeVM)
require.Len(t, load.NetworkOverrides, 1)
}

func TestSnapshotLoadParamsSupportsUFFDBackend(t *testing.T) {
load := toSnapshotLoadParams("/tmp/snapshot-latest", nil, snapshotMemBackend{
BackendType: "Uffd",
BackendPath: "/tmp/pager.sock",
})
require.NotNil(t, load.MemBackend)
assert.Equal(t, "Uffd", load.MemBackend.BackendType)
assert.Equal(t, "/tmp/pager.sock", load.MemBackend.BackendPath)
}

func TestToBalloonConfig(t *testing.T) {
cfg := hypervisor.VMConfig{
GuestMemory: hypervisor.GuestMemoryConfig{
Expand Down
4 changes: 2 additions & 2 deletions lib/hypervisor/firecracker/firecracker.go
Original file line number Diff line number Diff line change
Expand Up @@ -223,8 +223,8 @@ func (f *Firecracker) instanceStart(ctx context.Context) error {
return f.postAction(ctx, "InstanceStart")
}

func (f *Firecracker) loadSnapshot(ctx context.Context, snapshotDir string, networkOverrides []networkOverride) error {
params := toSnapshotLoadParams(snapshotDir, networkOverrides)
func (f *Firecracker) loadSnapshot(ctx context.Context, snapshotDir string, networkOverrides []networkOverride, backend snapshotMemBackend) error {
params := toSnapshotLoadParams(snapshotDir, networkOverrides, backend)
if _, err := f.do(ctx, http.MethodPut, "/snapshot/load", params, http.StatusNoContent); err != nil {
return err
}
Expand Down
55 changes: 50 additions & 5 deletions lib/hypervisor/firecracker/process.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import (

"github.com/kernel/hypeman/lib/hypervisor"
"github.com/kernel/hypeman/lib/paths"
"github.com/kernel/hypeman/lib/uffdpager"
"gvisor.dev/gvisor/pkg/cleanup"
)

Expand All @@ -31,11 +32,30 @@ func init() {
})
}

// UFFDClient is the pager session API that Starter.RestoreVM uses when a
// restore requests the UFFD snapshot memory backend.
type UFFDClient interface {
// CreateSession requests a pager session. RestoreVM reads SessionID and
// UFFDSocketPath from the response.
CreateSession(ctx context.Context, req uffdpager.CreateSessionRequest) (*uffdpager.CreateSessionResponse, error)
// CloseSession is called by RestoreVM with the created session ID when
// loading the snapshot fails.
CloseSession(ctx context.Context, sessionID string) error
}

// StarterOption customizes a Starter; NewStarter applies each option in order.
type StarterOption func(*Starter)

// Starter implements hypervisor.VMStarter for Firecracker.
type Starter struct{}
type Starter struct {
// uffd is the optional pager client set by WithUFFDClient. RestoreVM returns
// an error for a UFFD restore when it is nil.
uffd UFFDClient
}

func NewStarter() *Starter {
return &Starter{}
// NewStarter builds a Firecracker Starter and applies the given options to it
// in the order they were passed.
func NewStarter(opts ...StarterOption) *Starter {
	starter := new(Starter)
	for i := range opts {
		opts[i](starter)
	}
	return starter
}

// WithUFFDClient returns an option that stores client on the Starter as its
// UFFD pager client.
func WithUFFDClient(client UFFDClient) StarterOption {
	setClient := func(target *Starter) {
		target.uffd = client
	}
	return setClient
}

var _ hypervisor.VMStarter = (*Starter)(nil)
Expand Down Expand Up @@ -93,7 +113,7 @@ func (s *Starter) StartVM(ctx context.Context, p *paths.Paths, version string, s
return pid, hv, nil
}

func (s *Starter) RestoreVM(ctx context.Context, p *paths.Paths, version string, socketPath string, snapshotPath string) (int, hypervisor.Hypervisor, error) {
func (s *Starter) RestoreVM(ctx context.Context, p *paths.Paths, version string, socketPath string, snapshotPath string, opts hypervisor.RestoreOptions) (int, hypervisor.Hypervisor, error) {
processCtx, processSpan := hypervisor.StartProcessSpan(ctx, hypervisor.TypeFirecracker)
pid, err := s.startProcess(processCtx, p, version, socketPath)
hypervisor.FinishTraceSpan(processSpan, err)
Expand All @@ -115,14 +135,39 @@ func (s *Starter) RestoreVM(ctx context.Context, p *paths.Paths, version string,
if err != nil {
return 0, nil, fmt.Errorf("load firecracker restore metadata: %w", err)
}
backend := fileSnapshotMemBackend(snapshotPath)
createdUFFDSession := ""
if opts.SnapshotMemoryBackend == hypervisor.SnapshotMemoryBackendUFFD {
if s.uffd == nil {
return 0, nil, fmt.Errorf("uffd snapshot restore requested but no uffd pager is configured")
}
sessionID := strings.TrimSpace(opts.SnapshotMemorySessionID)
if sessionID == "" {
sessionID = filepath.Base(filepath.Dir(socketPath))
}
resp, err := s.uffd.CreateSession(ctx, uffdpager.CreateSessionRequest{
SessionID: sessionID,
InstanceID: sessionID,
BackingMemoryPath: snapshotMemoryPath(snapshotPath),
CacheKey: opts.SnapshotMemoryCacheKey,
})
if err != nil {
return 0, nil, fmt.Errorf("create uffd pager session: %w", err)
}
createdUFFDSession = resp.SessionID
backend = snapshotMemBackend{BackendType: "Uffd", BackendPath: resp.UFFDSocketPath}
}
err = func() error {
snapshotSourceAliasMu.Lock()
defer snapshotSourceAliasMu.Unlock()
return withSnapshotSourceDirAlias(meta, filepath.Dir(socketPath), func() error {
return hv.loadSnapshot(ctx, snapshotPath, meta.NetworkOverrides)
return hv.loadSnapshot(ctx, snapshotPath, meta.NetworkOverrides, backend)
})
}()
if err != nil {
if createdUFFDSession != "" {
_ = s.uffd.CloseSession(context.Background(), createdUFFDSession)
}
return 0, nil, fmt.Errorf("load firecracker snapshot: %w", err)
}
if meta.SnapshotSourceDataDir != "" && !meta.RetainSnapshotSourceDataDirAlias {
Expand Down
15 changes: 14 additions & 1 deletion lib/hypervisor/hypervisor.go
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ type VMStarter interface {
// - Cloud Hypervisor: starts process, calls Restore API
// - QEMU: would start with -incoming or -loadvm flags (not yet implemented)
// Returns the process ID and a Hypervisor client. The VM is in paused state after restore.
RestoreVM(ctx context.Context, p *paths.Paths, version string, socketPath string, snapshotPath string) (pid int, hv Hypervisor, err error)
RestoreVM(ctx context.Context, p *paths.Paths, version string, socketPath string, snapshotPath string, opts RestoreOptions) (pid int, hv Hypervisor, err error)

// PrepareFork allows hypervisors to prepare forked instance state.
// For snapshot-based forks, implementations can rewrite snapshot config with
Expand All @@ -124,6 +124,19 @@ type VMStarter interface {
PrepareFork(ctx context.Context, req ForkPrepareRequest) (ForkPrepareResult, error)
}

// SnapshotMemoryBackend names the memory backend requested for a snapshot
// restore via RestoreOptions.
type SnapshotMemoryBackend string

const (
// SnapshotMemoryBackendFile is the "file" backend.
SnapshotMemoryBackendFile SnapshotMemoryBackend = "file"
// SnapshotMemoryBackendUFFD is the "uffd" backend. The Firecracker starter
// requires a configured UFFD pager client to restore with it.
SnapshotMemoryBackendUFFD SnapshotMemoryBackend = "uffd"
)

// RestoreOptions carries per-restore settings passed to VMStarter.RestoreVM.
// The Cloud Hypervisor and QEMU starters ignore it.
type RestoreOptions struct {
// SnapshotMemoryBackend selects the memory backend. The Firecracker starter
// creates a pager session only when it equals SnapshotMemoryBackendUFFD.
SnapshotMemoryBackend SnapshotMemoryBackend
// SnapshotMemoryCacheKey is forwarded as CacheKey in the pager's
// CreateSessionRequest.
SnapshotMemoryCacheKey string
// SnapshotMemorySessionID is the pager session ID. When it is blank, the
// Firecracker starter uses the name of the socket path's parent directory.
SnapshotMemorySessionID string
}

// ForkNetworkConfig contains network identity fields for fork preparation.
type ForkNetworkConfig struct {
TAPDevice string
Expand Down
2 changes: 1 addition & 1 deletion lib/hypervisor/qemu/process.go
Original file line number Diff line number Diff line change
Expand Up @@ -429,7 +429,7 @@ func shouldRetryWithReducedBalloon(err error) bool {

// RestoreVM starts QEMU and restores VM state from a snapshot.
// The VM is in paused state after restore; caller should call Resume() to continue execution.
func (s *Starter) RestoreVM(ctx context.Context, p *paths.Paths, version string, socketPath string, snapshotPath string) (int, hypervisor.Hypervisor, error) {
func (s *Starter) RestoreVM(ctx context.Context, p *paths.Paths, version string, socketPath string, snapshotPath string, _ hypervisor.RestoreOptions) (int, hypervisor.Hypervisor, error) {
log := logger.FromContext(ctx)
startTime := time.Now()

Expand Down
4 changes: 2 additions & 2 deletions lib/hypervisor/tracing.go
Original file line number Diff line number Diff line change
Expand Up @@ -296,7 +296,7 @@ func (s *tracingVMStarter) StartVM(ctx context.Context, p *paths.Paths, version
return pid, hv, err
}

func (s *tracingVMStarter) RestoreVM(ctx context.Context, p *paths.Paths, version string, socketPath string, snapshotPath string) (pid int, hv Hypervisor, err error) {
func (s *tracingVMStarter) RestoreVM(ctx context.Context, p *paths.Paths, version string, socketPath string, snapshotPath string, opts RestoreOptions) (pid int, hv Hypervisor, err error) {
ctx, span := startTraceSpan(ctx, s.tracer, "hypervisor.restore_vm",
attribute.String("hypervisor", string(s.hvType)),
attribute.String("operation", "restore_vm"),
Expand All @@ -307,7 +307,7 @@ func (s *tracingVMStarter) RestoreVM(ctx context.Context, p *paths.Paths, versio
}
finishTraceSpan(span, err)
}()
pid, hv, err = s.next.RestoreVM(ctx, p, version, socketPath, snapshotPath)
pid, hv, err = s.next.RestoreVM(ctx, p, version, socketPath, snapshotPath, opts)
return pid, hv, err
}

Expand Down
2 changes: 1 addition & 1 deletion lib/hypervisor/tracing_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ func (s fakeStarter) GetVersion(*paths.Paths) (string, error) { return "test", n
// StartVM is a stub that ignores its arguments and always reports PID 42 with
// the preconfigured hypervisor and no error.
func (s fakeStarter) StartVM(context.Context, *paths.Paths, string, string, VMConfig) (int, Hypervisor, error) {
	const fakePID = 42
	return fakePID, s.returned, nil
}
func (s fakeStarter) RestoreVM(context.Context, *paths.Paths, string, string, string) (int, Hypervisor, error) {
// RestoreVM is a stub that ignores its arguments, including RestoreOptions,
// and always reports PID 43 with the preconfigured hypervisor and no error.
func (s fakeStarter) RestoreVM(context.Context, *paths.Paths, string, string, string, RestoreOptions) (int, Hypervisor, error) {
	const fakePID = 43
	return fakePID, s.returned, nil
}
func (s fakeStarter) PrepareFork(context.Context, ForkPrepareRequest) (ForkPrepareResult, error) {
Expand Down
Loading
Loading