Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 24 additions & 7 deletions cmd/api/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -193,10 +193,12 @@ type CapacityConfig struct {

// HypervisorConfig holds hypervisor settings.
type HypervisorConfig struct {
Default string `koanf:"default"`
CloudHypervisorDefaultVersion string `koanf:"cloud_hypervisor_default_version"`
FirecrackerBinaryPath string `koanf:"firecracker_binary_path"`
Memory HypervisorMemoryConfig `koanf:"memory"`
Default string `koanf:"default"`
CloudHypervisorDefaultVersion string `koanf:"cloud_hypervisor_default_version"`
FirecrackerBinaryPath string `koanf:"firecracker_binary_path"`
FirecrackerSnapshotMemoryBackend string `koanf:"firecracker_snapshot_memory_backend"`
FirecrackerUFFDCacheMaxBytes string `koanf:"firecracker_uffd_cache_max_bytes"`
Memory HypervisorMemoryConfig `koanf:"memory"`
}

// HypervisorMemoryConfig holds guest memory management settings.
Expand Down Expand Up @@ -404,9 +406,11 @@ func defaultConfig() *Config {
},

Hypervisor: HypervisorConfig{
Default: "cloud-hypervisor",
CloudHypervisorDefaultVersion: "",
FirecrackerBinaryPath: "",
Default: "cloud-hypervisor",
CloudHypervisorDefaultVersion: "",
FirecrackerBinaryPath: "",
FirecrackerSnapshotMemoryBackend: "file",
FirecrackerUFFDCacheMaxBytes: "4294967296",
Memory: HypervisorMemoryConfig{
Enabled: false,
KernelPageInitMode: "hardened",
Expand Down Expand Up @@ -618,6 +622,19 @@ func (c *Config) Validate() error {
if c.Hypervisor.Memory.KernelPageInitMode != "performance" && c.Hypervisor.Memory.KernelPageInitMode != "hardened" {
return fmt.Errorf("hypervisor.memory.kernel_page_init_mode must be one of {performance,hardened}, got %q", c.Hypervisor.Memory.KernelPageInitMode)
}
backend := strings.ToLower(strings.TrimSpace(c.Hypervisor.FirecrackerSnapshotMemoryBackend))
if backend == "" {
backend = "file"
}
switch backend {
case "file", "uffd":
c.Hypervisor.FirecrackerSnapshotMemoryBackend = backend
default:
return fmt.Errorf("hypervisor.firecracker_snapshot_memory_backend must be one of {file,uffd}, got %q", c.Hypervisor.FirecrackerSnapshotMemoryBackend)
}
if err := validateByteSize("hypervisor.firecracker_uffd_cache_max_bytes", c.Hypervisor.FirecrackerUFFDCacheMaxBytes); err != nil {
return err
}
if err := validateDuration("hypervisor.memory.active_ballooning.poll_interval", c.Hypervisor.Memory.ActiveBallooning.PollInterval); err != nil {
return err
}
Expand Down
29 changes: 29 additions & 0 deletions cmd/api/config/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,35 @@ func TestDefaultConfigIncludesMetricsSettings(t *testing.T) {
if cfg.Instances.LifecycleEventBufferSize != 256 {
t.Fatalf("expected default instances.lifecycle_event_buffer_size to be 256, got %d", cfg.Instances.LifecycleEventBufferSize)
}
if cfg.Hypervisor.FirecrackerSnapshotMemoryBackend != "file" {
t.Fatalf("expected default firecracker snapshot backend to be file, got %q", cfg.Hypervisor.FirecrackerSnapshotMemoryBackend)
}
if cfg.Hypervisor.FirecrackerUFFDCacheMaxBytes != "4294967296" {
t.Fatalf("expected default firecracker uffd cache size to be 4294967296, got %q", cfg.Hypervisor.FirecrackerUFFDCacheMaxBytes)
}
}

// TestValidateFirecrackerSnapshotMemoryBackend covers three Validate outcomes
// for the Firecracker snapshot settings: a mixed-case backend is accepted and
// normalized to lower case, an unknown backend is rejected, and an unparsable
// UFFD cache size is rejected.
func TestValidateFirecrackerSnapshotMemoryBackend(t *testing.T) {
	// Mixed-case input must validate and be rewritten in place as "uffd".
	upperCase := defaultConfig()
	upperCase.Hypervisor.FirecrackerSnapshotMemoryBackend = "UFFD"
	err := upperCase.Validate()
	if err != nil {
		t.Fatalf("expected UFFD backend to validate, got %v", err)
	}
	if got := upperCase.Hypervisor.FirecrackerSnapshotMemoryBackend; got != "uffd" {
		t.Fatalf("expected backend to normalize to uffd, got %q", got)
	}

	// A backend outside {file,uffd} must fail validation.
	unknownBackend := defaultConfig()
	unknownBackend.Hypervisor.FirecrackerSnapshotMemoryBackend = "bad"
	if unknownBackend.Validate() == nil {
		t.Fatalf("expected invalid firecracker snapshot backend validation error")
	}

	// A cache size that is not a byte size must fail validation.
	badCacheSize := defaultConfig()
	badCacheSize.Hypervisor.FirecrackerUFFDCacheMaxBytes = "not-a-size"
	if badCacheSize.Validate() == nil {
		t.Fatalf("expected invalid firecracker uffd cache size validation error")
	}
}

func TestLoadEnvOverridesMetricsAndOtelInterval(t *testing.T) {
Expand Down
5 changes: 5 additions & 0 deletions cmd/api/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ import (
"github.com/kernel/hypeman/lib/paths"
"github.com/kernel/hypeman/lib/registry"
"github.com/kernel/hypeman/lib/scopes"
"github.com/kernel/hypeman/lib/uffdpager"
"github.com/kernel/hypeman/lib/vmm"
nethttpmiddleware "github.com/oapi-codegen/nethttp-middleware"
"github.com/riandyrn/otelchi"
Expand Down Expand Up @@ -132,6 +133,10 @@ func startOCICacheGC(grp *errgroup.Group, ctx context.Context, runner ociCacheGC
}

func run() error {
if len(os.Args) > 1 && os.Args[1] == "--internal-uffd-pager" {
return uffdpager.Main(os.Args[2:])
}

// Load config early for OTel initialization
// Config path can be specified via CONFIG_PATH env var or defaults to platform-specific locations
configPath := os.Getenv("CONFIG_PATH")
Expand Down
73 changes: 62 additions & 11 deletions lib/hypervisor/firecracker/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,10 @@ import (
"fmt"
"os"
"path/filepath"
"strings"

"github.com/kernel/hypeman/lib/hypervisor"
"github.com/kernel/hypeman/lib/uffdpager"
)

const (
Expand Down Expand Up @@ -74,11 +76,16 @@ type snapshotCreateParams struct {
}

type snapshotLoadParams struct {
MemFilePath string `json:"mem_file_path,omitempty"`
SnapshotPath string `json:"snapshot_path"`
EnableDiffSnapshots bool `json:"enable_diff_snapshots,omitempty"`
ResumeVM bool `json:"resume_vm,omitempty"`
NetworkOverrides []networkOverride `json:"network_overrides,omitempty"`
MemBackend *snapshotMemBackend `json:"mem_backend,omitempty"`
SnapshotPath string `json:"snapshot_path"`
EnableDiffSnapshots bool `json:"enable_diff_snapshots,omitempty"`
ResumeVM bool `json:"resume_vm,omitempty"`
NetworkOverrides []networkOverride `json:"network_overrides,omitempty"`
}

// snapshotMemBackend is the "mem_backend" object of a snapshot load request:
// it names the kind of guest-memory backend and where that backend lives.
type snapshotMemBackend struct {
// BackendType selects the backend kind; this package sends "File" or "Uffd".
BackendType string `json:"backend_type"`
// BackendPath locates the backend: the snapshot memory file for "File",
// or the pager's UFFD socket path for "Uffd".
BackendPath string `json:"backend_path"`
}

type networkOverride struct {
Expand All @@ -101,9 +108,18 @@ type instanceInfo struct {
}

type restoreMetadata struct {
NetworkOverrides []networkOverride `json:"network_overrides,omitempty"`
SnapshotSourceDataDir string `json:"snapshot_source_data_dir,omitempty"`
RetainSnapshotSourceDataDirAlias bool `json:"retain_snapshot_source_data_dir_alias,omitempty"`
NetworkOverrides []networkOverride `json:"network_overrides,omitempty"`
SnapshotSourceDataDir string `json:"snapshot_source_data_dir,omitempty"`
RetainSnapshotSourceDataDirAlias bool `json:"retain_snapshot_source_data_dir_alias,omitempty"`
SnapshotMemoryBackend string `json:"snapshot_memory_backend,omitempty"`
UFFDCacheKey string `json:"uffd_cache_key,omitempty"`
UFFDOverlays []uffdpager.OverlayPage `json:"uffd_overlays,omitempty"`
}

// SnapshotMemoryBackendConfig is the input to ConfigureSnapshotMemoryBackend.
// It describes which snapshot memory backend should be recorded in an
// instance's restore metadata.
type SnapshotMemoryBackendConfig struct {
// Backend is the backend name. It is trimmed and lower-cased before matching,
// and an empty value selects the file backend.
Backend string
// CacheKey must be non-blank for the UFFD backend; it is cleared for the file backend.
CacheKey string
// Overlays are copied into the metadata for the UFFD backend; they are cleared for the file backend.
Overlays []uffdpager.OverlayPage
}

func toBootSource(cfg hypervisor.VMConfig) bootSource {
Expand Down Expand Up @@ -213,16 +229,23 @@ func toSnapshotCreateParams(snapshotDir string) snapshotCreateParams {
}
}

func toSnapshotLoadParams(snapshotDir string, networkOverrides []networkOverride, resumeVM bool) snapshotLoadParams {
func toSnapshotLoadParams(snapshotDir string, networkOverrides []networkOverride, resumeVM bool, backend snapshotMemBackend) snapshotLoadParams {
return snapshotLoadParams{
MemFilePath: snapshotMemoryPath(snapshotDir),
MemBackend: &backend,
SnapshotPath: snapshotStatePath(snapshotDir),
EnableDiffSnapshots: true,
ResumeVM: resumeVM,
NetworkOverrides: networkOverrides,
}
}

// fileSnapshotMemBackend builds the "File" memory backend whose path is the
// memory file belonging to the snapshot in snapshotDir.
func fileSnapshotMemBackend(snapshotDir string) snapshotMemBackend {
	var backend snapshotMemBackend
	backend.BackendType = "File"
	backend.BackendPath = snapshotMemoryPath(snapshotDir)
	return backend
}

// snapshotStatePath joins snapshotDir with the snapshot state file name.
func snapshotStatePath(snapshotDir string) string {
	statePath := filepath.Join(snapshotDir, snapshotStateFile)
	return statePath
}
Expand All @@ -233,7 +256,8 @@ func snapshotMemoryPath(snapshotDir string) string {

func saveRestoreMetadata(instanceDir string, networkConfigs []networkInterface) error {
meta := restoreMetadata{
NetworkOverrides: make([]networkOverride, 0, len(networkConfigs)),
NetworkOverrides: make([]networkOverride, 0, len(networkConfigs)),
SnapshotMemoryBackend: uffdpager.BackendFile,
}
for _, netCfg := range networkConfigs {
meta.NetworkOverrides = append(meta.NetworkOverrides, networkOverride{
Expand All @@ -245,6 +269,33 @@ func saveRestoreMetadata(instanceDir string, networkConfigs []networkInterface)
return saveRestoreMetadataState(instanceDir, &meta)
}

// ConfigureSnapshotMemoryBackend loads the restore metadata stored under
// instanceDir, records the snapshot memory backend described by cfg, and saves
// the metadata back.
//
// cfg.Backend is trimmed and lower-cased before matching, and an empty value
// means the file backend. Choosing the file backend clears any stored UFFD
// cache key and overlays. Choosing the UFFD backend requires a non-blank
// cfg.CacheKey and stores the trimmed key plus a copy of cfg.Overlays. Any
// other backend name, or a missing cache key, returns an error before anything
// is saved. Errors from loading or saving the metadata are returned as-is.
func ConfigureSnapshotMemoryBackend(instanceDir string, cfg SnapshotMemoryBackendConfig) error {
	meta, err := loadRestoreMetadata(instanceDir)
	if err != nil {
		return err
	}

	name := strings.ToLower(strings.TrimSpace(cfg.Backend))

	// File backend (also the default): drop any UFFD-specific state.
	if name == "" || name == uffdpager.BackendFile {
		meta.SnapshotMemoryBackend = uffdpager.BackendFile
		meta.UFFDCacheKey = ""
		meta.UFFDOverlays = nil
		return saveRestoreMetadataState(instanceDir, meta)
	}

	if name != uffdpager.BackendUFFD {
		// Report the caller's original spelling, not the normalized one.
		return fmt.Errorf("unsupported snapshot memory backend %q", cfg.Backend)
	}

	cacheKey := strings.TrimSpace(cfg.CacheKey)
	if cacheKey == "" {
		return fmt.Errorf("uffd cache key is required")
	}

	// Copy the overlays so the stored metadata does not alias the caller's slice.
	var overlays []uffdpager.OverlayPage
	overlays = append(overlays, cfg.Overlays...)

	meta.SnapshotMemoryBackend = uffdpager.BackendUFFD
	meta.UFFDCacheKey = cacheKey
	meta.UFFDOverlays = overlays
	return saveRestoreMetadataState(instanceDir, meta)
}

func saveRestoreMetadataState(instanceDir string, meta *restoreMetadata) error {
data, err := json.MarshalIndent(meta, "", " ")
if err != nil {
Expand Down
46 changes: 44 additions & 2 deletions lib/hypervisor/firecracker/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"testing"

"github.com/kernel/hypeman/lib/hypervisor"
"github.com/kernel/hypeman/lib/uffdpager"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
Expand Down Expand Up @@ -82,14 +83,55 @@ func TestSnapshotParamPaths(t *testing.T) {

load := toSnapshotLoadParams("/tmp/snapshot-latest", []networkOverride{
{IfaceID: "eth0", HostDevName: "hype-abc123"},
}, true)
}, true, fileSnapshotMemBackend("/tmp/snapshot-latest"))
assert.Equal(t, "/tmp/snapshot-latest/state", load.SnapshotPath)
assert.Equal(t, "/tmp/snapshot-latest/memory", load.MemFilePath)
require.NotNil(t, load.MemBackend)
assert.Equal(t, "File", load.MemBackend.BackendType)
assert.Equal(t, "/tmp/snapshot-latest/memory", load.MemBackend.BackendPath)
assert.True(t, load.EnableDiffSnapshots)
assert.True(t, load.ResumeVM)
require.Len(t, load.NetworkOverrides, 1)
}

func TestSnapshotLoadParamsSupportsUFFDBackend(t *testing.T) {
load := toSnapshotLoadParams("/tmp/snapshot-latest", nil, true, snapshotMemBackend{
BackendType: "Uffd",
BackendPath: "/tmp/pager.sock",
})
require.NotNil(t, load.MemBackend)
assert.Equal(t, "Uffd", load.MemBackend.BackendType)
assert.Equal(t, "/tmp/pager.sock", load.MemBackend.BackendPath)
}

func TestConfigureSnapshotMemoryBackendPersistsUFFDAndClearsForFile(t *testing.T) {
dir := t.TempDir()
require.NoError(t, saveRestoreMetadata(dir, []networkInterface{{IfaceID: "eth0", HostDevName: "tap0"}}))

require.NoError(t, ConfigureSnapshotMemoryBackend(dir, SnapshotMemoryBackendConfig{
Backend: uffdpager.BackendUFFD,
CacheKey: "cache-key",
Overlays: []uffdpager.OverlayPage{{
GuestMemoryOffset: 4096,
Path: "/tmp/overlay.page",
}},
}))
meta, err := loadRestoreMetadata(dir)
require.NoError(t, err)
assert.Equal(t, uffdpager.BackendUFFD, meta.SnapshotMemoryBackend)
assert.Equal(t, "cache-key", meta.UFFDCacheKey)
require.Len(t, meta.UFFDOverlays, 1)
assert.Equal(t, int64(4096), meta.UFFDOverlays[0].GuestMemoryOffset)
require.Len(t, meta.NetworkOverrides, 1)

require.NoError(t, ConfigureSnapshotMemoryBackend(dir, SnapshotMemoryBackendConfig{Backend: uffdpager.BackendFile}))
meta, err = loadRestoreMetadata(dir)
require.NoError(t, err)
assert.Equal(t, uffdpager.BackendFile, meta.SnapshotMemoryBackend)
assert.Empty(t, meta.UFFDCacheKey)
assert.Empty(t, meta.UFFDOverlays)
require.Len(t, meta.NetworkOverrides, 1)
}

func TestToBalloonConfig(t *testing.T) {
cfg := hypervisor.VMConfig{
GuestMemory: hypervisor.GuestMemoryConfig{
Expand Down
4 changes: 2 additions & 2 deletions lib/hypervisor/firecracker/firecracker.go
Original file line number Diff line number Diff line change
Expand Up @@ -228,8 +228,8 @@ func (f *Firecracker) instanceStart(ctx context.Context) error {
return f.postAction(ctx, "InstanceStart")
}

func (f *Firecracker) loadSnapshot(ctx context.Context, snapshotDir string, networkOverrides []networkOverride, resumeVM bool) error {
params := toSnapshotLoadParams(snapshotDir, networkOverrides, resumeVM)
func (f *Firecracker) loadSnapshot(ctx context.Context, snapshotDir string, networkOverrides []networkOverride, resumeVM bool, backend snapshotMemBackend) error {
params := toSnapshotLoadParams(snapshotDir, networkOverrides, resumeVM, backend)
if _, err := f.do(ctx, http.MethodPut, "/snapshot/load", params, http.StatusNoContent); err != nil {
return err
}
Expand Down
51 changes: 47 additions & 4 deletions lib/hypervisor/firecracker/process.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import (

"github.com/kernel/hypeman/lib/hypervisor"
"github.com/kernel/hypeman/lib/paths"
"github.com/kernel/hypeman/lib/uffdpager"
"gvisor.dev/gvisor/pkg/cleanup"
)

Expand All @@ -33,11 +34,30 @@ func init() {
})
}

// UFFDClient is the pager-session API a Starter uses when restoring a
// snapshot whose restore metadata selects the UFFD memory backend.
type UFFDClient interface {
// CreateSession opens a pager session for req. RestoreVM passes the response's
// UFFD socket path to Firecracker as the memory backend path.
CreateSession(ctx context.Context, req uffdpager.CreateSessionRequest) (*uffdpager.CreateSessionResponse, error)
// CloseSession ends the session identified by sessionID. RestoreVM calls it
// when loading the snapshot fails after a session was created.
CloseSession(ctx context.Context, sessionID string) error
}

// StarterOption customizes a Starter; NewStarter applies options in the order given.
type StarterOption func(*Starter)

// Starter implements hypervisor.VMStarter for Firecracker.
type Starter struct{}
type Starter struct {
uffd UFFDClient
}

// NewStarter returns a new Starter after applying every option in opts, in order.
func NewStarter(opts ...StarterOption) *Starter {
	starter := new(Starter)
	for i := range opts {
		opts[i](starter)
	}
	return starter
}

func NewStarter() *Starter {
return &Starter{}
// WithUFFDClient returns an option that sets the Starter's UFFD client to client.
func WithUFFDClient(client UFFDClient) StarterOption {
	setClient := func(starter *Starter) {
		starter.uffd = client
	}
	return setClient
}

var _ hypervisor.VMStarter = (*Starter)(nil)
Expand Down Expand Up @@ -118,14 +138,37 @@ func (s *Starter) RestoreVM(ctx context.Context, p *paths.Paths, version string,
return 0, nil, fmt.Errorf("load firecracker restore metadata: %w", err)
}
resumeOnLoad := shouldResumeOnSnapshotLoad()
backend := fileSnapshotMemBackend(snapshotPath)
createdUFFDSession := ""
if strings.EqualFold(strings.TrimSpace(meta.SnapshotMemoryBackend), uffdpager.BackendUFFD) {
if s.uffd == nil {
return 0, nil, fmt.Errorf("uffd snapshot restore requested but no uffd pager is configured")
}
sessionID := filepath.Base(filepath.Dir(socketPath))
resp, err := s.uffd.CreateSession(ctx, uffdpager.CreateSessionRequest{
SessionID: sessionID,
InstanceID: sessionID,
BackingMemoryPath: snapshotMemoryPath(snapshotPath),
CacheKey: meta.UFFDCacheKey,
Overlays: meta.UFFDOverlays,
})
if err != nil {
return 0, nil, fmt.Errorf("create uffd pager session: %w", err)
}
createdUFFDSession = resp.SessionID
backend = snapshotMemBackend{BackendType: "Uffd", BackendPath: resp.UFFDSocketPath}
}
err = func() error {
snapshotSourceAliasMu.Lock()
defer snapshotSourceAliasMu.Unlock()
return withSnapshotSourceDirAlias(meta, filepath.Dir(socketPath), func() error {
return hv.loadSnapshot(ctx, snapshotPath, meta.NetworkOverrides, resumeOnLoad)
return hv.loadSnapshot(ctx, snapshotPath, meta.NetworkOverrides, resumeOnLoad, backend)
})
}()
if err != nil {
if createdUFFDSession != "" {
_ = s.uffd.CloseSession(context.Background(), createdUFFDSession)
}
return 0, nil, fmt.Errorf("load firecracker snapshot: %w", err)
}
hv.restoredResumed = resumeOnLoad
Expand Down
1 change: 1 addition & 0 deletions lib/instances/delete.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ func (m *manager) deleteInstance(
log.WarnContext(ctx, "failed to kill hypervisor, continuing with cleanup", "instance_id", id, "error", err)
}
}
m.closeFirecrackerUFFDSession(ctx, stored)

// 6. Release network allocation
if inst.NetworkEnabled {
Expand Down
Loading
Loading