Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions cmd/api/api/instances.go
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,8 @@ func (s *ApiService) CreateInstance(ctx context.Context, request oapi.CreateInst
Volumes: volumes,
Hypervisor: hvType,
GPU: gpuConfig,
SkipKernelHeaders: request.Body.SkipKernelHeaders != nil && *request.Body.SkipKernelHeaders,
SkipGuestAgent: request.Body.SkipGuestAgent != nil && *request.Body.SkipGuestAgent,
}

inst, err := s.InstanceManager.CreateInstance(ctx, domainReq)
Expand Down
4 changes: 4 additions & 0 deletions lib/instances/configdisk.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,10 @@ func (m *manager) buildGuestConfig(ctx context.Context, inst *Instance, imageInf
cfg.InitMode = "systemd"
}

// Boot optimizations
cfg.SkipKernelHeaders = inst.SkipKernelHeaders
cfg.SkipGuestAgent = inst.SkipGuestAgent

return cfg
}

Expand Down
2 changes: 2 additions & 0 deletions lib/instances/create.go
Original file line number Diff line number Diff line change
Expand Up @@ -335,6 +335,8 @@ func (m *manager) createInstance(
Devices: resolvedDeviceIDs,
GPUProfile: gpuProfile,
GPUMdevUUID: gpuMdevUUID,
SkipKernelHeaders: req.SkipKernelHeaders,
SkipGuestAgent: req.SkipGuestAgent,
}

// 12. Ensure directories
Expand Down
6 changes: 6 additions & 0 deletions lib/instances/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,10 @@ type StoredMetadata struct {
// GPU configuration (vGPU mode)
GPUProfile string // vGPU profile name (e.g., "L40S-1Q")
GPUMdevUUID string // mdev device UUID

// Boot optimizations
SkipKernelHeaders bool // Skip kernel headers installation (disables DKMS)
SkipGuestAgent bool // Skip guest-agent installation (disables exec/stat API)
}

// Instance represents a virtual machine instance with derived runtime state
Expand Down Expand Up @@ -120,6 +124,8 @@ type CreateInstanceRequest struct {
Volumes []VolumeAttachment // Volumes to attach at creation time
Hypervisor hypervisor.Type // Optional: hypervisor type (defaults to config)
GPU *GPUConfig // Optional: vGPU configuration
SkipKernelHeaders bool // Skip kernel headers installation (disables DKMS)
SkipGuestAgent bool // Skip guest-agent installation (disables exec/stat API)
}

// AttachVolumeRequest is the domain request for attaching a volume (used for API compatibility)
Expand Down
321 changes: 168 additions & 153 deletions lib/oapi/oapi.go

Large diffs are not rendered by default.

6 changes: 6 additions & 0 deletions lib/system/init/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"fmt"
"os"
"os/exec"
"time"

"github.com/kernel/hypeman/lib/vmconfig"
)
Expand All @@ -19,6 +20,11 @@ func readConfig(log *Logger) (*vmconfig.Config, error) {
return nil, fmt.Errorf("mkdir config mount: %w", err)
}

// Wait for config disk to be ready (polls every 10ms, 2s timeout)
if err := waitForDevice("/dev/vdc", 2*time.Second); err != nil {
return nil, fmt.Errorf("wait for config device: %w", err)
}

// Mount config disk (/dev/vdc) read-only
cmd := exec.Command("/bin/mount", "-o", "ro", "/dev/vdc", configMount)
if output, err := cmd.CombinedOutput(); err != nil {
Expand Down
16 changes: 10 additions & 6 deletions lib/system/init/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,16 +60,20 @@ func main() {
dropToShell()
}

// Phase 7: Copy guest-agent to target location
if err := copyGuestAgent(log); err != nil {
// Phase 7: Copy guest-agent to target location (skips if already exists or skip_guest_agent=true)
if err := copyGuestAgent(log, cfg.SkipGuestAgent); err != nil {
log.Error("agent", "failed to copy guest-agent", err)
// Continue anyway - exec will still work, just no remote access
}

// Phase 8: Setup kernel headers for DKMS
if err := setupKernelHeaders(log); err != nil {
log.Error("headers", "failed to setup kernel headers", err)
// Continue anyway - only needed for DKMS module building
// Phase 8: Setup kernel headers for DKMS (can be skipped via config)
if cfg.SkipKernelHeaders {
log.Info("headers", "skipping kernel headers setup (skip_kernel_headers=true)")
} else {
if err := setupKernelHeaders(log); err != nil {
log.Error("headers", "failed to setup kernel headers", err)
// Continue anyway - only needed for DKMS module building
}
}

// Phase 9: Mode-specific execution
Expand Down
21 changes: 13 additions & 8 deletions lib/system/init/mode_exec.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,13 +36,18 @@ func runExecMode(log *Logger, cfg *vmconfig.Config) {
os.Setenv("PATH", "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin")
os.Setenv("HOME", "/root")

// Start guest-agent in background
log.Info("exec", "starting guest-agent in background")
agentCmd := exec.Command("/opt/hypeman/guest-agent")
agentCmd.Stdout = os.Stdout
agentCmd.Stderr = os.Stderr
if err := agentCmd.Start(); err != nil {
log.Error("exec", "failed to start guest-agent", err)
// Start guest-agent in background (skip if guest-agent was not copied)
var agentCmd *exec.Cmd
if cfg.SkipGuestAgent {
log.Info("exec", "skipping guest-agent (skip_guest_agent=true)")
} else {
log.Info("exec", "starting guest-agent in background")
agentCmd = exec.Command("/opt/hypeman/guest-agent")
agentCmd.Stdout = os.Stdout
agentCmd.Stderr = os.Stderr
if err := agentCmd.Start(); err != nil {
log.Error("exec", "failed to start guest-agent", err)
}
}

// Build the entrypoint command
Expand Down Expand Up @@ -94,7 +99,7 @@ func runExecMode(log *Logger, cfg *vmconfig.Config) {
// Wait for guest-agent (keeps init alive, prevents kernel panic)
// The guest-agent runs forever, so this effectively keeps the VM alive
// until it's explicitly terminated
if agentCmd.Process != nil {
if agentCmd != nil && agentCmd.Process != nil {
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

VM terminates unexpectedly when skip_guest_agent is enabled

Medium Severity

When skip_guest_agent is true in exec mode, the VM terminates when the application exits because nothing keeps PID 1 alive. The code comment explicitly notes that the guest-agent wait "keeps init alive, prevents kernel panic," but when skipped, syscall.Exit(exitCode) is called immediately after the app finishes. This causes a kernel panic when PID 1 exits. However, the OpenAPI documentation states "The instance will still run, but remote command execution will be unavailable," which contradicts the actual behavior.

Additional Locations (1)

Fix in Cursor Fix in Web

agentCmd.Wait()
}

Expand Down
14 changes: 9 additions & 5 deletions lib/system/init/mode_systemd.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,15 @@ import (
func runSystemdMode(log *Logger, cfg *vmconfig.Config) {
const newroot = "/overlay/newroot"

// Inject hypeman-agent.service
log.Info("systemd", "injecting hypeman-agent.service")
if err := injectAgentService(newroot); err != nil {
log.Error("systemd", "failed to inject service", err)
// Continue anyway - VM will work, just without agent
// Inject hypeman-agent.service (skip if guest-agent was not copied)
if cfg.SkipGuestAgent {
log.Info("systemd", "skipping agent service injection (skip_guest_agent=true)")
} else {
log.Info("systemd", "injecting hypeman-agent.service")
if err := injectAgentService(newroot); err != nil {
log.Error("systemd", "failed to inject service", err)
// Continue anyway - VM will work, just without agent
}
}

// Change root to the new filesystem using chroot
Expand Down
44 changes: 41 additions & 3 deletions lib/system/init/mount.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,13 +63,36 @@ func mountEssentials(log *Logger) error {
return nil
}

// waitForDevice blocks until the given device path can be stat'ed or the
// timeout elapses.
//
// The path is probed once immediately and then again every 10ms. Any os.Stat
// error is treated as "not ready yet". If the deadline passes without a
// successful probe, an error naming the device is returned.
func waitForDevice(device string, timeout time.Duration) error {
	const pollInterval = 10 * time.Millisecond

	// The sleep lives in the loop's post statement, so the first probe runs
	// without delay and each later probe is preceded by one poll interval.
	for deadline := time.Now().Add(timeout); ; time.Sleep(pollInterval) {
		_, statErr := os.Stat(device)
		if statErr == nil {
			return nil
		}
		if time.Now().After(deadline) {
			return fmt.Errorf("timeout waiting for device %s", device)
		}
	}
}

// setupOverlay sets up the overlay filesystem:
// - /dev/vda: readonly rootfs (ext4)
// - /dev/vdb: writable overlay disk (ext4)
// - /overlay/newroot: merged overlay filesystem
func setupOverlay(log *Logger) error {
// Wait for block devices to be ready
time.Sleep(500 * time.Millisecond)
// Wait for block devices to be ready (polls every 10ms, 2s timeout)
// This typically completes in 10-50ms instead of always waiting 500ms
if err := waitForDevice("/dev/vda", 2*time.Second); err != nil {
return fmt.Errorf("wait for rootfs device: %w", err)
}
if err := waitForDevice("/dev/vdb", 2*time.Second); err != nil {
return fmt.Errorf("wait for overlay device: %w", err)
}

// Create mount points
for _, dir := range []string{"/lower", "/overlay"} {
Expand Down Expand Up @@ -197,12 +220,27 @@ func redirectToConsole(device string) {
}

// copyGuestAgent copies the guest-agent binary to the target location in the new root.
func copyGuestAgent(log *Logger) error {
// It skips copying if:
// - skipGuestAgent config option is true
// - The destination file already exists (lazy copy optimization)
func copyGuestAgent(log *Logger, skipGuestAgent bool) error {
const (
src = "/usr/local/bin/guest-agent"
dst = "/overlay/newroot/opt/hypeman/guest-agent"
)

// Check for skip via config
if skipGuestAgent {
log.Info("agent", "skipping guest-agent copy (skip_guest_agent=true)")
return nil
}

// Check if destination already exists (lazy copy - skip if already present)
if _, err := os.Stat(dst); err == nil {
log.Info("agent", "guest-agent already exists, skipping copy")
return nil
}

// Create target directory
if err := os.MkdirAll("/overlay/newroot/opt/hypeman", 0755); err != nil {
return fmt.Errorf("mkdir: %w", err)
Expand Down
4 changes: 4 additions & 0 deletions lib/vmconfig/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@ type Config struct {

// Init mode: "exec" (default) or "systemd"
InitMode string `json:"init_mode"`

// Boot optimizations
SkipKernelHeaders bool `json:"skip_kernel_headers,omitempty"`
SkipGuestAgent bool `json:"skip_guest_agent,omitempty"`
}

// VolumeMount represents a volume mount configuration.
Expand Down
17 changes: 17 additions & 0 deletions openapi.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,23 @@ components:
enum: [cloud-hypervisor, qemu]
description: Hypervisor to use for this instance. Defaults to server configuration.
example: cloud-hypervisor
skip_kernel_headers:
type: boolean
description: |
Skip kernel headers installation during boot for faster startup.
When true, DKMS (Dynamic Kernel Module Support) will not work,
preventing compilation of out-of-tree kernel modules (e.g., NVIDIA vGPU drivers).
Recommended for workloads that don't need kernel module compilation.
default: false
example: true
skip_guest_agent:
type: boolean
description: |
Skip guest-agent installation during boot.
When true, the exec and stat APIs will not work for this instance.
The instance will still run, but remote command execution will be unavailable.
default: false
example: false
# Future: port_mappings, timeout_seconds

Instance:
Expand Down