From d0f7d57f277f66ee19ebbb782276b74d3f897ab4 Mon Sep 17 00:00:00 2001
From: Nagadeesh Nagaraja
Date: Fri, 12 Sep 2025 10:01:35 +0200
Subject: [PATCH 01/12] Handle reboot skip disks during Maintenance
---
api/v1alpha1/servermaintenance_types.go | 14 +++
api/v1alpha1/zz_generated.deepcopy.go | 2 +-
bmc/bmc.go | 3 +
bmc/redfish.go | 9 ++
...metal.ironcore.dev_servermaintenances.yaml | 4 +
...metal.ironcore.dev_servermaintenances.yaml | 4 +
.../controller/biossettings_controller.go | 14 ++-
internal/controller/biosversion_controller.go | 11 +++
internal/controller/bmcsettings_controller.go | 11 +++
.../servermaintenance_controller.go | 87 ++++++++++++++-----
10 files changed, 135 insertions(+), 24 deletions(-)
diff --git a/api/v1alpha1/servermaintenance_types.go b/api/v1alpha1/servermaintenance_types.go
index 06b05d91..9d2dae27 100644
--- a/api/v1alpha1/servermaintenance_types.go
+++ b/api/v1alpha1/servermaintenance_types.go
@@ -17,6 +17,16 @@ const (
ServerMaintenanceApprovalKey = "metal.ironcore.dev/maintenance-approval"
)
+// ServerMaintenancePolicy specifies the maintenance policy to be enforced on the server.
+type MaintenanceBootPolicy string
+
+const (
+ // ServerMaintenanceBootPolicyBIOSSettingBoot specifies that the Server boot order is change to "BIOSSttings" during Maintenance period.
+ MaintenanceBootPolicyBIOSSetup MaintenanceBootPolicy = "BiosSetup"
+ // MaintenanceBootPolicyNone specifies that the Server boot order is not changed for Maintenance period.
+ MaintenanceBootPolicyNone MaintenanceBootPolicy = "None"
+)
+
// ServerBootConfigurationTemplate defines the parameters to be used for rendering a boot configuration.
type ServerBootConfigurationTemplate struct {
// Name specifies the name of the boot configuration.
@@ -34,6 +44,10 @@ type ServerMaintenanceSpec struct {
// +optional
Policy ServerMaintenancePolicy `json:"policy,omitempty"`
+ // MaintenanceBootPolicy specifies the boot policy to be enforced on the server during maintenance.
+ // +optional
+ MaintenanceBootPolicy MaintenanceBootPolicy `json:"maintenanceBootPolicy,omitempty"`
+
// ServerRef is a reference to the server that is to be maintained.
// +required
ServerRef *corev1.LocalObjectReference `json:"serverRef"`
diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go
index cd44503e..333fe6c8 100644
--- a/api/v1alpha1/zz_generated.deepcopy.go
+++ b/api/v1alpha1/zz_generated.deepcopy.go
@@ -8,7 +8,7 @@
package v1alpha1
import (
- v1 "k8s.io/api/core/v1"
+ "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
runtime "k8s.io/apimachinery/pkg/runtime"
)
diff --git a/bmc/bmc.go b/bmc/bmc.go
index e7bd1cd0..3f50898a 100644
--- a/bmc/bmc.go
+++ b/bmc/bmc.go
@@ -101,6 +101,9 @@ type BMC interface {
// SetBootOrder sets the boot order for the system.
SetBootOrder(ctx context.Context, systemURI string, order []string) error
+ // SetBootOverride sets the boot Override for the system.
+ SetBootOverride(ctx context.Context, systemURI string, overrideConfig redfish.Boot) error
+
// GetStorages retrieves storage information for the system.
GetStorages(ctx context.Context, systemURI string) ([]Storage, error)
diff --git a/bmc/redfish.go b/bmc/redfish.go
index 37699ec6..bb54d884 100644
--- a/bmc/redfish.go
+++ b/bmc/redfish.go
@@ -488,6 +488,15 @@ func (r *RedfishBMC) SetBootOrder(ctx context.Context, systemURI string, bootOrd
)
}
+// SetBootOverride sets server boot override
+func (r *RedfishBMC) SetBootOverride(ctx context.Context, systemURI string, overrideConfig redfish.Boot) error {
+ system, err := r.getSystemFromUri(ctx, systemURI)
+ if err != nil {
+ return err
+ }
+ return system.SetBoot(overrideConfig)
+}
+
func (r *RedfishBMC) getFilteredBiosRegistryAttributes(readOnly bool, immutable bool) (map[string]RegistryEntryAttributes, error) {
registries, err := r.client.Service.Registries()
if err != nil {
diff --git a/config/crd/bases/metal.ironcore.dev_servermaintenances.yaml b/config/crd/bases/metal.ironcore.dev_servermaintenances.yaml
index 3be2d4d0..db233497 100644
--- a/config/crd/bases/metal.ironcore.dev_servermaintenances.yaml
+++ b/config/crd/bases/metal.ironcore.dev_servermaintenances.yaml
@@ -58,6 +58,10 @@ spec:
spec:
description: ServerMaintenanceSpec defines the desired state of a ServerMaintenance
properties:
+ maintenanceBootPolicy:
+ description: MaintenanceBootPolicy specifies the boot policy to be
+ enforced on the server during maintenance.
+ type: string
policy:
description: Policy specifies the maintenance policy to be enforced
on the server.
diff --git a/dist/chart/templates/crd/metal.ironcore.dev_servermaintenances.yaml b/dist/chart/templates/crd/metal.ironcore.dev_servermaintenances.yaml
index 203073f7..d74bd7b2 100755
--- a/dist/chart/templates/crd/metal.ironcore.dev_servermaintenances.yaml
+++ b/dist/chart/templates/crd/metal.ironcore.dev_servermaintenances.yaml
@@ -64,6 +64,10 @@ spec:
spec:
description: ServerMaintenanceSpec defines the desired state of a ServerMaintenance
properties:
+ maintenanceBootPolicy:
+ description: MaintenanceBootPolicy specifies the boot policy to be
+ enforced on the server during maintenance.
+ type: string
policy:
description: Policy specifies the maintenance policy to be enforced
on the server.
diff --git a/internal/controller/biossettings_controller.go b/internal/controller/biossettings_controller.go
index 3ada7d3c..a4247437 100644
--- a/internal/controller/biossettings_controller.go
+++ b/internal/controller/biossettings_controller.go
@@ -460,7 +460,7 @@ func (r *BiosSettingsReconciler) handleSettingInProgressState(
}
// check if the maintenance is granted
- if ok := r.checkIfMaintenanceGranted(log, biosSettings, server); !ok {
+ if ok := r.checkIfMaintenanceGranted(ctx, log, biosSettings, server); !ok {
log.V(1).Info("Waiting for maintenance to be granted before continuing with updating settings")
return ctrl.Result{}, nil
}
@@ -1143,6 +1143,7 @@ func (r *BiosSettingsReconciler) checkForRequiredPowerStatus(
}
func (r *BiosSettingsReconciler) checkIfMaintenanceGranted(
+ ctx context.Context,
log logr.Logger,
biosSettings *metalv1alpha1.BIOSSettings,
server *metalv1alpha1.Server,
@@ -1152,6 +1153,17 @@ func (r *BiosSettingsReconciler) checkIfMaintenanceGranted(
}
if server.Status.State == metalv1alpha1.ServerStateMaintenance {
+ serverMaintenence, err := r.getReferredServerMaintenance(ctx, log, biosSettings.Spec.ServerMaintenanceRef)
+ if err != nil {
+ log.V(1).Info("Failed to get referred ServerMaintenance", "error", err, "serverMaintenanceRef", biosSettings.Spec.ServerMaintenanceRef)
+ return false
+ }
+ if serverMaintenence.Status.State != metalv1alpha1.ServerMaintenanceStateInMaintenance {
+ log.V(1).Info("ServerMaintenance is not in maintenance. waiting...",
+ "serverMaintenance State", serverMaintenence.Status.State,
+ "serverMaintenance", serverMaintenence.Name)
+ return false
+ }
if server.Spec.ServerMaintenanceRef == nil || server.Spec.ServerMaintenanceRef.UID != biosSettings.Spec.ServerMaintenanceRef.UID {
// server in maintenance for other tasks. or
// server maintenance ref is wrong in either server or biosSettings
diff --git a/internal/controller/biosversion_controller.go b/internal/controller/biosversion_controller.go
index b665f31c..531d5b12 100644
--- a/internal/controller/biosversion_controller.go
+++ b/internal/controller/biosversion_controller.go
@@ -209,6 +209,17 @@ func (r *BIOSVersionReconciler) ensureBiosVersionStateTransition(
return false, nil
}
+ serverMaintenence, err := r.getReferredServerMaintenance(ctx, log, biosVersion.Spec.ServerMaintenanceRef)
+ if err != nil {
+ return false, fmt.Errorf("failed to get referred serverMaintenance obj from BIOSVersion: %w", err)
+ }
+ if serverMaintenence.Status.State != metalv1alpha1.ServerMaintenanceStateInMaintenance {
+ log.V(1).Info("ServerMaintenance is not in maintenance. waiting...",
+ "serverMaintenance State", serverMaintenence.Status.State,
+ "serverMaintenance", serverMaintenence.Name)
+ return false, nil
+ }
+
if server.Spec.ServerMaintenanceRef == nil || server.Spec.ServerMaintenanceRef.UID != biosVersion.Spec.ServerMaintenanceRef.UID {
// server in maintenance for other tasks. or
// server maintenance ref is wrong in either server or biosSettings
diff --git a/internal/controller/bmcsettings_controller.go b/internal/controller/bmcsettings_controller.go
index ba5b196c..d44f5c20 100644
--- a/internal/controller/bmcsettings_controller.go
+++ b/internal/controller/bmcsettings_controller.go
@@ -515,6 +515,17 @@ func (r *BMCSettingsReconciler) checkIfMaintenanceGranted(
for _, server := range servers {
if server.Status.State == metalv1alpha1.ServerStateMaintenance {
serverMaintenanceRef := r.getServerMaintenanceRefForServer(bmcSetting.Spec.ServerMaintenanceRefs, server.Spec.ServerMaintenanceRef.UID)
+ serverMaintenance := &metalv1alpha1.ServerMaintenance{}
+ key := client.ObjectKey{Name: serverMaintenanceRef.Name, Namespace: serverMaintenanceRef.Namespace}
+ if err := r.Get(ctx, key, serverMaintenance); err != nil {
+ log.V(1).Error(err, "failed to get referred server's Manitenance", "Server", server.Name, "ServerMaintenanceRef", serverMaintenanceRef)
+ return false
+ }
+ // this gives us the waiting time for the server to be prepared for maintenance by ServerMaintenance controller
+ if serverMaintenance.Status.State != metalv1alpha1.ServerMaintenanceStateInMaintenance {
+ log.V(1).Info("ServerMaintenance not yet in maintenance state", "Server", server.Name, "ServerMaintenance", serverMaintenance.Name, "State", serverMaintenance.Status.State)
+ notInMaintenanceState = append(notInMaintenanceState, server.Name)
+ }
if server.Spec.ServerMaintenanceRef == nil || serverMaintenanceRef == nil {
// server in maintenance for other tasks. or
// server maintenance ref is wrong in either server or bmcSetting
diff --git a/internal/controller/servermaintenance_controller.go b/internal/controller/servermaintenance_controller.go
index d6a44344..6f2cd042 100644
--- a/internal/controller/servermaintenance_controller.go
+++ b/internal/controller/servermaintenance_controller.go
@@ -6,11 +6,15 @@ package controller
import (
"context"
"fmt"
+ "time"
"github.com/go-logr/logr"
"github.com/ironcore-dev/controller-utils/clientutils"
"github.com/ironcore-dev/controller-utils/metautils"
metalv1alpha1 "github.com/ironcore-dev/metal-operator/api/v1alpha1"
+ "github.com/ironcore-dev/metal-operator/bmc"
+ "github.com/ironcore-dev/metal-operator/internal/bmcutils"
+ "github.com/stmcginnis/gofish/redfish"
v1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -31,7 +35,10 @@ const (
// ServerMaintenanceReconciler reconciles a ServerMaintenance object
type ServerMaintenanceReconciler struct {
client.Client
- Scheme *runtime.Scheme
+ Scheme *runtime.Scheme
+ Insecure bool
+ ResyncInterval time.Duration
+ BMCOptions bmc.Options
}
// +kubebuilder:rbac:groups=metal.ironcore.dev,resources=servermaintenances,verbs=get;list;watch;create;update;patch;delete
@@ -114,13 +121,7 @@ func (r *ServerMaintenanceReconciler) handlePendingState(ctx context.Context, lo
}
if server.Spec.ServerClaimRef == nil {
log.V(1).Info("Server has no claim, move to maintenance right away", "Server", server.Name)
- if err = r.updateServerRef(ctx, log, serverMaintenance, server); err != nil {
- log.Error(err, "failed to patch server maintenance ref")
- return ctrl.Result{}, err
- }
- if modified, err := r.patchMaintenanceState(ctx, serverMaintenance, metalv1alpha1.ServerMaintenanceStateInMaintenance); err != nil || modified {
- return ctrl.Result{}, err
- }
+ return r.moveToInMaintenanceState(ctx, log, serverMaintenance, server)
}
serverClaim := &metalv1alpha1.ServerClaim{}
if err := r.Get(ctx,
@@ -150,23 +151,11 @@ func (r *ServerMaintenanceReconciler) handlePendingState(ctx context.Context, lo
return ctrl.Result{}, nil
}
log.V(1).Info("Server approved for maintenance", "Server", server.Name, "Maintenance", serverMaintenance.Name)
- if err = r.updateServerRef(ctx, log, serverMaintenance, server); err != nil {
- log.Error(err, "failed to patch server maintenance ref")
- return ctrl.Result{}, err
- }
- if modified, err := r.patchMaintenanceState(ctx, serverMaintenance, metalv1alpha1.ServerMaintenanceStateInMaintenance); err != nil || modified {
- return ctrl.Result{}, err
- }
+ return r.moveToInMaintenanceState(ctx, log, serverMaintenance, server)
}
if serverMaintenance.Spec.Policy == metalv1alpha1.ServerMaintenancePolicyEnforced {
log.V(1).Info("Enforcing maintenance", "Server", server.Name, "Maintenance", serverMaintenance.Name)
- if err = r.updateServerRef(ctx, log, serverMaintenance, server); err != nil {
- log.Error(err, "failed to patch server maintenance ref")
- return ctrl.Result{}, err
- }
- if modified, err := r.patchMaintenanceState(ctx, serverMaintenance, metalv1alpha1.ServerMaintenanceStateInMaintenance); err != nil || modified {
- return ctrl.Result{}, err
- }
+ return r.moveToInMaintenanceState(ctx, log, serverMaintenance, server)
}
return ctrl.Result{}, nil
}
@@ -348,6 +337,60 @@ func (r *ServerMaintenanceReconciler) cleanup(ctx context.Context, log logr.Logg
return nil
}
+func (r *ServerMaintenanceReconciler) moveToInMaintenanceState(
+ ctx context.Context,
+ log logr.Logger,
+ serverMaintenance *metalv1alpha1.ServerMaintenance,
+ server *metalv1alpha1.Server,
+) (ctrl.Result, error) {
+ if err := r.updateServerRef(ctx, log, serverMaintenance, server); err != nil {
+ log.Error(err, "failed to patch server maintenance ref")
+ return ctrl.Result{}, err
+ }
+
+ if wait, ok, err := r.configureMaintenanceBootConfig(ctx, log, serverMaintenance, server); err != nil {
+ return ctrl.Result{}, err
+ } else if wait {
+ return ctrl.Result{RequeueAfter: 2 * time.Minute}, nil
+ } else if !ok {
+ log.V(1).Info("maintenance boot configuration is completed")
+ }
+
+ if modified, err := r.patchMaintenanceState(ctx, serverMaintenance, metalv1alpha1.ServerMaintenanceStateInMaintenance); err != nil || modified {
+ return ctrl.Result{}, err
+ }
+ return ctrl.Result{}, nil
+}
+
+func (r *ServerMaintenanceReconciler) configureMaintenanceBootConfig(
+ ctx context.Context,
+ log logr.Logger,
+ serverMaintenance *metalv1alpha1.ServerMaintenance,
+ server *metalv1alpha1.Server,
+) (bool, bool, error) {
+ if serverMaintenance.Spec.MaintenanceBootPolicy == metalv1alpha1.MaintenanceBootPolicyNone {
+ return false, false, nil
+ }
+ if server.Status.State != metalv1alpha1.ServerStateMaintenance {
+ log.V(1).Info("Waiting for server to enter maintenance state before configuring maintenance boot config", "Server", server.Name, "State", server.Status.State)
+ return true, false, nil
+ }
+
+ bmcClient, err := bmcutils.GetBMCClientForServer(ctx, r.Client, server, r.Insecure, r.BMCOptions)
+ if err != nil {
+ return false, false, fmt.Errorf("failed to create BMC client: %w", err)
+ }
+ defer bmcClient.Logout()
+
+ err = bmcClient.SetBootOverride(ctx, server.Spec.SystemURI, redfish.Boot{
+ BootSourceOverrideEnabled: redfish.ContinuousBootSourceOverrideEnabled,
+ BootSourceOverrideTarget: redfish.BootSourceOverrideTarget(serverMaintenance.Spec.MaintenanceBootPolicy),
+ BootSourceOverrideMode: redfish.UEFIBootSourceOverrideMode},
+ )
+ log.V(1).Info(" boot override during maintenance is set", "Server", server.Name)
+ return false, err == nil, err
+}
+
func (r *ServerMaintenanceReconciler) removeBootConfigRefFromServer(ctx context.Context, log logr.Logger, config *metalv1alpha1.ServerBootConfiguration, server *metalv1alpha1.Server) error {
if server == nil {
return nil
From d405ac39e4b31157d2f095d6dc821279c06e0f41 Mon Sep 17 00:00:00 2001
From: Nagadeesh Nagaraja
Date: Tue, 16 Sep 2025 09:25:42 +0200
Subject: [PATCH 02/12] skip reboot to disk with boot config to pxe
---
api/v1alpha1/constants.go | 5 +
api/v1alpha1/servermaintenance_types.go | 16 +--
api/v1alpha1/zz_generated.deepcopy.go | 2 +-
bmc/redfish.go | 14 +-
cmd/manager/main.go | 9 ++
...metal.ironcore.dev_servermaintenances.yaml | 4 -
config/dev/macdb.yaml | 34 ++++-
config/dev/manager_patch.yaml | 1 +
.../redfish_mockup_endpoint.yaml | 6 +-
.../samples/metal_v1alpha1_biosversion.yaml | 7 +-
.../servermaintenance_controller.go | 133 ++++++++++++------
11 files changed, 155 insertions(+), 76 deletions(-)
diff --git a/api/v1alpha1/constants.go b/api/v1alpha1/constants.go
index 796f1c07..882e3d4f 100644
--- a/api/v1alpha1/constants.go
+++ b/api/v1alpha1/constants.go
@@ -24,6 +24,11 @@ const (
OperationAnnotationForceUpdateOrDeleteInProgress = "ForceUpdateOrDeleteInProgress"
// OperationAnnotationForceUpdateInProgress allows update of a resource even if it is in progress.
OperationAnnotationForceUpdateInProgress = "ForceUpdateInProgress"
+
+ // OperationSkipBootConfiguration indicates which boot configuration operation should be skipped.
+ OperationSkipBootConfiguration = "metal.ironcore.dev/operation-skip-boot-configuration"
+ // OperationBootConfigurationSkip skips the boot configuration if set to this value.
+ OperationBootConfigurationSkip = "skip"
// OperationAnnotationForceReset forces a reset before next operation
OperationAnnotationForceReset = "ForceReset"
)
diff --git a/api/v1alpha1/servermaintenance_types.go b/api/v1alpha1/servermaintenance_types.go
index 9d2dae27..83d1a9bc 100644
--- a/api/v1alpha1/servermaintenance_types.go
+++ b/api/v1alpha1/servermaintenance_types.go
@@ -17,16 +17,6 @@ const (
ServerMaintenanceApprovalKey = "metal.ironcore.dev/maintenance-approval"
)
-// ServerMaintenancePolicy specifies the maintenance policy to be enforced on the server.
-type MaintenanceBootPolicy string
-
-const (
- // ServerMaintenanceBootPolicyBIOSSettingBoot specifies that the Server boot order is change to "BIOSSttings" during Maintenance period.
- MaintenanceBootPolicyBIOSSetup MaintenanceBootPolicy = "BiosSetup"
- // MaintenanceBootPolicyNone specifies that the Server boot order is not changed for Maintenance period.
- MaintenanceBootPolicyNone MaintenanceBootPolicy = "None"
-)
-
// ServerBootConfigurationTemplate defines the parameters to be used for rendering a boot configuration.
type ServerBootConfigurationTemplate struct {
// Name specifies the name of the boot configuration.
@@ -44,10 +34,6 @@ type ServerMaintenanceSpec struct {
// +optional
Policy ServerMaintenancePolicy `json:"policy,omitempty"`
- // MaintenanceBootPolicy specifies the boot policy to be enforced on the server during maintenance.
- // +optional
- MaintenanceBootPolicy MaintenanceBootPolicy `json:"maintenanceBootPolicy,omitempty"`
-
// ServerRef is a reference to the server that is to be maintained.
// +required
ServerRef *corev1.LocalObjectReference `json:"serverRef"`
@@ -85,6 +71,8 @@ const (
ServerMaintenanceStatePending ServerMaintenanceState = "Pending"
// ServerMaintenanceStateInMaintenance specifies that the server is in maintenance.
ServerMaintenanceStateInMaintenance ServerMaintenanceState = "InMaintenance"
+ // ServerMaintenanceStatePreapareMaintenance specifies that the server is in maintenance.
+ ServerMaintenanceStatePreapareMaintenance ServerMaintenanceState = "PrepareForMaintenance"
// ServerMaintenanceStateFailed specifies that the server maintenance has failed.
ServerMaintenanceStateFailed ServerMaintenanceState = "Failed"
)
diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go
index 333fe6c8..cd44503e 100644
--- a/api/v1alpha1/zz_generated.deepcopy.go
+++ b/api/v1alpha1/zz_generated.deepcopy.go
@@ -8,7 +8,7 @@
package v1alpha1
import (
- "k8s.io/api/core/v1"
+ v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
runtime "k8s.io/apimachinery/pkg/runtime"
)
diff --git a/bmc/redfish.go b/bmc/redfish.go
index bb54d884..2b4db0f3 100644
--- a/bmc/redfish.go
+++ b/bmc/redfish.go
@@ -494,7 +494,19 @@ func (r *RedfishBMC) SetBootOverride(ctx context.Context, systemURI string, over
if err != nil {
return err
}
- return system.SetBoot(overrideConfig)
+ var tSystem struct {
+ Settings common.Settings `json:"@Redfish.Settings"`
+ }
+ // Unfortunately, some vendors (like Dell) support setting through different url.
+ // Hence we need to set it through this url.
+ err = json.Unmarshal(system.RawData, &tSystem)
+ if err != nil {
+ return err
+ }
+ if tSystem.Settings.SettingsObject.String() == "" {
+ return system.SetBoot(overrideConfig)
+ }
+ return system.Patch(tSystem.Settings.SettingsObject.String(), overrideConfig)
}
func (r *RedfishBMC) getFilteredBiosRegistryAttributes(readOnly bool, immutable bool) (map[string]RegistryEntryAttributes, error) {
diff --git a/cmd/manager/main.go b/cmd/manager/main.go
index 0dfba887..70ccdf31 100644
--- a/cmd/manager/main.go
+++ b/cmd/manager/main.go
@@ -366,6 +366,15 @@ func main() { // nolint: gocyclo
if err = (&controller.ServerMaintenanceReconciler{
Client: mgr.GetClient(),
Scheme: mgr.GetScheme(),
+ BMCOptions: bmc.Options{
+ BasicAuth: true,
+ PowerPollingInterval: powerPollingInterval,
+ PowerPollingTimeout: powerPollingTimeout,
+ ResourcePollingInterval: resourcePollingInterval,
+ ResourcePollingTimeout: resourcePollingTimeout,
+ },
+ Insecure: insecure,
+ ResyncInterval: serverResyncInterval,
}).SetupWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "ServerMaintenance")
os.Exit(1)
diff --git a/config/crd/bases/metal.ironcore.dev_servermaintenances.yaml b/config/crd/bases/metal.ironcore.dev_servermaintenances.yaml
index db233497..3be2d4d0 100644
--- a/config/crd/bases/metal.ironcore.dev_servermaintenances.yaml
+++ b/config/crd/bases/metal.ironcore.dev_servermaintenances.yaml
@@ -58,10 +58,6 @@ spec:
spec:
description: ServerMaintenanceSpec defines the desired state of a ServerMaintenance
properties:
- maintenanceBootPolicy:
- description: MaintenanceBootPolicy specifies the boot policy to be
- enforced on the server during maintenance.
- type: string
policy:
description: Policy specifies the maintenance policy to be enforced
on the server.
diff --git a/config/dev/macdb.yaml b/config/dev/macdb.yaml
index 07c2128f..78ca61e8 100644
--- a/config/dev/macdb.yaml
+++ b/config/dev/macdb.yaml
@@ -1,12 +1,34 @@
macPrefixes:
-- macPrefix: 23
- manufacturer: Foo
- protocol: RedfishKube
- port: 8000
+- macPrefix: f4
+ manufacturer: dell
+ protocol: Redfish
+ port: 443
type: bmc
defaultCredentials:
- - username: foo
- password: bar
+ - username: automation
+ password: Password12
+ console:
+ type: ssh
+ port: 22
+- macPrefix: 2c
+ manufacturer: dell
+ protocol: Redfish
+ port: 443
+ type: bmc
+ defaultCredentials:
+ - username: automation
+ password: Password12
console:
type: SSH
port: 22
+- macPrefix: 7C
+ manufacturer: hpe
+ protocol: Redfish
+ port: 443
+ type: bmc
+ defaultCredentials:
+ - username: automation
+ password: Password12
+ console:
+ type: ssh
+ port: 22
diff --git a/config/dev/manager_patch.yaml b/config/dev/manager_patch.yaml
index 55aff1af..8133956d 100644
--- a/config/dev/manager_patch.yaml
+++ b/config/dev/manager_patch.yaml
@@ -15,6 +15,7 @@ spec:
- --mac-prefixes-file=/etc/macdb/macdb.yaml
- --probe-image=ghcr.io/ironcore-dev/metalprobe:latest
- --probe-os-image=ghcr.io/ironcore-dev/os-images/gardenlinux:1443.10
+ - --insecure=false
- --registry-url=http://127.0.0.1:30000
- --registry-port=30000
- --enforce-first-boot
diff --git a/config/redfish-mockup/redfish_mockup_endpoint.yaml b/config/redfish-mockup/redfish_mockup_endpoint.yaml
index 0df99e91..2219b249 100644
--- a/config/redfish-mockup/redfish_mockup_endpoint.yaml
+++ b/config/redfish-mockup/redfish_mockup_endpoint.yaml
@@ -7,7 +7,7 @@ metadata:
app.kubernetes.io/part-of: metal-operator
app.kubernetes.io/managed-by: kustomize
app.kubernetes.io/created-by: metal-operator
- name: endpoint-sample
+ name: node005-ap046
spec:
- ip: "127.0.0.1"
- macAddress: "23:11:8A:33:CF:EA"
+ ip: "10.246.48.104"
+ macAddress: "2c:ea:7f:f3:6d:d2"
diff --git a/config/samples/metal_v1alpha1_biosversion.yaml b/config/samples/metal_v1alpha1_biosversion.yaml
index 46c2cf1e..a5e3de32 100644
--- a/config/samples/metal_v1alpha1_biosversion.yaml
+++ b/config/samples/metal_v1alpha1_biosversion.yaml
@@ -6,11 +6,10 @@ metadata:
app.kubernetes.io/managed-by: kustomize
name: biosversion-sample
spec:
- version: "U59 v2.34 (10/04/2024)"
+ version: "2.22.1"
image:
- URI: "https://foo-2.34_10_04_2024.signed.flash"
+ URI: "https://repo.eu-de-1.cloud.sap/hw-firmware/dell/poweredge-r640/BIOS_M73JJ_WN64_2.22.1.EXE"
transferProtocol: "HTTPS"
- updatePolicy: Normal
serverRef:
- name: endpoint-sample-hpe-system-0
+ name: node005-ap046-system-0
serverMaintenancePolicy: OwnerApproval
diff --git a/internal/controller/servermaintenance_controller.go b/internal/controller/servermaintenance_controller.go
index 6f2cd042..d173a2a0 100644
--- a/internal/controller/servermaintenance_controller.go
+++ b/internal/controller/servermaintenance_controller.go
@@ -100,8 +100,29 @@ func (r *ServerMaintenanceReconciler) ensureServerMaintenanceStateTransition(ctx
switch serverMaintenance.Status.State {
case metalv1alpha1.ServerMaintenanceStatePending:
return r.handlePendingState(ctx, log, serverMaintenance)
+ case metalv1alpha1.ServerMaintenanceStatePreapareMaintenance:
+ return r.handlePrepareMaintenanceState(ctx, log, serverMaintenance)
case metalv1alpha1.ServerMaintenanceStateInMaintenance:
- return r.handleInMaintenanceState(ctx, log, serverMaintenance)
+ log.V(1).Info("Server is under maintenance", "ServerMaintenance", serverMaintenance.Name)
+ server, err := r.getServerRef(ctx, serverMaintenance)
+ if err != nil {
+ return ctrl.Result{}, err
+ }
+
+ if config, err := r.getMaintenanceBootConfigurationRef(ctx, log, serverMaintenance); err != nil || config == nil {
+ return ctrl.Result{}, err
+ } else {
+ if config.Spec.IgnitionSecretRef.Name == serverMaintenance.Spec.ServerBootConfigurationTemplate.Name &&
+ config.Spec.Image == serverMaintenance.Spec.ServerBootConfigurationTemplate.Spec.Image &&
+ config.Spec.ServerRef.Name == serverMaintenance.Spec.ServerBootConfigurationTemplate.Spec.ServerRef.Name {
+ // no changes
+ log.V(1).Info("No changes in ServerBootConfigurationTemplate")
+ } else {
+ return r.moveToPrepareMaintenanceState(ctx, log, serverMaintenance, server)
+ }
+ }
+ err = r.setAndPatchServerPowerState(ctx, log, server, serverMaintenance)
+ return ctrl.Result{}, err
case metalv1alpha1.ServerMaintenanceStateFailed:
return r.handleFailedState(log, serverMaintenance)
}
@@ -121,7 +142,7 @@ func (r *ServerMaintenanceReconciler) handlePendingState(ctx context.Context, lo
}
if server.Spec.ServerClaimRef == nil {
log.V(1).Info("Server has no claim, move to maintenance right away", "Server", server.Name)
- return r.moveToInMaintenanceState(ctx, log, serverMaintenance, server)
+ return r.moveToPrepareMaintenanceState(ctx, log, serverMaintenance, server)
}
serverClaim := &metalv1alpha1.ServerClaim{}
if err := r.Get(ctx,
@@ -151,16 +172,16 @@ func (r *ServerMaintenanceReconciler) handlePendingState(ctx context.Context, lo
return ctrl.Result{}, nil
}
log.V(1).Info("Server approved for maintenance", "Server", server.Name, "Maintenance", serverMaintenance.Name)
- return r.moveToInMaintenanceState(ctx, log, serverMaintenance, server)
+ return r.moveToPrepareMaintenanceState(ctx, log, serverMaintenance, server)
}
if serverMaintenance.Spec.Policy == metalv1alpha1.ServerMaintenancePolicyEnforced {
log.V(1).Info("Enforcing maintenance", "Server", server.Name, "Maintenance", serverMaintenance.Name)
- return r.moveToInMaintenanceState(ctx, log, serverMaintenance, server)
+ return r.moveToPrepareMaintenanceState(ctx, log, serverMaintenance, server)
}
return ctrl.Result{}, nil
}
-func (r *ServerMaintenanceReconciler) handleInMaintenanceState(ctx context.Context, log logr.Logger, serverMaintenance *metalv1alpha1.ServerMaintenance) (ctrl.Result, error) {
+func (r *ServerMaintenanceReconciler) handlePrepareMaintenanceState(ctx context.Context, log logr.Logger, serverMaintenance *metalv1alpha1.ServerMaintenance) (ctrl.Result, error) {
server, err := r.getServerRef(ctx, serverMaintenance)
if err != nil {
return ctrl.Result{}, err
@@ -169,29 +190,39 @@ func (r *ServerMaintenanceReconciler) handleInMaintenanceState(ctx context.Conte
if err != nil {
return ctrl.Result{}, err
}
- if config == nil {
- if err := r.setAndPatchServerPowerState(ctx, log, server, serverMaintenance); err != nil {
- return ctrl.Result{}, err
- }
- return ctrl.Result{}, nil
- }
- if config.Status.State == metalv1alpha1.ServerBootConfigurationStatePending || config.Status.State == "" {
- log.V(1).Info("Server boot configuration is pending", "Server", server.Name)
- return ctrl.Result{}, nil
+
+ // Note: this is to check skip of BootConfiguration
+ // implication of skipping is that the server will still continue to run the OS during Maintenance
+ // might be subjected to reboots
+ val, found := serverMaintenance.GetAnnotations()[metalv1alpha1.OperationSkipBootConfiguration]
+ if config == nil && !(found && val == metalv1alpha1.OperationBootConfigurationSkip) {
+ log.V(1).Info("No ServerBootConfigurationTemplate boot configuration", "Server", server.Name)
+ _, err = r.patchMaintenanceState(ctx, serverMaintenance, metalv1alpha1.ServerMaintenanceStateFailed)
+ return ctrl.Result{}, err
}
- if config.Status.State == metalv1alpha1.ServerBootConfigurationStateError {
- if modified, err := r.patchMaintenanceState(ctx, serverMaintenance, metalv1alpha1.ServerMaintenanceStateFailed); err != nil || modified {
- return ctrl.Result{}, err
+
+ if config != nil {
+ if config.Status.State == metalv1alpha1.ServerBootConfigurationStatePending || config.Status.State == "" {
+ log.V(1).Info("Server boot configuration is pending", "Server", server.Name)
+ return ctrl.Result{}, nil
+ }
+ if config.Status.State == metalv1alpha1.ServerBootConfigurationStateError {
+ if modified, err := r.patchMaintenanceState(ctx, serverMaintenance, metalv1alpha1.ServerMaintenanceStateFailed); err != nil || modified {
+ return ctrl.Result{}, err
+ }
+ return ctrl.Result{}, nil
+ }
+ if config.Status.State == metalv1alpha1.ServerBootConfigurationStateReady {
+ log.V(1).Info("Server maintenance boot configuration is ready", "Server", server.Name)
}
- return ctrl.Result{}, nil
}
- if config.Status.State == metalv1alpha1.ServerBootConfigurationStateReady {
- log.V(1).Info("Server maintenance boot configuration is ready", "Server", server.Name)
+ if server.Spec.Power != serverMaintenance.Spec.ServerPower {
if err := r.setAndPatchServerPowerState(ctx, log, server, serverMaintenance); err != nil {
return ctrl.Result{}, err
}
}
- return ctrl.Result{}, nil
+ _, err = r.patchMaintenanceState(ctx, serverMaintenance, metalv1alpha1.ServerMaintenanceStateInMaintenance)
+ return ctrl.Result{}, err
}
func (r *ServerMaintenanceReconciler) applyServerBootConfiguration(ctx context.Context, log logr.Logger, maintenance *metalv1alpha1.ServerMaintenance, server *metalv1alpha1.Server) (*metalv1alpha1.ServerBootConfiguration, error) {
@@ -209,6 +240,7 @@ func (r *ServerMaintenanceReconciler) applyServerBootConfiguration(ctx context.C
}
opResult, err := controllerutil.CreateOrPatch(ctx, r.Client, config, func() error {
config.Spec = maintenance.Spec.ServerBootConfigurationTemplate.Spec
+ config.Status.State = ""
return controllerutil.SetControllerReference(maintenance, config, r.Scheme)
})
if err != nil {
@@ -274,9 +306,21 @@ func (r *ServerMaintenanceReconciler) delete(ctx context.Context, log logr.Logge
if err != nil {
return ctrl.Result{}, err
}
+ // clean up the maintenance boot config if it was set, before removing server from Maintennace
+ if serverMaintenance != nil {
+ log.V(1).Info("Restoring boot order after maintenance")
+ if wait, ok, err := r.revertMaintenanceBootConfig(ctx, log, serverMaintenance, server); err != nil {
+ return ctrl.Result{}, err
+ } else if wait {
+ return ctrl.Result{RequeueAfter: r.ResyncInterval}, nil
+ } else if !ok {
+ log.V(1).Info("maintenance boot revert is completed")
+ }
+ }
if err := r.cleanup(ctx, log, server); err != nil {
return ctrl.Result{}, err
}
+
log.V(1).Info("Ensuring that the finalizer is removed")
if modified, err := clientutils.PatchEnsureNoFinalizer(ctx, r.Client, serverMaintenance, ServerMaintenanceFinalizer); err != nil || modified {
return ctrl.Result{}, err
@@ -337,7 +381,7 @@ func (r *ServerMaintenanceReconciler) cleanup(ctx context.Context, log logr.Logg
return nil
}
-func (r *ServerMaintenanceReconciler) moveToInMaintenanceState(
+func (r *ServerMaintenanceReconciler) moveToPrepareMaintenanceState(
ctx context.Context,
log logr.Logger,
serverMaintenance *metalv1alpha1.ServerMaintenance,
@@ -348,34 +392,18 @@ func (r *ServerMaintenanceReconciler) moveToInMaintenanceState(
return ctrl.Result{}, err
}
- if wait, ok, err := r.configureMaintenanceBootConfig(ctx, log, serverMaintenance, server); err != nil {
- return ctrl.Result{}, err
- } else if wait {
- return ctrl.Result{RequeueAfter: 2 * time.Minute}, nil
- } else if !ok {
- log.V(1).Info("maintenance boot configuration is completed")
- }
-
- if modified, err := r.patchMaintenanceState(ctx, serverMaintenance, metalv1alpha1.ServerMaintenanceStateInMaintenance); err != nil || modified {
+ if modified, err := r.patchMaintenanceState(ctx, serverMaintenance, metalv1alpha1.ServerMaintenanceStatePreapareMaintenance); err != nil || modified {
return ctrl.Result{}, err
}
return ctrl.Result{}, nil
}
-func (r *ServerMaintenanceReconciler) configureMaintenanceBootConfig(
+func (r *ServerMaintenanceReconciler) revertMaintenanceBootConfig(
ctx context.Context,
log logr.Logger,
serverMaintenance *metalv1alpha1.ServerMaintenance,
server *metalv1alpha1.Server,
) (bool, bool, error) {
- if serverMaintenance.Spec.MaintenanceBootPolicy == metalv1alpha1.MaintenanceBootPolicyNone {
- return false, false, nil
- }
- if server.Status.State != metalv1alpha1.ServerStateMaintenance {
- log.V(1).Info("Waiting for server to enter maintenance state before configuring maintenance boot config", "Server", server.Name, "State", server.Status.State)
- return true, false, nil
- }
-
bmcClient, err := bmcutils.GetBMCClientForServer(ctx, r.Client, server, r.Insecure, r.BMCOptions)
if err != nil {
return false, false, fmt.Errorf("failed to create BMC client: %w", err)
@@ -383,11 +411,11 @@ func (r *ServerMaintenanceReconciler) configureMaintenanceBootConfig(
defer bmcClient.Logout()
err = bmcClient.SetBootOverride(ctx, server.Spec.SystemURI, redfish.Boot{
- BootSourceOverrideEnabled: redfish.ContinuousBootSourceOverrideEnabled,
- BootSourceOverrideTarget: redfish.BootSourceOverrideTarget(serverMaintenance.Spec.MaintenanceBootPolicy),
+ BootSourceOverrideEnabled: redfish.DisabledBootSourceOverrideEnabled,
+ BootSourceOverrideTarget: redfish.NoneBootSourceOverrideTarget,
BootSourceOverrideMode: redfish.UEFIBootSourceOverrideMode},
)
- log.V(1).Info(" boot override during maintenance is set", "Server", server.Name)
+ log.V(1).Info("boot override during maintenance is reverted", "Server", server.Name)
return false, err == nil, err
}
@@ -448,6 +476,25 @@ func (r *ServerMaintenanceReconciler) patchServerClaimAnnotation(ctx context.Con
return nil
}
+func (r *ServerMaintenanceReconciler) getMaintenanceBootConfigurationRef(
+ ctx context.Context,
+ log logr.Logger,
+ serverMaintenance *metalv1alpha1.ServerMaintenance,
+) (*metalv1alpha1.ServerBootConfiguration, error) {
+ if serverMaintenance.Spec.ServerBootConfigurationTemplate == nil {
+ return nil, nil
+ }
+ config := &metalv1alpha1.ServerBootConfiguration{}
+ if err := r.Get(ctx, types.NamespacedName{Name: serverMaintenance.Name, Namespace: serverMaintenance.Namespace}, config); err != nil {
+ if apierrors.IsNotFound(err) {
+ log.V(1).Info("ServerBootConfiguration for maintenance not found", "ServerBootConfiguration", serverMaintenance.Name)
+ return nil, nil
+ }
+ return nil, fmt.Errorf("failed to get server boot configuration: %w", err)
+ }
+ return config, nil
+}
+
// SetupWithManager sets up the controller with the Manager.
func (r *ServerMaintenanceReconciler) SetupWithManager(mgr ctrl.Manager) error {
return ctrl.NewControllerManagedBy(mgr).
From 6adfbd959692a793520fde538404e13221474ed2 Mon Sep 17 00:00:00 2001
From: Nagadeesh Nagaraja
Date: Wed, 17 Sep 2025 12:47:25 +0200
Subject: [PATCH 03/12] remove maintainenceBootConfigRef
---
api/v1alpha1/server_types.go | 7 +--
api/v1alpha1/serverbootconfiguration_types.go | 16 +++++++
api/v1alpha1/zz_generated.deepcopy.go | 5 --
...ironcore.dev_serverbootconfigurations.yaml | 9 ++++
...metal.ironcore.dev_servermaintenances.yaml | 9 ++++
.../crd/bases/metal.ironcore.dev_servers.yaml | 47 +------------------
.../servermaintenance_controller.go | 36 +++++++++-----
.../servermaintenance_controller_test.go | 39 ++++++++++-----
8 files changed, 90 insertions(+), 78 deletions(-)
diff --git a/api/v1alpha1/server_types.go b/api/v1alpha1/server_types.go
index 5557ed71..abb1edf4 100644
--- a/api/v1alpha1/server_types.go
+++ b/api/v1alpha1/server_types.go
@@ -115,14 +115,11 @@ type ServerSpec struct {
// BootConfigurationRef is a reference to a BootConfiguration object that specifies
// the boot configuration for this server. This field is optional and can be omitted
// if no boot configuration is specified.
+ // This is used during initial provisioning of the server automatically.
+ // This is also used during maintenance if Maintenance needs server to be taken offline
// +optional
BootConfigurationRef *v1.ObjectReference `json:"bootConfigurationRef,omitempty"`
- // MaintenanceBootConfigurationRef is a reference to a BootConfiguration object that specifies
- // the boot configuration for this server during maintenance. This field is optional and can be omitted
- // +optional
- MaintenanceBootConfigurationRef *v1.ObjectReference `json:"maintenanceBootConfigurationRef,omitempty"`
-
// BootOrder specifies the boot order of the server.
// +optional
BootOrder []BootOrder `json:"bootOrder,omitempty"`
diff --git a/api/v1alpha1/serverbootconfiguration_types.go b/api/v1alpha1/serverbootconfiguration_types.go
index e3f63e3f..3fc05271 100644
--- a/api/v1alpha1/serverbootconfiguration_types.go
+++ b/api/v1alpha1/serverbootconfiguration_types.go
@@ -19,12 +19,28 @@ type ServerBootConfigurationSpec struct {
// +optional
Image string `json:"image,omitempty"`
+ // BootType specifies the type of boot configuration.
+ // It can be either "Oneoff" or "Continueous". If not specified, it defaults to "Oneoff".
+ // +kubebuilder:validation:Enum=Oneoff;Continueous
+ // +optional
+ // +kubebuilder:default=Oneoff
+ Boottype BootType `json:"bootType,omitempty"`
+
// IgnitionSecretRef is a reference to the Kubernetes Secret object that contains
// the ignition configuration for the server. This field is optional and can be omitted if not specified.
// +optional
IgnitionSecretRef *v1.LocalObjectReference `json:"ignitionSecretRef,omitempty"`
}
+type BootType string
+
+const (
+ // BootTypeOneOff indicates that the server should boot into this configuration one time.
+ BootTypeOneOff BootType = "Oneoff"
+ // BootTypeContinueous indicates that the server should boot into this configuration continuously.
+ BootTypeContinueous BootType = "Continueous"
+)
+
// ServerBootConfigurationState defines the possible states of a ServerBootConfiguration.
type ServerBootConfigurationState string
diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go
index cd44503e..3f13b999 100644
--- a/api/v1alpha1/zz_generated.deepcopy.go
+++ b/api/v1alpha1/zz_generated.deepcopy.go
@@ -1676,11 +1676,6 @@ func (in *ServerSpec) DeepCopyInto(out *ServerSpec) {
*out = new(v1.ObjectReference)
**out = **in
}
- if in.MaintenanceBootConfigurationRef != nil {
- in, out := &in.MaintenanceBootConfigurationRef, &out.MaintenanceBootConfigurationRef
- *out = new(v1.ObjectReference)
- **out = **in
- }
if in.BootOrder != nil {
in, out := &in.BootOrder, &out.BootOrder
*out = make([]BootOrder, len(*in))
diff --git a/config/crd/bases/metal.ironcore.dev_serverbootconfigurations.yaml b/config/crd/bases/metal.ironcore.dev_serverbootconfigurations.yaml
index 61f1d186..f84b81d5 100644
--- a/config/crd/bases/metal.ironcore.dev_serverbootconfigurations.yaml
+++ b/config/crd/bases/metal.ironcore.dev_serverbootconfigurations.yaml
@@ -57,6 +57,15 @@ spec:
description: ServerBootConfigurationSpec defines the desired state of
ServerBootConfiguration.
properties:
+ bootType:
+ default: Oneoff
+ description: |-
+ BootType specifies the type of boot configuration.
+ It can be either "Oneoff" or "Continueous". If not specified, it defaults to "Oneoff".
+ enum:
+ - Oneoff
+ - Continueous
+ type: string
ignitionSecretRef:
description: |-
IgnitionSecretRef is a reference to the Kubernetes Secret object that contains
diff --git a/config/crd/bases/metal.ironcore.dev_servermaintenances.yaml b/config/crd/bases/metal.ironcore.dev_servermaintenances.yaml
index 3be2d4d0..4037d418 100644
--- a/config/crd/bases/metal.ironcore.dev_servermaintenances.yaml
+++ b/config/crd/bases/metal.ironcore.dev_servermaintenances.yaml
@@ -73,6 +73,15 @@ spec:
description: Parameters specify the parameters to be used for
rendering the boot configuration.
properties:
+ bootType:
+ default: Oneoff
+ description: |-
+ BootType specifies the type of boot configuration.
+ It can be either "Oneoff" or "Continueous". If not specified, it defaults to "Oneoff".
+ enum:
+ - Oneoff
+ - Continueous
+ type: string
ignitionSecretRef:
description: |-
IgnitionSecretRef is a reference to the Kubernetes Secret object that contains
diff --git a/config/crd/bases/metal.ironcore.dev_servers.yaml b/config/crd/bases/metal.ironcore.dev_servers.yaml
index a54f23b0..ea85b2d4 100644
--- a/config/crd/bases/metal.ironcore.dev_servers.yaml
+++ b/config/crd/bases/metal.ironcore.dev_servers.yaml
@@ -158,6 +158,8 @@ spec:
BootConfigurationRef is a reference to a BootConfiguration object that specifies
the boot configuration for this server. This field is optional and can be omitted
if no boot configuration is specified.
+ This is used during initial provisioning of the server automatically.
+ This is also used during maintenance if Maintenance needs server to be taken offline
properties:
apiVersion:
description: API version of the referent.
@@ -223,51 +225,6 @@ spec:
description: IndicatorLED specifies the desired state of the server's
indicator LED.
type: string
- maintenanceBootConfigurationRef:
- description: |-
- MaintenanceBootConfigurationRef is a reference to a BootConfiguration object that specifies
- the boot configuration for this server during maintenance. This field is optional and can be omitted
- properties:
- apiVersion:
- description: API version of the referent.
- type: string
- fieldPath:
- description: |-
- If referring to a piece of an object instead of an entire object, this string
- should contain a valid JSON/Go field access statement, such as desiredState.manifest.containers[2].
- For example, if the object reference is to a container within a pod, this would take on a value like:
- "spec.containers{name}" (where "name" refers to the name of the container that triggered
- the event) or if no container name is specified "spec.containers[2]" (container with
- index 2 in this pod). This syntax is chosen only to have some well-defined way of
- referencing a part of an object.
- type: string
- kind:
- description: |-
- Kind of the referent.
- More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
- type: string
- name:
- description: |-
- Name of the referent.
- More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
- type: string
- namespace:
- description: |-
- Namespace of the referent.
- More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/
- type: string
- resourceVersion:
- description: |-
- Specific resourceVersion to which this reference is made, if any.
- More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#concurrency-control-and-consistency
- type: string
- uid:
- description: |-
- UID of the referent.
- More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#uids
- type: string
- type: object
- x-kubernetes-map-type: atomic
power:
description: Power specifies the desired power state of the server.
type: string
diff --git a/internal/controller/servermaintenance_controller.go b/internal/controller/servermaintenance_controller.go
index d173a2a0..f5b1dbee 100644
--- a/internal/controller/servermaintenance_controller.go
+++ b/internal/controller/servermaintenance_controller.go
@@ -6,6 +6,7 @@ package controller
import (
"context"
"fmt"
+ "reflect"
"time"
"github.com/go-logr/logr"
@@ -112,9 +113,7 @@ func (r *ServerMaintenanceReconciler) ensureServerMaintenanceStateTransition(ctx
if config, err := r.getMaintenanceBootConfigurationRef(ctx, log, serverMaintenance); err != nil || config == nil {
return ctrl.Result{}, err
} else {
- if config.Spec.IgnitionSecretRef.Name == serverMaintenance.Spec.ServerBootConfigurationTemplate.Name &&
- config.Spec.Image == serverMaintenance.Spec.ServerBootConfigurationTemplate.Spec.Image &&
- config.Spec.ServerRef.Name == serverMaintenance.Spec.ServerBootConfigurationTemplate.Spec.ServerRef.Name {
+ if reflect.DeepEqual(config.Spec, serverMaintenance.Spec.ServerBootConfigurationTemplate.Spec) {
// no changes
log.V(1).Info("No changes in ServerBootConfigurationTemplate")
} else {
@@ -186,6 +185,13 @@ func (r *ServerMaintenanceReconciler) handlePrepareMaintenanceState(ctx context.
if err != nil {
return ctrl.Result{}, err
}
+
+ // rest of the operation can be done only if the server is in maintenance state
+ // else, we will hit conflict with ServerClaim controller which will still be asserting Specs on Serevr
+ if server.Status.State != metalv1alpha1.ServerStateMaintenance {
+ return ctrl.Result{}, fmt.Errorf("server is not in maintenance state, current state: %s", server.Status.State)
+ }
+
config, err := r.applyServerBootConfiguration(ctx, log, serverMaintenance, server)
if err != nil {
return ctrl.Result{}, err
@@ -248,7 +254,7 @@ func (r *ServerMaintenanceReconciler) applyServerBootConfiguration(ctx context.C
}
log.V(1).Info("Created or patched Config", "Config", config.Name, "Operation", opResult)
serverBase := server.DeepCopy()
- server.Spec.MaintenanceBootConfigurationRef = &v1.ObjectReference{
+ server.Spec.BootConfigurationRef = &v1.ObjectReference{
Namespace: config.Namespace,
Name: config.Name,
UID: config.UID,
@@ -317,7 +323,7 @@ func (r *ServerMaintenanceReconciler) delete(ctx context.Context, log logr.Logge
log.V(1).Info("maintenance boot revert is completed")
}
}
- if err := r.cleanup(ctx, log, server); err != nil {
+ if err := r.cleanup(ctx, log, server, serverMaintenance); err != nil {
return ctrl.Result{}, err
}
@@ -339,17 +345,22 @@ func (r *ServerMaintenanceReconciler) getServerRef(ctx context.Context, serverMa
return server, nil
}
-func (r *ServerMaintenanceReconciler) cleanup(ctx context.Context, log logr.Logger, server *metalv1alpha1.Server) error {
+func (r *ServerMaintenanceReconciler) cleanup(
+ ctx context.Context,
+ log logr.Logger,
+ server *metalv1alpha1.Server,
+ serverMaintenance *metalv1alpha1.ServerMaintenance) error {
if server != nil && server.Spec.ServerMaintenanceRef != nil {
if err := r.removeMaintenanceRefFromServer(ctx, server); err != nil {
log.Error(err, "failed to remove maintenance ref from server")
}
}
- if server.Spec.MaintenanceBootConfigurationRef != nil {
+ if server.Spec.BootConfigurationRef != nil &&
+ serverMaintenance.Spec.ServerBootConfigurationTemplate != nil {
config := &metalv1alpha1.ServerBootConfiguration{
ObjectMeta: metav1.ObjectMeta{
- Name: server.Spec.MaintenanceBootConfigurationRef.Name,
- Namespace: server.Spec.MaintenanceBootConfigurationRef.Namespace,
+ Name: serverMaintenance.Name,
+ Namespace: serverMaintenance.Namespace,
},
}
if err := r.Delete(ctx, config); err != nil {
@@ -357,6 +368,7 @@ func (r *ServerMaintenanceReconciler) cleanup(ctx context.Context, log logr.Logg
return fmt.Errorf("failed to delete serverbootconfig: %w", err)
}
}
+ // note: remove the bootConfig this controller set
if err := r.removeBootConfigRefFromServer(ctx, log, config, server); err != nil {
return fmt.Errorf("failed to remove maintenance boot config ref from server: %w", err)
}
@@ -423,11 +435,13 @@ func (r *ServerMaintenanceReconciler) removeBootConfigRefFromServer(ctx context.
if server == nil {
return nil
}
- if ref := server.Spec.MaintenanceBootConfigurationRef; ref == nil || (ref.Name != config.Name && ref.Namespace != config.Namespace) {
+ if ref := server.Spec.BootConfigurationRef; ref == nil || (ref.Name != config.Name && ref.Namespace != config.Namespace) {
return nil
}
serverBase := server.DeepCopy()
- server.Spec.MaintenanceBootConfigurationRef = nil
+ // remove the boot configuration ref by the maintenance only
+ // this will be replaced by the reserved boot configuration if provided, by serverClaim
+ server.Spec.BootConfigurationRef = nil
if err := r.Patch(ctx, server, client.MergeFrom(serverBase)); err != nil && !apierrors.IsNotFound(err) {
return err
}
diff --git a/internal/controller/servermaintenance_controller_test.go b/internal/controller/servermaintenance_controller_test.go
index bfb54953..6ace1d7e 100644
--- a/internal/controller/servermaintenance_controller_test.go
+++ b/internal/controller/servermaintenance_controller_test.go
@@ -153,17 +153,25 @@ var _ = Describe("ServerMaintenance Controller", func() {
metalv1alpha1.ServerMaintenanceReasonAnnotationKey: "test-maintenance",
metalv1alpha1.ServerMaintenanceApprovalKey: "true",
}
- Eventually(Object(server)).Should(SatisfyAll(
- HaveField("Spec.ServerMaintenanceRef.Name", serverMaintenance.Name),
- HaveField("Spec.MaintenanceBootConfigurationRef", Not(BeNil())),
- ))
+ By("Checking the ServerMaintenanceRef")
+ Eventually(Object(server)).Should(
+ HaveField("Spec.ServerMaintenanceRef", Not(BeNil())),
+ )
bootConfig := &metalv1alpha1.ServerBootConfiguration{}
Eventually(k8sClient.Get).WithArguments(ctx, types.NamespacedName{
- Name: server.Spec.MaintenanceBootConfigurationRef.Name,
- Namespace: server.Spec.MaintenanceBootConfigurationRef.Namespace,
+ Name: server.Spec.BootConfigurationRef.Name,
+ Namespace: server.Spec.BootConfigurationRef.Namespace,
}, bootConfig).Should(Succeed())
+ By("Checking the BootConfigurationRef")
+ Eventually(Object(server)).Should(SatisfyAll(
+ HaveField("Spec.ServerMaintenanceRef.Name", serverMaintenance.Name),
+ HaveField("Spec.BootConfigurationRef", Not(BeNil())),
+ HaveField("Spec.BootConfigurationRef.Name", serverMaintenance.Name),
+ HaveField("Spec.BootConfigurationRef.Namespace", serverMaintenance.Namespace),
+ ))
+
By("Patching the boot configuration to a Ready state")
Eventually(UpdateStatus(bootConfig, func() {
bootConfig.Status.State = metalv1alpha1.ServerBootConfigurationStateReady
@@ -189,7 +197,7 @@ var _ = Describe("ServerMaintenance Controller", func() {
Eventually(Object(server)).Should(SatisfyAll(
HaveField("Status.State", metalv1alpha1.ServerStateReserved),
HaveField("Spec.ServerMaintenanceRef", BeNil()),
- HaveField("Spec.MaintenanceBootConfigurationRef", BeNil()),
+ HaveField("Spec.BootConfigurationRef", BeNil()),
))
By("Checking the ServerClaim is cleaned up")
Eventually(Object(serverClaim)).Should(SatisfyAll(
@@ -244,19 +252,26 @@ var _ = Describe("ServerMaintenance Controller", func() {
}
Expect(k8sClient.Create(ctx, serverMaintenance01)).To(Succeed())
By("Checking the ServerMaintenanceRef")
- Eventually(Object(server)).Should(SatisfyAll(
+ Eventually(Object(server)).Should(
HaveField("Spec.ServerMaintenanceRef", Not(BeNil())),
- HaveField("Spec.MaintenanceBootConfigurationRef", Not(BeNil())),
- ))
+ )
Eventually(Object(serverMaintenance01)).Should(SatisfyAll(
HaveField("Status.State", metalv1alpha1.ServerMaintenanceStateInMaintenance),
))
bootConfig := &metalv1alpha1.ServerBootConfiguration{}
Eventually(k8sClient.Get).WithArguments(ctx, types.NamespacedName{
- Name: server.Spec.MaintenanceBootConfigurationRef.Name,
- Namespace: server.Spec.MaintenanceBootConfigurationRef.Namespace,
+ Name: server.Spec.BootConfigurationRef.Name,
+ Namespace: server.Spec.BootConfigurationRef.Namespace,
}, bootConfig).Should(Succeed())
+ By("Checking the BootConfigurationRef")
+ Eventually(Object(server)).Should(SatisfyAll(
+ HaveField("Spec.ServerMaintenanceRef.Name", serverMaintenance01.Name),
+ HaveField("Spec.BootConfigurationRef", Not(BeNil())),
+ HaveField("Spec.BootConfigurationRef.Name", serverMaintenance01.Name),
+ HaveField("Spec.BootConfigurationRef.Namespace", serverMaintenance01.Namespace),
+ ))
+
By("Patching the boot configuration to a Ready state")
Eventually(UpdateStatus(bootConfig, func() {
bootConfig.Status.State = metalv1alpha1.ServerBootConfigurationStateReady
From 1c676f2a8ccf3107bce014b986a468ab6df8f173 Mon Sep 17 00:00:00 2001
From: Nagadeesh Nagaraja
Date: Thu, 18 Sep 2025 15:04:30 +0200
Subject: [PATCH 04/12] bootconfig during maintenance
---
api/v1alpha1/server_types.go | 14 -
api/v1alpha1/serverbootconfiguration_types.go | 16 --
api/v1alpha1/servermaintenance_types.go | 25 ++
api/v1alpha1/zz_generated.deepcopy.go | 27 +-
bmc/bmc.go | 8 +-
bmc/redfish.go | 34 ++-
...ironcore.dev_serverbootconfigurations.yaml | 9 -
...metal.ironcore.dev_servermaintenances.yaml | 29 ++-
.../crd/bases/metal.ironcore.dev_servers.yaml | 20 --
.../controller/biossettings_controller.go | 29 ++-
.../biossettings_controller_test.go | 23 +-
internal/controller/biosversion_controller.go | 13 +
.../controller/biosversion_controller_test.go | 4 +
internal/controller/bmcsettings_controller.go | 39 ++-
.../controller/bmcsettings_controller_test.go | 27 ++
internal/controller/bmcversion_controller.go | 46 +++-
.../controller/bmcversion_controller_test.go | 14 +
internal/controller/common_test_helpers.go | 40 +--
internal/controller/server_controller.go | 34 ---
.../servermaintenance_controller.go | 246 ++++++++++++++----
.../servermaintenance_controller_test.go | 69 +++--
internal/controller/suite_test.go | 18 +-
22 files changed, 490 insertions(+), 294 deletions(-)
diff --git a/api/v1alpha1/server_types.go b/api/v1alpha1/server_types.go
index abb1edf4..2c9401db 100644
--- a/api/v1alpha1/server_types.go
+++ b/api/v1alpha1/server_types.go
@@ -59,16 +59,6 @@ type BMCAccess struct {
BMCSecretRef v1.LocalObjectReference `json:"bmcSecretRef"`
}
-// BootOrder represents the boot order of the server.
-type BootOrder struct {
- // Name is the name of the boot device.
- Name string `json:"name"`
- // Priority is the priority of the boot device.
- Priority int `json:"priority"`
- // Device is the device to boot from.
- Device string `json:"device"`
-}
-
// ServerSpec defines the desired state of a Server.
type ServerSpec struct {
// UUID is the unique identifier for the server.
@@ -120,10 +110,6 @@ type ServerSpec struct {
// +optional
BootConfigurationRef *v1.ObjectReference `json:"bootConfigurationRef,omitempty"`
- // BootOrder specifies the boot order of the server.
- // +optional
- BootOrder []BootOrder `json:"bootOrder,omitempty"`
-
// BIOSSettingsRef is a reference to a biossettings object that specifies
// the BIOS configuration for this server.
// +optional
diff --git a/api/v1alpha1/serverbootconfiguration_types.go b/api/v1alpha1/serverbootconfiguration_types.go
index 3fc05271..e3f63e3f 100644
--- a/api/v1alpha1/serverbootconfiguration_types.go
+++ b/api/v1alpha1/serverbootconfiguration_types.go
@@ -19,28 +19,12 @@ type ServerBootConfigurationSpec struct {
// +optional
Image string `json:"image,omitempty"`
- // BootType specifies the type of boot configuration.
- // It can be either "Oneoff" or "Continueous". If not specified, it defaults to "Oneoff".
- // +kubebuilder:validation:Enum=Oneoff;Continueous
- // +optional
- // +kubebuilder:default=Oneoff
- Boottype BootType `json:"bootType,omitempty"`
-
// IgnitionSecretRef is a reference to the Kubernetes Secret object that contains
// the ignition configuration for the server. This field is optional and can be omitted if not specified.
// +optional
IgnitionSecretRef *v1.LocalObjectReference `json:"ignitionSecretRef,omitempty"`
}
-type BootType string
-
-const (
- // BootTypeOneOff indicates that the server should boot into this configuration one time.
- BootTypeOneOff BootType = "Oneoff"
- // BootTypeContinueous indicates that the server should boot into this configuration continuously.
- BootTypeContinueous BootType = "Continueous"
-)
-
// ServerBootConfigurationState defines the possible states of a ServerBootConfiguration.
type ServerBootConfigurationState string
diff --git a/api/v1alpha1/servermaintenance_types.go b/api/v1alpha1/servermaintenance_types.go
index 83d1a9bc..d62c7e84 100644
--- a/api/v1alpha1/servermaintenance_types.go
+++ b/api/v1alpha1/servermaintenance_types.go
@@ -23,11 +23,33 @@ type ServerBootConfigurationTemplate struct {
// +required
Name string `json:"name"`
+ // BootType specifies the type of boot configuration.
+ // It can be either "Oneoff" or "Persistent". If not specified, it defaults to "Persistent".
+ // "Persistent" will set the configuration reboot the server before setting the server to the desired power state mentioned in spec.
+ //
+ // "Oneoff" will set the configuration to boot once. next boot-up will boot into default boot order.
+ // if power state mentioned in spec is "PowerOn", server will be powered on and will boot into the configuration.
+ // if power state is "PowerOff", server will be powered off and will boot into the configuration on next power on.
+ //
+ // +kubebuilder:validation:Enum=Oneoff;Persistent
+ // +optional
+ // +kubebuilder:default=Persistent
+ Boottype BootType `json:"bootType,omitempty"`
+
// Parameters specify the parameters to be used for rendering the boot configuration.
// +required
Spec ServerBootConfigurationSpec `json:"spec"`
}
+type BootType string
+
+const (
+ // BootTypeOneOff indicates that the server should boot into this configuration one time.
+ BootTypeOneOff BootType = "Oneoff"
+ // BootTypePersistent indicates that the server should boot into this configuration continuously.
+ BootTypePersistent BootType = "Persistent"
+)
+
// ServerMaintenanceSpec defines the desired state of a ServerMaintenance
type ServerMaintenanceSpec struct {
// Policy specifies the maintenance policy to be enforced on the server.
@@ -61,6 +83,9 @@ const (
type ServerMaintenanceStatus struct {
// State specifies the current state of the server maintenance.
State ServerMaintenanceState `json:"state,omitempty"`
+ // DefaultBootOrder specifies the default boot order of the server before maintenance.
+ // +optional
+ DefaultBootOrder []string `json:"defaultBootOrder,omitempty"`
}
// ServerMaintenanceState specifies the current state of the server maintenance.
diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go
index 3f13b999..fc65cec8 100644
--- a/api/v1alpha1/zz_generated.deepcopy.go
+++ b/api/v1alpha1/zz_generated.deepcopy.go
@@ -1053,21 +1053,6 @@ func (in *BMCVersionTemplate) DeepCopy() *BMCVersionTemplate {
return out
}
-// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *BootOrder) DeepCopyInto(out *BootOrder) {
- *out = *in
-}
-
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new BootOrder.
-func (in *BootOrder) DeepCopy() *BootOrder {
- if in == nil {
- return nil
- }
- out := new(BootOrder)
- in.DeepCopyInto(out)
- return out
-}
-
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ConsoleProtocol) DeepCopyInto(out *ConsoleProtocol) {
*out = *in
@@ -1535,7 +1520,7 @@ func (in *ServerMaintenance) DeepCopyInto(out *ServerMaintenance) {
out.TypeMeta = in.TypeMeta
in.ObjectMeta.DeepCopyInto(&out.ObjectMeta)
in.Spec.DeepCopyInto(&out.Spec)
- out.Status = in.Status
+ in.Status.DeepCopyInto(&out.Status)
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ServerMaintenance.
@@ -1636,6 +1621,11 @@ func (in *ServerMaintenanceSpec) DeepCopy() *ServerMaintenanceSpec {
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ServerMaintenanceStatus) DeepCopyInto(out *ServerMaintenanceStatus) {
*out = *in
+ if in.DefaultBootOrder != nil {
+ in, out := &in.DefaultBootOrder, &out.DefaultBootOrder
+ *out = make([]string, len(*in))
+ copy(*out, *in)
+ }
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ServerMaintenanceStatus.
@@ -1676,11 +1666,6 @@ func (in *ServerSpec) DeepCopyInto(out *ServerSpec) {
*out = new(v1.ObjectReference)
**out = **in
}
- if in.BootOrder != nil {
- in, out := &in.BootOrder, &out.BootOrder
- *out = make([]BootOrder, len(*in))
- copy(*out, *in)
- }
if in.BIOSSettingsRef != nil {
in, out := &in.BIOSSettingsRef, &out.BIOSSettingsRef
*out = new(v1.LocalObjectReference)
diff --git a/bmc/bmc.go b/bmc/bmc.go
index 3f50898a..b602d036 100644
--- a/bmc/bmc.go
+++ b/bmc/bmc.go
@@ -68,6 +68,9 @@ type BMC interface {
// GetBootOrder retrieves the boot order for the system.
GetBootOrder(ctx context.Context, systemURI string) ([]string, error)
+ // GetBootOptions retrieves the boot options for the system.
+ GetBootOptions(ctx context.Context, systemURI string) ([]*redfish.BootOption, error)
+
// GetBiosAttributeValues retrieves BIOS attribute values for the system.
GetBiosAttributeValues(ctx context.Context, systemURI string, attributes []string) (redfish.SettingsAttributes, error)
@@ -99,10 +102,7 @@ type BMC interface {
GetBMCVersion(ctx context.Context, UUID string) (string, error)
// SetBootOrder sets the boot order for the system.
- SetBootOrder(ctx context.Context, systemURI string, order []string) error
-
- // SetBootOverride sets the boot Override for the system.
- SetBootOverride(ctx context.Context, systemURI string, overrideConfig redfish.Boot) error
+ SetBootOrder(ctx context.Context, systemURI string, order redfish.Boot) error
// GetStorages retrieves storage information for the system.
GetStorages(ctx context.Context, systemURI string) ([]Storage, error)
diff --git a/bmc/redfish.go b/bmc/redfish.go
index 2b4db0f3..ac8ecb25 100644
--- a/bmc/redfish.go
+++ b/bmc/redfish.go
@@ -320,6 +320,14 @@ func (r *RedfishBMC) GetBootOrder(ctx context.Context, systemURI string) ([]stri
return system.Boot.BootOrder, nil
}
+func (r *RedfishBMC) GetBootOptions(ctx context.Context, systemURI string) ([]*redfish.BootOption, error) {
+ system, err := r.getSystemFromUri(ctx, systemURI)
+ if err != nil {
+ return nil, err
+ }
+ return system.BootOptions()
+}
+
func (r *RedfishBMC) GetBiosVersion(ctx context.Context, systemURI string) (string, error) {
system, err := r.getSystemFromUri(ctx, systemURI)
if err != nil {
@@ -474,22 +482,7 @@ func (r *RedfishBMC) SetBMCAttributesImmediately(ctx context.Context, bmcUUID st
}
// SetBootOrder sets bios boot order
-func (r *RedfishBMC) SetBootOrder(ctx context.Context, systemURI string, bootOrder []string) error {
- system, err := r.getSystemFromUri(ctx, systemURI)
- if err != nil {
- return err
- }
- return system.SetBoot(
- redfish.Boot{
- BootSourceOverrideEnabled: redfish.ContinuousBootSourceOverrideEnabled,
- BootSourceOverrideTarget: redfish.NoneBootSourceOverrideTarget,
- BootOrder: bootOrder,
- },
- )
-}
-
-// SetBootOverride sets server boot override
-func (r *RedfishBMC) SetBootOverride(ctx context.Context, systemURI string, overrideConfig redfish.Boot) error {
+func (r *RedfishBMC) SetBootOrder(ctx context.Context, systemURI string, bootOrder redfish.Boot) error {
system, err := r.getSystemFromUri(ctx, systemURI)
if err != nil {
return err
@@ -504,12 +497,15 @@ func (r *RedfishBMC) SetBootOverride(ctx context.Context, systemURI string, over
return err
}
if tSystem.Settings.SettingsObject.String() == "" {
- return system.SetBoot(overrideConfig)
+ return system.SetBoot(bootOrder)
}
- return system.Patch(tSystem.Settings.SettingsObject.String(), overrideConfig)
+ return system.SetBoot(bootOrder)
}
-func (r *RedfishBMC) getFilteredBiosRegistryAttributes(readOnly bool, immutable bool) (map[string]RegistryEntryAttributes, error) {
+func (r *RedfishBMC) getFilteredBiosRegistryAttributes(
+ readOnly bool,
+ immutable bool,
+) (map[string]RegistryEntryAttributes, error) {
registries, err := r.client.Service.Registries()
if err != nil {
return nil, err
diff --git a/config/crd/bases/metal.ironcore.dev_serverbootconfigurations.yaml b/config/crd/bases/metal.ironcore.dev_serverbootconfigurations.yaml
index f84b81d5..61f1d186 100644
--- a/config/crd/bases/metal.ironcore.dev_serverbootconfigurations.yaml
+++ b/config/crd/bases/metal.ironcore.dev_serverbootconfigurations.yaml
@@ -57,15 +57,6 @@ spec:
description: ServerBootConfigurationSpec defines the desired state of
ServerBootConfiguration.
properties:
- bootType:
- default: Oneoff
- description: |-
- BootType specifies the type of boot configuration.
- It can be either "Oneoff" or "Continueous". If not specified, it defaults to "Oneoff".
- enum:
- - Oneoff
- - Continueous
- type: string
ignitionSecretRef:
description: |-
IgnitionSecretRef is a reference to the Kubernetes Secret object that contains
diff --git a/config/crd/bases/metal.ironcore.dev_servermaintenances.yaml b/config/crd/bases/metal.ironcore.dev_servermaintenances.yaml
index 4037d418..d0ef9f79 100644
--- a/config/crd/bases/metal.ironcore.dev_servermaintenances.yaml
+++ b/config/crd/bases/metal.ironcore.dev_servermaintenances.yaml
@@ -66,6 +66,20 @@ spec:
description: ServerBootConfigurationTemplate specifies the boot configuration
to be applied to the server during maintenance.
properties:
+ bootType:
+ default: Persistent
+ description: |-
+ BootType specifies the type of boot configuration.
+ It can be either "Oneoff" or "Persistent". If not specified, it defaults to "Persistent".
+ "Persistent" will set the configuration reboot the server before setting the server to the desired power state mentioned in spec.
+
+ "Oneoff" will set the configuration to boot once. next boot-up will boot into default boot order.
+ if power state mentioned in spec is "PowerOn", server will be powered on and will boot into the configuration.
+ if power state is "PowerOff", server will be powered off and will boot into the configuration on next power on.
+ enum:
+ - Oneoff
+ - Persistent
+ type: string
name:
description: Name specifies the name of the boot configuration.
type: string
@@ -73,15 +87,6 @@ spec:
description: Parameters specify the parameters to be used for
rendering the boot configuration.
properties:
- bootType:
- default: Oneoff
- description: |-
- BootType specifies the type of boot configuration.
- It can be either "Oneoff" or "Continueous". If not specified, it defaults to "Oneoff".
- enum:
- - Oneoff
- - Continueous
- type: string
ignitionSecretRef:
description: |-
IgnitionSecretRef is a reference to the Kubernetes Secret object that contains
@@ -150,6 +155,12 @@ spec:
status:
description: ServerMaintenanceStatus defines the observed state of a ServerMaintenance
properties:
+ defaultBootOrder:
+ description: DefaultBootOrder specifies the default boot order of
+ the server before maintenance.
+ items:
+ type: string
+ type: array
state:
description: State specifies the current state of the server maintenance.
type: string
diff --git a/config/crd/bases/metal.ironcore.dev_servers.yaml b/config/crd/bases/metal.ironcore.dev_servers.yaml
index ea85b2d4..27f8bb6a 100644
--- a/config/crd/bases/metal.ironcore.dev_servers.yaml
+++ b/config/crd/bases/metal.ironcore.dev_servers.yaml
@@ -201,26 +201,6 @@ spec:
type: string
type: object
x-kubernetes-map-type: atomic
- bootOrder:
- description: BootOrder specifies the boot order of the server.
- items:
- description: BootOrder represents the boot order of the server.
- properties:
- device:
- description: Device is the device to boot from.
- type: string
- name:
- description: Name is the name of the boot device.
- type: string
- priority:
- description: Priority is the priority of the boot device.
- type: integer
- required:
- - device
- - name
- - priority
- type: object
- type: array
indicatorLED:
description: IndicatorLED specifies the desired state of the server's
indicator LED.
diff --git a/internal/controller/biossettings_controller.go b/internal/controller/biossettings_controller.go
index a4247437..fcd0dcbf 100644
--- a/internal/controller/biossettings_controller.go
+++ b/internal/controller/biossettings_controller.go
@@ -41,6 +41,7 @@ type BiosSettingsReconciler struct {
BMCOptions bmc.Options
ResyncInterval time.Duration
TimeoutExpiry time.Duration
+ ProbeImage string
}
const (
@@ -460,8 +461,13 @@ func (r *BiosSettingsReconciler) handleSettingInProgressState(
}
// check if the maintenance is granted
- if ok := r.checkIfMaintenanceGranted(ctx, log, biosSettings, server); !ok {
+ if ok, state := r.checkIfMaintenanceGranted(ctx, log, biosSettings, server); !ok {
log.V(1).Info("Waiting for maintenance to be granted before continuing with updating settings")
+ if state == metalv1alpha1.ServerMaintenanceStateFailed {
+ log.V(1).Info("server maintenance request failed. Please check the ServerMaintenance object")
+ err := r.updateBiosSettingsStatus(ctx, log, biosSettings, metalv1alpha1.BIOSSettingsStateFailed, nil)
+ return ctrl.Result{}, err
+ }
return ctrl.Result{}, nil
}
@@ -1147,38 +1153,38 @@ func (r *BiosSettingsReconciler) checkIfMaintenanceGranted(
log logr.Logger,
biosSettings *metalv1alpha1.BIOSSettings,
server *metalv1alpha1.Server,
-) bool {
+) (bool, metalv1alpha1.ServerMaintenanceState) {
if biosSettings.Spec.ServerMaintenanceRef == nil {
- return true
+ return true, ""
}
if server.Status.State == metalv1alpha1.ServerStateMaintenance {
serverMaintenence, err := r.getReferredServerMaintenance(ctx, log, biosSettings.Spec.ServerMaintenanceRef)
if err != nil {
log.V(1).Info("Failed to get referred ServerMaintenance", "error", err, "serverMaintenanceRef", biosSettings.Spec.ServerMaintenanceRef)
- return false
+ return false, ""
}
if serverMaintenence.Status.State != metalv1alpha1.ServerMaintenanceStateInMaintenance {
log.V(1).Info("ServerMaintenance is not in maintenance. waiting...",
"serverMaintenance State", serverMaintenence.Status.State,
"serverMaintenance", serverMaintenence.Name)
- return false
+ return false, serverMaintenence.Status.State
}
if server.Spec.ServerMaintenanceRef == nil || server.Spec.ServerMaintenanceRef.UID != biosSettings.Spec.ServerMaintenanceRef.UID {
// server in maintenance for other tasks. or
// server maintenance ref is wrong in either server or biosSettings
// wait for update on the server obj
log.V(1).Info("Server is already in maintenance for other tasks", "Server", server.Name, "serverMaintenanceRef", server.Spec.ServerMaintenanceRef)
- return false
+ return false, serverMaintenence.Status.State
}
} else {
// we still need to wait for server to enter maintenance
// wait for update on the server obj
log.V(1).Info("Server not yet in maintenance", "Server", server.Name, "State", server.Status.State, "MaintenanceRef", server.Spec.ServerMaintenanceRef)
- return false
+ return false, ""
}
- return true
+ return true, metalv1alpha1.ServerMaintenanceStateInMaintenance
}
func (r *BiosSettingsReconciler) requestMaintenanceOnServer(
@@ -1202,6 +1208,13 @@ func (r *BiosSettingsReconciler) requestMaintenanceOnServer(
serverMaintenance.Spec.Policy = biosSettings.Spec.ServerMaintenancePolicy
serverMaintenance.Spec.ServerPower = metalv1alpha1.PowerOn
serverMaintenance.Spec.ServerRef = &corev1.LocalObjectReference{Name: server.Name}
+ serverMaintenance.Spec.ServerBootConfigurationTemplate = &metalv1alpha1.ServerBootConfigurationTemplate{
+ Name: biosSettings.Name,
+ Spec: metalv1alpha1.ServerBootConfigurationSpec{
+ ServerRef: corev1.LocalObjectReference{Name: server.Name},
+ Image: r.ProbeImage,
+ },
+ }
if serverMaintenance.Status.State != metalv1alpha1.ServerMaintenanceStateInMaintenance && serverMaintenance.Status.State != "" {
serverMaintenance.Status.State = ""
}
diff --git a/internal/controller/biossettings_controller_test.go b/internal/controller/biossettings_controller_test.go
index 0656c177..acd0b5fe 100644
--- a/internal/controller/biossettings_controller_test.go
+++ b/internal/controller/biossettings_controller_test.go
@@ -315,6 +315,8 @@ var _ = Describe("BIOSSettings Controller", func() {
HaveField("Status.LastAppliedTime", BeNil()),
))
+ _ = MarkBootConfigReady(ctx, k8sClient, serverMaintenance.Name, serverMaintenance.Namespace)
+
By("Ensuring that the Server is in correct power state")
Eventually(Object(server)).Should(
HaveField("Status.PowerState", metalv1alpha1.ServerOnPowerState),
@@ -428,6 +430,8 @@ var _ = Describe("BIOSSettings Controller", func() {
HaveField("Status.State", metalv1alpha1.BIOSSettingsStateInProgress),
)
+ _ = MarkBootConfigReady(ctx, k8sClient, serverMaintenance.Name, serverMaintenance.Namespace)
+
By("Ensuring that the Server is in Maintenance")
Eventually(Object(server)).Should(
HaveField("Status.State", metalv1alpha1.ServerStateMaintenance),
@@ -521,6 +525,8 @@ var _ = Describe("BIOSSettings Controller", func() {
}),
)
+ _ = MarkBootConfigReady(ctx, k8sClient, serverMaintenance.Name, serverMaintenance.Namespace)
+
By("Ensuring that the Server is in Maintenance")
Eventually(Object(server)).Should(
HaveField("Status.State", metalv1alpha1.ServerStateMaintenance),
@@ -600,10 +606,11 @@ var _ = Describe("BIOSSettings Controller", func() {
})).Should(Succeed())
By("Ensuring that the biosSettings resource has setting updated, and moved the state")
- Eventually(Object(biosSettings)).Should(SatisfyAny(
+ Eventually(Object(biosSettings)).Should(
HaveField("Status.State", metalv1alpha1.BIOSSettingsStateInProgress),
- HaveField("Status.State", metalv1alpha1.BIOSSettingsStateApplied),
- ))
+ )
+
+ _ = MarkBootConfigReady(ctx, k8sClient, biosSettings.Name, ns.Name)
// due to nature of mocking, we cant not determine few steps here. hence need a longer wait time
Eventually(Object(biosSettings)).Should(SatisfyAll(
HaveField("Status.State", metalv1alpha1.BIOSSettingsStateApplied),
@@ -678,6 +685,8 @@ var _ = Describe("BIOSSettings Controller", func() {
HaveField("Status.State", metalv1alpha1.BIOSSettingsStateInProgress),
))
+ _ = MarkBootConfigReady(ctx, k8sClient, biosSettings.Name, ns.Name)
+
Eventually(Object(biosSettings)).Should(SatisfyAll(
HaveField("Status.State", metalv1alpha1.BIOSSettingsStateApplied),
HaveField("Status.LastAppliedTime.IsZero()", false),
@@ -769,6 +778,8 @@ var _ = Describe("BIOSSettings Sequence Controller", func() {
}
Expect(k8sClient.Create(ctx, biosSettings)).To(Succeed())
+ _ = MarkBootConfigReady(ctx, k8sClient, biosSettings.Name, ns.Name)
+
By("Ensuring that the BIOSSetting Object has moved to completed")
Eventually(Object(biosSettings)).Should(SatisfyAll(
HaveField("Status.State", metalv1alpha1.BIOSSettingsStateApplied),
@@ -910,6 +921,8 @@ var _ = Describe("BIOSSettings Sequence Controller", func() {
}
Expect(k8sClient.Create(ctx, biosSettings)).To(Succeed())
+ _ = MarkBootConfigReady(ctx, k8sClient, biosSettings.Name, ns.Name)
+
By("Ensuring that the BIOSSetting Object has moved to completed")
Eventually(Object(biosSettings)).Should(SatisfyAll(
HaveField("Status.State", metalv1alpha1.BIOSSettingsStateApplied),
@@ -928,6 +941,8 @@ var _ = Describe("BIOSSettings Sequence Controller", func() {
HaveField("Status.State", metalv1alpha1.BIOSSettingsStateInProgress),
)
+ _ = MarkBootConfigReady(ctx, k8sClient, biosSettings.Name, ns.Name)
+
By("Ensuring that the BIOSSetting Object has moved to completed")
Eventually(Object(biosSettings)).Should(SatisfyAll(
HaveField("Status.State", metalv1alpha1.BIOSSettingsStateApplied),
@@ -969,6 +984,8 @@ var _ = Describe("BIOSSettings Sequence Controller", func() {
}
Expect(k8sClient.Create(ctx, biosSettings)).To(Succeed())
+ _ = MarkBootConfigReady(ctx, k8sClient, biosSettings.Name, ns.Name)
+
Eventually(Object(biosSettings)).WithPolling(1 * time.Microsecond).Should(
HaveField("Status.State", metalv1alpha1.BIOSSettingsStateInProgress),
)
diff --git a/internal/controller/biosversion_controller.go b/internal/controller/biosversion_controller.go
index 531d5b12..0390a7c4 100644
--- a/internal/controller/biosversion_controller.go
+++ b/internal/controller/biosversion_controller.go
@@ -38,6 +38,7 @@ type BIOSVersionReconciler struct {
Scheme *runtime.Scheme
BMCOptions bmc.Options
ResyncInterval time.Duration
+ ProbeImage string
}
const (
@@ -213,6 +214,11 @@ func (r *BIOSVersionReconciler) ensureBiosVersionStateTransition(
if err != nil {
return false, fmt.Errorf("failed to get referred serverMaintenance obj from BIOSVersion: %w", err)
}
+ if serverMaintenence.Status.State == metalv1alpha1.ServerMaintenanceStateFailed {
+ log.V(1).Info("server maintenance request failed. Please check the ServerMaintenance object")
+ err := r.updateBiosVersionStatus(ctx, log, biosVersion, metalv1alpha1.BIOSVersionStateFailed, nil, nil, nil)
+ return false, err
+ }
if serverMaintenence.Status.State != metalv1alpha1.ServerMaintenanceStateInMaintenance {
log.V(1).Info("ServerMaintenance is not in maintenance. waiting...",
"serverMaintenance State", serverMaintenence.Status.State,
@@ -624,6 +630,13 @@ func (r *BIOSVersionReconciler) requestMaintenanceOnServer(
serverMaintenance.Spec.Policy = biosVersion.Spec.ServerMaintenancePolicy
serverMaintenance.Spec.ServerPower = metalv1alpha1.PowerOn
serverMaintenance.Spec.ServerRef = &corev1.LocalObjectReference{Name: server.Name}
+ serverMaintenance.Spec.ServerBootConfigurationTemplate = &metalv1alpha1.ServerBootConfigurationTemplate{
+ Name: biosVersion.Name,
+ Spec: metalv1alpha1.ServerBootConfigurationSpec{
+ ServerRef: corev1.LocalObjectReference{Name: server.Name},
+ Image: r.ProbeImage,
+ },
+ }
if serverMaintenance.Status.State != metalv1alpha1.ServerMaintenanceStateInMaintenance && serverMaintenance.Status.State != "" {
serverMaintenance.Status.State = ""
}
diff --git a/internal/controller/biosversion_controller_test.go b/internal/controller/biosversion_controller_test.go
index 2ccaef4e..b3331bff 100644
--- a/internal/controller/biosversion_controller_test.go
+++ b/internal/controller/biosversion_controller_test.go
@@ -189,6 +189,7 @@ var _ = Describe("BIOSVersion Controller", func() {
APIVersion: "metal.ironcore.dev/v1alpha1",
}),
))
+ MarkBootConfigReady(ctx, k8sClient, serverMaintenance.Name, serverMaintenance.Namespace)
ensureBiosVersionConditionTransisition(acc, biosVersion, server)
@@ -297,6 +298,7 @@ var _ = Describe("BIOSVersion Controller", func() {
APIVersion: "metal.ironcore.dev/v1alpha1",
}),
))
+ MarkBootConfigReady(ctx, k8sClient, serverMaintenance.Name, serverMaintenance.Namespace)
ensureBiosVersionConditionTransisition(acc, biosVersion, server)
@@ -356,6 +358,8 @@ var _ = Describe("BIOSVersion Controller", func() {
HaveField("Status.State", metalv1alpha1.BIOSVersionStateInProgress),
)
+ MarkBootConfigReady(ctx, k8sClient, biosVersion.Name, ns.Name)
+
Eventually(Object(biosVersion)).Should(
HaveField("Status.State", metalv1alpha1.BIOSVersionStateCompleted),
)
diff --git a/internal/controller/bmcsettings_controller.go b/internal/controller/bmcsettings_controller.go
index d44f5c20..af12f5ab 100644
--- a/internal/controller/bmcsettings_controller.go
+++ b/internal/controller/bmcsettings_controller.go
@@ -38,6 +38,7 @@ type BMCSettingsReconciler struct {
Insecure bool
Scheme *runtime.Scheme
BMCOptions bmc.Options
+ ProbeImage string
}
const BMCSettingFinalizer = "firmware.ironcore.dev/out-of-band-management"
@@ -313,9 +314,12 @@ func (r *BMCSettingsReconciler) handleSettingInProgressState(
}
// check if the maintenance is granted
- if ok := r.checkIfMaintenanceGranted(ctx, log, bmcSetting, bmcClient); !ok {
+ if ok, failed := r.checkIfMaintenanceGranted(ctx, log, bmcSetting, bmcClient); !ok {
log.V(1).Info("Waiting for maintenance to be granted before continuing with updating settings")
return ctrl.Result{}, err
+ } else if failed == metalv1alpha1.ServerMaintenanceStateFailed {
+ err := r.updateBMCSettingsStatus(ctx, log, bmcSetting, metalv1alpha1.BMCSettingsStateFailed)
+ return ctrl.Result{}, err
}
return r.updateSettingsAndVerify(ctx, log, bmcSetting, BMC, settingsDiff, bmcClient)
@@ -495,23 +499,23 @@ func (r *BMCSettingsReconciler) checkIfMaintenanceGranted(
log logr.Logger,
bmcSetting *metalv1alpha1.BMCSettings,
bmcClient bmc.BMC,
-) bool {
+) (bool, metalv1alpha1.ServerMaintenanceState) {
if bmcSetting.Spec.ServerMaintenanceRefs == nil {
- return false
+ return false, ""
}
servers, err := r.getServers(ctx, log, bmcSetting, bmcClient)
if err != nil {
log.V(1).Error(err, "Failed to get ref. servers to determine maintenance state ")
- return false
+ return false, ""
}
if len(bmcSetting.Spec.ServerMaintenanceRefs) != len(servers) {
log.V(1).Info("Not all servers have Maintenance", "ServerMaintenanceRefs", bmcSetting.Spec.ServerMaintenanceRefs, "Servers", servers)
- return false
+ return false, ""
}
- notInMaintenanceState := make([]string, 0, len(servers))
+ notInMaintenanceState := make(map[string]metalv1alpha1.ServerMaintenanceState, len(servers))
for _, server := range servers {
if server.Status.State == metalv1alpha1.ServerStateMaintenance {
serverMaintenanceRef := r.getServerMaintenanceRefForServer(bmcSetting.Spec.ServerMaintenanceRefs, server.Spec.ServerMaintenanceRef.UID)
@@ -519,12 +523,16 @@ func (r *BMCSettingsReconciler) checkIfMaintenanceGranted(
key := client.ObjectKey{Name: serverMaintenanceRef.Name, Namespace: serverMaintenanceRef.Namespace}
if err := r.Get(ctx, key, serverMaintenance); err != nil {
log.V(1).Error(err, "failed to get referred server's Manitenance", "Server", server.Name, "ServerMaintenanceRef", serverMaintenanceRef)
- return false
+ notInMaintenanceState[server.Name] = ""
+ }
+ // fail imediately if any of the server maintenance request failed, as we can not proceed further
+ if serverMaintenance.Status.State == metalv1alpha1.ServerMaintenanceStateFailed {
+ return false, metalv1alpha1.ServerMaintenanceStateFailed
}
// this gives us the waiting time for the server to be prepared for maintenance by ServerMaintenance controller
if serverMaintenance.Status.State != metalv1alpha1.ServerMaintenanceStateInMaintenance {
log.V(1).Info("ServerMaintenance not yet in maintenance state", "Server", server.Name, "ServerMaintenance", serverMaintenance.Name, "State", serverMaintenance.Status.State)
- notInMaintenanceState = append(notInMaintenanceState, server.Name)
+ notInMaintenanceState[server.Name] = serverMaintenance.Status.State
}
if server.Spec.ServerMaintenanceRef == nil || serverMaintenanceRef == nil {
// server in maintenance for other tasks. or
@@ -535,13 +543,13 @@ func (r *BMCSettingsReconciler) checkIfMaintenanceGranted(
"ServerMaintenanceRef", server.Spec.ServerMaintenanceRef,
"BMCSettingMaintenaceRef", serverMaintenanceRef,
)
- notInMaintenanceState = append(notInMaintenanceState, server.Name)
+ notInMaintenanceState[server.Name] = serverMaintenance.Status.State
}
} else {
// we still need to wait for server to enter maintenance
// wait for update on the server obj
log.V(1).Info("Server not yet in maintenance", "Server", server.Name, "State", server.Status.State, "MaintenanceRef", server.Spec.ServerMaintenanceRef)
- notInMaintenanceState = append(notInMaintenanceState, server.Name)
+ notInMaintenanceState[server.Name] = metalv1alpha1.ServerMaintenanceStatePending
}
}
@@ -549,10 +557,10 @@ func (r *BMCSettingsReconciler) checkIfMaintenanceGranted(
log.V(1).Info("Some servers not yet in maintenance",
"Required maintenances on servers", bmcSetting.Spec.ServerMaintenanceRefs,
"Servers not in maintence", notInMaintenanceState)
- return false
+ return false, metalv1alpha1.ServerMaintenanceStatePreapareMaintenance
}
- return true
+ return true, metalv1alpha1.ServerMaintenanceStateInMaintenance
}
func (r *BMCSettingsReconciler) requestMaintenanceOnServers(
@@ -616,6 +624,13 @@ func (r *BMCSettingsReconciler) requestMaintenanceOnServers(
serverMaintenance.Spec.Policy = bmcSetting.Spec.ServerMaintenancePolicy
serverMaintenance.Spec.ServerPower = metalv1alpha1.PowerOn
serverMaintenance.Spec.ServerRef = &corev1.LocalObjectReference{Name: server.Name}
+ serverMaintenance.Spec.ServerBootConfigurationTemplate = &metalv1alpha1.ServerBootConfigurationTemplate{
+ Name: bmcSetting.Name,
+ Spec: metalv1alpha1.ServerBootConfigurationSpec{
+ ServerRef: corev1.LocalObjectReference{Name: server.Name},
+ Image: r.ProbeImage,
+ },
+ }
if serverMaintenance.Status.State != metalv1alpha1.ServerMaintenanceStateInMaintenance && serverMaintenance.Status.State != "" {
serverMaintenance.Status.State = ""
}
diff --git a/internal/controller/bmcsettings_controller_test.go b/internal/controller/bmcsettings_controller_test.go
index b045ff0f..76544133 100644
--- a/internal/controller/bmcsettings_controller_test.go
+++ b/internal/controller/bmcsettings_controller_test.go
@@ -174,6 +174,17 @@ var _ = Describe("BMCSettings Controller", func() {
Eventually(Object(bmc)).Should(SatisfyAll(
HaveField("Spec.BMCSettingRef", &v1.LocalObjectReference{Name: bmcSettings.Name}),
))
+ By("Ensuring that the Maintenance resource has been created")
+ var serverMaintenanceList metalv1alpha1.ServerMaintenanceList
+ Eventually(ObjectList(&serverMaintenanceList)).Should(HaveField("Items", HaveLen(1)))
+
+ serverMaintenance := &metalv1alpha1.ServerMaintenance{
+ ObjectMeta: metav1.ObjectMeta{
+ Namespace: ns.Name,
+ Name: serverMaintenanceList.Items[0].Name,
+ },
+ }
+ MarkBootConfigReady(ctx, k8sClient, serverMaintenance.Name, serverMaintenance.Namespace)
By("Ensuring that the BMCSettings has reached next state")
Eventually(Object(bmcSettings)).Should(SatisfyAny(
@@ -260,6 +271,8 @@ var _ = Describe("BMCSettings Controller", func() {
metautils.SetAnnotation(serverClaim, metalv1alpha1.ServerMaintenanceApprovalKey, "true")
})).Should(Succeed())
+ MarkBootConfigReady(ctx, k8sClient, serverMaintenance.Name, serverMaintenance.Namespace)
+
Eventually(Object(bmcSettings)).Should(SatisfyAny(
HaveField("Status.State", metalv1alpha1.BMCSettingsStateInProgress),
HaveField("Status.State", metalv1alpha1.BMCSettingsStateApplied),
@@ -348,6 +361,8 @@ var _ = Describe("BMCSettings Controller", func() {
}
Eventually(Get(serverMaintenance)).Should(Succeed())
+ MarkBootConfigReady(ctx, k8sClient, serverMaintenance.Name, serverMaintenance.Namespace)
+
By("Ensuring that the BMCSettings resource hasmoved to next state")
Eventually(Object(BMCSettings)).Should(SatisfyAny(
HaveField("Status.State", metalv1alpha1.BMCSettingsStateInProgress),
@@ -419,6 +434,18 @@ var _ = Describe("BMCSettings Controller", func() {
HaveField("Status.State", metalv1alpha1.BMCSettingsStateInProgress),
)
+ By("Ensuring that the Maintenance resource has been created")
+ var serverMaintenanceList metalv1alpha1.ServerMaintenanceList
+ Eventually(ObjectList(&serverMaintenanceList)).Should(HaveField("Items", HaveLen(1)))
+
+ serverMaintenance := &metalv1alpha1.ServerMaintenance{
+ ObjectMeta: metav1.ObjectMeta{
+ Namespace: ns.Name,
+ Name: serverMaintenanceList.Items[0].Name,
+ },
+ }
+ Eventually(Get(serverMaintenance)).Should(Succeed())
+ MarkBootConfigReady(ctx, k8sClient, serverMaintenance.Name, serverMaintenance.Namespace)
Eventually(Object(BMCSettings)).Should(
HaveField("Status.State", metalv1alpha1.BMCSettingsStateApplied),
)
diff --git a/internal/controller/bmcversion_controller.go b/internal/controller/bmcversion_controller.go
index 30d79eaa..49b6e29f 100644
--- a/internal/controller/bmcversion_controller.go
+++ b/internal/controller/bmcversion_controller.go
@@ -38,6 +38,7 @@ type BMCVersionReconciler struct {
Scheme *runtime.Scheme
BMCOptions bmc.Options
ResyncInterval time.Duration
+ ProbeImage string
}
const (
@@ -242,9 +243,12 @@ func (r *BMCVersionReconciler) ensureBMCVersionStateTransition(
}
// check if the maintenance is granted
- if ok := r.checkIfMaintenanceGranted(ctx, log, bmcClient, bmcVersion); !ok {
+ if ok, failed := r.checkIfMaintenanceGranted(ctx, log, bmcClient, bmcVersion); !ok {
log.V(1).Info("Waiting for maintenance to be granted before continuing with updating bmc version")
return ctrl.Result{}, err
+ } else if failed == metalv1alpha1.ServerMaintenanceStateFailed {
+ err := r.updateBMCVersionStatus(ctx, log, bmcVersion, metalv1alpha1.BMCVersionStateFailed, nil, nil, nil)
+ return ctrl.Result{}, err
}
return r.handleUpgradeInProgressState(ctx, log, bmcVersion, bmcClient, bmc)
@@ -413,28 +417,43 @@ func (r *BMCVersionReconciler) checkIfMaintenanceGranted(
log logr.Logger,
bmcClient bmc.BMC,
bmcVersion *metalv1alpha1.BMCVersion,
-) bool {
+) (bool, metalv1alpha1.ServerMaintenanceState) {
// todo length
if bmcVersion.Spec.ServerMaintenanceRefs == nil {
- return true
+ return true, ""
}
servers, err := r.getServers(ctx, log, bmcClient, bmcVersion)
if err != nil {
log.V(1).Error(err, "Failed to get ref. servers to determine maintenance state ")
- return false
+ return false, ""
}
if len(bmcVersion.Spec.ServerMaintenanceRefs) != len(servers) {
log.V(1).Info("Not all servers have Maintenance", "ServerMaintenanceRefs", bmcVersion.Spec.ServerMaintenanceRefs, "Servers", servers)
- return false
+ return false, ""
}
- notInMaintenanceState := make(map[string]bool, len(servers))
+ notInMaintenanceState := make(map[string]metalv1alpha1.ServerMaintenanceState, len(servers))
for _, server := range servers {
if server.Status.State == metalv1alpha1.ServerStateMaintenance {
serverMaintenanceRef, ok := r.getServerMaintenanceRefForServer(bmcVersion.Spec.ServerMaintenanceRefs, server.Spec.ServerMaintenanceRef.UID)
+ serverMaintenance := &metalv1alpha1.ServerMaintenance{}
+ key := client.ObjectKey{Name: serverMaintenanceRef.Name, Namespace: serverMaintenanceRef.Namespace}
+ if err := r.Get(ctx, key, serverMaintenance); err != nil {
+ log.V(1).Error(err, "failed to get referred server's Manitenance", "Server", server.Name, "ServerMaintenanceRef", serverMaintenanceRef)
+ notInMaintenanceState[server.Name] = ""
+ }
+ if serverMaintenance.Status.State == metalv1alpha1.ServerMaintenanceStateFailed {
+ // fail imediately if any of the server maintenance request failed, as we can not proceed further
+ return false, metalv1alpha1.ServerMaintenanceStateFailed
+ }
+ // this gives us the waiting time for the server to be prepared for maintenance by ServerMaintenance controller
+ if serverMaintenance.Status.State != metalv1alpha1.ServerMaintenanceStateInMaintenance {
+ log.V(1).Info("ServerMaintenance not yet in maintenance state", "Server", server.Name, "ServerMaintenance", serverMaintenance.Name, "State", serverMaintenance.Status.State)
+ notInMaintenanceState[server.Name] = serverMaintenance.Status.State
+ }
if server.Spec.ServerMaintenanceRef == nil || !ok || server.Spec.ServerMaintenanceRef.UID != serverMaintenanceRef.UID {
// server in maintenance for other tasks. or
// server maintenance ref is wrong in either server or bmcVersion
@@ -444,22 +463,22 @@ func (r *BMCVersionReconciler) checkIfMaintenanceGranted(
"ServerMaintenanceRef", server.Spec.ServerMaintenanceRef,
"BMCVersionMaintenaceRef", serverMaintenanceRef,
)
- notInMaintenanceState[server.Name] = false
+ notInMaintenanceState[server.Name] = serverMaintenance.Status.State
}
} else {
// we still need to wait for server to enter maintenance
// wait for update on the server obj
log.V(1).Info("Server not yet in maintenance", "Server", server.Name, "State", server.Status.State, "MaintenanceRef", server.Spec.ServerMaintenanceRef)
- notInMaintenanceState[server.Name] = false
+ notInMaintenanceState[server.Name] = metalv1alpha1.ServerMaintenanceStatePending
}
}
if len(notInMaintenanceState) > 0 {
log.V(1).Info("Some servers not yet in maintenance", "req maintenances on servers", bmcVersion.Spec.ServerMaintenanceRefs)
- return false
+ return false, metalv1alpha1.ServerMaintenanceStatePreapareMaintenance
}
- return true
+ return true, metalv1alpha1.ServerMaintenanceStateInMaintenance
}
func (r *BMCVersionReconciler) checkVersionAndTransistionState(
@@ -764,6 +783,13 @@ func (r *BMCVersionReconciler) requestMaintenanceOnServers(
serverMaintenance.Spec.Policy = bmcVersion.Spec.ServerMaintenancePolicy
serverMaintenance.Spec.ServerPower = metalv1alpha1.PowerOn
serverMaintenance.Spec.ServerRef = &corev1.LocalObjectReference{Name: server.Name}
+ serverMaintenance.Spec.ServerBootConfigurationTemplate = &metalv1alpha1.ServerBootConfigurationTemplate{
+ Name: bmcVersion.Name,
+ Spec: metalv1alpha1.ServerBootConfigurationSpec{
+ ServerRef: corev1.LocalObjectReference{Name: server.Name},
+ Image: r.ProbeImage,
+ },
+ }
if serverMaintenance.Status.State != metalv1alpha1.ServerMaintenanceStateInMaintenance && serverMaintenance.Status.State != "" {
serverMaintenance.Status.State = ""
}
diff --git a/internal/controller/bmcversion_controller_test.go b/internal/controller/bmcversion_controller_test.go
index 52c264bd..7928958a 100644
--- a/internal/controller/bmcversion_controller_test.go
+++ b/internal/controller/bmcversion_controller_test.go
@@ -203,6 +203,7 @@ var _ = Describe("BMCVersion Controller", func() {
}),
))
+ MarkBootConfigReady(ctx, k8sClient, serverMaintenance.Name, serverMaintenance.Namespace)
ensureBMCVersionConditionTransisition(ctx, acc, bmcVersion)
By("Ensuring that BMC upgrade has completed")
@@ -306,6 +307,8 @@ var _ = Describe("BMCVersion Controller", func() {
}),
))
+ MarkBootConfigReady(ctx, k8sClient, serverMaintenance.Name, serverMaintenance.Namespace)
+
ensureBMCVersionConditionTransisition(ctx, acc, bmcVersion)
By("Ensuring that BMC upgrade has completed")
@@ -363,6 +366,17 @@ var _ = Describe("BMCVersion Controller", func() {
Eventually(Object(bmcVersion)).Should(
HaveField("Status.State", metalv1alpha1.BMCVersionStateInProgress),
)
+ By("Ensuring that the Maintenance resource has been created")
+ var serverMaintenanceList metalv1alpha1.ServerMaintenanceList
+ Eventually(ObjectList(&serverMaintenanceList)).Should(HaveField("Items", HaveLen(1)))
+
+ serverMaintenance := &metalv1alpha1.ServerMaintenance{
+ ObjectMeta: metav1.ObjectMeta{
+ Namespace: ns.Name,
+ Name: serverMaintenanceList.Items[0].Name,
+ },
+ }
+ MarkBootConfigReady(ctx, k8sClient, serverMaintenance.Name, serverMaintenance.Namespace)
Eventually(Object(bmcVersion)).Should(
HaveField("Status.State", metalv1alpha1.BMCVersionStateCompleted),
diff --git a/internal/controller/common_test_helpers.go b/internal/controller/common_test_helpers.go
index 88ae6e3e..c149adf9 100644
--- a/internal/controller/common_test_helpers.go
+++ b/internal/controller/common_test_helpers.go
@@ -17,6 +17,7 @@ import (
v1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+ "k8s.io/apimachinery/pkg/types"
"sigs.k8s.io/controller-runtime/pkg/client"
)
@@ -62,25 +63,7 @@ func TransitionServerFromInitialToAvailableState(ctx context.Context, k8sClient
// need long time to create boot config
// as we go through multiple reconcile before creating the boot config
By("Ensuring the boot configuration has been created")
- bootConfig := &metalv1alpha1.ServerBootConfiguration{
- ObjectMeta: metav1.ObjectMeta{
- Namespace: BootConfigNameSpace,
- Name: server.Name,
- },
- }
- Eventually(Get(bootConfig)).Should(
- Succeed(),
- fmt.Sprintf("Expected to get the bootConfig %v, created by Server %v", bootConfig, server.Name),
- )
- Eventually(Object(bootConfig)).Should(
- HaveField("Status.State", metalv1alpha1.ServerBootConfigurationStatePending),
- "Expected to get the bootConfig to reach pending state")
-
- By("Patching the boot configuration to a Ready state")
- Eventually(UpdateStatus(bootConfig, func() {
- bootConfig.Status.State = metalv1alpha1.ServerBootConfigurationStateReady
- })).Should(Succeed(), fmt.Sprintf("Unable to set the bootconfig %v to Ready State", bootConfig))
-
+ bootConfig := MarkBootConfigReady(ctx, k8sClient, server.Name, BootConfigNameSpace)
Eventually(Object(bootConfig)).Should(SatisfyAll(
HaveField("Status.State", metalv1alpha1.ServerBootConfigurationStateReady),
HaveField("Spec.IgnitionSecretRef", Not(BeNil())),
@@ -246,3 +229,22 @@ func CreateServerClaim(
}
return serverClaim
}
+
+func MarkBootConfigReady(ctx context.Context, k8sClient client.Client, name, namespace string) *metalv1alpha1.ServerBootConfiguration {
+ bootConfig := &metalv1alpha1.ServerBootConfiguration{}
+ Eventually(k8sClient.Get).WithArguments(ctx, types.NamespacedName{
+ Name: name,
+ Namespace: namespace,
+ }, bootConfig).Should(Succeed())
+
+ Eventually(Object(bootConfig)).Should(
+ HaveField("Status.State", metalv1alpha1.ServerBootConfigurationStatePending),
+ "Expected to get the bootConfig to reach pending state",
+ )
+
+ By("Patching the boot configuration to a Ready state")
+ Eventually(UpdateStatus(bootConfig, func() {
+ bootConfig.Status.State = metalv1alpha1.ServerBootConfigurationStateReady
+ })).Should(Succeed())
+ return bootConfig
+}
diff --git a/internal/controller/server_controller.go b/internal/controller/server_controller.go
index ef48ae25..774bd578 100644
--- a/internal/controller/server_controller.go
+++ b/internal/controller/server_controller.go
@@ -12,7 +12,6 @@ import (
"fmt"
"io"
"net/http"
- "sort"
"strings"
"time"
@@ -234,11 +233,6 @@ func (r *ServerReconciler) reconcile(ctx context.Context, log logr.Logger, serve
}
log.V(1).Info("Updated Server status")
- if err := r.applyBootOrder(ctx, log, bmcClient, server); err != nil {
- return ctrl.Result{}, fmt.Errorf("failed to update server bios boot order: %w", err)
- }
- log.V(1).Info("Updated Server BIOS boot order")
-
requeue, err := r.ensureServerStateTransition(ctx, log, bmcClient, server)
if requeue && err == nil {
// we need to update the ServerStatus after state transition to make sure it reflects the changes done
@@ -1018,34 +1012,6 @@ func (r *ServerReconciler) invalidateRegistryEntryForServer(log logr.Logger, ser
return nil
}
-func (r *ServerReconciler) applyBootOrder(ctx context.Context, log logr.Logger, bmcClient bmc.BMC, server *metalv1alpha1.Server) error {
- if server.Spec.BMCRef == nil && server.Spec.BMC == nil {
- log.V(1).Info("Server has no BMC connection configured")
- return nil
- }
-
- order, err := bmcClient.GetBootOrder(ctx, server.Spec.SystemURI)
- if err != nil {
- return fmt.Errorf("failed to create BMC client: %w", err)
- }
-
- sort.Slice(server.Spec.BootOrder, func(i, j int) bool {
- return server.Spec.BootOrder[i].Priority < server.Spec.BootOrder[j].Priority
- })
- newOrder := []string{}
- change := false
- for i, boot := range server.Spec.BootOrder {
- newOrder = append(newOrder, boot.Device)
- if order[i] != boot.Device {
- change = true
- }
- }
- if change {
- return bmcClient.SetBootOrder(ctx, server.Spec.SystemURI, newOrder)
- }
- return nil
-}
-
func (r *ServerReconciler) handleAnnotionOperations(ctx context.Context, log logr.Logger, bmcClient bmc.BMC, server *metalv1alpha1.Server) (bool, error) {
annotations := server.GetAnnotations()
operation, ok := annotations[metalv1alpha1.OperationAnnotation]
diff --git a/internal/controller/servermaintenance_controller.go b/internal/controller/servermaintenance_controller.go
index f5b1dbee..f67110ea 100644
--- a/internal/controller/servermaintenance_controller.go
+++ b/internal/controller/servermaintenance_controller.go
@@ -7,6 +7,7 @@ import (
"context"
"fmt"
"reflect"
+ "strings"
"time"
"github.com/go-logr/logr"
@@ -110,17 +111,12 @@ func (r *ServerMaintenanceReconciler) ensureServerMaintenanceStateTransition(ctx
return ctrl.Result{}, err
}
- if config, err := r.getMaintenanceBootConfigurationRef(ctx, log, serverMaintenance); err != nil || config == nil {
+ if changed, _, err := r.hasBootConfigurationChanged(ctx, log, serverMaintenance); err != nil {
return ctrl.Result{}, err
- } else {
- if reflect.DeepEqual(config.Spec, serverMaintenance.Spec.ServerBootConfigurationTemplate.Spec) {
- // no changes
- log.V(1).Info("No changes in ServerBootConfigurationTemplate")
- } else {
- return r.moveToPrepareMaintenanceState(ctx, log, serverMaintenance, server)
- }
+ } else if changed {
+ return r.moveToPrepareMaintenanceState(ctx, log, serverMaintenance, server)
}
- err = r.setAndPatchServerPowerState(ctx, log, server, serverMaintenance)
+ err = r.setAndPatchServerPowerState(ctx, log, server, serverMaintenance.Spec.ServerPower)
return ctrl.Result{}, err
case metalv1alpha1.ServerMaintenanceStateFailed:
return r.handleFailedState(log, serverMaintenance)
@@ -189,19 +185,37 @@ func (r *ServerMaintenanceReconciler) handlePrepareMaintenanceState(ctx context.
// rest of the operation can be done only if the server is in maintenance state
// else, we will hit conflict with ServerClaim controller which will still be asserting Specs on Serevr
if server.Status.State != metalv1alpha1.ServerStateMaintenance {
- return ctrl.Result{}, fmt.Errorf("server is not in maintenance state, current state: %s", server.Status.State)
+ log.V(1).Info("Waiting for server to be in maintenance state", "Server", server.Name, "CurrentState", server.Status.State)
+ return ctrl.Result{}, nil
}
- config, err := r.applyServerBootConfiguration(ctx, log, serverMaintenance, server)
- if err != nil {
+ config := &metalv1alpha1.ServerBootConfiguration{}
+ changed := false
+ if changed, config, err = r.hasBootConfigurationChanged(ctx, log, serverMaintenance); err != nil {
return ctrl.Result{}, err
+ } else if changed || config == nil {
+ // turn server power off before applying new boot configuration
+ // this will help schedule the job to change the boot order for the server
+ // note: if the server is already off, this will be a no-op
+ if server.Spec.Power != metalv1alpha1.PowerOff && server.Status.PowerState != metalv1alpha1.ServerOffPowerState {
+ if err := r.setAndPatchServerPowerState(ctx, log, server, metalv1alpha1.PowerOff); err != nil {
+ return ctrl.Result{}, err
+ }
+ // requeue to give time to the server to power off
+ return ctrl.Result{RequeueAfter: r.ResyncInterval}, nil
+ }
+ // if no config found or if it has changed, we need to create or path it
+ config, err = r.applyServerBootConfiguration(ctx, log, serverMaintenance, server)
+ if err != nil {
+ return ctrl.Result{}, err
+ }
}
// Note: this is to check skip of BootConfiguration
// implication of skipping is that the server will still continue to run the OS during Maintenance
// might be subjected to reboots
val, found := serverMaintenance.GetAnnotations()[metalv1alpha1.OperationSkipBootConfiguration]
- if config == nil && !(found && val == metalv1alpha1.OperationBootConfigurationSkip) {
+ if config == nil && (!found || val != metalv1alpha1.OperationBootConfigurationSkip) {
log.V(1).Info("No ServerBootConfigurationTemplate boot configuration", "Server", server.Name)
_, err = r.patchMaintenanceState(ctx, serverMaintenance, metalv1alpha1.ServerMaintenanceStateFailed)
return ctrl.Result{}, err
@@ -220,10 +234,16 @@ func (r *ServerMaintenanceReconciler) handlePrepareMaintenanceState(ctx context.
}
if config.Status.State == metalv1alpha1.ServerBootConfigurationStateReady {
log.V(1).Info("Server maintenance boot configuration is ready", "Server", server.Name)
+ // now we change the boot order to the server and power it on to complete the configuration
+ if requeue, err := r.configureBootOrder(ctx, log, server, serverMaintenance); err != nil {
+ return ctrl.Result{}, err
+ } else if requeue {
+ return ctrl.Result{RequeueAfter: r.ResyncInterval}, nil
+ }
}
}
if server.Spec.Power != serverMaintenance.Spec.ServerPower {
- if err := r.setAndPatchServerPowerState(ctx, log, server, serverMaintenance); err != nil {
+ if err := r.setAndPatchServerPowerState(ctx, log, server, serverMaintenance.Spec.ServerPower); err != nil {
return ctrl.Result{}, err
}
}
@@ -267,9 +287,12 @@ func (r *ServerMaintenanceReconciler) applyServerBootConfiguration(ctx context.C
return config, nil
}
-func (r *ServerMaintenanceReconciler) setAndPatchServerPowerState(ctx context.Context, log logr.Logger, server *metalv1alpha1.Server, maintenance *metalv1alpha1.ServerMaintenance) error {
+func (r *ServerMaintenanceReconciler) setAndPatchServerPowerState(ctx context.Context, log logr.Logger, server *metalv1alpha1.Server, power metalv1alpha1.Power) error {
+ if server.Spec.Power == power {
+ return nil
+ }
serverBase := server.DeepCopy()
- server.Spec.Power = maintenance.Spec.ServerPower
+ server.Spec.Power = power
if err := r.Patch(ctx, server, client.MergeFrom(serverBase)); err != nil {
return fmt.Errorf("failed to patch server power state: %w", err)
}
@@ -312,17 +335,21 @@ func (r *ServerMaintenanceReconciler) delete(ctx context.Context, log logr.Logge
if err != nil {
return ctrl.Result{}, err
}
- // clean up the maintenance boot config if it was set, before removing server from Maintennace
- if serverMaintenance != nil {
- log.V(1).Info("Restoring boot order after maintenance")
- if wait, ok, err := r.revertMaintenanceBootConfig(ctx, log, serverMaintenance, server); err != nil {
+ if requeue, err := r.revertMaintenanceBootConfig(ctx, log, server, serverMaintenance); err != nil {
+ return ctrl.Result{}, err
+ } else if requeue {
+ return ctrl.Result{RequeueAfter: r.ResyncInterval}, nil
+ }
+ // make sure the server is powered on before removing maintenance
+ // this will ensure the server is booted into default boot order
+ if server.Spec.Power != metalv1alpha1.PowerOn && server.Status.PowerState != metalv1alpha1.ServerOnPowerState {
+ if err := r.setAndPatchServerPowerState(ctx, log, server, metalv1alpha1.PowerOn); err != nil {
return ctrl.Result{}, err
- } else if wait {
- return ctrl.Result{RequeueAfter: r.ResyncInterval}, nil
- } else if !ok {
- log.V(1).Info("maintenance boot revert is completed")
}
+ // requeue to give time to the server to power off
+ return ctrl.Result{RequeueAfter: r.ResyncInterval}, nil
}
+
if err := r.cleanup(ctx, log, server, serverMaintenance); err != nil {
return ctrl.Result{}, err
}
@@ -400,7 +427,6 @@ func (r *ServerMaintenanceReconciler) moveToPrepareMaintenanceState(
server *metalv1alpha1.Server,
) (ctrl.Result, error) {
if err := r.updateServerRef(ctx, log, serverMaintenance, server); err != nil {
- log.Error(err, "failed to patch server maintenance ref")
return ctrl.Result{}, err
}
@@ -410,28 +436,134 @@ func (r *ServerMaintenanceReconciler) moveToPrepareMaintenanceState(
return ctrl.Result{}, nil
}
+func (r *ServerMaintenanceReconciler) configureBootOrder(ctx context.Context, log logr.Logger, server *metalv1alpha1.Server, maintenance *metalv1alpha1.ServerMaintenance) (bool, error) {
+ bmcClient, err := bmcutils.GetBMCClientForServer(ctx, r.Client, server, r.Insecure, r.BMCOptions)
+ if err != nil {
+ return false, fmt.Errorf("failed to create BMC client: %w", err)
+ }
+ defer bmcClient.Logout()
+
+ switch maintenance.Spec.ServerBootConfigurationTemplate.Boottype {
+ case metalv1alpha1.BootTypeOneOff:
+ // note: with this option, the next server boot up will exter pxe boot.
+ // if Maintenance Spec.ServerPower is PowerOn, server will be powered on and will boot into pxe
+ // if Maintenance Spec.ServerPower is PowerOff, server will be powered off and will boot into pxe on next power on
+ // we do not need to save the default boot order as the server will boot into default boot order on next boot up
+ if err := bmcClient.SetPXEBootOnce(ctx, server.Spec.SystemURI); err != nil {
+ return false, fmt.Errorf("failed to set PXE boot once: %w", err)
+ }
+ case metalv1alpha1.BootTypePersistent:
+ bootOrder, err := bmcClient.GetBootOrder(ctx, server.Spec.SystemURI)
+ if err != nil {
+ return false, fmt.Errorf("failed to get boot order: %w", err)
+ }
+ bootOptions, err := bmcClient.GetBootOptions(ctx, server.Spec.SystemURI)
+ if err != nil {
+ return false, fmt.Errorf("failed to get boot options: %w", err)
+ }
+
+ pxeBoot := ""
+ for _, bootOption := range bootOptions {
+ if strings.Contains(strings.ToLower(bootOption.DisplayName), "pxe") {
+ pxeBoot = bootOption.BootOptionReference
+ if strings.EqualFold(bootOrder[0], bootOption.BootOptionReference) {
+ // already set to pxe boot
+ return false, nil
+ }
+ break
+ }
+ }
+
+ if maintenance.Status.DefaultBootOrder == nil {
+ // note in this option, the server will always boot into pxe until changed again
+ // the job needs to be completed, else any other BIOS config job triggered will fail as exsisting job is not completed
+ if err := bmcClient.SetBootOrder(ctx, server.Spec.SystemURI, redfish.Boot{BootOrder: []string{pxeBoot}}); err != nil {
+ return false, fmt.Errorf("failed to set PXE boot once: %w", err)
+ }
+ // save the current boot order to revert back after maintenance
+ maintenanceBase := maintenance.DeepCopy()
+ maintenance.Status.DefaultBootOrder = bootOrder
+ if err := r.Status().Patch(ctx, maintenance, client.MergeFrom(maintenanceBase)); err != nil {
+ return false, fmt.Errorf("failed to patch maintenance default boot order: %w", err)
+ }
+ log.V(1).Info("Saved default boot order for server", "Server", server.Name, "BootOrder", bootOrder)
+ }
+ // we need to complete the job to set the boot order correctly. Turn the server power on
+ if server.Spec.Power != metalv1alpha1.PowerOn && server.Status.PowerState != metalv1alpha1.ServerOnPowerState {
+ if err := r.setAndPatchServerPowerState(ctx, log, server, metalv1alpha1.PowerOn); err != nil {
+ return false, err
+ }
+ // requeue to give time to the server to power off
+ return true, nil
+ }
+ default:
+ return false, fmt.Errorf("unknown boot type: %s", maintenance.Spec.ServerBootConfigurationTemplate.Boottype)
+ }
+ log.V(1).Info("Configured boot order for server",
+ "Server", server.Name,
+ "BootType", maintenance.Spec.ServerBootConfigurationTemplate.Boottype)
+ return false, nil
+}
+
func (r *ServerMaintenanceReconciler) revertMaintenanceBootConfig(
ctx context.Context,
log logr.Logger,
- serverMaintenance *metalv1alpha1.ServerMaintenance,
server *metalv1alpha1.Server,
-) (bool, bool, error) {
+ serverMaintenance *metalv1alpha1.ServerMaintenance,
+) (bool, error) {
+ // we need to revert the boot order only if it was changed by the maintenance
+ if serverMaintenance.Status.DefaultBootOrder == nil {
+ return false, nil
+ }
+ // if the server is not off, we need to turn it off first to change the boot order
+ // this will help schedule the job to change the boot order for the server
+ // note: if the server is already off, this will be a no-op
+ if server.Spec.Power != metalv1alpha1.PowerOff && server.Status.PowerState != metalv1alpha1.ServerOffPowerState {
+ if err := r.setAndPatchServerPowerState(ctx, log, server, metalv1alpha1.PowerOff); err != nil {
+ return false, err
+ }
+ // requeue to give time to the server to power off
+ return true, nil
+ }
+
bmcClient, err := bmcutils.GetBMCClientForServer(ctx, r.Client, server, r.Insecure, r.BMCOptions)
if err != nil {
- return false, false, fmt.Errorf("failed to create BMC client: %w", err)
+ return false, fmt.Errorf("failed to create BMC client: %w", err)
}
defer bmcClient.Logout()
- err = bmcClient.SetBootOverride(ctx, server.Spec.SystemURI, redfish.Boot{
- BootSourceOverrideEnabled: redfish.DisabledBootSourceOverrideEnabled,
- BootSourceOverrideTarget: redfish.NoneBootSourceOverrideTarget,
- BootSourceOverrideMode: redfish.UEFIBootSourceOverrideMode},
- )
- log.V(1).Info("boot override during maintenance is reverted", "Server", server.Name)
- return false, err == nil, err
+ err = bmcClient.SetBootOrder(ctx, server.Spec.SystemURI, redfish.Boot{
+ BootOrder: serverMaintenance.Status.DefaultBootOrder,
+ })
+ if err != nil {
+ return false, fmt.Errorf("failed to revert boot order: %w", err)
+ }
+
+ // save the current boot order to revert back after maintenance
+ maintenanceBase := serverMaintenance.DeepCopy()
+ serverMaintenance.Status.DefaultBootOrder = nil
+ if err := r.Status().Patch(ctx, serverMaintenance, client.MergeFrom(maintenanceBase)); err != nil {
+ return false, fmt.Errorf("failed to patch maintenance default boot order: %w", err)
+ }
+ log.V(1).Info("revert to default boot order", "Server", server.Name, "BootOrder", nil)
+
+ if server.Spec.Power != metalv1alpha1.PowerOn && server.Status.PowerState != metalv1alpha1.ServerOnPowerState {
+ if err := r.setAndPatchServerPowerState(ctx, log, server, metalv1alpha1.PowerOn); err != nil {
+ return false, err
+ }
+ // requeue to give time to the server to power off
+ return true, nil
+ }
+ log.V(1).Info("boot override during maintenance is reverted", "Server", serverMaintenance.Name)
+ return false, nil
}
-func (r *ServerMaintenanceReconciler) removeBootConfigRefFromServer(ctx context.Context, log logr.Logger, config *metalv1alpha1.ServerBootConfiguration, server *metalv1alpha1.Server) error {
+func (r *ServerMaintenanceReconciler) removeBootConfigRefFromServer(
+ ctx context.Context,
+ log logr.Logger,
+ config *metalv1alpha1.ServerBootConfiguration,
+ server *metalv1alpha1.Server,
+) error {
if server == nil {
return nil
}
@@ -509,6 +641,26 @@ func (r *ServerMaintenanceReconciler) getMaintenanceBootConfigurationRef(
return config, nil
}
+func (r *ServerMaintenanceReconciler) hasBootConfigurationChanged(
+ ctx context.Context,
+ log logr.Logger,
+ serverMaintenance *metalv1alpha1.ServerMaintenance,
+) (bool, *metalv1alpha1.ServerBootConfiguration, error) {
+ config, err := r.getMaintenanceBootConfigurationRef(ctx, log, serverMaintenance)
+ if err != nil {
+ return false, config, err
+ }
+ if config == nil {
+ return false, config, nil
+ }
+ if reflect.DeepEqual(config.Spec, serverMaintenance.Spec.ServerBootConfigurationTemplate.Spec) {
+ // no changes
+ log.V(1).Info("No changes in ServerBootConfigurationTemplate")
+ return false, config, nil
+ }
+ return true, config, nil
+}
+
// SetupWithManager sets up the controller with the Manager.
func (r *ServerMaintenanceReconciler) SetupWithManager(mgr ctrl.Manager) error {
return ctrl.NewControllerManagedBy(mgr).
@@ -521,27 +673,13 @@ func (r *ServerMaintenanceReconciler) SetupWithManager(mgr ctrl.Manager) error {
func (r *ServerMaintenanceReconciler) enqueueMaintenanceByServerRefs() handler.EventHandler {
return handler.EnqueueRequestsFromMapFunc(func(ctx context.Context, object client.Object) []reconcile.Request {
- log := ctrl.LoggerFrom(ctx)
server := object.(*metalv1alpha1.Server)
var req []reconcile.Request
- maintenanceList := &metalv1alpha1.ServerMaintenanceList{}
- if err := r.List(ctx, maintenanceList); err != nil {
- log.Error(err, "failed to list host serverMaintenances")
- return nil
- }
- for _, maintenance := range maintenanceList.Items {
- if server.Spec.ServerMaintenanceRef != nil && maintenance.Spec.ServerRef.Name == server.Spec.ServerMaintenanceRef.Name {
- req = append(req, reconcile.Request{
- NamespacedName: types.NamespacedName{Namespace: maintenance.Namespace, Name: maintenance.Name},
- })
- return req
- }
- if server.Spec.ServerMaintenanceRef == nil {
- req = append(req, reconcile.Request{
- NamespacedName: types.NamespacedName{Namespace: maintenance.Namespace, Name: maintenance.Name},
- })
- }
+ if server.Spec.ServerMaintenanceRef != nil {
+ return []reconcile.Request{{
+ NamespacedName: types.NamespacedName{Namespace: server.Spec.ServerMaintenanceRef.Namespace, Name: server.Spec.ServerMaintenanceRef.Name},
+ }}
}
return req
})
diff --git a/internal/controller/servermaintenance_controller_test.go b/internal/controller/servermaintenance_controller_test.go
index 6ace1d7e..d64a2543 100644
--- a/internal/controller/servermaintenance_controller_test.go
+++ b/internal/controller/servermaintenance_controller_test.go
@@ -8,7 +8,6 @@ import (
. "github.com/onsi/gomega"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
- "k8s.io/apimachinery/pkg/types"
. "sigs.k8s.io/controller-runtime/pkg/envtest/komega"
"github.com/ironcore-dev/controller-utils/metautils"
@@ -97,7 +96,7 @@ var _ = Describe("ServerMaintenance Controller", func() {
}
Expect(k8sClient.Create(ctx, serverMaintenance)).To(Succeed())
Eventually(Object(serverMaintenance)).Should(SatisfyAll(
- HaveField("Status.State", metalv1alpha1.ServerMaintenanceStateInMaintenance),
+ HaveField("Status.State", metalv1alpha1.ServerMaintenanceStatePreapareMaintenance),
))
By("Checking the Server is in maintenance")
@@ -153,29 +152,17 @@ var _ = Describe("ServerMaintenance Controller", func() {
metalv1alpha1.ServerMaintenanceReasonAnnotationKey: "test-maintenance",
metalv1alpha1.ServerMaintenanceApprovalKey: "true",
}
- By("Checking the ServerMaintenanceRef")
- Eventually(Object(server)).Should(
- HaveField("Spec.ServerMaintenanceRef", Not(BeNil())),
- )
- bootConfig := &metalv1alpha1.ServerBootConfiguration{}
-
- Eventually(k8sClient.Get).WithArguments(ctx, types.NamespacedName{
- Name: server.Spec.BootConfigurationRef.Name,
- Namespace: server.Spec.BootConfigurationRef.Namespace,
- }, bootConfig).Should(Succeed())
-
- By("Checking the BootConfigurationRef")
+ By("Checking the ServerMaintenanceRef and BootConfigurationRef")
+ By("Checking the ")
Eventually(Object(server)).Should(SatisfyAll(
+ HaveField("Spec.ServerMaintenanceRef", Not(BeNil())),
HaveField("Spec.ServerMaintenanceRef.Name", serverMaintenance.Name),
HaveField("Spec.BootConfigurationRef", Not(BeNil())),
HaveField("Spec.BootConfigurationRef.Name", serverMaintenance.Name),
HaveField("Spec.BootConfigurationRef.Namespace", serverMaintenance.Namespace),
))
- By("Patching the boot configuration to a Ready state")
- Eventually(UpdateStatus(bootConfig, func() {
- bootConfig.Status.State = metalv1alpha1.ServerBootConfigurationStateReady
- })).Should(Succeed())
+ MarkBootConfigReady(ctx, k8sClient, server.Spec.BootConfigurationRef.Name, server.Spec.BootConfigurationRef.Namespace)
By("Checking the Server is in maintenance")
Eventually(Object(server)).Should(SatisfyAll(
@@ -185,7 +172,6 @@ var _ = Describe("ServerMaintenance Controller", func() {
Eventually(Object(serverClaim)).Should(SatisfyAll(
HaveField("ObjectMeta.Annotations", maintenanceLabels),
))
-
By("Checking the ServerMaintenance is in maintenance")
Eventually(Object(serverMaintenance)).Should(SatisfyAll(
HaveField("Status.State", metalv1alpha1.ServerMaintenanceStateInMaintenance),
@@ -197,7 +183,7 @@ var _ = Describe("ServerMaintenance Controller", func() {
Eventually(Object(server)).Should(SatisfyAll(
HaveField("Status.State", metalv1alpha1.ServerStateReserved),
HaveField("Spec.ServerMaintenanceRef", BeNil()),
- HaveField("Spec.BootConfigurationRef", BeNil()),
+ HaveField("Spec.BootConfigurationRef.Name", serverClaim.Name),
))
By("Checking the ServerClaim is cleaned up")
Eventually(Object(serverClaim)).Should(SatisfyAll(
@@ -251,31 +237,24 @@ var _ = Describe("ServerMaintenance Controller", func() {
},
}
Expect(k8sClient.Create(ctx, serverMaintenance01)).To(Succeed())
- By("Checking the ServerMaintenanceRef")
- Eventually(Object(server)).Should(
- HaveField("Spec.ServerMaintenanceRef", Not(BeNil())),
- )
- Eventually(Object(serverMaintenance01)).Should(SatisfyAll(
- HaveField("Status.State", metalv1alpha1.ServerMaintenanceStateInMaintenance),
- ))
- bootConfig := &metalv1alpha1.ServerBootConfiguration{}
- Eventually(k8sClient.Get).WithArguments(ctx, types.NamespacedName{
- Name: server.Spec.BootConfigurationRef.Name,
- Namespace: server.Spec.BootConfigurationRef.Namespace,
- }, bootConfig).Should(Succeed())
-
- By("Checking the BootConfigurationRef")
+ By("Checking the ServerMaintenanceRef and BootConfigurationRef")
Eventually(Object(server)).Should(SatisfyAll(
+ HaveField("Spec.ServerMaintenanceRef", Not(BeNil())),
HaveField("Spec.ServerMaintenanceRef.Name", serverMaintenance01.Name),
HaveField("Spec.BootConfigurationRef", Not(BeNil())),
HaveField("Spec.BootConfigurationRef.Name", serverMaintenance01.Name),
HaveField("Spec.BootConfigurationRef.Namespace", serverMaintenance01.Namespace),
))
+ By("Checking the ServerMaintenance state is PrepareMaintenance")
+ Eventually(Object(serverMaintenance01)).Should(SatisfyAll(
+ HaveField("Status.State", metalv1alpha1.ServerMaintenanceStatePreapareMaintenance),
+ ))
+ MarkBootConfigReady(ctx, k8sClient, server.Spec.BootConfigurationRef.Name, server.Spec.BootConfigurationRef.Namespace)
- By("Patching the boot configuration to a Ready state")
- Eventually(UpdateStatus(bootConfig, func() {
- bootConfig.Status.State = metalv1alpha1.ServerBootConfigurationStateReady
- })).Should(Succeed())
+ By("Checking the ServerMaintenance state is PrepareMaintenance")
+ Eventually(Object(serverMaintenance01)).Should(SatisfyAll(
+ HaveField("Status.State", metalv1alpha1.ServerMaintenanceStateInMaintenance),
+ ))
By("Creating a second ServerMaintenance object")
Expect(k8sClient.Create(ctx, serverMaintenance02)).To(Succeed())
@@ -295,11 +274,21 @@ var _ = Describe("ServerMaintenance Controller", func() {
By("Deleting first ServerMaintenance to finish the maintenance on the server")
Eventually(k8sClient.Delete).WithArguments(ctx, serverMaintenance01).Should(Succeed())
+ By("Checking the second ServerMaintenance is now PreparingMaintenance")
Eventually(Object(serverMaintenance02)).Should(SatisfyAll(
- HaveField("Status.State", metalv1alpha1.ServerMaintenanceStateInMaintenance),
+ HaveField("Status.State", metalv1alpha1.ServerMaintenanceStatePreapareMaintenance),
+ ))
+
+ By("Checking the ServerMaintenanceRef and BootConfigurationRef")
+ Eventually(Object(server)).Should(SatisfyAll(
+ HaveField("Spec.ServerMaintenanceRef", Not(BeNil())),
+ HaveField("Spec.ServerMaintenanceRef.Name", serverMaintenance02.Name),
+ HaveField("Spec.BootConfigurationRef", Not(BeNil())),
+ HaveField("Spec.BootConfigurationRef.Name", serverMaintenance02.Name),
+ HaveField("Spec.BootConfigurationRef.Namespace", serverMaintenance02.Namespace),
))
+ MarkBootConfigReady(ctx, k8sClient, server.Spec.BootConfigurationRef.Name, server.Spec.BootConfigurationRef.Namespace)
- By("Checking the second ServerMaintenance is now in maintenance")
Eventually(Object(serverMaintenance02)).Should(SatisfyAll(
HaveField("Status.State", metalv1alpha1.ServerMaintenanceStateInMaintenance),
))
diff --git a/internal/controller/suite_test.go b/internal/controller/suite_test.go
index 07404fd9..4403a647 100644
--- a/internal/controller/suite_test.go
+++ b/internal/controller/suite_test.go
@@ -30,6 +30,8 @@ import (
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
"sigs.k8s.io/controller-runtime/pkg/envtest"
. "sigs.k8s.io/controller-runtime/pkg/envtest/komega"
+ logf "sigs.k8s.io/controller-runtime/pkg/log"
+ "sigs.k8s.io/controller-runtime/pkg/log/zap"
metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server"
//+kubebuilder:scaffold:imports
)
@@ -118,6 +120,7 @@ func deleteAndList(ctx context.Context, obj client.Object, objList client.Object
var _ = BeforeSuite(func() {
By("bootstrapping test environment")
+ logf.SetLogger(zap.New(zap.WriteTo(GinkgoWriter), zap.UseDevMode(true)))
testEnv = &envtest.Environment{
CRDDirectoryPaths: []string{filepath.Join("..", "..", "config", "crd", "bases")},
ErrorIfCRDPathMissing: true,
@@ -262,8 +265,15 @@ func SetupTest() *corev1.Namespace {
}).SetupWithManager(k8sManager)).To(Succeed())
Expect((&ServerMaintenanceReconciler{
- Client: k8sManager.GetClient(),
- Scheme: k8sManager.GetScheme(),
+ Client: k8sManager.GetClient(),
+ Scheme: k8sManager.GetScheme(),
+ ResyncInterval: 10 * time.Millisecond,
+ BMCOptions: bmc.Options{
+ PowerPollingInterval: 50 * time.Millisecond,
+ PowerPollingTimeout: 200 * time.Millisecond,
+ BasicAuth: true,
+ },
+ Insecure: true,
}).SetupWithManager(k8sManager)).To(Succeed())
Expect((&BiosSettingsReconciler{
@@ -278,6 +288,7 @@ func SetupTest() *corev1.Namespace {
BasicAuth: true,
},
TimeoutExpiry: 6 * time.Second,
+ ProbeImage: "foo:latest",
}).SetupWithManager(k8sManager)).To(Succeed())
Expect((&BIOSVersionReconciler{
@@ -291,6 +302,7 @@ func SetupTest() *corev1.Namespace {
PowerPollingTimeout: 200 * time.Millisecond,
BasicAuth: true,
},
+ ProbeImage: "foo:latest",
}).SetupWithManager(k8sManager)).To(Succeed())
Expect((&BIOSVersionSetReconciler{
@@ -309,6 +321,7 @@ func SetupTest() *corev1.Namespace {
PowerPollingTimeout: 200 * time.Millisecond,
BasicAuth: true,
},
+ ProbeImage: "foo:latest",
}).SetupWithManager(k8sManager)).To(Succeed())
Expect((&BMCVersionReconciler{
@@ -322,6 +335,7 @@ func SetupTest() *corev1.Namespace {
PowerPollingTimeout: 200 * time.Millisecond,
BasicAuth: true,
},
+ ProbeImage: "foo:latest",
}).SetupWithManager(k8sManager)).To(Succeed())
Expect((&BMCVersionSetReconciler{
From 58a08a499b025ed38b032478d379d512888319c1 Mon Sep 17 00:00:00 2001
From: Nagadeesh Nagaraja
Date: Mon, 29 Sep 2025 10:32:23 +0200
Subject: [PATCH 05/12] unit test updates with bootConfig
---
.../biossettings_controller_test.go | 4 +-
.../biossettingsset_controller_test.go | 20 ++++++-
.../biosversionset_controller_test.go | 59 ++++++++++++++++++-
.../controller/bmcversionset_controller.go | 13 +++-
.../bmcversionset_controller_test.go | 20 +++++++
.../servermaintenance_controller.go | 21 +++++++
6 files changed, 129 insertions(+), 8 deletions(-)
diff --git a/internal/controller/biossettings_controller_test.go b/internal/controller/biossettings_controller_test.go
index acd0b5fe..1780702c 100644
--- a/internal/controller/biossettings_controller_test.go
+++ b/internal/controller/biossettings_controller_test.go
@@ -52,7 +52,7 @@ var _ = Describe("BIOSSettings Controller", func() {
server = &metalv1alpha1.Server{
ObjectMeta: metav1.ObjectMeta{
Namespace: ns.Name,
- GenerateName: "test-maintenance-",
+ GenerateName: "test-biossettings-",
},
Spec: metalv1alpha1.ServerSpec{
SystemUUID: "38947555-7742-3448-3784-823347823834",
@@ -725,7 +725,7 @@ var _ = Describe("BIOSSettings Sequence Controller", func() {
server = &metalv1alpha1.Server{
ObjectMeta: metav1.ObjectMeta{
Namespace: ns.Name,
- GenerateName: "test-maintenance-",
+ GenerateName: "test-biossettings-",
},
Spec: metalv1alpha1.ServerSpec{
SystemUUID: "38947555-7742-3448-3784-823347823834",
diff --git a/internal/controller/biossettingsset_controller_test.go b/internal/controller/biossettingsset_controller_test.go
index 2edba2d6..dc54e24f 100644
--- a/internal/controller/biossettingsset_controller_test.go
+++ b/internal/controller/biossettingsset_controller_test.go
@@ -39,7 +39,7 @@ var _ = Describe("BIOSSettingsSet Controller", func() {
By("Creating a Server")
server01 = &metalv1alpha1.Server{
ObjectMeta: metav1.ObjectMeta{
- GenerateName: "test-maintenance-01-",
+ GenerateName: "test-biossettingsset-01-",
Labels: map[string]string{
"metal.ironcore.dev/Manufacturer": "foo",
},
@@ -64,7 +64,7 @@ var _ = Describe("BIOSSettingsSet Controller", func() {
By("Creating a second Server")
server02 = &metalv1alpha1.Server{
ObjectMeta: metav1.ObjectMeta{
- GenerateName: "test-maintenance-02-",
+ GenerateName: "test-biossettingsset-02-",
Labels: map[string]string{
"metal.ironcore.dev/Manufacturer": "bar",
},
@@ -89,7 +89,7 @@ var _ = Describe("BIOSSettingsSet Controller", func() {
By("Creating a third Server")
server03 = &metalv1alpha1.Server{
ObjectMeta: metav1.ObjectMeta{
- GenerateName: "test-maintenance-03-",
+ GenerateName: "test-biossettingsset-03-",
Labels: map[string]string{
"metal.ironcore.dev/Manufacturer": "bar",
},
@@ -149,6 +149,8 @@ var _ = Describe("BIOSSettingsSet Controller", func() {
}
Eventually(Get(biosSettings02)).Should(Succeed())
+ _ = MarkBootConfigReady(ctx, k8sClient, biosSettings02.Name, ns.Name)
+
By("Checking the biosSettings02 have completed")
Eventually(Object(biosSettings02)).Should(SatisfyAll(
HaveField("Status.State", metalv1alpha1.BIOSSettingsStateApplied),
@@ -171,6 +173,8 @@ var _ = Describe("BIOSSettingsSet Controller", func() {
}
Eventually(Get(biosSettings03)).Should(Succeed())
+ _ = MarkBootConfigReady(ctx, k8sClient, biosSettings03.Name, ns.Name)
+
By("Checking the biosSettings03 have completed")
Eventually(Object(biosSettings03)).Should(SatisfyAll(
HaveField("Status.State", metalv1alpha1.BIOSSettingsStateApplied),
@@ -245,6 +249,8 @@ var _ = Describe("BIOSSettingsSet Controller", func() {
HaveField("Status.FailedBIOSSettings", BeNumerically("==", 0)),
))
+ _ = MarkBootConfigReady(ctx, k8sClient, biosSettings02.Name, ns.Name)
+
By("Checking the biosSettings02 have completed")
Eventually(Object(biosSettings02)).Should(SatisfyAll(
HaveField("Status.State", metalv1alpha1.BIOSSettingsStateApplied),
@@ -258,6 +264,8 @@ var _ = Describe("BIOSSettingsSet Controller", func() {
})),
))
+ _ = MarkBootConfigReady(ctx, k8sClient, biosSettings03.Name, ns.Name)
+
By("Checking the biosSettings03 have completed")
Eventually(Object(biosSettings03)).Should(SatisfyAll(
HaveField("Status.State", metalv1alpha1.BIOSSettingsStateApplied),
@@ -305,6 +313,8 @@ var _ = Describe("BIOSSettingsSet Controller", func() {
Eventually(Get(biosSettings02)).Should(Succeed())
Eventually(Get(biosSettings03)).Should(Succeed())
+ // we do not mark boot config ready for biosSettings02 once again, as
+ // as the biosSettings will already be applied on it
By("Checking the biosSettings02 have completed")
Eventually(Object(biosSettings02)).Should(
HaveField("Status.State", metalv1alpha1.BIOSSettingsStateApplied),
@@ -341,6 +351,10 @@ var _ = Describe("BIOSSettingsSet Controller", func() {
HaveField("Status.FailedBIOSSettings", BeNumerically("==", 0)),
))
+ // we do not mark boot config ready for biosSettings01, as
+ // the server in unit testing all point to same mockup server
+ // and Hence, the biosSettings will already be applied on it when other biosSettings are applied.
+
By("Checking the biosSettings01 have completed")
Eventually(Object(biosSettings01)).Should(
HaveField("Status.State", metalv1alpha1.BIOSSettingsStateApplied),
diff --git a/internal/controller/biosversionset_controller_test.go b/internal/controller/biosversionset_controller_test.go
index eecb7903..dd5b477e 100644
--- a/internal/controller/biosversionset_controller_test.go
+++ b/internal/controller/biosversionset_controller_test.go
@@ -165,6 +165,20 @@ var _ = Describe("BIOSVersionSet Controller", func() {
HaveField("Status.FailedBIOSVersion", BeNumerically("==", 0)),
))
+ By("Ensuring that the Maintenance resource has been created")
+ var serverMaintenanceList metalv1alpha1.ServerMaintenanceList
+ Eventually(ObjectList(&serverMaintenanceList)).Should(HaveField("Items", Not(BeEmpty())))
+
+ serverMaintenance02 := &metalv1alpha1.ServerMaintenance{
+ ObjectMeta: metav1.ObjectMeta{
+ Namespace: ns.Name,
+ Name: biosVersion02.Name,
+ },
+ }
+ Eventually(Get(serverMaintenance02)).Should(Succeed())
+
+ _ = MarkBootConfigReady(ctx, k8sClient, serverMaintenance02.Name, ns.Name)
+
By("Checking the biosVersion01 have completed")
Eventually(Object(biosVersion02)).Should(SatisfyAll(
HaveField("Status.State", metalv1alpha1.BIOSVersionStateCompleted),
@@ -180,6 +194,16 @@ var _ = Describe("BIOSVersionSet Controller", func() {
})),
))
+ serverMaintenance03 := &metalv1alpha1.ServerMaintenance{
+ ObjectMeta: metav1.ObjectMeta{
+ Namespace: ns.Name,
+ Name: biosVersion03.Name,
+ },
+ }
+ Eventually(Get(serverMaintenance03)).Should(Succeed())
+
+ _ = MarkBootConfigReady(ctx, k8sClient, serverMaintenance03.Name, ns.Name)
+
By("Checking the biosVersion02 have completed")
Eventually(Object(biosVersion03)).Should(SatisfyAll(
HaveField("Status.State", metalv1alpha1.BIOSVersionStateCompleted),
@@ -253,7 +277,21 @@ var _ = Describe("BIOSVersionSet Controller", func() {
HaveField("Status.FailedBIOSVersion", BeNumerically("==", 0)),
))
- By("Checking the biosVersion01 have completed")
+ By("Ensuring that the Maintenance resource has been created")
+ var serverMaintenanceList metalv1alpha1.ServerMaintenanceList
+ Eventually(ObjectList(&serverMaintenanceList)).Should(HaveField("Items", Not(BeEmpty())))
+
+ serverMaintenance02 := &metalv1alpha1.ServerMaintenance{
+ ObjectMeta: metav1.ObjectMeta{
+ Namespace: ns.Name,
+ Name: biosVersion02.Name,
+ },
+ }
+ Eventually(Get(serverMaintenance02)).Should(Succeed())
+
+ _ = MarkBootConfigReady(ctx, k8sClient, serverMaintenance02.Name, ns.Name)
+
+ By("Checking the biosVersion02 have completed")
Eventually(Object(biosVersion02)).Should(SatisfyAll(
HaveField("Status.State", metalv1alpha1.BIOSVersionStateCompleted),
HaveField("OwnerReferences", ContainElement(metav1.OwnerReference{
@@ -266,7 +304,17 @@ var _ = Describe("BIOSVersionSet Controller", func() {
})),
))
- By("Checking the biosVersion02 have completed")
+ serverMaintenance03 := &metalv1alpha1.ServerMaintenance{
+ ObjectMeta: metav1.ObjectMeta{
+ Namespace: ns.Name,
+ Name: biosVersion03.Name,
+ },
+ }
+ Eventually(Get(serverMaintenance03)).Should(Succeed())
+
+ _ = MarkBootConfigReady(ctx, k8sClient, serverMaintenance03.Name, ns.Name)
+
+ By("Checking the biosVersion03 have completed")
Eventually(Object(biosVersion03)).Should(SatisfyAll(
HaveField("Status.State", metalv1alpha1.BIOSVersionStateCompleted),
HaveField("OwnerReferences", ContainElement(metav1.OwnerReference{
@@ -312,6 +360,8 @@ var _ = Describe("BIOSVersionSet Controller", func() {
Eventually(Get(biosVersion02)).Should(Succeed())
Eventually(Get(biosVersion03)).Should(Succeed())
+ // we do not mark boot config ready for biosVersion02 once again, as
+ // as the biosVersion will already be applied on it
By("Checking the biosVersion01 have completed")
Eventually(Object(biosVersion02)).Should(
HaveField("Status.State", metalv1alpha1.BIOSVersionStateCompleted),
@@ -348,6 +398,11 @@ var _ = Describe("BIOSVersionSet Controller", func() {
HaveField("Status.FailedBIOSVersion", BeNumerically("==", 0)),
))
+ // we do not mark boot config ready for biosVersion01, as
+ // the server in unit testing all point to same mockup server
+ // and Hence, the biosSettings will already be applied on it when other biosSettings are applied.
+ // _ = MarkBootConfigReady(ctx, k8sClient, <>.Name, ns.Name)
+
By("Checking the biosVersion01 have completed")
Eventually(Object(biosVersion01)).Should(
HaveField("Status.State", metalv1alpha1.BIOSVersionStateCompleted),
diff --git a/internal/controller/bmcversionset_controller.go b/internal/controller/bmcversionset_controller.go
index f6754804..89a64113 100644
--- a/internal/controller/bmcversionset_controller.go
+++ b/internal/controller/bmcversionset_controller.go
@@ -302,7 +302,18 @@ func (r *BMCVersionSetReconciler) patchBMCVersionfromTemplate(
continue
}
opResult, err := controllerutil.CreateOrPatch(ctx, r.Client, &bmcVersion, func() error {
- bmcVersion.Spec.BMCVersionTemplate = *bmcVersionTemplate.DeepCopy()
+
+ if bmcVersionTemplate.ServerMaintenanceRefs != nil &&
+ len(bmcVersion.Spec.ServerMaintenanceRefs) == len(bmcVersionTemplate.ServerMaintenanceRefs) {
+ // find a better solution to handle corner cases
+ // like the spec provided is equal length but has wrong value.
+ // this will make bmcVersion create new serverMaintenance and hence this patch will be wrong
+ bmcVersion.Spec.BMCVersionTemplate = *bmcVersionTemplate.DeepCopy()
+ } else {
+ serverMaintenanceRefs := bmcVersion.Spec.ServerMaintenanceRefs
+ bmcVersion.Spec.BMCVersionTemplate = *bmcVersionTemplate.DeepCopy()
+ bmcVersion.Spec.ServerMaintenanceRefs = serverMaintenanceRefs
+ }
return nil
}) //nolint:errcheck
if err != nil {
diff --git a/internal/controller/bmcversionset_controller_test.go b/internal/controller/bmcversionset_controller_test.go
index 8cbb789e..45fa89ed 100644
--- a/internal/controller/bmcversionset_controller_test.go
+++ b/internal/controller/bmcversionset_controller_test.go
@@ -170,6 +170,16 @@ var _ = Describe("BMCVersionSet Controller", func() {
HaveField("Status.FailedBMCVersion", BeNumerically("==", 0)),
))
+ By("Ensuring that the BootConfig resource has been created/ marked ready")
+ var serverMaintenanceList metalv1alpha1.ServerMaintenanceList
+ Eventually(ObjectList(&serverMaintenanceList)).Should(HaveField("Items", Not(BeNil())))
+ for _, serverMaintaince := range serverMaintenanceList.Items {
+ if metav1.IsControlledBy(&serverMaintaince, bmcVersion02) || metav1.IsControlledBy(&serverMaintaince, bmcVersion03) {
+ By(fmt.Sprintf("Marking the maintenance %v ", serverMaintaince.Name))
+ _ = MarkBootConfigReady(ctx, k8sClient, serverMaintaince.Name, ns.Name)
+ }
+ }
+
By("Checking the bmcVersion02 have completed")
Eventually(Object(bmcVersion02)).Should(SatisfyAll(
HaveField("Status.State", metalv1alpha1.BMCVersionStateCompleted),
@@ -262,6 +272,16 @@ var _ = Describe("BMCVersionSet Controller", func() {
HaveField("Status.FailedBMCVersion", BeNumerically("==", 0)),
))
+ By("Ensuring that the BootConfig resource has been created/ marked ready")
+ var serverMaintenanceList metalv1alpha1.ServerMaintenanceList
+ Eventually(ObjectList(&serverMaintenanceList)).Should(HaveField("Items", Not(BeNil())))
+ for _, serverMaintaince := range serverMaintenanceList.Items {
+ if metav1.IsControlledBy(&serverMaintaince, bmcVersion02) || metav1.IsControlledBy(&serverMaintaince, bmcVersion03) {
+ By(fmt.Sprintf("Marking the maintenance %v ", serverMaintaince.Name))
+ _ = MarkBootConfigReady(ctx, k8sClient, serverMaintaince.Name, ns.Name)
+ }
+ }
+
By("Checking the bmcVersion02 have completed")
Eventually(Object(bmcVersion02)).Should(SatisfyAll(
HaveField("Status.State", metalv1alpha1.BMCVersionStateCompleted),
diff --git a/internal/controller/servermaintenance_controller.go b/internal/controller/servermaintenance_controller.go
index f67110ea..d1d08106 100644
--- a/internal/controller/servermaintenance_controller.go
+++ b/internal/controller/servermaintenance_controller.go
@@ -673,6 +673,7 @@ func (r *ServerMaintenanceReconciler) SetupWithManager(mgr ctrl.Manager) error {
func (r *ServerMaintenanceReconciler) enqueueMaintenanceByServerRefs() handler.EventHandler {
return handler.EnqueueRequestsFromMapFunc(func(ctx context.Context, object client.Object) []reconcile.Request {
+ log := ctrl.LoggerFrom(ctx)
server := object.(*metalv1alpha1.Server)
var req []reconcile.Request
@@ -681,6 +682,26 @@ func (r *ServerMaintenanceReconciler) enqueueMaintenanceByServerRefs() handler.E
NamespacedName: types.NamespacedName{Namespace: server.Spec.ServerMaintenanceRef.Namespace, Name: server.Spec.ServerMaintenanceRef.Name},
}}
}
+
+ maintenanceList := &metalv1alpha1.ServerMaintenanceList{}
+ if err := r.List(ctx, maintenanceList); err != nil {
+ log.Error(err, "failed to list host serverMaintenances")
+ return nil
+ }
+ for _, maintenance := range maintenanceList.Items {
+ if server.Spec.ServerMaintenanceRef != nil && maintenance.Name == server.Spec.ServerMaintenanceRef.Name {
+ req = append(req, reconcile.Request{
+ NamespacedName: types.NamespacedName{Namespace: maintenance.Namespace, Name: maintenance.Name},
+ })
+ return req
+ }
+ if maintenance.Spec.ServerRef != nil && maintenance.Spec.ServerRef.Name == server.Name {
+ req = append(req, reconcile.Request{
+ NamespacedName: types.NamespacedName{Namespace: maintenance.Namespace, Name: maintenance.Name},
+ })
+ }
+ }
+
return req
})
}
From 067b699b1c78f199e5c4b190781a8506a427c3b1 Mon Sep 17 00:00:00 2001
From: Nagadeesh Nagaraja
Date: Wed, 1 Oct 2025 13:32:11 +0200
Subject: [PATCH 06/12] generalize logic for rearranging boot order
---
internal/controller/helper.go | 40 +++++++++++++++++++
.../servermaintenance_controller.go | 26 +++++++-----
2 files changed, 57 insertions(+), 9 deletions(-)
diff --git a/internal/controller/helper.go b/internal/controller/helper.go
index 5f23bb82..abcc103b 100644
--- a/internal/controller/helper.go
+++ b/internal/controller/helper.go
@@ -8,8 +8,10 @@ import (
"fmt"
"math/big"
"reflect"
+ "slices"
metalv1alpha1 "github.com/ironcore-dev/metal-operator/api/v1alpha1"
+ "github.com/stmcginnis/gofish/redfish"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/event"
)
@@ -74,6 +76,44 @@ func GenerateRandomPassword(length int) ([]byte, error) {
return result, nil
}
+func rearrangeBootOrder(
+ currentOrder []string,
+ currentBootOptions []*redfish.BootOption,
+ highPriorityBootOrderFunc func(*redfish.BootOption) bool,
+ removeBootOrderFunc func(*redfish.BootOption) bool,
+) []string {
+ var highPrioirtyBootDevices []*redfish.BootOption
+ var removedBootOption []*redfish.BootOption
+ for _, bootOption := range currentBootOptions {
+ if removeBootOrderFunc(bootOption) {
+ removedBootOption = append(removedBootOption, bootOption)
+ }
+ if highPriorityBootOrderFunc(bootOption) {
+ highPrioirtyBootDevices = append(highPrioirtyBootDevices, bootOption)
+ }
+ }
+
+ var newBootOrder []string
+ // Add high priority boot devices first
+ for _, bootDevice := range highPrioirtyBootDevices {
+ newBootOrder = append(newBootOrder, bootDevice.BootOptionReference)
+ }
+
+ for _, bootOrder := range currentOrder {
+ if slices.Contains(newBootOrder, bootOrder) {
+ continue
+ }
+ if slices.ContainsFunc(removedBootOption, func(removed *redfish.BootOption) bool {
+ return removed.BootOptionReference == bootOrder
+ }) {
+ continue
+ }
+ newBootOrder = append(newBootOrder, bootOrder)
+ }
+
+ return newBootOrder
+}
+
func enqueFromChildObjUpdatesExceptAnnotation(e event.UpdateEvent) bool {
isNil := func(arg any) bool {
if v := reflect.ValueOf(arg); !v.IsValid() || ((v.Kind() == reflect.Ptr ||
diff --git a/internal/controller/servermaintenance_controller.go b/internal/controller/servermaintenance_controller.go
index d1d08106..d0da0614 100644
--- a/internal/controller/servermaintenance_controller.go
+++ b/internal/controller/servermaintenance_controller.go
@@ -457,27 +457,35 @@ func (r *ServerMaintenanceReconciler) configureBootOrder(ctx context.Context, lo
if err != nil {
return false, fmt.Errorf("failed to get boot order: %w", err)
}
+ // if the boot order lenght is less than 2, we cannot change pxe as first boot
+ if len(bootOrder) < 2 {
+ return false, nil
+ }
bootOptions, err := bmcClient.GetBootOptions(ctx, server.Spec.SystemURI)
if err != nil {
return false, fmt.Errorf("failed to get boot options: %w", err)
}
- pxeBoot := ""
- for _, bootOption := range bootOptions {
- if strings.Contains(strings.ToLower(bootOption.DisplayName), "pxe") {
- pxeBoot = bootOption.BootOptionReference
- if strings.EqualFold(bootOrder[0], bootOption.BootOptionReference) {
- // already set to pxe boot
- return false, nil
+ var isDiskBootOption = func(bootOption *redfish.BootOption) bool {
+ diskIndicators := []string{"/HD(", "/Sata(", "/NVMe(", "/Scsi(", "/USB("}
+ for _, indicator := range diskIndicators {
+ if strings.Contains(bootOption.UefiDevicePath, indicator) {
+ return true
}
- break
}
+ return false
+ }
+
+ var isPxeBootOption = func(bootOption *redfish.BootOption) bool {
+ return strings.Contains(strings.ToLower(bootOption.DisplayName), "pxe")
}
+ bootOrderChanged := rearrangeBootOrder(bootOrder, bootOptions, isPxeBootOption, isDiskBootOption)
+
if maintenance.Status.DefaultBootOrder == nil {
// note in this option, the server will always boot into pxe until changed again
// the job needs to be completed, else any other BIOS config job triggered will fail as exsisting job is not completed
- if err := bmcClient.SetBootOrder(ctx, server.Spec.SystemURI, redfish.Boot{BootOrder: []string{pxeBoot}}); err != nil {
+ if err := bmcClient.SetBootOrder(ctx, server.Spec.SystemURI, redfish.Boot{BootOrder: bootOrderChanged}); err != nil {
return false, fmt.Errorf("failed to set PXE boot once: %w", err)
}
// save the current boot order to revert back after maintenance
From 3f99cfe68dde79fd2bc740856c397b4804e4100d Mon Sep 17 00:00:00 2001
From: Nagadeesh Nagaraja
Date: Wed, 8 Oct 2025 17:01:32 +0200
Subject: [PATCH 07/12] Update logic to handle settings changes
---
api/v1alpha1/servermaintenance_types.go | 33 +++
api/v1alpha1/zz_generated.deepcopy.go | 28 ++-
...metal.ironcore.dev_servermaintenances.yaml | 21 +-
config/dev/macdb.yaml | 34 +--
config/dev/manager_patch.yaml | 1 -
.../redfish_mockup_endpoint.yaml | 6 +-
.../samples/metal_v1alpha1_biosversion.yaml | 7 +-
.../metal_v1alpha1_servermaintenance.yaml | 14 +-
internal/controller/bmcsettings_controller.go | 6 +-
internal/controller/bmcversion_controller.go | 6 +-
internal/controller/helper.go | 2 +-
.../servermaintenance_controller.go | 222 ++++++++++++++----
12 files changed, 277 insertions(+), 103 deletions(-)
diff --git a/api/v1alpha1/servermaintenance_types.go b/api/v1alpha1/servermaintenance_types.go
index d62c7e84..9f450d3e 100644
--- a/api/v1alpha1/servermaintenance_types.go
+++ b/api/v1alpha1/servermaintenance_types.go
@@ -83,11 +83,44 @@ const (
type ServerMaintenanceStatus struct {
// State specifies the current state of the server maintenance.
State ServerMaintenanceState `json:"state,omitempty"`
+
+ // BootOrder specifies the boot order of the server before maintenance.
+ // +optional
+ BootOrderStatus *ServerMaintenanceBootOrder `json:"bootOrderStatus,omitempty"`
+}
+
+type ServerMaintenanceBootOrder struct {
// DefaultBootOrder specifies the default boot order of the server before maintenance.
// +optional
DefaultBootOrder []string `json:"defaultBootOrder,omitempty"`
+
+ State ServerMaintenanceBootOrderState `json:"state,omitempty"`
}
+// ServerMaintenanceState specifies the current state of the server maintenance.
+type ServerMaintenanceBootOrderState string
+
+const (
+
+ // BootOrderConfigNoOp specifies that the server bootOrder is as expected.
+ BootOrderConfigNoOp ServerMaintenanceBootOrderState = "NoOp"
+
+ // BootOrderConfigInProgress specifies that the server bootOrder configuration is InProgress.
+ BootOrderConfigInProgress ServerMaintenanceBootOrderState = "InProgress"
+
+ // BootOrderConfigSuccess specifies that the server bootOrder configuration is Completed.
+ BootOrderConfigSuccess ServerMaintenanceBootOrderState = "Completed"
+
+ // BootOrderConfigSuccessRevertInProgress specifies that the server bootOrder revert to default is InProgress.
+ BootOrderConfigSuccessRevertInProgress ServerMaintenanceBootOrderState = "RevertInProgress"
+
+ // BootOrderConfigRevertSuccess specifies that the server bootOrder configuration is Completed.
+ BootOrderConfigRevertSuccess ServerMaintenanceBootOrderState = "RevertCompleted"
+
+ // BootOrderOneOffPxeBootSuccess specifies that the server bootOrder configuration is set to boot pxe once.
+ BootOrderOneOffPxeBootSuccess ServerMaintenanceBootOrderState = "PxeOneOffBootSuccess"
+)
+
// ServerMaintenanceState specifies the current state of the server maintenance.
type ServerMaintenanceState string
diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go
index fc65cec8..bafeae7b 100644
--- a/api/v1alpha1/zz_generated.deepcopy.go
+++ b/api/v1alpha1/zz_generated.deepcopy.go
@@ -1541,6 +1541,26 @@ func (in *ServerMaintenance) DeepCopyObject() runtime.Object {
return nil
}
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *ServerMaintenanceBootOrder) DeepCopyInto(out *ServerMaintenanceBootOrder) {
+ *out = *in
+ if in.DefaultBootOrder != nil {
+ in, out := &in.DefaultBootOrder, &out.DefaultBootOrder
+ *out = make([]string, len(*in))
+ copy(*out, *in)
+ }
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ServerMaintenanceBootOrder.
+func (in *ServerMaintenanceBootOrder) DeepCopy() *ServerMaintenanceBootOrder {
+ if in == nil {
+ return nil
+ }
+ out := new(ServerMaintenanceBootOrder)
+ in.DeepCopyInto(out)
+ return out
+}
+
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ServerMaintenanceList) DeepCopyInto(out *ServerMaintenanceList) {
*out = *in
@@ -1621,10 +1641,10 @@ func (in *ServerMaintenanceSpec) DeepCopy() *ServerMaintenanceSpec {
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ServerMaintenanceStatus) DeepCopyInto(out *ServerMaintenanceStatus) {
*out = *in
- if in.DefaultBootOrder != nil {
- in, out := &in.DefaultBootOrder, &out.DefaultBootOrder
- *out = make([]string, len(*in))
- copy(*out, *in)
+ if in.BootOrderStatus != nil {
+ in, out := &in.BootOrderStatus, &out.BootOrderStatus
+ *out = new(ServerMaintenanceBootOrder)
+ (*in).DeepCopyInto(*out)
}
}
diff --git a/config/crd/bases/metal.ironcore.dev_servermaintenances.yaml b/config/crd/bases/metal.ironcore.dev_servermaintenances.yaml
index d0ef9f79..6b0120aa 100644
--- a/config/crd/bases/metal.ironcore.dev_servermaintenances.yaml
+++ b/config/crd/bases/metal.ironcore.dev_servermaintenances.yaml
@@ -155,12 +155,21 @@ spec:
status:
description: ServerMaintenanceStatus defines the observed state of a ServerMaintenance
properties:
- defaultBootOrder:
- description: DefaultBootOrder specifies the default boot order of
- the server before maintenance.
- items:
- type: string
- type: array
+ bootOrderStatus:
+ description: BootOrder specifies the boot order of the server before
+ maintenance.
+ properties:
+ defaultBootOrder:
+ description: DefaultBootOrder specifies the default boot order
+ of the server before maintenance.
+ items:
+ type: string
+ type: array
+ state:
+ description: ServerMaintenanceState specifies the current state
+ of the server maintenance.
+ type: string
+ type: object
state:
description: State specifies the current state of the server maintenance.
type: string
diff --git a/config/dev/macdb.yaml b/config/dev/macdb.yaml
index 78ca61e8..07c2128f 100644
--- a/config/dev/macdb.yaml
+++ b/config/dev/macdb.yaml
@@ -1,34 +1,12 @@
macPrefixes:
-- macPrefix: f4
- manufacturer: dell
- protocol: Redfish
- port: 443
+- macPrefix: 23
+ manufacturer: Foo
+ protocol: RedfishKube
+ port: 8000
type: bmc
defaultCredentials:
- - username: automation
- password: Password12
- console:
- type: ssh
- port: 22
-- macPrefix: 2c
- manufacturer: dell
- protocol: Redfish
- port: 443
- type: bmc
- defaultCredentials:
- - username: automation
- password: Password12
+ - username: foo
+ password: bar
console:
type: SSH
port: 22
-- macPrefix: 7C
- manufacturer: hpe
- protocol: Redfish
- port: 443
- type: bmc
- defaultCredentials:
- - username: automation
- password: Password12
- console:
- type: ssh
- port: 22
diff --git a/config/dev/manager_patch.yaml b/config/dev/manager_patch.yaml
index 8133956d..55aff1af 100644
--- a/config/dev/manager_patch.yaml
+++ b/config/dev/manager_patch.yaml
@@ -15,7 +15,6 @@ spec:
- --mac-prefixes-file=/etc/macdb/macdb.yaml
- --probe-image=ghcr.io/ironcore-dev/metalprobe:latest
- --probe-os-image=ghcr.io/ironcore-dev/os-images/gardenlinux:1443.10
- - --insecure=false
- --registry-url=http://127.0.0.1:30000
- --registry-port=30000
- --enforce-first-boot
diff --git a/config/redfish-mockup/redfish_mockup_endpoint.yaml b/config/redfish-mockup/redfish_mockup_endpoint.yaml
index 2219b249..0df99e91 100644
--- a/config/redfish-mockup/redfish_mockup_endpoint.yaml
+++ b/config/redfish-mockup/redfish_mockup_endpoint.yaml
@@ -7,7 +7,7 @@ metadata:
app.kubernetes.io/part-of: metal-operator
app.kubernetes.io/managed-by: kustomize
app.kubernetes.io/created-by: metal-operator
- name: node005-ap046
+ name: endpoint-sample
spec:
- ip: "10.246.48.104"
- macAddress: "2c:ea:7f:f3:6d:d2"
+ ip: "127.0.0.1"
+ macAddress: "23:11:8A:33:CF:EA"
diff --git a/config/samples/metal_v1alpha1_biosversion.yaml b/config/samples/metal_v1alpha1_biosversion.yaml
index a5e3de32..46c2cf1e 100644
--- a/config/samples/metal_v1alpha1_biosversion.yaml
+++ b/config/samples/metal_v1alpha1_biosversion.yaml
@@ -6,10 +6,11 @@ metadata:
app.kubernetes.io/managed-by: kustomize
name: biosversion-sample
spec:
- version: "2.22.1"
+ version: "U59 v2.34 (10/04/2024)"
image:
- URI: "https://repo.eu-de-1.cloud.sap/hw-firmware/dell/poweredge-r640/BIOS_M73JJ_WN64_2.22.1.EXE"
+ URI: "https://foo-2.34_10_04_2024.signed.flash"
transferProtocol: "HTTPS"
+ updatePolicy: Normal
serverRef:
- name: node005-ap046-system-0
+ name: endpoint-sample-hpe-system-0
serverMaintenancePolicy: OwnerApproval
diff --git a/config/samples/metal_v1alpha1_servermaintenance.yaml b/config/samples/metal_v1alpha1_servermaintenance.yaml
index 5e440cc1..f19a3b12 100644
--- a/config/samples/metal_v1alpha1_servermaintenance.yaml
+++ b/config/samples/metal_v1alpha1_servermaintenance.yaml
@@ -5,5 +5,17 @@ metadata:
app.kubernetes.io/name: metal-operator
app.kubernetes.io/managed-by: kustomize
name: servermaintenance-sample
+ namespace: default
spec:
- # TODO(user): Add fields here
+ serverRef:
+ name: node005-ap046-system-0
+ policy: metalv1alpha1.ServerMaintenancePolicyEnforced
+ serverPower: metalv1alpha1.PowerOn
+ serverBootConfigurationTemplate:
+ name: "test-boot"
+ spec:
+ ignitionSecretRef:
+ name: ignition-secret
+ serverRef:
+ name: server-system-0
+ image: "ghcr.io//foo/linux:C.X"
diff --git a/internal/controller/bmcsettings_controller.go b/internal/controller/bmcsettings_controller.go
index af12f5ab..8de737f6 100644
--- a/internal/controller/bmcsettings_controller.go
+++ b/internal/controller/bmcsettings_controller.go
@@ -314,10 +314,10 @@ func (r *BMCSettingsReconciler) handleSettingInProgressState(
}
// check if the maintenance is granted
- if ok, failed := r.checkIfMaintenanceGranted(ctx, log, bmcSetting, bmcClient); !ok {
+ if ok, state := r.checkIfMaintenanceGranted(ctx, log, bmcSetting, bmcClient); !ok {
log.V(1).Info("Waiting for maintenance to be granted before continuing with updating settings")
return ctrl.Result{}, err
- } else if failed == metalv1alpha1.ServerMaintenanceStateFailed {
+ } else if state == metalv1alpha1.ServerMaintenanceStateFailed {
err := r.updateBMCSettingsStatus(ctx, log, bmcSetting, metalv1alpha1.BMCSettingsStateFailed)
return ctrl.Result{}, err
}
@@ -525,7 +525,7 @@ func (r *BMCSettingsReconciler) checkIfMaintenanceGranted(
log.V(1).Error(err, "failed to get referred server's Manitenance", "Server", server.Name, "ServerMaintenanceRef", serverMaintenanceRef)
notInMaintenanceState[server.Name] = ""
}
- // fail imediately if any of the server maintenance request failed, as we can not proceed further
+ // fail immediately if any of the server maintenance request failed, as we can not proceed further
if serverMaintenance.Status.State == metalv1alpha1.ServerMaintenanceStateFailed {
return false, metalv1alpha1.ServerMaintenanceStateFailed
}
diff --git a/internal/controller/bmcversion_controller.go b/internal/controller/bmcversion_controller.go
index 49b6e29f..8f85cb9d 100644
--- a/internal/controller/bmcversion_controller.go
+++ b/internal/controller/bmcversion_controller.go
@@ -243,10 +243,10 @@ func (r *BMCVersionReconciler) ensureBMCVersionStateTransition(
}
// check if the maintenance is granted
- if ok, failed := r.checkIfMaintenanceGranted(ctx, log, bmcClient, bmcVersion); !ok {
+ if ok, state := r.checkIfMaintenanceGranted(ctx, log, bmcClient, bmcVersion); !ok {
log.V(1).Info("Waiting for maintenance to be granted before continuing with updating bmc version")
return ctrl.Result{}, err
- } else if failed == metalv1alpha1.ServerMaintenanceStateFailed {
+ } else if state == metalv1alpha1.ServerMaintenanceStateFailed {
err := r.updateBMCVersionStatus(ctx, log, bmcVersion, metalv1alpha1.BMCVersionStateFailed, nil, nil, nil)
return ctrl.Result{}, err
}
@@ -446,7 +446,7 @@ func (r *BMCVersionReconciler) checkIfMaintenanceGranted(
notInMaintenanceState[server.Name] = ""
}
if serverMaintenance.Status.State == metalv1alpha1.ServerMaintenanceStateFailed {
- // fail imediately if any of the server maintenance request failed, as we can not proceed further
+ // fail immediately if any of the server maintenance request failed, as we can not proceed further
return false, metalv1alpha1.ServerMaintenanceStateFailed
}
// this gives us the waiting time for the server to be prepared for maintenance by ServerMaintenance controller
diff --git a/internal/controller/helper.go b/internal/controller/helper.go
index abcc103b..5d42ca10 100644
--- a/internal/controller/helper.go
+++ b/internal/controller/helper.go
@@ -93,7 +93,7 @@ func rearrangeBootOrder(
}
}
- var newBootOrder []string
+ newBootOrder := make([]string, 0, len(currentOrder))
// Add high priority boot devices first
for _, bootDevice := range highPrioirtyBootDevices {
newBootOrder = append(newBootOrder, bootDevice.BootOptionReference)
diff --git a/internal/controller/servermaintenance_controller.go b/internal/controller/servermaintenance_controller.go
index d0da0614..56445c73 100644
--- a/internal/controller/servermaintenance_controller.go
+++ b/internal/controller/servermaintenance_controller.go
@@ -111,7 +111,7 @@ func (r *ServerMaintenanceReconciler) ensureServerMaintenanceStateTransition(ctx
return ctrl.Result{}, err
}
- if changed, _, err := r.hasBootConfigurationChanged(ctx, log, serverMaintenance); err != nil {
+ if changed, _, err := r.isBootConfigurationChanged(ctx, log, serverMaintenance); err != nil {
return ctrl.Result{}, err
} else if changed {
return r.moveToPrepareMaintenanceState(ctx, log, serverMaintenance, server)
@@ -189,9 +189,9 @@ func (r *ServerMaintenanceReconciler) handlePrepareMaintenanceState(ctx context.
return ctrl.Result{}, nil
}
- config := &metalv1alpha1.ServerBootConfiguration{}
+ var config *metalv1alpha1.ServerBootConfiguration
changed := false
- if changed, config, err = r.hasBootConfigurationChanged(ctx, log, serverMaintenance); err != nil {
+ if changed, config, err = r.isBootConfigurationChanged(ctx, log, serverMaintenance); err != nil {
return ctrl.Result{}, err
} else if changed || config == nil {
// turn server power off before applying new boot configuration
@@ -338,8 +338,10 @@ func (r *ServerMaintenanceReconciler) delete(ctx context.Context, log logr.Logge
if requeue, err := r.revertMaintenanceBootConfig(ctx, log, server, serverMaintenance); err != nil {
return ctrl.Result{}, err
} else if requeue {
+ log.V(1).Info("Wait for boot order to be reverted on server", "Server", server.Name)
return ctrl.Result{RequeueAfter: r.ResyncInterval}, nil
}
+ log.V(1).Info("Boot order reverted on server", "Server", server.Name)
// make sure the server is powered on before removing maintenance
// this will ensure the server is booted into default boot order
if server.Spec.Power != metalv1alpha1.PowerOn && server.Status.PowerState != metalv1alpha1.ServerOnPowerState {
@@ -445,20 +447,38 @@ func (r *ServerMaintenanceReconciler) configureBootOrder(ctx context.Context, lo
switch maintenance.Spec.ServerBootConfigurationTemplate.Boottype {
case metalv1alpha1.BootTypeOneOff:
- // note: with this option, the next server boot up will exter pxe boot.
+ // note: with this option, the next server boot up will enter pxe boot.
// if Maintenance Spec.ServerPower is PowerOn, server will be powered on and will boot into pxe
// if Maintenance Spec.ServerPower is PowerOff, server will be powered off and will boot into pxe on next power on
// we do not need to save the default boot order as the server will boot into default boot order on next boot up
if err := bmcClient.SetPXEBootOnce(ctx, server.Spec.SystemURI); err != nil {
return false, fmt.Errorf("failed to set PXE boot once: %w", err)
}
+ log.V(1).Info("Configured PXE boot once for server",
+ "Server", server.Name)
+ status := &metalv1alpha1.ServerMaintenanceBootOrder{
+ State: metalv1alpha1.BootOrderOneOffPxeBootSuccess,
+ }
+ if err := r.patchBootOrderStatus(ctx, log, maintenance, status); err != nil {
+ return false, fmt.Errorf("failed to patch maintenance default boot order: %w", err)
+ }
+ // end state, continue with maintenance
+ return false, nil
case metalv1alpha1.BootTypePersistent:
bootOrder, err := bmcClient.GetBootOrder(ctx, server.Spec.SystemURI)
if err != nil {
return false, fmt.Errorf("failed to get boot order: %w", err)
}
- // if the boot order lenght is less than 2, we cannot change pxe as first boot
+ // if the boot order length is less than 2, we cannot change pxe as first boot
if len(bootOrder) < 2 {
+ log.V(1).Info("boot order for server can not be changed as it has less than 2 boot options", "bootOrder", bootOrder)
+ status := &metalv1alpha1.ServerMaintenanceBootOrder{
+ State: metalv1alpha1.BootOrderConfigNoOp,
+ }
+ if err := r.patchBootOrderStatus(ctx, log, maintenance, status); err != nil {
+ return false, fmt.Errorf("failed to patch maintenance default boot order: %w", err)
+ }
+ // end state, continue with maintenance
return false, nil
}
bootOptions, err := bmcClient.GetBootOptions(ctx, server.Spec.SystemURI)
@@ -482,35 +502,64 @@ func (r *ServerMaintenanceReconciler) configureBootOrder(ctx context.Context, lo
bootOrderChanged := rearrangeBootOrder(bootOrder, bootOptions, isPxeBootOption, isDiskBootOption)
- if maintenance.Status.DefaultBootOrder == nil {
+ if reflect.DeepEqual(bootOrder, bootOrderChanged) {
+ // boot order is already set to pxe first, nothing to do
+ log.V(1).Info("boot order for server is set to pxe first",
+ "Server", server.Name,
+ "BootOrder", bootOrder)
+ status := &metalv1alpha1.ServerMaintenanceBootOrder{}
+ if maintenance.Status.BootOrderStatus == nil {
+ status = &metalv1alpha1.ServerMaintenanceBootOrder{
+ State: metalv1alpha1.BootOrderConfigNoOp,
+ }
+ } else {
+ status = maintenance.Status.BootOrderStatus.DeepCopy()
+ status.State = metalv1alpha1.BootOrderConfigSuccess
+ }
+ if err := r.patchBootOrderStatus(ctx, log, maintenance, status); err != nil {
+ return false, fmt.Errorf("failed to patch maintenance default boot order: %w", err)
+ }
+ // end state, continue with maintenance
+ return false, nil
+ }
+
+ if maintenance.Status.BootOrderStatus == nil {
// note in this option, the server will always boot into pxe until changed again
// the job needs to be completed, else any other BIOS config job triggered will fail as exsisting job is not completed
if err := bmcClient.SetBootOrder(ctx, server.Spec.SystemURI, redfish.Boot{BootOrder: bootOrderChanged}); err != nil {
return false, fmt.Errorf("failed to set PXE boot once: %w", err)
}
+ log.V(1).Info("Configured boot order for server",
+ "Server", server.Name,
+ "BootType", maintenance.Spec.ServerBootConfigurationTemplate.Boottype,
+ "BootOrder", bootOrderChanged)
// save the current boot order to revert back after maintenance
- maintenanceBase := maintenance.DeepCopy()
- maintenance.Status.DefaultBootOrder = bootOrder
- if err := r.Status().Patch(ctx, maintenance, client.MergeFrom(maintenanceBase)); err != nil {
+ status := &metalv1alpha1.ServerMaintenanceBootOrder{
+ DefaultBootOrder: bootOrder,
+ State: metalv1alpha1.BootOrderConfigInProgress,
+ }
+ if err := r.patchBootOrderStatus(ctx, log, maintenance, status); err != nil {
return false, fmt.Errorf("failed to patch maintenance default boot order: %w", err)
}
- log.V(1).Info("Saved default boot order for server", "Server", server.Name, "BootOrder", bootOrder)
+ log.V(1).Info("Saved default boot order for server to revert post maintenance", "Server", server.Name, "BootOrder", bootOrder)
+ // requeue to give time to the server to actually do the work and verify it
+ return true, nil
}
// we need to complete the job to set the boot order correctly. Turn the server power on
if server.Spec.Power != metalv1alpha1.PowerOn && server.Status.PowerState != metalv1alpha1.ServerOnPowerState {
if err := r.setAndPatchServerPowerState(ctx, log, server, metalv1alpha1.PowerOn); err != nil {
return false, err
}
+ log.V(1).Info("Requested server power On to complete boot order change",
+ "Server", server.Name)
// requeue to give time to the server to power off
return true, nil
}
+ log.V(1).Info("wait for boot order to be set on server", "Server", server.Name)
+ return true, nil
default:
return false, fmt.Errorf("unknown boot type: %s", maintenance.Spec.ServerBootConfigurationTemplate.Boottype)
}
- log.V(1).Info("Configured boot order for server",
- "Server", server.Name,
- "BootType", maintenance.Spec.ServerBootConfigurationTemplate.Boottype)
- return false, nil
}
func (r *ServerMaintenanceReconciler) revertMaintenanceBootConfig(
@@ -520,48 +569,111 @@ func (r *ServerMaintenanceReconciler) revertMaintenanceBootConfig(
serverMaintenance *metalv1alpha1.ServerMaintenance,
) (bool, error) {
// we need to revert the boot order only if it was changed by the maintenance
- if serverMaintenance.Status.DefaultBootOrder == nil {
- return false, nil
- }
- // if the server is not off, we need to turn it off first to change the boot order
- // this will help schedule the job to change the boot order for the server
- // note: if the server is already off, this will be a no-op
- if server.Spec.Power != metalv1alpha1.PowerOff && server.Status.PowerState != metalv1alpha1.ServerOffPowerState {
- if err := r.setAndPatchServerPowerState(ctx, log, server, metalv1alpha1.PowerOff); err != nil {
- return false, err
+ if serverMaintenance.Status.BootOrderStatus != nil && serverMaintenance.Status.BootOrderStatus.DefaultBootOrder != nil {
+ bmcClient, err := bmcutils.GetBMCClientForServer(ctx, r.Client, server, r.Insecure, r.BMCOptions)
+ if err != nil {
+ return false, fmt.Errorf("failed to create BMC client: %w", err)
}
- // requeue to give time to the server to power off
- return true, nil
- }
+ defer bmcClient.Logout()
+ bootOrder, err := bmcClient.GetBootOrder(ctx, server.Spec.SystemURI)
+ if err != nil {
+ return false, fmt.Errorf("failed to get boot order: %w", err)
+ }
+ if serverMaintenance.Status.BootOrderStatus.State == metalv1alpha1.BootOrderConfigSuccess {
+ // if the server is not off, we need to turn it off first to change the boot order
+ // this will help schedule the job to change the boot order for the server
+ // note: if the server is already off, this will be a no-op
+ if server.Spec.Power != metalv1alpha1.PowerOff || server.Status.PowerState != metalv1alpha1.ServerOffPowerState {
+ if err := r.setAndPatchServerPowerState(ctx, log, server, metalv1alpha1.PowerOff); err != nil {
+ return false, err
+ }
+ // requeue to give time to the server to power off
+ return true, nil
+ }
- bmcClient, err := bmcutils.GetBMCClientForServer(ctx, r.Client, server, r.Insecure, r.BMCOptions)
- if err != nil {
- return false, fmt.Errorf("failed to create BMC client: %w", err)
- }
- defer bmcClient.Logout()
+ // sanitize the boot order to remove any invalid boot options
+ // this case can happen if the bios settings were changed boot options during maintenance
+ // we remove only the invalid boot options and keep the rest in same order
+ // because the settings will take care of new added boot device and apend it to end
+ currentBootDeviceMap := make(map[string]struct{})
+ for _, bootDevice := range bootOrder {
+ currentBootDeviceMap[bootDevice] = struct{}{}
+ }
+ var sanitizedBootOrder []string
+ for _, bootDevice := range serverMaintenance.Status.BootOrderStatus.DefaultBootOrder {
+ if _, ok := currentBootDeviceMap[bootDevice]; !ok {
+ log.V(1).Info("Removing invalid boot option from default boot order",
+ "Server", server.Name,
+ "BootDevice", bootDevice)
+ continue
+ }
+ sanitizedBootOrder = append(sanitizedBootOrder, bootDevice)
+ }
- err = bmcClient.SetBootOrder(ctx, server.Spec.SystemURI, redfish.Boot{
- BootOrder: serverMaintenance.Status.DefaultBootOrder,
- })
- if err != nil {
- return false, fmt.Errorf("failed to revert boot order: %w", err)
- }
+ err = bmcClient.SetBootOrder(ctx, server.Spec.SystemURI, redfish.Boot{
+ BootOrder: sanitizedBootOrder,
+ })
+ if err != nil {
+ return false, fmt.Errorf("failed to revert boot order: %w", err)
+ }
+ // save the current boot order to revert back after maintenance
+ status := &metalv1alpha1.ServerMaintenanceBootOrder{
+ DefaultBootOrder: serverMaintenance.Status.BootOrderStatus.DefaultBootOrder,
+ State: metalv1alpha1.BootOrderConfigSuccessRevertInProgress,
+ }
+ if err := r.patchBootOrderStatus(ctx, log, serverMaintenance, status); err != nil {
+ return false, fmt.Errorf("failed to patch maintenance default boot order: %w", err)
+ }
+ log.V(1).Info("Patched revert to default boot order", "Server", server.Name, "BootOrder", nil)
+ // requeue to give time to the server to actually do the work and verify it
+ return true, nil
+ }
- // save the current boot order to revert back after maintenance
- maintenanceBase := serverMaintenance.DeepCopy()
- serverMaintenance.Status.DefaultBootOrder = nil
- if err := r.Status().Patch(ctx, serverMaintenance, client.MergeFrom(maintenanceBase)); err != nil {
- return false, fmt.Errorf("failed to patch maintenance default boot order: %w", err)
- }
- log.V(1).Info("revert to default boot order", "Server", server.Name, "BootOrder", nil)
+ if serverMaintenance.Status.BootOrderStatus.State == metalv1alpha1.BootOrderConfigSuccessRevertInProgress {
+ if server.Spec.Power != metalv1alpha1.PowerOn || server.Status.PowerState != metalv1alpha1.ServerOnPowerState {
+ if err := r.setAndPatchServerPowerState(ctx, log, server, metalv1alpha1.PowerOn); err != nil {
+ return false, err
+ }
+ // requeue to give time to the server to power on
+ return true, nil
+ }
- if server.Spec.Power != metalv1alpha1.PowerOn && server.Status.PowerState != metalv1alpha1.ServerOnPowerState {
- if err := r.setAndPatchServerPowerState(ctx, log, server, metalv1alpha1.PowerOn); err != nil {
- return false, err
+ revertCompleted := false
+ if reflect.DeepEqual(bootOrder, serverMaintenance.Status.BootOrderStatus.DefaultBootOrder) {
+ revertCompleted = true
+ } else {
+ // sometimes, changing biosSettings leads to change in number of boot options
+ // hence, we need check the default boot order and ignore extra from current boot order
+ for idx, bootDevice := range serverMaintenance.Status.BootOrderStatus.DefaultBootOrder {
+ // cases where boot option is disabled in bios settings
+ if idx >= len(bootOrder) {
+ break
+ }
+ if bootDevice != bootOrder[idx] {
+ log.V(1).Info("boot order for server is not yet reverted to default. Waiting...",
+ "Server", server.Name,
+ "CurrentBootOrder", bootOrder,
+ "DefaultBootOrder", serverMaintenance.Status.BootOrderStatus.DefaultBootOrder)
+ return true, nil
+ }
+ }
+ revertCompleted = true
+ }
+ if revertCompleted {
+ log.V(1).Info("boot order for server has been reverted to default",
+ "Server", server.Name,
+ "BootOrder", bootOrder)
+ if err := r.patchBootOrderStatus(ctx, log, serverMaintenance, nil); err != nil {
+ return false, fmt.Errorf("failed to patch maintenance default boot order: %w", err)
+ }
+ // end state, continue with maintenance complete
+ return false, nil
+ }
+ log.V(1).Info("wait for boot order to be reverted on server", "Server", server.Name)
+ return true, nil
}
- // requeue to give time to the server to power off
- return true, nil
}
+
log.V(1).Info("boot override during maintenance is reverted", "Server", serverMaintenance.Name)
return false, nil
}
@@ -589,6 +701,16 @@ func (r *ServerMaintenanceReconciler) removeBootConfigRefFromServer(
return nil
}
+func (r *ServerMaintenanceReconciler) patchBootOrderStatus(ctx context.Context, log logr.Logger, maintenance *metalv1alpha1.ServerMaintenance, bootOrderStatus *metalv1alpha1.ServerMaintenanceBootOrder) error {
+ maintenanceBase := maintenance.DeepCopy()
+ maintenance.Status.BootOrderStatus = bootOrderStatus
+ if err := r.Status().Patch(ctx, maintenance, client.MergeFrom(maintenanceBase)); err != nil {
+ return fmt.Errorf("failed to patch maintenance default boot order: %w", err)
+ }
+ log.V(1).Info("Patched maintenance boot order status", "ServerMaintenance", maintenance.Name, "BootOrderStatus", bootOrderStatus)
+ return nil
+}
+
func (r *ServerMaintenanceReconciler) removeMaintenanceRefFromServer(ctx context.Context, server *metalv1alpha1.Server) error {
serverBase := server.DeepCopy()
server.Spec.ServerMaintenanceRef = nil
@@ -649,7 +771,7 @@ func (r *ServerMaintenanceReconciler) getMaintenanceBootConfigurationRef(
return config, nil
}
-func (r *ServerMaintenanceReconciler) hasBootConfigurationChanged(
+func (r *ServerMaintenanceReconciler) isBootConfigurationChanged(
ctx context.Context,
log logr.Logger,
serverMaintenance *metalv1alpha1.ServerMaintenance,
From 9c6a959b9842876d0721d80cbefcc07fa07f5509 Mon Sep 17 00:00:00 2001
From: Nagadeesh Nagaraja
Date: Wed, 8 Oct 2025 18:34:43 +0200
Subject: [PATCH 08/12] generated docs
---
...metal.ironcore.dev_servermaintenances.yaml | 33 ++-
.../crd/metal.ironcore.dev_servers.yaml | 67 +-----
docs/api-reference/api.md | 225 ++++++++++--------
3 files changed, 158 insertions(+), 167 deletions(-)
diff --git a/dist/chart/templates/crd/metal.ironcore.dev_servermaintenances.yaml b/dist/chart/templates/crd/metal.ironcore.dev_servermaintenances.yaml
index d74bd7b2..841b6522 100755
--- a/dist/chart/templates/crd/metal.ironcore.dev_servermaintenances.yaml
+++ b/dist/chart/templates/crd/metal.ironcore.dev_servermaintenances.yaml
@@ -64,10 +64,6 @@ spec:
spec:
description: ServerMaintenanceSpec defines the desired state of a ServerMaintenance
properties:
- maintenanceBootPolicy:
- description: MaintenanceBootPolicy specifies the boot policy to be
- enforced on the server during maintenance.
- type: string
policy:
description: Policy specifies the maintenance policy to be enforced
on the server.
@@ -76,6 +72,20 @@ spec:
description: ServerBootConfigurationTemplate specifies the boot configuration
to be applied to the server during maintenance.
properties:
+ bootType:
+ default: Persistent
+ description: |-
+ BootType specifies the type of boot configuration.
+ It can be either "Oneoff" or "Persistent". If not specified, it defaults to "Persistent".
+ "Persistent" will set the configuration reboot the server before setting the server to the desired power state mentioned in spec.
+
+ "Oneoff" will set the configuration to boot once. next boot-up will boot into default boot order.
+ if power state mentioned in spec is "PowerOn", server will be powered on and will boot into the configuration.
+ if power state is "PowerOff", server will be powered off and will boot into the configuration on next power on.
+ enum:
+ - Oneoff
+ - Persistent
+ type: string
name:
description: Name specifies the name of the boot configuration.
type: string
@@ -151,6 +161,21 @@ spec:
status:
description: ServerMaintenanceStatus defines the observed state of a ServerMaintenance
properties:
+ bootOrderStatus:
+ description: BootOrder specifies the boot order of the server before
+ maintenance.
+ properties:
+ defaultBootOrder:
+ description: DefaultBootOrder specifies the default boot order
+ of the server before maintenance.
+ items:
+ type: string
+ type: array
+ state:
+ description: ServerMaintenanceState specifies the current state
+ of the server maintenance.
+ type: string
+ type: object
state:
description: State specifies the current state of the server maintenance.
type: string
diff --git a/dist/chart/templates/crd/metal.ironcore.dev_servers.yaml b/dist/chart/templates/crd/metal.ironcore.dev_servers.yaml
index 155f67c5..b5ad500f 100755
--- a/dist/chart/templates/crd/metal.ironcore.dev_servers.yaml
+++ b/dist/chart/templates/crd/metal.ironcore.dev_servers.yaml
@@ -164,6 +164,8 @@ spec:
BootConfigurationRef is a reference to a BootConfiguration object that specifies
the boot configuration for this server. This field is optional and can be omitted
if no boot configuration is specified.
+ This is used during initial provisioning of the server automatically.
+ This is also used during maintenance if Maintenance needs server to be taken offline
properties:
apiVersion:
description: API version of the referent.
@@ -205,75 +207,10 @@ spec:
type: string
type: object
x-kubernetes-map-type: atomic
- bootOrder:
- description: BootOrder specifies the boot order of the server.
- items:
- description: BootOrder represents the boot order of the server.
- properties:
- device:
- description: Device is the device to boot from.
- type: string
- name:
- description: Name is the name of the boot device.
- type: string
- priority:
- description: Priority is the priority of the boot device.
- type: integer
- required:
- - device
- - name
- - priority
- type: object
- type: array
indicatorLED:
description: IndicatorLED specifies the desired state of the server's
indicator LED.
type: string
- maintenanceBootConfigurationRef:
- description: |-
- MaintenanceBootConfigurationRef is a reference to a BootConfiguration object that specifies
- the boot configuration for this server during maintenance. This field is optional and can be omitted
- properties:
- apiVersion:
- description: API version of the referent.
- type: string
- fieldPath:
- description: |-
- If referring to a piece of an object instead of an entire object, this string
- should contain a valid JSON/Go field access statement, such as desiredState.manifest.containers[2].
- For example, if the object reference is to a container within a pod, this would take on a value like:
- "spec.containers{name}" (where "name" refers to the name of the container that triggered
- the event) or if no container name is specified "spec.containers[2]" (container with
- index 2 in this pod). This syntax is chosen only to have some well-defined way of
- referencing a part of an object.
- type: string
- kind:
- description: |-
- Kind of the referent.
- More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
- type: string
- name:
- description: |-
- Name of the referent.
- More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
- type: string
- namespace:
- description: |-
- Namespace of the referent.
- More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/
- type: string
- resourceVersion:
- description: |-
- Specific resourceVersion to which this reference is made, if any.
- More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#concurrency-control-and-consistency
- type: string
- uid:
- description: |-
- UID of the referent.
- More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#uids
- type: string
- type: object
- x-kubernetes-map-type: atomic
power:
description: Power specifies the desired power state of the server.
type: string
diff --git a/docs/api-reference/api.md b/docs/api-reference/api.md
index b58f76af..3e51df99 100644
--- a/docs/api-reference/api.md
+++ b/docs/api-reference/api.md
@@ -2577,56 +2577,27 @@ ServerMaintenancePolicy
-
+
-(Appears on:ServerSpec)
+(Appears on:ServerBootConfigurationTemplate)
-
BootOrder represents the boot order of the server.
-| Field |
+Value |
Description |
-
-
-
-name
-
-string
-
+ |
"Oneoff" |
+BootTypeOneOff indicates that the server should boot into this configuration one time.
|
-
- Name is the name of the boot device.
- |
-
-
-
-priority
-
-int
-
+ |
"Persistent" |
+BootTypePersistent indicates that the server should boot into this configuration continuously.
|
-
- Priority is the priority of the boot device.
- |
-
-
-
-device
-
-string
-
- |
-
- Device is the device to boot from.
- |
-
-
+
@@ -3525,36 +3496,9 @@ Kubernetes core/v1.ObjectReference
(Optional)
BootConfigurationRef is a reference to a BootConfiguration object that specifies
the boot configuration for this server. This field is optional and can be omitted
-if no boot configuration is specified.
-
-
-
-
-maintenanceBootConfigurationRef
-
-
-Kubernetes core/v1.ObjectReference
-
-
- |
-
-(Optional)
- MaintenanceBootConfigurationRef is a reference to a BootConfiguration object that specifies
-the boot configuration for this server during maintenance. This field is optional and can be omitted
- |
-
-
-
-bootOrder
-
-
-[]BootOrder
-
-
- |
-
-(Optional)
- BootOrder specifies the boot order of the server.
+if no boot configuration is specified.
+This is used during initial provisioning of the server automatically.
+This is also used during maintenance if Maintenance needs server to be taken offline
|
@@ -3833,6 +3777,25 @@ string
+bootType
+
+
+BootType
+
+
+ |
+
+(Optional)
+ BootType specifies the type of boot configuration.
+It can be either “Oneoff” or “Persistent”. If not specified, it defaults to “Persistent”.
+“Persistent” will set the configuration reboot the server before setting the server to the desired power state mentioned in spec.
+“Oneoff” will set the configuration to boot once. next boot-up will boot into default boot order.
+if power state mentioned in spec is “PowerOn”, server will be powered on and will boot into the configuration.
+if power state is “PowerOff”, server will be powered off and will boot into the configuration on next power on.
+ |
+
+
+
spec
@@ -4248,6 +4211,82 @@ ServerMaintenanceStatus
|
+ServerMaintenanceBootOrder
+
+
+(Appears on:ServerMaintenanceStatus)
+
+
+
+
+
+
+| Field |
+Description |
+
+
+
+
+
+defaultBootOrder
+
+[]string
+
+ |
+
+(Optional)
+ DefaultBootOrder specifies the default boot order of the server before maintenance.
+ |
+
+
+
+state
+
+
+ServerMaintenanceBootOrderState
+
+
+ |
+
+ |
+
+
+
+ServerMaintenanceBootOrderState
+(string alias)
+
+(Appears on:ServerMaintenanceBootOrder)
+
+
+
ServerMaintenanceState specifies the current state of the server maintenance.
+
+
+
+
+| Value |
+Description |
+
+
+"InProgress" |
+BootOrderConfigInProgress specifies that the server bootOrder configuration is InProgress.
+ |
+
"NoOp" |
+BootOrderConfigNoOp specifies that the server bootOrder is as expected.
+ |
+
"RevertCompleted" |
+BootOrderConfigRevertSuccess specifies that the server bootOrder configuration is Completed.
+ |
+
"Completed" |
+BootOrderConfigSuccess specifies that the server bootOrder configuration is Completed.
+ |
+
"RevertInProgress" |
+BootOrderConfigSuccessRevertInProgress specifies that the server bootOrder revert to default is InProgress.
+ |
+
"PxeOneOffBootSuccess" |
+BootOrderOneOffPxeBootSuccess specifies that the server bootOrder configuration is set to boot pxe once.
+ |
+
+
ServerMaintenancePolicy
(string alias)
@@ -4400,6 +4439,9 @@ ServerBootConfigurationTemplate
"Pending" |
ServerMaintenanceStatePending specifies that the server maintenance is pending.
|
+
"PrepareForMaintenance" |
+ServerMaintenanceStatePreapareMaintenance specifies that the server is in maintenance.
+ |
ServerMaintenanceStatus
@@ -4431,6 +4473,20 @@ ServerMaintenanceState
State specifies the current state of the server maintenance.
+
+
+bootOrderStatus
+
+
+ServerMaintenanceBootOrder
+
+
+ |
+
+(Optional)
+ BootOrder specifies the boot order of the server before maintenance.
+ |
+
BootConfigurationRef is a reference to a BootConfiguration object that specifies
the boot configuration for this server. This field is optional and can be omitted
-if no boot configuration is specified.
-
-
-
-
-maintenanceBootConfigurationRef
-
-
-Kubernetes core/v1.ObjectReference
-
-
- |
-
-(Optional)
- MaintenanceBootConfigurationRef is a reference to a BootConfiguration object that specifies
-the boot configuration for this server during maintenance. This field is optional and can be omitted
- |
-
-
-
-bootOrder
-
-
-[]BootOrder
-
-
- |
-
-(Optional)
- BootOrder specifies the boot order of the server.
+if no boot configuration is specified.
+This is used during initial provisioning of the server automatically.
+This is also used during maintenance if Maintenance needs server to be taken offline
|
From 2e83a09eeafd6ff2df939aedd2e83ff276833298 Mon Sep 17 00:00:00 2001
From: Nagadeesh Nagaraja
Date: Mon, 13 Oct 2025 09:46:06 +0200
Subject: [PATCH 09/12] update variable name as review comments suggested
---
api/v1alpha1/constants.go | 4 ++--
api/v1alpha1/servermaintenance_types.go | 14 ++++++++------
.../metal.ironcore.dev_servermaintenances.yaml | 12 +++++++-----
.../crd/metal.ironcore.dev_servermaintenances.yaml | 12 +++++++-----
docs/api-reference/api.md | 10 ++++++----
.../controller/servermaintenance_controller.go | 8 ++++----
6 files changed, 34 insertions(+), 26 deletions(-)
diff --git a/api/v1alpha1/constants.go b/api/v1alpha1/constants.go
index 882e3d4f..2b8f6cdf 100644
--- a/api/v1alpha1/constants.go
+++ b/api/v1alpha1/constants.go
@@ -27,8 +27,8 @@ const (
// OperationSkipBootConfiguration indicates which boot configuration operation should be skipped.
OperationSkipBootConfiguration = "metal.ironcore.dev/operation-skip-boot-configuration"
- // OperationBootConfigurationSkip skips the boot configuration if set to this value.
- OperationBootConfigurationSkip = "skip"
+ // SkipBootConfiguration skips the boot configuration if set to this value.
+ SkipBootConfiguration = "skip"
// OperationAnnotationForceReset forces a reset before next operation
OperationAnnotationForceReset = "ForceReset"
)
diff --git a/api/v1alpha1/servermaintenance_types.go b/api/v1alpha1/servermaintenance_types.go
index 9f450d3e..8570b4e2 100644
--- a/api/v1alpha1/servermaintenance_types.go
+++ b/api/v1alpha1/servermaintenance_types.go
@@ -24,17 +24,17 @@ type ServerBootConfigurationTemplate struct {
Name string `json:"name"`
// BootType specifies the type of boot configuration.
- // It can be either "Oneoff" or "Persistent". If not specified, it defaults to "Persistent".
+ // It can be either "OneOff" or "Persistent". If not specified, it defaults to "Persistent".
// "Persistent" will set the configuration reboot the server before setting the server to the desired power state mentioned in spec.
//
- // "Oneoff" will set the configuration to boot once. next boot-up will boot into default boot order.
+ // "OneOff" will set the configuration to boot once. next boot-up will boot into default boot order.
// if power state mentioned in spec is "PowerOn", server will be powered on and will boot into the configuration.
// if power state is "PowerOff", server will be powered off and will boot into the configuration on next power on.
//
- // +kubebuilder:validation:Enum=Oneoff;Persistent
+ // +kubebuilder:validation:Enum=OneOff;Persistent
// +optional
// +kubebuilder:default=Persistent
- Boottype BootType `json:"bootType,omitempty"`
+ BootType BootType `json:"bootType,omitempty"`
// Parameters specify the parameters to be used for rendering the boot configuration.
// +required
@@ -45,7 +45,7 @@ type BootType string
const (
// BootTypeOneOff indicates that the server should boot into this configuration one time.
- BootTypeOneOff BootType = "Oneoff"
+ BootTypeOneOff BootType = "OneOff"
// BootTypePersistent indicates that the server should boot into this configuration continuously.
BootTypePersistent BootType = "Persistent"
)
@@ -84,7 +84,9 @@ type ServerMaintenanceStatus struct {
// State specifies the current state of the server maintenance.
State ServerMaintenanceState `json:"state,omitempty"`
- // BootOrder specifies the boot order of the server before maintenance.
+ // BootOrderStatus specifies the boot order of the server before maintenance.
+ // is used to store the boot order of server before it is changed for maintenance.
+ // used only with BootType Persistent.
// +optional
BootOrderStatus *ServerMaintenanceBootOrder `json:"bootOrderStatus,omitempty"`
}
diff --git a/config/crd/bases/metal.ironcore.dev_servermaintenances.yaml b/config/crd/bases/metal.ironcore.dev_servermaintenances.yaml
index 6b0120aa..38443b29 100644
--- a/config/crd/bases/metal.ironcore.dev_servermaintenances.yaml
+++ b/config/crd/bases/metal.ironcore.dev_servermaintenances.yaml
@@ -70,14 +70,14 @@ spec:
default: Persistent
description: |-
BootType specifies the type of boot configuration.
- It can be either "Oneoff" or "Persistent". If not specified, it defaults to "Persistent".
+ It can be either "OneOff" or "Persistent". If not specified, it defaults to "Persistent".
"Persistent" will set the configuration reboot the server before setting the server to the desired power state mentioned in spec.
- "Oneoff" will set the configuration to boot once. next boot-up will boot into default boot order.
+ "OneOff" will set the configuration to boot once. next boot-up will boot into default boot order.
if power state mentioned in spec is "PowerOn", server will be powered on and will boot into the configuration.
if power state is "PowerOff", server will be powered off and will boot into the configuration on next power on.
enum:
- - Oneoff
+ - OneOff
- Persistent
type: string
name:
@@ -156,8 +156,10 @@ spec:
description: ServerMaintenanceStatus defines the observed state of a ServerMaintenance
properties:
bootOrderStatus:
- description: BootOrder specifies the boot order of the server before
- maintenance.
+ description: |-
+ BootOrderStatus specifies the boot order of the server before maintenance.
+ is used to store the boot order of server before it is changed for maintenance.
+ used only with BootType Persistent.
properties:
defaultBootOrder:
description: DefaultBootOrder specifies the default boot order
diff --git a/dist/chart/templates/crd/metal.ironcore.dev_servermaintenances.yaml b/dist/chart/templates/crd/metal.ironcore.dev_servermaintenances.yaml
index 841b6522..1fcb029a 100755
--- a/dist/chart/templates/crd/metal.ironcore.dev_servermaintenances.yaml
+++ b/dist/chart/templates/crd/metal.ironcore.dev_servermaintenances.yaml
@@ -76,14 +76,14 @@ spec:
default: Persistent
description: |-
BootType specifies the type of boot configuration.
- It can be either "Oneoff" or "Persistent". If not specified, it defaults to "Persistent".
+ It can be either "OneOff" or "Persistent". If not specified, it defaults to "Persistent".
"Persistent" will set the configuration reboot the server before setting the server to the desired power state mentioned in spec.
- "Oneoff" will set the configuration to boot once. next boot-up will boot into default boot order.
+ "OneOff" will set the configuration to boot once. next boot-up will boot into default boot order.
if power state mentioned in spec is "PowerOn", server will be powered on and will boot into the configuration.
if power state is "PowerOff", server will be powered off and will boot into the configuration on next power on.
enum:
- - Oneoff
+ - OneOff
- Persistent
type: string
name:
@@ -162,8 +162,10 @@ spec:
description: ServerMaintenanceStatus defines the observed state of a ServerMaintenance
properties:
bootOrderStatus:
- description: BootOrder specifies the boot order of the server before
- maintenance.
+ description: |-
+ BootOrderStatus specifies the boot order of the server before maintenance.
+ is used to store the boot order of server before it is changed for maintenance.
+ used only with BootType Persistent.
properties:
defaultBootOrder:
description: DefaultBootOrder specifies the default boot order
diff --git a/docs/api-reference/api.md b/docs/api-reference/api.md
index 3e51df99..7f228856 100644
--- a/docs/api-reference/api.md
+++ b/docs/api-reference/api.md
@@ -2591,7 +2591,7 @@ ServerMaintenancePolicy
| Description |
-"Oneoff" |
+
"OneOff" |
BootTypeOneOff indicates that the server should boot into this configuration one time.
|
"Persistent" |
@@ -3787,9 +3787,9 @@ BootType
(Optional)
BootType specifies the type of boot configuration.
-It can be either “Oneoff” or “Persistent”. If not specified, it defaults to “Persistent”.
+It can be either “OneOff” or “Persistent”. If not specified, it defaults to “Persistent”.
“Persistent” will set the configuration reboot the server before setting the server to the desired power state mentioned in spec.
-“Oneoff” will set the configuration to boot once. next boot-up will boot into default boot order.
+ “OneOff” will set the configuration to boot once. next boot-up will boot into default boot order.
if power state mentioned in spec is “PowerOn”, server will be powered on and will boot into the configuration.
if power state is “PowerOff”, server will be powered off and will boot into the configuration on next power on.
|
@@ -4484,7 +4484,9 @@ ServerMaintenanceBootOrder
(Optional)
- BootOrder specifies the boot order of the server before maintenance.
+BootOrderStatus specifies the boot order of the server before maintenance.
+is used to store the boot order of server before it is changed for maintenance.
+used only with BootType Persistent.
|
diff --git a/internal/controller/servermaintenance_controller.go b/internal/controller/servermaintenance_controller.go
index 56445c73..9d2fd409 100644
--- a/internal/controller/servermaintenance_controller.go
+++ b/internal/controller/servermaintenance_controller.go
@@ -215,7 +215,7 @@ func (r *ServerMaintenanceReconciler) handlePrepareMaintenanceState(ctx context.
// implication of skipping is that the server will still continue to run the OS during Maintenance
// might be subjected to reboots
val, found := serverMaintenance.GetAnnotations()[metalv1alpha1.OperationSkipBootConfiguration]
- if config == nil && (!found || val != metalv1alpha1.OperationBootConfigurationSkip) {
+ if config == nil && (!found || val != metalv1alpha1.SkipBootConfiguration) {
log.V(1).Info("No ServerBootConfigurationTemplate boot configuration", "Server", server.Name)
_, err = r.patchMaintenanceState(ctx, serverMaintenance, metalv1alpha1.ServerMaintenanceStateFailed)
return ctrl.Result{}, err
@@ -445,7 +445,7 @@ func (r *ServerMaintenanceReconciler) configureBootOrder(ctx context.Context, lo
}
defer bmcClient.Logout()
- switch maintenance.Spec.ServerBootConfigurationTemplate.Boottype {
+ switch maintenance.Spec.ServerBootConfigurationTemplate.BootType {
case metalv1alpha1.BootTypeOneOff:
// note: with this option, the next server boot up will enter pxe boot.
// if Maintenance Spec.ServerPower is PowerOn, server will be powered on and will boot into pxe
@@ -531,7 +531,7 @@ func (r *ServerMaintenanceReconciler) configureBootOrder(ctx context.Context, lo
}
log.V(1).Info("Configured boot order for server",
"Server", server.Name,
- "BootType", maintenance.Spec.ServerBootConfigurationTemplate.Boottype,
+ "BootType", maintenance.Spec.ServerBootConfigurationTemplate.BootType,
"BootOrder", bootOrderChanged)
// save the current boot order to revert back after maintenance
status := &metalv1alpha1.ServerMaintenanceBootOrder{
@@ -558,7 +558,7 @@ func (r *ServerMaintenanceReconciler) configureBootOrder(ctx context.Context, lo
log.V(1).Info("wait for boot order to be set on server", "Server", server.Name)
return true, nil
default:
- return false, fmt.Errorf("unknown boot type: %s", maintenance.Spec.ServerBootConfigurationTemplate.Boottype)
+ return false, fmt.Errorf("unknown boot type: %s", maintenance.Spec.ServerBootConfigurationTemplate.BootType)
}
}
From 2082a829416527af8bcbfe19d1ff5ffff25d585c Mon Sep 17 00:00:00 2001
From: Nagadeesh Nagaraja
Date: Wed, 22 Oct 2025 10:16:34 +0200
Subject: [PATCH 10/12] make sure all bootConfigs are marked ready
---
internal/controller/bmcversionset_controller_test.go | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/internal/controller/bmcversionset_controller_test.go b/internal/controller/bmcversionset_controller_test.go
index 45fa89ed..298f105e 100644
--- a/internal/controller/bmcversionset_controller_test.go
+++ b/internal/controller/bmcversionset_controller_test.go
@@ -172,7 +172,7 @@ var _ = Describe("BMCVersionSet Controller", func() {
By("Ensuring that the BootConfig resource has been created/ marked ready")
var serverMaintenanceList metalv1alpha1.ServerMaintenanceList
- Eventually(ObjectList(&serverMaintenanceList)).Should(HaveField("Items", Not(BeNil())))
+ Eventually(ObjectList(&serverMaintenanceList)).Should(HaveField("Items", HaveLen(2)))
for _, serverMaintaince := range serverMaintenanceList.Items {
if metav1.IsControlledBy(&serverMaintaince, bmcVersion02) || metav1.IsControlledBy(&serverMaintaince, bmcVersion03) {
By(fmt.Sprintf("Marking the maintenance %v ", serverMaintaince.Name))
@@ -274,7 +274,7 @@ var _ = Describe("BMCVersionSet Controller", func() {
By("Ensuring that the BootConfig resource has been created/ marked ready")
var serverMaintenanceList metalv1alpha1.ServerMaintenanceList
- Eventually(ObjectList(&serverMaintenanceList)).Should(HaveField("Items", Not(BeNil())))
+ Eventually(ObjectList(&serverMaintenanceList)).Should(HaveField("Items", HaveLen(2)))
for _, serverMaintaince := range serverMaintenanceList.Items {
if metav1.IsControlledBy(&serverMaintaince, bmcVersion02) || metav1.IsControlledBy(&serverMaintaince, bmcVersion03) {
By(fmt.Sprintf("Marking the maintenance %v ", serverMaintaince.Name))
From 7fa5d651d9d9ed8e285c34eea4361df73c801707 Mon Sep 17 00:00:00 2001
From: Nagadeesh Nagaraja
Date: Wed, 22 Oct 2025 10:26:46 +0200
Subject: [PATCH 11/12] Deprecate the BootOrder field in server resource
---
api/v1alpha1/server_types.go | 28 ++++-
api/v1alpha1/zz_generated.deepcopy.go | 25 ++++
.../crd/bases/metal.ironcore.dev_servers.yaml | 69 +++++++++-
.../crd/metal.ironcore.dev_servers.yaml | 69 +++++++++-
docs/api-reference/api.md | 119 +++++++++++++++++-
internal/controller/server_controller.go | 8 ++
6 files changed, 306 insertions(+), 12 deletions(-)
diff --git a/api/v1alpha1/server_types.go b/api/v1alpha1/server_types.go
index 2c9401db..ddf13c5f 100644
--- a/api/v1alpha1/server_types.go
+++ b/api/v1alpha1/server_types.go
@@ -18,6 +18,12 @@ const (
// PowerOff indicates that the device is powered off.
PowerOff Power = "Off"
+
+ // TopologyHeightUnit is the annotation key for the height unit of a server in a rack.
+ TopologyHeightUnit = "topology.metal.ironcore.dev/heightunit"
+
+ // TopologyRack is the annotation key for the rack of a server.
+ TopologyRack = "topology.metal.ironcore.dev/rack"
)
// ServerPowerState defines the possible power states for a server.
@@ -59,6 +65,16 @@ type BMCAccess struct {
BMCSecretRef v1.LocalObjectReference `json:"bmcSecretRef"`
}
+// BootOrder represents the boot order of the server.
+type BootOrder struct {
+ // Name is the name of the boot device.
+ Name string `json:"name"`
+ // Priority is the priority of the boot device.
+ Priority int `json:"priority"`
+ // Device is the device to boot from.
+ Device string `json:"device"`
+}
+
// ServerSpec defines the desired state of a Server.
type ServerSpec struct {
// UUID is the unique identifier for the server.
@@ -105,11 +121,19 @@ type ServerSpec struct {
// BootConfigurationRef is a reference to a BootConfiguration object that specifies
// the boot configuration for this server. This field is optional and can be omitted
// if no boot configuration is specified.
- // This is used during initial provisioning of the server automatically.
- // This is also used during maintenance if Maintenance needs server to be taken offline
// +optional
BootConfigurationRef *v1.ObjectReference `json:"bootConfigurationRef,omitempty"`
+ // MaintenanceBootConfigurationRef is a reference to a BootConfiguration object that specifies
+ // the boot configuration for this server during maintenance. This field is optional and can be omitted
+ // +optional
+ MaintenanceBootConfigurationRef *v1.ObjectReference `json:"maintenanceBootConfigurationRef,omitempty"`
+
+ // BootOrder specifies the boot order of the server.
+ // Deprecated: currently not supported.
+ // +optional
+ BootOrder []BootOrder `json:"bootOrder,omitempty"`
+
// BIOSSettingsRef is a reference to a biossettings object that specifies
// the BIOS configuration for this server.
// +optional
diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go
index bafeae7b..09d61e41 100644
--- a/api/v1alpha1/zz_generated.deepcopy.go
+++ b/api/v1alpha1/zz_generated.deepcopy.go
@@ -1053,6 +1053,21 @@ func (in *BMCVersionTemplate) DeepCopy() *BMCVersionTemplate {
return out
}
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *BootOrder) DeepCopyInto(out *BootOrder) {
+ *out = *in
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new BootOrder.
+func (in *BootOrder) DeepCopy() *BootOrder {
+ if in == nil {
+ return nil
+ }
+ out := new(BootOrder)
+ in.DeepCopyInto(out)
+ return out
+}
+
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ConsoleProtocol) DeepCopyInto(out *ConsoleProtocol) {
*out = *in
@@ -1686,6 +1701,16 @@ func (in *ServerSpec) DeepCopyInto(out *ServerSpec) {
*out = new(v1.ObjectReference)
**out = **in
}
+ if in.MaintenanceBootConfigurationRef != nil {
+ in, out := &in.MaintenanceBootConfigurationRef, &out.MaintenanceBootConfigurationRef
+ *out = new(v1.ObjectReference)
+ **out = **in
+ }
+ if in.BootOrder != nil {
+ in, out := &in.BootOrder, &out.BootOrder
+ *out = make([]BootOrder, len(*in))
+ copy(*out, *in)
+ }
if in.BIOSSettingsRef != nil {
in, out := &in.BIOSSettingsRef, &out.BIOSSettingsRef
*out = new(v1.LocalObjectReference)
diff --git a/config/crd/bases/metal.ironcore.dev_servers.yaml b/config/crd/bases/metal.ironcore.dev_servers.yaml
index 27f8bb6a..4a004a6e 100644
--- a/config/crd/bases/metal.ironcore.dev_servers.yaml
+++ b/config/crd/bases/metal.ironcore.dev_servers.yaml
@@ -158,8 +158,6 @@ spec:
BootConfigurationRef is a reference to a BootConfiguration object that specifies
the boot configuration for this server. This field is optional and can be omitted
if no boot configuration is specified.
- This is used during initial provisioning of the server automatically.
- This is also used during maintenance if Maintenance needs server to be taken offline
properties:
apiVersion:
description: API version of the referent.
@@ -201,10 +199,77 @@ spec:
type: string
type: object
x-kubernetes-map-type: atomic
+ bootOrder:
+ description: |-
+ BootOrder specifies the boot order of the server.
+ Deprecated: currently not supported.
+ items:
+ description: BootOrder represents the boot order of the server.
+ properties:
+ device:
+ description: Device is the device to boot from.
+ type: string
+ name:
+ description: Name is the name of the boot device.
+ type: string
+ priority:
+ description: Priority is the priority of the boot device.
+ type: integer
+ required:
+ - device
+ - name
+ - priority
+ type: object
+ type: array
indicatorLED:
description: IndicatorLED specifies the desired state of the server's
indicator LED.
type: string
+ maintenanceBootConfigurationRef:
+ description: |-
+ MaintenanceBootConfigurationRef is a reference to a BootConfiguration object that specifies
+ the boot configuration for this server during maintenance. This field is optional and can be omitted
+ properties:
+ apiVersion:
+ description: API version of the referent.
+ type: string
+ fieldPath:
+ description: |-
+ If referring to a piece of an object instead of an entire object, this string
+ should contain a valid JSON/Go field access statement, such as desiredState.manifest.containers[2].
+ For example, if the object reference is to a container within a pod, this would take on a value like:
+ "spec.containers{name}" (where "name" refers to the name of the container that triggered
+ the event) or if no container name is specified "spec.containers[2]" (container with
+ index 2 in this pod). This syntax is chosen only to have some well-defined way of
+ referencing a part of an object.
+ type: string
+ kind:
+ description: |-
+ Kind of the referent.
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
+ type: string
+ name:
+ description: |-
+ Name of the referent.
+ More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
+ type: string
+ namespace:
+ description: |-
+ Namespace of the referent.
+ More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/
+ type: string
+ resourceVersion:
+ description: |-
+ Specific resourceVersion to which this reference is made, if any.
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#concurrency-control-and-consistency
+ type: string
+ uid:
+ description: |-
+ UID of the referent.
+ More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#uids
+ type: string
+ type: object
+ x-kubernetes-map-type: atomic
power:
description: Power specifies the desired power state of the server.
type: string
diff --git a/dist/chart/templates/crd/metal.ironcore.dev_servers.yaml b/dist/chart/templates/crd/metal.ironcore.dev_servers.yaml
index b5ad500f..1991de31 100755
--- a/dist/chart/templates/crd/metal.ironcore.dev_servers.yaml
+++ b/dist/chart/templates/crd/metal.ironcore.dev_servers.yaml
@@ -164,8 +164,6 @@ spec:
BootConfigurationRef is a reference to a BootConfiguration object that specifies
the boot configuration for this server. This field is optional and can be omitted
if no boot configuration is specified.
- This is used during initial provisioning of the server automatically.
- This is also used during maintenance if Maintenance needs server to be taken offline
properties:
apiVersion:
description: API version of the referent.
@@ -207,10 +205,77 @@ spec:
type: string
type: object
x-kubernetes-map-type: atomic
+ bootOrder:
+ description: |-
+ BootOrder specifies the boot order of the server.
+ Deprecated: currently not supported.
+ items:
+ description: BootOrder represents the boot order of the server.
+ properties:
+ device:
+ description: Device is the device to boot from.
+ type: string
+ name:
+ description: Name is the name of the boot device.
+ type: string
+ priority:
+ description: Priority is the priority of the boot device.
+ type: integer
+ required:
+ - device
+ - name
+ - priority
+ type: object
+ type: array
indicatorLED:
description: IndicatorLED specifies the desired state of the server's
indicator LED.
type: string
+ maintenanceBootConfigurationRef:
+ description: |-
+ MaintenanceBootConfigurationRef is a reference to a BootConfiguration object that specifies
+ the boot configuration for this server during maintenance. This field is optional and can be omitted
+ properties:
+ apiVersion:
+ description: API version of the referent.
+ type: string
+ fieldPath:
+ description: |-
+ If referring to a piece of an object instead of an entire object, this string
+ should contain a valid JSON/Go field access statement, such as desiredState.manifest.containers[2].
+ For example, if the object reference is to a container within a pod, this would take on a value like:
+ "spec.containers{name}" (where "name" refers to the name of the container that triggered
+ the event) or if no container name is specified "spec.containers[2]" (container with
+ index 2 in this pod). This syntax is chosen only to have some well-defined way of
+ referencing a part of an object.
+ type: string
+ kind:
+ description: |-
+ Kind of the referent.
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
+ type: string
+ name:
+ description: |-
+ Name of the referent.
+ More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
+ type: string
+ namespace:
+ description: |-
+ Namespace of the referent.
+ More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/
+ type: string
+ resourceVersion:
+ description: |-
+ Specific resourceVersion to which this reference is made, if any.
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#concurrency-control-and-consistency
+ type: string
+ uid:
+ description: |-
+ UID of the referent.
+ More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#uids
+ type: string
+ type: object
+ x-kubernetes-map-type: atomic
power:
description: Power specifies the desired power state of the server.
type: string
diff --git a/docs/api-reference/api.md b/docs/api-reference/api.md
index 7f228856..20382b9c 100644
--- a/docs/api-reference/api.md
+++ b/docs/api-reference/api.md
@@ -2577,6 +2577,57 @@ ServerMaintenancePolicy
+
+
+(Appears on:ServerSpec)
+
+
+
BootOrder represents the boot order of the server.
+
+
+
+
+| Field |
+Description |
+
+
+
+
+
+name
+
+string
+
+ |
+
+ Name is the name of the boot device.
+ |
+
+
+
+priority
+
+int
+
+ |
+
+ Priority is the priority of the boot device.
+ |
+
+
+
+device
+
+string
+
+ |
+
+ Device is the device to boot from.
+ |
+
+
+
@@ -3496,9 +3547,37 @@ Kubernetes core/v1.ObjectReference
(Optional)
BootConfigurationRef is a reference to a BootConfiguration object that specifies
the boot configuration for this server. This field is optional and can be omitted
-if no boot configuration is specified.
-This is used during initial provisioning of the server automatically.
-This is also used during maintenance if Maintenance needs server to be taken offline
+if no boot configuration is specified.
+
+
+
+
+maintenanceBootConfigurationRef
+
+
+Kubernetes core/v1.ObjectReference
+
+
+ |
+
+(Optional)
+ MaintenanceBootConfigurationRef is a reference to a BootConfiguration object that specifies
+the boot configuration for this server during maintenance. This field is optional and can be omitted
+ |
+
+
+
+bootOrder
+
+
+[]BootOrder
+
+
+ |
+
+(Optional)
+ BootOrder specifies the boot order of the server.
+Deprecated: currently not supported.
|
@@ -4677,9 +4756,37 @@ Kubernetes core/v1.ObjectReference
(Optional)
BootConfigurationRef is a reference to a BootConfiguration object that specifies
the boot configuration for this server. This field is optional and can be omitted
-if no boot configuration is specified.
-This is used during initial provisioning of the server automatically.
-This is also used during maintenance if Maintenance needs server to be taken offline
+if no boot configuration is specified.
+
+
+
+
+maintenanceBootConfigurationRef
+
+
+Kubernetes core/v1.ObjectReference
+
+
+ |
+
+(Optional)
+ MaintenanceBootConfigurationRef is a reference to a BootConfiguration object that specifies
+the boot configuration for this server during maintenance. This field is optional and can be omitted
+ |
+
+
+
+bootOrder
+
+
+[]BootOrder
+
+
+ |
+
+(Optional)
+ BootOrder specifies the boot order of the server.
+Deprecated: currently not supported.
|
diff --git a/internal/controller/server_controller.go b/internal/controller/server_controller.go
index 774bd578..68f8b02d 100644
--- a/internal/controller/server_controller.go
+++ b/internal/controller/server_controller.go
@@ -198,6 +198,14 @@ func (r *ServerReconciler) reconcile(ctx context.Context, log logr.Logger, serve
return ctrl.Result{}, err
}
+ // temp until we remove the unused field: handle deprecated BootOrder field
+ if len(server.Spec.BootOrder) > 0 {
+ serverBase := server.DeepCopy()
+ server.Spec.BootOrder = nil
+ err := r.Patch(ctx, server, client.MergeFrom(serverBase))
+ return ctrl.Result{}, err
+ }
+
if modified, err := r.handleAnnotionOperations(ctx, log, bmcClient, server); err != nil || modified {
return ctrl.Result{}, err
}
From e972a07e5c7a2870fc1c3704a0fbe9c9ca733964 Mon Sep 17 00:00:00 2001
From: Nagadeesh Nagaraja
Date: Mon, 27 Oct 2025 15:23:20 +0100
Subject: [PATCH 12/12] update logic to based on review comments
---
api/v1alpha1/constants.go | 4 +-
api/v1alpha1/servermaintenance_types.go | 4 +-
docs/api-reference/api.md | 4 +-
.../controller/biossettings_controller.go | 29 +++----
internal/controller/bmcsettings_controller.go | 30 ++++---
internal/controller/bmcversion_controller.go | 30 +++----
.../servermaintenance_controller.go | 78 +++++++++++--------
.../servermaintenance_controller_test.go | 6 +-
8 files changed, 102 insertions(+), 83 deletions(-)
diff --git a/api/v1alpha1/constants.go b/api/v1alpha1/constants.go
index 2b8f6cdf..e26f4246 100644
--- a/api/v1alpha1/constants.go
+++ b/api/v1alpha1/constants.go
@@ -25,10 +25,8 @@ const (
// OperationAnnotationForceUpdateInProgress allows update of a resource even if it is in progress.
OperationAnnotationForceUpdateInProgress = "ForceUpdateInProgress"
- // OperationSkipBootConfiguration indicates which boot configuration operation should be skipped.
- OperationSkipBootConfiguration = "metal.ironcore.dev/operation-skip-boot-configuration"
// SkipBootConfiguration skips the boot configuration if set to this value.
- SkipBootConfiguration = "skip"
+ SkipBootConfiguration = "skip-boot-configuration-enforement"
// OperationAnnotationForceReset forces a reset before next operation
OperationAnnotationForceReset = "ForceReset"
)
diff --git a/api/v1alpha1/servermaintenance_types.go b/api/v1alpha1/servermaintenance_types.go
index 8570b4e2..df094062 100644
--- a/api/v1alpha1/servermaintenance_types.go
+++ b/api/v1alpha1/servermaintenance_types.go
@@ -131,8 +131,8 @@ const (
ServerMaintenanceStatePending ServerMaintenanceState = "Pending"
// ServerMaintenanceStateInMaintenance specifies that the server is in maintenance.
ServerMaintenanceStateInMaintenance ServerMaintenanceState = "InMaintenance"
- // ServerMaintenanceStatePreapareMaintenance specifies that the server is in maintenance.
- ServerMaintenanceStatePreapareMaintenance ServerMaintenanceState = "PrepareForMaintenance"
+ // ServerMaintenanceStatePreparing specifies that the server is in maintenance.
+ ServerMaintenanceStatePreparing ServerMaintenanceState = "Preparing"
// ServerMaintenanceStateFailed specifies that the server maintenance has failed.
ServerMaintenanceStateFailed ServerMaintenanceState = "Failed"
)
diff --git a/docs/api-reference/api.md b/docs/api-reference/api.md
index 8aed1ce9..1284f4dc 100644
--- a/docs/api-reference/api.md
+++ b/docs/api-reference/api.md
@@ -4518,8 +4518,8 @@ ServerBootConfigurationTemplate
"Pending" |
ServerMaintenanceStatePending specifies that the server maintenance is pending.
|
-
"PrepareForMaintenance" |
-ServerMaintenanceStatePreapareMaintenance specifies that the server is in maintenance.
+ |
"Preparing" |
+ServerMaintenanceStatePreparing specifies that the server is in maintenance.
|
diff --git a/internal/controller/biossettings_controller.go b/internal/controller/biossettings_controller.go
index fcd0dcbf..790d07cd 100644
--- a/internal/controller/biossettings_controller.go
+++ b/internal/controller/biossettings_controller.go
@@ -461,13 +461,14 @@ func (r *BiosSettingsReconciler) handleSettingInProgressState(
}
// check if the maintenance is granted
- if ok, state := r.checkIfMaintenanceGranted(ctx, log, biosSettings, server); !ok {
+ state := r.getMaintenanceStateIfApproved(ctx, log, biosSettings, server)
+ if state == metalv1alpha1.ServerMaintenanceStateFailed {
+ log.V(1).Info("server maintenance request failed. Please check the ServerMaintenance object")
+ err := r.updateBiosSettingsStatus(ctx, log, biosSettings, metalv1alpha1.BIOSSettingsStateFailed, nil)
+ return ctrl.Result{}, err
+ }
+ if state != metalv1alpha1.ServerMaintenanceStateInMaintenance {
log.V(1).Info("Waiting for maintenance to be granted before continuing with updating settings")
- if state == metalv1alpha1.ServerMaintenanceStateFailed {
- log.V(1).Info("server maintenance request failed. Please check the ServerMaintenance object")
- err := r.updateBiosSettingsStatus(ctx, log, biosSettings, metalv1alpha1.BIOSSettingsStateFailed, nil)
- return ctrl.Result{}, err
- }
return ctrl.Result{}, nil
}
@@ -1148,43 +1149,43 @@ func (r *BiosSettingsReconciler) checkForRequiredPowerStatus(
return server.Status.PowerState == powerState
}
-func (r *BiosSettingsReconciler) checkIfMaintenanceGranted(
+func (r *BiosSettingsReconciler) getMaintenanceStateIfApproved(
ctx context.Context,
log logr.Logger,
biosSettings *metalv1alpha1.BIOSSettings,
server *metalv1alpha1.Server,
-) (bool, metalv1alpha1.ServerMaintenanceState) {
+) metalv1alpha1.ServerMaintenanceState {
if biosSettings.Spec.ServerMaintenanceRef == nil {
- return true, ""
+ return ""
}
if server.Status.State == metalv1alpha1.ServerStateMaintenance {
serverMaintenence, err := r.getReferredServerMaintenance(ctx, log, biosSettings.Spec.ServerMaintenanceRef)
if err != nil {
log.V(1).Info("Failed to get referred ServerMaintenance", "error", err, "serverMaintenanceRef", biosSettings.Spec.ServerMaintenanceRef)
- return false, ""
+ return ""
}
if serverMaintenence.Status.State != metalv1alpha1.ServerMaintenanceStateInMaintenance {
log.V(1).Info("ServerMaintenance is not in maintenance. waiting...",
"serverMaintenance State", serverMaintenence.Status.State,
"serverMaintenance", serverMaintenence.Name)
- return false, serverMaintenence.Status.State
+ return serverMaintenence.Status.State
}
if server.Spec.ServerMaintenanceRef == nil || server.Spec.ServerMaintenanceRef.UID != biosSettings.Spec.ServerMaintenanceRef.UID {
// server in maintenance for other tasks. or
// server maintenance ref is wrong in either server or biosSettings
// wait for update on the server obj
log.V(1).Info("Server is already in maintenance for other tasks", "Server", server.Name, "serverMaintenanceRef", server.Spec.ServerMaintenanceRef)
- return false, serverMaintenence.Status.State
+ return serverMaintenence.Status.State
}
} else {
// we still need to wait for server to enter maintenance
// wait for update on the server obj
log.V(1).Info("Server not yet in maintenance", "Server", server.Name, "State", server.Status.State, "MaintenanceRef", server.Spec.ServerMaintenanceRef)
- return false, ""
+ return ""
}
- return true, metalv1alpha1.ServerMaintenanceStateInMaintenance
+ return metalv1alpha1.ServerMaintenanceStateInMaintenance
}
func (r *BiosSettingsReconciler) requestMaintenanceOnServer(
diff --git a/internal/controller/bmcsettings_controller.go b/internal/controller/bmcsettings_controller.go
index 8de737f6..4c3eda0a 100644
--- a/internal/controller/bmcsettings_controller.go
+++ b/internal/controller/bmcsettings_controller.go
@@ -314,13 +314,15 @@ func (r *BMCSettingsReconciler) handleSettingInProgressState(
}
// check if the maintenance is granted
- if ok, state := r.checkIfMaintenanceGranted(ctx, log, bmcSetting, bmcClient); !ok {
- log.V(1).Info("Waiting for maintenance to be granted before continuing with updating settings")
- return ctrl.Result{}, err
- } else if state == metalv1alpha1.ServerMaintenanceStateFailed {
+ state := r.getMaintenanceStateIfApproved(ctx, log, bmcSetting, bmcClient)
+ if state == metalv1alpha1.ServerMaintenanceStateFailed {
err := r.updateBMCSettingsStatus(ctx, log, bmcSetting, metalv1alpha1.BMCSettingsStateFailed)
return ctrl.Result{}, err
}
+ if state != metalv1alpha1.ServerMaintenanceStateInMaintenance {
+ log.V(1).Info("Waiting for maintenance to be granted before continuing with updating settings")
+ return ctrl.Result{}, err
+ }
return r.updateSettingsAndVerify(ctx, log, bmcSetting, BMC, settingsDiff, bmcClient)
}
@@ -494,25 +496,25 @@ func (r *BMCSettingsReconciler) getBMCVersionAndSettingsDifference(
return currentBMCVersion, diff, nil
}
-func (r *BMCSettingsReconciler) checkIfMaintenanceGranted(
+func (r *BMCSettingsReconciler) getMaintenanceStateIfApproved(
ctx context.Context,
log logr.Logger,
bmcSetting *metalv1alpha1.BMCSettings,
bmcClient bmc.BMC,
-) (bool, metalv1alpha1.ServerMaintenanceState) {
+) metalv1alpha1.ServerMaintenanceState {
if bmcSetting.Spec.ServerMaintenanceRefs == nil {
- return false, ""
+ return ""
}
servers, err := r.getServers(ctx, log, bmcSetting, bmcClient)
if err != nil {
log.V(1).Error(err, "Failed to get ref. servers to determine maintenance state ")
- return false, ""
+ return ""
}
if len(bmcSetting.Spec.ServerMaintenanceRefs) != len(servers) {
log.V(1).Info("Not all servers have Maintenance", "ServerMaintenanceRefs", bmcSetting.Spec.ServerMaintenanceRefs, "Servers", servers)
- return false, ""
+ return ""
}
notInMaintenanceState := make(map[string]metalv1alpha1.ServerMaintenanceState, len(servers))
@@ -527,7 +529,9 @@ func (r *BMCSettingsReconciler) checkIfMaintenanceGranted(
}
// fail immediately if any of the server maintenance request failed, as we can not proceed further
if serverMaintenance.Status.State == metalv1alpha1.ServerMaintenanceStateFailed {
- return false, metalv1alpha1.ServerMaintenanceStateFailed
+ // atleast one server maintenance failed
+ log.V(1).Info("ServerMaintenance request failed", "Server", server.Name, "ServerMaintenance", serverMaintenance.Name)
+ return metalv1alpha1.ServerMaintenanceStateFailed
}
// this gives us the waiting time for the server to be prepared for maintenance by ServerMaintenance controller
if serverMaintenance.Status.State != metalv1alpha1.ServerMaintenanceStateInMaintenance {
@@ -557,10 +561,12 @@ func (r *BMCSettingsReconciler) checkIfMaintenanceGranted(
log.V(1).Info("Some servers not yet in maintenance",
"Required maintenances on servers", bmcSetting.Spec.ServerMaintenanceRefs,
"Servers not in maintence", notInMaintenanceState)
- return false, metalv1alpha1.ServerMaintenanceStatePreapareMaintenance
+ // some servers are not yet in maintenance
+ return metalv1alpha1.ServerMaintenanceStatePending
}
- return true, metalv1alpha1.ServerMaintenanceStateInMaintenance
+ // all servers are in maintenance
+ return metalv1alpha1.ServerMaintenanceStateInMaintenance
}
func (r *BMCSettingsReconciler) requestMaintenanceOnServers(
diff --git a/internal/controller/bmcversion_controller.go b/internal/controller/bmcversion_controller.go
index 8f85cb9d..1681cd27 100644
--- a/internal/controller/bmcversion_controller.go
+++ b/internal/controller/bmcversion_controller.go
@@ -243,13 +243,15 @@ func (r *BMCVersionReconciler) ensureBMCVersionStateTransition(
}
// check if the maintenance is granted
- if ok, state := r.checkIfMaintenanceGranted(ctx, log, bmcClient, bmcVersion); !ok {
- log.V(1).Info("Waiting for maintenance to be granted before continuing with updating bmc version")
- return ctrl.Result{}, err
- } else if state == metalv1alpha1.ServerMaintenanceStateFailed {
+ state := r.getMaintenanceStateIfApproved(ctx, log, bmcClient, bmcVersion)
+ if state == metalv1alpha1.ServerMaintenanceStateFailed {
err := r.updateBMCVersionStatus(ctx, log, bmcVersion, metalv1alpha1.BMCVersionStateFailed, nil, nil, nil)
return ctrl.Result{}, err
}
+ if state != metalv1alpha1.ServerMaintenanceStateInMaintenance {
+ log.V(1).Info("Waiting for maintenance to be granted before continuing with updating bmc version")
+ return ctrl.Result{}, err
+ }
return r.handleUpgradeInProgressState(ctx, log, bmcVersion, bmcClient, bmc)
case metalv1alpha1.BMCVersionStateCompleted:
@@ -412,27 +414,27 @@ func (r *BMCVersionReconciler) getBMCVersionFromBMC(
return currentBMCVersion, nil
}
-func (r *BMCVersionReconciler) checkIfMaintenanceGranted(
+func (r *BMCVersionReconciler) getMaintenanceStateIfApproved(
ctx context.Context,
log logr.Logger,
bmcClient bmc.BMC,
bmcVersion *metalv1alpha1.BMCVersion,
-) (bool, metalv1alpha1.ServerMaintenanceState) {
+) metalv1alpha1.ServerMaintenanceState {
// todo length
if bmcVersion.Spec.ServerMaintenanceRefs == nil {
- return true, ""
+ return ""
}
servers, err := r.getServers(ctx, log, bmcClient, bmcVersion)
if err != nil {
log.V(1).Error(err, "Failed to get ref. servers to determine maintenance state ")
- return false, ""
+ return ""
}
if len(bmcVersion.Spec.ServerMaintenanceRefs) != len(servers) {
log.V(1).Info("Not all servers have Maintenance", "ServerMaintenanceRefs", bmcVersion.Spec.ServerMaintenanceRefs, "Servers", servers)
- return false, ""
+ return ""
}
notInMaintenanceState := make(map[string]metalv1alpha1.ServerMaintenanceState, len(servers))
@@ -447,7 +449,8 @@ func (r *BMCVersionReconciler) checkIfMaintenanceGranted(
}
if serverMaintenance.Status.State == metalv1alpha1.ServerMaintenanceStateFailed {
// fail immediately if any of the server maintenance request failed, as we can not proceed further
- return false, metalv1alpha1.ServerMaintenanceStateFailed
+ log.V(1).Info("ServerMaintenance in failed state", "Server", server.Name, "ServerMaintenance", serverMaintenance.Name)
+ return metalv1alpha1.ServerMaintenanceStateFailed
}
// this gives us the waiting time for the server to be prepared for maintenance by ServerMaintenance controller
if serverMaintenance.Status.State != metalv1alpha1.ServerMaintenanceStateInMaintenance {
@@ -475,10 +478,11 @@ func (r *BMCVersionReconciler) checkIfMaintenanceGranted(
if len(notInMaintenanceState) > 0 {
log.V(1).Info("Some servers not yet in maintenance", "req maintenances on servers", bmcVersion.Spec.ServerMaintenanceRefs)
- return false, metalv1alpha1.ServerMaintenanceStatePreapareMaintenance
+ // waiting for all servers to be in maintenance
+ return metalv1alpha1.ServerMaintenanceStatePending
}
-
- return true, metalv1alpha1.ServerMaintenanceStateInMaintenance
+ // all servers in maintenance
+ return metalv1alpha1.ServerMaintenanceStateInMaintenance
}
func (r *BMCVersionReconciler) checkVersionAndTransistionState(
diff --git a/internal/controller/servermaintenance_controller.go b/internal/controller/servermaintenance_controller.go
index 9d2fd409..8c4d6b9e 100644
--- a/internal/controller/servermaintenance_controller.go
+++ b/internal/controller/servermaintenance_controller.go
@@ -7,6 +7,7 @@ import (
"context"
"fmt"
"reflect"
+ "slices"
"strings"
"time"
@@ -43,6 +44,11 @@ type ServerMaintenanceReconciler struct {
BMCOptions bmc.Options
}
+type bootConfigChangedStatus struct {
+ changed bool
+ bootConfig *metalv1alpha1.ServerBootConfiguration
+}
+
// +kubebuilder:rbac:groups=metal.ironcore.dev,resources=servermaintenances,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=metal.ironcore.dev,resources=servermaintenances/status,verbs=get;update;patch
// +kubebuilder:rbac:groups=metal.ironcore.dev,resources=servermaintenances/finalizers,verbs=update
@@ -91,9 +97,8 @@ func (r *ServerMaintenanceReconciler) reconcile(ctx context.Context, log logr.Lo
}
// set the servermaintenance state to pending if it is not set
if serverMaintenance.Status.State == "" {
- if modified, err := r.patchMaintenanceState(ctx, serverMaintenance, metalv1alpha1.ServerMaintenanceStatePending); err != nil || modified {
- return ctrl.Result{}, err
- }
+ err := r.patchMaintenanceState(ctx, serverMaintenance, metalv1alpha1.ServerMaintenanceStatePending)
+ return ctrl.Result{}, err
}
return r.ensureServerMaintenanceStateTransition(ctx, log, serverMaintenance)
}
@@ -102,7 +107,7 @@ func (r *ServerMaintenanceReconciler) ensureServerMaintenanceStateTransition(ctx
switch serverMaintenance.Status.State {
case metalv1alpha1.ServerMaintenanceStatePending:
return r.handlePendingState(ctx, log, serverMaintenance)
- case metalv1alpha1.ServerMaintenanceStatePreapareMaintenance:
+ case metalv1alpha1.ServerMaintenanceStatePreparing:
return r.handlePrepareMaintenanceState(ctx, log, serverMaintenance)
case metalv1alpha1.ServerMaintenanceStateInMaintenance:
log.V(1).Info("Server is under maintenance", "ServerMaintenance", serverMaintenance.Name)
@@ -111,9 +116,9 @@ func (r *ServerMaintenanceReconciler) ensureServerMaintenanceStateTransition(ctx
return ctrl.Result{}, err
}
- if changed, _, err := r.isBootConfigurationChanged(ctx, log, serverMaintenance); err != nil {
+ if configStatus, err := r.isBootConfigurationChanged(ctx, log, serverMaintenance); err != nil {
return ctrl.Result{}, err
- } else if changed {
+ } else if configStatus.changed {
return r.moveToPrepareMaintenanceState(ctx, log, serverMaintenance, server)
}
err = r.setAndPatchServerPowerState(ctx, log, server, serverMaintenance.Spec.ServerPower)
@@ -190,10 +195,11 @@ func (r *ServerMaintenanceReconciler) handlePrepareMaintenanceState(ctx context.
}
var config *metalv1alpha1.ServerBootConfiguration
- changed := false
- if changed, config, err = r.isBootConfigurationChanged(ctx, log, serverMaintenance); err != nil {
+ configState, err := r.isBootConfigurationChanged(ctx, log, serverMaintenance)
+ if err != nil {
return ctrl.Result{}, err
- } else if changed || config == nil {
+ }
+ if configState.changed || configState.bootConfig == nil {
// turn server power off before applying new boot configuration
// this will help schedule the job to change the boot order for the server
// note: if the server is already off, this will be a no-op
@@ -209,15 +215,17 @@ func (r *ServerMaintenanceReconciler) handlePrepareMaintenanceState(ctx context.
if err != nil {
return ctrl.Result{}, err
}
+ } else {
+ config = configState.bootConfig
}
// Note: this is to check skip of BootConfiguration
// implication of skipping is that the server will still continue to run the OS during Maintenance
// might be subjected to reboots
- val, found := serverMaintenance.GetAnnotations()[metalv1alpha1.OperationSkipBootConfiguration]
+ val, found := serverMaintenance.GetAnnotations()[metalv1alpha1.OperationAnnotation]
if config == nil && (!found || val != metalv1alpha1.SkipBootConfiguration) {
log.V(1).Info("No ServerBootConfigurationTemplate boot configuration", "Server", server.Name)
- _, err = r.patchMaintenanceState(ctx, serverMaintenance, metalv1alpha1.ServerMaintenanceStateFailed)
+ err = r.patchMaintenanceState(ctx, serverMaintenance, metalv1alpha1.ServerMaintenanceStateFailed)
return ctrl.Result{}, err
}
@@ -227,10 +235,8 @@ func (r *ServerMaintenanceReconciler) handlePrepareMaintenanceState(ctx context.
return ctrl.Result{}, nil
}
if config.Status.State == metalv1alpha1.ServerBootConfigurationStateError {
- if modified, err := r.patchMaintenanceState(ctx, serverMaintenance, metalv1alpha1.ServerMaintenanceStateFailed); err != nil || modified {
- return ctrl.Result{}, err
- }
- return ctrl.Result{}, nil
+ err = r.patchMaintenanceState(ctx, serverMaintenance, metalv1alpha1.ServerMaintenanceStateFailed)
+ return ctrl.Result{}, err
}
if config.Status.State == metalv1alpha1.ServerBootConfigurationStateReady {
log.V(1).Info("Server maintenance boot configuration is ready", "Server", server.Name)
@@ -247,7 +253,7 @@ func (r *ServerMaintenanceReconciler) handlePrepareMaintenanceState(ctx context.
return ctrl.Result{}, err
}
}
- _, err = r.patchMaintenanceState(ctx, serverMaintenance, metalv1alpha1.ServerMaintenanceStateInMaintenance)
+ err = r.patchMaintenanceState(ctx, serverMaintenance, metalv1alpha1.ServerMaintenanceStateInMaintenance)
return ctrl.Result{}, err
}
@@ -432,10 +438,8 @@ func (r *ServerMaintenanceReconciler) moveToPrepareMaintenanceState(
return ctrl.Result{}, err
}
- if modified, err := r.patchMaintenanceState(ctx, serverMaintenance, metalv1alpha1.ServerMaintenanceStatePreapareMaintenance); err != nil || modified {
- return ctrl.Result{}, err
- }
- return ctrl.Result{}, nil
+ err := r.patchMaintenanceState(ctx, serverMaintenance, metalv1alpha1.ServerMaintenanceStatePreparing)
+ return ctrl.Result{}, err
}
func (r *ServerMaintenanceReconciler) configureBootOrder(ctx context.Context, log logr.Logger, server *metalv1alpha1.Server, maintenance *metalv1alpha1.ServerMaintenance) (bool, error) {
@@ -502,7 +506,7 @@ func (r *ServerMaintenanceReconciler) configureBootOrder(ctx context.Context, lo
bootOrderChanged := rearrangeBootOrder(bootOrder, bootOptions, isPxeBootOption, isDiskBootOption)
- if reflect.DeepEqual(bootOrder, bootOrderChanged) {
+ if slices.Equal(bootOrder, bootOrderChanged) {
// boot order is already set to pxe first, nothing to do
log.V(1).Info("boot order for server is set to pxe first",
"Server", server.Name,
@@ -594,7 +598,7 @@ func (r *ServerMaintenanceReconciler) revertMaintenanceBootConfig(
// sanitize the boot order to remove any invalid boot options
// this case can happen if the bios settings were changed boot options during maintenance
// we remove only the invalid boot options and keep the rest in same order
- // because the settings will take care of new added boot device and apend it to end
+ // because the settings will take care of new added boot device and append it to end
currentBootDeviceMap := make(map[string]struct{})
for _, bootDevice := range bootOrder {
currentBootDeviceMap[bootDevice] = struct{}{}
@@ -616,9 +620,9 @@ func (r *ServerMaintenanceReconciler) revertMaintenanceBootConfig(
if err != nil {
return false, fmt.Errorf("failed to revert boot order: %w", err)
}
- // save the current boot order to revert back after maintenance
+ // save the current expected (sanitized) boot order to revert back after maintenance
status := &metalv1alpha1.ServerMaintenanceBootOrder{
- DefaultBootOrder: serverMaintenance.Status.BootOrderStatus.DefaultBootOrder,
+ DefaultBootOrder: sanitizedBootOrder,
State: metalv1alpha1.BootOrderConfigSuccessRevertInProgress,
}
if err := r.patchBootOrderStatus(ctx, log, serverMaintenance, status); err != nil {
@@ -639,7 +643,7 @@ func (r *ServerMaintenanceReconciler) revertMaintenanceBootConfig(
}
revertCompleted := false
- if reflect.DeepEqual(bootOrder, serverMaintenance.Status.BootOrderStatus.DefaultBootOrder) {
+ if slices.Equal(bootOrder, serverMaintenance.Status.BootOrderStatus.DefaultBootOrder) {
revertCompleted = true
} else {
// sometimes, changing biosSettings leads to change in number of boot options
@@ -720,16 +724,16 @@ func (r *ServerMaintenanceReconciler) removeMaintenanceRefFromServer(ctx context
return nil
}
-func (r *ServerMaintenanceReconciler) patchMaintenanceState(ctx context.Context, serverMaintenance *metalv1alpha1.ServerMaintenance, state metalv1alpha1.ServerMaintenanceState) (bool, error) {
+func (r *ServerMaintenanceReconciler) patchMaintenanceState(ctx context.Context, serverMaintenance *metalv1alpha1.ServerMaintenance, state metalv1alpha1.ServerMaintenanceState) error {
if serverMaintenance.Status.State == state {
- return false, nil
+ return nil
}
base := serverMaintenance.DeepCopy()
serverMaintenance.Status.State = state
if err := r.Status().Patch(ctx, serverMaintenance, client.MergeFrom(base)); err != nil {
- return false, fmt.Errorf("failed to patch serverMaintenance state: %w", err)
+ return fmt.Errorf("failed to patch serverMaintenance state: %w", err)
}
- return true, nil
+ return nil
}
func (r *ServerMaintenanceReconciler) patchServerClaimAnnotation(ctx context.Context, log logr.Logger, serverClaim *metalv1alpha1.ServerClaim, set map[string]string) error {
@@ -775,20 +779,26 @@ func (r *ServerMaintenanceReconciler) isBootConfigurationChanged(
ctx context.Context,
log logr.Logger,
serverMaintenance *metalv1alpha1.ServerMaintenance,
-) (bool, *metalv1alpha1.ServerBootConfiguration, error) {
+) (*bootConfigChangedStatus, error) {
+ status := bootConfigChangedStatus{
+ changed: false,
+ }
config, err := r.getMaintenanceBootConfigurationRef(ctx, log, serverMaintenance)
+ status.bootConfig = config
if err != nil {
- return false, config, err
+ return &status, err
}
if config == nil {
- return false, config, nil
+ return &status, nil
}
if reflect.DeepEqual(config.Spec, serverMaintenance.Spec.ServerBootConfigurationTemplate.Spec) {
// no changes
log.V(1).Info("No changes in ServerBootConfigurationTemplate")
- return false, config, nil
+ return &status, nil
}
- return true, config, nil
+ status.changed = true
+ log.V(1).Info("Detected changes in ServerBootConfigurationTemplate")
+ return &status, nil
}
// SetupWithManager sets up the controller with the Manager.
diff --git a/internal/controller/servermaintenance_controller_test.go b/internal/controller/servermaintenance_controller_test.go
index d64a2543..0df93a5c 100644
--- a/internal/controller/servermaintenance_controller_test.go
+++ b/internal/controller/servermaintenance_controller_test.go
@@ -96,7 +96,7 @@ var _ = Describe("ServerMaintenance Controller", func() {
}
Expect(k8sClient.Create(ctx, serverMaintenance)).To(Succeed())
Eventually(Object(serverMaintenance)).Should(SatisfyAll(
- HaveField("Status.State", metalv1alpha1.ServerMaintenanceStatePreapareMaintenance),
+ HaveField("Status.State", metalv1alpha1.ServerMaintenanceStatePreparing),
))
By("Checking the Server is in maintenance")
@@ -247,7 +247,7 @@ var _ = Describe("ServerMaintenance Controller", func() {
))
By("Checking the ServerMaintenance state is PrepareMaintenance")
Eventually(Object(serverMaintenance01)).Should(SatisfyAll(
- HaveField("Status.State", metalv1alpha1.ServerMaintenanceStatePreapareMaintenance),
+ HaveField("Status.State", metalv1alpha1.ServerMaintenanceStatePreparing),
))
MarkBootConfigReady(ctx, k8sClient, server.Spec.BootConfigurationRef.Name, server.Spec.BootConfigurationRef.Namespace)
@@ -276,7 +276,7 @@ var _ = Describe("ServerMaintenance Controller", func() {
By("Checking the second ServerMaintenance is now PreparingMaintenance")
Eventually(Object(serverMaintenance02)).Should(SatisfyAll(
- HaveField("Status.State", metalv1alpha1.ServerMaintenanceStatePreapareMaintenance),
+ HaveField("Status.State", metalv1alpha1.ServerMaintenanceStatePreparing),
))
By("Checking the ServerMaintenanceRef and BootConfigurationRef")