diff --git a/internal/controller/onboarding_controller.go b/internal/controller/onboarding_controller.go index 7e9fed71..c0db7358 100644 --- a/internal/controller/onboarding_controller.go +++ b/internal/controller/onboarding_controller.go @@ -26,11 +26,11 @@ import ( "time" corev1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/api/equality" "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" + k8sacmetav1 "k8s.io/client-go/applyconfigurations/meta/v1" "k8s.io/utils/clock" ctrl "sigs.k8s.io/controller-runtime" k8sclient "sigs.k8s.io/controller-runtime/pkg/client" @@ -45,6 +45,7 @@ import ( "github.com/gophercloud/utils/v2/openstack/clientconfig" kvmv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1" + apiv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/applyconfigurations/api/v1" "github.com/cobaltcore-dev/openstack-hypervisor-operator/internal/openstack" "github.com/cobaltcore-dev/openstack-hypervisor-operator/internal/utils" ) @@ -99,28 +100,30 @@ func (r *OnboardingController) Reconcile(ctx context.Context, req ctrl.Request) // We bail here out, because the openstack api is not the best to poll if hv.Status.HypervisorID == "" || hv.Status.ServiceID == "" { - if err := r.ensureNovaProperties(ctx, hv); err != nil { + hypervisorID, serviceID, err := r.lookupNovaProperties(ctx, hv) + if err != nil { if errors.Is(err, errRequeue) { return ctrl.Result{RequeueAfter: defaultWaitTime}, nil } return ctrl.Result{}, err } + statusCfg := apiv1.HypervisorStatus(). + WithHypervisorID(hypervisorID). + WithServiceID(serviceID) + statusCfg.Conditions = utils.ConditionsFromStatus(hv.Status.Conditions) + utils.SetApplyConfigurationStatusCondition(&statusCfg.Conditions, + *k8sacmetav1.Condition(). + WithType(kvmv1.ConditionTypeOnboarding). + WithStatus(metav1.ConditionTrue). + WithReason(kvmv1.ConditionReasonInitial). + WithMessage("Initial onboarding")) + return ctrl.Result{}, r.Status().Apply(ctx, + apiv1.Hypervisor(hv.Name, "").WithStatus(statusCfg), + k8sclient.ForceOwnership, k8sclient.FieldOwner(OnboardingControllerName)) } // check condition reason status := meta.FindStatusCondition(hv.Status.Conditions, kvmv1.ConditionTypeOnboarding) - if status == nil { - condition := metav1.Condition{ - Type: kvmv1.ConditionTypeOnboarding, - Status: metav1.ConditionTrue, - Reason: kvmv1.ConditionReasonInitial, - Message: "Initial onboarding", - } - return ctrl.Result{}, utils.PatchHypervisorStatusWithRetry(ctx, r.Client, hv.Name, OnboardingControllerName, func(h *kvmv1.Hypervisor) { - meta.SetStatusCondition(&h.Status.Conditions, condition) - }) - } - switch status.Reason { case kvmv1.ConditionReasonInitial: return ctrl.Result{}, r.initialOnboarding(ctx, hv) @@ -138,6 +141,19 @@ func (r *OnboardingController) Reconcile(ctx context.Context, req ctrl.Request) } } +// applyOnboardingCondition applies a single onboarding condition via SSA, +// carrying all existing conditions and scalar fields forward. +func (r *OnboardingController) applyOnboardingCondition(ctx context.Context, hv *kvmv1.Hypervisor, cond k8sacmetav1.ConditionApplyConfiguration) error { + statusCfg := apiv1.HypervisorStatus(). + WithHypervisorID(hv.Status.HypervisorID). + WithServiceID(hv.Status.ServiceID) + statusCfg.Conditions = utils.ConditionsFromStatus(hv.Status.Conditions) + utils.SetApplyConfigurationStatusCondition(&statusCfg.Conditions, cond) + return r.Status().Apply(ctx, + apiv1.Hypervisor(hv.Name, "").WithStatus(statusCfg), + k8sclient.ForceOwnership, k8sclient.FieldOwner(OnboardingControllerName)) +} + func (r *OnboardingController) abortOnboarding(ctx context.Context, hv *kvmv1.Hypervisor, computeHost, message string) error { status := meta.FindStatusCondition(hv.Status.Conditions, kvmv1.ConditionTypeOnboarding) // Never onboarded @@ -145,42 +161,28 @@ func (r *OnboardingController) abortOnboarding(ctx context.Context, hv *kvmv1.Hy return nil } - base := hv.DeepCopy() - - meta.SetStatusCondition(&hv.Status.Conditions, metav1.Condition{ - Type: kvmv1.ConditionTypeOnboarding, - Status: metav1.ConditionFalse, - Reason: kvmv1.ConditionReasonAborted, - Message: message, - }) - - if equality.Semantic.DeepEqual(hv, base) { - // Already aborted + // Already aborted with the same message — nothing to do + if status.Status == metav1.ConditionFalse && + status.Reason == kvmv1.ConditionReasonAborted && + status.Message == message { return nil } - condition := *meta.FindStatusCondition(hv.Status.Conditions, kvmv1.ConditionTypeOnboarding) if err := r.deleteTestServers(ctx, computeHost); err != nil { - condition = metav1.Condition{ - Type: kvmv1.ConditionTypeOnboarding, - Status: metav1.ConditionTrue, // No cleanup, so we are still "onboarding" - Reason: kvmv1.ConditionReasonAborted, - Message: err.Error(), - } - - meta.SetStatusCondition(&hv.Status.Conditions, condition) - if equality.Semantic.DeepEqual(hv, base) { - return err - } - - return errors.Join(err, utils.PatchHypervisorStatusWithRetry(ctx, r.Client, hv.Name, OnboardingControllerName, func(h *kvmv1.Hypervisor) { - meta.SetStatusCondition(&h.Status.Conditions, condition) - })) - } - - return utils.PatchHypervisorStatusWithRetry(ctx, r.Client, hv.Name, OnboardingControllerName, func(h *kvmv1.Hypervisor) { - meta.SetStatusCondition(&h.Status.Conditions, condition) - }) + return errors.Join(err, r.applyOnboardingCondition(ctx, hv, + *k8sacmetav1.Condition(). + WithType(kvmv1.ConditionTypeOnboarding). + WithStatus(metav1.ConditionTrue). // No cleanup, so we are still "onboarding" + WithReason(kvmv1.ConditionReasonAborted). + WithMessage(err.Error()))) + } + + return r.applyOnboardingCondition(ctx, hv, + *k8sacmetav1.Condition(). + WithType(kvmv1.ConditionTypeOnboarding). + WithStatus(metav1.ConditionFalse). + WithReason(kvmv1.ConditionReasonAborted). + WithMessage(message)) } func (r *OnboardingController) initialOnboarding(ctx context.Context, hv *kvmv1.Hypervisor) error { @@ -190,7 +192,6 @@ func (r *OnboardingController) initialOnboarding(ctx context.Context, hv *kvmv1. } // Wait for aggregates controller to apply the desired state (zone and test aggregate) - // Extract aggregate names for comparison currentAggregateNames := make([]string, len(hv.Status.Aggregates)) for i, agg := range hv.Status.Aggregates { currentAggregateNames[i] = agg.Name @@ -212,15 +213,12 @@ func (r *OnboardingController) initialOnboarding(ctx context.Context, hv *kvmv1. return result.Err } - condition := metav1.Condition{ - Type: kvmv1.ConditionTypeOnboarding, - Status: metav1.ConditionTrue, - Reason: kvmv1.ConditionReasonTesting, - Message: "Running onboarding tests", - } - return utils.PatchHypervisorStatusWithRetry(ctx, r.Client, hv.Name, OnboardingControllerName, func(h *kvmv1.Hypervisor) { - meta.SetStatusCondition(&h.Status.Conditions, condition) - }) + return r.applyOnboardingCondition(ctx, hv, + *k8sacmetav1.Condition(). + WithType(kvmv1.ConditionTypeOnboarding). + WithStatus(metav1.ConditionTrue). + WithReason(kvmv1.ConditionReasonTesting). + WithMessage("Running onboarding tests")) } func (r *OnboardingController) smokeTest(ctx context.Context, hv *kvmv1.Hypervisor, host string) (ctrl.Result, error) { @@ -228,7 +226,19 @@ func (r *OnboardingController) smokeTest(ctx context.Context, hv *kvmv1.Hypervis zone := hv.Labels[corev1.LabelTopologyZone] server, err := r.createOrGetTestServer(ctx, zone, host, hv.UID) if err != nil { - return ctrl.Result{}, fmt.Errorf("failed to create or get test instance: %w", err) + // Surface the failure in the Onboarding condition so the user can see + // the latest error and confirm the controller is still retrying. + // Returning a bare error here would leave the previously-set message + // in place and the resource looks frozen even though we keep retrying. + if err2 := r.applyOnboardingCondition(ctx, hv, + *k8sacmetav1.Condition(). + WithType(kvmv1.ConditionTypeOnboarding). + WithStatus(metav1.ConditionTrue). + WithReason(kvmv1.ConditionReasonTesting). + WithMessage(fmt.Sprintf("failed to create or get test instance: %v", err))); err2 != nil { + return ctrl.Result{}, err2 + } + return ctrl.Result{RequeueAfter: defaultWaitTime}, nil } switch server.Status { @@ -237,21 +247,20 @@ func (r *OnboardingController) smokeTest(ctx context.Context, hv *kvmv1.Hypervis id := server.ID server, err = servers.Get(ctx, r.testComputeClient, id).Extract() if err != nil { - // should not happened log.Error(err, "failed to get test instance, instance vanished", "id", id) return ctrl.Result{RequeueAfter: defaultWaitTime}, nil } - // Set condition back to testing - condition := metav1.Condition{ - Type: kvmv1.ConditionTypeOnboarding, - Status: metav1.ConditionTrue, - Reason: kvmv1.ConditionReasonTesting, - Message: "Server ended up in error state: " + server.Fault.Message, - } - if err = utils.PatchHypervisorStatusWithRetry(ctx, r.Client, hv.Name, OnboardingControllerName, func(h *kvmv1.Hypervisor) { - meta.SetStatusCondition(&h.Status.Conditions, condition) - }); err != nil { + // Set condition back to testing. Include the server ID so each retry + // (which creates a fresh instance with a new UUID) yields a different + // message — otherwise SetApplyConfigurationStatusCondition would treat + // the update as a no-op when Nova keeps reporting the same fault text. + if err = r.applyOnboardingCondition(ctx, hv, + *k8sacmetav1.Condition(). + WithType(kvmv1.ConditionTypeOnboarding). + WithStatus(metav1.ConditionTrue). + WithReason(kvmv1.ConditionReasonTesting). + WithMessage(fmt.Sprintf("Server %s ended up in error state: %s", id, server.Fault.Message))); err != nil { return ctrl.Result{}, err } @@ -261,37 +270,32 @@ func (r *OnboardingController) smokeTest(ctx context.Context, hv *kvmv1.Hypervis } return ctrl.Result{RequeueAfter: defaultWaitTime}, nil + case "ACTIVE": consoleOutput, err := servers. ShowConsoleOutput(ctx, r.testComputeClient, server.ID, servers.ShowConsoleOutputOpts{Length: 11}). Extract() if err != nil { - condition := metav1.Condition{ - Type: kvmv1.ConditionTypeOnboarding, - Status: metav1.ConditionTrue, - Reason: kvmv1.ConditionReasonTesting, - Message: fmt.Sprintf("could not get console output %v", err), - } - if err := utils.PatchHypervisorStatusWithRetry(ctx, r.Client, hv.Name, OnboardingControllerName, func(h *kvmv1.Hypervisor) { - meta.SetStatusCondition(&h.Status.Conditions, condition) - }); err != nil { - return ctrl.Result{}, err + if err2 := r.applyOnboardingCondition(ctx, hv, + *k8sacmetav1.Condition(). + WithType(kvmv1.ConditionTypeOnboarding). + WithStatus(metav1.ConditionTrue). + WithReason(kvmv1.ConditionReasonTesting). + WithMessage(fmt.Sprintf("could not get console output for server %s: %v", server.ID, err))); err2 != nil { + return ctrl.Result{}, err2 } return ctrl.Result{RequeueAfter: defaultWaitTime}, nil } if !strings.Contains(consoleOutput, server.Name) { if !server.LaunchedAt.IsZero() && r.Clock.Now().After(server.LaunchedAt.Add(smokeTestTimeout)) { - condition := metav1.Condition{ - Type: kvmv1.ConditionTypeOnboarding, - Status: metav1.ConditionTrue, - Reason: kvmv1.ConditionReasonTesting, - Message: fmt.Sprintf("timeout waiting for console output since %v", server.LaunchedAt), - } - if err := utils.PatchHypervisorStatusWithRetry(ctx, r.Client, hv.Name, OnboardingControllerName, func(h *kvmv1.Hypervisor) { - meta.SetStatusCondition(&h.Status.Conditions, condition) - }); err != nil { - return ctrl.Result{}, err + if err2 := r.applyOnboardingCondition(ctx, hv, + *k8sacmetav1.Condition(). + WithType(kvmv1.ConditionTypeOnboarding). + WithStatus(metav1.ConditionTrue). + WithReason(kvmv1.ConditionReasonTesting). + WithMessage(fmt.Sprintf("timeout waiting for console output of server %s since %v", server.ID, server.LaunchedAt))); err2 != nil { + return ctrl.Result{}, err2 } if err = servers.Delete(ctx, r.testComputeClient, server.ID).ExtractErr(); err != nil { if !gophercloud.ResponseCodeIs(err, http.StatusNotFound) { @@ -303,28 +307,25 @@ func (r *OnboardingController) smokeTest(ctx context.Context, hv *kvmv1.Hypervis } if err = servers.Delete(ctx, r.testComputeClient, server.ID).ExtractErr(); err != nil { - condition := metav1.Condition{ - Type: kvmv1.ConditionTypeOnboarding, - Status: metav1.ConditionTrue, - Reason: kvmv1.ConditionReasonTesting, - Message: fmt.Sprintf("failed to terminate instance %v", err), - } - if err := utils.PatchHypervisorStatusWithRetry(ctx, r.Client, hv.Name, OnboardingControllerName, func(h *kvmv1.Hypervisor) { - meta.SetStatusCondition(&h.Status.Conditions, condition) - }); err != nil { - return ctrl.Result{}, err + if err2 := r.applyOnboardingCondition(ctx, hv, + *k8sacmetav1.Condition(). + WithType(kvmv1.ConditionTypeOnboarding). + WithStatus(metav1.ConditionTrue). + WithReason(kvmv1.ConditionReasonTesting). + WithMessage(fmt.Sprintf("failed to terminate instance %s: %v", server.ID, err))); err2 != nil { + return ctrl.Result{}, err2 } return ctrl.Result{RequeueAfter: defaultWaitTime}, nil } return r.completeOnboarding(ctx, host, hv) + default: return ctrl.Result{RequeueAfter: defaultWaitTime}, nil } } func (r *OnboardingController) completeOnboarding(ctx context.Context, host string, hv *kvmv1.Hypervisor) (ctrl.Result, error) { - // Check if we're in the RemovingTestAggregate phase onboardingCondition := meta.FindStatusCondition(hv.Status.Conditions, kvmv1.ConditionTypeOnboarding) if onboardingCondition != nil && onboardingCondition.Reason == kvmv1.ConditionReasonHandover { // We're waiting for aggregates and traits controllers to sync @@ -340,44 +341,32 @@ func (r *OnboardingController) completeOnboarding(ctx context.Context, host stri return ctrl.Result{}, nil } - condition := metav1.Condition{ - Type: kvmv1.ConditionTypeOnboarding, - Status: metav1.ConditionFalse, - Reason: kvmv1.ConditionReasonSucceeded, - Message: "Onboarding completed", - } - - return ctrl.Result{}, utils.PatchHypervisorStatusWithRetry(ctx, r.Client, hv.Name, OnboardingControllerName, func(h *kvmv1.Hypervisor) { - meta.SetStatusCondition(&h.Status.Conditions, condition) - }) + return ctrl.Result{}, r.applyOnboardingCondition(ctx, hv, + *k8sacmetav1.Condition(). + WithType(kvmv1.ConditionTypeOnboarding). + WithStatus(metav1.ConditionFalse). + WithReason(kvmv1.ConditionReasonSucceeded). + WithMessage("Onboarding completed")) } // First time in completeOnboarding - clean up and prepare for aggregate sync - err := r.deleteTestServers(ctx, host) - if err != nil { - condition := metav1.Condition{ - Type: kvmv1.ConditionTypeOnboarding, - Status: metav1.ConditionTrue, // No cleanup, so we are still "onboarding" - Reason: kvmv1.ConditionReasonAborted, - Message: err.Error(), - } - return ctrl.Result{}, errors.Join(err, utils.PatchHypervisorStatusWithRetry(ctx, r.Client, hv.Name, OnboardingControllerName, func(h *kvmv1.Hypervisor) { - meta.SetStatusCondition(&h.Status.Conditions, condition) - })) + if err := r.deleteTestServers(ctx, host); err != nil { + return ctrl.Result{}, errors.Join(err, r.applyOnboardingCondition(ctx, hv, + *k8sacmetav1.Condition(). + WithType(kvmv1.ConditionTypeOnboarding). + WithStatus(metav1.ConditionTrue). // No cleanup, so we are still "onboarding" + WithReason(kvmv1.ConditionReasonAborted). + WithMessage(err.Error()))) } // Mark onboarding as almost complete, triggers other controllers to do their part - condition := metav1.Condition{ - Type: kvmv1.ConditionTypeOnboarding, - Status: metav1.ConditionTrue, - Reason: kvmv1.ConditionReasonHandover, - Message: "Waiting for other controllers to take over", - } - // Patch status to signal aggregates controller - return ctrl.Result{}, utils.PatchHypervisorStatusWithRetry(ctx, r.Client, hv.Name, OnboardingControllerName, func(h *kvmv1.Hypervisor) { - meta.SetStatusCondition(&h.Status.Conditions, condition) - }) + return ctrl.Result{}, r.applyOnboardingCondition(ctx, hv, + *k8sacmetav1.Condition(). + WithType(kvmv1.ConditionTypeOnboarding). + WithStatus(metav1.ConditionTrue). + WithReason(kvmv1.ConditionReasonHandover). + WithMessage("Waiting for other controllers to take over")) } func (r *OnboardingController) deleteTestServers(ctx context.Context, host string) error { @@ -409,10 +398,10 @@ func (r *OnboardingController) deleteTestServers(ctx context.Context, host strin return errors.Join(errs...) } -func (r *OnboardingController) ensureNovaProperties(ctx context.Context, hv *kvmv1.Hypervisor) error { +func (r *OnboardingController) lookupNovaProperties(ctx context.Context, hv *kvmv1.Hypervisor) (hypervisorID, serviceID string, err error) { hypervisorAddress := hv.Labels[corev1.LabelHostname] if hypervisorAddress == "" { - return errRequeue + return "", "", errRequeue } shortHypervisorAddress := strings.SplitN(hypervisorAddress, ".", 2)[0] @@ -421,41 +410,27 @@ func (r *OnboardingController) ensureNovaProperties(ctx context.Context, hv *kvm hypervisorPages, err := hypervisors.List(r.computeClient, hypervisorQuery).AllPages(ctx) if err != nil { if gophercloud.ResponseCodeIs(err, http.StatusNotFound) { - return errRequeue + return "", "", errRequeue } - return err + return "", "", err } hs, err := hypervisors.ExtractHypervisors(hypervisorPages) if err != nil { - return err + return "", "", err } if len(hs) < 1 { - return errRequeue + return "", "", errRequeue } - var found = false - var myHypervisor hypervisors.Hypervisor for _, h := range hs { - short := strings.SplitN(h.HypervisorHostname, ".", 2)[0] - if short == shortHypervisorAddress { - myHypervisor = h - found = true - break + if strings.SplitN(h.HypervisorHostname, ".", 2)[0] == shortHypervisorAddress { + return h.ID, h.Service.ID, nil } } - if !found { - return fmt.Errorf("could not find exact match for %v", shortHypervisorAddress) - } - - hypervisorID := myHypervisor.ID - serviceID := myHypervisor.Service.ID - return utils.PatchHypervisorStatusWithRetry(ctx, r.Client, hv.Name, OnboardingControllerName, func(h *kvmv1.Hypervisor) { - h.Status.HypervisorID = hypervisorID - h.Status.ServiceID = serviceID - }) + return "", "", fmt.Errorf("could not find exact match for %v", shortHypervisorAddress) } func (r *OnboardingController) createOrGetTestServer(ctx context.Context, zone, computeHost string, nodeUid types.UID) (*servers.Server, error) { @@ -476,9 +451,9 @@ func (r *OnboardingController) createOrGetTestServer(ctx context.Context, zone, } log := logger.FromContext(ctx) + var foundServer *servers.Server // Cleanup all other server with the same test prefix, except for the exact match // as the cleanup after onboarding may leak resources - var foundServer *servers.Server for _, server := range serverList { // The query is a substring search, we are looking for a prefix if !strings.HasPrefix(server.Name, serverPrefix) { diff --git a/internal/controller/onboarding_controller_test.go b/internal/controller/onboarding_controller_test.go index 8c0003d0..3afd6129 100644 --- a/internal/controller/onboarding_controller_test.go +++ b/internal/controller/onboarding_controller_test.go @@ -336,6 +336,7 @@ var _ = Describe("Onboarding Controller", func() { Context("running tests after initial setup", func() { var ( serverActionHandler func(http.ResponseWriter, *http.Request) + serverCreateHandler func(http.ResponseWriter, *http.Request) serverDeleteHandler func(http.ResponseWriter, *http.Request) serverDetailHandler func(http.ResponseWriter, *http.Request) ) @@ -397,11 +398,14 @@ var _ = Describe("Onboarding Controller", func() { Expect(err).NotTo(HaveOccurred()) }) - fakeServer.Mux.HandleFunc("POST /servers", func(w http.ResponseWriter, r *http.Request) { + serverCreateHandler = func(w http.ResponseWriter, _ *http.Request) { w.Header().Add("Content-Type", "application/json") w.WriteHeader(http.StatusOK) _, err := fmt.Fprint(w, createServerBody) Expect(err).NotTo(HaveOccurred()) + } + fakeServer.Mux.HandleFunc("POST /servers", func(w http.ResponseWriter, r *http.Request) { + serverCreateHandler(w, r) }) serverActionHandler = func(w http.ResponseWriter, _ *http.Request) { @@ -632,13 +636,126 @@ var _ = Describe("Onboarding Controller", func() { By("Verifying the timed-out server was deleted") Expect(serverDeletedCalled).To(BeTrue()) - By("Verifying the onboarding condition message indicates a timeout") + By("Verifying the onboarding condition message indicates a timeout and includes the server ID") hv := &kvmv1.Hypervisor{} Expect(k8sClient.Get(ctx, namespacedName, hv)).To(Succeed()) onboardingCond := meta.FindStatusCondition(hv.Status.Conditions, kvmv1.ConditionTypeOnboarding) Expect(onboardingCond).NotTo(BeNil()) Expect(onboardingCond.Reason).To(Equal(kvmv1.ConditionReasonTesting)) Expect(onboardingCond.Message).To(ContainSubstring("timeout")) + Expect(onboardingCond.Message).To(ContainSubstring("server-id")) + }) + }) + + When("test server creation fails", func() { + BeforeEach(func(ctx SpecContext) { + By("Overriding HV status to Testing state") + hv := &kvmv1.Hypervisor{} + Expect(k8sClient.Get(ctx, namespacedName, hv)).To(Succeed()) + meta.SetStatusCondition(&hv.Status.Conditions, metav1.Condition{ + Type: kvmv1.ConditionTypeOnboarding, + Status: metav1.ConditionTrue, + Reason: kvmv1.ConditionReasonTesting, + Message: "previously stuck on something else", + }) + Expect(k8sClient.Status().Update(ctx, hv)).To(Succeed()) + }) + + It("should surface the error in the Onboarding condition and requeue without returning an error", func(ctx SpecContext) { + By("Overriding the POST /servers handler to return 500") + serverCreateHandler = func(w http.ResponseWriter, _ *http.Request) { + w.WriteHeader(http.StatusInternalServerError) + _, err := fmt.Fprint(w, `{"computeFault": {"message": "Cinder volume backend is degraded"}}`) + Expect(err).NotTo(HaveOccurred()) + } + + By("Reconciling: createOrGetTestServer should fail at POST /servers") + result, err := onboardingReconciler.Reconcile(ctx, reconcileReq) + Expect(err).NotTo(HaveOccurred()) + Expect(result.RequeueAfter).To(Equal(defaultWaitTime)) + + By("Verifying the Onboarding condition reflects the create error") + hv := &kvmv1.Hypervisor{} + Expect(k8sClient.Get(ctx, namespacedName, hv)).To(Succeed()) + onboardingCond := meta.FindStatusCondition(hv.Status.Conditions, kvmv1.ConditionTypeOnboarding) + Expect(onboardingCond).NotTo(BeNil()) + Expect(onboardingCond.Status).To(Equal(metav1.ConditionTrue)) + Expect(onboardingCond.Reason).To(Equal(kvmv1.ConditionReasonTesting)) + Expect(onboardingCond.Message).To(ContainSubstring("failed to create or get test instance")) + }) + }) + + When("test server is in ERROR state", func() { + var ( + serverGetCalled bool + serverDeleteCalled bool + ) + + BeforeEach(func(ctx SpecContext) { + By("Overriding HV status to Testing state") + hv := &kvmv1.Hypervisor{} + Expect(k8sClient.Get(ctx, namespacedName, hv)).To(Succeed()) + meta.SetStatusCondition(&hv.Status.Conditions, metav1.Condition{ + Type: kvmv1.ConditionTypeOnboarding, + Status: metav1.ConditionTrue, + Reason: kvmv1.ConditionReasonTesting, + Message: "Running onboarding tests", + }) + Expect(k8sClient.Status().Update(ctx, hv)).To(Succeed()) + + serverName := fmt.Sprintf("%s-%s-%s", testPrefixName, hypervisorName, string(hv.UID)) + serverGetCalled = false + serverDeleteCalled = false + + // GET /servers/detail returns a single ERROR-state server with the + // expected name so createOrGetTestServer returns it as foundServer. + serverDetailHandler = func(w http.ResponseWriter, _ *http.Request) { + _, err := fmt.Fprintf(w, + `{"servers": [{"id": "server-id", "name": %q, "status": "ERROR"}], "servers_links": []}`, + serverName) + Expect(err).NotTo(HaveOccurred()) + } + + // GET /servers/server-id returns the ERROR server with a fault + // message so smokeTest can record it. + fakeServer.Mux.HandleFunc("GET /servers/server-id", func(w http.ResponseWriter, _ *http.Request) { + serverGetCalled = true + w.Header().Add("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + _, err := fmt.Fprintf(w, + `{"server": {"id": "server-id", "name": %q, "status": "ERROR", "fault": {"message": "Build of instance aborted: volume creation failed"}}}`, + serverName) + Expect(err).NotTo(HaveOccurred()) + }) + + serverDeleteHandler = func(w http.ResponseWriter, _ *http.Request) { + serverDeleteCalled = true + w.WriteHeader(http.StatusAccepted) + } + }) + + It("should record the failure with the server UUID and issue a delete, without returning an error", func(ctx SpecContext) { + By("Reconciling once to enter the ERROR branch") + result, err := onboardingReconciler.Reconcile(ctx, reconcileReq) + Expect(err).NotTo(HaveOccurred()) + Expect(result.RequeueAfter).To(Equal(defaultWaitTime)) + + By("Verifying GET on the server happened so the fault could be read") + Expect(serverGetCalled).To(BeTrue()) + + By("Verifying the server delete was attempted") + Expect(serverDeleteCalled).To(BeTrue()) + + By("Verifying the Onboarding condition message includes the server UUID and the fault text") + hv := &kvmv1.Hypervisor{} + Expect(k8sClient.Get(ctx, namespacedName, hv)).To(Succeed()) + onboardingCond := meta.FindStatusCondition(hv.Status.Conditions, kvmv1.ConditionTypeOnboarding) + Expect(onboardingCond).NotTo(BeNil()) + Expect(onboardingCond.Status).To(Equal(metav1.ConditionTrue)) + Expect(onboardingCond.Reason).To(Equal(kvmv1.ConditionReasonTesting)) + Expect(onboardingCond.Message).To(ContainSubstring("server-id")) + Expect(onboardingCond.Message).To(ContainSubstring("ended up in error state")) + Expect(onboardingCond.Message).To(ContainSubstring("Build of instance aborted")) }) }) diff --git a/internal/utils/conditions.go b/internal/utils/conditions.go new file mode 100644 index 00000000..2eef3a24 --- /dev/null +++ b/internal/utils/conditions.go @@ -0,0 +1,116 @@ +/* +SPDX-FileCopyrightText: Copyright 2024 SAP SE or an SAP affiliate company and cobaltcore-dev contributors +SPDX-License-Identifier: Apache-2.0 + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package utils + +import ( + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + k8sacmetav1 "k8s.io/client-go/applyconfigurations/meta/v1" +) + +// ConditionsFromStatus converts a []metav1.Condition (as stored in a status +// subresource) into a []k8sacmetav1.ConditionApplyConfiguration suitable for +// use with server-side apply. All fields including LastTransitionTime are +// preserved verbatim. +func ConditionsFromStatus(conditions []metav1.Condition) []k8sacmetav1.ConditionApplyConfiguration { + result := make([]k8sacmetav1.ConditionApplyConfiguration, len(conditions)) + for i := range conditions { + c := &conditions[i] + result[i] = k8sacmetav1.ConditionApplyConfiguration{ + Type: &c.Type, + Status: &c.Status, + Reason: &c.Reason, + Message: &c.Message, + LastTransitionTime: &c.LastTransitionTime, + } + } + return result +} + +// SetApplyConfigurationStatusCondition sets the corresponding condition in +// conditions to newCondition and returns true if the conditions are changed by +// this call. It mirrors the behaviour of k8s.io/apimachinery/pkg/api/meta.SetStatusCondition: +// +// 1. If a condition of the specified type does not exist, newCondition is +// appended. LastTransitionTime is set to now if not provided. +// 2. If a condition of the specified type already exists, all fields are +// updated. LastTransitionTime is updated to now (or the provided value) +// only when Status changes; otherwise the existing LastTransitionTime is +// preserved. +func SetApplyConfigurationStatusCondition(conditions *[]k8sacmetav1.ConditionApplyConfiguration, newCondition k8sacmetav1.ConditionApplyConfiguration) (changed bool) { + if conditions == nil { + return false + } + + // Find existing entry by type + var existing *k8sacmetav1.ConditionApplyConfiguration + for i := range *conditions { + if (*conditions)[i].Type != nil && *(*conditions)[i].Type == *newCondition.Type { + existing = &(*conditions)[i] + break + } + } + + if existing == nil { + // New condition: set LastTransitionTime if not provided + if newCondition.LastTransitionTime == nil { + now := metav1.Now() + newCondition.LastTransitionTime = &now + } + *conditions = append(*conditions, newCondition) + return true + } + + // Existing condition: update fields, handle LastTransitionTime + statusChanged := existing.Status == nil || newCondition.Status == nil || + *existing.Status != *newCondition.Status + + if statusChanged { + existing.Status = newCondition.Status + if newCondition.LastTransitionTime != nil { + existing.LastTransitionTime = newCondition.LastTransitionTime + } else { + now := metav1.Now() + existing.LastTransitionTime = &now + } + changed = true + } + // When status is unchanged, LastTransitionTime is intentionally preserved. + + if strChanged(&existing.Reason, newCondition.Reason) { + existing.Reason = newCondition.Reason + changed = true + } + if strChanged(&existing.Message, newCondition.Message) { + existing.Message = newCondition.Message + changed = true + } + + return changed +} + +// strChanged reports whether the pointer value of b differs from the current +// value at a. +func strChanged(a **string, b *string) bool { + if *a == nil && b == nil { + return false + } + if *a == nil || b == nil { + return true + } + return **a != *b +} diff --git a/internal/utils/conditions_test.go b/internal/utils/conditions_test.go new file mode 100644 index 00000000..fcca9447 --- /dev/null +++ b/internal/utils/conditions_test.go @@ -0,0 +1,308 @@ +/* +SPDX-FileCopyrightText: Copyright 2024 SAP SE or an SAP affiliate company and cobaltcore-dev contributors +SPDX-License-Identifier: Apache-2.0 + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package utils_test + +import ( + "testing" + "time" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + k8sacmetav1 "k8s.io/client-go/applyconfigurations/meta/v1" + + "github.com/cobaltcore-dev/openstack-hypervisor-operator/internal/utils" +) + +// ptr returns a pointer to v; used to construct ConditionApplyConfiguration literals. +func ptr[T any](v T) *T { return &v } + +// --- ConditionsFromStatus --- + +func TestConditionsFromStatus_Empty(t *testing.T) { + result := utils.ConditionsFromStatus(nil) + if len(result) != 0 { + t.Fatalf("expected empty slice, got %d elements", len(result)) + } + + result = utils.ConditionsFromStatus([]metav1.Condition{}) + if len(result) != 0 { + t.Fatalf("expected empty slice, got %d elements", len(result)) + } +} + +func TestConditionsFromStatus_ConvertsAllFields(t *testing.T) { + ts := metav1.NewTime(time.Date(2024, 1, 2, 3, 4, 5, 0, time.UTC)) + input := []metav1.Condition{ + { + Type: "Ready", + Status: metav1.ConditionTrue, + Reason: "AllGood", + Message: "everything is fine", + LastTransitionTime: ts, + ObservedGeneration: 7, + }, + } + + result := utils.ConditionsFromStatus(input) + if len(result) != 1 { + t.Fatalf("expected 1 element, got %d", len(result)) + } + + c := result[0] + if c.Type == nil || *c.Type != "Ready" { + t.Errorf("Type: got %v, want Ready", c.Type) + } + if c.Status == nil || *c.Status != metav1.ConditionTrue { + t.Errorf("Status: got %v, want True", c.Status) + } + if c.Reason == nil || *c.Reason != "AllGood" { + t.Errorf("Reason: got %v, want AllGood", c.Reason) + } + if c.Message == nil || *c.Message != "everything is fine" { + t.Errorf("Message: got %v, want 'everything is fine'", c.Message) + } + if c.LastTransitionTime == nil || !c.LastTransitionTime.Equal(&ts) { + t.Errorf("LastTransitionTime: got %v, want %v", c.LastTransitionTime, ts) + } +} + +func TestConditionsFromStatus_MultipleConditions(t *testing.T) { + input := []metav1.Condition{ + {Type: "A", Status: metav1.ConditionTrue, Reason: "r", Message: "m", LastTransitionTime: metav1.Now()}, + {Type: "B", Status: metav1.ConditionFalse, Reason: "r2", Message: "m2", LastTransitionTime: metav1.Now()}, + } + result := utils.ConditionsFromStatus(input) + if len(result) != 2 { + t.Fatalf("expected 2 elements, got %d", len(result)) + } + if result[0].Type == nil || *result[0].Type != "A" { + t.Errorf("first condition type: got %v, want A", result[0].Type) + } + if result[1].Type == nil || *result[1].Type != "B" { + t.Errorf("second condition type: got %v, want B", result[1].Type) + } +} + +// --- SetApplyConfigurationStatusCondition --- + +func TestSetApplyConfigurationStatusCondition_NilSlicePointer(t *testing.T) { + changed := utils.SetApplyConfigurationStatusCondition(nil, + *k8sacmetav1.Condition().WithType("T").WithStatus(metav1.ConditionTrue).WithReason("R").WithMessage("M")) + if changed { + t.Error("expected false for nil conditions pointer") + } +} + +func TestSetApplyConfigurationStatusCondition_AppendNew(t *testing.T) { + var conditions []k8sacmetav1.ConditionApplyConfiguration + before := time.Now() + + changed := utils.SetApplyConfigurationStatusCondition(&conditions, + *k8sacmetav1.Condition(). + WithType("Ready"). + WithStatus(metav1.ConditionTrue). + WithReason("AllGood"). + WithMessage("fine")) + + after := time.Now() + + if !changed { + t.Error("expected changed=true for new condition") + } + if len(conditions) != 1 { + t.Fatalf("expected 1 condition, got %d", len(conditions)) + } + c := conditions[0] + if c.Type == nil || *c.Type != "Ready" { + t.Errorf("Type: got %v", c.Type) + } + if c.Status == nil || *c.Status != metav1.ConditionTrue { + t.Errorf("Status: got %v", c.Status) + } + if c.LastTransitionTime == nil { + t.Fatal("LastTransitionTime must be set for a new condition") + } + ltt := c.LastTransitionTime.Time + if ltt.Before(before) || ltt.After(after) { + t.Errorf("LastTransitionTime %v not in [%v, %v]", ltt, before, after) + } +} + +func TestSetApplyConfigurationStatusCondition_AppendNew_ExplicitLastTransitionTime(t *testing.T) { + var conditions []k8sacmetav1.ConditionApplyConfiguration + ts := metav1.NewTime(time.Date(2020, 1, 1, 0, 0, 0, 0, time.UTC)) + + utils.SetApplyConfigurationStatusCondition(&conditions, + *k8sacmetav1.Condition(). + WithType("Ready"). + WithStatus(metav1.ConditionTrue). + WithReason("R"). + WithMessage("M"). + WithLastTransitionTime(ts)) + + if conditions[0].LastTransitionTime == nil || !conditions[0].LastTransitionTime.Equal(&ts) { + t.Errorf("expected explicit LastTransitionTime %v, got %v", ts, conditions[0].LastTransitionTime) + } +} + +func TestSetApplyConfigurationStatusCondition_StatusUnchanged_PreservesLastTransitionTime(t *testing.T) { + ts := metav1.NewTime(time.Date(2020, 1, 1, 0, 0, 0, 0, time.UTC)) + conditions := []k8sacmetav1.ConditionApplyConfiguration{ + *k8sacmetav1.Condition(). + WithType("Ready"). + WithStatus(metav1.ConditionTrue). + WithReason("OldReason"). + WithMessage("old message"). + WithLastTransitionTime(ts), + } + + changed := utils.SetApplyConfigurationStatusCondition(&conditions, + *k8sacmetav1.Condition(). + WithType("Ready"). + WithStatus(metav1.ConditionTrue). + WithReason("NewReason"). + WithMessage("new message")) + + if !changed { + t.Error("expected changed=true because Reason/Message changed") + } + c := conditions[0] + if c.Reason == nil || *c.Reason != "NewReason" { + t.Errorf("Reason not updated: got %v", c.Reason) + } + if c.Message == nil || *c.Message != "new message" { + t.Errorf("Message not updated: got %v", c.Message) + } + // LastTransitionTime must NOT have changed + if c.LastTransitionTime == nil || !c.LastTransitionTime.Equal(&ts) { + t.Errorf("LastTransitionTime must be preserved when status unchanged: got %v, want %v", + c.LastTransitionTime, ts) + } +} + +func TestSetApplyConfigurationStatusCondition_StatusUnchanged_NoFieldChange_ReturnsFalse(t *testing.T) { + ts := metav1.NewTime(time.Date(2020, 1, 1, 0, 0, 0, 0, time.UTC)) + conditions := []k8sacmetav1.ConditionApplyConfiguration{ + *k8sacmetav1.Condition(). + WithType("Ready"). + WithStatus(metav1.ConditionTrue). + WithReason("R"). + WithMessage("M"). + WithLastTransitionTime(ts), + } + + changed := utils.SetApplyConfigurationStatusCondition(&conditions, + *k8sacmetav1.Condition(). + WithType("Ready"). + WithStatus(metav1.ConditionTrue). + WithReason("R"). + WithMessage("M")) + + if changed { + t.Error("expected changed=false when nothing changed") + } +} + +func TestSetApplyConfigurationStatusCondition_StatusChanged_UpdatesLastTransitionTime(t *testing.T) { + old := metav1.NewTime(time.Date(2020, 1, 1, 0, 0, 0, 0, time.UTC)) + conditions := []k8sacmetav1.ConditionApplyConfiguration{ + *k8sacmetav1.Condition(). + WithType("Ready"). + WithStatus(metav1.ConditionTrue). + WithReason("R"). + WithMessage("M"). + WithLastTransitionTime(old), + } + + before := time.Now() + changed := utils.SetApplyConfigurationStatusCondition(&conditions, + *k8sacmetav1.Condition(). + WithType("Ready"). + WithStatus(metav1.ConditionFalse). + WithReason("Broken"). + WithMessage("it broke")) + after := time.Now() + + if !changed { + t.Error("expected changed=true for status change") + } + c := conditions[0] + if c.Status == nil || *c.Status != metav1.ConditionFalse { + t.Errorf("Status not updated: got %v", c.Status) + } + if c.LastTransitionTime == nil { + t.Fatal("LastTransitionTime must be set after status change") + } + ltt := c.LastTransitionTime.Time + if ltt.Before(before) || ltt.After(after) { + t.Errorf("LastTransitionTime %v not in [%v, %v]", ltt, before, after) + } +} + +func TestSetApplyConfigurationStatusCondition_StatusChanged_ExplicitLastTransitionTime(t *testing.T) { + old := metav1.NewTime(time.Date(2020, 1, 1, 0, 0, 0, 0, time.UTC)) + explicit := metav1.NewTime(time.Date(2021, 6, 15, 0, 0, 0, 0, time.UTC)) + conditions := []k8sacmetav1.ConditionApplyConfiguration{ + *k8sacmetav1.Condition(). + WithType("Ready"). + WithStatus(metav1.ConditionTrue). + WithReason("R"). + WithMessage("M"). + WithLastTransitionTime(old), + } + + utils.SetApplyConfigurationStatusCondition(&conditions, + *k8sacmetav1.Condition(). + WithType("Ready"). + WithStatus(metav1.ConditionFalse). + WithReason("R"). + WithMessage("M"). + WithLastTransitionTime(explicit)) + + if conditions[0].LastTransitionTime == nil || !conditions[0].LastTransitionTime.Equal(&explicit) { + t.Errorf("expected explicit LastTransitionTime %v, got %v", explicit, conditions[0].LastTransitionTime) + } +} + +func TestSetApplyConfigurationStatusCondition_OtherConditionsUntouched(t *testing.T) { + ts := metav1.NewTime(time.Date(2020, 1, 1, 0, 0, 0, 0, time.UTC)) + conditions := []k8sacmetav1.ConditionApplyConfiguration{ + *k8sacmetav1.Condition().WithType("Other").WithStatus(metav1.ConditionTrue). + WithReason("R").WithMessage("M").WithLastTransitionTime(ts), + *k8sacmetav1.Condition().WithType("Ready").WithStatus(metav1.ConditionTrue). + WithReason("R").WithMessage("M").WithLastTransitionTime(ts), + } + + utils.SetApplyConfigurationStatusCondition(&conditions, + *k8sacmetav1.Condition(). + WithType("Ready"). + WithStatus(metav1.ConditionFalse). + WithReason("Broken"). + WithMessage("broke")) + + // "Other" must be completely unchanged + other := conditions[0] + if other.Type == nil || *other.Type != "Other" { + t.Errorf("Other condition type changed: %v", other.Type) + } + if other.Status == nil || *other.Status != metav1.ConditionTrue { + t.Errorf("Other condition status changed: %v", other.Status) + } + if other.LastTransitionTime == nil || !other.LastTransitionTime.Equal(&ts) { + t.Errorf("Other condition LastTransitionTime changed: %v", other.LastTransitionTime) + } +}