From f06870a08ffeeddca629dadd46dc7d34a596df6c Mon Sep 17 00:00:00 2001 From: Julius Clausnitzer Date: Wed, 27 May 2026 13:33:33 +0200 Subject: [PATCH] capacity: subtract VM allocations when counting placeable slots probeScheduler always used raw effectiveCapacity for slot counting, making running VMs invisible in usage = totalSlots - placeableSlots. The total probe correctly ignores allocations (empty-datacenter view), but the placeable probe must subtract hv.Status.Allocation so that each host's remaining capacity reflects slots still available after running VMs are accounted for. --- .../reservations/capacity/controller.go | 19 ++++++-- .../reservations/capacity/controller_test.go | 45 ++++++++++++++++++- 2 files changed, 60 insertions(+), 4 deletions(-) diff --git a/internal/scheduling/reservations/capacity/controller.go b/internal/scheduling/reservations/capacity/controller.go index 1746a3377..8f7992ca1 100644 --- a/internal/scheduling/reservations/capacity/controller.go +++ b/internal/scheduling/reservations/capacity/controller.go @@ -162,8 +162,8 @@ func (c *Controller) reconcileOne( cur := existingByName[flavor.Name] cur.FlavorName = flavor.Name - totalVMSlots, totalHosts, totalErr := c.probeScheduler(ctx, flavor, az, c.config.TotalPipeline, hvByName) - placeableVMs, placeableHosts, placeableErr := c.probeScheduler(ctx, flavor, az, c.config.PlaceablePipeline, hvByName) + totalVMSlots, totalHosts, totalErr := c.probeScheduler(ctx, flavor, az, c.config.TotalPipeline, hvByName, true) + placeableVMs, placeableHosts, placeableErr := c.probeScheduler(ctx, flavor, az, c.config.PlaceablePipeline, hvByName, false) if totalErr != nil { allFresh = false @@ -257,11 +257,15 @@ func (c *Controller) reconcileOne( // probeScheduler calls the scheduler with the given pipeline and returns VM slots + host count. // Capacity is computed as sum of floor(hostMemory / flavorMemory) across returned hosts. +// When ignoreAllocations is true (total/empty-datacenter probe), raw effective capacity is used. +// When false (placeable probe), hv.Status.Allocation is subtracted first so that slots reflect +// remaining capacity after running VMs. func (c *Controller) probeScheduler( ctx context.Context, flavor compute.FlavorInGroup, az, pipeline string, hvByName map[string]hv1.Hypervisor, + ignoreAllocations bool, ) (capacity, hosts int64, err error) { flavorBytes := int64(flavor.MemoryMB) * 1024 * 1024 //nolint:gosec @@ -309,7 +313,16 @@ func (c *Controller) probeScheduler( if !ok { continue } - if capBytes := memCap.Value(); capBytes > 0 { + capBytes := memCap.Value() + if !ignoreAllocations { + if alloc, ok := hv.Status.Allocation[hv1.ResourceMemory]; ok { + capBytes -= alloc.Value() + } + if capBytes < 0 { + capBytes = 0 + } + } + if capBytes > 0 { capacity += capBytes / flavorBytes } } diff --git a/internal/scheduling/reservations/capacity/controller_test.go b/internal/scheduling/reservations/capacity/controller_test.go index 69a4e80bb..8938b8564 100644 --- a/internal/scheduling/reservations/capacity/controller_test.go +++ b/internal/scheduling/reservations/capacity/controller_test.go @@ -429,7 +429,7 @@ func TestProbeScheduler_CapacityCalculation(t *testing.T) { } flavor := compute.FlavorInGroup{Name: "test-flavor", MemoryMB: memMB} - capacity, hosts, err := c.probeScheduler(context.Background(), flavor, "az-a", "test-pipeline", hvByName) + capacity, hosts, err := c.probeScheduler(context.Background(), flavor, "az-a", "test-pipeline", hvByName, true) if err != nil { t.Fatalf("probeScheduler failed: %v", err) } @@ -442,6 +442,49 @@ func TestProbeScheduler_CapacityCalculation(t *testing.T) { } } +// TestProbeScheduler_SubtractsAllocationsWhenNotIgnored verifies that placeable-probe slot +// counting uses remaining capacity (effectiveCapacity − allocation) while the total-probe uses +// raw capacity. This is the regression test for the bug where both probes used raw capacity, +// making running VMs invisible in the usage = total − placeable calculation. +func TestProbeScheduler_SubtractsAllocationsWhenNotIgnored(t *testing.T) { + const memMB = 4096 + const memBytes = int64(memMB) * 1024 * 1024 + + scheme := newTestScheme(t) + + // Host has 2-slot capacity (2 × flavor), with 1 slot already used by a running VM. + hv := newHypervisor("host-1", "az-a", memBytes*2) + hv.Status.Allocation = map[hv1.ResourceName]resource.Quantity{ + hv1.ResourceMemory: *resource.NewQuantity(memBytes, resource.BinarySI), + } + + fakeClient := fake.NewClientBuilder().WithScheme(scheme).Build() + srv := newMockSchedulerServer(t, []string{"host-1"}) + defer srv.Close() + + c := NewController(fakeClient, Config{SchedulerURL: srv.URL}) + hvByName := map[string]hv1.Hypervisor{"host-1": *hv} + flavor := compute.FlavorInGroup{Name: "test-flavor", MemoryMB: memMB} + + // Total probe (ignoreAllocations=true): raw capacity → 2 slots. + totalCap, _, err := c.probeScheduler(context.Background(), flavor, "az-a", "total-pipeline", hvByName, true) + if err != nil { + t.Fatalf("probeScheduler (total) failed: %v", err) + } + if totalCap != 2 { + t.Errorf("total capacity = %d, want 2 (raw slots)", totalCap) + } + + // Placeable probe (ignoreAllocations=false): capacity − allocation → 1 slot. + placeableCap, _, err := c.probeScheduler(context.Background(), flavor, "az-a", "placeable-pipeline", hvByName, false) + if err != nil { + t.Fatalf("probeScheduler (placeable) failed: %v", err) + } + if placeableCap != 1 { + t.Errorf("placeable capacity = %d, want 1 (remaining slot after running VM)", placeableCap) + } +} + func TestReconcileAll_MultipleGroupsAndAZs(t *testing.T) { scheme := newTestScheme(t)