Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions internal/controller/nodeagent.go
Original file line number Diff line number Diff line change
Expand Up @@ -324,6 +324,7 @@ func (r *DataProtectionApplicationReconciler) ReconcileNodeAgentDaemonset(log lo
terms = append(terms, a.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms...)
}
}
common.SortNodeSelectorTerms(terms)
Comment thread
kaovilai marked this conversation as resolved.
if len(terms) > 0 {
ds.Spec.Template.Spec.Affinity = &corev1.Affinity{
NodeAffinity: &corev1.NodeAffinity{
Expand Down
66 changes: 66 additions & 0 deletions internal/controller/nodeagent_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2771,3 +2771,69 @@ func TestDPAReconciler_buildNodeAgentDaemonsetWithAzureWorkloadIdentity(t *testi
})
}
}

// TestNodeAgentAffinityMatchExpressionsDeterministicOrder verifies that
// matchExpressions built from a multi-label NodeSelector are always sorted
// by key, preventing non-deterministic DaemonSet spec changes that cause
// unnecessary node-agent pod restarts. See OADP-7541.
func TestNodeAgentAffinityMatchExpressionsDeterministicOrder(t *testing.T) {
tests := []struct {
name string
matchLabels map[string]string
wantKeys []string
}{
{
name: "two labels matching customer config from OADP-7541",
matchLabels: map[string]string{
"network": "int",
"node-role.kubernetes.io/infra": "",
},
wantKeys: []string{"network", "node-role.kubernetes.io/infra"},
},
{
name: "three labels to verify sorting with more keys",
matchLabels: map[string]string{
"zone": "us-east-1a",
"disk-type": "ssd",
"node-role.kubernetes.io/worker": "",
},
wantKeys: []string{"disk-type", "node-role.kubernetes.io/worker", "zone"},
},
{
name: "single label is trivially stable",
matchLabels: map[string]string{
"node-role.kubernetes.io/infra": "",
},
wantKeys: []string{"node-role.kubernetes.io/infra"},
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
for i := 0; i < 100; i++ {
la := &kube.LoadAffinity{
NodeSelector: metav1.LabelSelector{
MatchLabels: tt.matchLabels,
},
}
affinity := kube.ToSystemAffinity(la, nil)
require.NotNil(t, affinity, "iteration %d: affinity should not be nil", i)

terms := affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms
require.Len(t, terms, 1, "iteration %d: expected 1 NodeSelectorTerm", i)

common.SortNodeSelectorTerms(terms)

exprs := terms[0].MatchExpressions
require.Len(t, exprs, len(tt.wantKeys), "iteration %d: wrong number of matchExpressions", i)

gotKeys := make([]string, len(exprs))
for j, expr := range exprs {
gotKeys[j] = expr.Key
}
require.Equal(t, tt.wantKeys, gotKeys,
"iteration %d: matchExpressions keys not in sorted order", i)
}
})
}
}
1 change: 1 addition & 0 deletions internal/controller/velero.go
Original file line number Diff line number Diff line change
Expand Up @@ -295,6 +295,7 @@ func (r *DataProtectionApplicationReconciler) customizeVeleroDeployment(veleroDe
terms = append(terms, a.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms...)
}
}
common.SortNodeSelectorTerms(terms)
if len(terms) > 0 {
veleroDeployment.Spec.Template.Spec.Affinity = &corev1.Affinity{
NodeAffinity: &corev1.NodeAffinity{
Expand Down
58 changes: 48 additions & 10 deletions internal/controller/velero_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,9 @@ import (
"github.com/onsi/gomega"
"github.com/operator-framework/operator-lib/proxy"
"github.com/sirupsen/logrus"
"github.com/stretchr/testify/require"
velerov1 "github.com/vmware-tanzu/velero/pkg/apis/velero/v1"
"github.com/vmware-tanzu/velero/pkg/util/kube"
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
Expand Down Expand Up @@ -2095,11 +2097,6 @@ func TestDPAReconciler_buildVeleroDeployment(t *testing.T) {
loadAffinity: []corev1.NodeSelectorTerm{
{
MatchExpressions: []corev1.NodeSelectorRequirement{
{
Key: "zone",
Operator: corev1.NodeSelectorOpIn,
Values: []string{"east"},
},
{
Key: "disk-type",
Operator: corev1.NodeSelectorOpIn,
Expand All @@ -2109,20 +2106,25 @@ func TestDPAReconciler_buildVeleroDeployment(t *testing.T) {
Key: "gpu",
Operator: corev1.NodeSelectorOpDoesNotExist,
},
{
Key: "zone",
Operator: corev1.NodeSelectorOpIn,
Values: []string{"east"},
},
},
},
{
MatchExpressions: []corev1.NodeSelectorRequirement{
{
Key: "instance-type",
Operator: corev1.NodeSelectorOpIn,
Values: []string{"m5.large", "m5.xlarge"},
},
{
Key: "environment",
Operator: corev1.NodeSelectorOpNotIn,
Values: []string{"dev"},
},
{
Key: "instance-type",
Operator: corev1.NodeSelectorOpIn,
Values: []string{"m5.large", "m5.xlarge"},
},
},
},
},
Expand Down Expand Up @@ -4188,3 +4190,39 @@ func TestApplyResourceAnnotations(t *testing.T) {
})
}
}

// TestVeleroAffinityMatchExpressionsDeterministicOrder verifies that
// matchExpressions built from a multi-label NodeSelector are always sorted
// by key, preventing non-deterministic Deployment spec changes. See OADP-7541.
func TestVeleroAffinityMatchExpressionsDeterministicOrder(t *testing.T) {
matchLabels := map[string]string{
"network": "int",
"node-role.kubernetes.io/infra": "",
}
wantKeys := []string{"network", "node-role.kubernetes.io/infra"}

for i := 0; i < 100; i++ {
la := &kube.LoadAffinity{
NodeSelector: metav1.LabelSelector{
MatchLabels: matchLabels,
},
}
affinity := kube.ToSystemAffinity(la, nil)
require.NotNil(t, affinity, "iteration %d: affinity should not be nil", i)

terms := affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms
require.Len(t, terms, 1, "iteration %d: expected 1 NodeSelectorTerm", i)

common.SortNodeSelectorTerms(terms)

exprs := terms[0].MatchExpressions
require.Len(t, exprs, len(wantKeys), "iteration %d: wrong number of matchExpressions", i)

gotKeys := make([]string, len(exprs))
for j, expr := range exprs {
gotKeys[j] = expr.Key
}
require.Equal(t, wantKeys, gotKeys,
"iteration %d: matchExpressions keys not in sorted order", i)
}
}
17 changes: 17 additions & 0 deletions pkg/common/common.go
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
package common

import (
"cmp"
"fmt"
"regexp"
"slices"
"sort"
"strings"

Expand Down Expand Up @@ -374,3 +376,18 @@ func UpdateBackupStorageLocation(bsl *velerov1.BackupStorageLocation, bslSpec ve

return nil
}

// SortNodeSelectorTerms sorts MatchExpressions and MatchFields by key
// within each NodeSelectorTerm to ensure deterministic ordering. Without
// this, Go map iteration randomness in upstream kube.ToSystemAffinity
// produces non-deterministic ordering, which Kubernetes treats as a spec
// change and triggers unnecessary DaemonSet rollouts.
func SortNodeSelectorTerms(terms []corev1.NodeSelectorTerm) {
Comment thread
kaovilai marked this conversation as resolved.
cmpKey := func(a, b corev1.NodeSelectorRequirement) int {
return cmp.Compare(a.Key, b.Key)
}
for i := range terms {
slices.SortFunc(terms[i].MatchExpressions, cmpKey)
slices.SortFunc(terms[i].MatchFields, cmpKey)
}
}