Skip to content

Commit f1f48ba

Browse files
Fix workload deletion during StatefulSet queue name change
When changing the queue name on a StatefulSet, the webhook propagates the change to the pod template, triggering a rolling update. Previously, the workload was only owned by pods, so when all pods were deleted during the rolling update, Kubernetes GC would delete the workload. This fix adds the StatefulSet as an additional owner of the workload, preventing GC from deleting it when pods are replaced. This is similar to how LeaderWorkerSet sets itself as the owner of the workload. Signed-off-by: Sohan Kunkerkar <[email protected]>
1 parent a469237 commit f1f48ba

File tree

1 file changed

+71
-12
lines changed

1 file changed

+71
-12
lines changed

pkg/controller/jobs/statefulset/statefulset_reconciler.go

Lines changed: 71 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ import (
2323
"github.com/go-logr/logr"
2424
appsv1 "k8s.io/api/apps/v1"
2525
corev1 "k8s.io/api/core/v1"
26+
apierrors "k8s.io/apimachinery/pkg/api/errors"
2627
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2728
"k8s.io/apimachinery/pkg/labels"
2829
"k8s.io/apimachinery/pkg/types"
@@ -37,6 +38,7 @@ import (
3738
"sigs.k8s.io/controller-runtime/pkg/predicate"
3839
"sigs.k8s.io/controller-runtime/pkg/reconcile"
3940

41+
kueue "sigs.k8s.io/kueue/apis/kueue/v1beta1"
4042
"sigs.k8s.io/kueue/pkg/controller/jobframework"
4143
podcontroller "sigs.k8s.io/kueue/pkg/controller/jobs/pod/constants"
4244
clientutil "sigs.k8s.io/kueue/pkg/util/client"
@@ -65,11 +67,78 @@ func (r *Reconciler) Reconcile(ctx context.Context, req reconcile.Request) (reco
6567
log := ctrl.LoggerFrom(ctx)
6668
log.V(2).Info("Reconcile StatefulSet")
6769

68-
err := r.fetchAndFinalizePods(ctx, req)
70+
sts := &appsv1.StatefulSet{}
71+
if err := r.client.Get(ctx, req.NamespacedName, sts); err != nil {
72+
if client.IgnoreNotFound(err) != nil {
73+
return ctrl.Result{}, err
74+
}
75+
// StatefulSet was deleted, finalize orphaned pods.
76+
return ctrl.Result{}, r.fetchAndFinalizePods(ctx, req, nil)
77+
}
78+
79+
// Ensure the StatefulSet is an owner of the workload to prevent
80+
// Kubernetes GC from deleting it when pods are replaced during rolling updates.
81+
// We log errors but don't block pod finalization, as ownership is best-effort.
82+
if err := r.ensureWorkloadOwnership(ctx, sts); err != nil {
83+
log.Error(err, "Failed to ensure workload ownership, will retry")
84+
}
85+
86+
err := r.fetchAndFinalizePods(ctx, req, sts)
6987
return ctrl.Result{}, err
7088
}
7189

72-
func (r *Reconciler) fetchAndFinalizePods(ctx context.Context, req reconcile.Request) error {
90+
// ensureWorkloadOwnership ensures the StatefulSet is an owner of the workload.
91+
// This prevents Kubernetes GC from deleting the workload when pods are replaced
92+
// during rolling updates.
93+
func (r *Reconciler) ensureWorkloadOwnership(ctx context.Context, sts *appsv1.StatefulSet) error {
94+
if sts == nil {
95+
return nil
96+
}
97+
98+
log := ctrl.LoggerFrom(ctx)
99+
workloadName := GetWorkloadName(sts.Name)
100+
101+
wl := &kueue.Workload{}
102+
if err := r.client.Get(ctx, types.NamespacedName{Name: workloadName, Namespace: sts.Namespace}, wl); err != nil {
103+
if apierrors.IsNotFound(err) {
104+
return nil
105+
}
106+
return err
107+
}
108+
109+
// Check if StatefulSet is already an owner.
110+
for _, ref := range wl.GetOwnerReferences() {
111+
if ref.UID == sts.UID {
112+
return nil
113+
}
114+
}
115+
116+
// Add StatefulSet as an owner.
117+
if err := controllerutil.SetOwnerReference(sts, wl, r.client.Scheme()); err != nil {
118+
return err
119+
}
120+
121+
if err := r.client.Update(ctx, wl); err != nil {
122+
if apierrors.IsConflict(err) {
123+
// Conflict means the workload was modified concurrently; ownership may not be set.
124+
// Will retry on next reconciliation.
125+
log.V(4).Info("Conflict updating workload ownership, will retry on next reconciliation",
126+
"statefulset", klog.KObj(sts),
127+
"workload", klog.KObj(wl),
128+
)
129+
return nil
130+
}
131+
return err
132+
}
133+
log.V(3).Info("Added StatefulSet as owner of workload",
134+
"statefulset", klog.KObj(sts),
135+
"workload", klog.KObj(wl),
136+
)
137+
138+
return nil
139+
}
140+
141+
func (r *Reconciler) fetchAndFinalizePods(ctx context.Context, req reconcile.Request, sts *appsv1.StatefulSet) error {
73142
podList := &corev1.PodList{}
74143
if err := r.client.List(ctx, podList, client.InNamespace(req.Namespace), client.MatchingLabels{
75144
podcontroller.GroupNameLabel: GetWorkloadName(req.Name),
@@ -82,16 +151,6 @@ func (r *Reconciler) fetchAndFinalizePods(ctx context.Context, req reconcile.Req
82151
return nil
83152
}
84153

85-
sts := &appsv1.StatefulSet{}
86-
err := r.client.Get(ctx, req.NamespacedName, sts)
87-
if client.IgnoreNotFound(err) != nil {
88-
return err
89-
}
90-
91-
if err != nil {
92-
sts = nil
93-
}
94-
95154
return r.finalizePods(ctx, sts, podList.Items)
96155
}
97156

0 commit comments

Comments
 (0)