Skip to content

Commit af875ee

Browse files
committed
feat: add sandbox_creation_latency metric
Adds a new histogram metric to track the time it takes for a sandbox to become ready after it has been created. Fixes #123
1 parent e568f6c commit af875ee

File tree

3 files changed

+142
-1
lines changed

3 files changed

+142
-1
lines changed

controllers/sandbox_controller.go

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import (
2222
"reflect"
2323
"time"
2424

25+
"github.com/prometheus/client_golang/prometheus"
2526
corev1 "k8s.io/api/core/v1"
2627
k8serrors "k8s.io/apimachinery/pkg/api/errors"
2728
"k8s.io/apimachinery/pkg/api/meta"
@@ -35,6 +36,7 @@ import (
3536
"sigs.k8s.io/controller-runtime/pkg/client"
3637
"sigs.k8s.io/controller-runtime/pkg/handler"
3738
"sigs.k8s.io/controller-runtime/pkg/log"
39+
"sigs.k8s.io/controller-runtime/pkg/metrics"
3840
"sigs.k8s.io/controller-runtime/pkg/predicate"
3941

4042
sandboxv1alpha1 "sigs.k8s.io/agent-sandbox/api/v1alpha1"
@@ -43,16 +45,27 @@ import (
4345
const (
	// sandboxLabel is the label key whose value is a hash derived from the
	// sandbox name.
	sandboxLabel = "agents.x-k8s.io/sandbox-name-hash"

	// sandboxControllerFieldOwner is presumably the field-owner (field manager)
	// name this controller uses for its writes — TODO confirm against callers.
	sandboxControllerFieldOwner = "sandbox-controller"

	// readinessObserved is the annotation key set to "true" on a Sandbox once
	// its creation-to-ready latency has been recorded, so that the metric is
	// only recorded once per sandbox.
	readinessObserved = "agents.x-k8s.io/readiness-observed"
)
4750

4851
var (
4952
// Scheme for use by sandbox controllers. Registers required types for client.
5053
Scheme = runtime.NewScheme()
54+
55+
sandboxCreationLatency = prometheus.NewHistogram(
56+
prometheus.HistogramOpts{
57+
Name: "sandbox_creation_latency",
58+
Help: "Time taken from sandbox creation to sandbox ready",
59+
// 1, 2, 3, 5, 7, 10, 15, 20, 30, 45, 60, 90, 120, 180, 300
60+
Buckets: []float64{1, 2, 3, 5, 7, 10, 15, 20, 30, 45, 60, 90, 120, 180, 300},
61+
},
62+
)
5163
)
5264

5365
func init() {
5466
utilruntime.Must(clientgoscheme.AddToScheme(Scheme))
5567
utilruntime.Must(sandboxv1alpha1.AddToScheme(Scheme))
68+
metrics.Registry.MustRegister(sandboxCreationLatency)
5669
}
5770

5871
// SandboxReconciler reconciles a Sandbox object
@@ -114,13 +127,48 @@ func (r *SandboxReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ct
114127
// Update status
115128
if statusUpdateErr := r.updateStatus(ctx, oldStatus, sandbox); statusUpdateErr != nil {
116129
// Surface update error
130+
statusUpdateErr = fmt.Errorf("failed to update status: %w", statusUpdateErr)
117131
err = errors.Join(err, statusUpdateErr)
118132
}
119133

134+
if recordErr := r.recordFirstReadyMetric(ctx, sandbox); recordErr != nil {
135+
err = errors.Join(err, recordErr)
136+
}
137+
120138
// return errors seen
121139
return ctrl.Result{RequeueAfter: requeueAfter}, err
122140
}
123141

142+
func (r *SandboxReconciler) recordFirstReadyMetric(ctx context.Context, sandbox *sandboxv1alpha1.Sandbox) error {
143+
log := log.FromContext(ctx)
144+
145+
// If already recorded ready dont record metric
146+
if sandbox.Annotations != nil && sandbox.Annotations[readinessObserved] != "" {
147+
return nil
148+
}
149+
150+
// If not ready dont record metric
151+
if !meta.IsStatusConditionTrue(sandbox.Status.Conditions, string(sandboxv1alpha1.SandboxConditionReady)) {
152+
return nil
153+
}
154+
155+
// record metric
156+
latency := time.Since(sandbox.CreationTimestamp.Time).Seconds()
157+
sandboxCreationLatency.Observe(latency)
158+
159+
// add annotation
160+
patch := client.MergeFrom(sandbox.DeepCopy())
161+
if sandbox.Annotations == nil {
162+
sandbox.Annotations = make(map[string]string)
163+
}
164+
sandbox.Annotations[readinessObserved] = "true"
165+
if err := r.Patch(ctx, sandbox, patch); err != nil {
166+
log.Error(err, "Failed to add first-ready-metric annotation")
167+
return err
168+
}
169+
return nil
170+
}
171+
124172
func (r *SandboxReconciler) reconcileChildResources(ctx context.Context, sandbox *sandboxv1alpha1.Sandbox) error {
125173
// Create a hash from the sandbox.Name and use it as label value
126174
nameHash := NameHash(sandbox.Name)

controllers/sandbox_controller_test.go

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,14 +16,17 @@ package controllers
1616

1717
import (
1818
"errors"
19+
"strings"
1920
"testing"
2021
"time"
2122

2223
"github.com/google/go-cmp/cmp"
2324
"github.com/google/go-cmp/cmp/cmpopts"
25+
"github.com/prometheus/client_golang/prometheus/testutil"
2426
"github.com/stretchr/testify/require"
2527
corev1 "k8s.io/api/core/v1"
2628
k8serrors "k8s.io/apimachinery/pkg/api/errors"
29+
"k8s.io/apimachinery/pkg/api/meta"
2730
"k8s.io/apimachinery/pkg/api/resource"
2831
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2932
"k8s.io/apimachinery/pkg/runtime"
@@ -649,3 +652,92 @@ func TestSandboxExpiry(t *testing.T) {
649652
})
650653
}
651654
}
655+
656+
func TestSandboxCreationLatencyMetric(t *testing.T) {
657+
sandboxName := "sandbox-name"
658+
sandboxNs := "sandbox-ns"
659+
sb := &sandboxv1alpha1.Sandbox{}
660+
sb.Name = sandboxName
661+
sb.Namespace = sandboxNs
662+
sb.Generation = 1
663+
sb.CreationTimestamp = metav1.NewTime(time.Now())
664+
sb.Spec = sandboxv1alpha1.SandboxSpec{
665+
PodTemplate: sandboxv1alpha1.PodTemplate{
666+
Spec: corev1.PodSpec{
667+
Containers: []corev1.Container{
668+
{
669+
Name: "test-container",
670+
},
671+
},
672+
},
673+
},
674+
}
675+
676+
r := SandboxReconciler{
677+
Client: newFakeClient(sb),
678+
Scheme: Scheme,
679+
}
680+
681+
_, err := r.Reconcile(t.Context(), ctrl.Request{
682+
NamespacedName: types.NamespacedName{
683+
Name: sandboxName,
684+
Namespace: sandboxNs,
685+
},
686+
})
687+
require.NoError(t, err)
688+
689+
// get pod and mark it ready
690+
pod := &corev1.Pod{}
691+
require.NoError(t, r.Get(t.Context(), types.NamespacedName{Name: sandboxName, Namespace: sandboxNs}, pod))
692+
pod.Status.Phase = corev1.PodRunning
693+
pod.Status.Conditions = []corev1.PodCondition{
694+
{
695+
Type: corev1.PodReady,
696+
Status: corev1.ConditionTrue,
697+
},
698+
}
699+
require.NoError(t, r.Status().Update(t.Context(), pod))
700+
701+
_, err = r.Reconcile(t.Context(), ctrl.Request{
702+
NamespacedName: types.NamespacedName{
703+
Name: sandboxName,
704+
Namespace: sandboxNs,
705+
},
706+
})
707+
require.NoError(t, err)
708+
709+
// Validate Sandbox status
710+
liveSandbox := &sandboxv1alpha1.Sandbox{}
711+
require.NoError(t, r.Get(t.Context(), types.NamespacedName{Name: sandboxName, Namespace: sandboxNs}, liveSandbox))
712+
require.True(t, meta.IsStatusConditionTrue(liveSandbox.Status.Conditions, "Ready"))
713+
require.NotNil(t, liveSandbox.Annotations)
714+
require.Equal(t, "true", liveSandbox.Annotations[readinessObserved])
715+
716+
// Check metric
717+
expected := `
718+
# HELP sandbox_creation_latency Time taken from sandbox creation to sandbox ready
719+
# TYPE sandbox_creation_latency histogram
720+
sandbox_creation_latency_bucket{le="1"} 1
721+
sandbox_creation_latency_bucket{le="2"} 1
722+
sandbox_creation_latency_bucket{le="3"} 1
723+
sandbox_creation_latency_bucket{le="5"} 1
724+
sandbox_creation_latency_bucket{le="7"} 1
725+
sandbox_creation_latency_bucket{le="10"} 1
726+
sandbox_creation_latency_bucket{le="15"} 1
727+
sandbox_creation_latency_bucket{le="20"} 1
728+
sandbox_creation_latency_bucket{le="30"} 1
729+
sandbox_creation_latency_bucket{le="45"} 1
730+
sandbox_creation_latency_bucket{le="60"} 1
731+
sandbox_creation_latency_bucket{le="90"} 1
732+
sandbox_creation_latency_bucket{le="120"} 1
733+
sandbox_creation_latency_bucket{le="180"} 1
734+
sandbox_creation_latency_bucket{le="300"} 1
735+
sandbox_creation_latency_bucket{le="+Inf"} 1
736+
sandbox_creation_latency_count 1
737+
`
738+
err = testutil.CollectAndCompare(sandboxCreationLatency, strings.NewReader(expected), "sandbox_creation_latency")
739+
// We ignore the error because the sum is not deterministic
740+
if err != nil && !strings.Contains(err.Error(), "sandbox_creation_latency_sum") {
741+
require.NoError(t, err)
742+
}
743+
}

go.mod

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ go 1.24.4
44

55
require (
66
github.com/google/go-cmp v0.7.0
7+
github.com/prometheus/client_golang v1.23.2
78
github.com/stretchr/testify v1.11.1
89
k8s.io/api v0.34.1
910
k8s.io/apiextensions-apiserver v0.34.1
@@ -43,13 +44,13 @@ require (
4344
github.com/google/gnostic-models v0.7.0 // indirect
4445
github.com/google/uuid v1.6.0 // indirect
4546
github.com/json-iterator/go v1.1.12 // indirect
47+
github.com/kylelemons/godebug v1.1.0 // indirect
4648
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
4749
github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee // indirect
4850
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
4951
github.com/onsi/ginkgo/v2 v2.23.3 // indirect
5052
github.com/onsi/gomega v1.37.0 // indirect
5153
github.com/pmezard/go-difflib v1.0.0 // indirect
52-
github.com/prometheus/client_golang v1.23.2 // indirect
5354
github.com/prometheus/client_model v0.6.2 // indirect
5455
github.com/prometheus/common v0.67.1 // indirect
5556
github.com/prometheus/procfs v0.17.0 // indirect

0 commit comments

Comments
 (0)