Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 47 additions & 0 deletions controllers/sandbox_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (
"reflect"
"time"

"github.com/prometheus/client_golang/prometheus"
corev1 "k8s.io/api/core/v1"
k8serrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/api/meta"
Expand All @@ -36,6 +37,7 @@ import (
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/handler"
"sigs.k8s.io/controller-runtime/pkg/log"
"sigs.k8s.io/controller-runtime/pkg/metrics"
"sigs.k8s.io/controller-runtime/pkg/predicate"

sandboxv1alpha1 "sigs.k8s.io/agent-sandbox/api/v1alpha1"
Expand All @@ -45,16 +47,26 @@ const (
sandboxLabel = "agents.x-k8s.io/sandbox-name-hash"
SanboxPodNameAnnotation = "agents.x-k8s.io/pod-name"
sandboxControllerFieldOwner = "sandbox-controller"
readinessObserved = "agents.x-k8s.io/readiness-observed"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could we look at oldStatus instead? This looks redundant with the information in status.conditions

)

var (
// Scheme for use by sandbox controllers. Registers required types for client.
Scheme = runtime.NewScheme()

sandboxCreationLatency = prometheus.NewHistogram(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we pull this out into a separate metrics directory or file?

prometheus.HistogramOpts{
Name: "sandbox_creation_latency_ms",
Help: "Time taken from sandbox creation to sandbox ready in milliseconds",
Buckets: []float64{50, 100, 200, 300, 500, 700, 1000, 1500, 2000, 3000, 4500, 6000, 9000, 12000, 18000, 30000},
},
)
)

func init() {
utilruntime.Must(clientgoscheme.AddToScheme(Scheme))
utilruntime.Must(sandboxv1alpha1.AddToScheme(Scheme))
metrics.Registry.MustRegister(sandboxCreationLatency)
}

// SandboxReconciler reconciles a Sandbox object
Expand Down Expand Up @@ -116,13 +128,48 @@ func (r *SandboxReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ct
// Update status
if statusUpdateErr := r.updateStatus(ctx, oldStatus, sandbox); statusUpdateErr != nil {
// Surface update error
statusUpdateErr = fmt.Errorf("faild to update status: %w", statusUpdateErr)
err = errors.Join(err, statusUpdateErr)
}

if recordErr := r.recordFirstReadyMetric(ctx, sandbox); recordErr != nil {
err = errors.Join(err, recordErr)
}

// return errors seen
return ctrl.Result{RequeueAfter: requeueAfter}, err
}

func (r *SandboxReconciler) recordFirstReadyMetric(ctx context.Context, sandbox *sandboxv1alpha1.Sandbox) error {
log := log.FromContext(ctx)

// If readiness was observed already dont re-record the metric
if sandbox.Annotations != nil && sandbox.Annotations[readinessObserved] != "" {
return nil
}

// If not ready dont record metric
if !meta.IsStatusConditionTrue(sandbox.Status.Conditions, string(sandboxv1alpha1.SandboxConditionReady)) {
return nil
}

// record metric
latency := time.Since(sandbox.CreationTimestamp.Time).Milliseconds()
sandboxCreationLatency.Observe(float64(latency))

// add annotation
patch := client.MergeFrom(sandbox.DeepCopy())
if sandbox.Annotations == nil {
sandbox.Annotations = make(map[string]string)
}
sandbox.Annotations[readinessObserved] = fmt.Sprintf("%d", latency)
if err := r.Patch(ctx, sandbox, patch); err != nil {
log.Error(err, "Failed to add first-ready-metric annotation")
return err
}
return nil
}

func (r *SandboxReconciler) reconcileChildResources(ctx context.Context, sandbox *sandboxv1alpha1.Sandbox) error {
// Create a hash from the sandbox.Name and use it as label value
nameHash := NameHash(sandbox.Name)
Expand Down
94 changes: 94 additions & 0 deletions controllers/sandbox_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,17 @@ package controllers

import (
"errors"
"strings"
"testing"
"time"

"github.com/google/go-cmp/cmp"
"github.com/google/go-cmp/cmp/cmpopts"
"github.com/prometheus/client_golang/prometheus/testutil"
"github.com/stretchr/testify/require"
corev1 "k8s.io/api/core/v1"
k8serrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/api/meta"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
Expand Down Expand Up @@ -846,3 +849,94 @@ func TestSandboxExpiry(t *testing.T) {
})
}
}

func TestSandboxCreationLatencyMetric(t *testing.T) {
sandboxName := "sandbox-name"
sandboxNs := "sandbox-ns"
sb := &sandboxv1alpha1.Sandbox{}
sb.Name = sandboxName
sb.Namespace = sandboxNs
sb.Generation = 1
sb.CreationTimestamp = metav1.NewTime(time.Now())
sb.Spec = sandboxv1alpha1.SandboxSpec{
PodTemplate: sandboxv1alpha1.PodTemplate{
Spec: corev1.PodSpec{
Containers: []corev1.Container{
{
Name: "test-container",
},
},
},
},
}

r := SandboxReconciler{
Client: newFakeClient(sb),
Scheme: Scheme,
}

_, err := r.Reconcile(t.Context(), ctrl.Request{
NamespacedName: types.NamespacedName{
Name: sandboxName,
Namespace: sandboxNs,
},
})
require.NoError(t, err)

// get pod and mark it ready
pod := &corev1.Pod{}
require.NoError(t, r.Get(t.Context(), types.NamespacedName{Name: sandboxName, Namespace: sandboxNs}, pod))
pod.Status.Phase = corev1.PodRunning
pod.Status.Conditions = []corev1.PodCondition{
{
Type: corev1.PodReady,
Status: corev1.ConditionTrue,
},
}
require.NoError(t, r.Status().Update(t.Context(), pod))

_, err = r.Reconcile(t.Context(), ctrl.Request{
NamespacedName: types.NamespacedName{
Name: sandboxName,
Namespace: sandboxNs,
},
})
require.NoError(t, err)

// Validate Sandbox status
liveSandbox := &sandboxv1alpha1.Sandbox{}
require.NoError(t, r.Get(t.Context(), types.NamespacedName{Name: sandboxName, Namespace: sandboxNs}, liveSandbox))
require.True(t, meta.IsStatusConditionTrue(liveSandbox.Status.Conditions, "Ready"))
require.NotNil(t, liveSandbox.Annotations)
require.NotNil(t, liveSandbox.Annotations[readinessObserved])

// Check metric
expected := `
# HELP sandbox_creation_latency Time taken from sandbox creation to sandbox ready in milliseconds
# TYPE sandbox_creation_latency histogram
sandbox_creation_latency_bucket{le="50"} 1

sandbox_creation_latency_bucket{le="100"} 1
sandbox_creation_latency_bucket{le="200"} 1
sandbox_creation_latency_bucket{le="300"} 1
sandbox_creation_latency_bucket{le="500"} 1
sandbox_creation_latency_bucket{le="700"} 1
sandbox_creation_latency_bucket{le="1000"} 1
sandbox_creation_latency_bucket{le="1500"} 1
sandbox_creation_latency_bucket{le="2000"} 1
sandbox_creation_latency_bucket{le="3000"} 1
sandbox_creation_latency_bucket{le="4500"} 1
sandbox_creation_latency_bucket{le="6000"} 1
sandbox_creation_latency_bucket{le="9000"} 1
sandbox_creation_latency_bucket{le="12000"} 1
sandbox_creation_latency_bucket{le="18000"} 1
sandbox_creation_latency_bucket{le="30000"} 1
sandbox_creation_latency_bucket{le="+Inf"} 1
sandbox_creation_latency_count 1
`
err = testutil.CollectAndCompare(sandboxCreationLatency, strings.NewReader(expected), "sandbox_creation_latency")
// We ignore the error because the sum is not deterministic
if err != nil && !strings.Contains(err.Error(), "sandbox_creation_latency_sum") {
require.NoError(t, err)
}
}
3 changes: 2 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ go 1.24.4

require (
github.com/google/go-cmp v0.7.0
github.com/prometheus/client_golang v1.23.2
github.com/stretchr/testify v1.11.1
k8s.io/api v0.34.1
k8s.io/apiextensions-apiserver v0.34.1
Expand Down Expand Up @@ -43,13 +44,13 @@ require (
github.com/google/gnostic-models v0.7.0 // indirect
github.com/google/uuid v1.6.0 // indirect
github.com/json-iterator/go v1.1.12 // indirect
github.com/kylelemons/godebug v1.1.0 // indirect
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee // indirect
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/onsi/ginkgo/v2 v2.23.3 // indirect
github.com/onsi/gomega v1.37.0 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/prometheus/client_golang v1.23.2 // indirect
github.com/prometheus/client_model v0.6.2 // indirect
github.com/prometheus/common v0.67.1 // indirect
github.com/prometheus/procfs v0.17.0 // indirect
Expand Down