diff --git a/acceptance/bundle/telemetry/deploy/script b/acceptance/bundle/telemetry/deploy/script index de0ac667187..6b9fa7dcb98 100644 --- a/acceptance/bundle/telemetry/deploy/script +++ b/acceptance/bundle/telemetry/deploy/script @@ -10,7 +10,10 @@ trace cat telemetry.json | jq ' .entry.databricks_cli_log.bundle_deploy_event.ex # bundle_mutator_execution_time_ms can have variable number of entries depending upon the runtime of the mutators. Thus we omit it from # being asserted here. -cat telemetry.json | jq 'del(.entry.databricks_cli_log.bundle_deploy_event.experimental.bundle_mutator_execution_time_ms)' > out.telemetry.txt +# resources_metadata is only emitted for direct deploys (it reads the direct +# engine's resources.json), so it diverges across the DATABRICKS_BUNDLE_ENGINE +# matrix; omit it here to keep this golden engine-agnostic. +cat telemetry.json | jq 'del(.entry.databricks_cli_log.bundle_deploy_event.experimental.bundle_mutator_execution_time_ms, .entry.databricks_cli_log.bundle_deploy_event.resources_metadata)' > out.telemetry.txt cmd_exec_id=$(extract_command_exec_id.py) deployment_id=$(cat .databricks/bundle/default/deployment.json | jq -r .id) diff --git a/bundle/phases/resources_metadata.go b/bundle/phases/resources_metadata.go new file mode 100644 index 00000000000..2411d117850 --- /dev/null +++ b/bundle/phases/resources_metadata.go @@ -0,0 +1,147 @@ +package phases + +import ( + "context" + "encoding/json" + "errors" + "io/fs" + "os" + "slices" + "strings" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config/engine" + "github.com/databricks/cli/bundle/direct/dstate" + "github.com/databricks/cli/libs/log" + "github.com/databricks/cli/libs/telemetry/protos" +) + +// collectResourcesMetadata builds a BundleResourcesMetadata from the direct +// engine's deployment state file. +// +// Only direct deploys are measured. The direct engine persists each resource's +// state as a JSON blob in resources.json, so per-resource sizes are read off +// directly as len(entry.State) — no serialization or config-walking is done at +// telemetry time. Terraform stores state in a different shape and is not +// collected; for those deploys this returns nil. +// +// Telemetry must never fail a deploy, so a missing or unparseable state file is +// logged at debug level and treated as no data (returns nil). +func collectResourcesMetadata(ctx context.Context, b *bundle.Bundle) *protos.BundleResourcesMetadata { + // resource_*_count covers both engines; this state-size metadata only + // applies to the direct engine's resources.json. + if resolveDeployEngine(ctx, b) != string(engine.EngineDirect) { + return nil + } + + // A target must be selected to resolve the local state path. + if b.Target == nil { + return nil + } + + _, localPath := b.StateFilenameDirect(ctx) + raw, err := os.ReadFile(localPath) + if err != nil { + if !errors.Is(err, fs.ErrNotExist) { + log.Debugf(ctx, "resources-metadata telemetry: skipping direct state at %s: %s", localPath, err) + } + return nil + } + + var db dstate.Database + if err := json.Unmarshal(raw, &db); err != nil { + log.Debugf(ctx, "resources-metadata telemetry: failed to parse direct state: %s", err) + return nil + } + + counts := make(map[string]int64) + sizesByType := make(map[string][]int64) + for key, entry := range db.State { + t := resourceTypeFromKey(key) + if t == "" { + continue + } + counts[t]++ + sizesByType[t] = append(sizesByType[t], int64(len(entry.State))) + } + if len(counts) == 0 { + return nil + } + + types := make([]string, 0, len(counts)) + for t := range counts { + types = append(types, t) + } + slices.Sort(types) + + resources := make([]protos.ResourceMetadata, 0, len(types)) + for _, t := range types { + sizes := sizesByType[t] + slices.Sort(sizes) + resources = append(resources, protos.ResourceMetadata{ + ResourceType: t, + Count: counts[t], + StateSizeMaxBytes: statMax(sizes), + StateSizeMeanBytes: statMean(sizes), + StateSizeMedianBytes: statMedian(sizes), + }) + } + + return &protos.BundleResourcesMetadata{ + StateEngine: string(engine.EngineDirect), + StateFileSizeBytes: int64(len(raw)), + Resources: resources, + } +} + +// resourceTypeFromKey extracts the resource type from a direct-engine state +// key. Keys are "resources..", or "resources..." +// for sub-resources like permissions / grants / secret_acls. Sub-resources are +// tracked under the sub-resource type so they aggregate across resource +// families. Returns "" for keys that don't match. +func resourceTypeFromKey(key string) string { + parts := strings.SplitN(key, ".", 4) + if len(parts) < 3 || parts[0] != "resources" { + return "" + } + if len(parts) == 4 { + return parts[3] + } + return parts[1] +} + +// resolveDeployEngine returns the effective deploy engine ("direct" or +// "terraform"). Mirrors cmd/bundle/utils.ResolveEngineSetting but is inlined +// here to avoid a layering import (bundle/phases must not depend on cmd/). +func resolveDeployEngine(ctx context.Context, b *bundle.Bundle) string { + if b.Config.Bundle.Engine != engine.EngineNotSet { + return string(b.Config.Bundle.Engine.ThisOrDefault()) + } + envEngine, _ := engine.FromEnv(ctx) + return string(envEngine.ThisOrDefault()) +} + +func statMax(sortedSizes []int64) int64 { + if len(sortedSizes) == 0 { + return 0 + } + return sortedSizes[len(sortedSizes)-1] +} + +func statMean(sortedSizes []int64) int64 { + if len(sortedSizes) == 0 { + return 0 + } + var total int64 + for _, s := range sortedSizes { + total += s + } + return total / int64(len(sortedSizes)) +} + +func statMedian(sortedSizes []int64) int64 { + if len(sortedSizes) == 0 { + return 0 + } + return sortedSizes[(len(sortedSizes)-1)/2] +} diff --git a/bundle/phases/resources_metadata_test.go b/bundle/phases/resources_metadata_test.go new file mode 100644 index 00000000000..1089e0b4700 --- /dev/null +++ b/bundle/phases/resources_metadata_test.go @@ -0,0 +1,151 @@ +package phases + +import ( + "encoding/json" + "os" + "path/filepath" + "testing" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config" + "github.com/databricks/cli/bundle/config/engine" + "github.com/databricks/cli/bundle/direct/dstate" + "github.com/databricks/cli/libs/env" + "github.com/databricks/cli/libs/telemetry/protos" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestResourceTypeFromKey(t *testing.T) { + cases := []struct { + in string + want string + }{ + {"resources.jobs.foo", "jobs"}, + {"resources.pipelines.bar", "pipelines"}, + {"resources.jobs.foo.permissions", "permissions"}, + {"resources.secret_scopes.s.permissions", "permissions"}, + {"not-a-state-key", ""}, + {"resources.jobs", ""}, + } + for _, c := range cases { + assert.Equal(t, c.want, resourceTypeFromKey(c.in), "key=%q", c.in) + } +} + +func TestResolveDeployEngine(t *testing.T) { + cases := []struct { + name string + configEng engine.EngineType + envEng string + want string + }{ + {"config wins over env", engine.EngineDirect, "terraform", "direct"}, + {"env used when config unset", engine.EngineNotSet, "direct", "direct"}, + {"default when neither set", engine.EngineNotSet, "", "terraform"}, + } + for _, c := range cases { + t.Run(c.name, func(t *testing.T) { + b := &bundle.Bundle{} + b.Config.Bundle.Engine = c.configEng + ctx := env.Set(t.Context(), engine.EnvVar, c.envEng) + assert.Equal(t, c.want, resolveDeployEngine(ctx, b)) + }) + } +} + +func TestStatHelpers(t *testing.T) { + assert.Equal(t, int64(3), statMax([]int64{1, 2, 3})) + assert.Equal(t, int64(2), statMean([]int64{1, 2, 3})) + assert.Equal(t, int64(2), statMedian([]int64{1, 2, 3})) + // Lower-middle for even count: sorted [1,2,3,4] -> index (4-1)/2 = 1 -> 2. + assert.Equal(t, int64(2), statMedian([]int64{1, 2, 3, 4})) + assert.Equal(t, int64(0), statMax(nil)) + assert.Equal(t, int64(0), statMean(nil)) + assert.Equal(t, int64(0), statMedian(nil)) +} + +// directStateBundle writes a resources.json with the given per-key state blobs +// and returns a bundle wired to read it via StateFilenameDirect. +func directStateBundle(t *testing.T, state map[string]dstate.ResourceEntry) *bundle.Bundle { + t.Helper() + b := &bundle.Bundle{ + BundleRootPath: t.TempDir(), + Config: config.Root{ + Bundle: config.Bundle{ + Engine: engine.EngineDirect, + Target: "default", + }, + Workspace: config.Workspace{ + StatePath: "/Workspace/state", + }, + }, + Target: &config.Target{}, + } + _, localPath := b.StateFilenameDirect(t.Context()) + require.NoError(t, os.MkdirAll(filepath.Dir(localPath), 0o755)) + raw, err := json.Marshal(dstate.Database{State: state}) + require.NoError(t, err) + require.NoError(t, os.WriteFile(localPath, raw, 0o600)) + return b +} + +func TestCollectResourcesMetadata_GroupsByTypeFromState(t *testing.T) { + b := directStateBundle(t, map[string]dstate.ResourceEntry{ + "resources.jobs.foo": {State: json.RawMessage(`{"name":"foo","x":1}`)}, // 20 + "resources.jobs.bar": {State: json.RawMessage(`{"n":"bar"}`)}, // 11 + "resources.jobs.foo.permissions": {State: json.RawMessage(`[]`)}, // 2 + "resources.pipelines.qux": {State: json.RawMessage(`{"name":"qux"}`)}, // 14 + }) + + md := collectResourcesMetadata(t.Context(), b) + require.NotNil(t, md) + assert.Equal(t, "direct", md.StateEngine) + assert.Positive(t, md.StateFileSizeBytes) + + byType := make(map[string]protos.ResourceMetadata) + for _, r := range md.Resources { + byType[r.ResourceType] = r + } + assert.Equal(t, int64(2), byType["jobs"].Count) + assert.Equal(t, int64(20), byType["jobs"].StateSizeMaxBytes) + assert.Equal(t, int64(15), byType["jobs"].StateSizeMeanBytes) // (20+11)/2 + assert.Equal(t, int64(11), byType["jobs"].StateSizeMedianBytes) + assert.Equal(t, int64(1), byType["pipelines"].Count) + assert.Equal(t, int64(1), byType["permissions"].Count) +} + +func TestCollectResourcesMetadata_NilForTerraform(t *testing.T) { + b := directStateBundle(t, map[string]dstate.ResourceEntry{ + "resources.jobs.foo": {State: json.RawMessage(`{}`)}, + }) + b.Config.Bundle.Engine = engine.EngineTerraform + assert.Nil(t, collectResourcesMetadata(t.Context(), b)) +} + +func TestCollectResourcesMetadata_NilWhenNoStateFile(t *testing.T) { + b := &bundle.Bundle{ + BundleRootPath: t.TempDir(), + Config: config.Root{ + Bundle: config.Bundle{Engine: engine.EngineDirect, Target: "default"}, + Workspace: config.Workspace{StatePath: "/Workspace/state"}, + }, + Target: &config.Target{}, + } + assert.Nil(t, collectResourcesMetadata(t.Context(), b)) +} + +func TestCollectResourcesMetadata_NilOnMalformedState(t *testing.T) { + b := &bundle.Bundle{ + BundleRootPath: t.TempDir(), + Config: config.Root{ + Bundle: config.Bundle{Engine: engine.EngineDirect, Target: "default"}, + Workspace: config.Workspace{StatePath: "/Workspace/state"}, + }, + Target: &config.Target{}, + } + _, localPath := b.StateFilenameDirect(t.Context()) + require.NoError(t, os.MkdirAll(filepath.Dir(localPath), 0o755)) + require.NoError(t, os.WriteFile(localPath, []byte("not json"), 0o600)) + assert.Nil(t, collectResourcesMetadata(t.Context(), b)) +} diff --git a/bundle/phases/telemetry.go b/bundle/phases/telemetry.go index e64b736fc72..cac25eceee1 100644 --- a/bundle/phases/telemetry.go +++ b/bundle/phases/telemetry.go @@ -202,6 +202,8 @@ func LogDeployTelemetry(ctx context.Context, b *bundle.Bundle, errMsg string) { ResourceClusterIDs: clusterIds, ResourceDashboardIDs: dashboardIds, + ResourcesMetadata: collectResourcesMetadata(ctx, b), + Experimental: &protos.BundleDeployExperimental{ BundleMode: mode, ConfigurationFileCount: b.Metrics.ConfigurationFileCount, diff --git a/libs/telemetry/protos/bundle_deploy.go b/libs/telemetry/protos/bundle_deploy.go index d9439437d9b..73562782f1a 100644 --- a/libs/telemetry/protos/bundle_deploy.go +++ b/libs/telemetry/protos/bundle_deploy.go @@ -32,6 +32,9 @@ type BundleDeployEvent struct { ResourceClusterIDs []string `json:"resource_cluster_ids,omitempty"` ResourceDashboardIDs []string `json:"resource_dashboard_ids,omitempty"` + // Per-resource-type metadata (counts and state-size statistics). + ResourcesMetadata *BundleResourcesMetadata `json:"resources_metadata,omitempty"` + Experimental *BundleDeployExperimental `json:"experimental,omitempty"` } @@ -88,6 +91,40 @@ type BundleDeployExperimental struct { LocalCacheMeasurementsMs []IntMapEntry `json:"local_cache_measurements_ms,omitempty"` } +// BundleResourcesMetadata mirrors the universe proto. Per-resource-type +// state-size metadata for one bundle deployment. +// +// Only direct deploys are measured: the direct engine persists each resource's +// state as a JSON blob in resources.json, so sizes are read off directly as +// len(state) — no serialization happens at telemetry time. Terraform stores +// state in a different shape and is not collected (the field is absent there). +type BundleResourcesMetadata struct { + // Always "direct"; terraform deploys do not populate this message. + StateEngine string `json:"state_engine,omitempty"` + + // Size in bytes of the direct engine's resources.json state file on disk. + StateFileSizeBytes int64 `json:"state_file_size_bytes,omitempty"` + + // One entry per resource type present in the deployment state. + Resources []ResourceMetadata `json:"resources,omitempty"` +} + +// ResourceMetadata holds metadata about resources of a single type within one +// bundle deployment. +type ResourceMetadata struct { + // Resource type name: "jobs", "pipelines", "schemas", ... + ResourceType string `json:"resource_type,omitempty"` + + // Number of resources of this type tracked in the deployment state. + Count int64 `json:"count,omitempty"` + + // State-size statistics across resources of this type, each measured as + // len(state) of the JSON blob stored in resources.json. + StateSizeMaxBytes int64 `json:"state_size_max_bytes,omitempty"` + StateSizeMeanBytes int64 `json:"state_size_mean_bytes,omitempty"` + StateSizeMedianBytes int64 `json:"state_size_median_bytes,omitempty"` +} + type BoolMapEntry struct { Key string `json:"key,omitempty"` Value bool `json:"value"`