diff --git a/pkg/cmd/root/root.go b/pkg/cmd/root/root.go index 399535e1..92b7e5b6 100644 --- a/pkg/cmd/root/root.go +++ b/pkg/cmd/root/root.go @@ -13,6 +13,7 @@ import ( "os/exec" "path/filepath" "strings" + "time" "github.com/AlecAivazis/survey/v2/terminal" "github.com/MakeNowJust/heredoc" @@ -121,7 +122,8 @@ func NewRootCmd(f *cmdutil.Factory) *cobra.Command { return cmd } -func Execute() exitCode { +func Execute() (code exitCode) { + start := time.Now() hasDebug := os.Getenv("DEBUG") != "" hasTelemetry := os.Getenv("ALGOLIA_CLI_TELEMETRY") != "0" @@ -192,25 +194,31 @@ func Execute() exitCode { } // Send telemetry. - err = telemetryClient.Track(ctx, "Command Invoked") + err = telemetryClient.Track(ctx, telemetry.EventCommandInvoked, nil) if err != nil && hasDebug { fmt.Fprintf(stderr, "Error tracking telemetry: %s\n", err) } - go telemetryClient.Close() // flush telemetry events - return nil } // Command context is used to pass information to the telemetry client. - ctx, err := createContext(rootCmd, stderr, hasDebug, hasTelemetry) - if err != nil { - printError(stderr, err, rootCmd, hasDebug) - return exitError - } + ctx := createContext(rootCmd, stderr, hasDebug, hasTelemetry) + defer closeTelemetry(ctx) + + // Report how the command ended just before the final flush (deferred + // functions run last-in-first-out). + var executedCmd *cobra.Command + var executeErr error + var elapsed time.Duration + defer func() { + trackCommandCompleted(ctx, executedCmd, code, executeErr, elapsed) + }() - // Run the command. + // Run the command. The duration is measured right away so it never + // includes the update-notifier wait below. cmd, err := rootCmd.ExecuteContextC(ctx) + executedCmd, executeErr, elapsed = cmd, err, time.Since(start) // Handle eventual errors. if err != nil { if err == cmdutil.ErrSilent { @@ -248,31 +256,86 @@ func Execute() exitCode { return exitOK } +// trackCommandCompleted reports how the command ended: success, failure (with +// the class of the error) or user cancellation. +func trackCommandCompleted( + ctx context.Context, + cmd *cobra.Command, + code exitCode, + err error, + elapsed time.Duration, +) { + if cmd == nil || !cmdutil.ShouldTrackUsage(cmd) { + return + } + // An empty command path means PersistentPreRunE never ran (--help, + // --version, unknown flag or command, failed auth check): no Command + // Invoked was sent, so don't send an orphan Command Completed either. + metadata := telemetry.GetEventMetadata(ctx) + if metadata == nil || metadata.CommandPath == "" { + return + } + client := telemetry.GetTelemetryClient(ctx) + if client == nil { + return + } + + props := map[string]any{ + "succeeded": code == exitOK, + "exit_code": int(code), + "duration_ms": elapsed.Milliseconds(), + } + if err != nil { + props["error_class"] = telemetry.ErrorClass(err) + props["user_cancelled"] = cmdutil.IsUserCancellation(err) + } + + _ = client.Track(ctx, telemetry.EventCommandCompleted, props) +} + +// closeTelemetry flushes the pending telemetry events, giving up after a +// short timeout so an unreachable telemetry endpoint never delays exit. +func closeTelemetry(ctx context.Context) { + client := telemetry.GetTelemetryClient(ctx) + if client == nil { + return + } + done := make(chan struct{}) + go func() { + client.Close() + close(done) + }() + select { + case <-done: + case <-time.After(3 * time.Second): + } +} + // createContext creates a context with telemetry. func createContext( cmd *cobra.Command, stderr io.Writer, hasDebug bool, hasTelemetry bool, -) (context.Context, error) { +) context.Context { ctx := context.Background() telemetryMetadata := telemetry.NewEventMetadata() updatedCtx := telemetry.WithEventMetadata(ctx, telemetryMetadata) - var telemetryClient telemetry.TelemetryClient - var err error + var telemetryClient telemetry.TelemetryClient = &telemetry.NoOpTelemetryClient{} if hasTelemetry { - telemetryClient, err = telemetry.NewAnalyticsTelemetryClient(hasDebug) - // Fail silently if telemetry is not available unless in debug mode. - if err != nil && hasDebug { - fmt.Fprintf(stderr, "Error creating telemetry client: %s\n", err) - return nil, err + client, err := telemetry.NewAnalyticsTelemetryClient(hasDebug) + if err != nil { + // Fail silently (fall back to no-op telemetry) unless in debug mode. + if hasDebug { + fmt.Fprintf(stderr, "Error creating telemetry client: %s\n", err) + } + } else { + telemetryClient = client } - } else { - telemetryClient = &telemetry.NoOpTelemetryClient{} } contextWithTelemetry := telemetry.WithTelemetryClient(updatedCtx, telemetryClient) - return contextWithTelemetry, nil + return contextWithTelemetry } // printError prints an error to the stderr, with additional information if applicable. diff --git a/pkg/cmd/root/root_test.go b/pkg/cmd/root/root_test.go index 5e38dca9..da949661 100644 --- a/pkg/cmd/root/root_test.go +++ b/pkg/cmd/root/root_test.go @@ -2,16 +2,113 @@ package root import ( "bytes" + "context" "errors" "fmt" "net" "testing" + "time" "github.com/spf13/cobra" "github.com/algolia/cli/pkg/cmdutil" + "github.com/algolia/cli/pkg/telemetry" ) +// recordingTelemetryClient captures the tracked events so tests can assert on +// them without hitting the network. +type recordingTelemetryClient struct { + events []recordedEvent +} + +type recordedEvent struct { + name string + props map[string]any +} + +func (r *recordingTelemetryClient) Identify(ctx context.Context) error { return nil } + +func (r *recordingTelemetryClient) Track( + ctx context.Context, + event string, + properties map[string]any, +) error { + r.events = append(r.events, recordedEvent{event, properties}) + return nil +} + +func (r *recordingTelemetryClient) Close() {} + +func newTelemetryContext(client telemetry.TelemetryClient, commandPath string) context.Context { + metadata := telemetry.NewEventMetadata() + metadata.SetCommandPath(commandPath) + ctx := telemetry.WithEventMetadata(context.Background(), metadata) + return telemetry.WithTelemetryClient(ctx, client) +} + +func TestTrackCommandCompleted_SkipsWhenPreRunNeverRan(t *testing.T) { + client := &recordingTelemetryClient{} + // An empty command path means PersistentPreRunE never ran. + ctx := newTelemetryContext(client, "") + + trackCommandCompleted(ctx, &cobra.Command{Use: "algolia"}, exitOK, nil, time.Second) + + if len(client.events) != 0 { + t.Errorf("expected no event, got %d", len(client.events)) + } +} + +func TestTrackCommandCompleted_ReportsSuccess(t *testing.T) { + client := &recordingTelemetryClient{} + ctx := newTelemetryContext(client, "algolia indices list") + + trackCommandCompleted(ctx, &cobra.Command{Use: "list"}, exitOK, nil, 1500*time.Millisecond) + + if len(client.events) != 1 { + t.Fatalf("expected 1 event, got %d", len(client.events)) + } + event := client.events[0] + if event.name != telemetry.EventCommandCompleted { + t.Errorf("event = %q, want %q", event.name, telemetry.EventCommandCompleted) + } + if event.props["succeeded"] != true { + t.Errorf("succeeded = %v, want true", event.props["succeeded"]) + } + if event.props["exit_code"] != 0 { + t.Errorf("exit_code = %v, want 0", event.props["exit_code"]) + } + if event.props["duration_ms"] != int64(1500) { + t.Errorf("duration_ms = %v, want 1500", event.props["duration_ms"]) + } + if _, ok := event.props["error_class"]; ok { + t.Error("unexpected error_class on success") + } +} + +func TestTrackCommandCompleted_ReportsFailure(t *testing.T) { + client := &recordingTelemetryClient{} + ctx := newTelemetryContext(client, "algolia indices list") + + trackCommandCompleted(ctx, &cobra.Command{Use: "list"}, exitError, errors.New("boom"), time.Second) + + if len(client.events) != 1 { + t.Fatalf("expected 1 event, got %d", len(client.events)) + } + props := client.events[0].props + if props["succeeded"] != false { + t.Errorf("succeeded = %v, want false", props["succeeded"]) + } + if props["exit_code"] != 1 { + t.Errorf("exit_code = %v, want 1", props["exit_code"]) + } + if props["error_class"] != "*errors.errorString" { + t.Errorf("error_class = %v, want *errors.errorString", props["error_class"]) + } + if props["user_cancelled"] != false { + t.Errorf("user_cancelled = %v, want false", props["user_cancelled"]) + } +} + func TestPrintError(t *testing.T) { cmd := &cobra.Command{} diff --git a/pkg/telemetry/events.go b/pkg/telemetry/events.go new file mode 100644 index 00000000..6d18201f --- /dev/null +++ b/pkg/telemetry/events.go @@ -0,0 +1,127 @@ +package telemetry + +import ( + "errors" + "fmt" + "time" +) + +// Event names. New flow events follow the `CLI ` convention; +// the command lifecycle events stay unprefixed for consistency with the +// historical "Command Invoked". +const ( + EventCommandInvoked = "Command Invoked" + EventCommandCompleted = "Command Completed" + + EventAuthStarted = "CLI Auth Started" + EventAuthCompleted = "CLI Auth Completed" + EventAuthFailed = "CLI Auth Failed" + EventAuthAborted = "CLI Auth Aborted" + + EventApplicationCreateStarted = "CLI Application Create Started" + EventApplicationCreateAcceptedTerms = "CLI Application Create Accepted Terms" + EventApplicationCreateDeclinedTerms = "CLI Application Create Declined Terms" + EventApplicationCreateCompleted = "CLI Application Create Completed" + EventApplicationCreateFailed = "CLI Application Create Failed" + EventApplicationCreateAborted = "CLI Application Create Aborted" + + EventApplicationPlanChangeStarted = "CLI Application Plan Change Started" + EventApplicationPlanChangeAcceptedTerms = "CLI Application Plan Change Accepted Terms" + EventApplicationPlanChangeDeclinedTerms = "CLI Application Plan Change Declined Terms" + EventApplicationPlanChangeCompleted = "CLI Application Plan Change Completed" + EventApplicationPlanChangeFailed = "CLI Application Plan Change Failed" + EventApplicationPlanChangeAborted = "CLI Application Plan Change Aborted" +) + +// Flow is the kind of auth flow the user is going through. +type Flow string + +const ( + FlowLogin Flow = "login" + FlowSignup Flow = "signup" +) + +// Step locates where the user is inside an interactive flow, so aborts and +// failures can tell where the user stopped. +type Step string + +const ( + // Auth flow steps. + StepBrowserWait Step = "browser_wait" + StepCodeExchange Step = "code_exchange" + StepAppsFetch Step = "apps_fetch" + StepAppSelect Step = "app_select" + StepAppCreate Step = "app_create" + StepProfileConfigure Step = "profile_configure" + + // Application create and plan change flow steps. + StepName Step = "name" + StepPlan Step = "plan" + StepTerms Step = "terms" + StepRegion Step = "region" + StepAPICall Step = "api_call" + StepApplyPlan Step = "apply_plan" +) + +// Direction is the direction of a plan change. +type Direction string + +const ( + DirectionUpgrade Direction = "upgrade" + DirectionDowngrade Direction = "downgrade" +) + +// FlowTracker carries the state of one interactive flow: the step the user is +// currently in and the flow start time, to compute durations. All its methods +// are safe on a nil tracker, so helpers shared by several flows can take an +// optional tracker. +type FlowTracker struct { + start time.Time + step Step +} + +func NewFlowTracker() *FlowTracker { + return &FlowTracker{start: time.Now()} +} + +// SetStep records the step the flow is entering. +func (f *FlowTracker) SetStep(step Step) { + if f == nil { + return + } + f.step = step +} + +// Step returns the step the flow is currently in. +func (f *FlowTracker) Step() Step { + if f == nil { + return "" + } + return f.step +} + +// DurationMS returns the time elapsed since the flow started, in milliseconds. +func (f *FlowTracker) DurationMS() int64 { + if f == nil { + return 0 + } + return time.Since(f.start).Milliseconds() +} + +// ErrorClass returns the type of the first informative error of the chain, +// skipping the anonymous wrappers created by fmt.Errorf. It never returns an +// error message, which could contain user data. +func ErrorClass(err error) string { + for err != nil { + class := fmt.Sprintf("%T", err) + switch class { + case "*fmt.wrapError", "*fmt.wrapErrors", "*errors.joinError": + if unwrapped := errors.Unwrap(err); unwrapped != nil { + err = unwrapped + continue + } + } + return class + } + return "" +} diff --git a/pkg/telemetry/events_test.go b/pkg/telemetry/events_test.go new file mode 100644 index 00000000..61c5a9ee --- /dev/null +++ b/pkg/telemetry/events_test.go @@ -0,0 +1,45 @@ +package telemetry + +import ( + "fmt" + "testing" + + "github.com/stretchr/testify/assert" +) + +type testRootError struct{} + +func (testRootError) Error() string { return "boom" } + +type testWrapperError struct{ inner error } + +func (e testWrapperError) Error() string { return "wrap" } +func (e testWrapperError) Unwrap() error { return e.inner } + +func TestErrorClass_SkipsFmtWrappers(t *testing.T) { + wrapped := fmt.Errorf("outer: %w", fmt.Errorf("inner: %w", testRootError{})) + assert.Equal(t, "telemetry.testRootError", ErrorClass(wrapped)) +} + +func TestErrorClass_KeepsInformativeWrapperType(t *testing.T) { + wrapped := fmt.Errorf("outer: %w", testWrapperError{inner: testRootError{}}) + assert.Equal(t, "telemetry.testWrapperError", ErrorClass(wrapped)) +} + +func TestErrorClass_NilError(t *testing.T) { + assert.Equal(t, "", ErrorClass(nil)) +} + +func TestFlowTracker_NilTrackerIsSafe(t *testing.T) { + var tracker *FlowTracker + tracker.SetStep(StepTerms) + assert.Equal(t, Step(""), tracker.Step()) + assert.Equal(t, int64(0), tracker.DurationMS()) +} + +func TestFlowTracker_TracksStepAndDuration(t *testing.T) { + tracker := NewFlowTracker() + tracker.SetStep(StepPlan) + assert.Equal(t, StepPlan, tracker.Step()) + assert.GreaterOrEqual(t, tracker.DurationMS(), int64(0)) +} diff --git a/pkg/telemetry/telemetry.go b/pkg/telemetry/telemetry.go index f81abe75..9330d055 100644 --- a/pkg/telemetry/telemetry.go +++ b/pkg/telemetry/telemetry.go @@ -8,6 +8,7 @@ import ( "net" "os" "runtime" + "sync/atomic" "github.com/segmentio/analytics-go/v3" "github.com/spf13/cobra" @@ -29,12 +30,16 @@ type telemetryClientKey struct{} type TelemetryClient interface { Identify(ctx context.Context) error - Track(ctx context.Context, event string) error + Track(ctx context.Context, event string, properties map[string]any) error Close() } type AnalyticsTelemetryClient struct { client analytics.Client + // sequence numbers the Track events of one invocation so their order can + // be reconstructed downstream: Segment stores timestamps with millisecond + // precision, so back-to-back events can tie. + sequence atomic.Int64 } type AnalyticsTelemetryLogger struct { @@ -63,6 +68,7 @@ func NewAnalyticsTelemetryClient(debug bool) (TelemetryClient, error) { client, err := analytics.NewWithConfig("", analytics.Config{ Endpoint: telemetryAnalyticsURL, Logger: newTelemetryLogger(debug), + Verbose: debug, }) if err != nil { return nil, err @@ -236,19 +242,30 @@ func (a *AnalyticsTelemetryClient) Identify(ctx context.Context) error { return a.client.Enqueue(identify) } -// Track tracks the event with the provided properties -func (a *AnalyticsTelemetryClient) Track(ctx context.Context, event string) error { +// Track tracks the event with the provided custom properties, merged with the +// base properties of the invocation +func (a *AnalyticsTelemetryClient) Track( + ctx context.Context, + event string, + properties map[string]any, +) error { metadata := GetEventMetadata(ctx) + props := make(map[string]any, len(properties)+5) + for k, v := range properties { + props[k] = v + } + // Base properties are set last so custom ones can never override them. + props["invocation_id"] = metadata.InvocationID + props["app_id"] = metadata.AppID + props["command"] = metadata.CommandPath + props["flags"] = metadata.CommandFlags + props["sequence"] = a.sequence.Add(1) + track := analytics.Track{ Event: event, AnonymousId: metadata.AnonymousID, - Properties: map[string]interface{}{ - "invocation_id": metadata.InvocationID, - "app_id": metadata.AppID, - "command": metadata.CommandPath, - "flags": metadata.CommandFlags, - }, + Properties: props, Context: &analytics.Context{ Device: analytics.DeviceInfo{ Id: metadata.AnonymousID, @@ -268,6 +285,14 @@ func (a *AnalyticsTelemetryClient) Close() { _ = a.client.Close() } -func (a *NoOpTelemetryClient) Identify(ctx context.Context) error { return nil } -func (a *NoOpTelemetryClient) Track(ctx context.Context, event string) error { return nil } -func (a *NoOpTelemetryClient) Close() {} +func (a *NoOpTelemetryClient) Identify(ctx context.Context) error { return nil } + +func (a *NoOpTelemetryClient) Track( + ctx context.Context, + event string, + properties map[string]any, +) error { + return nil +} + +func (a *NoOpTelemetryClient) Close() {} diff --git a/pkg/telemetry/telemetry_test.go b/pkg/telemetry/telemetry_test.go index 8e381464..d9445ce2 100644 --- a/pkg/telemetry/telemetry_test.go +++ b/pkg/telemetry/telemetry_test.go @@ -2,6 +2,7 @@ package telemetry import ( "context" + "sync" "testing" "github.com/segmentio/analytics-go/v3" @@ -76,10 +77,13 @@ func TestSetUser(t *testing.T) { // fakeAnalyticsClient captures the messages enqueued by the telemetry client so // tests can assert on the payload without hitting the network. type fakeAnalyticsClient struct { + mu sync.Mutex messages []analytics.Message } func (f *fakeAnalyticsClient) Enqueue(msg analytics.Message) error { + f.mu.Lock() + defer f.mu.Unlock() f.messages = append(f.messages, msg) return nil } @@ -130,7 +134,7 @@ func TestTrack_IncludesUserWhenAuthenticated(t *testing.T) { metadata.SetUser("user-42", "user@test.com", "Test User") ctx := WithEventMetadata(context.Background(), metadata) - require.NoError(t, client.Track(ctx, "Command Invoked")) + require.NoError(t, client.Track(ctx, "Command Invoked", nil)) require.Len(t, fake.messages, 1) track, ok := fake.messages[0].(analytics.Track) @@ -146,10 +150,96 @@ func TestTrack_OmitsUserWhenAnonymous(t *testing.T) { metadata := NewEventMetadata() ctx := WithEventMetadata(context.Background(), metadata) - require.NoError(t, client.Track(ctx, "Command Invoked")) + require.NoError(t, client.Track(ctx, "Command Invoked", nil)) require.Len(t, fake.messages, 1) track, ok := fake.messages[0].(analytics.Track) require.True(t, ok) assert.Empty(t, track.UserId) } + +func TestTrack_MergesCustomProperties(t *testing.T) { + fake := &fakeAnalyticsClient{} + client := &AnalyticsTelemetryClient{client: fake} + + metadata := NewEventMetadata() + metadata.SetAppID("app-id") + ctx := WithEventMetadata(context.Background(), metadata) + + require.NoError(t, client.Track(ctx, "CLI Auth Started", map[string]any{"flow": "login"})) + require.Len(t, fake.messages, 1) + + track, ok := fake.messages[0].(analytics.Track) + require.True(t, ok) + assert.Equal(t, "login", track.Properties["flow"]) + assert.Equal(t, metadata.InvocationID, track.Properties["invocation_id"]) + assert.Equal(t, "app-id", track.Properties["app_id"]) +} + +func TestTrack_SequenceIsMonotonic(t *testing.T) { + fake := &fakeAnalyticsClient{} + client := &AnalyticsTelemetryClient{client: fake} + + metadata := NewEventMetadata() + ctx := WithEventMetadata(context.Background(), metadata) + + for i := 0; i < 3; i++ { + require.NoError(t, client.Track(ctx, "Command Invoked", nil)) + } + require.Len(t, fake.messages, 3) + + for i, msg := range fake.messages { + track, ok := msg.(analytics.Track) + require.True(t, ok) + assert.Equal(t, int64(i+1), track.Properties["sequence"]) + } +} + +func TestTrack_SequenceIsUniqueUnderConcurrency(t *testing.T) { + fake := &fakeAnalyticsClient{} + client := &AnalyticsTelemetryClient{client: fake} + + metadata := NewEventMetadata() + ctx := WithEventMetadata(context.Background(), metadata) + + const n = 100 + var wg sync.WaitGroup + for i := 0; i < n; i++ { + wg.Add(1) + go func() { + defer wg.Done() + _ = client.Track(ctx, "Command Invoked", nil) + }() + } + wg.Wait() + + require.Len(t, fake.messages, n) + seen := make(map[int64]bool, n) + for _, msg := range fake.messages { + track, ok := msg.(analytics.Track) + require.True(t, ok) + seq, ok := track.Properties["sequence"].(int64) + require.True(t, ok) + assert.False(t, seen[seq], "duplicate sequence %d", seq) + seen[seq] = true + } +} + +func TestTrack_CustomPropertiesCannotOverrideBase(t *testing.T) { + fake := &fakeAnalyticsClient{} + client := &AnalyticsTelemetryClient{client: fake} + + metadata := NewEventMetadata() + ctx := WithEventMetadata(context.Background(), metadata) + + require.NoError(t, client.Track(ctx, "Command Invoked", map[string]any{ + "invocation_id": "spoofed", + "sequence": int64(999), + })) + require.Len(t, fake.messages, 1) + + track, ok := fake.messages[0].(analytics.Track) + require.True(t, ok) + assert.Equal(t, metadata.InvocationID, track.Properties["invocation_id"]) + assert.Equal(t, int64(1), track.Properties["sequence"]) +}