Skip to content

Commit 2eededb

Browse files
Add trace context propagation in atomic steps
2 parents b310419 + 2941b44 commit 2eededb

12 files changed

+400
-42
lines changed
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
/*
2+
* Copyright The Original Author or Authors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
package io.jenkins.plugins.opentelemetry.init;
7+
8+
import hudson.Extension;
9+
import hudson.util.ClassLoaderSanityThreadFactory;
10+
import hudson.util.DaemonThreadFactory;
11+
import hudson.util.NamingThreadFactory;
12+
import io.jenkins.plugins.opentelemetry.api.OpenTelemetryLifecycleListener;
13+
import io.opentelemetry.context.Context;
14+
import io.opentelemetry.sdk.autoconfigure.spi.ConfigProperties;
15+
import org.jenkinsci.plugins.workflow.steps.SynchronousNonBlockingStepExecution;
16+
17+
import javax.annotation.Nonnull;
18+
import java.lang.reflect.Field;
19+
import java.util.Arrays;
20+
import java.util.Optional;
21+
import java.util.concurrent.ExecutorService;
22+
import java.util.concurrent.Executors;
23+
import java.util.logging.Level;
24+
import java.util.logging.Logger;
25+
26+
@Extension
27+
public class StepExecutionInstrumentationInitializer implements OpenTelemetryLifecycleListener {
28+
29+
final static Logger logger = Logger.getLogger(StepExecutionInstrumentationInitializer.class.getName());
30+
31+
@Override
32+
public void afterConfiguration(@Nonnull ConfigProperties configProperties) {
33+
try {
34+
logger.log(Level.FINE, () -> "Instrumenting " + SynchronousNonBlockingStepExecution.class.getName() + "...");
35+
Class<SynchronousNonBlockingStepExecution> synchronousNonBlockingStepExecutionClass = SynchronousNonBlockingStepExecution.class;
36+
Arrays.stream(synchronousNonBlockingStepExecutionClass.getDeclaredFields()).forEach(field -> logger.log(Level.FINE, () -> "Field: " + field.getName()));
37+
Field executorServiceField = synchronousNonBlockingStepExecutionClass.getDeclaredField("executorService");
38+
executorServiceField.setAccessible(true);
39+
ExecutorService executorService = (ExecutorService) Optional.ofNullable(executorServiceField.get(null)).orElseGet(() -> Executors.newCachedThreadPool(new NamingThreadFactory(new ClassLoaderSanityThreadFactory(new DaemonThreadFactory()), "org.jenkinsci.plugins.workflow.steps.SynchronousNonBlockingStepExecution")));
40+
ExecutorService instrumentedExecutorService = Context.taskWrapping(executorService);
41+
executorServiceField.set(null, instrumentedExecutorService);
42+
43+
// org.jenkinsci.plugins.workflow.cps.CpsThreadGroup.runner
44+
} catch (NoSuchFieldException | IllegalAccessException e) {
45+
throw new RuntimeException(e);
46+
}
47+
}
48+
}

src/main/java/io/jenkins/plugins/opentelemetry/job/MonitoringAction.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ public class MonitoringAction extends AbstractMonitoringAction implements Action
5050
private transient Run run;
5151

5252
public MonitoringAction(Span span) {
53-
super(span);
53+
super(span, Collections.emptyList());
5454
this.rootSpanName = super.getSpanName();
5555
this.rootContext = super.getW3cTraceContext();
5656
}

src/main/java/io/jenkins/plugins/opentelemetry/job/MonitoringPipelineListener.java

Lines changed: 25 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -200,38 +200,37 @@ public void onAtomicStep(@NonNull StepAtomNode node, @NonNull WorkflowRun run) {
200200
LOGGER.log(Level.FINE, () -> run.getFullDisplayName() + " - don't create span for step '" + node.getDisplayFunctionName() + "'");
201201
return;
202202
}
203-
try (Scope ignored = setupContext(run, node)) {
204-
verifyNotNull(ignored, "%s - No span found for node %s", run, node);
203+
Scope encapsulatingNodeScope = setupContext(run, node);
205204

206-
String principal = Objects.toString(node.getExecution().getAuthentication().getPrincipal(), "#null#");
207-
LOGGER.log(Level.FINE, () -> node.getDisplayFunctionName() + " - principal: " + principal);
205+
verifyNotNull(encapsulatingNodeScope, "%s - No span found for node %s", run, node);
208206

209-
StepHandler stepHandler = getStepHandlers().stream().filter(sh -> sh.canCreateSpanBuilder(node, run)).findFirst()
210-
.orElseThrow((Supplier<RuntimeException>) () ->
211-
new IllegalStateException("No StepHandler found for node " + node.getClass() + " - " + node + " on " + run));
212-
SpanBuilder spanBuilder = stepHandler.createSpanBuilder(node, run, getTracer());
207+
String principal = Objects.toString(node.getExecution().getAuthentication().getPrincipal(), "#null#");
213208

214-
String stepType = getStepType(node, node.getDescriptor(), JenkinsOtelSemanticAttributes.STEP_NAME);
215-
JenkinsOpenTelemetryPluginConfiguration.StepPlugin stepPlugin = JenkinsOpenTelemetryPluginConfiguration.get().findStepPluginOrDefault(stepType, node);
209+
StepHandler stepHandler = getStepHandlers().stream().filter(sh -> sh.canCreateSpanBuilder(node, run)).findFirst()
210+
.orElseThrow((Supplier<RuntimeException>) () ->
211+
new IllegalStateException("No StepHandler found for node " + node.getClass() + " - " + node + " on " + run));
212+
SpanBuilder spanBuilder = stepHandler.createSpanBuilder(node, run, getTracer());
216213

217-
spanBuilder
218-
.setParent(Context.current()) // TODO can we remove this call?
219-
.setAttribute(JenkinsOtelSemanticAttributes.JENKINS_STEP_TYPE, stepType)
220-
.setAttribute(JenkinsOtelSemanticAttributes.JENKINS_STEP_ID, node.getId())
221-
.setAttribute(JenkinsOtelSemanticAttributes.JENKINS_STEP_NAME, getStepName(node, JenkinsOtelSemanticAttributes.STEP_NAME))
222-
.setAttribute(JenkinsOtelSemanticAttributes.CI_PIPELINE_RUN_USER, principal)
223-
.setAttribute(JenkinsOtelSemanticAttributes.JENKINS_STEP_PLUGIN_NAME, stepPlugin.getName())
224-
.setAttribute(JenkinsOtelSemanticAttributes.JENKINS_STEP_PLUGIN_VERSION, stepPlugin.getVersion());
214+
String stepType = getStepType(node, node.getDescriptor(), JenkinsOtelSemanticAttributes.STEP_NAME);
215+
JenkinsOpenTelemetryPluginConfiguration.StepPlugin stepPlugin = JenkinsOpenTelemetryPluginConfiguration.get().findStepPluginOrDefault(stepType, node);
225216

226-
Span atomicStepSpan = spanBuilder.startSpan();
227-
LOGGER.log(Level.FINE, () -> run.getFullDisplayName() + " - > " + node.getDisplayFunctionName() + " - begin " + OtelUtils.toDebugString(atomicStepSpan));
228-
try (Scope ignored2 = atomicStepSpan.makeCurrent()) {
229-
stepHandler.afterSpanCreated(node, run);
230-
}
231-
getTracerService().putSpan(run, atomicStepSpan, node);
232-
}
217+
spanBuilder
218+
.setAttribute(JenkinsOtelSemanticAttributes.JENKINS_STEP_TYPE, stepType)
219+
.setAttribute(JenkinsOtelSemanticAttributes.JENKINS_STEP_ID, node.getId())
220+
.setAttribute(JenkinsOtelSemanticAttributes.JENKINS_STEP_NAME, getStepName(node, JenkinsOtelSemanticAttributes.STEP_NAME))
221+
.setAttribute(JenkinsOtelSemanticAttributes.CI_PIPELINE_RUN_USER, principal)
222+
.setAttribute(JenkinsOtelSemanticAttributes.JENKINS_STEP_PLUGIN_NAME, stepPlugin.getName())
223+
.setAttribute(JenkinsOtelSemanticAttributes.JENKINS_STEP_PLUGIN_VERSION, stepPlugin.getVersion());
224+
225+
Span atomicStepSpan = spanBuilder.startSpan();
226+
LOGGER.log(Level.FINE, () -> run.getFullDisplayName() + " - > " + node.getDisplayFunctionName() + " - begin " + OtelUtils.toDebugString(atomicStepSpan));
227+
Scope atomicStepScope = atomicStepSpan.makeCurrent();
228+
stepHandler.afterSpanCreated(node, run);
229+
230+
getTracerService().putSpanAndScopes(run, atomicStepSpan, node, Arrays.asList(encapsulatingNodeScope, atomicStepScope));
233231
}
234232

233+
235234
@Override
236235
public void onAfterAtomicStep(@NonNull StepAtomNode node, FlowNode nextNode, @NonNull WorkflowRun run) {
237236
if (isIgnoredStep(node.getDescriptor())){
@@ -371,7 +370,7 @@ private void endCurrentSpan(FlowNode node, WorkflowRun run, GenericStatus status
371370
span.end();
372371
LOGGER.log(Level.FINE, () -> run.getFullDisplayName() + " - < " + node.getDisplayFunctionName() + " - end " + OtelUtils.toDebugString(span));
373372

374-
getTracerService().removePipelineStepSpan(run, node, span);
373+
getTracerService().removePipelineStepSpanAndCloseAssociatedScopes(run, node, span);
375374
}
376375
}
377376

src/main/java/io/jenkins/plugins/opentelemetry/job/OtelTraceService.java

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
import io.jenkins.plugins.opentelemetry.semconv.JenkinsOtelSemanticAttributes;
2424
import io.opentelemetry.api.common.AttributeKey;
2525
import io.opentelemetry.api.trace.Span;
26+
import io.opentelemetry.context.Scope;
2627
import org.jenkinsci.plugins.workflow.cps.nodes.StepEndNode;
2728
import org.jenkinsci.plugins.workflow.cps.nodes.StepStartNode;
2829
import org.jenkinsci.plugins.workflow.flow.FlowExecution;
@@ -155,7 +156,7 @@ private Iterable<FlowNode> getAncestors(@NonNull final FlowNode flowNode) {
155156
return ancestors;
156157
}
157158

158-
public void removePipelineStepSpan(@NonNull WorkflowRun run, @NonNull FlowNode flowNode, @NonNull Span span) {
159+
public void removePipelineStepSpanAndCloseAssociatedScopes(@NonNull WorkflowRun run, @NonNull FlowNode flowNode, @NonNull Span span) {
159160
FlowNode startSpanNode;
160161
if (flowNode instanceof AtomNode) {
161162
startSpanNode = flowNode;
@@ -183,7 +184,7 @@ public void removePipelineStepSpan(@NonNull WorkflowRun run, @NonNull FlowNode f
183184
.filter(flowNodeMonitoringAction -> Objects.equals(flowNodeMonitoringAction.getSpanId(), span.getSpanContext().getSpanId()))
184185
.findFirst()
185186
.ifPresentOrElse(
186-
FlowNodeMonitoringAction::purgeSpan,
187+
FlowNodeMonitoringAction::purgeSpanAndCloseAssociatedScopes,
187188
() -> {
188189
String msg = "span not found to be purged: " + OtelUtils.toDebugString(span) +
189190
" ending " + OtelUtils.toDebugString(startSpanNode) + " in " + run;
@@ -204,20 +205,20 @@ public void removeBuildStepSpan(@NonNull AbstractBuild build, @NonNull BuildStep
204205
.reverse()
205206
.stream()
206207
.filter(buildStepMonitoringAction -> Objects.equals(buildStepMonitoringAction.getSpanId(), span.getSpanContext().getSpanId()))
207-
.findFirst().ifPresentOrElse(BuildStepMonitoringAction::purgeSpan, () -> {
208+
.findFirst().ifPresentOrElse(BuildStepMonitoringAction::purgeSpanAndCloseAssociatedScopes, () -> {
208209
throw new IllegalStateException("span not found to be purged: " + span + " for " + buildStep);
209210
});
210211
}
211212

212213
public void purgeRun(@NonNull Run run) {
213-
run.getActions(OtelMonitoringAction.class).forEach(OtelMonitoringAction::purgeSpan);
214+
run.getActions(OtelMonitoringAction.class).forEach(OtelMonitoringAction::purgeSpanAndCloseAssociatedScopes);
214215
// TODO verify we don't need this cleanup
215216
if (run instanceof WorkflowRun) {
216217
WorkflowRun workflowRun = (WorkflowRun) run;
217218
List<FlowNode> flowNodesHeads = Optional.ofNullable(workflowRun.getExecution()).map(FlowExecution::getCurrentHeads).orElse(Collections.emptyList());
218219
ForkScanner scanner = new ForkScanner();
219220
scanner.setup(flowNodesHeads);
220-
StreamSupport.stream(scanner.spliterator(), false).forEach(flowNode -> flowNode.getActions(OtelMonitoringAction.class).forEach(OtelMonitoringAction::purgeSpan));
221+
StreamSupport.stream(scanner.spliterator(), false).forEach(flowNode -> flowNode.getActions(OtelMonitoringAction.class).forEach(OtelMonitoringAction::purgeSpanAndCloseAssociatedScopes));
221222
}
222223
}
223224

@@ -260,6 +261,14 @@ public void putSpan(@NonNull Run run, @NonNull Span span, @NonNull FlowNode flow
260261
OtelUtils.toDebugString(flowNode) + ", " + OtelUtils.toDebugString(span) + ")");
261262
}
262263

264+
public void putSpanAndScopes(@NonNull Run run, @NonNull Span span, @NonNull FlowNode flowNode, List<Scope> scopes) {
265+
// FYI for agent allocation, we have 2 FlowNodeMonitoringAction to track the agent allocation duration
266+
flowNode.addAction(new FlowNodeMonitoringAction(span, scopes));
267+
268+
LOGGER.log(Level.FINE, () -> "putSpan(" + run.getFullDisplayName() + ", " +
269+
OtelUtils.toDebugString(flowNode) + ", " + OtelUtils.toDebugString(span) + ")");
270+
}
271+
263272
private void setAttributesToSpan(@NonNull Span span, OpenTelemetryAttributesAction openTelemetryAttributesAction) {
264273
if (openTelemetryAttributesAction == null) {
265274
return;

src/main/java/io/jenkins/plugins/opentelemetry/job/action/AbstractInvisibleMonitoringAction.java

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,19 @@
66
package io.jenkins.plugins.opentelemetry.job.action;
77

88
import io.opentelemetry.api.trace.Span;
9+
import io.opentelemetry.context.Scope;
10+
11+
import java.util.Collections;
12+
import java.util.List;
913

1014
public abstract class AbstractInvisibleMonitoringAction extends AbstractMonitoringAction {
1115

1216
public AbstractInvisibleMonitoringAction(Span span) {
13-
super(span);
17+
super(span, Collections.emptyList());
18+
}
19+
20+
public AbstractInvisibleMonitoringAction(Span span, List<Scope> scopes) {
21+
super(span, scopes);
1422
}
1523

1624
@Override

src/main/java/io/jenkins/plugins/opentelemetry/job/action/AbstractMonitoringAction.java

Lines changed: 55 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
package io.jenkins.plugins.opentelemetry.job.action;
77

8+
import com.google.common.collect.ImmutableList;
89
import edu.umd.cs.findbugs.annotations.CheckForNull;
910
import edu.umd.cs.findbugs.annotations.NonNull;
1011
import hudson.model.Action;
@@ -17,6 +18,7 @@
1718

1819
import java.util.Collections;
1920
import java.util.HashMap;
21+
import java.util.List;
2022
import java.util.Map;
2123
import java.util.Optional;
2224
import java.util.logging.Level;
@@ -25,14 +27,20 @@
2527
public abstract class AbstractMonitoringAction implements Action, OtelMonitoringAction {
2628
private final static Logger LOGGER = Logger.getLogger(AbstractMonitoringAction.class.getName());
2729

28-
private transient Span span;
30+
transient SpanAndScopes spanAndScopes;
31+
32+
2933
final String traceId;
3034
final String spanId;
3135
protected String spanName;
3236
protected Map<String, String> w3cTraceContext;
3337

34-
public AbstractMonitoringAction(Span span) {
35-
this.span = span;
38+
/**
39+
* @param span span of this action
40+
* @param scopes scope and underlying scopes associated with the span.
41+
*/
42+
public AbstractMonitoringAction(Span span, List<Scope> scopes) {
43+
this.spanAndScopes = new SpanAndScopes(span, scopes, Thread.currentThread().getName());
3644
this.traceId = span.getSpanContext().getTraceId();
3745
this.spanId = span.getSpanContext().getSpanId();
3846
this.spanName = span instanceof ReadWriteSpan ? ((ReadWriteSpan) span).getName() : null; // when tracer is no-op, span is NOT a ReadWriteSpan
@@ -41,6 +49,8 @@ public AbstractMonitoringAction(Span span) {
4149
W3CTraceContextPropagator.getInstance().inject(Context.current(), w3cTraceContext, (carrier, key, value) -> carrier.put(key, value));
4250
this.w3cTraceContext = w3cTraceContext;
4351
}
52+
53+
LOGGER.log(Level.FINE, () -> "Span " + getSpanName() + ", thread=" + spanAndScopes.scopeStartThreadName + " opened " + spanAndScopes.scopes.size() + " scopes");
4454
}
4555

4656
public String getSpanName() {
@@ -55,7 +65,7 @@ public Map<String, String> getW3cTraceContext() {
5565
@Override
5666
@CheckForNull
5767
public Span getSpan() {
58-
return span;
68+
return spanAndScopes.span;
5969
}
6070

6171
public String getTraceId() {
@@ -67,9 +77,14 @@ public String getSpanId() {
6777
}
6878

6979
@Override
70-
public void purgeSpan() {
71-
LOGGER.log(Level.FINE, () -> "Purge span='" + spanName + "', spanId=" + spanId + ", traceId=" + traceId + ": " + (span == null ? "#null#" : "purged"));
72-
this.span = null;
80+
public void purgeSpanAndCloseAssociatedScopes() {
81+
LOGGER.log(Level.FINE, () -> "Purge span='" + spanName + "', spanId=" + spanId + ", traceId=" + traceId + ": " + spanAndScopes);
82+
Optional.ofNullable(spanAndScopes)
83+
.map(spanAndScopes -> spanAndScopes.scopes)
84+
.map(ImmutableList::copyOf)
85+
.map(ImmutableList::reverse)
86+
.ifPresent(scopes -> scopes.forEach(Scope::close));
87+
this.spanAndScopes = null;
7388
}
7489

7590
@Override
@@ -85,8 +100,40 @@ public String toString() {
85100
public boolean hasEnded() {
86101
return
87102
Optional
88-
.ofNullable(span).map(s -> s instanceof ReadableSpan ? (ReadableSpan) s : null) // cast to ReadableSpan
103+
.ofNullable(spanAndScopes)
104+
.map(sac -> sac.span)
105+
.filter(s -> s instanceof ReadableSpan)
106+
.map(s -> (ReadableSpan) s)
89107
.map(ReadableSpan::hasEnded)
90108
.orElse(true);
91109
}
110+
111+
/**
112+
* Scopes associated with the span and the underlying scopes instantiated to create the span.
113+
* Underlying scopes can be the scope of the underlying wrapping pipeline step (eg a `stage` step).
114+
* Thread name when the scope was opened. Used for debugging, to identify potential leaks.
115+
*/
116+
static class SpanAndScopes {
117+
@NonNull
118+
final Span span;
119+
@NonNull
120+
final List<Scope> scopes;
121+
@NonNull
122+
final String scopeStartThreadName;
123+
124+
public SpanAndScopes(@NonNull Span span, @NonNull List<Scope> scopes, @NonNull String scopeStartThreadName) {
125+
this.span = span;
126+
this.scopes = scopes;
127+
this.scopeStartThreadName = scopeStartThreadName;
128+
}
129+
130+
@Override
131+
public String toString() {
132+
return "SpanAndScopes{" +
133+
"span=" + span +
134+
", scopes=" + scopes.size() +
135+
", scopeStartThreadName='" + scopeStartThreadName + '\'' +
136+
'}';
137+
}
138+
}
92139
}

src/main/java/io/jenkins/plugins/opentelemetry/job/action/FlowNodeMonitoringAction.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,20 @@
66
package io.jenkins.plugins.opentelemetry.job.action;
77

88
import io.opentelemetry.api.trace.Span;
9+
import io.opentelemetry.context.Scope;
10+
11+
import java.util.List;
912

1013
/**
1114
* Span reference associate with a {@link org.jenkinsci.plugins.workflow.graph.FlowNode}
1215
*/
1316
public class FlowNodeMonitoringAction extends AbstractInvisibleMonitoringAction {
17+
1418
public FlowNodeMonitoringAction(Span span) {
1519
super(span);
1620
}
21+
22+
public FlowNodeMonitoringAction(Span span, List<Scope> scopes) {
23+
super(span, scopes);
24+
}
1725
}

src/main/java/io/jenkins/plugins/opentelemetry/job/action/OtelMonitoringAction.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ public interface OtelMonitoringAction extends Action {
2222
@CheckForNull
2323
Span getSpan();
2424

25-
void purgeSpan();
25+
void purgeSpanAndCloseAssociatedScopes();
2626

2727
/**
2828
* @return {@code true} if the associated {@link Span} has ended

0 commit comments

Comments
 (0)