Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import datadog.instrument.utils.ClassLoaderValue;
import datadog.metrics.api.statsd.StatsDClientManager;
import datadog.trace.api.Config;
import datadog.trace.api.InstrumenterConfig;
import datadog.trace.api.Platform;
import datadog.trace.api.WithGlobalTracer;
import datadog.trace.api.appsec.AppSecEventTracker;
Expand Down Expand Up @@ -846,6 +847,17 @@ private static synchronized void installDatadogTracer(
initTelemetry.onFatalError(ex);
}

// Register JVM runtime metric callbacks against the OtelMeterProvider after
// CoreTracer has started OtlpMetricsService. Skip when OTEL_METRICS_EXPORTER=none
// since there's no exporter to collect against. Done here (not in the delayed
// startJmx) so callbacks are in place before the exporter's first flush.
Config cfg = Config.get();
if (cfg.isRuntimeMetricsEnabled()
&& InstrumenterConfig.get().isMetricsOtelEnabled()
&& cfg.isMetricsOtlpExporterEnabled()) {
startOtlpRuntimeMetrics();
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

JMX has some unfortunate side-effects which mean we can't start it at the same time as the tracer.

I would move JvmOtlpRuntimeMetrics out of otel-shim and into the agent-jmxfetch module. That way you can start it from JMXFetch along with the other runtime metrics. This would also let you benefit from the existing code that delays starting JMXFetch until the appropriate time.

}

StaticEventLogger.end("GlobalTracer");
}

Expand Down Expand Up @@ -989,6 +1001,27 @@ private static synchronized void initializeJmxSystemAccessProvider(
}
}

/**
 * Starts the OTLP runtime metrics by reflectively invoking the static {@code start()} method of
 * {@code datadog.opentelemetry.shim.metrics.JvmOtlpRuntimeMetrics}, loaded through the agent
 * class loader. Per the original author, the periodic OTLP exporter started by CoreTracer then
 * collects and exports the registered JVM metrics (heap, CPU, threads, classes, etc.) — the same
 * pattern Node and .NET use to start their runtime metrics unconditionally during tracer init,
 * independent of any app-side OTel API usage.
 *
 * <p>The calling thread's context class loader is switched to {@code AGENT_CLASSLOADER} for the
 * duration of the call and handed back to {@code safelySetContextClassLoader} afterwards. Any
 * {@link Throwable} raised while loading or invoking the class is logged and not rethrown.
 */
private static synchronized void startOtlpRuntimeMetrics() {
  final Thread currentThread = Thread.currentThread();
  final ClassLoader previousLoader = currentThread.getContextClassLoader();
  try {
    currentThread.setContextClassLoader(AGENT_CLASSLOADER);
    // Resolve and invoke the static start() entry point in a single reflective chain.
    AGENT_CLASSLOADER
        .loadClass("datadog.opentelemetry.shim.metrics.JvmOtlpRuntimeMetrics")
        .getMethod("start")
        .invoke(null);
  } catch (final Throwable ex) {
    log.error("Throwable thrown while starting OTLP runtime metrics", ex);
  } finally {
    // Always hand the original context class loader back, even on failure.
    safelySetContextClassLoader(previousLoader);
  }
}

private static synchronized void startJmxFetch() {
final ClassLoader contextLoader = Thread.currentThread().getContextClassLoader();
try {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -218,5 +218,12 @@
"methods": [
{"name": "<init>", "parameterTypes": []}
]
},
{
"name": "datadog.trace.bootstrap.otel.shim.metrics.JvmOtlpRuntimeMetrics",
"methods": [
{"name": "<init>", "parameterTypes": []},
{"name": "start", "parameterTypes": []}
]
}
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,291 @@
package datadog.opentelemetry.shim.metrics;

import com.sun.management.OperatingSystemMXBean;
import io.opentelemetry.api.common.AttributeKey;
import io.opentelemetry.api.common.Attributes;
import io.opentelemetry.api.metrics.Meter;
import java.lang.management.BufferPoolMXBean;
import java.lang.management.ClassLoadingMXBean;
import java.lang.management.ManagementFactory;
import java.lang.management.MemoryMXBean;
import java.lang.management.MemoryPoolMXBean;
import java.lang.management.MemoryUsage;
import java.lang.management.ThreadMXBean;
import java.util.List;
import java.util.Locale;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.function.ToLongFunction;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
* Registers JVM runtime metrics with OTel-native names against the agent's MeterProvider. See
* https://opentelemetry.io/docs/specs/semconv/runtime/jvm-metrics/.
*/
public final class JvmOtlpRuntimeMetrics {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We need to move this class to another module. The responsibility of the otel-shim module is to bridge between the OTel API and internal services. This means there will be multiple copies of the otel-shim code at runtime - one for the bootstrap class-path to support extensions and internal code, and one or more for every class-loader that needs this shim.

The best place at the moment to put this is under the agent-jmxfetch module - you'll need to add otel-bootstrap as a dependency (at build time we vendor-in/repackage the OTel API for anything using otel-bootstrap so this won't conflict with anything else in the customer app).


private static final Logger log = LoggerFactory.getLogger(JvmOtlpRuntimeMetrics.class);
// Scope name passed to the meter provider when obtaining the Meter in start().
private static final String INSTRUMENTATION_SCOPE = "datadog.jvm.runtime";
// Attribute key whose value is "heap"/"non_heap" for totals, or the lower-cased pool type.
private static final AttributeKey<String> MEMORY_TYPE = AttributeKey.stringKey("jvm.memory.type");
// Attribute key carrying the memory pool's name on per-pool measurements.
private static final AttributeKey<String> MEMORY_POOL =
AttributeKey.stringKey("jvm.memory.pool.name");
// Attribute key carrying the buffer pool's name on jvm.buffer.* measurements.
private static final AttributeKey<String> BUFFER_POOL =
AttributeKey.stringKey("jvm.buffer.pool.name");
// Pre-built attribute sets for the heap and non-heap totals.
private static final Attributes HEAP_ATTRS = Attributes.of(MEMORY_TYPE, "heap");
private static final Attributes NON_HEAP_ATTRS = Attributes.of(MEMORY_TYPE, "non_heap");

// Flipped to true by the first start() call; later calls return without registering again.
private static final AtomicBoolean started = new AtomicBoolean(false);

/**
 * Registers every JVM runtime metric instrument (memory, buffers, threads, class loading, CPU)
 * on the meter obtained from {@code OtelMeterProvider.INSTANCE}.
 *
 * <p>Only the first call does any work; subsequent calls return immediately. An {@link Exception}
 * thrown during registration is logged and not propagated, and the started flag stays set.
 */
public static void start() {
  final boolean firstCall = started.compareAndSet(false, true);
  if (firstCall) {
    try {
      final Meter runtimeMeter = OtelMeterProvider.INSTANCE.get(INSTRUMENTATION_SCOPE);
      registerMemoryMetrics(runtimeMeter);
      registerBufferMetrics(runtimeMeter);
      registerThreadMetrics(runtimeMeter);
      registerClassLoadingMetrics(runtimeMeter);
      registerCpuMetrics(runtimeMeter);
      log.debug("Started OTLP runtime metrics with OTel-native naming (jvm.*)");
    } catch (Exception e) {
      log.error("Failed to start JVM OTLP runtime metrics", e);
    }
  }
}

// jvm.gc.duration is excluded — spec requires Histogram, JMX only exposes cumulative time.

/**
* jvm.memory.used, jvm.memory.committed, jvm.memory.limit, jvm.memory.init,
* jvm.memory.used_after_last_gc — all UpDownCounter per spec.
*/
private static void registerMemoryMetrics(Meter meter) {
MemoryMXBean memoryBean = ManagementFactory.getMemoryMXBean();
List<MemoryPoolMXBean> pools = ManagementFactory.getMemoryPoolMXBeans();

meter
.upDownCounterBuilder("jvm.memory.used")
.setDescription("Measure of memory used.")
.setUnit("By")
.buildWithCallback(
Comment on lines +68 to +72
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Badge Fix runtime metrics accumulating on every export

When OTLP runtime metrics are enabled (DD_RUNTIME_METRICS_ENABLED=true, DD_METRICS_OTEL_ENABLED=true, DD_METRICS_OTEL_EXPORTER=otlp), these MXBean callbacks record point-in-time values through observable sum instruments. In the current shim, OtelMetricStorage.shouldResetOnCollect leaves OBSERVABLE_UP_DOWN_COUNTER non-resetting and OtelLongSum adds each observation, so every OTLP collection adds the current heap/thread/etc. value to the previous export instead of replacing it; the same pattern affects observable counters like class and CPU time under the existing temporality handling. This makes exported JVM metrics monotonically inflate after the first flush, so the callbacks need value/last-observation semantics or the observable storage needs to handle async sum instruments correctly.

Useful? React with 👍 / 👎.

measurement -> {
measurement.record(memoryBean.getHeapMemoryUsage().getUsed(), HEAP_ATTRS);
measurement.record(memoryBean.getNonHeapMemoryUsage().getUsed(), NON_HEAP_ATTRS);
for (MemoryPoolMXBean pool : pools) {
measurement.record(pool.getUsage().getUsed(), poolAttributes(pool));
}
});

meter
.upDownCounterBuilder("jvm.memory.committed")
.setDescription("Measure of memory committed.")
.setUnit("By")
.buildWithCallback(
measurement -> {
measurement.record(memoryBean.getHeapMemoryUsage().getCommitted(), HEAP_ATTRS);
measurement.record(memoryBean.getNonHeapMemoryUsage().getCommitted(), NON_HEAP_ATTRS);
for (MemoryPoolMXBean pool : pools) {
measurement.record(pool.getUsage().getCommitted(), poolAttributes(pool));
}
});

meter
.upDownCounterBuilder("jvm.memory.limit")
.setDescription("Measure of max obtainable memory.")
.setUnit("By")
.buildWithCallback(
measurement -> {
long heapMax = memoryBean.getHeapMemoryUsage().getMax();
if (heapMax > 0) {
measurement.record(heapMax, HEAP_ATTRS);
}
long nonHeapMax = memoryBean.getNonHeapMemoryUsage().getMax();
if (nonHeapMax > 0) {
measurement.record(nonHeapMax, NON_HEAP_ATTRS);
}
for (MemoryPoolMXBean pool : pools) {
long max = pool.getUsage().getMax();
if (max > 0) {
measurement.record(max, poolAttributes(pool));
}
}
});

meter
.upDownCounterBuilder("jvm.memory.init")
.setDescription("Measure of initial memory requested.")
.setUnit("By")
.buildWithCallback(
measurement -> {
long heapInit = memoryBean.getHeapMemoryUsage().getInit();
if (heapInit > 0) {
measurement.record(heapInit, HEAP_ATTRS);
}
long nonHeapInit = memoryBean.getNonHeapMemoryUsage().getInit();
if (nonHeapInit > 0) {
measurement.record(nonHeapInit, NON_HEAP_ATTRS);
}
});

meter
.upDownCounterBuilder("jvm.memory.used_after_last_gc")
.setDescription("Measure of memory used after the most recent garbage collection event.")
.setUnit("By")
.buildWithCallback(
measurement -> {
for (MemoryPoolMXBean pool : pools) {
MemoryUsage collectionUsage = pool.getCollectionUsage();
if (collectionUsage != null && collectionUsage.getUsed() >= 0) {
measurement.record(collectionUsage.getUsed(), poolAttributes(pool));
}
}
});
}

/**
 * Registers the jvm.buffer.* instruments (UpDownCounter, Development): memory used, memory limit
 * and buffer count, each reported per platform {@link BufferPoolMXBean}.
 *
 * @param meter the meter the instruments are registered on
 */
private static void registerBufferMetrics(Meter meter) {
  final List<BufferPoolMXBean> platformPools =
      ManagementFactory.getPlatformMXBeans(BufferPoolMXBean.class);

  // Bytes currently used by each pool.
  bufferPoolMetric(
      meter,
      "jvm.buffer.memory.used",
      "Measure of memory used by buffers.",
      "By",
      platformPools,
      pool -> pool.getMemoryUsed());

  // Total capacity of each pool, in bytes.
  bufferPoolMetric(
      meter,
      "jvm.buffer.memory.limit",
      "Measure of total memory capacity of buffers.",
      "By",
      platformPools,
      pool -> pool.getTotalCapacity());

  // Number of buffers held by each pool.
  bufferPoolMetric(
      meter,
      "jvm.buffer.count",
      "Number of buffers in the pool.",
      "{buffer}",
      platformPools,
      pool -> pool.getCount());
}

/**
 * Registers jvm.thread.count (UpDownCounter, Stable), whose callback records the value of {@link
 * ThreadMXBean#getThreadCount()} on each collection.
 *
 * @param meter the meter the instrument is registered on
 */
private static void registerThreadMetrics(Meter meter) {
  final ThreadMXBean threads = ManagementFactory.getThreadMXBean();
  meter
      .upDownCounterBuilder("jvm.thread.count")
      .setDescription("Number of executing platform threads.")
      .setUnit("{thread}")
      .buildWithCallback(
          observer -> {
            observer.record(threads.getThreadCount());
          });
}

/**
 * Registers the class-loading instruments — jvm.class.loaded (Counter), jvm.class.count
 * (UpDownCounter) and jvm.class.unloaded (Counter), all Stable per spec — backed by the {@link
 * ClassLoadingMXBean}.
 *
 * @param meter the meter the instruments are registered on
 */
private static void registerClassLoadingMetrics(Meter meter) {
  final ClassLoadingMXBean classes = ManagementFactory.getClassLoadingMXBean();

  // Total classes loaded over the JVM's lifetime.
  meter
      .counterBuilder("jvm.class.loaded")
      .setDescription("Number of classes loaded since JVM start.")
      .setUnit("{class}")
      .buildWithCallback(
          observer -> {
            observer.record(classes.getTotalLoadedClassCount());
          });

  // Classes loaded right now.
  meter
      .upDownCounterBuilder("jvm.class.count")
      .setDescription("Number of classes currently loaded.")
      .setUnit("{class}")
      .buildWithCallback(
          observer -> {
            observer.record(classes.getLoadedClassCount());
          });

  // Total classes unloaded over the JVM's lifetime.
  meter
      .counterBuilder("jvm.class.unloaded")
      .setDescription("Number of classes unloaded since JVM start.")
      .setUnit("{class}")
      .buildWithCallback(
          observer -> {
            observer.record(classes.getUnloadedClassCount());
          });
}

/**
 * Registers the CPU instruments — jvm.cpu.time (Counter), jvm.cpu.recent_utilization (Gauge) and
 * jvm.cpu.count (UpDownCounter), all Stable per spec.
 *
 * <p>jvm.cpu.time and jvm.cpu.recent_utilization are registered only when the platform
 * operating-system bean is a {@code com.sun.management.OperatingSystemMXBean}; jvm.cpu.count is
 * always registered. Negative CPU time or load readings are not recorded.
 *
 * @param meter the meter the instruments are registered on
 */
private static void registerCpuMetrics(Meter meter) {
  final java.lang.management.OperatingSystemMXBean platformBean =
      ManagementFactory.getOperatingSystemMXBean();

  if (platformBean instanceof OperatingSystemMXBean) {
    final OperatingSystemMXBean processBean = (OperatingSystemMXBean) platformBean;

    meter
        .counterBuilder("jvm.cpu.time")
        .ofDoubles()
        .setDescription("CPU time used by the process as reported by the JVM.")
        .setUnit("s")
        .buildWithCallback(
            observer -> {
              final long cpuNanos = processBean.getProcessCpuTime();
              if (cpuNanos < 0) {
                return;
              }
              // Convert nanoseconds to seconds.
              observer.record(cpuNanos / 1e9);
            });

    meter
        .gaugeBuilder("jvm.cpu.recent_utilization")
        .setDescription("Recent CPU utilization for the process as reported by the JVM.")
        .setUnit("1")
        .buildWithCallback(
            observer -> {
              final double load = processBean.getProcessCpuLoad();
              // Written as a positive test so that NaN, like negatives, is not recorded.
              if (load >= 0) {
                observer.record(load);
              }
            });
  }

  meter
      .upDownCounterBuilder("jvm.cpu.count")
      .setDescription("Number of processors available to the JVM.")
      .setUnit("{cpu}")
      .buildWithCallback(
          observer -> {
            observer.record(Runtime.getRuntime().availableProcessors());
          });
}

/**
 * Registers one UpDownCounter whose callback walks {@code bufferPools}, reads a value from each
 * pool through {@code getter}, and records it tagged with the pool's name under {@code
 * jvm.buffer.pool.name}. Negative readings are skipped.
 *
 * @param meter the meter the instrument is registered on
 * @param name instrument name
 * @param description instrument description
 * @param unit instrument unit
 * @param bufferPools pools visited on every callback invocation
 * @param getter extracts the value to record from a pool
 */
private static void bufferPoolMetric(
    Meter meter,
    String name,
    String description,
    String unit,
    List<BufferPoolMXBean> bufferPools,
    ToLongFunction<BufferPoolMXBean> getter) {
  meter
      .upDownCounterBuilder(name)
      .setDescription(description)
      .setUnit(unit)
      .buildWithCallback(
          observer -> {
            for (BufferPoolMXBean bufferPool : bufferPools) {
              final long reading = getter.applyAsLong(bufferPool);
              if (reading < 0) {
                continue;
              }
              observer.record(reading, Attributes.of(BUFFER_POOL, bufferPool.getName()));
            }
          });
}

/**
 * Builds the attribute set for a memory pool: {@code jvm.memory.type} is the pool type's enum
 * name lower-cased with {@link Locale#ROOT}, and {@code jvm.memory.pool.name} is the pool name.
 *
 * @param pool the memory pool to describe
 * @return attributes carrying the pool's type and name
 */
private static Attributes poolAttributes(MemoryPoolMXBean pool) {
  final String memoryType = pool.getType().name().toLowerCase(Locale.ROOT);
  final String poolName = pool.getName();
  return Attributes.of(MEMORY_TYPE, memoryType, MEMORY_POOL, poolName);
}

/** Private so the class cannot be instantiated from outside; its visible API is static. */
private JvmOtlpRuntimeMetrics() {}
}
Loading