diff --git a/dd-java-agent/agent-jmxfetch/src/main/java/datadog/trace/agent/jmxfetch/JMXFetch.java b/dd-java-agent/agent-jmxfetch/src/main/java/datadog/trace/agent/jmxfetch/JMXFetch.java index ea2a79f49c2..6d5b7f21fa4 100644 --- a/dd-java-agent/agent-jmxfetch/src/main/java/datadog/trace/agent/jmxfetch/JMXFetch.java +++ b/dd-java-agent/agent-jmxfetch/src/main/java/datadog/trace/agent/jmxfetch/JMXFetch.java @@ -116,6 +116,7 @@ private static void run(final StatsDClientManager statsDClientManager, final Con .refreshBeansPeriod(refreshBeansPeriod) .globalTags(globalTags) .reporter(reporter) + .jmxfetchTelemetry(config.isTelemetryJmxEnabled()) .connectionFactory(new AgentConnectionFactory()); if (config.isJmxFetchMultipleRuntimeServicesEnabled()) { diff --git a/dd-trace-api/src/main/java/datadog/trace/api/config/GeneralConfig.java b/dd-trace-api/src/main/java/datadog/trace/api/config/GeneralConfig.java index 3d58a5d21ed..5275b166cc0 100644 --- a/dd-trace-api/src/main/java/datadog/trace/api/config/GeneralConfig.java +++ b/dd-trace-api/src/main/java/datadog/trace/api/config/GeneralConfig.java @@ -101,6 +101,7 @@ public final class GeneralConfig { public static final String TELEMETRY_DEPENDENCY_RESOLUTION_QUEUE_SIZE = "telemetry.dependency-resolution.queue.size"; public static final String TELEMETRY_DEBUG_REQUESTS_ENABLED = "telemetry.debug.requests.enabled"; + public static final String TELEMETRY_JMX_ENABLED = "telemetry.jmx.enabled"; public static final String AGENTLESS_LOG_SUBMISSION_ENABLED = "agentless.log.submission.enabled"; public static final String AGENTLESS_LOG_SUBMISSION_QUEUE_SIZE = "agentless.log.submission.queue.size"; diff --git a/internal-api/src/main/java/datadog/trace/api/Config.java b/internal-api/src/main/java/datadog/trace/api/Config.java index b275e2a936c..27244778823 100644 --- a/internal-api/src/main/java/datadog/trace/api/Config.java +++ b/internal-api/src/main/java/datadog/trace/api/Config.java @@ -393,6 +393,7 @@ import static datadog.trace.api.config.GeneralConfig.TELEMETRY_DEPENDENCY_RESOLUTION_QUEUE_SIZE; import static datadog.trace.api.config.GeneralConfig.TELEMETRY_EXTENDED_HEARTBEAT_INTERVAL; import static datadog.trace.api.config.GeneralConfig.TELEMETRY_HEARTBEAT_INTERVAL; +import static datadog.trace.api.config.GeneralConfig.TELEMETRY_JMX_ENABLED; import static datadog.trace.api.config.GeneralConfig.TELEMETRY_LOG_COLLECTION_ENABLED; import static datadog.trace.api.config.GeneralConfig.TELEMETRY_METRICS_ENABLED; import static datadog.trace.api.config.GeneralConfig.TELEMETRY_METRICS_INTERVAL; @@ -1243,6 +1244,7 @@ public static String getHostName() { private final boolean isTelemetryDependencyServiceEnabled; private final boolean telemetryMetricsEnabled; private final boolean isTelemetryLogCollectionEnabled; + private final boolean isTelemetryJmxEnabled; private final int telemetryDependencyResolutionQueueSize; private final boolean azureAppServices; @@ -2176,6 +2178,8 @@ PROFILING_DATADOG_PROFILER_ENABLED, isDatadogProfilerSafeInCurrentEnvironment()) && configProvider.getBoolean( TELEMETRY_LOG_COLLECTION_ENABLED, DEFAULT_TELEMETRY_LOG_COLLECTION_ENABLED); + isTelemetryJmxEnabled = configProvider.getBoolean(TELEMETRY_JMX_ENABLED, false); + isTelemetryDependencyServiceEnabled = configProvider.getBoolean( TELEMETRY_DEPENDENCY_COLLECTION_ENABLED, @@ -3748,6 +3752,10 @@ public boolean isTelemetryLogCollectionEnabled() { return isTelemetryLogCollectionEnabled; } + public boolean isTelemetryJmxEnabled() { + return isTelemetryJmxEnabled; + } + public int getTelemetryDependencyResolutionQueueSize() { return telemetryDependencyResolutionQueueSize; } diff --git a/internal-api/src/main/java/datadog/trace/api/flare/TracerFlare.java b/internal-api/src/main/java/datadog/trace/api/flare/TracerFlare.java index f2ba39041a4..907024f2cd9 100644 --- a/internal-api/src/main/java/datadog/trace/api/flare/TracerFlare.java +++ b/internal-api/src/main/java/datadog/trace/api/flare/TracerFlare.java @@ -4,6 +4,8 @@ import java.io.IOException; import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; import java.util.List; import java.util.Map; import java.util.concurrent.ConcurrentHashMap; @@ -67,6 +69,19 @@ public static void addReporter(Reporter reporter) { reporters.put(reporter.getClass(), reporter); } + public static Collection getReporters() { + return Collections.unmodifiableCollection(reporters.values()); + } + + public static Reporter getReporter(String className) { + for (Reporter reporter : getReporters()) { + if (reporter.getClass().getName().equals(className)) { + return reporter; + } + } + return null; + } + public static void addText(ZipOutputStream zip, String section, String text) throws IOException { zip.putNextEntry(new ZipEntry(section)); if (null != text) { diff --git a/internal-api/src/test/groovy/datadog/trace/api/flare/TracerFlareTest.groovy b/internal-api/src/test/groovy/datadog/trace/api/flare/TracerFlareTest.groovy index 4a6b916b80a..a9dfc44161f 100644 --- a/internal-api/src/test/groovy/datadog/trace/api/flare/TracerFlareTest.groovy +++ b/internal-api/src/test/groovy/datadog/trace/api/flare/TracerFlareTest.groovy @@ -51,6 +51,34 @@ class TracerFlareTest extends DDSpecification { /^(java.lang.IllegalStateException: (bin|txt) \(expected\)\n){2}$/ } + def "test getReporter finds reporter by class name"() { + setup: + def reporter1 = Mock(Reporter1) + def reporter2 = Mock(Reporter2) + TracerFlare.addReporter(reporter1) + TracerFlare.addReporter(reporter2) + + when: + def found1 = TracerFlare.getReporter(reporter1.getClass().getName()) + def found2 = TracerFlare.getReporter(reporter2.getClass().getName()) + + then: + found1 == reporter1 + found2 == reporter2 + } + + def "test getReporter returns null for non-existent reporter"() { + setup: + def reporter = Mock(Reporter1) + TracerFlare.addReporter(reporter) + + when: + def found = TracerFlare.getReporter("com.example.NonExistentReporter") + + then: + found == null + } + def buildAndExtractZip() { TracerFlare.prepareForFlare() def out = new ByteArrayOutputStream() diff --git a/metadata/supported-configurations.json b/metadata/supported-configurations.json index c270af1c7d3..eaf64f76460 100644 --- a/metadata/supported-configurations.json +++ b/metadata/supported-configurations.json @@ -3737,6 +3737,14 @@ "aliases": [] } ], + "DD_TELEMETRY_JMX_ENABLED": [ + { + "version": "A", + "type": "boolean", + "default": "false", + "aliases": [] + } + ], "DD_TELEMETRY_LOG_COLLECTION_ENABLED": [ { "version": "A", diff --git a/utils/flare-utils/build.gradle.kts b/utils/flare-utils/build.gradle.kts index f718826c226..1952fea5cea 100644 --- a/utils/flare-utils/build.gradle.kts +++ b/utils/flare-utils/build.gradle.kts @@ -12,4 +12,7 @@ dependencies { implementation(project(":utils:version-utils")) implementation(project(":internal-api")) implementation(libs.slf4j) + + testImplementation(project(":utils:test-utils")) + testImplementation(project(":dd-trace-api")) } diff --git a/utils/flare-utils/src/main/java/datadog/flare/TracerFlareManager.java b/utils/flare-utils/src/main/java/datadog/flare/TracerFlareManager.java new file mode 100644 index 00000000000..b2928d0cdc2 --- /dev/null +++ b/utils/flare-utils/src/main/java/datadog/flare/TracerFlareManager.java @@ -0,0 +1,200 @@ +package datadog.flare; + +import datadog.trace.api.Config; +import datadog.trace.api.flare.TracerFlare; +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.lang.management.ManagementFactory; +import java.nio.charset.StandardCharsets; +import java.util.Base64; +import java.util.Map; +import java.util.zip.ZipEntry; +import java.util.zip.ZipInputStream; +import java.util.zip.ZipOutputStream; +import javax.management.InstanceAlreadyExistsException; +import javax.management.MBeanRegistrationException; +import javax.management.MBeanServer; +import javax.management.MalformedObjectNameException; +import javax.management.NotCompliantMBeanException; +import javax.management.ObjectName; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * MBean implementation for managing and accessing tracer flare data. + * + *

This class provides JMX operations to list flare data sources and retrieve flare data either + * for individual sources or a complete flare archive. See {@link TracerFlareManagerMBean} for + * documentation on the exposed operations. + */ +public class TracerFlareManager implements TracerFlareManagerMBean { + private static final Logger LOGGER = LoggerFactory.getLogger(TracerFlareManager.class); + + private final TracerFlareService flareService; + protected ObjectName mbeanName; + + public TracerFlareManager(TracerFlareService flareService) { + this.flareService = flareService; + } + + @Override + public String generateFullFlareZip() throws IOException { + TracerFlare.prepareForFlare(); + + long currentMillis = System.currentTimeMillis(); + boolean dumpThreads = Config.get().isTriageEnabled() || LOGGER.isDebugEnabled(); + byte[] zipBytes = flareService.buildFlareZip(currentMillis, currentMillis, dumpThreads); + return Base64.getEncoder().encodeToString(zipBytes); + } + + @Override + public String listFlareFiles() throws IOException { + TracerFlare.prepareForFlare(); + + StringBuilder result = new StringBuilder(); + + for (Map.Entry entry : TracerFlareService.BUILT_IN_SOURCES.entrySet()) { + String sourceName = entry.getKey(); + String[] files = entry.getValue(); + + for (String filename : files) { + result.append(sourceName).append(" ").append(filename).append("\n"); + } + } + + for (TracerFlare.Reporter reporter : TracerFlare.getReporters()) { + try (ByteArrayOutputStream bytes = new ByteArrayOutputStream(); + ZipOutputStream zip = new ZipOutputStream(bytes)) { + reporter.addReportToFlare(zip); + zip.finish(); + + try (ByteArrayInputStream bais = new ByteArrayInputStream(bytes.toByteArray()); + ZipInputStream zis = new ZipInputStream(bais)) { + ZipEntry entry; + while ((entry = zis.getNextEntry()) != null) { + result + .append(reporter.getClass().getName()) + .append(" ") + .append(entry.getName()) + .append("\n"); + zis.closeEntry(); + } + } + } catch (IOException e) { + LOGGER.debug("Failed to inspect reporter {}", reporter.getClass().getName(), e); + } + } + + return result.toString(); + } + + @Override + public String getFlareFile(String sourceName, String filename) throws IOException { + final byte[] zipBytes; + if (isBuiltInSource(sourceName)) { + zipBytes = flareService.getBuiltInSourceZip(sourceName); + } else { + zipBytes = getReporterFile(sourceName); + } + return extractFileFromZip(zipBytes, filename); + } + + private boolean isBuiltInSource(String sourceName) { + return TracerFlareService.BUILT_IN_SOURCES.containsKey(sourceName); + } + + /** + * Generates flare data for a specific reporter. + * + *

The reporter's data is generated as a ZIP file, and the specified filename is extracted. If + * the file is text, it is returned as plain text; if binary, it is returned base64-encoded. + * + * @param reporterClassName the fully qualified class name of the reporter + * @return the zip file containing the reporter's content + * @throws IOException if an error occurs while generating the flare + */ + private byte[] getReporterFile(String reporterClassName) throws IOException { + TracerFlare.Reporter reporter = TracerFlare.getReporter(reporterClassName); + if (reporter == null) { + throw new IOException("Error: Reporter not found: " + reporterClassName); + } + + reporter.prepareForFlare(); + + try (ByteArrayOutputStream bytes = new ByteArrayOutputStream(); + ZipOutputStream zip = new ZipOutputStream(bytes)) { + reporter.addReportToFlare(zip); + zip.finish(); + return bytes.toByteArray(); + } + } + + /** + * Extracts a specific file from a ZIP archive. + * + *

Searches through the ZIP entries for the specified filename and returns its content. If the + * file name ends in ".txt", it is returned as plain text; if binary, it is returned + * base64-encoded. + * + * @param zipBytes the ZIP file bytes + * @param filename the name of the file to extract + * @return the file content (plain text or base64-encoded binary) + * @throws IOException if an error occurs while reading the ZIP + */ + private String extractFileFromZip(byte[] zipBytes, String filename) throws IOException { + try (ByteArrayInputStream bais = new ByteArrayInputStream(zipBytes); + ZipInputStream zis = new ZipInputStream(bais)) { + ZipEntry entry; + while ((entry = zis.getNextEntry()) != null) { + if (entry.getName().equals(filename)) { + ByteArrayOutputStream content = new ByteArrayOutputStream(); + byte[] buffer = new byte[8192]; + int bytesRead; + while ((bytesRead = zis.read(buffer)) != -1) { + content.write(buffer, 0, bytesRead); + } + zis.closeEntry(); + + byte[] contentBytes = content.toByteArray(); + if (entry.getName().endsWith(".txt")) { + return new String(contentBytes, StandardCharsets.UTF_8); + } else { + return Base64.getEncoder().encodeToString(contentBytes); + } + } + zis.closeEntry(); + } + + throw new IOException("Failed to extract file: " + filename); + } + } + + void registerMBean() { + MBeanServer mbs = ManagementFactory.getPlatformMBeanServer(); + + try { + mbeanName = new ObjectName("datadog.flare:type=TracerFlare"); + mbs.registerMBean(this, mbeanName); + LOGGER.info("Registered TracerFlare MBean at {}", mbeanName); + } catch (MalformedObjectNameException + | InstanceAlreadyExistsException + | MBeanRegistrationException + | NotCompliantMBeanException e) { + LOGGER.warn("Failed to register TracerFlare MBean", e); + mbeanName = null; + } + } + + void unregisterMBean() { + if (mbeanName != null) { + MBeanServer mbs = ManagementFactory.getPlatformMBeanServer(); + try { + mbs.unregisterMBean(mbeanName); + LOGGER.debug("Unregistered TracerFlare MBean"); + } catch (Exception e) { + LOGGER.warn("Failed to unregister TracerFlare MBean", e); + } + } + } +} diff --git a/utils/flare-utils/src/main/java/datadog/flare/TracerFlareManagerMBean.java b/utils/flare-utils/src/main/java/datadog/flare/TracerFlareManagerMBean.java new file mode 100644 index 00000000000..010bb7df04b --- /dev/null +++ b/utils/flare-utils/src/main/java/datadog/flare/TracerFlareManagerMBean.java @@ -0,0 +1,53 @@ +package datadog.flare; + +import java.io.IOException; + +/** + * MBean interface for managing and accessing tracer flare data. + * + *

This interface provides JMX operations to inspect flare data sources and generate flare data + * either for individual sources or as a complete ZIP archive. Sources include both registered + * reporters and built-in data (config, runtime, flare prelude, etc.). + */ +public interface TracerFlareManagerMBean { + /** + * Lists all available flare files from all sources. + * + *

Returns a newline-separated string where each line is formatted as "<source> + * <file>". This format makes it easy to pass the source and filename to {@link + * #getFlareFile(String, String)}. + * + *

Example output: + * + *

+   * config initial_config.txt
+   * ...
+   * datadog.trace.agent.core.CoreTracer tracer_health.txt
+   * ...
+   * 
+ * + * @return newline-separated string listing all available files and their source name + * @throws IOException if an error occurs + */ + String listFlareFiles() throws IOException; + + /** + * Returns a specific flare file by source name and filename. + * + *

If the file is text, it is returned as plain text; if binary, it is returned base64-encoded. + * + * @param sourceName the name of the source (reporter class name or built-in source name) + * @param filename the name of the file to retrieve + * @return the file content (plain text or base64-encoded binary) + * @throws IOException if an error occurs while generating or extracting the data + */ + String getFlareFile(String sourceName, String filename) throws IOException; + + /** + * Generates a complete tracer flare as a ZIP file. + * + * @return base64-encoded ZIP file containing the complete flare data + * @throws IOException if an error occurs while generating the flare ZIP + */ + String generateFullFlareZip() throws IOException; +} diff --git a/utils/flare-utils/src/main/java/datadog/flare/TracerFlarePoller.java b/utils/flare-utils/src/main/java/datadog/flare/TracerFlarePoller.java index 49755cf24d0..b695f0112c5 100644 --- a/utils/flare-utils/src/main/java/datadog/flare/TracerFlarePoller.java +++ b/utils/flare-utils/src/main/java/datadog/flare/TracerFlarePoller.java @@ -54,6 +54,9 @@ private void doStop() { if (null != stopSubmitter) { stopSubmitter.run(); } + if (null != tracerFlareService) { + tracerFlareService.close(); + } } final class Preparer implements ProductListener { diff --git a/utils/flare-utils/src/main/java/datadog/flare/TracerFlareService.java b/utils/flare-utils/src/main/java/datadog/flare/TracerFlareService.java index dcb097287a3..e8b65f513d7 100644 --- a/utils/flare-utils/src/main/java/datadog/flare/TracerFlareService.java +++ b/utils/flare-utils/src/main/java/datadog/flare/TracerFlareService.java @@ -24,6 +24,8 @@ import java.time.ZoneOffset; import java.time.ZonedDateTime; import java.util.Collections; +import java.util.HashMap; +import java.util.Map; import java.util.concurrent.TimeUnit; import java.util.zip.ZipOutputStream; import okhttp3.HttpUrl; @@ -49,11 +51,23 @@ final class TracerFlareService { private static final int MAX_LOGFILE_SIZE_BYTES = MAX_LOGFILE_SIZE_MB << 20; + static final Map BUILT_IN_SOURCES = new HashMap<>(); + + static { + BUILT_IN_SOURCES.put("prelude", new String[] {"flare_info.txt", "tracer_version.txt"}); + BUILT_IN_SOURCES.put("config", new String[] {"initial_config.txt"}); + BUILT_IN_SOURCES.put( + "runtime", + new String[] {"jvm_args.txt", "classpath.txt", "library_path.txt", "boot_classpath.txt"}); + BUILT_IN_SOURCES.put("threads", new String[] {"threads.txt"}); + } + private final AgentTaskScheduler scheduler = new AgentTaskScheduler(TRACER_FLARE); private final Config config; private final OkHttpClient okHttpClient; private final HttpUrl flareUrl; + private final TracerFlareManager jmxManager; private boolean logLevelOverridden; private volatile long flareStartMillis; @@ -65,9 +79,22 @@ final class TracerFlareService { this.okHttpClient = okHttpClient; this.flareUrl = agentUrl.newBuilder().addPathSegments(FLARE_ENDPOINT).build(); + if (config.isTelemetryJmxEnabled()) { + jmxManager = new TracerFlareManager(this); + jmxManager.registerMBean(); + } else { + jmxManager = null; + } + applyTriageReportTrigger(config.getTriageReportTrigger()); } + public void close() { + if (jmxManager != null) { + jmxManager.unregisterMBean(); + } + } + private void applyTriageReportTrigger(String triageTrigger) { if (null != triageTrigger && !triageTrigger.isEmpty()) { long delay = TimeUtils.parseSimpleDelay(triageTrigger); @@ -194,11 +221,13 @@ private String getFlareName(long endMillis) { return REPORT_PREFIX + config.getRuntimeId() + "-" + endMillis + ".zip"; } - private byte[] buildFlareZip(long startMillis, long endMillis, boolean dumpThreads) + public byte[] buildFlareZip(long startMillis, long endMillis, boolean dumpThreads) throws IOException { try (ByteArrayOutputStream bytes = new ByteArrayOutputStream(); ZipOutputStream zip = new ZipOutputStream(bytes)) { + // Make sure to update BUILT_IN_SOURCES and getBuiltInSourceZip if this list of functions + // changes addPrelude(zip, startMillis, endMillis); addConfig(zip); addRuntime(zip); @@ -212,17 +241,47 @@ private byte[] buildFlareZip(long startMillis, long endMillis, boolean dumpThrea } } + /** Generates a ZIP file for JMX fetch for a built-in source. */ + byte[] getBuiltInSourceZip(String sourceName) throws IOException { + try (ByteArrayOutputStream bytes = new ByteArrayOutputStream(); + ZipOutputStream zip = new ZipOutputStream(bytes)) { + + switch (sourceName) { + case "prelude": + addPrelude(zip, flareStartMillis, System.currentTimeMillis()); + break; + case "config": + addConfig(zip); + break; + case "runtime": + addRuntime(zip); + break; + case "threads": + addThreadDump(zip); + break; + default: + throw new IOException("Unknown source name: " + sourceName); + } + + zip.finish(); + return bytes.toByteArray(); + } + } + private void addPrelude(ZipOutputStream zip, long startMillis, long endMillis) throws IOException { + // Make sure to update BUILT_IN_SOURCES if the files change here. TracerFlare.addText(zip, "flare_info.txt", flareInfo(startMillis, endMillis)); TracerFlare.addText(zip, "tracer_version.txt", VERSION); } private void addConfig(ZipOutputStream zip) throws IOException { + // Make sure to update BUILT_IN_SOURCES if the files change here. TracerFlare.addText(zip, "initial_config.txt", config.toString()); } private void addRuntime(ZipOutputStream zip) throws IOException { + // Make sure to update BUILT_IN_SOURCES if the files change here. try { RuntimeMXBean runtimeMXBean = ManagementFactory.getRuntimeMXBean(); TracerFlare.addText(zip, "jvm_args.txt", String.join(" ", runtimeMXBean.getInputArguments())); diff --git a/utils/flare-utils/src/test/groovy/datadog/flare/TracerFlareJmxTest.groovy b/utils/flare-utils/src/test/groovy/datadog/flare/TracerFlareJmxTest.groovy new file mode 100644 index 00000000000..030aade9a1c --- /dev/null +++ b/utils/flare-utils/src/test/groovy/datadog/flare/TracerFlareJmxTest.groovy @@ -0,0 +1,73 @@ +package datadog.flare + +import static datadog.trace.api.config.GeneralConfig.TELEMETRY_JMX_ENABLED + +import datadog.trace.api.Config +import datadog.trace.test.util.DDSpecification +import okhttp3.HttpUrl +import spock.lang.Timeout + +import javax.management.MBeanServer +import javax.management.ObjectName +import java.lang.management.ManagementFactory + +@Timeout(1) +class TracerFlareJmxTest extends DDSpecification { + static final ObjectName MBEAN_NAME = new ObjectName("datadog.flare:type=TracerFlare") + + final MBeanServer mbs = ManagementFactory.getPlatformMBeanServer() + + TracerFlareService tracerFlareService + + def cleanup() { + if (tracerFlareService != null) { + tracerFlareService.close() + } + } + + private void createTracerFlareService() { + tracerFlareService = new TracerFlareService( + Config.get(), + null, // okHttpClient - not needed for JMX test + HttpUrl.get("http://localhost:8126") + ) + } + + def "TracerFlare MBean is registered when telemetry JMX is enabled"() { + given: + injectSysConfig(TELEMETRY_JMX_ENABLED, "true") + + when: + createTracerFlareService() + + then: + mbs.isRegistered(MBEAN_NAME) + } + + def "TracerFlare MBean is not registered when telemetry JMX is disabled"() { + given: + injectSysConfig(TELEMETRY_JMX_ENABLED, "false") + + when: + createTracerFlareService() + + then: + !mbs.isRegistered(MBEAN_NAME) + } + + def "TracerFlare MBean operations work when JMX is enabled"() { + given: + injectSysConfig(TELEMETRY_JMX_ENABLED, "true") + createTracerFlareService() + + when: + def fileList = mbs.invoke(MBEAN_NAME, "listFlareFiles", null, null) as String + + then: + mbs.isRegistered(MBEAN_NAME) + fileList != null + fileList.contains("flare_info.txt") + fileList.contains("tracer_version.txt") + fileList.contains("initial_config.txt") + } +} \ No newline at end of file