diff --git a/audit/audit.go b/audit/audit.go index 1733792635..35237f1165 100644 --- a/audit/audit.go +++ b/audit/audit.go @@ -23,9 +23,11 @@ import ( "context" "encoding/json" "fmt" - "github.com/sirupsen/logrus" "strings" "sync" + + "github.com/nuts-foundation/nuts-node/core" + "github.com/sirupsen/logrus" ) const ( @@ -61,6 +63,15 @@ const auditLogLevel = "audit" var auditLoggerInstance *logrus.Logger var initAuditLoggerOnce = &sync.Once{} +func init() { + // Register callback so core.SetupTracing can add hooks to the audit logger. + // This is needed because the audit logger is a separate logrus instance, + // and we can't import audit from core due to circular dependencies. + core.RegisterAuditLogHook = func(hook logrus.Hook) { + auditLogger().AddHook(hook) + } +} + // auditLogger returns the initialized logger instance intended for audit logging. func auditLogger() *logrus.Logger { initAuditLoggerOnce.Do(func() { diff --git a/auth/api/iam/api.go b/auth/api/iam/api.go index c3affbcf97..7eb0b5cf10 100644 --- a/auth/api/iam/api.go +++ b/auth/api/iam/api.go @@ -318,7 +318,7 @@ func callbackRequestToError(request CallbackRequestObject, redirectURI *url.URL) return requestErr } -func (r Wrapper) RetrieveAccessToken(_ context.Context, request RetrieveAccessTokenRequestObject) (RetrieveAccessTokenResponseObject, error) { +func (r Wrapper) RetrieveAccessToken(ctx context.Context, request RetrieveAccessTokenRequestObject) (RetrieveAccessTokenResponseObject, error) { // get access token from store var token TokenResponse err := r.accessTokenClientStore().Get(request.SessionID, &token) @@ -336,7 +336,7 @@ func (r Wrapper) RetrieveAccessToken(_ context.Context, request RetrieveAccessTo // change this when tokens can be cached err = r.accessTokenClientStore().Delete(request.SessionID) if err != nil { - log.Logger().WithError(err).Warn("Failed to delete access token") + log.Logger().WithContext(ctx).WithError(err).Warn("Failed to delete access token") } // return access token return RetrieveAccessToken200JSONResponse(token), nil diff --git a/cmd/root.go b/cmd/root.go index a14feab268..493e138ac8 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -122,7 +122,7 @@ func startServer(ctx context.Context, system *core.System) error { logrus.Info(fmt.Sprintf("Build info: \n%s", core.BuildInfo())) logrus.Info(fmt.Sprintf("Config: \n%s", system.Config.PrintConfig())) - // check config on all engines + // check config on all engines (also initializes tracing) if err := system.Configure(); err != nil { return err } diff --git a/core/engine.go b/core/engine.go index e209664c01..3402c8cf8e 100644 --- a/core/engine.go +++ b/core/engine.go @@ -22,10 +22,11 @@ package core import ( "context" "fmt" - "github.com/sirupsen/logrus" - "github.com/spf13/pflag" "os" "strings" + + "github.com/sirupsen/logrus" + "github.com/spf13/pflag" ) // Routable enables connecting a REST API to the echo server. The API wrappers should implement this interface @@ -57,6 +58,8 @@ type System struct { Context context.Context // ContextCancel is a function to signal the system should shut down. ContextCancel context.CancelFunc + // tracingShutdown is the shutdown function for OpenTelemetry tracing + tracingShutdown func(context.Context) error } var coreLogger = logrus.StandardLogger().WithField(LogFieldModule, "core") @@ -111,13 +114,25 @@ func (system *System) Shutdown() error { } coreLogger.Infof("Stopped %s", name) } + // Shutdown tracing last to ensure all logs are flushed + if system.tracingShutdown != nil { + if err := system.tracingShutdown(context.Background()); err != nil { + coreLogger.WithError(err).Error("Failed to shutdown tracing") + } + } return nil } // Configure configures all engines in the system. func (system *System) Configure() error { + // Set up tracing first, so all logs (including engine configuration) go to the configured destination + tracingShutdown, err := SetupTracing(system.Config.Tracing) + if err != nil { + return fmt.Errorf("failed to setup tracing: %w", err) + } + system.tracingShutdown = tracingShutdown + coreLogger.Debugf("Creating datadir: %s", system.Config.Datadir) - var err error if err = os.MkdirAll(system.Config.Datadir, os.ModePerm); err != nil { return fmt.Errorf("unable to create datadir (dir=%s): %w", system.Config.Datadir, err) } diff --git a/core/http_client.go b/core/http_client.go index 53dbc01918..2960fa327f 100644 --- a/core/http_client.go +++ b/core/http_client.go @@ -22,9 +22,11 @@ package core import ( "context" "fmt" - "github.com/sirupsen/logrus" "io" "net/http" + + "github.com/sirupsen/logrus" + "go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp" ) // HttpResponseBodyLogClipAt is the maximum length of a response body to log. @@ -98,8 +100,17 @@ func (w httpRequestDoerAdapter) Do(req *http.Request) (*http.Response, error) { // If the given authorization token builder is non-nil, it calls it and passes the resulting token as bearer token with requests. func CreateHTTPInternalClient(cfg ClientConfig, generator AuthorizationTokenGenerator) (HTTPRequestDoer, error) { var result *httpRequestDoerAdapter - client := &http.Client{} - client.Timeout = cfg.Timeout + var transport http.RoundTripper = http.DefaultTransport + if TracingEnabled() { + transport = otelhttp.NewTransport(http.DefaultTransport, + otelhttp.WithSpanNameFormatter(func(_ string, r *http.Request) string { + return "internal-api: " + r.Method + " " + r.URL.Path + })) + } + client := &http.Client{ + Transport: transport, + Timeout: cfg.Timeout, + } result = &httpRequestDoerAdapter{ fn: client.Do, diff --git a/core/server_config.go b/core/server_config.go index c4474a7c45..cd833f3518 100644 --- a/core/server_config.go +++ b/core/server_config.go @@ -72,7 +72,8 @@ type ServerConfig struct { LegacyTLS TLSConfig `koanf:"network"` // HTTP exists to expose http.clientipheader to the nuts-network layer. // This header should contaisn the client IP address for logging. Can be removed together with the nuts-network - HTTP HTTPConfig `koanf:"http"` + HTTP HTTPConfig `koanf:"http"` + Tracing TracingConfig `koanf:"tracing"` configMap *koanf.Koanf } @@ -87,6 +88,15 @@ type HTTPClientConfig struct { Timeout time.Duration `koanf:"timeout"` } +// TracingConfig contains settings for OpenTelemetry tracing. +type TracingConfig struct { + // Endpoint is the OTLP collector endpoint (e.g., "localhost:4318" for HTTP). + // When empty, tracing is disabled. When set, logs are sent to both stdout and the OTLP endpoint. + Endpoint string `koanf:"endpoint"` + // Insecure disables TLS for the OTLP connection. + Insecure bool `koanf:"insecure"` +} + // TLSConfig specifies how TLS should be configured for connections. type TLSConfig struct { // Offload specifies the TLS offloading mode for incoming/outgoing traffic. @@ -274,6 +284,8 @@ func FlagSet() *pflag.FlagSet { flagSet.String("tls.offload", string(defaultCfg.TLS.Offload), fmt.Sprintf("Whether to enable TLS offloading for incoming gRPC connections. "+ "Enable by setting it to '%s'. If enabled 'tls.certheader' must be configured as well.", OffloadIncomingTLS)) flagSet.String("tls.certheader", defaultCfg.TLS.ClientCertHeaderName, "Name of the HTTP header that will contain the client certificate when TLS is offloaded for gRPC.") + flagSet.String("tracing.endpoint", defaultCfg.Tracing.Endpoint, "OTLP collector endpoint for OpenTelemetry tracing (e.g., 'localhost:4318'). When empty, tracing is disabled.") + flagSet.Bool("tracing.insecure", defaultCfg.Tracing.Insecure, "Disable TLS for the OTLP connection.") return flagSet } diff --git a/core/tracing.go b/core/tracing.go new file mode 100644 index 0000000000..065fd83d42 --- /dev/null +++ b/core/tracing.go @@ -0,0 +1,279 @@ +/* + * Nuts node + * Copyright (C) 2025 Nuts community + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + */ + +package core + +import ( + "context" + "errors" + "fmt" + "sync/atomic" + "time" + + "github.com/sirupsen/logrus" + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploghttp" + "go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp" + otellog "go.opentelemetry.io/otel/log" + "go.opentelemetry.io/otel/propagation" + "go.opentelemetry.io/otel/sdk/log" + "go.opentelemetry.io/otel/sdk/resource" + "go.opentelemetry.io/otel/sdk/trace" + semconv "go.opentelemetry.io/otel/semconv/v1.26.0" + oteltrace "go.opentelemetry.io/otel/trace" +) + +const serviceName = "nuts-node" + +// tracingEnabled is set to true when OpenTelemetry tracing is configured. +var tracingEnabled atomic.Bool + +// nutsTracerProvider holds nuts-node's own TracerProvider. +// This is used instead of the global when nuts-node is embedded in another application. +var nutsTracerProvider *trace.TracerProvider + +// TracingEnabled returns true if OpenTelemetry tracing is configured. +func TracingEnabled() bool { + return tracingEnabled.Load() +} + +// SetTracingEnabled sets the tracing enabled flag. +// Exported for testing only; do not call from production code. +func SetTracingEnabled(enabled bool) { + tracingEnabled.Store(enabled) +} + +// GetTracerProvider returns nuts-node's TracerProvider. +// This should be used by nuts-node components instead of otel.GetTracerProvider() +// to ensure spans are attributed to "nuts-node" service. +func GetTracerProvider() oteltrace.TracerProvider { + if nutsTracerProvider != nil { + return nutsTracerProvider + } + return otel.GetTracerProvider() +} + +// RegisterAuditLogHook is a function that registers a logrus hook with the audit logger. +// It is set by the audit package during initialization to avoid circular imports. +var RegisterAuditLogHook func(hook logrus.Hook) = func(logrus.Hook) {} + +// SetupTracing initializes OpenTelemetry tracing with the given configuration. +// Returns a shutdown function that should be called on application exit. +// If cfg.Endpoint is empty, tracing is disabled and a no-op shutdown function is returned. +// When tracing is enabled, logs are sent to both stdout and the OTLP endpoint. +func SetupTracing(cfg TracingConfig) (shutdown func(context.Context) error, err error) { + if cfg.Endpoint == "" { + logrus.Info("Tracing disabled (no endpoint configured)") + return func(context.Context) error { return nil }, nil + } + + // Enable tracing flag for HTTP clients and other components + tracingEnabled.Store(true) + + ctx := context.Background() + var shutdownFuncs []func(context.Context) error + + shutdown = func(ctx context.Context) error { + var errs error + for _, fn := range shutdownFuncs { + if err := fn(ctx); err != nil { + errs = errors.Join(errs, err) + } + } + return errs + } + + // Handle errors by cleaning up already-created resources + handleErr := func(err error) (func(context.Context) error, error) { + shutdownCtx, cancel := context.WithTimeout(ctx, 5*time.Second) + defer cancel() + _ = shutdown(shutdownCtx) + return nil, err + } + + // Set up OpenTelemetry error handler to integrate with logrus + otel.SetErrorHandler(otel.ErrorHandlerFunc(func(err error) { + logrus.WithError(err).Error("OpenTelemetry SDK error") + })) + + // Set up propagator (W3C Trace Context + Baggage) + otel.SetTextMapPropagator(propagation.NewCompositeTextMapPropagator( + propagation.TraceContext{}, + propagation.Baggage{}, + )) + + // Set up resource with service info + version := Version() + res, err := resource.New(ctx, + resource.WithAttributes( + semconv.ServiceNameKey.String(serviceName), + semconv.ServiceVersionKey.String(version), + ), + ) + if err != nil { + return handleErr(err) + } + + // Set up OTLP HTTP exporter + opts := []otlptracehttp.Option{ + otlptracehttp.WithEndpoint(cfg.Endpoint), + } + if cfg.Insecure { + opts = append(opts, otlptracehttp.WithInsecure()) + } + traceExporter, err := otlptracehttp.New(ctx, opts...) + if err != nil { + return handleErr(err) + } + shutdownFuncs = append(shutdownFuncs, traceExporter.Shutdown) + + // Set up trace provider with batch exporter + tracerProvider := trace.NewTracerProvider( + trace.WithBatcher(traceExporter), + trace.WithResource(res), + ) + shutdownFuncs = append(shutdownFuncs, tracerProvider.Shutdown) + + // Store nuts-node's provider for use by GetTracerProvider() + nutsTracerProvider = tracerProvider + + // Only set as global if no other provider exists (i.e., not embedded). + // When embedded, the parent application owns the global provider. + _, hasParentProvider := otel.GetTracerProvider().(*trace.TracerProvider) + if !hasParentProvider { + otel.SetTracerProvider(tracerProvider) + } + + // Set up OTLP log exporter + logOpts := []otlploghttp.Option{ + otlploghttp.WithEndpoint(cfg.Endpoint), + } + if cfg.Insecure { + logOpts = append(logOpts, otlploghttp.WithInsecure()) + } + logExporter, err := otlploghttp.New(ctx, logOpts...) + if err != nil { + return handleErr(err) + } + shutdownFuncs = append(shutdownFuncs, logExporter.Shutdown) + + // Set up log provider + loggerProvider := log.NewLoggerProvider( + log.WithProcessor(log.NewBatchProcessor(logExporter)), + log.WithResource(res), + ) + shutdownFuncs = append(shutdownFuncs, loggerProvider.Shutdown) + + // Create OTEL hook for sending logs via OTLP (logs go to both stdout and OTLP) + otelHook := &OtelLogrusHook{logger: loggerProvider.Logger(serviceName)} + logrus.AddHook(otelHook) + + // Also add trace context to stdout logs + logrus.AddHook(&tracingLogrusHook{}) + + // Register hook with audit logger (which uses its own logger instance) + RegisterAuditLogHook(otelHook) + + logrus.WithFields(logrus.Fields{ + "endpoint": cfg.Endpoint, + "version": version, + }).Info("OpenTelemetry tracing initialized") + + return shutdown, nil +} + +// tracingLogrusHook is a logrus hook that injects trace context into log entries. +type tracingLogrusHook struct{} + +func (h *tracingLogrusHook) Levels() []logrus.Level { + return logrus.AllLevels +} + +func (h *tracingLogrusHook) Fire(entry *logrus.Entry) error { + if entry.Context == nil { + return nil + } + span := oteltrace.SpanFromContext(entry.Context) + if !span.SpanContext().IsValid() { + return nil + } + spanCtx := span.SpanContext() + entry.Data["trace_id"] = spanCtx.TraceID().String() + entry.Data["span_id"] = spanCtx.SpanID().String() + return nil +} + +// OtelLogrusHook is a logrus hook that sends logs to an OTLP endpoint. +// It is exported so other loggers (like the audit logger) can use it. +type OtelLogrusHook struct { + logger otellog.Logger +} + +func (h *OtelLogrusHook) Levels() []logrus.Level { + return logrus.AllLevels +} + +func (h *OtelLogrusHook) Fire(entry *logrus.Entry) error { + ctx := entry.Context + if ctx == nil { + ctx = context.Background() + } + + // Convert logrus level to otel severity + var severity otellog.Severity + switch entry.Level { + case logrus.TraceLevel: + severity = otellog.SeverityTrace + case logrus.DebugLevel: + severity = otellog.SeverityDebug + case logrus.InfoLevel: + severity = otellog.SeverityInfo + case logrus.WarnLevel: + severity = otellog.SeverityWarn + case logrus.ErrorLevel: + severity = otellog.SeverityError + case logrus.FatalLevel, logrus.PanicLevel: + severity = otellog.SeverityFatal + default: + severity = otellog.SeverityInfo + } + + // Build log record + record := otellog.Record{} + record.SetTimestamp(entry.Time) + record.SetSeverity(severity) + record.SetBody(otellog.StringValue(entry.Message)) + + // Add logrus fields as attributes + attrs := make([]otellog.KeyValue, 0, len(entry.Data)) + for k, v := range entry.Data { + attrs = append(attrs, otellog.String(k, formatValue(v))) + } + record.AddAttributes(attrs...) + + h.logger.Emit(ctx, record) + return nil +} + +func formatValue(v any) string { + if err, ok := v.(error); ok { + return err.Error() + } + return fmt.Sprintf("%v", v) +} diff --git a/core/tracing_test.go b/core/tracing_test.go new file mode 100644 index 0000000000..e66422b1fb --- /dev/null +++ b/core/tracing_test.go @@ -0,0 +1,123 @@ +/* + * Nuts node + * Copyright (C) 2025 Nuts community + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + */ + +package core + +import ( + "context" + "testing" + + "github.com/sirupsen/logrus" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "go.opentelemetry.io/otel/trace" +) + +func TestSetupTracing(t *testing.T) { + t.Run("disabled when endpoint is empty", func(t *testing.T) { + cfg := TracingConfig{Endpoint: ""} + + shutdown, err := SetupTracing(cfg) + + require.NoError(t, err) + assert.NotNil(t, shutdown) + // Shutdown should be a no-op + assert.NoError(t, shutdown(context.Background())) + }) +} + +func TestGetTracerProvider(t *testing.T) { + t.Run("returns global provider when nutsTracerProvider is nil", func(t *testing.T) { + // Reset state + nutsTracerProvider = nil + + provider := GetTracerProvider() + assert.NotNil(t, provider) + }) +} + +func TestTracingLogrusHook(t *testing.T) { + hook := &tracingLogrusHook{} + + t.Run("no-op when context is nil", func(t *testing.T) { + entry := &logrus.Entry{ + Data: make(logrus.Fields), + } + err := hook.Fire(entry) + assert.NoError(t, err) + assert.NotContains(t, entry.Data, "trace_id") + assert.NotContains(t, entry.Data, "span_id") + }) + + t.Run("no-op when span context is invalid", func(t *testing.T) { + entry := &logrus.Entry{ + Context: context.Background(), + Data: make(logrus.Fields), + } + err := hook.Fire(entry) + assert.NoError(t, err) + assert.NotContains(t, entry.Data, "trace_id") + assert.NotContains(t, entry.Data, "span_id") + }) + + t.Run("adds trace context when span is valid", func(t *testing.T) { + // Create a valid span context + traceID, _ := trace.TraceIDFromHex("0102030405060708090a0b0c0d0e0f10") + spanID, _ := trace.SpanIDFromHex("0102030405060708") + spanCtx := trace.NewSpanContext(trace.SpanContextConfig{ + TraceID: traceID, + SpanID: spanID, + TraceFlags: trace.FlagsSampled, + }) + + // Use noop tracer but with our span context + ctx := trace.ContextWithSpanContext(context.Background(), spanCtx) + + entry := &logrus.Entry{ + Context: ctx, + Data: make(logrus.Fields), + } + err := hook.Fire(entry) + assert.NoError(t, err) + assert.Equal(t, "0102030405060708090a0b0c0d0e0f10", entry.Data["trace_id"]) + assert.Equal(t, "0102030405060708", entry.Data["span_id"]) + }) +} + +func TestFormatValue(t *testing.T) { + t.Run("string value", func(t *testing.T) { + result := formatValue("test") + assert.Equal(t, "test", result) + }) + + t.Run("error value", func(t *testing.T) { + result := formatValue(assert.AnError) + assert.Equal(t, assert.AnError.Error(), result) + }) + + t.Run("int value", func(t *testing.T) { + result := formatValue(42) + assert.Equal(t, "42", result) + }) + + t.Run("nil value", func(t *testing.T) { + result := formatValue(nil) + assert.Equal(t, "", result) + }) +} diff --git a/crypto/storage/external/client.go b/crypto/storage/external/client.go index e810b8a2df..23a1e84599 100644 --- a/crypto/storage/external/client.go +++ b/crypto/storage/external/client.go @@ -30,6 +30,7 @@ import ( "github.com/nuts-foundation/nuts-node/core" "github.com/nuts-foundation/nuts-node/crypto/storage/spi" "github.com/nuts-foundation/nuts-node/crypto/util" + "go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp" ) // StorageType is the name of this storage type, used in health check reports and configuration. @@ -82,8 +83,19 @@ func NewAPIClient(config Config) (spi.Storage, error) { if _, err := url.ParseRequestURI(config.Address); err != nil { return nil, err } - client, _ := NewClientWithResponses(config.Address, WithHTTPClient(&http.Client{Timeout: config.Timeout})) - return &APIClient{httpClient: client}, nil + var transport http.RoundTripper = http.DefaultTransport + if core.TracingEnabled() { + transport = otelhttp.NewTransport(http.DefaultTransport, + otelhttp.WithSpanNameFormatter(func(_ string, r *http.Request) string { + return "crypto-storage: " + r.Method + " " + r.URL.Path + })) + } + httpClient := &http.Client{ + Transport: transport, + Timeout: config.Timeout, + } + apiClient, _ := NewClientWithResponses(config.Address, WithHTTPClient(httpClient)) + return &APIClient{httpClient: apiClient}, nil } func (c APIClient) GetPrivateKey(ctx context.Context, keyName string, _ string) (crypto.Signer, error) { diff --git a/crypto/storage/vault/vault.go b/crypto/storage/vault/vault.go index 4dc0909e4c..117e6e6870 100644 --- a/crypto/storage/vault/vault.go +++ b/crypto/storage/vault/vault.go @@ -23,13 +23,16 @@ import ( "crypto" "errors" "fmt" + "net/http" + "path/filepath" + "time" + vault "github.com/hashicorp/vault/api" "github.com/nuts-foundation/nuts-node/core" "github.com/nuts-foundation/nuts-node/crypto/log" "github.com/nuts-foundation/nuts-node/crypto/storage/spi" "github.com/nuts-foundation/nuts-node/crypto/util" - "path/filepath" - "time" + "go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp" ) const privateKeyPathName = "nuts-private-keys" @@ -110,6 +113,17 @@ func (v vaultKVStorage) NewPrivateKey(ctx context.Context, keyPath string) (cryp func configureVaultClient(cfg Config) (*vault.Client, error) { vaultConfig := vault.DefaultConfig() vaultConfig.Timeout = cfg.Timeout + + // Add tracing if enabled + if core.TracingEnabled() { + vaultConfig.HttpClient.Transport = otelhttp.NewTransport( + vaultConfig.HttpClient.Transport, + otelhttp.WithSpanNameFormatter(func(_ string, r *http.Request) string { + return "vault: " + r.Method + " " + r.URL.Path + }), + ) + } + client, err := vault.NewClient(vaultConfig) if err != nil { return nil, fmt.Errorf("unable to initialize Vault client: %w", err) diff --git a/docs/pages/deployment/monitoring.rst b/docs/pages/deployment/monitoring.rst index 7529dca086..d670c62d6c 100644 --- a/docs/pages/deployment/monitoring.rst +++ b/docs/pages/deployment/monitoring.rst @@ -178,6 +178,61 @@ The Nuts service executable exports the following metric namespaces: * ``go_`` contains Go metrics related to the process * ``promhttp_`` contains metrics related to HTTP calls to the Nuts node's ``/metrics`` endpoint +Tracing +******* + +The Nuts node supports distributed tracing via OpenTelemetry. When enabled, it exports traces to an OTLP-compatible backend +(e.g., Jaeger, Zipkin, .NET Aspire Dashboard, Grafana Tempo). + +Configuration +============= + +Enable tracing by configuring the OTLP endpoint: + +.. code-block:: yaml + + tracing: + endpoint: localhost:4318 + +Or via environment variables: + +.. code-block:: shell + + NUTS_TRACING_ENDPOINT=localhost:4318 + +Configuration options: + +* ``tracing.endpoint`` - OTLP HTTP endpoint (e.g., ``localhost:4318``). Tracing is disabled when empty. +* ``tracing.insecure`` - Disable TLS for the OTLP connection (default: ``false``). Only use in trusted networks or development environments, as trace data may contain sensitive information. + +What is traced +============== + +The following are automatically instrumented: + +* **Inbound HTTP requests** - All API calls to the Nuts node create spans (except ``/health``, ``/metrics``, ``/status``) +* **Outbound HTTP requests** - HTTP calls to external services (e.g., fetching DID documents, OAuth flows) +* **SQL database** - Database queries via GORM +* **Hashicorp Vault** - Key storage operations when using Vault backend +* **Log correlation** - Log entries include ``trace_id`` and ``span_id`` fields when tracing is enabled +* **OTLP log export** - Logs are also exported to the OTLP backend for unified observability + +Trace context propagation +========================= + +The Nuts node uses W3C Trace Context (``traceparent`` header) for propagating trace context across service boundaries. +When calling the Nuts node from another traced service, include the ``traceparent`` header to link spans. + +Known limitations +================= + +The following components are not yet instrumented: + +* **Azure Key Vault** - Azure managed keys backend is not instrumented. The Azure SDK supports OpenTelemetry via the ``azotel`` package (see `Azure SDK tracing `_). +* **gRPC network layer** - P2P communication between nodes (``did:nuts``) does not include tracing as it's for v5 and deprecated + +These limitations may be addressed in future releases. + CPU profiling ************* diff --git a/docs/pages/deployment/server_options.rst b/docs/pages/deployment/server_options.rst index a93ea21e31..207b399b64 100755 --- a/docs/pages/deployment/server_options.rst +++ b/docs/pages/deployment/server_options.rst @@ -15,6 +15,8 @@ url Public facing URL of the server (required). Must be HTTPS when strictmode is set. verbosity info Log level (trace, debug, info, warn, error) httpclient.timeout 30s Request time-out for HTTP clients, such as '10s'. Refer to Golang's 'time.Duration' syntax for a more elaborate description of the syntax. + tracing.endpoint OTLP collector endpoint for OpenTelemetry tracing (e.g., 'localhost:4318'). When empty, tracing is disabled. + tracing.insecure false Disable TLS for the OTLP connection. **Auth** auth.authorizationendpoint.enabled false enables the v2 API's OAuth2 Authorization Endpoint, used by OpenID4VP and OpenID4VCI. This flag might be removed in a future version (or its default become 'true') as the use cases and implementation of OpenID4VP and OpenID4VCI mature. **Crypto** diff --git a/go.mod b/go.mod index fbdb3e58d4..5d168d48e5 100644 --- a/go.mod +++ b/go.mod @@ -47,7 +47,7 @@ require ( github.com/sirupsen/logrus v1.9.3 github.com/spf13/cobra v1.9.1 github.com/spf13/pflag v1.0.7 - github.com/stretchr/testify v1.10.0 + github.com/stretchr/testify v1.11.1 github.com/twmb/murmur3 v1.1.8 go.etcd.io/bbolt v1.4.3 go.uber.org/atomic v1.11.0 @@ -56,7 +56,7 @@ require ( golang.org/x/crypto v0.45.0 golang.org/x/time v0.12.0 google.golang.org/grpc v1.75.0 - google.golang.org/protobuf v1.36.6 + google.golang.org/protobuf v1.36.8 gopkg.in/Regis24GmbH/go-phonetics.v2 v2.0.3 gopkg.in/yaml.v3 v3.0.1 gorm.io/driver/mysql v1.6.0 @@ -189,7 +189,7 @@ require ( golang.org/x/sys v0.38.0 // indirect golang.org/x/term v0.37.0 // indirect golang.org/x/text v0.31.0 // indirect - google.golang.org/genproto/googleapis/rpc v0.0.0-20250707201910-8d1bb00bc6a7 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20250825161204-c5933d9347a5 // indirect gopkg.in/Regis24GmbH/go-diacritics.v2 v2.0.3 // indirect gorm.io/gorm v1.30.2 modernc.org/mathutil v1.7.1 // indirect @@ -206,16 +206,37 @@ require ( github.com/eko/gocache/store/memcache/v4 v4.2.2 github.com/eko/gocache/store/redis/v4 v4.2.2 github.com/patrickmn/go-cache v2.1.0+incompatible + github.com/uptrace/opentelemetry-go-extra/otelgorm v0.3.2 + go.opentelemetry.io/contrib/instrumentation/github.com/labstack/echo/otelecho v0.63.0 + go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.63.0 + go.opentelemetry.io/otel v1.38.0 + go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploghttp v0.14.0 + go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.38.0 + go.opentelemetry.io/otel/log v0.14.0 + go.opentelemetry.io/otel/sdk v1.38.0 + go.opentelemetry.io/otel/sdk/log v0.14.0 + go.opentelemetry.io/otel/trace v1.38.0 ) require ( github.com/benbjohnson/clock v1.3.0 // indirect + github.com/cenkalti/backoff/v5 v5.0.3 // indirect + github.com/felixge/httpsnoop v1.0.4 // indirect github.com/go-json-experiment/json v0.0.0-20250725192818-e39067aee2d2 // indirect + github.com/go-logr/logr v1.4.3 // indirect + github.com/go-logr/stdr v1.2.2 // indirect github.com/golang/mock v1.6.0 // indirect github.com/google/go-tpm v0.9.5 // indirect + github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.2 // indirect github.com/klauspost/cpuid/v2 v2.2.5 // indirect github.com/rs/zerolog v1.26.1 // indirect - go.yaml.in/yaml/v3 v3.0.3 // indirect + github.com/uptrace/opentelemetry-go-extra/otelsql v0.3.2 // indirect + go.opentelemetry.io/auto/sdk v1.1.0 // indirect + go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.38.0 // indirect + go.opentelemetry.io/otel/metric v1.38.0 // indirect + go.opentelemetry.io/proto/otlp v1.7.1 // indirect + go.yaml.in/yaml/v3 v3.0.4 // indirect golang.org/x/exp v0.0.0-20250620022241-b7579e27df2b // indirect + google.golang.org/genproto/googleapis/api v0.0.0-20250825161204-c5933d9347a5 // indirect modernc.org/libc v1.66.3 // indirect ) diff --git a/go.sum b/go.sum index ed4ecb3ed9..bbb32a25d5 100644 --- a/go.sum +++ b/go.sum @@ -75,6 +75,8 @@ github.com/cbroglie/mustache v1.4.0 h1:Azg0dVhxTml5me+7PsZ7WPrQq1Gkf3WApcHMjMprY github.com/cbroglie/mustache v1.4.0/go.mod h1:SS1FTIghy0sjse4DUVGV1k/40B1qE1XkD9DtDsHo9iM= github.com/cenkalti/backoff/v4 v4.3.0 h1:MyRJ/UdXutAwSAT+s3wNd7MfTIcy71VQueUuFK343L8= github.com/cenkalti/backoff/v4 v4.3.0/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE= +github.com/cenkalti/backoff/v5 v5.0.3 h1:ZN+IMa753KfX5hd8vVaMixjnqRZ3y8CuJKRKj1xcsSM= +github.com/cenkalti/backoff/v5 v5.0.3/go.mod h1:rkhZdG3JZukswDf7f0cwqPNk4K0sa+F97BxZthm/crw= github.com/cespare/xxhash v1.1.0 h1:a6HrQnmkObjyL+Gs60czilIUGqrzKutQD6XZog3p+ko= github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghfAqPWnc= github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= @@ -125,6 +127,8 @@ github.com/fatih/color v1.16.0 h1:zmkK9Ngbjj+K0yRhTVONQh1p/HknKYSlNT+vZCzyokM= github.com/fatih/color v1.16.0/go.mod h1:fL2Sau1YI5c0pdGEVCbKQbLXB6edEj1ZgiY4NijnWvE= github.com/fatih/structs v1.1.0 h1:Q7juDM0QtcnhCpeyLGQKyg4TOIghuNXrkL32pHAUMxo= github.com/fatih/structs v1.1.0/go.mod h1:9NiDSp5zOcgEDl+j00MP/WkGVPOlPRLejGD8Ga6PJ7M= +github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg= +github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= github.com/fsnotify/fsnotify v1.9.0 h1:2Ml+OJNzbYCTzsxtv8vKSFD9PbJjmhYF14k/jKC7S9k= github.com/fsnotify/fsnotify v1.9.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0= github.com/fxamacker/cbor v1.5.1 h1:XjQWBgdmQyqimslUh5r4tUGmoqzHmBFQOImkWGi2awg= @@ -139,6 +143,7 @@ github.com/go-jose/go-jose/v4 v4.1.1 h1:JYhSgy4mXXzAdF3nUx3ygx347LRXJRrpgyU3adRm github.com/go-jose/go-jose/v4 v4.1.1/go.mod h1:BdsZGqgdO3b6tTc6LSE56wcDbMMLuPsw5d4ZD5f94kA= github.com/go-json-experiment/json v0.0.0-20250725192818-e39067aee2d2 h1:iizUGZ9pEquQS5jTGkh4AqeeHCMbfbjeb0zMt0aEFzs= github.com/go-json-experiment/json v0.0.0-20250725192818-e39067aee2d2/go.mod h1:TiCD2a1pcmjd7YnhGH0f/zKNcCD06B029pHhzV23c2M= +github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI= github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= @@ -202,6 +207,8 @@ github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/gorilla/securecookie v1.1.1/go.mod h1:ra0sb63/xPlUeL+yeDciTfxMRAA+MP+HVt/4epWDjd4= github.com/gorilla/sessions v1.2.1/go.mod h1:dk2InVEVJ0sfLlnXv9EAgkf6ecYs/i80K/zI+bUmuGM= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.2 h1:8Tjv8EJ+pM1xP8mK6egEbD1OgnVTyacbefKhmbLhIhU= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.2/go.mod h1:pkJQ2tZHJ0aFOVEEot6oZmaVEZcRme73eIFmhiVuRWs= github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= github.com/hashicorp/errwrap v1.1.0 h1:OxrOeh75EUXMY8TBjag2fzXGZ40LB6IKw45YeGUDY2I= github.com/hashicorp/errwrap v1.1.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= @@ -472,8 +479,8 @@ github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o github.com/stretchr/testify v1.8.2/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= -github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= -github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= +github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= github.com/stvp/tempredis v0.0.0-20181119212430-b82af8480203 h1:QVqDTf3h2WHt08YuiTGPZLls0Wq99X9bWd0Q5ZSBesM= github.com/stvp/tempredis v0.0.0-20181119212430-b82af8480203/go.mod h1:oqN97ltKNihBbwlX8dLpwxCl3+HnXKV/R0e+sRLd9C8= github.com/templexxx/cpu v0.0.1/go.mod h1:w7Tb+7qgcAlIyX4NhLuDKt78AHA5SzPmq0Wj6HiEnnk= @@ -493,6 +500,10 @@ github.com/timshannon/bolthold v0.0.0-20210913165410-232392fc8a6a h1:oIi7H/bwFUY github.com/timshannon/bolthold v0.0.0-20210913165410-232392fc8a6a/go.mod h1:iSvujNDmpZ6eQX+bg/0X3lF7LEmZ8N77g2a/J/+Zt2U= github.com/twmb/murmur3 v1.1.8 h1:8Yt9taO/WN3l08xErzjeschgZU2QSrwm1kclYq+0aRg= github.com/twmb/murmur3 v1.1.8/go.mod h1:Qq/R7NUyOfr65zD+6Q5IHKsJLwP7exErjN6lyyq3OSQ= +github.com/uptrace/opentelemetry-go-extra/otelgorm v0.3.2 h1:Jjn3zoRz13f8b1bR6LrXWglx93Sbh4kYfwgmPju3E2k= +github.com/uptrace/opentelemetry-go-extra/otelgorm v0.3.2/go.mod h1:wocb5pNrj/sjhWB9J5jctnC0K2eisSdz/nJJBNFHo+A= +github.com/uptrace/opentelemetry-go-extra/otelsql v0.3.2 h1:ZjUj9BLYf9PEqBn8W/OapxhPjVRdC6CsXTdULHsyk5c= +github.com/uptrace/opentelemetry-go-extra/otelsql v0.3.2/go.mod h1:O8bHQfyinKwTXKkiKNGmLQS7vRsqRxIQTFZpYpHK3IQ= github.com/urfave/cli v1.20.0/go.mod h1:70zkFmudgCuE/ngEzBv17Jvp/497gISqfk5gWijbERA= github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6KllzawFIhcdPw= github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc= @@ -512,16 +523,36 @@ go.etcd.io/bbolt v1.4.3 h1:dEadXpI6G79deX5prL3QRNP6JB8UxVkqo4UPnHaNXJo= go.etcd.io/bbolt v1.4.3/go.mod h1:tKQlpPaYCVFctUIgFKFnAlvbmB3tpy1vkTnDWohtc0E= go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA= go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A= -go.opentelemetry.io/otel v1.37.0 h1:9zhNfelUvx0KBfu/gb+ZgeAfAgtWrfHJZcAqFC228wQ= -go.opentelemetry.io/otel v1.37.0/go.mod h1:ehE/umFRLnuLa/vSccNq9oS1ErUlkkK71gMcN34UG8I= -go.opentelemetry.io/otel/metric v1.37.0 h1:mvwbQS5m0tbmqML4NqK+e3aDiO02vsf/WgbsdpcPoZE= -go.opentelemetry.io/otel/metric v1.37.0/go.mod h1:04wGrZurHYKOc+RKeye86GwKiTb9FKm1WHtO+4EVr2E= -go.opentelemetry.io/otel/sdk v1.37.0 h1:ItB0QUqnjesGRvNcmAcU0LyvkVyGJ2xftD29bWdDvKI= -go.opentelemetry.io/otel/sdk v1.37.0/go.mod h1:VredYzxUvuo2q3WRcDnKDjbdvmO0sCzOvVAiY+yUkAg= -go.opentelemetry.io/otel/sdk/metric v1.37.0 h1:90lI228XrB9jCMuSdA0673aubgRobVZFhbjxHHspCPc= -go.opentelemetry.io/otel/sdk/metric v1.37.0/go.mod h1:cNen4ZWfiD37l5NhS+Keb5RXVWZWpRE+9WyVCpbo5ps= -go.opentelemetry.io/otel/trace v1.37.0 h1:HLdcFNbRQBE2imdSEgm/kwqmQj1Or1l/7bW6mxVK7z4= -go.opentelemetry.io/otel/trace v1.37.0/go.mod h1:TlgrlQ+PtQO5XFerSPUYG0JSgGyryXewPGyayAWSBS0= +go.opentelemetry.io/contrib/instrumentation/github.com/labstack/echo/otelecho v0.63.0 h1:6YeICKmGrvgJ5th4+OMNpcuoB6q/Xs8gt0YCO7MUv1k= +go.opentelemetry.io/contrib/instrumentation/github.com/labstack/echo/otelecho v0.63.0/go.mod h1:ZEA7j2B35siNV0T00aapacNzjz4tvOlNoHp0ncCfwNQ= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.63.0 h1:RbKq8BG0FI8OiXhBfcRtqqHcZcka+gU3cskNuf05R18= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.63.0/go.mod h1:h06DGIukJOevXaj/xrNjhi/2098RZzcLTbc0jDAUbsg= +go.opentelemetry.io/contrib/propagators/b3 v1.38.0 h1:uHsCCOSKl0kLrV2dLkFK+8Ywk9iKa/fptkytc6aFFEo= +go.opentelemetry.io/contrib/propagators/b3 v1.38.0/go.mod h1:wMRSZJZcY8ya9mApLLhwIMjqmApy2o/Ml+62lhvxyHU= +go.opentelemetry.io/otel v1.38.0 h1:RkfdswUDRimDg0m2Az18RKOsnI8UDzppJAtj01/Ymk8= +go.opentelemetry.io/otel v1.38.0/go.mod h1:zcmtmQ1+YmQM9wrNsTGV/q/uyusom3P8RxwExxkZhjM= +go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploghttp v0.14.0 h1:QQqYw3lkrzwVsoEX0w//EhH/TCnpRdEenKBOOEIMjWc= +go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploghttp v0.14.0/go.mod h1:gSVQcr17jk2ig4jqJ2DX30IdWH251JcNAecvrqTxH1s= +go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.38.0 h1:GqRJVj7UmLjCVyVJ3ZFLdPRmhDUp2zFmQe3RHIOsw24= +go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.38.0/go.mod h1:ri3aaHSmCTVYu2AWv44YMauwAQc0aqI9gHKIcSbI1pU= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.38.0 h1:aTL7F04bJHUlztTsNGJ2l+6he8c+y/b//eR0jjjemT4= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.38.0/go.mod h1:kldtb7jDTeol0l3ewcmd8SDvx3EmIE7lyvqbasU3QC4= +go.opentelemetry.io/otel/log v0.14.0 h1:2rzJ+pOAZ8qmZ3DDHg73NEKzSZkhkGIua9gXtxNGgrM= +go.opentelemetry.io/otel/log v0.14.0/go.mod h1:5jRG92fEAgx0SU/vFPxmJvhIuDU9E1SUnEQrMlJpOno= +go.opentelemetry.io/otel/metric v1.38.0 h1:Kl6lzIYGAh5M159u9NgiRkmoMKjvbsKtYRwgfrA6WpA= +go.opentelemetry.io/otel/metric v1.38.0/go.mod h1:kB5n/QoRM8YwmUahxvI3bO34eVtQf2i4utNVLr9gEmI= +go.opentelemetry.io/otel/sdk v1.38.0 h1:l48sr5YbNf2hpCUj/FoGhW9yDkl+Ma+LrVl8qaM5b+E= +go.opentelemetry.io/otel/sdk v1.38.0/go.mod h1:ghmNdGlVemJI3+ZB5iDEuk4bWA3GkTpW+DOoZMYBVVg= +go.opentelemetry.io/otel/sdk/log v0.14.0 h1:JU/U3O7N6fsAXj0+CXz21Czg532dW2V4gG1HE/e8Zrg= +go.opentelemetry.io/otel/sdk/log v0.14.0/go.mod h1:imQvII+0ZylXfKU7/wtOND8Hn4OpT3YUoIgqJVksUkM= +go.opentelemetry.io/otel/sdk/log/logtest v0.14.0 h1:Ijbtz+JKXl8T2MngiwqBlPaHqc4YCaP/i13Qrow6gAM= +go.opentelemetry.io/otel/sdk/log/logtest v0.14.0/go.mod h1:dCU8aEL6q+L9cYTqcVOk8rM9Tp8WdnHOPLiBgp0SGOA= +go.opentelemetry.io/otel/sdk/metric v1.38.0 h1:aSH66iL0aZqo//xXzQLYozmWrXxyFkBJ6qT5wthqPoM= +go.opentelemetry.io/otel/sdk/metric v1.38.0/go.mod h1:dg9PBnW9XdQ1Hd6ZnRz689CbtrUp0wMMs9iPcgT9EZA= +go.opentelemetry.io/otel/trace v1.38.0 h1:Fxk5bKrDZJUH+AMyyIXGcFAPah0oRcT+LuNtJrmcNLE= +go.opentelemetry.io/otel/trace v1.38.0/go.mod h1:j1P9ivuFsTceSWe1oY+EeW3sc+Pp42sO++GHkg4wwhs= +go.opentelemetry.io/proto/otlp v1.7.1 h1:gTOMpGDb0WTBOP8JaO72iL3auEZhVmAQg4ipjOVAtj4= +go.opentelemetry.io/proto/otlp v1.7.1/go.mod h1:b2rVh6rfI/s2pHWNlB7ILJcRALpcNDzKhACevjI+ZnE= go.uber.org/atomic v1.9.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= go.uber.org/atomic v1.11.0 h1:ZvwS0R+56ePWxUNi+Atn9dWONBPp/AUETXlHW0DxSjE= go.uber.org/atomic v1.11.0/go.mod h1:LUxbIzbOniOlMKjJjyPfpl4v+PKK2cNJn91OQbhoJI0= @@ -531,8 +562,8 @@ go.uber.org/mock v0.6.0 h1:hyF9dfmbgIX5EfOdasqLsWD6xqpNZlXblLB/Dbnwv3Y= go.uber.org/mock v0.6.0/go.mod h1:KiVJ4BqZJaMj4svdfmHM0AUx4NJYO8ZNpPnZn1Z+BBU= go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= -go.yaml.in/yaml/v3 v3.0.3 h1:bXOww4E/J3f66rav3pX3m8w6jDE4knZjGOw8b5Y6iNE= -go.yaml.in/yaml/v3 v3.0.3/go.mod h1:tBHosrYAkRZjRAOREWbDnBXUf08JOwYq++0QNwQiWzI= +go.yaml.in/yaml/v3 v3.0.4 h1:tfq32ie2Jv2UxXFdLJdh3jXuOzWiL1fo0bu/FbuKpbc= +go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20190611184440-5c40567a22f8/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= @@ -680,12 +711,14 @@ golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8T golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= gonum.org/v1/gonum v0.16.0 h1:5+ul4Swaf3ESvrOnidPp4GZbzf0mxVQpDCYUQE7OJfk= gonum.org/v1/gonum v0.16.0/go.mod h1:fef3am4MQ93R2HHpKnLk4/Tbh/s0+wqD5nfa6Pnwy4E= -google.golang.org/genproto/googleapis/rpc v0.0.0-20250707201910-8d1bb00bc6a7 h1:pFyd6EwwL2TqFf8emdthzeX+gZE1ElRq3iM8pui4KBY= -google.golang.org/genproto/googleapis/rpc v0.0.0-20250707201910-8d1bb00bc6a7/go.mod h1:qQ0YXyHHx3XkvlzUtpXDkS29lDSafHMZBAZDc03LQ3A= +google.golang.org/genproto/googleapis/api v0.0.0-20250825161204-c5933d9347a5 h1:BIRfGDEjiHRrk0QKZe3Xv2ieMhtgRGeLcZQ0mIVn4EY= +google.golang.org/genproto/googleapis/api v0.0.0-20250825161204-c5933d9347a5/go.mod h1:j3QtIyytwqGr1JUDtYXwtMXWPKsEa5LtzIFN1Wn5WvE= +google.golang.org/genproto/googleapis/rpc v0.0.0-20250825161204-c5933d9347a5 h1:eaY8u2EuxbRv7c3NiGK0/NedzVsCcV6hDuU5qPX5EGE= +google.golang.org/genproto/googleapis/rpc v0.0.0-20250825161204-c5933d9347a5/go.mod h1:M4/wBTSeyLxupu3W3tJtOgB14jILAS/XWPSSa3TAlJc= google.golang.org/grpc v1.75.0 h1:+TW+dqTd2Biwe6KKfhE5JpiYIBWq865PhKGSXiivqt4= google.golang.org/grpc v1.75.0/go.mod h1:JtPAzKiq4v1xcAB2hydNlWI2RnF85XXcV0mhKXr2ecQ= -google.golang.org/protobuf v1.36.6 h1:z1NpPI8ku2WgiWnf+t9wTPsn6eP1L7ksHUlkfLvd9xY= -google.golang.org/protobuf v1.36.6/go.mod h1:jduwjTPXsFjZGTmRluh+L6NjiWu7pchiJ2/5YcXBHnY= +google.golang.org/protobuf v1.36.8 h1:xHScyCOEuuwZEc6UtSOvPbAT4zRh0xcNRYekJwfqyMc= +google.golang.org/protobuf v1.36.8/go.mod h1:fuxRtAxBytpl4zzqUh6/eyUujkJdNiuEkXntxiD/uRU= gopkg.in/Regis24GmbH/go-diacritics.v2 v2.0.3 h1:rz88vn1OH2B9kKorR+QCrcuw6WbizVwahU2Y9Q09xqU= gopkg.in/Regis24GmbH/go-diacritics.v2 v2.0.3/go.mod h1:vJmfdx2L0+30M90zUd0GCjLV14Ip3ZgWR5+MV1qljOo= gopkg.in/Regis24GmbH/go-phonetics.v2 v2.0.3 h1:pSSZonNnrORBQXIm3kl6P9EQTNqVds9zszK/BXbOItg= diff --git a/http/client/client.go b/http/client/client.go index 60d9c57b87..47ab2dd1f7 100644 --- a/http/client/client.go +++ b/http/client/client.go @@ -23,10 +23,12 @@ import ( "crypto/tls" "errors" "fmt" - "github.com/nuts-foundation/nuts-node/core" "io" "net/http" "time" + + "github.com/nuts-foundation/nuts-node/core" + "go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp" ) // SafeHttpTransport is a http.Transport that can be used as a default transport for HTTP clients. @@ -44,6 +46,11 @@ func init() { DefaultCachingTransport = SafeHttpTransport } +// httpSpanName formats span names for outbound HTTP requests. +func httpSpanName(_ string, r *http.Request) string { + return "http-client: " + r.Method + " " + r.URL.Path +} + // StrictMode is a flag that can be set to true to enable strict mode for the HTTP client. var StrictMode bool @@ -63,21 +70,33 @@ func limitedReadAll(reader io.Reader) ([]byte, error) { } // New creates a new HTTP client with the given timeout. +// If tracing is enabled, the transport will be wrapped with OpenTelemetry instrumentation. func New(timeout time.Duration) *StrictHTTPClient { + transport := getTransport(SafeHttpTransport) return &StrictHTTPClient{ client: &http.Client{ - Transport: SafeHttpTransport, + Transport: transport, Timeout: timeout, }, } } +// getTransport wraps the given transport with OpenTelemetry instrumentation if tracing is enabled. +func getTransport(base http.RoundTripper) http.RoundTripper { + if core.TracingEnabled() { + return otelhttp.NewTransport(base, otelhttp.WithSpanNameFormatter(httpSpanName)) + } + return base +} + // NewWithCache creates a new HTTP client with the given timeout. // It uses the DefaultCachingTransport as the underlying transport. +// If tracing is enabled, the transport will be wrapped with OpenTelemetry instrumentation. func NewWithCache(timeout time.Duration) *StrictHTTPClient { + transport := getTransport(DefaultCachingTransport) return &StrictHTTPClient{ client: &http.Client{ - Transport: DefaultCachingTransport, + Transport: transport, Timeout: timeout, }, } @@ -86,12 +105,13 @@ func NewWithCache(timeout time.Duration) *StrictHTTPClient { // NewWithTLSConfig creates a new HTTP client with the given timeout and TLS configuration. // It copies the http.DefaultTransport and sets the TLSClientConfig to the given tls.Config. // As such, it can't be used in conjunction with the CachingRoundTripper. +// If tracing is enabled, the transport will be wrapped with OpenTelemetry instrumentation. func NewWithTLSConfig(timeout time.Duration, tlsConfig *tls.Config) *StrictHTTPClient { transport := SafeHttpTransport.Clone() transport.TLSClientConfig = tlsConfig return &StrictHTTPClient{ client: &http.Client{ - Transport: transport, + Transport: getTransport(transport), Timeout: timeout, }, } diff --git a/http/client/client_test.go b/http/client/client_test.go index 76d5c3d401..0db38d0c0c 100644 --- a/http/client/client_test.go +++ b/http/client/client_test.go @@ -21,8 +21,6 @@ package client import ( "crypto/tls" "fmt" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" "net/http" "net/http/httptest" "strings" @@ -30,6 +28,10 @@ import ( "sync/atomic" "testing" "time" + + "github.com/nuts-foundation/nuts-node/core" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) func TestStrictHTTPClient(t *testing.T) { @@ -73,6 +75,9 @@ func TestStrictHTTPClient(t *testing.T) { assert.Equal(t, 0, rt.invocations) }) t.Run("sets TLS config", func(t *testing.T) { + original := core.TracingEnabled() + core.SetTracingEnabled(false) // ensure we can cast to *http.Transport + t.Cleanup(func() { core.SetTracingEnabled(original) }) client := NewWithTLSConfig(time.Second, &tls.Config{ InsecureSkipVerify: true, }) @@ -197,3 +202,49 @@ func TestCaching(t *testing.T) { assert.Equal(t, int32(1), total.Load()) } + +func TestGetTransport(t *testing.T) { + t.Run("wraps transport when tracing enabled", func(t *testing.T) { + original := core.TracingEnabled() + core.SetTracingEnabled(true) + t.Cleanup(func() { core.SetTracingEnabled(original) }) + + transport := getTransport(SafeHttpTransport) + + // Should not be the same as SafeHttpTransport (it's wrapped) + assert.NotEqual(t, SafeHttpTransport, transport) + }) + + t.Run("returns base transport when tracing disabled", func(t *testing.T) { + original := core.TracingEnabled() + core.SetTracingEnabled(false) + t.Cleanup(func() { core.SetTracingEnabled(original) }) + + transport := getTransport(SafeHttpTransport) + + assert.Equal(t, SafeHttpTransport, transport) + }) +} + +func TestNew(t *testing.T) { + t.Run("wraps transport when tracing enabled", func(t *testing.T) { + original := core.TracingEnabled() + core.SetTracingEnabled(true) + t.Cleanup(func() { core.SetTracingEnabled(original) }) + + client := New(time.Second) + + // Transport should be wrapped (not equal to SafeHttpTransport) + assert.NotEqual(t, SafeHttpTransport, client.client.Transport) + }) + + t.Run("uses SafeHttpTransport when tracing disabled", func(t *testing.T) { + original := core.TracingEnabled() + core.SetTracingEnabled(false) + t.Cleanup(func() { core.SetTracingEnabled(original) }) + + client := New(time.Second) + + assert.Equal(t, SafeHttpTransport, client.client.Transport) + }) +} diff --git a/http/engine.go b/http/engine.go index 9803b2d5b3..0c6026c848 100644 --- a/http/engine.go +++ b/http/engine.go @@ -36,6 +36,7 @@ import ( "github.com/nuts-foundation/nuts-node/http/log" "github.com/nuts-foundation/nuts-node/http/tokenV2" "github.com/nuts-foundation/nuts-node/vdr/didnuts" + "go.opentelemetry.io/contrib/instrumentation/github.com/labstack/echo/otelecho" ) const moduleName = "HTTP" @@ -90,6 +91,7 @@ func (h *Engine) Configure(serverConfig core.ServerConfig) error { return err } + h.applyTracingMiddleware(h.server) h.applyRateLimiterMiddleware(h.server, serverConfig) h.applyLoggerMiddleware(h.server, []string{MetricsPath, StatusPath, HealthPath}, h.config.Log) return h.applyAuthMiddleware(h.server, InternalPath, h.config.Internal.Auth) @@ -103,6 +105,24 @@ func (h *Engine) configureClient(serverConfig core.ServerConfig) { } } +func (h *Engine) applyTracingMiddleware(echoServer core.EchoRouter) { + // Only apply tracing middleware if tracing is enabled + if !core.TracingEnabled() { + return + } + skipper := func(c echo.Context) bool { + // Skip health/metrics/status endpoints to reduce noise + path := c.Request().URL.Path + return matchesPath(path, HealthPath) || matchesPath(path, MetricsPath) || matchesPath(path, StatusPath) + } + // Use nuts-node's own TracerProvider to ensure spans are attributed to "nuts-node" service, + // even when embedded in another application that has its own TracerProvider. + echoServer.Use(otelecho.Middleware(moduleName, + otelecho.WithSkipper(skipper), + otelecho.WithTracerProvider(core.GetTracerProvider()), + )) +} + func (h *Engine) createEchoServer(ipHeader string) (EchoServer, error) { echoServer := echo.New() echoServer.HideBanner = true diff --git a/pki/validator.go b/pki/validator.go index 80db1ed633..207b1f6952 100644 --- a/pki/validator.go +++ b/pki/validator.go @@ -29,6 +29,9 @@ import ( "strings" "sync" "time" + + "github.com/nuts-foundation/nuts-node/core" + "go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp" ) var _ Validator = (*validator)(nil) @@ -89,7 +92,18 @@ func newRevocationList(cert *x509.Certificate) *revocationList { // newValidator returns a new PKI (crl/denylist) validator. func newValidator(config Config) (*validator, error) { // we do not use our safe http client here since we're downloading from a trusted resource - return newValidatorWithHTTPClient(config, &http.Client{Timeout: syncTimeout}) + var transport http.RoundTripper = http.DefaultTransport + if core.TracingEnabled() { + transport = otelhttp.NewTransport(http.DefaultTransport, + otelhttp.WithSpanNameFormatter(func(_ string, r *http.Request) string { + return "pki: " + r.Method + " " + r.URL.Path + })) + } + httpClient := &http.Client{ + Transport: transport, + Timeout: syncTimeout, + } + return newValidatorWithHTTPClient(config, httpClient) } // NewValidatorWithHTTPClient returns a new instance with a pre-configured HTTP client diff --git a/storage/engine.go b/storage/engine.go index 8f9790c82e..04ba16b5b5 100644 --- a/storage/engine.go +++ b/storage/engine.go @@ -38,6 +38,7 @@ import ( "github.com/pressly/goose/v3" "github.com/redis/go-redis/v9" "github.com/sirupsen/logrus" + "github.com/uptrace/opentelemetry-go-extra/otelgorm" "gorm.io/driver/mysql" "gorm.io/driver/postgres" "gorm.io/driver/sqlserver" @@ -324,6 +325,14 @@ func (e *engine) initSQLDatabase(strictmode bool) error { default: return errors.New("unsupported SQL database") } + + // Add OpenTelemetry tracing to GORM if tracing is enabled + if core.TracingEnabled() { + if err := e.sqlDB.Use(otelgorm.NewPlugin(otelgorm.WithTracerProvider(core.GetTracerProvider()))); err != nil { + return fmt.Errorf("failed to add GORM tracing plugin: %w", err) + } + } + goose.SetVerbose(log.Logger().Level >= logrus.DebugLevel) goose.SetLogger(e.sqlMigrationLogger) if err != nil { diff --git a/vdr/api/v2/api.go b/vdr/api/v2/api.go index 7b330ce919..8a691b13e9 100644 --- a/vdr/api/v2/api.go +++ b/vdr/api/v2/api.go @@ -88,14 +88,14 @@ func (w *Wrapper) Routes(router core.EchoRouter) { router.Use(cache.MaxAge(5*time.Minute, cacheControlMaxAgeURLs...).Handle) } -func (r Wrapper) GetTenantWebDID(_ context.Context, request GetTenantWebDIDRequestObject) (GetTenantWebDIDResponseObject, error) { +func (r Wrapper) GetTenantWebDID(ctx context.Context, request GetTenantWebDIDRequestObject) (GetTenantWebDIDResponseObject, error) { ownDID := r.requestedWebDID(request.Id) document, err := r.VDR.ResolveManaged(ownDID) if err != nil { if resolver.IsFunctionalResolveError(err) { return GetTenantWebDID404Response{}, nil } - log.Logger().WithError(err).Errorf("Could not resolve tenant did:web: %s", ownDID.String()) + log.Logger().WithContext(ctx).WithError(err).Errorf("Could not resolve tenant did:web: %s", ownDID.String()) return nil, errors.New("unable to resolve DID") } return GetTenantWebDID200JSONResponse(*document), nil @@ -108,7 +108,7 @@ func (r Wrapper) GetRootWebDID(ctx context.Context, _ GetRootWebDIDRequestObject if resolver.IsFunctionalResolveError(err) { return GetRootWebDID404Response{}, nil } - log.Logger().WithError(err).Errorf("Could not resolve root did:web: %s", ownDID.String()) + log.Logger().WithContext(ctx).WithError(err).Errorf("Could not resolve root did:web: %s", ownDID.String()) return nil, errors.New("unable to resolve DID") } return GetRootWebDID200JSONResponse(*document), nil diff --git a/vdr/didsubject/manager.go b/vdr/didsubject/manager.go index 1fe0bbf925..58f87811f7 100644 --- a/vdr/didsubject/manager.go +++ b/vdr/didsubject/manager.go @@ -63,8 +63,8 @@ func New(db *gorm.DB, methodManagers map[string]MethodManager, keyStore nutsCryp } } -func (r *SqlManager) List(_ context.Context) (map[string][]did.DID, error) { - sqlDIDManager := NewDIDManager(r.DB) +func (r *SqlManager) List(ctx context.Context) (map[string][]did.DID, error) { + sqlDIDManager := NewDIDManager(r.DB.WithContext(ctx)) dids, err := sqlDIDManager.All() if err != nil { return nil, err