diff --git a/CHANGELOG.md b/CHANGELOG.md index edc98678..7b0c0b86 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,7 +6,7 @@ All notable changes to this project will be documented in this file. ### Added -- Adds new telemetry CLI arguments and environment variables ([#715]). +- Adds new telemetry CLI arguments and environment variables ([#715], [#744]). - Use `--file-log-max-files` (or `FILE_LOG_MAX_FILES`) to limit the number of log files kept. - Use `--file-log-rotation-period` (or `FILE_LOG_ROTATION_PERIOD`) to configure the frequency of rotation. - Use `--console-log-format` (or `CONSOLE_LOG_FORMAT`) to set the format to `plain` (default) or `json`. @@ -17,7 +17,7 @@ All notable changes to this project will be documented in this file. ### Changed -- BREAKING: Replace stackable-operator `initialize_logging` with stackable-telemetry `Tracing` ([#703], [#710], [#715]). +- BREAKING: Replace stackable-operator `initialize_logging` with stackable-telemetry `Tracing` ([#703], [#710], [#715], [#744]). - operator-binary: - The console log level was set by `OPA_OPERATOR_LOG`, and is now set by `CONSOLE_LOG_LEVEL`. - The file log level was set by `OPA_OPERATOR_LOG`, and is now set by `FILE_LOG_LEVEL`. @@ -43,7 +43,7 @@ All notable changes to this project will be documented in this file. - The defaults from the docker images itself will now apply, which will be different from 1000/0 going forward - This is marked as breaking because tools and policies might exist, which require these fields to be set - user-info-fetcher: the AD backend now uses the Kerberos realm to expand the user search filter ([#737]) -- BREAKING: Bump stackable-operator to 0.94.0 and update other dependencies ([#743]). +- BREAKING: Bump stackable-operator to 0.94.0 and update other dependencies ([#743], [#744]). - The default Kubernetes cluster domain name is now fetched from the kubelet API unless explicitly configured. - This requires operators to have the RBAC permission to get nodes/proxy in the apiGroup "". The helm-chart takes care of this. - The CLI argument `--kubernetes-node-name` or env variable `KUBERNETES_NODE_NAME` needs to be set. The helm-chart takes care of this. @@ -52,6 +52,8 @@ All notable changes to this project will be documented in this file. - Use `json` file extension for log files ([#709]). - Allow uppercase characters in domain names ([#743]). +- Add missing RBAC permission to patch Kubernetes `events` used for error reporting to helm-chart ([#744]). +- Correctly propagate the Kubernetes cluster domain to the `opa-bundle-builder` and `user-info-fetcher` sidecars ([#744]). ### Removed @@ -71,6 +73,7 @@ All notable changes to this project will be documented in this file. [#732]: https://github.com/stackabletech/opa-operator/pull/732 [#737]: https://github.com/stackabletech/opa-operator/pull/737 [#743]: https://github.com/stackabletech/opa-operator/pull/743 +[#744]: https://github.com/stackabletech/opa-operator/pull/744 ## [25.3.0] - 2025-03-21 diff --git a/deploy/helm/opa-operator/templates/roles.yaml b/deploy/helm/opa-operator/templates/roles.yaml index e547f2a2..60b05c9d 100644 --- a/deploy/helm/opa-operator/templates/roles.yaml +++ b/deploy/helm/opa-operator/templates/roles.yaml @@ -85,6 +85,7 @@ rules: - patch verbs: - create + - patch - apiGroups: - {{ include "operator.name" . }}.stackable.tech resources: diff --git a/rust/operator-binary/src/controller.rs b/rust/operator-binary/src/controller.rs index 39a53ae4..87d5291e 100644 --- a/rust/operator-binary/src/controller.rs +++ b/rust/operator-binary/src/controller.rs @@ -38,8 +38,9 @@ use stackable_operator::{ api::{ apps::v1::{DaemonSet, DaemonSetSpec}, core::v1::{ - ConfigMap, EmptyDirVolumeSource, EnvVar, HTTPGetAction, Probe, SecretVolumeSource, - Service, ServiceAccount, ServicePort, ServiceSpec, + ConfigMap, EmptyDirVolumeSource, EnvVar, EnvVarSource, HTTPGetAction, + ObjectFieldSelector, Probe, SecretVolumeSource, Service, ServiceAccount, + ServicePort, ServiceSpec, }, }, apimachinery::pkg::{apis::meta::v1::LabelSelector, util::intstr::IntOrString}, @@ -69,7 +70,7 @@ use stackable_operator::{ operations::ClusterOperationsConditionBuilder, }, time::Duration, - utils::COMMON_BASH_TRAP_FUNCTIONS, + utils::{COMMON_BASH_TRAP_FUNCTIONS, cluster_info::KubernetesClusterInfo}, }; use strum::{EnumDiscriminants, IntoStaticStr}; @@ -104,6 +105,12 @@ const USER_INFO_FETCHER_KERBEROS_DIR: &str = "/stackable/kerberos"; const DOCKER_IMAGE_BASE_NAME: &str = "opa"; +const CONSOLE_LOG_LEVEL_ENV: &str = "CONSOLE_LOG_LEVEL"; +const FILE_LOG_LEVEL_ENV: &str = "FILE_LOG_LEVEL"; +const FILE_LOG_DIRECTORY_ENV: &str = "FILE_LOG_DIRECTORY"; +const KUBERNETES_NODE_NAME_ENV: &str = "KUBERNETES_NODE_NAME"; +const KUBERNETES_CLUSTER_DOMAIN_ENV: &str = "KUBERNETES_CLUSTER_DOMAIN"; + // logging defaults const DEFAULT_DECISION_LOGGING_ENABLED: bool = false; const DEFAULT_FILE_LOG_LEVEL: LogLevel = LogLevel::INFO; @@ -144,6 +151,7 @@ pub struct Ctx { pub product_config: ProductConfigManager, pub opa_bundle_builder_image: String, pub user_info_fetcher_image: String, + pub cluster_info: KubernetesClusterInfo, } #[derive(Snafu, Debug, EnumDiscriminants)] @@ -492,6 +500,7 @@ pub async fn reconcile_opa( &ctx.opa_bundle_builder_image, &ctx.user_info_fetcher_image, &rbac_sa, + &ctx.cluster_info, )?; cluster_resources @@ -714,6 +723,44 @@ fn build_server_rolegroup_config_map( }) } +/// Env variables that are need to run stackable Rust binaries, such as +/// * opa-bundle-builder +/// * user-info-fetcher +fn add_stackable_rust_cli_env_vars( + container_builder: &mut ContainerBuilder, + cluster_info: &KubernetesClusterInfo, + log_level: impl Into, + container: &v1alpha1::Container, +) { + let log_level = log_level.into(); + container_builder + .add_env_var(CONSOLE_LOG_LEVEL_ENV, log_level.clone()) + .add_env_var(FILE_LOG_LEVEL_ENV, log_level) + .add_env_var( + FILE_LOG_DIRECTORY_ENV, + format!("{STACKABLE_LOG_DIR}/{container}",), + ) + .add_env_var_from_source( + KUBERNETES_NODE_NAME_ENV, + EnvVarSource { + field_ref: Some(ObjectFieldSelector { + field_path: "spec.nodeName".to_owned(), + ..Default::default() + }), + ..Default::default() + }, + ) + // We set the cluster domain always explicitly, because the product Pods does not have the + // RBAC permission to get the `nodes/proxy` resource at cluster scope. This is likely + // because it only has a RoleBinding and no ClusterRoleBinding. + // By setting the cluster domain explicitly we avoid that the sidecars try to look it up + // based on some information coming from the node. + .add_env_var( + KUBERNETES_CLUSTER_DOMAIN_ENV, + cluster_info.cluster_domain.to_string(), + ); +} + /// The rolegroup [`DaemonSet`] runs the rolegroup, as configured by the administrator. /// /// The [`Pod`](`stackable_operator::k8s_openapi::api::core::v1::Pod`)s are accessible through the @@ -732,6 +779,7 @@ fn build_server_rolegroup_daemonset( opa_bundle_builder_image: &str, user_info_fetcher_image: &str, service_account: &ServiceAccount, + cluster_info: &KubernetesClusterInfo, ) -> Result { let opa_name = opa.metadata.name.as_deref().context(NoNameSnafu)?; let role = opa.role(opa_role); @@ -782,8 +830,6 @@ fn build_server_rolegroup_daemonset( .context(AddVolumeMountSnafu)? .resources(merged_config.resources.to_owned().into()); - let console_and_file_log_level = bundle_builder_log_level(merged_config); - cb_bundle_builder .image_from_product_image(resolved_product_image) // inherit the pull policy and pull secrets, and then... .image(opa_bundle_builder_image) // ...override the image @@ -799,12 +845,6 @@ fn build_server_rolegroup_daemonset( &bundle_builder_container_name, )]) .add_env_var_from_field_path("WATCH_NAMESPACE", FieldPathEnvVar::Namespace) - .add_env_var("CONSOLE_LOG_LEVEL", console_and_file_log_level.to_string()) - .add_env_var("FILE_LOG_LEVEL", console_and_file_log_level.to_string()) - .add_env_var( - "FILE_LOG_DIRECTORY", - format!("{STACKABLE_LOG_DIR}/{bundle_builder_container_name}"), - ) .add_volume_mount(BUNDLES_VOLUME_NAME, BUNDLES_DIR) .context(AddVolumeMountSnafu)? .add_volume_mount(LOG_VOLUME_NAME, STACKABLE_LOG_DIR) @@ -838,6 +878,12 @@ fn build_server_rolegroup_daemonset( }), ..Probe::default() }); + add_stackable_rust_cli_env_vars( + &mut cb_bundle_builder, + cluster_info, + sidecar_container_log_level(merged_config, &v1alpha1::Container::BundleBuilder).to_string(), + &v1alpha1::Container::BundleBuilder, + ); cb_opa .image_from_product_image(resolved_product_image) @@ -949,6 +995,13 @@ fn build_server_rolegroup_daemonset( .with_memory_limit("128Mi") .build(), ); + add_stackable_rust_cli_env_vars( + &mut cb_user_info_fetcher, + cluster_info, + sidecar_container_log_level(merged_config, &v1alpha1::Container::UserInfoFetcher) + .to_string(), + &v1alpha1::Container::UserInfoFetcher, + ); match &user_info.backend { user_info_fetcher::v1alpha1::Backend::None {} => {} @@ -1257,13 +1310,42 @@ fn build_bundle_builder_start_command( } } -fn bundle_builder_log_level(merged_config: &v1alpha1::OpaConfig) -> BundleBuilderLogLevel { +/// TODO: *Technically* this function would need to be way more complex. +/// For now it's a good-enough approximation, this is fine :D +/// +/// The following config +/// +/// ``` +/// containers: +/// opa-bundle-builder: +/// console: +/// level: DEBUG +/// file: +/// level: INFO +/// loggers: +/// ROOT: +/// level: INFO +/// my.module: +/// level: DEBUG +/// some.chatty.module: +/// level: NONE +/// ``` +/// +/// should result in +/// `CONSOLE_LOG_LEVEL=info,my.module=debug,some.chatty.module=none` +/// and +/// `FILE_LOG_LEVEL=info,my.module=info,some.chatty.module=none`. +/// Note that `my.module` is `info` instead of `debug`, because it's clamped by the global file log +/// level. +/// +/// Context: https://docs.stackable.tech/home/stable/concepts/logging/ +fn sidecar_container_log_level( + merged_config: &v1alpha1::OpaConfig, + sidecar_container: &v1alpha1::Container, +) -> BundleBuilderLogLevel { if let Some(ContainerLogConfig { choice: Some(ContainerLogConfigChoice::Automatic(log_config)), - }) = merged_config - .logging - .containers - .get(&v1alpha1::Container::BundleBuilder) + }) = merged_config.logging.containers.get(sidecar_container) { if let Some(logger) = log_config .loggers diff --git a/rust/operator-binary/src/crd/mod.rs b/rust/operator-binary/src/crd/mod.rs index f2490259..010a0c29 100644 --- a/rust/operator-binary/src/crd/mod.rs +++ b/rust/operator-binary/src/crd/mod.rs @@ -168,6 +168,7 @@ pub mod versioned { Vector, BundleBuilder, Opa, + UserInfoFetcher, } #[derive(Clone, Debug, Default, Fragment, JsonSchema, PartialEq)] diff --git a/rust/operator-binary/src/main.rs b/rust/operator-binary/src/main.rs index 3ac2f816..40587f03 100644 --- a/rust/operator-binary/src/main.rs +++ b/rust/operator-binary/src/main.rs @@ -28,6 +28,7 @@ use stackable_operator::{ namespace::WatchNamespace, shared::yaml::SerializeOptions, telemetry::Tracing, + utils::cluster_info::KubernetesClusterInfo, }; use crate::controller::OPA_FULL_CONTROLLER_NAME; @@ -100,12 +101,14 @@ async fn main() -> anyhow::Result<()> { let client = client::initialize_operator(Some(OPERATOR_NAME.to_string()), &cluster_info_opts) .await?; + let kubernetes_cluster_info = client.kubernetes_cluster_info.clone(); create_controller( client, product_config, watch_namespace, operator_image.clone(), operator_image, + kubernetes_cluster_info, ) .await; } @@ -123,6 +126,7 @@ async fn create_controller( watch_namespace: WatchNamespace, opa_bundle_builder_image: String, user_info_fetcher_image: String, + cluster_info: KubernetesClusterInfo, ) { let opa_api: Api> = watch_namespace.get_api(&client); let daemonsets_api: Api> = watch_namespace.get_api(&client); @@ -150,6 +154,7 @@ async fn create_controller( product_config, opa_bundle_builder_image, user_info_fetcher_image, + cluster_info, }), ) // We can let the reporting happen in the background