@@ -50,7 +50,7 @@ use stackable_operator::{
5050 core:: { DeserializeGuard , error_boundary} ,
5151 runtime:: { controller:: Action , reflector:: ObjectRef } ,
5252 } ,
53- kvp:: { Label , LabelError , Labels , ObjectLabels } ,
53+ kvp:: { LabelError , Labels , ObjectLabels } ,
5454 logging:: controller:: ReconcilerError ,
5555 memory:: { BinaryMultiple , MemoryQuantity } ,
5656 product_config_utils:: { transform_all_roles_to_config, validate_all_roles_and_groups_config} ,
@@ -93,6 +93,7 @@ pub const BUNDLES_ACTIVE_DIR: &str = "/bundles/active";
9393pub const BUNDLES_INCOMING_DIR : & str = "/bundles/incoming" ;
9494pub const BUNDLES_TMP_DIR : & str = "/bundles/tmp" ;
9595pub const BUNDLE_BUILDER_PORT : i32 = 3030 ;
96+ pub const OPA_STACKABLE_SERVICE_NAME : & str = "stackable" ;
9697
9798const CONFIG_VOLUME_NAME : & str = "config" ;
9899const CONFIG_DIR : & str = "/stackable/config" ;
@@ -189,6 +190,12 @@ pub enum Error {
189190 rolegroup : RoleGroupRef < v1alpha1:: OpaCluster > ,
190191 } ,
191192
193+ #[ snafu( display( "failed to apply metrics Service for [{rolegroup}]" ) ) ]
194+ ApplyRoleGroupMetricsService {
195+ source : stackable_operator:: cluster_resources:: Error ,
196+ rolegroup : RoleGroupRef < v1alpha1:: OpaCluster > ,
197+ } ,
198+
192199 #[ snafu( display( "failed to build ConfigMap for [{rolegroup}]" ) ) ]
193200 BuildRoleGroupConfig {
194201 source : stackable_operator:: builder:: configmap:: Error ,
@@ -346,19 +353,20 @@ pub struct OpaClusterConfigFile {
346353 bundles : OpaClusterBundle ,
347354 #[ serde( skip_serializing_if = "Option::is_none" ) ]
348355 decision_logs : Option < OpaClusterConfigDecisionLog > ,
356+ status : Option < OpaClusterConfigStatus > ,
349357}
350358
351359impl OpaClusterConfigFile {
352360 pub fn new ( decision_logging : Option < OpaClusterConfigDecisionLog > ) -> Self {
353361 Self {
354362 services : vec ! [ OpaClusterConfigService {
355- name: String :: from ( "stackable" ) ,
356- url: String :: from ( "http://localhost:3030/opa/v1" ) ,
363+ name: OPA_STACKABLE_SERVICE_NAME . to_owned ( ) ,
364+ url: "http://localhost:3030/opa/v1" . to_owned ( ) ,
357365 } ] ,
358366 bundles : OpaClusterBundle {
359367 stackable : OpaClusterBundleConfig {
360- service : String :: from ( "stackable" ) ,
361- resource : String :: from ( "opa/bundle.tar.gz" ) ,
368+ service : OPA_STACKABLE_SERVICE_NAME . to_owned ( ) ,
369+ resource : "opa/bundle.tar.gz" . to_owned ( ) ,
362370 persist : true ,
363371 polling : OpaClusterBundleConfigPolling {
364372 min_delay_seconds : 10 ,
@@ -367,6 +375,12 @@ impl OpaClusterConfigFile {
367375 } ,
368376 } ,
369377 decision_logs : decision_logging,
378+ // Enable more Prometheus metrics, such as bundle loads
379+ // See https://www.openpolicyagent.org/docs/monitoring#status-metrics
380+ status : Some ( OpaClusterConfigStatus {
381+ service : OPA_STACKABLE_SERVICE_NAME . to_owned ( ) ,
382+ prometheus : true ,
383+ } ) ,
370384 }
371385 }
372386}
@@ -401,6 +415,12 @@ pub struct OpaClusterConfigDecisionLog {
401415 console : bool ,
402416}
403417
418+ #[ derive( Serialize , Deserialize ) ]
419+ struct OpaClusterConfigStatus {
420+ service : String ,
421+ prometheus : bool ,
422+ }
423+
404424pub async fn reconcile_opa (
405425 opa : Arc < DeserializeGuard < v1alpha1:: OpaCluster > > ,
406426 ctx : Arc < Ctx > ,
@@ -498,7 +518,10 @@ pub async fn reconcile_opa(
498518 & rolegroup,
499519 & merged_config,
500520 ) ?;
501- let rg_service = build_rolegroup_service ( opa, & resolved_product_image, & rolegroup) ?;
521+ let rg_service =
522+ build_rolegroup_headless_service ( opa, & resolved_product_image, & rolegroup) ?;
523+ let rg_metrics_service =
524+ build_rolegroup_metrics_service ( opa, & resolved_product_image, & rolegroup) ?;
502525 let rg_daemonset = build_server_rolegroup_daemonset (
503526 opa,
504527 & resolved_product_image,
@@ -524,6 +547,12 @@ pub async fn reconcile_opa(
524547 . with_context ( |_| ApplyRoleGroupServiceSnafu {
525548 rolegroup : rolegroup. clone ( ) ,
526549 } ) ?;
550+ cluster_resources
551+ . add ( client, rg_metrics_service)
552+ . await
553+ . with_context ( |_| ApplyRoleGroupServiceSnafu {
554+ rolegroup : rolegroup. clone ( ) ,
555+ } ) ?;
527556 ds_cond_builder. add (
528557 cluster_resources
529558 . add ( client, rg_daemonset. clone ( ) )
@@ -647,17 +676,14 @@ pub fn build_server_role_service(
647676/// The rolegroup [`Service`] is a headless service that allows direct access to the instances of a certain rolegroup
648677///
649678/// This is mostly useful for internal communication between peers, or for clients that perform client-side load balancing.
650- fn build_rolegroup_service (
679+ fn build_rolegroup_headless_service (
651680 opa : & v1alpha1:: OpaCluster ,
652681 resolved_product_image : & ResolvedProductImage ,
653682 rolegroup : & RoleGroupRef < v1alpha1:: OpaCluster > ,
654683) -> Result < Service > {
655- let prometheus_label =
656- Label :: try_from ( ( "prometheus.io/scrape" , "true" ) ) . context ( BuildLabelSnafu ) ?;
657-
658684 let metadata = ObjectMetaBuilder :: new ( )
659685 . name_and_namespace ( opa)
660- . name ( rolegroup. object_name ( ) )
686+ . name ( rolegroup. rolegroup_headless_service_name ( ) )
661687 . ownerreference_from_resource ( opa, None , Some ( true ) )
662688 . context ( ObjectMissingMetadataForOwnerRefSnafu ) ?
663689 . with_recommended_labels ( build_recommended_labels (
@@ -667,19 +693,20 @@ fn build_rolegroup_service(
667693 & rolegroup. role_group ,
668694 ) )
669695 . context ( ObjectMetaSnafu ) ?
670- . with_label ( prometheus_label)
671696 . build ( ) ;
672697
673- let service_selector_labels =
674- Labels :: role_group_selector ( opa, APP_NAME , & rolegroup. role , & rolegroup. role_group )
675- . context ( BuildLabelSnafu ) ?;
676-
677698 let service_spec = ServiceSpec {
678- // Internal communication does not need to be exposed
699+ // Currently we don't offer listener-exposition of OPA mostly due to security concerns.
700+ // OPA is currently public within the Kubernetes cluster (without authentication).
701+ // Opening it up to outside of Kubernetes might worsen things.
702+ // We are open to implement listener-integration, but this needs to be thought through before
703+ // implementing it.
704+ // Note: We have kind of similar situations for HMS and Zookeeper, as the authentication
705+ // options there are non-existent (mTLS still opens plain port) or suck (Kerberos).
679706 type_ : Some ( "ClusterIP" . to_string ( ) ) ,
680707 cluster_ip : Some ( "None" . to_string ( ) ) ,
681- ports : Some ( service_ports ( opa. spec . cluster_config . tls . is_some ( ) ) ) ,
682- selector : Some ( service_selector_labels . into ( ) ) ,
708+ ports : Some ( data_service_ports_with_tls ( opa. spec . cluster_config . tls . is_some ( ) ) ) ,
709+ selector : Some ( role_group_selector_labels ( opa , rolegroup ) ? . into ( ) ) ,
683710 publish_not_ready_addresses : Some ( true ) ,
684711 ..ServiceSpec :: default ( )
685712 } ;
@@ -691,6 +718,55 @@ fn build_rolegroup_service(
691718 } )
692719}
693720
721+ /// The rolegroup metrics [`Service`] is a service that exposes metrics and has the
722+ /// prometheus.io/scrape label.
723+ fn build_rolegroup_metrics_service (
724+ opa : & v1alpha1:: OpaCluster ,
725+ resolved_product_image : & ResolvedProductImage ,
726+ rolegroup : & RoleGroupRef < v1alpha1:: OpaCluster > ,
727+ ) -> Result < Service > {
728+ let labels = Labels :: try_from ( [ ( "prometheus.io/scrape" , "true" ) ] )
729+ . expect ( "static Prometheus labels must be valid" ) ;
730+
731+ let metadata = ObjectMetaBuilder :: new ( )
732+ . name_and_namespace ( opa)
733+ . name ( rolegroup. rolegroup_metrics_service_name ( ) )
734+ . ownerreference_from_resource ( opa, None , Some ( true ) )
735+ . context ( ObjectMissingMetadataForOwnerRefSnafu ) ?
736+ . with_recommended_labels ( build_recommended_labels (
737+ opa,
738+ & resolved_product_image. app_version_label ,
739+ & rolegroup. role ,
740+ & rolegroup. role_group ,
741+ ) )
742+ . context ( ObjectMetaSnafu ) ?
743+ . with_labels ( labels)
744+ . build ( ) ;
745+
746+ let service_spec = ServiceSpec {
747+ type_ : Some ( "ClusterIP" . to_string ( ) ) ,
748+ cluster_ip : Some ( "None" . to_string ( ) ) ,
749+ ports : Some ( vec ! [ metrics_service_port_with_tls( opa. spec. cluster_config. tls. is_some( ) ) ] ) ,
750+ selector : Some ( role_group_selector_labels ( opa, rolegroup) ?. into ( ) ) ,
751+ ..ServiceSpec :: default ( )
752+ } ;
753+
754+ Ok ( Service {
755+ metadata,
756+ spec : Some ( service_spec) ,
757+ status : None ,
758+ } )
759+ }
760+
761+ /// Returns the [`Labels`] that can be used to select all Pods that are part of the roleGroup.
762+ fn role_group_selector_labels (
763+ opa : & v1alpha1:: OpaCluster ,
764+ rolegroup : & RoleGroupRef < v1alpha1:: OpaCluster > ,
765+ ) -> Result < Labels > {
766+ Labels :: role_group_selector ( opa, APP_NAME , & rolegroup. role , & rolegroup. role_group )
767+ . context ( BuildLabelSnafu )
768+ }
769+
694770/// The rolegroup [`ConfigMap`] configures the rolegroup based on the configuration given by the administrator
695771fn build_server_rolegroup_config_map (
696772 opa : & v1alpha1:: OpaCluster ,
@@ -923,6 +999,11 @@ fn build_server_rolegroup_daemonset(
923999 ) ;
9241000
9251001 // Add appropriate container port based on TLS configuration
1002+ // If we also add a container port "metrics" pointing to the same port number, we get a
1003+ //
1004+ // .spec.template.spec.containers[name="opa"].ports: duplicate entries for key [containerPort=8081,protocol="TCP"]
1005+ //
1006+ // So we don't do that
9261007 if opa_tls_config. is_some ( ) {
9271008 cb_opa. add_container_port ( APP_TLS_PORT_NAME , APP_TLS_PORT . into ( ) ) ;
9281009 cb_opa
@@ -1455,36 +1536,35 @@ fn build_prepare_start_command(
14551536 prepare_container_args
14561537}
14571538
1458- fn service_ports ( tls_enabled : bool ) -> Vec < ServicePort > {
1459- let ( port_name, port, target_port) = if tls_enabled {
1460- (
1461- APP_TLS_PORT_NAME ,
1462- APP_TLS_PORT ,
1463- IntOrString :: String ( APP_TLS_PORT_NAME . to_string ( ) ) ,
1464- )
1539+ fn data_service_ports_with_tls ( tls_enabled : bool ) -> Vec < ServicePort > {
1540+ let ( port_name, port) = if tls_enabled {
1541+ ( APP_TLS_PORT_NAME , APP_TLS_PORT )
14651542 } else {
1466- (
1467- APP_PORT_NAME ,
1468- APP_PORT ,
1469- IntOrString :: String ( APP_PORT_NAME . to_string ( ) ) ,
1470- )
1543+ ( APP_PORT_NAME , APP_PORT )
14711544 } ;
14721545
1473- vec ! [
1474- ServicePort {
1475- name: Some ( port_name. to_string( ) ) ,
1476- port: port. into( ) ,
1477- protocol: Some ( "TCP" . to_string( ) ) ,
1478- ..ServicePort :: default ( )
1479- } ,
1480- ServicePort {
1481- name: Some ( METRICS_PORT_NAME . to_string( ) ) ,
1482- port: 9504 , // Arbitrary port number, this is never actually used anywhere
1483- protocol: Some ( "TCP" . to_string( ) ) ,
1484- target_port: Some ( target_port) ,
1485- ..ServicePort :: default ( )
1486- } ,
1487- ]
1546+ vec ! [ ServicePort {
1547+ name: Some ( port_name. to_string( ) ) ,
1548+ port: port. into( ) ,
1549+ protocol: Some ( "TCP" . to_string( ) ) ,
1550+ ..ServicePort :: default ( )
1551+ } ]
1552+ }
1553+
1554+ fn metrics_service_port_with_tls ( tls_enabled : bool ) -> ServicePort {
1555+ let port = if tls_enabled {
1556+ APP_TLS_PORT
1557+ } else {
1558+ APP_PORT
1559+ } ;
1560+
1561+ ServicePort {
1562+ name : Some ( METRICS_PORT_NAME . to_string ( ) ) ,
1563+ // The metrics are served on the same port as the HTTP traffic
1564+ port : port. into ( ) ,
1565+ protocol : Some ( "TCP" . to_string ( ) ) ,
1566+ ..ServicePort :: default ( )
1567+ }
14881568}
14891569
14901570/// Creates recommended `ObjectLabels` to be used in deployed resources
0 commit comments