From 3c72ffded12a98028bf295ae144511dfa7ef8dad Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Fri, 13 Dec 2024 13:18:30 +0100 Subject: [PATCH 1/4] docs: Fix getting started guide to use postgres and TLS --- .../examples/getting_started/druid.yaml.j2 | 23 +++++++++++++------ .../getting_started/getting_started.sh.j2 | 11 +++++++++ .../examples/getting_started/hdfs.yaml.j2 | 4 ++++ .../pages/getting_started/first_steps.adoc | 16 ++++++++++++- 4 files changed, 46 insertions(+), 8 deletions(-) diff --git a/docs/modules/druid/examples/getting_started/druid.yaml.j2 b/docs/modules/druid/examples/getting_started/druid.yaml.j2 index dcaf87d4..f01d56f3 100644 --- a/docs/modules/druid/examples/getting_started/druid.yaml.j2 +++ b/docs/modules/druid/examples/getting_started/druid.yaml.j2 @@ -7,17 +7,18 @@ spec: image: productVersion: 30.0.0 clusterConfig: + listenerClass: external-stable # This exposes your Stacklet outside of Kubernetes. Remove this configuration if this is not desired + zookeeperConfigMapName: simple-druid-znode deepStorage: hdfs: configMapName: simple-hdfs - directory: /data + directory: /druid metadataStorageDatabase: - dbType: derby - connString: jdbc:derby://localhost:1527/var/druid/metadata.db;create=true - host: localhost - port: 1527 - tls: null - zookeeperConfigMapName: simple-druid-znode + dbType: postgresql + connString: jdbc:postgresql://postgresql-druid/druid + host: postgresql-druid + port: 5432 + credentialsSecret: druid-db-credentials brokers: roleGroups: default: @@ -38,3 +39,11 @@ spec: roleGroups: default: replicas: 1 +--- +apiVersion: v1 +kind: Secret +metadata: + name: druid-db-credentials +stringData: + username: druid + password: druid diff --git a/docs/modules/druid/examples/getting_started/getting_started.sh.j2 b/docs/modules/druid/examples/getting_started/getting_started.sh.j2 index f711d309..fee92f42 100755 --- a/docs/modules/druid/examples/getting_started/getting_started.sh.j2 +++ 
b/docs/modules/druid/examples/getting_started/getting_started.sh.j2 @@ -100,6 +100,17 @@ kubectl rollout status --watch statefulset/simple-hdfs-journalnode-default --tim kubectl rollout status --watch statefulset/simple-hdfs-namenode-default --timeout=300s # end::watch-hdfs-rollout[] +echo "Installing PostgreSQL for Druid" +# tag::helm-install-postgres[] +helm install postgresql-druid \ +--repo https://charts.bitnami.com/bitnami postgresql \ +--version 16.1.2 \ +--set auth.database=druid \ +--set auth.username=druid \ +--set auth.password=druid \ +--wait +# end::helm-install-postgres[] + echo "Install DruidCluster from druid.yaml" # tag::install-druid[] kubectl apply --server-side -f druid.yaml diff --git a/docs/modules/druid/examples/getting_started/hdfs.yaml.j2 b/docs/modules/druid/examples/getting_started/hdfs.yaml.j2 index 692f8737..893a524a 100644 --- a/docs/modules/druid/examples/getting_started/hdfs.yaml.j2 +++ b/docs/modules/druid/examples/getting_started/hdfs.yaml.j2 @@ -10,10 +10,14 @@ spec: dfsReplication: 1 zookeeperConfigMapName: simple-hdfs-znode nameNodes: + config: + listenerClass: external-stable # This exposes your Stacklet outside of Kubernetes. Remove this configuration if this is not desired roleGroups: default: replicas: 2 dataNodes: + config: + listenerClass: external-unstable # This exposes your Stacklet outside of Kubernetes. 
Remove this configuration if this is not desired roleGroups: default: replicas: 1 diff --git a/docs/modules/druid/pages/getting_started/first_steps.adoc b/docs/modules/druid/pages/getting_started/first_steps.adoc index 30a90cee..5e947a19 100644 --- a/docs/modules/druid/pages/getting_started/first_steps.adoc +++ b/docs/modules/druid/pages/getting_started/first_steps.adoc @@ -10,6 +10,7 @@ Three things need to be installed to have a Druid cluster: * A ZooKeeper instance for internal use by Druid * An HDFS instance to be used as a backend for deep storage +* A PostgreSQL database to store the metadata of Druid * The Druid cluster itself Create them in this order, each one is created by applying a manifest file. @@ -47,6 +48,17 @@ And apply it: include::example$getting_started/getting_started.sh[tag=install-hdfs] ---- + +=== PostgreSQL + +Install a PostgreSQL database using `helm`. +If you already have a PostgreSQL instance, you can skip this step and use your own below. + +[source,bash] +---- +include::example$getting_started/getting_started.sh[tag=helm-install-postgres] +---- + === Druid Create a file named `druid.yaml` with the following contents: @@ -94,7 +106,9 @@ simple-hdfs-namenode-default 2/2 6m simple-zk-server-default 3/3 7m ---- -Then, create a port-forward for the Druid Router: +Ideally, use `stackablectl stacklet list` to find the address at which the Druid Router is reachable, and use that address. 
+ +As an alternative, you can create a port-forward for the Druid Router: ---- include::example$getting_started/getting_started.sh[tag=port-forwarding] From 006adc892550bc7c000bd37e53cc313fc33baa41 Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Fri, 13 Dec 2024 13:21:35 +0100 Subject: [PATCH 2/4] run templating --- .../druid/examples/getting_started/druid.yaml | 23 +++++++++++++------ .../getting_started/getting_started.sh | 11 +++++++++ .../druid/examples/getting_started/hdfs.yaml | 4 ++++ 3 files changed, 31 insertions(+), 7 deletions(-) diff --git a/docs/modules/druid/examples/getting_started/druid.yaml b/docs/modules/druid/examples/getting_started/druid.yaml index dcaf87d4..f01d56f3 100644 --- a/docs/modules/druid/examples/getting_started/druid.yaml +++ b/docs/modules/druid/examples/getting_started/druid.yaml @@ -7,17 +7,18 @@ spec: image: productVersion: 30.0.0 clusterConfig: + listenerClass: external-stable # This exposes your Stacklet outside of Kubernetes. Remove this configuration if this is not desired + zookeeperConfigMapName: simple-druid-znode deepStorage: hdfs: configMapName: simple-hdfs - directory: /data + directory: /druid metadataStorageDatabase: - dbType: derby - connString: jdbc:derby://localhost:1527/var/druid/metadata.db;create=true - host: localhost - port: 1527 - tls: null - zookeeperConfigMapName: simple-druid-znode + dbType: postgresql + connString: jdbc:postgresql://postgresql-druid/druid + host: postgresql-druid + port: 5432 + credentialsSecret: druid-db-credentials brokers: roleGroups: default: @@ -38,3 +39,11 @@ spec: roleGroups: default: replicas: 1 +--- +apiVersion: v1 +kind: Secret +metadata: + name: druid-db-credentials +stringData: + username: druid + password: druid diff --git a/docs/modules/druid/examples/getting_started/getting_started.sh b/docs/modules/druid/examples/getting_started/getting_started.sh index 34ec4da5..e5602734 100755 --- a/docs/modules/druid/examples/getting_started/getting_started.sh +++ 
b/docs/modules/druid/examples/getting_started/getting_started.sh @@ -100,6 +100,17 @@ kubectl rollout status --watch statefulset/simple-hdfs-journalnode-default --tim kubectl rollout status --watch statefulset/simple-hdfs-namenode-default --timeout=300s # end::watch-hdfs-rollout[] +echo "Installing PostgreSQL for Druid" +# tag::helm-install-postgres[] +helm install postgresql-druid \ +--repo https://charts.bitnami.com/bitnami postgresql \ +--version 16.1.2 \ +--set auth.database=druid \ +--set auth.username=druid \ +--set auth.password=druid \ +--wait +# end::helm-install-postgres[] + echo "Install DruidCluster from druid.yaml" # tag::install-druid[] kubectl apply --server-side -f druid.yaml diff --git a/docs/modules/druid/examples/getting_started/hdfs.yaml b/docs/modules/druid/examples/getting_started/hdfs.yaml index 692f8737..893a524a 100644 --- a/docs/modules/druid/examples/getting_started/hdfs.yaml +++ b/docs/modules/druid/examples/getting_started/hdfs.yaml @@ -10,10 +10,14 @@ spec: dfsReplication: 1 zookeeperConfigMapName: simple-hdfs-znode nameNodes: + config: + listenerClass: external-stable # This exposes your Stacklet outside of Kubernetes. Remove this configuration if this is not desired roleGroups: default: replicas: 2 dataNodes: + config: + listenerClass: external-unstable # This exposes your Stacklet outside of Kubernetes. 
Remove this configuration if this is not desired roleGroups: default: replicas: 1 From 0dabe5966be951f17908a36d9822e626025b8856 Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Fri, 13 Dec 2024 13:42:57 +0100 Subject: [PATCH 3/4] Adjust port number --- .../examples/getting_started/getting_started.sh | 12 ++++++------ .../examples/getting_started/getting_started.sh.j2 | 12 ++++++------ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/docs/modules/druid/examples/getting_started/getting_started.sh b/docs/modules/druid/examples/getting_started/getting_started.sh index e5602734..68051249 100755 --- a/docs/modules/druid/examples/getting_started/getting_started.sh +++ b/docs/modules/druid/examples/getting_started/getting_started.sh @@ -135,10 +135,10 @@ kubectl rollout status --watch statefulset/simple-druid-middlemanager-default -- kubectl rollout status --watch statefulset/simple-druid-router-default --timeout=300s # end::watch-druid-rollout[] -echo "Starting port-forwarding of port 8888" +echo "Starting port-forwarding of port 9088" # shellcheck disable=2069 # we want all output to be blackholed # tag::port-forwarding[] -kubectl port-forward svc/simple-druid-router 8888 > /dev/null 2>&1 & +kubectl port-forward svc/simple-druid-router 9088 > /dev/null 2>&1 & # end::port-forwarding[] PORT_FORWARD_PID=$! 
# shellcheck disable=2064 # we want the PID evaluated now, not at the time the trap is @@ -147,7 +147,7 @@ sleep 5 submit_job() { # tag::submit-job[] -curl -s -X 'POST' -H 'Content-Type:application/json' -d @ingestion_spec.json http://localhost:8888/druid/indexer/v1/task +curl -s -k -X 'POST' -H 'Content-Type:application/json' -d @ingestion_spec.json https://localhost:9088/druid/indexer/v1/task # end::submit-job[] } @@ -155,7 +155,7 @@ echo "Submitting job" task_id=$(submit_job | sed -e 's/.*":"\([^"]\+\).*/\1/g') request_job_status() { - curl -s "http://localhost:8888/druid/indexer/v1/task/${task_id}/status" | sed -e 's/.*statusCode":"\([^"]\+\).*/\1/g' + curl -s -k "https://localhost:9088/druid/indexer/v1/task/${task_id}/status" | sed -e 's/.*statusCode":"\([^"]\+\).*/\1/g' } while [ "$(request_job_status)" == "RUNNING" ]; do @@ -173,7 +173,7 @@ else fi segment_load_status() { - curl -s http://localhost:8888/druid/coordinator/v1/loadstatus | sed -e 's/.*wikipedia":\([0-9\.]\+\).*/\1/g' + curl -s -k https://localhost:9088/druid/coordinator/v1/loadstatus | sed -e 's/.*wikipedia":\([0-9\.]\+\).*/\1/g' } while [ "$(segment_load_status)" != "100.0" ]; do @@ -183,7 +183,7 @@ done query_data() { # tag::query-data[] -curl -s -X 'POST' -H 'Content-Type:application/json' -d @query.json http://localhost:8888/druid/v2/sql +curl -s -k -X 'POST' -H 'Content-Type:application/json' -d @query.json https://localhost:9088/druid/v2/sql # end::query-data[] } diff --git a/docs/modules/druid/examples/getting_started/getting_started.sh.j2 b/docs/modules/druid/examples/getting_started/getting_started.sh.j2 index fee92f42..b944827f 100755 --- a/docs/modules/druid/examples/getting_started/getting_started.sh.j2 +++ b/docs/modules/druid/examples/getting_started/getting_started.sh.j2 @@ -135,10 +135,10 @@ kubectl rollout status --watch statefulset/simple-druid-middlemanager-default -- kubectl rollout status --watch statefulset/simple-druid-router-default --timeout=300s # 
end::watch-druid-rollout[] -echo "Starting port-forwarding of port 8888" +echo "Starting port-forwarding of port 9088" # shellcheck disable=2069 # we want all output to be blackholed # tag::port-forwarding[] -kubectl port-forward svc/simple-druid-router 8888 > /dev/null 2>&1 & +kubectl port-forward svc/simple-druid-router 9088 > /dev/null 2>&1 & # end::port-forwarding[] PORT_FORWARD_PID=$! # shellcheck disable=2064 # we want the PID evaluated now, not at the time the trap is @@ -147,7 +147,7 @@ sleep 5 submit_job() { # tag::submit-job[] -curl -s -X 'POST' -H 'Content-Type:application/json' -d @ingestion_spec.json http://localhost:8888/druid/indexer/v1/task +curl -s -k -X 'POST' -H 'Content-Type:application/json' -d @ingestion_spec.json https://localhost:9088/druid/indexer/v1/task # end::submit-job[] } @@ -155,7 +155,7 @@ echo "Submitting job" task_id=$(submit_job | sed -e 's/.*":"\([^"]\+\).*/\1/g') request_job_status() { - curl -s "http://localhost:8888/druid/indexer/v1/task/${task_id}/status" | sed -e 's/.*statusCode":"\([^"]\+\).*/\1/g' + curl -s -k "https://localhost:9088/druid/indexer/v1/task/${task_id}/status" | sed -e 's/.*statusCode":"\([^"]\+\).*/\1/g' } while [ "$(request_job_status)" == "RUNNING" ]; do @@ -173,7 +173,7 @@ else fi segment_load_status() { - curl -s http://localhost:8888/druid/coordinator/v1/loadstatus | sed -e 's/.*wikipedia":\([0-9\.]\+\).*/\1/g' + curl -s -k https://localhost:9088/druid/coordinator/v1/loadstatus | sed -e 's/.*wikipedia":\([0-9\.]\+\).*/\1/g' } while [ "$(segment_load_status)" != "100.0" ]; do @@ -183,7 +183,7 @@ done query_data() { # tag::query-data[] -curl -s -X 'POST' -H 'Content-Type:application/json' -d @query.json http://localhost:8888/druid/v2/sql +curl -s -k -X 'POST' -H 'Content-Type:application/json' -d @query.json https://localhost:9088/druid/v2/sql # end::query-data[] } From 78f24441f3e3021573ec6ebb49781b7a1a81f4bd Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Fri, 13 Dec 2024 13:44:34 +0100 Subject: 
[PATCH 4/4] Adjust port number --- docs/modules/druid/pages/getting_started/first_steps.adoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/modules/druid/pages/getting_started/first_steps.adoc b/docs/modules/druid/pages/getting_started/first_steps.adoc index 5e947a19..c1e3065b 100644 --- a/docs/modules/druid/pages/getting_started/first_steps.adoc +++ b/docs/modules/druid/pages/getting_started/first_steps.adoc @@ -140,7 +140,7 @@ Continue with the <<_query_the_data,next section>>. ==== -To open the web interface navigate your browser to https://localhost:8888/ to find the dashboard: +To open the web interface, navigate your browser to https://localhost:9088/ to find the dashboard: image::getting_started/dashboard.png[]