Skip to content

Commit e5a664e

Browse files
authored
Merge pull request #14 from bollig/main
Thanks @bollig, PR approved and merged.
2 parents e92abfc + 7b2af0d commit e5a664e

File tree

4 files changed

+32 additions, -21 deletions (lines changed)

4 files changed

+32 additions, -21 deletions (lines changed)

custom-metrics/1h-cost-metrics.sh

Lines changed: 20 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,7 @@
1111

1212
export AWS_DEFAULT_REGION=$cfn_region
1313
aws_region_long_name=$(python /usr/local/bin/aws-region.py $cfn_region)
14+
aws_region_long_name=${aws_region_long_name/Europe/EU}
1415

1516
masterInstanceType=$(ec2-metadata -t | awk '{print $2}')
1617
masterInstanceId=$(ec2-metadata -i | awk '{print $2}')
@@ -58,18 +59,26 @@ echo "master_node_cost $master_node_h_price" | curl --data-binary @- http://127.
5859

5960

6061
####################### FSX #########################
61-
fsx_size_gb=$(aws cloudformation describe-stacks --stack-name $stack_name --region $cfn_region \
62+
#fsx_size_gb=$(aws cloudformation describe-stacks --stack-name $stack_name --region $cfn_region \
63+
# | jq -r '.Stacks[0].Parameters | map(select(.ParameterKey == "FSXOptions"))[0].ParameterValue' \
64+
# | awk -F "," '{print $3}')
65+
#
66+
#fsx_type=$(aws cloudformation describe-stacks --stack-name $stack_name --region $cfn_region \
67+
# | jq -r '.Stacks[0].Parameters | map(select(.ParameterKey == "FSXOptions"))[0].ParameterValue' \
68+
# | awk -F "," '{print $9}')
69+
#
70+
#fsx_throughput=$(aws cloudformation describe-stacks --stack-name $stack_name --region $cfn_region \
71+
# | jq -r '.Stacks[0].Parameters | map(select(.ParameterKey == "FSXOptions"))[0].ParameterValue' \
72+
# | awk -F "," '{print $10}')
73+
74+
fsx_id=$(aws cloudformation describe-stacks --stack-name $stack_name --region $cfn_region \
6275
| jq -r '.Stacks[0].Parameters | map(select(.ParameterKey == "FSXOptions"))[0].ParameterValue' \
63-
| awk -F "," '{print $3}')
76+
| awk -F "," '{print $2}')
77+
fsx_summary=$(aws fsx describe-file-systems --region $cfn_region --file-system-ids $fsx_id)
78+
fsx_size_gb=$(echo $fsx_summary | jq -r '.FileSystems[0].StorageCapacity')
79+
fsx_type=$(echo $fsx_summary | jq -r '.FileSystems[0].LustreConfiguration.DeploymentType')
80+
fsx_throughput=$(echo $fsx_summary | jq -r '.FileSystems[0].LustreConfiguration.PerUnitStorageThroughput')
6481

65-
fsx_type=$(aws cloudformation describe-stacks --stack-name $stack_name --region $cfn_region \
66-
| jq -r '.Stacks[0].Parameters | map(select(.ParameterKey == "FSXOptions"))[0].ParameterValue' \
67-
| awk -F "," '{print $9}')
68-
69-
fsx_throughput=$(aws cloudformation describe-stacks --stack-name $stack_name --region $cfn_region \
70-
| jq -r '.Stacks[0].Parameters | map(select(.ParameterKey == "FSXOptions"))[0].ParameterValue' \
71-
| awk -F "," '{print $10}')
72-
7382
if [[ $fsx_type = "SCRATCH_2" ]] || [[ $fsx_type = "SCRATCH_1" ]]; then
7483
fsx_cost_gb_month=$(aws pricing get-products \
7584
--region us-east-1 \
@@ -124,4 +133,4 @@ do
124133
ebs_volume_total_cost=$(echo "scale=2; $ebs_volume_total_cost + $ebs_volume_cost" | bc)
125134
done
126135

127-
echo "ebs_master_cost $ebs_volume_total_cost" | curl --data-binary @- http://127.0.0.1:9091/metrics/job/cost
136+
echo "ebs_master_cost $ebs_volume_total_cost" | curl --data-binary @- http://127.0.0.1:9091/metrics/job/cost

custom-metrics/1m-cost-metrics.sh

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,7 @@
1111

1212
export AWS_DEFAULT_REGION=$cfn_region
1313
aws_region_long_name=$(python /usr/local/bin/aws-region.py $cfn_region)
14+
aws_region_long_name=${aws_region_long_name/Europe/EU}
1415

1516
monitoring_dir_name=$(echo ${cfn_postinstall_args}| cut -d ',' -f 2 )
1617
monitoring_home="/home/${cfn_cluster_user}/${monitoring_dir_name}"
@@ -57,4 +58,4 @@ for queue in $queues; do
5758
done
5859

5960
echo "ebs_compute_cost $compute_ebs_volume_cost" | curl --data-binary @- http://127.0.0.1:9091/metrics/job/cost
60-
echo "compute_nodes_cost $compute_nodes_total_cost" | curl --data-binary @- http://127.0.0.1:9091/metrics/job/cost
61+
echo "compute_nodes_cost $compute_nodes_total_cost" | curl --data-binary @- http://127.0.0.1:9091/metrics/job/cost

grafana/dashboards/ParallelCluster.json

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -303,7 +303,7 @@
303303
"targets": [
304304
{
305305
"alias": "",
306-
"dimensions": {},
306+
"dimensions": {"FileSystemId": "__FSX_ID__"},
307307
"expression": "",
308308
"id": "fsxRead",
309309
"matchExact": false,
@@ -318,7 +318,7 @@
318318
},
319319
{
320320
"alias": "",
321-
"dimensions": {},
321+
"dimensions": {"FileSystemId": "__FSX_ID__"},
322322
"expression": "",
323323
"hide": false,
324324
"id": "fsxWrite",
@@ -350,8 +350,8 @@
350350
"include": {
351351
"names": [
352352
"Time",
353-
"__FSX_ID__ DataReadBytes",
354-
"__FSX_ID__ DataWriteBytes"
353+
"DataReadBytes",
354+
"DataWriteBytes"
355355
]
356356
}
357357
}
@@ -459,7 +459,7 @@
459459
"targets": [
460460
{
461461
"alias": "",
462-
"dimensions": {},
462+
"dimensions": {"FileSystemId": "__FSX_ID__"},
463463
"expression": "",
464464
"id": "fsxIOPsRead",
465465
"matchExact": false,
@@ -474,7 +474,7 @@
474474
},
475475
{
476476
"alias": "",
477-
"dimensions": {},
477+
"dimensions": {"FileSystemId": "__FSX_ID__"},
478478
"expression": "",
479479
"hide": false,
480480
"id": "fsxIOPsWrite",
@@ -506,8 +506,8 @@
506506
"include": {
507507
"names": [
508508
"Time",
509-
"__FSX_ID__ DataReadOperations",
510-
"__FSX_ID__ DataWriteOperations"
509+
"DataReadOperations",
510+
"DataWriteOperations"
511511
]
512512
}
513513
}

parallelcluster-setup/install-monitoring.sh

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -84,6 +84,7 @@ case "${cfn_node_type}" in
8484
# More info here: https://github.com/vpenso/prometheus-slurm-exporter/blob/master/LICENSE
8585
cd ${monitoring_home}
8686
git clone https://github.com/vpenso/prometheus-slurm-exporter.git
87+
sed -i 's/NodeList,AllocMem,Memory,CPUsState,StateLong/NodeList: ,AllocMem: ,Memory: ,CPUsState: ,StateLong:/' prometheus-slurm-exporter/node.go
8788
cd prometheus-slurm-exporter
8889
GOPATH=/root/go-modules-cache HOME=/root go mod download
8990
GOPATH=/root/go-modules-cache HOME=/root go build
@@ -108,4 +109,4 @@ case "${cfn_node_type}" in
108109
/usr/local/bin/docker-compose -f /home/${cfn_cluster_user}/${monitoring_dir_name}/docker-compose/docker-compose.compute.yml -p monitoring-compute up -d
109110
fi
110111
;;
111-
esac
112+
esac

0 commit comments

Comments
 (0)