From 4ed083efd9e0c051fb337680ba131d1d8992f0dc Mon Sep 17 00:00:00 2001
From: Roman Safronov
Date: Thu, 14 Aug 2025 17:17:16 +0300
Subject: [PATCH] Add kepler_vm metrics test when CPU load is high

The test implements a scenario in which the test VM goes through a
period of high CPU load.

Scenario:
1. Get the VM power consumption in idle state.
2. Run a process that causes high CPU load on the VM for 30 seconds
   and make sure that the VM power consumption metric increased
   accordingly.
3. After the high CPU load process has completed, get the VM metric
   again and make sure that the value is similar to the one received
   in step 1.
---
 ci/vars/osp18_env.yml                         |  9 +++-
 roles/telemetry_verify_metrics/tasks/main.yml | 11 ++++-
 .../tasks/verify_kepler_vm_metrics.yml        | 57 ++++++++++++++++++--
 3 files changed, 72 insertions(+), 5 deletions(-)

diff --git a/ci/vars/osp18_env.yml b/ci/vars/osp18_env.yml
index 1d7f977f1..6ec9d330f 100644
--- a/ci/vars/osp18_env.yml
+++ b/ci/vars/osp18_env.yml
@@ -6,5 +6,12 @@ stack_flavor: "m1.small"
 stack_network: "private"
 stack_external_network: "public"
 stack_name: "vnf"
-# kepler_test_vm should be created before executing the test suite
+
+# For testing kepler_vm metrics a full-fledged Linux VM is required.
+# kepler_test_vm should be created before executing the test suite.
+# It is assumed that kepler_test_vm is connected to kepler_test_vm_network
+# and has a floating IP accessible from the host that executes the tests.
 kepler_test_vm: "kepler_test_vm"
+kepler_test_vm_network: "management_net"
+kepler_test_vm_user: "cloud-user"
+kepler_test_vm_key: "~/test_keypair.key"
diff --git a/roles/telemetry_verify_metrics/tasks/main.yml b/roles/telemetry_verify_metrics/tasks/main.yml
index a8dec24c7..3334ff80a 100644
--- a/roles/telemetry_verify_metrics/tasks/main.yml
+++ b/roles/telemetry_verify_metrics/tasks/main.yml
@@ -96,7 +96,15 @@
   ignore_errors: true
   changed_when: false
   when: '"kepler_vm" in telemetry_verify_metrics_metric_sources_to_test'
- 
+
+- name: Get kepler test VM uuid
+  ansible.builtin.shell: |
+    {{ openstack_cmd }} server show {{ kepler_test_vm }} -c id -f value
+  register: kepler_test_vm_id
+  ignore_errors: true
+  changed_when: false
+  when: '"kepler_vm" in telemetry_verify_metrics_metric_sources_to_test'
+
 - name: Verify Kepler VM metrics are being exposed and stored
   ansible.builtin.include_tasks:
     file: verify_kepler_vm_metrics.yml
@@ -105,6 +113,7 @@
         - power_monitoring
   when: '"kepler_vm" in telemetry_verify_metrics_metric_sources_to_test and "ACTIVE" in kepler_vm_status.stdout'
   vars:
+    test_vm_id: "{{ kepler_test_vm_id.stdout }}"
     metrics_cmd_prefix: "{{ openstack_cmd }} metric show --disable-rbac -c value -f value"
 
 - name: Verify Ceilometer volume pool metrics are being exposed and stored
diff --git a/roles/telemetry_verify_metrics/tasks/verify_kepler_vm_metrics.yml b/roles/telemetry_verify_metrics/tasks/verify_kepler_vm_metrics.yml
index e2e5c157a..41ccaee97 100644
--- a/roles/telemetry_verify_metrics/tasks/verify_kepler_vm_metrics.yml
+++ b/roles/telemetry_verify_metrics/tasks/verify_kepler_vm_metrics.yml
@@ -2,7 +2,7 @@
     TEST Check that kepler shows idle power consumption metric for VM and it is not zero
   ansible.builtin.shell: |
     set -euxo pipefail
-    {{ metrics_cmd_prefix }} "kepler_vm_package_joules_total{'vm_id'='{{ kepler_test_vm }}', 'mode'='idle'}"
+    {{ metrics_cmd_prefix }} "kepler_vm_cpu_joules_total{'vm_id'='{{ test_vm_id }}', 'zone'='package'}"
   register: result
   changed_when: false
   failed_when: result.rc != 0 or result.stdout|float == 0
@@ -12,7 +12,7 @@
     Step 1 Get an initial value of VM dynamic metrics and check that it's not zero
   ansible.builtin.shell: |
     set -euxo pipefail
-    {{ metrics_cmd_prefix }} "kepler_vm_package_joules_total{'vm_id'='{{ kepler_test_vm }}', 'mode'='dynamic'}"
+    {{ metrics_cmd_prefix }} "kepler_vm_cpu_joules_total{'vm_id'='{{ test_vm_id }}', 'zone'='package'}"
   register: value1
   changed_when: false
   failed_when: value1.rc != 0 or value1.stdout|float == 0
@@ -22,9 +22,60 @@
     Step 2 Get a new value of VM dynamic metrics and compare with the initial value
   ansible.builtin.shell: |
     set -euxo pipefail
-    {{ metrics_cmd_prefix }} "kepler_vm_package_joules_total{'vm_id'='{{ kepler_test_vm }}', 'mode'='dynamic'}"
+    {{ metrics_cmd_prefix }} "kepler_vm_cpu_joules_total{'vm_id'='{{ test_vm_id }}', 'zone'='package'}"
   register: value2
   delay: 10
   retries: 3
   changed_when: false
   until: value2.rc == 0 and (value2.stdout|float > value1.stdout|float)
+
+# Load test: record the idle power of the test VM, generate CPU load inside
+# the VM over SSH and check that the reported power rises, then check that it
+# returns to the idle level. value1 is the baseline used by steps 2 and 3.
+- name: |
+    TEST Check kepler metrics dynamics when test VM is under load for some time
+    Step 1 Get initial value of VM power consumption in watts
+  ansible.builtin.shell: |
+    set -euxo pipefail
+    {{ metrics_cmd_prefix }} "kepler_vm_cpu_watts{'vm_id'='{{ test_vm_id }}','zone'='package'}"
+  register: value1
+  changed_when: false
+  # float(-1) turns empty or non-numeric output into -1, so a missing metric
+  # fails here instead of silently becoming a 0 W baseline.
+  failed_when: value1.rc != 0 or value1.stdout|float(-1) < 0
+
+# The load runs for 30 s and the metric is sampled 20 s in, while the load is
+# still active. The required increase is kepler_test_vm_load_delta_watts
+# (default 5.0 W).
+- name: |
+    TEST Check kepler metrics dynamics when test VM is under load for some time
+    Step 2 Run process that creates high CPU load on the test VM and get new power consumption metrics
+  ansible.builtin.shell: |
+    set -euxo pipefail
+    port_id=$({{ openstack_cmd }} port list --device-id='{{ test_vm_id }}' --network '{{ kepler_test_vm_network }}' -c id -f value)
+    [ -n "$port_id" ] || { echo "No port of VM {{ test_vm_id }} found on network {{ kepler_test_vm_network }}" >&2; exit 1; }
+    floating_ip=$({{ openstack_cmd }} floating ip list --port "$port_id" -c 'Floating IP Address' -f value)
+    [ -n "$floating_ip" ] || { echo "No floating IP is attached to port $port_id" >&2; exit 1; }
+    ssh_options='-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o BatchMode=yes -o ConnectTimeout=30'
+    ssh -i '{{ kepler_test_vm_key }}' $ssh_options '{{ kepler_test_vm_user }}'@"$floating_ip" 'nohup timeout 30 openssl speed -multi "$(nproc)" > /dev/null 2>&1 &'
+    sleep 20
+    {{ metrics_cmd_prefix }} "kepler_vm_cpu_watts{'vm_id'='{{ test_vm_id }}','zone'='package'}"
+  register: value2
+  changed_when: false
+  failed_when: value2.rc != 0 or (value2.stdout|float <= value1.stdout|float + kepler_test_vm_load_delta_watts|default(5.0)|float)
+
+# Poll rather than sample once: the load may still be finishing when this task
+# starts. Tolerance is kepler_test_vm_idle_tolerance_watts (default 0.1 W).
+- name: |
+    TEST Check kepler metrics dynamics when test VM is under load for some time
+    Step 3 Check that VM power consumption decreased back after removing load
+  ansible.builtin.shell: |
+    set -euxo pipefail
+    {{ metrics_cmd_prefix }} "kepler_vm_cpu_watts{'vm_id'='{{ test_vm_id }}','zone'='package'}"
+  register: value3
+  delay: 10
+  retries: 6
+  changed_when: false
+  until: >-
+    value3.rc == 0 and value3.stdout|float(-1) >= 0 and
+    (value3.stdout|float - value1.stdout|float)|abs <= kepler_test_vm_idle_tolerance_watts|default(0.1)|float