Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions libvirt/tests/cfg/passthrough/pci/libvirt_pci_passthrough.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -45,3 +45,7 @@
operation = "suspend"
- passthrough_shutdown_start:
operation = "shutdown"
- passthrough_multiple_reboots:
number_of_reboots = 15
operation = "reboot"
supported_err = "not supported by the connection driver: virDomainReboot"
211 changes: 193 additions & 18 deletions libvirt/tests/src/multivm_stress/multivm_stress.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
import logging as log
import time

from virttest import utils_stress
from virttest import error_context
from virttest import utils_test
from virttest import virsh
from virttest.libvirt_xml import vm_xml


# Using as lower capital is not the best way to do, but this is just a
Expand All @@ -20,38 +23,210 @@ def run(test, params, env):

guest_stress = params.get("guest_stress", "no") == "yes"
host_stress = params.get("host_stress", "no") == "yes"
stress_events = params.get("stress_events", "reboot")
stress_events = params.get("stress_events", "")
stress_time = params.get("stress_time", "30")
debug_dir = params.get("debug_dir", "/home/")
dump_options = params.get("dump_options", "--memory-only --bypass-cache")
vms = env.get_all_vms()
vms_uptime_init = {}

if "reboot" not in stress_events:
for vm in vms:
vms_uptime_init[vm.name] = vm.uptime()
stress_event = utils_stress.VMStressEvents(params, env)

if guest_stress:
# change the on_crash value to "preserve" when guest crashes
for vm in vms:
logging.debug("Setting on_crash to preserve in %s" % vm.name)
vmxml = vm_xml.VMXML.new_from_inactive_dumpxml(vm.name)
if vm.is_alive():
vm.destroy(gracefully=False)
vmxml.on_crash = "preserve"
vmxml.sync()
vm.start()

try:
utils_test.load_stress("stress_in_vms", params=params, vms=vms)
except Exception as err:
test.fail("Error running stress in vms: %s" % err)
test.fail("Error running stress in vms: %s" % str(err))

if host_stress:
if params.get("host_stress_args", ""):
params["stress_args"] = params.get("host_stress_args")
try:
utils_test.load_stress("stress_on_host", params=params)
except Exception as err:
test.fail("Error running stress in host: %s" % err)
try:
stress_event.run_threads()
finally:
stress_event.wait_for_threads()
if guest_stress:
utils_test.unload_stress("stress_in_vms", params=params, vms=vms)
if host_stress:
utils_test.unload_stress("stress_on_host", params=params)
if "reboot" not in stress_events:
fail = False
test.fail("Error running stress in host: %s" % str(err))

stress_timer = int(stress_time)
fail = False
found_traces = False
failed_vms = []
login_error_vms = []
unexpected_reboot_vms = []
error_message = ""

if guest_stress:
# check for any call traces in guest dmesg while stress is running
def check_call_traces(vm):
nonlocal stress_timer
found_trace = False
try:
retry_login = True
retry_times = 0
while retry_login:
try:
retry_login = False
session = vm.wait_for_login(timeout=100)
if vm in login_error_vms:
login_error_vms.remove(vm)

except Exception:
stress_timer -= 150
if vm in login_error_vms:
return False

retry_login = True
retry_times += 1
if retry_times == 3:
logging.debug("Error in logging into %s" % vm.name)
if vm not in login_error_vms:
login_error_vms.append(vm)
return False

time.sleep(30)
stress_timer -= 30

dmesg = session.cmd("dmesg")
dmesg_level = session.cmd("dmesg -l emerg,alert,crit")
if "Call Trace" in dmesg or len(dmesg_level) >= 1:
logging.debug("Call trace found in %s" % vm.name)
if vm not in failed_vms:
failed_vms.append(vm)
found_trace = True
session.close()

except Exception as err:
test.error("Error getting dmesg of %s due to %s" % (vm.name, str(err)))
return found_trace

# run stress for stress_time seconds
logging.debug("Sleeping for %s seconds waiting for stress completion" % stress_time)
stress_time = int(stress_time)

# check domstate of vms after stress_time
if stress_time < 600:
time.sleep(stress_time)
for vm in vms:
if vm.uptime() < vms_uptime_init[vm.name]:
logging.error("Unexpected reboot of VM: %s between test", vm.name)
if vm.state() != "running":
logging.debug("%s state is %s" % (vm.name, vm.state()))
failed_vms.append(vm)
fail = True
if fail:
test.fail("Unexpected VM reboot detected")
else:
found_traces = check_call_traces(vm)
if found_traces:
fail = True
time.sleep(2)

# check domstate of vms every 10 minutes (600 seconds) during stress_time
else:
all_failed = False
number_of_checks = int(stress_time / 600)
delta_time = int(stress_time % 600)
for itr in range(number_of_checks):
if len(failed_vms) == len(vms) or len(login_error_vms) == len(vms):
all_failed = True
break
if stress_timer <= 0:
break
time.sleep(600)
for vm in vms:
if vm.state() != "running":
logging.debug("%s state is %s" % (vm.name, vm.state()))
if vm not in failed_vms:
failed_vms.append(vm)
fail = True
else:
found_traces = check_call_traces(vm)
if found_traces:
fail = True
time.sleep(3)
stress_timer -= 3

if delta_time > 0 and stress_timer > 0 and not all_failed:
time.sleep(delta_time)
for vm in vms:
if vm.state() != "running":
logging.debug("%s state is %s" % (vm.name, vm.state()))
if vm not in failed_vms:
failed_vms.append(vm)
fail = True
else:
found_traces = check_call_traces(vm)
if found_traces:
fail = True
time.sleep(3)
stress_timer -= 3

# virsh dump the failed vms into debug_dir
if fail:
for vm in failed_vms:
if vm.state() != "shut off":
logging.debug("Dumping %s to debug_dir %s" % (vm.name, debug_dir))
virsh.dump(vm.name, debug_dir+vm.name+"-core", dump_options, ignore_status=False, debug=True)
logging.debug("Successfully dumped %s as %s-core" % (vm.name, vm.name))
else:
logging.debug("Cannot dump %s as it is in shut off state" % vm.name)
failed_vms_string = ", ".join(vm.name for vm in failed_vms)
error_message = "Failure in " + failed_vms_string + " while running stress. "

if login_error_vms:
login_error_vms_string = ", ".join(vm.name for vm in login_error_vms)
error_message += "Login error in " + login_error_vms_string + " while running stress. "

if len(failed_vms) == len(vms) or len(login_error_vms) == len(vms):
error_message += "All vms in unstable state while running stress. Couldn't run STRESS EVENTS"
test.fail(error_message)

# run STRESS EVENTS in the remaining stable guests
if len(failed_vms) < len(vms) and len(login_error_vms) < len(vms):
for vm in failed_vms:
if vm in vms:
vms.remove(vm)
for vm in login_error_vms:
if vm in vms:
vms.remove(vm)

if len(vms) == 0:
error_message += "All vms in unstable state while running stress. Couldn't run STRESS EVENTS"
test.fail(error_message)

new_vms = ", ".join(vm.name for vm in vms)
try:
if stress_events != "":
logging.debug("Running stress_events in %s" % new_vms)
stress_event = utils_stress.VMStressEvents(params, env, vms)
stress_event.run_threads()
stress_event.wait_for_threads()

if guest_stress:
utils_test.unload_stress("stress_in_vms", params=params, vms=vms)

if host_stress:
utils_test.unload_stress("stress_on_host", params=params)

if "reboot" not in stress_events:
for vm in vms:
if vm.uptime() < vms_uptime_init[vm.name]:
logging.debug("Unexpected reboot of VM: %s between test", vm.name)
unexpected_reboot_vms.append(vm)
unexpected_reboot_vms_string = ", ".join(vm.name for vm in unexpected_reboot_vms)
if unexpected_reboot_vms:
error_message += "Unexpected reboot of guest(s) " + unexpected_reboot_vms_string + ". "

except Exception as err:
error_message += "Failure running STRESS EVENTS in " + new_vms + " due to" + str(err)

# check the test status
if error_message:
test.fail(error_message)
116 changes: 70 additions & 46 deletions libvirt/tests/src/passthrough/pci/libvirt_pci_passthrough.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import logging as log
import ipaddress
import platform
import time

from virttest import virsh, virt_vm
Expand Down Expand Up @@ -39,6 +40,8 @@ def run(test, params, env):
i) Reboot.
ii) Suspend/Resume.
iii) Start/Shutdown.
d). Multiple Reboots:
1. Checking PCI Device remains persistent across multiple reboots
"""

def guest_lifecycle():
Expand Down Expand Up @@ -93,6 +96,8 @@ def guest_lifecycle():
sriov = ('yes' == params.get("libvirt_pci_SRIOV", 'no'))
device_type = params.get("libvirt_pci_device_type", "NIC")
vm_vfs = int(params.get("number_vfs", 2))
number_of_reboots = int(params.get("number_of_reboots", "1"))
arch = platform.machine()
pci_dev = None
pci_address = None
bus_info = []
Expand Down Expand Up @@ -174,59 +179,78 @@ def guest_lifecycle():
pci_address = pci_xml.cap.get_address_dict()
vmxml.add_hostdev(pci_address)

try:
for itr in range(iteration):
logging.info("Currently executing iteration number: '%s'", itr)
vmxml.sync()
vm.start()
session = vm.wait_for_login()
# The Network configuration is generic irrespective of PF or SRIOV VF
if device_type == "NIC":
if sorted(vm.get_pci_devices()) != sorted(nic_list_before):
logging.debug("Adapter passthroughed to guest successfully")
else:
test.fail("Passthrough adapter not found in guest.")
net_ip = ipaddress.ip_address(net_ip)
nic_list_after = vm.get_pci_devices()
nic_list = list(set(nic_list_after).difference(set(nic_list_before)))
for val in range(len(nic_list)):
bus_info.append(str(nic_list[val]).split(' ', 1)[0])
nic_list[val] = str(nic_list[val]).split(' ', 1)[0][:-2]
bus_info.sort()
if not sriov:
# check all functions get same iommu group
def check_device_status(net_ip, server_ip, netmask):
logging.info("Currently executing iteration number: '%s'", itr)
vmxml.sync()
vm.start()
session = vm.wait_for_login()
# The Network configuration is generic irrespective of PF or SRIOV VF
if device_type == "NIC":
if sorted(vm.get_pci_devices()) != sorted(nic_list_before):
logging.debug("Adapter passthroughed to guest successfully")
else:
test.fail("Passthrough adapter not found in guest.")
net_ip = ipaddress.ip_address(net_ip)
nic_list_after = vm.get_pci_devices()
nic_list = list(set(nic_list_after).difference(set(nic_list_before)))
for val in range(len(nic_list)):
bus_info.append(str(nic_list[val]).split(' ', 1)[0])
nic_list[val] = str(nic_list[val]).split(' ', 1)[0][:-2]
bus_info.sort()
if not sriov:
# check all functions get same iommu group
# arch ppc64 gets different iommu group when attached to VM
if arch != "ppc64le":
if len(set(nic_list)) != 1:
test.fail("Multifunction Device passthroughed but "
"functions are in different iommu group")
# ping to server from each function
for val in bus_info:
nic_name = str(utils_misc.get_interface_from_pci_id(val, session))
session.cmd("ip addr flush dev %s" % nic_name)
session.cmd("ip addr add %s/%s dev %s"
% (net_ip, netmask, nic_name))
session.cmd("ip link set %s up" % nic_name)
# Pinging using nic_name is having issue,
# hence replaced with IPAddress
s_ping, o_ping = utils_test.ping(server_ip, count=5,
interface=net_ip, timeout=30,
session=session)
logging.info(o_ping)
if s_ping != 0:
err_msg = "Ping test fails, error info: '%s'"
test.fail(err_msg % o_ping)
# Each interface should have unique IP
# ping to server from each function
for val in bus_info:
nic_name = str(utils_misc.get_interface_from_pci_id(val, session))
session.cmd("ip addr flush dev %s" % nic_name)
session.cmd("ip addr add %s/%s dev %s"
% (net_ip, netmask, nic_name))
session.cmd("ip link set %s up" % nic_name)
# Pinging using nic_name is having issue,
# hence replaced with IPAddress
s_ping, o_ping = utils_test.ping(server_ip, count=5,
interface=net_ip, timeout=30,
session=session)
logging.info(o_ping)
if s_ping != 0:
err_msg = "Ping test fails, error info: '%s'"
test.fail(err_msg % o_ping)
# Each interface should have unique IP
# For ppc64 arch let's test using one ip only
if arch != "ppc64le":
net_ip = net_ip + 1

elif device_type == "STORAGE":
# Get the result of "fdisk -l" in guest, and
# compare the result with fdisk_list_before.
output = session.cmd_output("fdisk -l|grep \"Disk identifier:\"")
fdisk_list_after = output.splitlines()
if fdisk_list_after == fdisk_list_before:
test.fail("Didn't find the disk attached to guest.")
elif device_type == "STORAGE":
# Get the result of "fdisk -l" in guest, and
# compare the result with fdisk_list_before.
output = session.cmd_output("fdisk -l|grep \"Disk identifier:\"")
fdisk_list_after = output.splitlines()
if fdisk_list_after == fdisk_list_before:
test.fail("Didn't find the disk attached to guest.")

# Execute VM Life-cycle Operation with device pass-through
def multiple_reboot(number_of_reboots):
    """
    Repeat the guest life-cycle operation and re-check the passed-through
    device after each round.

    :param number_of_reboots: number of times to call guest_lifecycle()
                              followed by check_device_status()
    """
    for reboot_count in range(number_of_reboots):
        logging.info("Performing VM Reboot with device pass-through "
                     "for reboot count : %s", reboot_count)
        guest_lifecycle()
        logging.info("Check device availability after VM Reboot "
                     "for reboot count : %s", reboot_count)
        check_device_status(net_ip, server_ip, netmask)

try:
for itr in range(iteration):
check_device_status(net_ip, server_ip, netmask)

# Execute VM Life-cycle Operation with device pass-through
guest_lifecycle()

# Execute Multiple reboots on VM and check the device persistency
multiple_reboot(number_of_reboots)

finally:
backup_xml.sync()
Expand Down