From 27e8365a7386e8159820fc7e389b938f8d3534a5 Mon Sep 17 00:00:00 2001
From: "Hamdy H. Khader"
Date: Thu, 13 Nov 2025 18:05:07 +0300
Subject: [PATCH 1/5] R25.10 hotfix multi fix (#738)

* Updating Storage node monitor

(cherry picked from commit ad546ca5fe667a74a5559109fb0e7c58d3a707b0)

* Fix fdb value exceeding limit

Create two new objects to hold the remote device connections:
RemoteDevice
RemoteJMDevice

These two objects hold only the five attributes related to the remote
connection.

* fix type checker issues

* set version

* set version

* disable lvol geo

* fix logger issue

* Do not set spdk mem when starting spdk

* Fix healthcheck logger _1

* Update storage_node_monitor.py

* Revert "Update __init__.py"

This reverts commit 5b83b13776ea6d184fa957eb0a74bf081f70f385.

---------

Co-authored-by: schmidt-scaled <82834682+schmidt-scaled@users.noreply.github.com>
---
 .../controllers/device_controller.py         |  75 ---
 .../controllers/health_controller.py         |  29 +-
 simplyblock_core/distr_controller.py         |  15 +-
 simplyblock_core/models/nvme_device.py       |  22 +-
 simplyblock_core/models/storage_node.py      |   6 +-
 .../services/health_check_service.py         | 441 +++++++++---------
 .../services/main_distr_event_collector.py   |   6 +-
 .../services/storage_node_monitor.py         | 365 ++++++++-------
 .../services/tasks_runner_port_allow.py      |  13 +-
 simplyblock_core/storage_node_ops.py         |  86 ++--
 simplyblock_core/utils/__init__.py           |  37 +-
 .../api/internal/storage_node/docker.py      |   3 +-
 12 files changed, 531 insertions(+), 567 deletions(-)

diff --git a/simplyblock_core/controllers/device_controller.py b/simplyblock_core/controllers/device_controller.py
index 8e684c942..df6f0eb89 100644
--- a/simplyblock_core/controllers/device_controller.py
+++ b/simplyblock_core/controllers/device_controller.py
@@ -657,81 +657,6 @@ def add_device(device_id, add_migration_task=True):
         tasks_controller.add_new_device_mig_task(device_id)
 
     return device_id
-    #
-    # # create partitions
-    # partitions = snode.num_partitions_per_dev
-    # rpc_client = RPCClient(snode.mgmt_ip, snode.rpc_port, snode.rpc_username, snode.rpc_password)
-    # # look for partitions
-    # partitioned_devices = storage_node_ops._search_for_partitions(rpc_client, device_obj)
-    # logger.debug("partitioned_devices")
-    # logger.debug(partitioned_devices)
-    # if len(partitioned_devices) == partitions+1:
-    #     logger.info("Partitioned devices found")
-    # else:
-    #     logger.info(f"Creating partitions for {device_obj.nvme_bdev}")
-    #     storage_node_ops._create_device_partitions(rpc_client, device_obj, snode, partitions, snode.jm_percent)
-    #     partitioned_devices = storage_node_ops._search_for_partitions(rpc_client, device_obj)
-    #     if len(partitioned_devices) == partitions+1:
-    #         logger.info("Device partitions created")
-    #     else:
-    #         logger.error("Failed to create partitions")
-    #         return False
-    #
-    # jm_part = partitioned_devices.pop(0)
-    # new_devices = []
-    # dev_order = storage_node_ops.get_next_cluster_device_order(db_controller, snode.cluster_id)
-    # for dev in partitioned_devices:
-    #     new_device = storage_node_ops._create_storage_device_stack(rpc_client, dev, snode, after_restart=False)
-    #     if not new_device:
-    #         logger.error("failed to create dev stack")
-    #         continue
-    #
-    #     new_device.cluster_device_order = dev_order
-    #     dev_order += 1
-    #     device_events.device_create(new_device)
-    #     new_devices.append(new_device)
-    #
-    # if new_devices:
-    #     snode.nvme_devices.remove(device_obj)
-    #     snode.nvme_devices.extend(new_devices)
-    #     snode.write_to_db(db_controller.kv_store)
-    # else:
-    #     logger.error("failed to create 
devices") - # return False - # - # for dev in new_devices: - # distr_controller.send_cluster_map_add_device(dev, snode) - # - # logger.info("Make other nodes connect to the node devices") - # snodes = db_controller.get_storage_nodes_by_cluster_id(snode.cluster_id) - # for node in snodes: - # if node.get_id() == snode.get_id() or node.status != StorageNode.STATUS_ONLINE: - # continue - # node.remote_devices = storage_node_ops._connect_to_remote_devs(node) - # node.write_to_db() - # for dev in new_devices: - # distr_controller.send_cluster_map_add_device(dev, node) - # - # for dev in new_devices: - # tasks_controller.add_new_device_mig_task(dev.get_id()) - # - # # add to jm raid - # if snode.jm_device and snode.jm_device.raid_bdev and jm_part: - # # looking for jm partition - # jm_dev_part = jm_part.nvme_bdev - # ret = rpc_client.get_bdevs(jm_dev_part) - # if ret: - # logger.info(f"JM part found: {jm_dev_part}") - # if snode.jm_device.status in [JMDevice.STATUS_UNAVAILABLE, JMDevice.STATUS_REMOVED]: - # restart_jm_device(snode.jm_device.get_id(), force=True, format_alceml=True) - # - # if snode.jm_device.status == JMDevice.STATUS_ONLINE and \ - # jm_dev_part not in snode.jm_device.jm_nvme_bdev_list: - # remove_jm_device(snode.jm_device.get_id(), force=True) - # restart_jm_device(snode.jm_device.get_id(), force=True) - # - # return "Done" - def device_set_failed_and_migrated(device_id): db_controller = DBController() diff --git a/simplyblock_core/controllers/health_controller.py b/simplyblock_core/controllers/health_controller.py index c013e2d58..9c80d4754 100644 --- a/simplyblock_core/controllers/health_controller.py +++ b/simplyblock_core/controllers/health_controller.py @@ -9,7 +9,7 @@ from simplyblock_core.db_controller import DBController from simplyblock_core.fw_api_client import FirewallClient from simplyblock_core.models.cluster import Cluster -from simplyblock_core.models.nvme_device import NVMeDevice, JMDevice +from simplyblock_core.models.nvme_device import NVMeDevice, JMDevice, RemoteDevice from simplyblock_core.models.storage_node import StorageNode from simplyblock_core.rpc_client import RPCClient from simplyblock_core.snode_client import SNodeClient @@ -117,7 +117,7 @@ def _check_node_rpc(rpc_ip, rpc_port, rpc_username, rpc_password, timeout=5, ret def _check_node_api(ip): try: - snode_api = SNodeClient(f"{ip}:5000", timeout=10, retry=2) + snode_api = SNodeClient(f"{ip}:5000", timeout=90, retry=2) logger.debug(f"Node API={ip}:5000") ret, _ = snode_api.is_live() logger.debug(f"snode is alive: {ret}") @@ -130,7 +130,7 @@ def _check_node_api(ip): def _check_spdk_process_up(ip, rpc_port): try: - snode_api = SNodeClient(f"{ip}:5000", timeout=10, retry=2) + snode_api = SNodeClient(f"{ip}:5000", timeout=90, retry=2) logger.debug(f"Node API={ip}:5000") is_up, _ = snode_api.spdk_process_is_up(rpc_port) logger.debug(f"SPDK is {is_up}") @@ -373,6 +373,13 @@ def _check_node_lvstore( else: node_bdev_names = [] + nodes = {} + devices = {} + for n in db_controller.get_storage_nodes(): + nodes[n.get_id()] = n + for dev in n.nvme_devices: + devices[dev.get_id()] = dev + for distr in distribs_list: if distr in node_bdev_names: logger.info(f"Checking distr bdev : {distr} ... 
ok") @@ -391,7 +398,7 @@ def _check_node_lvstore( logger.error("Failed to get cluster map") lvstore_check = False else: - results, is_passed = distr_controller.parse_distr_cluster_map(ret) + results, is_passed = distr_controller.parse_distr_cluster_map(ret, nodes, devices) if results: logger.info(utils.print_table(results)) logger.info(f"Checking Distr map ... {is_passed}") @@ -413,8 +420,16 @@ def _check_node_lvstore( if dev.get_id() == rem_dev.get_id(): continue new_remote_devices.append(rem_dev) - dev.remote_bdev = remote_bdev - new_remote_devices.append(dev) + + remote_device = RemoteDevice() + remote_device.uuid = dev.uuid + remote_device.alceml_name = dev.alceml_name + remote_device.node_id = dev.node_id + remote_device.size = dev.size + remote_device.status = NVMeDevice.STATUS_ONLINE + remote_device.nvmf_multipath = dev.nvmf_multipath + remote_device.remote_bdev = remote_bdev + new_remote_devices.append(remote_device) n.remote_devices = new_remote_devices n.write_to_db() distr_controller.send_dev_status_event(dev, dev.status, node) @@ -428,7 +443,7 @@ def _check_node_lvstore( logger.error("Failed to get cluster map") lvstore_check = False else: - results, is_passed = distr_controller.parse_distr_cluster_map(ret) + results, is_passed = distr_controller.parse_distr_cluster_map(ret, nodes, devices) logger.info(f"Checking Distr map ... {is_passed}") else: diff --git a/simplyblock_core/distr_controller.py b/simplyblock_core/distr_controller.py index e50115f62..57203ebb1 100644 --- a/simplyblock_core/distr_controller.py +++ b/simplyblock_core/distr_controller.py @@ -192,12 +192,20 @@ def get_distr_cluster_map(snodes, target_node, distr_name=""): return cl_map -def parse_distr_cluster_map(map_string): +def parse_distr_cluster_map(map_string, nodes=None, devices=None): db_controller = DBController() node_pattern = re.compile(r".*uuid_node=(.*) status=(.*)$", re.IGNORECASE) device_pattern = re.compile( r".*storage_ID=(.*) status=(.*) uuid_device=(.*) storage_bdev_name=(.*)$", re.IGNORECASE) + if not nodes or not devices: + nodes = {} + devices = {} + for n in db_controller.get_storage_nodes(): + nodes[n.get_id()] = n + for dev in n.nvme_devices: + devices[dev.get_id()] = dev + results = [] passed = True for line in map_string.split('\n'): @@ -213,8 +221,7 @@ def parse_distr_cluster_map(map_string): "Results": "", } try: - nd = db_controller.get_storage_node_by_id(node_id) - node_status = nd.status + node_status = nodes[node_id].status if node_status == StorageNode.STATUS_SCHEDULABLE: node_status = StorageNode.STATUS_UNREACHABLE data["Desired Status"] = node_status @@ -238,7 +245,7 @@ def parse_distr_cluster_map(map_string): "Results": "", } try: - sd = db_controller.get_storage_device_by_id(device_id) + sd = devices[device_id] data["Desired Status"] = sd.status if sd.status == status: data["Results"] = "ok" diff --git a/simplyblock_core/models/nvme_device.py b/simplyblock_core/models/nvme_device.py index b86e25c44..761d425da 100644 --- a/simplyblock_core/models/nvme_device.py +++ b/simplyblock_core/models/nvme_device.py @@ -47,19 +47,12 @@ class NVMeDevice(BaseModel): nvmf_nqn: str = "" nvmf_port: int = 0 nvmf_multipath: bool = False - overload_percentage: int = 0 # Unused - partition_jm_bdev: str = "" # Unused - partition_jm_size: int = 0 # Unused - partition_main_bdev: str = "" # Unused - partition_main_size: int = 0 # Unused - partitions_count: int = 0 # Unused pcie_address: str = "" physical_label: int = 0 pt_bdev: str = "" qos_bdev: str = "" remote_bdev: str = "" retries_exhausted: bool 
= False - sequential_number: int = 0 # Unused serial_number: str = "" size: int = -1 testing_bdev: str = "" @@ -73,3 +66,18 @@ class JMDevice(NVMeDevice): jm_bdev: str = "" jm_nvme_bdev_list: List[str] = [] raid_bdev: str = "" + + +class RemoteDevice(BaseModel): + + remote_bdev: str = "" + alceml_name: str = "" + node_id: str = "" + size: int = -1 + nvmf_multipath: bool = False + + +class RemoteJMDevice(RemoteDevice): + + jm_bdev: str = "" + diff --git a/simplyblock_core/models/storage_node.py b/simplyblock_core/models/storage_node.py index 81639c556..b2ad4c6f5 100644 --- a/simplyblock_core/models/storage_node.py +++ b/simplyblock_core/models/storage_node.py @@ -7,7 +7,7 @@ from simplyblock_core.models.base_model import BaseNodeObject from simplyblock_core.models.hublvol import HubLVol from simplyblock_core.models.iface import IFace -from simplyblock_core.models.nvme_device import NVMeDevice, JMDevice +from simplyblock_core.models.nvme_device import NVMeDevice, JMDevice, RemoteDevice, RemoteJMDevice from simplyblock_core.rpc_client import RPCClient, RPCException logger = utils.get_logger(__name__) @@ -79,8 +79,8 @@ class StorageNode(BaseNodeObject): pollers_mask: str = "" primary_ip: str = "" raid: str = "" - remote_devices: List[NVMeDevice] = [] - remote_jm_devices: List[JMDevice] = [] + remote_devices: List[RemoteDevice] = [] + remote_jm_devices: List[RemoteJMDevice] = [] rpc_password: str = "" rpc_port: int = -1 rpc_username: str = "" diff --git a/simplyblock_core/services/health_check_service.py b/simplyblock_core/services/health_check_service.py index bb48e9620..f822e199f 100644 --- a/simplyblock_core/services/health_check_service.py +++ b/simplyblock_core/services/health_check_service.py @@ -1,4 +1,7 @@ # coding=utf-8 +import logging +import sys +import threading import time from datetime import datetime @@ -10,8 +13,6 @@ from simplyblock_core.rpc_client import RPCClient from simplyblock_core import constants, db_controller, distr_controller, storage_node_ops -logger = utils.get_logger(__name__) - utils.init_sentry_sdk() @@ -42,223 +43,239 @@ def set_device_health_check(cluster_id, device, health_check_status): return -# get DB controller -db = db_controller.DBController() +def check_node(snode, logger): -logger.info("Starting health check service") -while True: - clusters = db.get_clusters() - for cluster in clusters: - cluster_id = cluster.get_id() - snodes = db.get_storage_nodes_by_cluster_id(cluster_id) - if not snodes: - logger.warning("storage nodes list is empty") - - for snode in snodes: - logger.info("Node: %s, status %s", snode.get_id(), snode.status) - - if snode.status not in [StorageNode.STATUS_ONLINE, StorageNode.STATUS_UNREACHABLE, - StorageNode.STATUS_SUSPENDED, StorageNode.STATUS_DOWN]: - logger.info(f"Node status is: {snode.status}, skipping") - set_node_health_check(snode, False) - for device in snode.nvme_devices: - set_device_health_check(cluster_id, device, False) - continue - - # 1- check node ping - ping_check = health_controller._check_node_ping(snode.mgmt_ip) - logger.info(f"Check: ping mgmt ip {snode.mgmt_ip} ... {ping_check}") - - # 2- check node API - node_api_check = health_controller._check_node_api(snode.mgmt_ip) - logger.info(f"Check: node API {snode.mgmt_ip}:5000 ... {node_api_check}") - - # 3- check node RPC - node_rpc_check = health_controller._check_node_rpc( - snode.mgmt_ip, snode.rpc_port, snode.rpc_username, snode.rpc_password) - logger.info(f"Check: node RPC {snode.mgmt_ip}:{snode.rpc_port} ... 
{node_rpc_check}") - - is_node_online = ping_check and node_api_check and node_rpc_check - - health_check_status = is_node_online - if node_rpc_check: - logger.info(f"Node device count: {len(snode.nvme_devices)}") - node_devices_check = True - node_remote_devices_check = True - - rpc_client = RPCClient( - snode.mgmt_ip, snode.rpc_port, - snode.rpc_username, snode.rpc_password, - timeout=3, retry=2) - connected_devices = [] - - node_bdevs = rpc_client.get_bdevs() - if node_bdevs: - # node_bdev_names = [b['name'] for b in node_bdevs] - node_bdev_names = {} - for b in node_bdevs: - node_bdev_names[b['name']] = b - for al in b['aliases']: - node_bdev_names[al] = b - else: - node_bdev_names = {} - - subsystem_list = rpc_client.subsystem_list() or [] - subsystems = { - subsystem['nqn']: subsystem - for subsystem - in subsystem_list - } - - for device in snode.nvme_devices: - passed = True - - if device.io_error: - logger.info(f"Device io_error {device.get_id()}") - passed = False - - if device.status != NVMeDevice.STATUS_ONLINE: - logger.info(f"Device status {device.status}") - passed = False - - if snode.enable_test_device: - bdevs_stack = [device.nvme_bdev, device.testing_bdev, device.alceml_bdev, device.pt_bdev] - else: - bdevs_stack = [device.nvme_bdev, device.alceml_bdev, device.pt_bdev] - - logger.info(f"Checking Device: {device.get_id()}, status:{device.status}") - problems = 0 - for bdev in bdevs_stack: - if not bdev: - continue - - if not health_controller.check_bdev(bdev, bdev_names=node_bdev_names): - problems += 1 - passed = False - - logger.info(f"Checking Device's BDevs ... ({(len(bdevs_stack) - problems)}/{len(bdevs_stack)})") - - passed &= health_controller.check_subsystem(device.nvmf_nqn, nqns=subsystems) - - set_device_health_check(cluster_id, device, passed) - if device.status == NVMeDevice.STATUS_ONLINE: - node_devices_check &= passed - - logger.info(f"Node remote device: {len(snode.remote_devices)}") - - for remote_device in snode.remote_devices: - org_dev = db.get_storage_device_by_id(remote_device.get_id()) - org_node = db.get_storage_node_by_id(remote_device.node_id) - if org_dev.status == NVMeDevice.STATUS_ONLINE and org_node.status == StorageNode.STATUS_ONLINE: - if health_controller.check_bdev(remote_device.remote_bdev, bdev_names=node_bdev_names): - connected_devices.append(remote_device.get_id()) - continue - - if not org_dev.alceml_bdev: - logger.error(f"device alceml bdev not found!, {org_dev.get_id()}") - continue - - try: - storage_node_ops.connect_device( - f"remote_{org_dev.alceml_bdev}", org_dev, snode, - bdev_names=list(node_bdev_names), reattach=False, - ) - connected_devices.append(org_dev.get_id()) - sn = db.get_storage_node_by_id(snode.get_id()) - for d in sn.remote_devices: - if d.get_id() == remote_device.get_id(): - d.status = NVMeDevice.STATUS_ONLINE - sn.write_to_db() - break - distr_controller.send_dev_status_event(org_dev, NVMeDevice.STATUS_ONLINE, snode) - except RuntimeError: - logger.error(f"Failed to connect to device: {org_dev.get_id()}") - node_remote_devices_check = False - - connected_jms = [] - if snode.jm_device and snode.jm_device.get_id(): - jm_device = snode.jm_device - logger.info(f"Node JM: {jm_device.get_id()}") - if jm_device.jm_bdev in node_bdev_names: - logger.info(f"Checking jm bdev: {jm_device.jm_bdev} ... 
ok") - connected_jms.append(jm_device.get_id()) + logger.info("Node: %s, status %s", snode.get_id(), snode.status) + + if snode.status not in [StorageNode.STATUS_ONLINE, StorageNode.STATUS_UNREACHABLE, + StorageNode.STATUS_SUSPENDED, StorageNode.STATUS_DOWN]: + logger.info(f"Node status is: {snode.status}, skipping") + set_node_health_check(snode, False) + for device in snode.nvme_devices: + set_device_health_check(snode.cluster_id, device, False) + return + + # 1- check node ping + ping_check = health_controller._check_node_ping(snode.mgmt_ip) + logger.info(f"Check: ping mgmt ip {snode.mgmt_ip} ... {ping_check}") + + # 2- check node API + node_api_check = health_controller._check_node_api(snode.mgmt_ip) + logger.info(f"Check: node API {snode.mgmt_ip}:5000 ... {node_api_check}") + + # 3- check node RPC + node_rpc_check = health_controller._check_node_rpc( + snode.mgmt_ip, snode.rpc_port, snode.rpc_username, snode.rpc_password) + logger.info(f"Check: node RPC {snode.mgmt_ip}:{snode.rpc_port} ... {node_rpc_check}") + + is_node_online = ping_check and node_api_check and node_rpc_check + + health_check_status = is_node_online + if node_rpc_check: + logger.info(f"Node device count: {len(snode.nvme_devices)}") + node_devices_check = True + node_remote_devices_check = True + + rpc_client = RPCClient( + snode.mgmt_ip, snode.rpc_port, + snode.rpc_username, snode.rpc_password, + timeout=3, retry=2) + connected_devices = [] + + node_bdevs = rpc_client.get_bdevs() + if node_bdevs: + # node_bdev_names = [b['name'] for b in node_bdevs] + node_bdev_names = {} + for b in node_bdevs: + node_bdev_names[b['name']] = b + for al in b['aliases']: + node_bdev_names[al] = b + else: + node_bdev_names = {} + + subsystem_list = rpc_client.subsystem_list() or [] + subsystems = { + subsystem['nqn']: subsystem + for subsystem + in subsystem_list + } + + for device in snode.nvme_devices: + passed = True + + if device.io_error: + logger.info(f"Device io_error {device.get_id()}") + passed = False + + if device.status != NVMeDevice.STATUS_ONLINE: + logger.info(f"Device status {device.status}") + passed = False + + if snode.enable_test_device: + bdevs_stack = [device.nvme_bdev, device.testing_bdev, device.alceml_bdev, device.pt_bdev] + else: + bdevs_stack = [device.nvme_bdev, device.alceml_bdev, device.pt_bdev] + + logger.info(f"Checking Device: {device.get_id()}, status:{device.status}") + problems = 0 + for bdev in bdevs_stack: + if not bdev: + continue + + if not health_controller.check_bdev(bdev, bdev_names=node_bdev_names): + problems += 1 + passed = False + + logger.info(f"Checking Device's BDevs ... 
({(len(bdevs_stack) - problems)}/{len(bdevs_stack)})") + + passed &= health_controller.check_subsystem(device.nvmf_nqn, nqns=subsystems) + + set_device_health_check(snode.cluster_id, device, passed) + if device.status == NVMeDevice.STATUS_ONLINE: + node_devices_check &= passed + + logger.info(f"Node remote device: {len(snode.remote_devices)}") + + for remote_device in snode.remote_devices: + org_dev = db.get_storage_device_by_id(remote_device.get_id()) + org_node = db.get_storage_node_by_id(remote_device.node_id) + if org_dev.status == NVMeDevice.STATUS_ONLINE and org_node.status == StorageNode.STATUS_ONLINE: + if health_controller.check_bdev(remote_device.remote_bdev, bdev_names=node_bdev_names): + connected_devices.append(remote_device.get_id()) + continue + + if not org_dev.alceml_bdev: + logger.error(f"device alceml bdev not found!, {org_dev.get_id()}") + continue + + try: + storage_node_ops.connect_device( + f"remote_{org_dev.alceml_bdev}", org_dev, snode, + bdev_names=list(node_bdev_names), reattach=False, + ) + connected_devices.append(org_dev.get_id()) + sn = db.get_storage_node_by_id(snode.get_id()) + for d in sn.remote_devices: + if d.get_id() == remote_device.get_id(): + d.status = NVMeDevice.STATUS_ONLINE + sn.write_to_db() + break + distr_controller.send_dev_status_event(org_dev, NVMeDevice.STATUS_ONLINE, snode) + except RuntimeError: + logger.error(f"Failed to connect to device: {org_dev.get_id()}") + node_remote_devices_check = False + + connected_jms = [] + if snode.jm_device and snode.jm_device.get_id(): + jm_device = snode.jm_device + logger.info(f"Node JM: {jm_device.get_id()}") + if jm_device.jm_bdev in node_bdev_names: + logger.info(f"Checking jm bdev: {jm_device.jm_bdev} ... ok") + connected_jms.append(jm_device.get_id()) + else: + logger.info(f"Checking jm bdev: {jm_device.jm_bdev} ... not found") + + if snode.enable_ha_jm: + logger.info(f"Node remote JMs: {len(snode.remote_jm_devices)}") + for remote_device in snode.remote_jm_devices: + if remote_device.remote_bdev: + check = health_controller.check_bdev(remote_device.remote_bdev, bdev_names=node_bdev_names) + if check: + connected_jms.append(remote_device.get_id()) else: - logger.info(f"Checking jm bdev: {jm_device.jm_bdev} ... 
not found") - - if snode.enable_ha_jm: - logger.info(f"Node remote JMs: {len(snode.remote_jm_devices)}") - for remote_device in snode.remote_jm_devices: - if remote_device.remote_bdev: - check = health_controller.check_bdev(remote_device.remote_bdev, bdev_names=node_bdev_names) - if check: - connected_jms.append(remote_device.get_id()) - else: + node_remote_devices_check = False + + for jm_id in snode.jm_ids: + if jm_id and jm_id not in connected_jms: + for nd in db.get_storage_nodes(): + if nd.jm_device and nd.jm_device.get_id() == jm_id: + if nd.status == StorageNode.STATUS_ONLINE: node_remote_devices_check = False + break - for jm_id in snode.jm_ids: - if jm_id and jm_id not in connected_jms: - for nd in db.get_storage_nodes(): - if nd.jm_device and nd.jm_device.get_id() == jm_id: - if nd.status == StorageNode.STATUS_ONLINE: - node_remote_devices_check = False - break - - if not node_remote_devices_check and cluster.status in [ - Cluster.STATUS_ACTIVE, Cluster.STATUS_DEGRADED, Cluster.STATUS_READONLY]: - snode = db.get_storage_node_by_id(snode.get_id()) - snode.remote_jm_devices = storage_node_ops._connect_to_remote_jm_devs(snode) - snode.write_to_db() - - lvstore_check = True + if not node_remote_devices_check and cluster.status in [ + Cluster.STATUS_ACTIVE, Cluster.STATUS_DEGRADED, Cluster.STATUS_READONLY]: snode = db.get_storage_node_by_id(snode.get_id()) - if snode.lvstore_status == "ready" or snode.status == StorageNode.STATUS_ONLINE or \ - snode.lvstore_status == "failed" : + snode.remote_jm_devices = storage_node_ops._connect_to_remote_jm_devs(snode) + snode.write_to_db() + + lvstore_check = True + snode = db.get_storage_node_by_id(snode.get_id()) + if snode.lvstore_status == "ready" or snode.status == StorageNode.STATUS_ONLINE or \ + snode.lvstore_status == "failed": + + lvstore_stack = snode.lvstore_stack + lvstore_check &= health_controller._check_node_lvstore( + lvstore_stack, snode, auto_fix=True, node_bdev_names=node_bdev_names) - lvstore_stack = snode.lvstore_stack + if snode.secondary_node_id: + + lvstore_check &= health_controller._check_node_hublvol( + snode, node_bdev_names=node_bdev_names, node_lvols_nqns=subsystems) + + second_node_1 = db.get_storage_node_by_id(snode.secondary_node_id) + if second_node_1 and second_node_1.status == StorageNode.STATUS_ONLINE: lvstore_check &= health_controller._check_node_lvstore( - lvstore_stack, snode, auto_fix=True, node_bdev_names=node_bdev_names) - - if snode.secondary_node_id: - - lvstore_check &= health_controller._check_node_hublvol( - snode, node_bdev_names=node_bdev_names, node_lvols_nqns=subsystems) - - second_node_1 = db.get_storage_node_by_id(snode.secondary_node_id) - if second_node_1 and second_node_1.status == StorageNode.STATUS_ONLINE: - lvstore_check &= health_controller._check_node_lvstore( - lvstore_stack, second_node_1, auto_fix=True, stack_src_node=snode) - sec_node_check = health_controller._check_sec_node_hublvol(second_node_1) - if not sec_node_check: - if snode.status == StorageNode.STATUS_ONLINE: - ret = second_node_1.rpc_client().bdev_lvol_get_lvstores(snode.lvstore) - if ret: - lvs_info = ret[0] - if "lvs leadership" in lvs_info and lvs_info['lvs leadership']: - # is_sec_node_leader = True - # check jc_compression status - jc_compression_is_active = second_node_1.rpc_client().jc_compression_get_status(snode.jm_vuid) - if not jc_compression_is_active: - lvstore_check &= health_controller._check_sec_node_hublvol(second_node_1, auto_fix=True) - - - lvol_port_check = False - # if node_api_check: - ports = 
[snode.lvol_subsys_port] - - if snode.lvstore_stack_secondary_1: - second_node_1 = db.get_storage_node_by_id(snode.lvstore_stack_secondary_1) - if second_node_1 and second_node_1.status == StorageNode.STATUS_ONLINE: - ports.append(second_node_1.lvol_subsys_port) - - for port in ports: - lvol_port_check = health_controller._check_port_on_node(snode, port) - logger.info( - f"Check: node {snode.mgmt_ip}, port: {port} ... {lvol_port_check}") - if not lvol_port_check and snode.status != StorageNode.STATUS_SUSPENDED: - tasks_controller.add_port_allow_task(snode.cluster_id, snode.get_id(), port) - - health_check_status = is_node_online and node_devices_check and node_remote_devices_check and lvstore_check - set_node_health_check(snode, bool(health_check_status)) + lvstore_stack, second_node_1, auto_fix=True, stack_src_node=snode) + sec_node_check = health_controller._check_sec_node_hublvol(second_node_1) + if not sec_node_check: + if snode.status == StorageNode.STATUS_ONLINE: + ret = second_node_1.rpc_client().bdev_lvol_get_lvstores(snode.lvstore) + if ret: + lvs_info = ret[0] + if "lvs leadership" in lvs_info and lvs_info['lvs leadership']: + # is_sec_node_leader = True + # check jc_compression status + jc_compression_is_active = second_node_1.rpc_client().jc_compression_get_status( + snode.jm_vuid) + if not jc_compression_is_active: + lvstore_check &= health_controller._check_sec_node_hublvol(second_node_1, + auto_fix=True) + + lvol_port_check = False + # if node_api_check: + ports = [snode.lvol_subsys_port] + + if snode.lvstore_stack_secondary_1: + second_node_1 = db.get_storage_node_by_id(snode.lvstore_stack_secondary_1) + if second_node_1 and second_node_1.status == StorageNode.STATUS_ONLINE: + ports.append(second_node_1.lvol_subsys_port) + + for port in ports: + lvol_port_check = health_controller._check_port_on_node(snode, port) + logger.info( + f"Check: node {snode.mgmt_ip}, port: {port} ... 
{lvol_port_check}") + if not lvol_port_check and snode.status != StorageNode.STATUS_SUSPENDED: + tasks_controller.add_port_allow_task(snode.cluster_id, snode.get_id(), port) + + health_check_status = is_node_online and node_devices_check and node_remote_devices_check and lvstore_check + set_node_health_check(snode, bool(health_check_status)) + time.sleep(constants.HEALTH_CHECK_INTERVAL_SEC) + + +def loop_for_node(snode): + logger = logging.getLogger() + logger.setLevel("INFO") + logger_handler = logging.StreamHandler(stream=sys.stdout) + logger_handler.setFormatter(logging.Formatter(f'%(asctime)s: node:{snode.mgmt_ip} %(levelname)s: %(message)s')) + logger.addHandler(logger_handler) + while True: + check_node(snode, logger) + time.sleep(constants.HEALTH_CHECK_INTERVAL_SEC) + + +# logger.info("Starting health check service") +db = db_controller.DBController() +threads_maps: dict[str, threading.Thread] = {} +while True: + clusters = db.get_clusters() + for cluster in clusters: + for node in db.get_storage_nodes_by_cluster_id(cluster.get_id()): + node_id = node.get_id() + if node_id not in threads_maps or threads_maps[node_id].is_alive() is False: + t = threading.Thread(target=loop_for_node, args=(node,)) + t.start() + threads_maps[node_id] = t time.sleep(constants.HEALTH_CHECK_INTERVAL_SEC) diff --git a/simplyblock_core/services/main_distr_event_collector.py b/simplyblock_core/services/main_distr_event_collector.py index 31dffeda0..bd4acd16f 100644 --- a/simplyblock_core/services/main_distr_event_collector.py +++ b/simplyblock_core/services/main_distr_event_collector.py @@ -84,9 +84,9 @@ def process_device_event(event): else: distr_controller.send_dev_status_event(device_obj, NVMeDevice.STATUS_UNAVAILABLE, event_node_obj) event_node_obj = db.get_storage_node_by_id(event_node_obj.get_id()) - for dev in event_node_obj.remote_devices: - if dev.get_id() == device_obj.get_id(): - event_node_obj.remote_devices.remove(dev) + for remote_dev in event_node_obj.remote_devices: + if remote_dev.get_id() == device_obj.get_id(): + event_node_obj.remote_devices.remove(remote_dev) event_node_obj.write_to_db() break diff --git a/simplyblock_core/services/storage_node_monitor.py b/simplyblock_core/services/storage_node_monitor.py index 17a7d0369..b9a38eb42 100644 --- a/simplyblock_core/services/storage_node_monitor.py +++ b/simplyblock_core/services/storage_node_monitor.py @@ -1,9 +1,10 @@ # coding=utf-8 +import logging +import sys import threading import time from datetime import datetime, timezone - from simplyblock_core import constants, db_controller, cluster_ops, storage_node_ops, utils from simplyblock_core.controllers import health_controller, device_controller, tasks_controller, storage_events from simplyblock_core.models.cluster import Cluster @@ -14,7 +15,6 @@ logger = utils.get_logger(__name__) - # get DB controller db = db_controller.DBController() @@ -114,11 +114,12 @@ def get_next_cluster_status(cluster_id): k = cluster.distr_npcs # if number of devices in the cluster unavailable on DIFFERENT nodes > k --> I cannot read and in some cases cannot write (suspended) - if affected_nodes == k and (not cluster.strict_node_anti_affinity or online_nodes >= (n+k)): + if affected_nodes == k and (not cluster.strict_node_anti_affinity or online_nodes >= (n + k)): return Cluster.STATUS_DEGRADED elif jm_replication_tasks: return Cluster.STATUS_DEGRADED - elif (affected_nodes > k or online_devices < (n + k) or (online_nodes < (n+k) and cluster.strict_node_anti_affinity)): + elif (affected_nodes > k or 
online_devices < (n + k) or ( + online_nodes < (n + k) and cluster.strict_node_anti_affinity)): return Cluster.STATUS_SUSPENDED else: return Cluster.STATUS_ACTIVE @@ -136,7 +137,7 @@ def update_cluster_status(cluster_id): first_iter_task_pending += 1 cluster = db.get_cluster_by_id(cluster_id) - cluster.is_re_balancing = first_iter_task_pending > 0 + cluster.is_re_balancing = first_iter_task_pending > 0 cluster.write_to_db() current_cluster_status = cluster.status @@ -145,7 +146,7 @@ def update_cluster_status(cluster_id): return if current_cluster_status == Cluster.STATUS_DEGRADED and next_current_status == Cluster.STATUS_ACTIVE: - # if cluster.status not in [Cluster.STATUS_ACTIVE, Cluster.STATUS_UNREADY] and cluster_current_status == Cluster.STATUS_ACTIVE: + # if cluster.status not in [Cluster.STATUS_ACTIVE, Cluster.STATUS_UNREADY] and cluster_current_status == Cluster.STATUS_ACTIVE: # cluster_ops.cluster_activate(cluster_id, True) cluster_ops.set_cluster_status(cluster_id, Cluster.STATUS_ACTIVE) return @@ -186,7 +187,6 @@ def update_cluster_status(cluster_id): cluster_ops.set_cluster_status(cluster_id, next_current_status) - def set_node_online(node): if node.status != StorageNode.STATUS_ONLINE: @@ -211,24 +211,56 @@ def set_node_online(node): if online_devices_list: tasks_controller.add_device_mig_task(online_devices_list, node.cluster_id) -def set_node_offline(node, set_devs_offline=False): - if node.status != StorageNode.STATUS_UNREACHABLE: - # set node unavailable - storage_node_ops.set_node_status(node.get_id(), StorageNode.STATUS_UNREACHABLE) + update_cluster_status(cluster_id) + - # if set_devs_offline: - # # set devices unavailable - # for dev in node.nvme_devices: - # if dev.status in [NVMeDevice.STATUS_ONLINE, NVMeDevice.STATUS_READONLY]: - # device_controller.device_set_unavailable(dev.get_id()) +def set_node_offline(node): + if node.status != StorageNode.STATUS_OFFLINE: + try: + storage_node_ops.set_node_status(node.get_id(), StorageNode.STATUS_OFFLINE) + for dev in node.nvme_devices: + if dev.status in [NVMeDevice.STATUS_ONLINE, NVMeDevice.STATUS_READONLY, + NVMeDevice.STATUS_CANNOT_ALLOCATE]: + device_controller.device_set_unavailable(dev.get_id()) + update_cluster_status(cluster_id) + # initiate restart + tasks_controller.add_node_to_auto_restart(node) + except Exception as e: + logger.debug("Setting node to OFFLINE state failed") + logger.error(e) + + +def set_node_unreachable(node): + if node.status != StorageNode.STATUS_UNREACHABLE: + try: + storage_node_ops.set_node_status(node.get_id(), StorageNode.STATUS_UNREACHABLE) + update_cluster_status(cluster_id) + except Exception as e: + logger.debug("Setting node to UNREACHABLE state failed") + logger.error(e) + + +def set_node_schedulable(node): + if node.status != StorageNode.STATUS_SCHEDULABLE: + try: + storage_node_ops.set_node_status(node.get_id(), StorageNode.STATUS_SCHEDULABLE) + # initiate shutdown + # initiate restart + tasks_controller.add_node_to_auto_restart(node) + for dev in node.nvme_devices: + if dev.status in [NVMeDevice.STATUS_ONLINE, NVMeDevice.STATUS_READONLY, + NVMeDevice.STATUS_CANNOT_ALLOCATE]: + device_controller.device_set_unavailable(dev.get_id()) + update_cluster_status(cluster_id) + except Exception as e: + logger.debug("Setting node to SCHEDULABLE state failed") + logger.error(e) - # # set jm dev offline - # if node.jm_device.status != JMDevice.STATUS_UNAVAILABLE: - # device_controller.set_jm_device_state(node.jm_device.get_id(), JMDevice.STATUS_UNAVAILABLE) def set_node_down(node): if node.status 
not in [StorageNode.STATUS_DOWN, StorageNode.STATUS_SUSPENDED]:
         storage_node_ops.set_node_status(node.get_id(), StorageNode.STATUS_DOWN)
+        update_cluster_status(cluster_id)
 
 
 def node_rpc_timeout_check_and_report(node):
@@ -242,10 +274,137 @@
     except Exception as e:
         logger.debug(e)
         # RPC timeout detected, send to cluster log
-        storage_events.snode_rpc_timeout(node, time.time()-start_time)
+        storage_events.snode_rpc_timeout(node, int(time.time() - start_time))
+    return False
+
+
+def node_port_check_fun(snode):
+    node_port_check = True
+    ports = [snode.nvmf_port]
+    if snode.lvstore_stack_secondary_1:
+        for n in db.get_primary_storage_nodes_by_secondary_node_id(snode.get_id()):
+            if n.lvstore_status == "ready":
+                ports.append(n.lvol_subsys_port)
+    if not snode.is_secondary_node:
+        ports.append(snode.lvol_subsys_port)
+
+    for port in ports:
+        ret = health_controller._check_port_on_node(snode, port)
+        logger.info(f"Check: node port {snode.mgmt_ip}, {port} ... {ret}")
+        node_port_check &= ret
+
+    node_data_nic_ping_check = False
+    for data_nic in snode.data_nics:
+        if data_nic.ip4_address:
+            data_ping_check = health_controller._check_node_ping(data_nic.ip4_address)
+            logger.info(f"Check: ping data nic {data_nic.ip4_address} ... {data_ping_check}")
+            node_data_nic_ping_check |= data_ping_check
+
+    node_port_check &= node_data_nic_ping_check
+
+    return node_port_check
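Each probe in check_node() below maps its first failure to exactly one node state. A condensed, illustrative sketch of that escalation (names follow the helpers above; this is not the module's literal code):

    from simplyblock_core.models.storage_node import StorageNode

    def classify(ping_ok, api_ok, spdk_ok, rpc_ok, ports_ok):
        # Probes run in this order; the first failure decides the node state.
        if not ping_ok or not api_ok:
            return StorageNode.STATUS_UNREACHABLE  # host or node API is not reachable
        if not spdk_ok:
            return StorageNode.STATUS_OFFLINE      # SPDK process gone: devices set unavailable, restart queued
        if not rpc_ok:
            return StorageNode.STATUS_SCHEDULABLE  # API up but RPC dead: node is rescheduled
        if not ports_ok:
            return StorageNode.STATUS_DOWN         # data ports blocked (only on active/degraded/readonly clusters)
        return StorageNode.STATUS_ONLINE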
{bool(is_up)}") + if not is_up: + logger.info("Check: node API failed, setting node offline") + set_node_offline(snode) + return False + except Exception as e: + logger.debug(e) + set_node_unreachable(snode) + return False + + # 4- check node rpc interface + node_rpc_check = health_controller._check_node_rpc( + snode.mgmt_ip, snode.rpc_port, snode.rpc_username, snode.rpc_password, timeout=20, retry=2) + logger.info(f"Check: node RPC {snode.mgmt_ip}:{snode.rpc_port} ... {node_rpc_check}") + + if not node_rpc_check and snode.get_id() not in node_rpc_timeout_threads: + t = threading.Thread(target=node_rpc_timeout_check_and_report, args=(snode,)) + t.start() + node_rpc_timeout_threads[snode.get_id()] = t + + if not node_rpc_check: + logger.info(f"Check: node RPC {snode.mgmt_ip}:{snode.rpc_port} ... {node_rpc_check}:FAILED") + set_node_schedulable(snode) + return False + + node_port_check = node_port_check_fun(snode) + + if not node_port_check: + cluster = db.get_cluster_by_id(snode.cluster_id) + if cluster.status in [Cluster.STATUS_ACTIVE, Cluster.STATUS_DEGRADED, Cluster.STATUS_READONLY]: + logger.error("Port check failed") + set_node_down(snode) + return True + + set_node_online(snode) + + +def loop_for_node(snode): + # global logger + # logger = logging.getLogger() + # logger_handler = logging.StreamHandler(stream=sys.stdout) + # logger_handler.setFormatter(logging.Formatter(f'%(asctime)s: node:{snode.mgmt_ip} %(levelname)s: %(message)s')) + # logger.addHandler(logger_handler) + while True: + check_node(snode) + logger.info(f"Sleeping for {constants.NODE_MONITOR_INTERVAL_SEC} seconds") + time.sleep(constants.NODE_MONITOR_INTERVAL_SEC) logger.info("Starting node monitor") +threads_maps: dict[str, threading.Thread] = {} + while True: clusters = db.get_clusters() for cluster in clusters: @@ -255,166 +414,12 @@ def node_rpc_timeout_check_and_report(node): continue nodes = db.get_storage_nodes_by_cluster_id(cluster_id) - for snode in nodes: - - # get fresh node object, something could have changed until the last for loop is reached - snode = db.get_storage_node_by_id(snode.get_id()) - - if snode.status not in [StorageNode.STATUS_ONLINE, StorageNode.STATUS_UNREACHABLE, - StorageNode.STATUS_SCHEDULABLE, StorageNode.STATUS_DOWN]: - logger.info(f"Node status is: {snode.status}, skipping") - continue - - if snode.status == StorageNode.STATUS_ONLINE and snode.lvstore_status == "in_creation": - logger.info(f"Node lvstore is in creation: {snode.get_id()}, skipping") - continue - - logger.info(f"Checking node {snode.hostname}") - - # 1- check node ping - ping_check = health_controller._check_node_ping(snode.mgmt_ip) - logger.info(f"Check: ping mgmt ip {snode.mgmt_ip} ... {ping_check}") - if not ping_check: - time.sleep(1) - ping_check = health_controller._check_node_ping(snode.mgmt_ip) - logger.info(f"Check 2: ping mgmt ip {snode.mgmt_ip} ... {ping_check}") - - # 2- check node API - node_api_check = health_controller._check_node_api(snode.mgmt_ip) - logger.info(f"Check: node API {snode.mgmt_ip}:5000 ... {node_api_check}") - - if snode.status == StorageNode.STATUS_SCHEDULABLE and not ping_check and not node_api_check: - continue - - spdk_process = False - if node_api_check: - # 3- check spdk_process - spdk_process = health_controller._check_spdk_process_up(snode.mgmt_ip, snode.rpc_port) - logger.info(f"Check: spdk process {snode.mgmt_ip}:5000 ... 
{spdk_process}") - - # 4- check rpc - node_rpc_check = health_controller._check_node_rpc( - snode.mgmt_ip, snode.rpc_port, snode.rpc_username, snode.rpc_password, timeout=5, retry=2) - logger.info(f"Check: node RPC {snode.mgmt_ip}:{snode.rpc_port} ... {node_rpc_check}") - - if not node_rpc_check and snode.get_id() not in node_rpc_timeout_threads: - t = threading.Thread(target=node_rpc_timeout_check_and_report, args=(snode,)) + for node in nodes: + node_id = node.get_id() + if node_id not in threads_maps or threads_maps[node_id].is_alive() is False: + t = threading.Thread(target=loop_for_node, args=(node,)) t.start() - node_rpc_timeout_threads[snode.get_id()] = t - - if ping_check and node_api_check and spdk_process and not node_rpc_check: - start_time = time.time() - while time.time() < start_time + 60: - node_rpc_check = health_controller._check_node_rpc( - snode.mgmt_ip, snode.rpc_port, snode.rpc_username, snode.rpc_password, timeout=5, retry=2) - logger.info(f"Check: node RPC {snode.mgmt_ip}:{snode.rpc_port} ... {node_rpc_check}") - if node_rpc_check: - break - - node_port_check = True - - if spdk_process and node_rpc_check and snode.lvstore_status == "ready": - ports = [snode.nvmf_port] - if snode.lvstore_stack_secondary_1: - for n in db.get_primary_storage_nodes_by_secondary_node_id(snode.get_id()): - if n.lvstore_status == "ready": - ports.append(n.lvol_subsys_port) - if not snode.is_secondary_node: - ports.append(snode.lvol_subsys_port) - - for port in ports: - ret = health_controller._check_port_on_node(snode, port) - logger.info(f"Check: node port {snode.mgmt_ip}, {port} ... {ret}") - node_port_check &= ret - - node_data_nic_ping_check = False - for data_nic in snode.data_nics: - if data_nic.ip4_address: - data_ping_check = health_controller._check_node_ping(data_nic.ip4_address) - logger.info(f"Check: ping data nic {data_nic.ip4_address} ... 
{data_ping_check}") - node_data_nic_ping_check |= data_ping_check - - node_port_check &= node_data_nic_ping_check - - cluster = db.get_cluster_by_id(cluster.get_id()) - - # is_node_online = ping_check and spdk_process and node_rpc_check and node_port_check - is_node_online = spdk_process or node_rpc_check - if is_node_online: - - if snode.status == StorageNode.STATUS_UNREACHABLE: - if cluster.status in [Cluster.STATUS_ACTIVE, Cluster.STATUS_DEGRADED, Cluster.STATUS_UNREADY, - Cluster.STATUS_SUSPENDED, Cluster.STATUS_READONLY]: - # tasks_controller.add_node_to_auto_restart(snode) - set_node_online(snode) - continue - - if not node_port_check: - if cluster.status in [Cluster.STATUS_ACTIVE, Cluster.STATUS_DEGRADED, Cluster.STATUS_READONLY]: - logger.error("Port check failed") - set_node_down(snode) - continue - - set_node_online(snode) - - # # check JM device - # if snode.jm_device: - # if snode.jm_device.status in [JMDevice.STATUS_ONLINE, JMDevice.STATUS_UNAVAILABLE]: - # ret = health_controller.check_jm_device(snode.jm_device.get_id()) - # if ret: - # logger.info(f"JM bdev is online: {snode.jm_device.get_id()}") - # if snode.jm_device.status != JMDevice.STATUS_ONLINE: - # device_controller.set_jm_device_state(snode.jm_device.get_id(), JMDevice.STATUS_ONLINE) - # else: - # logger.error(f"JM bdev is offline: {snode.jm_device.get_id()}") - # if snode.jm_device.status != JMDevice.STATUS_UNAVAILABLE: - # device_controller.set_jm_device_state(snode.jm_device.get_id(), - # JMDevice.STATUS_UNAVAILABLE) - else: - - if not ping_check and not node_api_check and not spdk_process: - # restart on new node - storage_node_ops.set_node_status(snode.get_id(), StorageNode.STATUS_SCHEDULABLE) - - elif ping_check and node_api_check and (not spdk_process or not node_rpc_check): - # add node to auto restart - if cluster.status in [Cluster.STATUS_ACTIVE, Cluster.STATUS_DEGRADED, Cluster.STATUS_UNREADY, - Cluster.STATUS_SUSPENDED, Cluster.STATUS_READONLY]: - if not spdk_process and not node_rpc_check: - logger.info("ping is fine, snodeapi is fine, But no spdk process and no rpc check, " - "So that we set device offline") - set_node_offline(snode, set_devs_offline=(not spdk_process and not node_rpc_check)) - try: - ret = snode.rpc_client(timeout=10).get_version() - if not ret: - logger.debug("False RPC response, adding node to auto restart") - tasks_controller.add_node_to_auto_restart(snode) - except Exception as e: - logger.debug("Timeout to get RPC response, skipping restart") - logger.error(e) - - elif not node_port_check: - if cluster.status in [Cluster.STATUS_ACTIVE, Cluster.STATUS_DEGRADED, Cluster.STATUS_READONLY]: - logger.error("Port check failed") - set_node_down(snode) - - else: - set_node_offline(snode, set_devs_offline=not spdk_process) - - if ping_check and node_api_check and spdk_process and not node_rpc_check: - # restart spdk proxy cont - if cluster.status in [Cluster.STATUS_ACTIVE, Cluster.STATUS_DEGRADED, Cluster.STATUS_UNREADY, - Cluster.STATUS_SUSPENDED, Cluster.STATUS_READONLY]: - logger.info(f"Restarting spdk_proxy_{snode.rpc_port} on {snode.get_id()}") - snode_api = SNodeClient(f"{snode.mgmt_ip}:5000", timeout=60, retry=1) - ret, err = snode_api.spdk_proxy_restart(snode.rpc_port) - if ret: - logger.info(f"Restarting spdk_proxy on {snode.get_id()} successfully") - continue - if err: - logger.error(err) + threads_maps[node_id] = t + time.sleep(constants.NODE_MONITOR_INTERVAL_SEC) update_cluster_status(cluster_id) - - logger.info(f"Sleeping for {constants.NODE_MONITOR_INTERVAL_SEC} seconds") - 
time.sleep(constants.NODE_MONITOR_INTERVAL_SEC) diff --git a/simplyblock_core/services/tasks_runner_port_allow.py b/simplyblock_core/services/tasks_runner_port_allow.py index e95dbdf94..d49d6c19b 100644 --- a/simplyblock_core/services/tasks_runner_port_allow.py +++ b/simplyblock_core/services/tasks_runner_port_allow.py @@ -7,7 +7,7 @@ from simplyblock_core.fw_api_client import FirewallClient from simplyblock_core.models.job_schedule import JobSchedule from simplyblock_core.models.cluster import Cluster -from simplyblock_core.models.nvme_device import NVMeDevice +from simplyblock_core.models.nvme_device import NVMeDevice, RemoteDevice from simplyblock_core.models.storage_node import StorageNode from simplyblock_core.snode_client import SNodeClient @@ -105,11 +105,18 @@ if not dev.alceml_bdev: raise ValueError(f"device alceml bdev not found!, {dev.get_id()}") - dev.remote_bdev = storage_node_ops.connect_device( + remote_device = RemoteDevice() + remote_device.uuid = dev.uuid + remote_device.alceml_name = dev.alceml_name + remote_device.node_id = dev.node_id + remote_device.size = dev.size + remote_device.nvmf_multipath = dev.nvmf_multipath + remote_device.status = NVMeDevice.STATUS_ONLINE + remote_device.remote_bdev = storage_node_ops.connect_device( f"remote_{dev.alceml_bdev}", dev, node, bdev_names=list(node_bdev_names), reattach=False) - remote_devices.append(dev) + remote_devices.append(remote_device) if not remote_devices: msg = "Node unable to connect to remote devs, retry task" logger.info(msg) diff --git a/simplyblock_core/storage_node_ops.py b/simplyblock_core/storage_node_ops.py index 162f0dd1a..e19c0f3c8 100644 --- a/simplyblock_core/storage_node_ops.py +++ b/simplyblock_core/storage_node_ops.py @@ -27,7 +27,7 @@ from simplyblock_core.models.iface import IFace from simplyblock_core.models.job_schedule import JobSchedule from simplyblock_core.models.lvol_model import LVol -from simplyblock_core.models.nvme_device import NVMeDevice, JMDevice +from simplyblock_core.models.nvme_device import NVMeDevice, JMDevice, RemoteDevice, RemoteJMDevice from simplyblock_core.models.snapshot import SnapShot from simplyblock_core.models.storage_node import StorageNode from simplyblock_core.models.cluster import Cluster @@ -100,9 +100,6 @@ def connect_device(name: str, device: NVMeDevice, node: StorageNode, bdev_names: if device.nvmf_multipath: rpc_client.bdev_nvme_set_multipath_policy(bdev_name, "active_active") - # wait 5 seconds after controller attach - time.sleep(5) - if not bdev_name: msg = "Bdev name not returned from controller attach" logger.error(msg) @@ -716,11 +713,18 @@ def _connect_to_remote_devs( if not dev.alceml_bdev: raise ValueError(f"device alceml bdev not found!, {dev.get_id()}") - dev.remote_bdev = connect_device( + remote_bdev = RemoteDevice() + remote_bdev.uuid = dev.uuid + remote_bdev.alceml_name = dev.alceml_name + remote_bdev.node_id = dev.node_id + remote_bdev.size = dev.size + remote_bdev.status = NVMeDevice.STATUS_ONLINE + remote_bdev.nvmf_multipath = dev.nvmf_multipath + remote_bdev.remote_bdev = connect_device( f"remote_{dev.alceml_bdev}", dev, this_node, bdev_names=node_bdev_names, reattach=reattach, ) - remote_devices.append(dev) + remote_devices.append(remote_bdev) return remote_devices @@ -775,14 +779,22 @@ def _connect_to_remote_jm_devs(this_node, jm_ids=None): if not org_dev or org_dev in new_devs or org_dev_node and org_dev_node.get_id() == this_node.get_id(): continue + remote_device = RemoteJMDevice() + remote_device.uuid = org_dev.uuid + 
remote_device.alceml_name = org_dev.alceml_name
+        remote_device.node_id = org_dev.node_id
+        remote_device.size = org_dev.size
+        remote_device.jm_bdev = org_dev.jm_bdev
+        remote_device.status = NVMeDevice.STATUS_ONLINE
+        remote_device.nvmf_multipath = org_dev.nvmf_multipath
         try:
-            org_dev.remote_bdev = connect_device(
+            remote_device.remote_bdev = connect_device(
                 f"remote_{org_dev.jm_bdev}", org_dev, this_node, bdev_names=node_bdev_names, reattach=True,
             )
         except RuntimeError:
             logger.error(f'Failed to connect to {org_dev.get_id()}')
-        new_devs.append(org_dev)
+        new_devs.append(remote_device)
 
     return new_devs
 
@@ -1481,8 +1493,6 @@ def restart_storage_node(
         spdk_image=None, set_spdk_debug=None,
         small_bufsize=0, large_bufsize=0,
         force=False, node_ip=None, reattach_volume=False, clear_data=False, new_ssd_pcie=[], force_lvol_recreate=False):
-    db_controller = DBController()
-    kv_store = db_controller.kv_store
     db_controller = DBController()
 
     logger.info("Restarting storage node")
@@ -1900,23 +1910,6 @@ def restart_storage_node(
             db_dev.health_check = True
             device_events.device_restarted(db_dev)
     snode.write_to_db(db_controller.kv_store)
-    #
-    # # make other nodes connect to the new devices
-    # logger.info("Make other nodes connect to the node devices")
-    # snodes = db_controller.get_storage_nodes_by_cluster_id(snode.cluster_id)
-    # for node in snodes:
-    #     if node.get_id() == snode.get_id() or node.status != StorageNode.STATUS_ONLINE:
-    #         continue
-    #     node.remote_devices = _connect_to_remote_devs(node, force_connect_restarting_nodes=True)
-    #     node.write_to_db(kv_store)
-    #
-    # logger.info(f"Sending device status event")
-    # snode = db_controller.get_storage_node_by_id(snode.get_id())
-    # for db_dev in snode.nvme_devices:
-    #     distr_controller.send_dev_status_event(db_dev, db_dev.status)
-    #
-    # if snode.jm_device and snode.jm_device.status in [JMDevice.STATUS_UNAVAILABLE, JMDevice.STATUS_ONLINE]:
-    #     device_controller.set_jm_device_state(snode.jm_device.get_id(), JMDevice.STATUS_ONLINE)
 
     cluster = db_controller.get_cluster_by_id(snode.cluster_id)
     if cluster.status not in [Cluster.STATUS_ACTIVE, Cluster.STATUS_DEGRADED, Cluster.STATUS_READONLY]:
@@ -1932,7 +1925,7 @@
             except RuntimeError:
                 logger.error('Failed to connect to remote devices')
                 return False
-            node.write_to_db(kv_store)
+            node.write_to_db()
 
     logger.info("Sending device status event")
     snode = db_controller.get_storage_node_by_id(snode.get_id())
@@ -1979,8 +1972,7 @@
             except RuntimeError:
                 logger.error('Failed to connect to remote devices')
                 return False
-            node.write_to_db(kv_store)
-
+            node.write_to_db()
 
     logger.info("Sending device status event")
     snode = db_controller.get_storage_node_by_id(snode.get_id())
@@ -2152,13 +2144,13 @@ def list_storage_devices(node_id, is_json):
                 "Health": jm_device.health_check
             })
 
-        for device in snode.remote_devices:
-            logger.debug(device)
+        for remote_device in snode.remote_devices:
+            logger.debug(remote_device)
             logger.debug("*" * 20)
-            name = device.alceml_name
-            status = device.status
-            if device.remote_bdev:
-                name = device.remote_bdev
+            name = remote_device.alceml_name
+            status = remote_device.status
+            if remote_device.remote_bdev:
+                name = remote_device.remote_bdev
             try:
-                org_dev = db_controller.get_storage_device_by_id(device.get_id())
+                org_dev = db_controller.get_storage_device_by_id(remote_device.get_id())
                 status = org_dev.status
             except KeyError:
                 pass
 
             remote_devices.append({
-                "UUID": device.uuid,
+                "UUID": remote_device.uuid,
                 "Name": name,
-                "Size": utils.humanbytes(device.size),
-                "Node ID": device.node_id,
+                "Size": utils.humanbytes(remote_device.size),
+                "Node ID": remote_device.node_id,
                 "Status": status,
             })
 
-        for device in snode.remote_jm_devices:
-            logger.debug(device)
+        for remote_jm_device in snode.remote_jm_devices:
+            logger.debug(remote_jm_device)
             logger.debug("*" * 20)
             remote_devices.append({
-                "UUID": device.uuid,
-                "Name": device.remote_bdev,
-                "Size": utils.humanbytes(device.size),
-                "Node ID": device.node_id,
-                "Status": device.status,
+                "UUID": remote_jm_device.uuid,
+                "Name": remote_jm_device.remote_bdev,
+                "Size": utils.humanbytes(remote_jm_device.size),
+                "Node ID": remote_jm_device.node_id,
+                "Status": remote_jm_device.status,
             })
 
     data: dict[str, List[Any]] = {
@@ -2429,7 +2421,7 @@ def resume_storage_node(node_id):
         return False
     if snode.enable_ha_jm:
         snode.remote_jm_devices = _connect_to_remote_jm_devs(snode)
-        snode.write_to_db(db_controller.kv_store)
+        snode.write_to_db()
 
     fw_api = FirewallClient(snode, timeout=20, retry=1)
     port_type = "tcp"
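The slim-copy pattern above (uuid, alceml_name, node_id, size, nvmf_multipath, plus status and the remote bdev name) now appears in four call sites across this patch. A possible consolidation, sketched here with a hypothetical helper that is not part of the patch:

    from simplyblock_core.models.nvme_device import NVMeDevice, RemoteDevice

    def remote_device_from_nvme(dev: NVMeDevice, remote_bdev: str = "") -> RemoteDevice:
        # Hypothetical helper, not in this patch: builds the slim record
        # that each call site currently assembles field by field.
        remote = RemoteDevice()
        remote.uuid = dev.uuid                  # uuid and status are inherited from BaseModel
        remote.status = NVMeDevice.STATUS_ONLINE
        remote.alceml_name = dev.alceml_name
        remote.node_id = dev.node_id
        remote.size = dev.size
        remote.nvmf_multipath = dev.nvmf_multipath
        remote.remote_bdev = remote_bdev
        return remote

RemoteJMDevice would add only the jm_bdev field on top of this, as _connect_to_remote_jm_devs does above.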
"Node ID": device.node_id, + "Size": utils.humanbytes(remote_device.size), + "Node ID": remote_device.node_id, "Status": status, }) - for device in snode.remote_jm_devices: - logger.debug(device) + for remote_jm_device in snode.remote_jm_devices: + logger.debug(remote_jm_device) logger.debug("*" * 20) remote_devices.append({ - "UUID": device.uuid, - "Name": device.remote_bdev, - "Size": utils.humanbytes(device.size), - "Node ID": device.node_id, - "Status": device.status, + "UUID": remote_jm_device.uuid, + "Name": remote_jm_device.remote_bdev, + "Size": utils.humanbytes(remote_jm_device.size), + "Node ID": remote_jm_device.node_id, + "Status": remote_jm_device.status, }) data: dict[str, List[Any]] = { @@ -2429,7 +2421,7 @@ def resume_storage_node(node_id): return False if snode.enable_ha_jm: snode.remote_jm_devices = _connect_to_remote_jm_devs(snode) - snode.write_to_db(db_controller.kv_store) + snode.write_to_db() fw_api = FirewallClient(snode, timeout=20, retry=1) port_type = "tcp" diff --git a/simplyblock_core/utils/__init__.py b/simplyblock_core/utils/__init__.py index 0892db54a..fc3d6dc40 100644 --- a/simplyblock_core/utils/__init__.py +++ b/simplyblock_core/utils/__init__.py @@ -514,15 +514,15 @@ def generate_mask(cores): def calculate_pool_count(alceml_count, number_of_distribs, cpu_count, poller_count): ''' Small pool count Large pool count - Create JM 256 32 For each JM + Create JM 32 For each JM - RAID 256 32 2 one for raid of JM and one for raid of ditribs + RAID 32 2 one for raid of JM and one for raid of ditribs - Create Alceml 256 32 For each Alceml + Create Alceml 32 For each Alceml - Create Distrib 256 32 For each distrib + Create Distrib 32 For each distrib - First Send cluster map 256 32 Calculated or one time + First Send cluster map 32 Calculated or one time NVMF transport TCP 127 * poll_groups_mask||CPUCount + 384 15 * poll_groups_mask||CPUCount + 384 Calculated or one time @@ -530,30 +530,23 @@ def calculate_pool_count(alceml_count, number_of_distribs, cpu_count, poller_cou ####Create snapshot 512 64 For each snapshot - ####Clone lvol 256 32 For each clone + ####Clone lvol 32 For each clone ''' poller_number = poller_count if poller_count else cpu_count small_pool_count = 384 * (alceml_count + number_of_distribs + 3 + poller_count) + ( - 6 + alceml_count + number_of_distribs) * 256 + poller_number * 127 + 384 + 128 * poller_number + constants.EXTRA_SMALL_POOL_COUNT + 6 + alceml_count + number_of_distribs) * + poller_number * 127 + 384 + 128 * poller_number + constants.EXTRA_SMALL_POOL_COUNT large_pool_count = 48 * (alceml_count + number_of_distribs + 3 + poller_count) + ( 6 + alceml_count + number_of_distribs) * 32 + poller_number * 15 + 384 + 16 * poller_number + constants.EXTRA_LARGE_POOL_COUNT - return int(4.0 * small_pool_count), int(2.5 * large_pool_count) + return int(small_pool_count), int(large_pool_count) def calculate_minimum_hp_memory(small_pool_count, large_pool_count, lvol_count, max_prov, cpu_count): - ''' - 1092 (initial consumption) + 4 * CPU + 1.0277 * POOL_COUNT(Sum in MB) + (25) * lvol_count - then you can amend the expected memory need for the creation of lvols (6MB), - connection number over lvols (7MB per connection), creation of snaps (12MB), - extra buffer 2GB - return: minimum_hp_memory in bytes - ''' - pool_consumption = (small_pool_count * 8 + large_pool_count * 128) / 1024 + 1092 - memory_consumption = (4 * cpu_count + 1.0277 * pool_consumption + 25 * lvol_count) * (1024 * 1024) + ( - 250 * 1024 * 1024) * 1.1 * convert_size(max_prov, 'TiB') 
+ constants.EXTRA_HUGE_PAGE_MEMORY + + pool_consumption = (small_pool_count * 8 + large_pool_count * 128) / 1024 + memory_consumption = (4 * cpu_count + 1.1 * pool_consumption + 22 * lvol_count) * (1024 * 1024) + constants.EXTRA_HUGE_PAGE_MEMORY return int(1.2 * memory_consumption) @@ -725,13 +718,7 @@ def convert_size(size: Union[int, str], unit: str, round_up: bool = False) -> in raw = size / (base ** exponent) return math.ceil(raw) if round_up else int(raw) -def first_six_chars(s: str) -> str: - """ - Returns the first six characters of a given string. - If the string is shorter than six characters, returns the entire string. - """ - return s[:6] - + def nearest_upper_power_of_2(n): # Check if n is already a power of 2 if (n & (n - 1)) == 0: diff --git a/simplyblock_web/api/internal/storage_node/docker.py b/simplyblock_web/api/internal/storage_node/docker.py index d1ee4f9f0..68a2fb10a 100644 --- a/simplyblock_web/api/internal/storage_node/docker.py +++ b/simplyblock_web/api/internal/storage_node/docker.py @@ -155,7 +155,8 @@ def spdk_process_start(body: SPDKParams): ssd_pcie_list = " ".join(body.ssd_pcie) if body.ssd_pcie else "none" spdk_debug = '1' if body.spdk_debug else '' total_mem_mib = core_utils.convert_size(core_utils.parse_size(body.total_mem), 'MiB') if body.total_mem else '' - spdk_mem_mib = core_utils.convert_size(body.spdk_mem, 'MiB') + # spdk_mem_mib = core_utils.convert_size(body.spdk_mem, 'MiB') + spdk_mem_mib = 0 node_docker = get_docker_client(timeout=60 * 3) for name in {f"/spdk_{body.rpc_port}", f"/spdk_proxy_{body.rpc_port}"}: From 93a9bcda209bc909ea33736a054768e6385cdba7 Mon Sep 17 00:00:00 2001 From: hamdykhader Date: Thu, 13 Nov 2025 18:10:34 +0300 Subject: [PATCH 2/5] fix linter issue --- simplyblock_core/services/storage_node_monitor.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/simplyblock_core/services/storage_node_monitor.py b/simplyblock_core/services/storage_node_monitor.py index b9a38eb42..d1417ed3f 100644 --- a/simplyblock_core/services/storage_node_monitor.py +++ b/simplyblock_core/services/storage_node_monitor.py @@ -1,6 +1,4 @@ # coding=utf-8 -import logging -import sys import threading import time from datetime import datetime, timezone From d4c3864c495acd22fec41ee325fab5cce9b2ae80 Mon Sep 17 00:00:00 2001 From: hamdykhader Date: Thu, 13 Nov 2025 18:18:46 +0300 Subject: [PATCH 3/5] fix typecheck issue --- simplyblock_core/utils/__init__.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/simplyblock_core/utils/__init__.py b/simplyblock_core/utils/__init__.py index fc3d6dc40..2c8ee528d 100644 --- a/simplyblock_core/utils/__init__.py +++ b/simplyblock_core/utils/__init__.py @@ -718,6 +718,12 @@ def convert_size(size: Union[int, str], unit: str, round_up: bool = False) -> in raw = size / (base ** exponent) return math.ceil(raw) if round_up else int(raw) +def first_six_chars(s: str) -> str: + """ + Returns the first six characters of a given string. + If the string is shorter than six characters, returns the entire string. 
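
The revised sizing rule above can be exercised on its own; the sketch below mirrors it, assuming the 8/128 factors are KiB per small/large pool entry and using a hypothetical value for constants.EXTRA_HUGE_PAGE_MEMORY:

    # Illustrative sketch of the revised calculate_minimum_hp_memory() above.
    EXTRA_HUGE_PAGE_MEMORY = 2 * 1024 * 1024 * 1024  # assumed fixed buffer, in bytes

    def minimum_hp_memory(small_pool_count, large_pool_count, lvol_count, cpu_count):
        # pool buffers: small entries assumed 8 KiB, large 128 KiB -> total in MiB
        pool_consumption = (small_pool_count * 8 + large_pool_count * 128) / 1024
        # per-CPU and per-lvol overheads in MiB, converted to bytes, plus the buffer
        memory_consumption = (4 * cpu_count + 1.1 * pool_consumption
                              + 22 * lvol_count) * (1024 * 1024) + EXTRA_HUGE_PAGE_MEMORY
        return int(1.2 * memory_consumption)  # 20% headroom, as in the hunk

    print(minimum_hp_memory(10_000, 1_000, lvol_count=50, cpu_count=16))
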
+ """ + return s[:6] def nearest_upper_power_of_2(n): # Check if n is already a power of 2 From 84ddd701b1d5a73c53bbdeed7e0a2f2c3fa6ef9c Mon Sep 17 00:00:00 2001 From: hamdykhader Date: Tue, 2 Dec 2025 01:35:27 +0300 Subject: [PATCH 4/5] multi fix --- simplyblock_core/cluster_ops.py | 15 +- simplyblock_core/constants.py | 4 +- .../controllers/health_controller.py | 174 +++++--- .../controllers/tasks_controller.py | 43 +- simplyblock_core/distr_controller.py | 51 +-- simplyblock_core/fw_api_client.py | 2 +- simplyblock_core/rpc_client.py | 4 +- .../services/capacity_and_stats_collector.py | 41 +- .../services/health_check_service.py | 21 +- simplyblock_core/services/lvol_monitor.py | 354 +++++++-------- .../services/lvol_stat_collector.py | 107 +++-- simplyblock_core/services/snapshot_monitor.py | 269 ++++++----- .../services/storage_node_monitor.py | 33 +- .../services/tasks_runner_failed_migration.py | 8 +- .../services/tasks_runner_jc_comp.py | 6 +- .../services/tasks_runner_migration.py | 29 +- .../tasks_runner_new_dev_migration.py | 8 +- .../services/tasks_runner_node_add.py | 87 ++-- .../services/tasks_runner_port_allow.py | 420 +++++++++--------- .../services/tasks_runner_restart.py | 31 +- simplyblock_core/snode_client.py | 13 +- simplyblock_core/storage_node_ops.py | 66 +-- simplyblock_core/utils/__init__.py | 29 +- .../api/internal/storage_node/docker.py | 3 +- 24 files changed, 971 insertions(+), 847 deletions(-) diff --git a/simplyblock_core/cluster_ops.py b/simplyblock_core/cluster_ops.py index 24be657d7..d7ac4628a 100644 --- a/simplyblock_core/cluster_ops.py +++ b/simplyblock_core/cluster_ops.py @@ -458,7 +458,6 @@ def add_cluster(blk_size, page_size_in_blocks, cap_warn, cap_crit, prov_cap_warn cluster.strict_node_anti_affinity = strict_node_anti_affinity default_cluster = clusters[0] - cluster.mode = default_cluster.mode cluster.db_connection = default_cluster.db_connection cluster.grafana_secret = monitoring_secret if default_cluster.mode == "kubernetes" else default_cluster.grafana_secret cluster.grafana_endpoint = default_cluster.grafana_endpoint @@ -1132,6 +1131,7 @@ def get_logs(cluster_id, limit=50, **kwargs) -> t.List[dict]: if record.event in ["device_status", "node_status"]: msg = msg+f" ({record.count})" + logger.debug(record) out.append({ "Date": record.get_date_string(), "NodeId": record.node_id, @@ -1154,10 +1154,6 @@ def update_cluster(cluster_id, mgmt_only=False, restart=False, spdk_image=None, logger.info("Updating mgmt cluster") if cluster.mode == "docker": - sbcli=constants.SIMPLY_BLOCK_CLI_NAME - subprocess.check_call(f"pip install {sbcli} --upgrade".split(' ')) - logger.info(f"{sbcli} upgraded") - cluster_docker = utils.get_docker_client(cluster_id) logger.info(f"Pulling image {constants.SIMPLY_BLOCK_DOCKER_IMAGE}") pull_docker_image_with_retry(cluster_docker, constants.SIMPLY_BLOCK_DOCKER_IMAGE) @@ -1171,7 +1167,7 @@ def update_cluster(cluster_id, mgmt_only=False, restart=False, spdk_image=None, for service in cluster_docker.services.list(): if image_parts in service.attrs['Spec']['Labels']['com.docker.stack.image'] or \ "simplyblock" in service.attrs['Spec']['Labels']['com.docker.stack.image']: - if service.name == "app_CachingNodeMonitor": + if service.name in ["app_CachingNodeMonitor", "app_CachedLVolStatsCollector"]: logger.info(f"Removing service {service.name}") service.remove() else: @@ -1281,7 +1277,12 @@ def update_cluster(cluster_id, mgmt_only=False, restart=False, spdk_image=None, logger.info(f"Restarting node: {node.get_id()} with SPDK image: 
{spdk_image}") else: logger.info(f"Restarting node: {node.get_id()}") - storage_node_ops.restart_storage_node(node.get_id(), force=True, spdk_image=spdk_image) + try: + storage_node_ops.restart_storage_node(node.get_id(), force=True, spdk_image=spdk_image) + except Exception as e: + logger.debug(e) + logger.error(f"Failed to restart node: {node.get_id()}") + return logger.info("Done") diff --git a/simplyblock_core/constants.py b/simplyblock_core/constants.py index 36ba14a9e..fd388c140 100644 --- a/simplyblock_core/constants.py +++ b/simplyblock_core/constants.py @@ -139,7 +139,6 @@ def get_config_var(name, default=None): CLIENT_QPAIR_COUNT=3 NVME_TIMEOUT_US=8000000 NVMF_MAX_SUBSYSTEMS=50000 -HA_JM_COUNT=3 KATO=10000 ACK_TO=11 BDEV_RETRY=0 @@ -225,4 +224,5 @@ def get_config_var(name, default=None): qos_class_meta_and_migration_weight_percent = 25 -MIG_PARALLEL_JOBS = 64 \ No newline at end of file +MIG_PARALLEL_JOBS = 64 +MIG_JOB_SIZE = 64 diff --git a/simplyblock_core/controllers/health_controller.py b/simplyblock_core/controllers/health_controller.py index 0180172c1..7c8b95036 100644 --- a/simplyblock_core/controllers/health_controller.py +++ b/simplyblock_core/controllers/health_controller.py @@ -18,7 +18,7 @@ logger = utils.get_logger(__name__) -def check_bdev(name, *, rpc_client=None, bdev_names=None): +def check_bdev(name, *, rpc_client=None, bdev_names=None) -> bool: present = ( ((bdev_names is not None) and (name in bdev_names)) or (rpc_client is not None and (rpc_client.get_bdevs(name) is not None)) @@ -27,7 +27,7 @@ def check_bdev(name, *, rpc_client=None, bdev_names=None): return present -def check_subsystem(nqn, *, rpc_client=None, nqns=None, ns_uuid=None): +def check_subsystem(nqn, *, rpc_client=None, nqns=None, ns_uuid=None) -> bool: if rpc_client: subsystem = subsystems[0] if (subsystems := rpc_client.subsystem_list(nqn)) is not None else None elif nqns: @@ -59,7 +59,7 @@ def check_subsystem(nqn, *, rpc_client=None, nqns=None, ns_uuid=None): for listener in listeners: logger.info(f"Checking listener {listener['traddr']}:{listener['trsvcid']} ... 
ok") - return bool(listeners) and namespaces + return bool(listeners) and bool(namespaces) def check_cluster(cluster_id): @@ -128,43 +128,35 @@ def _check_node_api(ip): return False -def _check_spdk_process_up(ip, rpc_port, cluster_id): - try: - snode_api = SNodeClient(f"{ip}:5000", timeout=90, retry=2) - logger.debug(f"Node API={ip}:5000") - is_up, _ = snode_api.spdk_process_is_up(rpc_port, cluster_id) - logger.debug(f"SPDK is {is_up}") - return is_up - except Exception as e: - logger.debug(e) - return False - - -def _check_port_on_node(snode, port_id): - try: - fw_api = FirewallClient(snode, timeout=5, retry=2) - iptables_command_output, _ = fw_api.get_firewall(snode.rpc_port) - if type(iptables_command_output) is str: - iptables_command_output = [iptables_command_output] - for rules in iptables_command_output: - result = jc.parse('iptables', rules) - for chain in result: - if chain['chain'] in ["INPUT", "OUTPUT"]: # type: ignore - for rule in chain['rules']: # type: ignore - if str(port_id) in rule['options']: # type: ignore - action = rule['target'] # type: ignore - if action in ["DROP"]: - return False - - # check RDMA port block - if snode.active_rdma: - rdma_fw_port_list = snode.rpc_client().nvmf_get_blocked_ports_rdma() - if port_id in rdma_fw_port_list: - return False +def _check_spdk_process_up(ip, rpc_port): + snode_api = SNodeClient(f"{ip}:5000", timeout=90, retry=2) + logger.debug(f"Node API={ip}:5000") + is_up, _ = snode_api.spdk_process_is_up(rpc_port) + logger.debug(f"SPDK is {is_up}") + return is_up + + +def check_port_on_node(snode, port_id): + fw_api = FirewallClient(snode, timeout=5, retry=2) + iptables_command_output, _ = fw_api.get_firewall(snode.rpc_port) + if type(iptables_command_output) is str: + iptables_command_output = [iptables_command_output] + for rules in iptables_command_output: + result = jc.parse('iptables', rules) + for chain in result: + if chain['chain'] in ["INPUT", "OUTPUT"]: # type: ignore + for rule in chain['rules']: # type: ignore + if str(port_id) in rule['options']: # type: ignore + action = rule['target'] # type: ignore + if action in ["DROP"]: + return False + + # check RDMA port block + if snode.active_rdma: + rdma_fw_port_list = snode.rpc_client().nvmf_get_blocked_ports_rdma() + if port_id in rdma_fw_port_list: + return False - return True - except Exception as e: - logger.error(e) return True @@ -175,7 +167,7 @@ def _check_node_ping(ip): else: return False -def _check_node_hublvol(node: StorageNode, node_bdev_names=None, node_lvols_nqns=None): +def _check_node_hublvol(node: StorageNode, node_bdev_names=None, node_lvols_nqns=None) -> bool: if not node.hublvol: logger.error(f"Node {node.get_id()} does not have a hublvol") return False @@ -235,15 +227,17 @@ def _check_node_hublvol(node: StorageNode, node_bdev_names=None, node_lvols_nqns passed = False else: lvs_info_dict.append({"Key": k, "Value": v, "expected": " "}) - for line in utils.print_table(lvs_info_dict).splitlines(): - logger.info(line) + if not passed: + for line in utils.print_table(lvs_info_dict).splitlines(): + logger.info(line) except Exception as e: logger.exception(e) + return False return passed -def _check_sec_node_hublvol(node: StorageNode, node_bdev=None, node_lvols_nqns=None, auto_fix=False): +def _check_sec_node_hublvol(node: StorageNode, node_bdev=None, node_lvols_nqns=None, auto_fix=False) -> bool: db_controller = DBController() try: primary_node = db_controller.get_storage_node_by_id(node.lvstore_stack_secondary_1) @@ -294,6 +288,16 @@ def 
_check_sec_node_hublvol(node: StorageNode, node_bdev=None, node_lvols_nqns=N passed = bool(ret) logger.info(f"Checking controller: {primary_node.hublvol.bdev_name} ... {passed}") + node_bdev = {} + ret = rpc_client.get_bdevs() + if ret: + for b in ret: + node_bdev[b['name']] = b + for al in b['aliases']: + node_bdev[al]= b + else: + node_bdev = [] + passed &= check_bdev(primary_node.hublvol.get_remote_bdev_name(), bdev_names=node_bdev) if not passed: return False @@ -331,20 +335,19 @@ def _check_sec_node_hublvol(node: StorageNode, node_bdev=None, node_lvols_nqns=N else: lvs_info_dict.append({"Key": k, "Value": v, "expected": " "}) - for line in utils.print_table(lvs_info_dict).splitlines(): - logger.info(line) + if not passed: + for line in utils.print_table(lvs_info_dict).splitlines(): + logger.info(line) except Exception as e: logger.exception(e) + return False return passed def _check_node_lvstore( - lvstore_stack, node, auto_fix=False, node_bdev_names=None, stack_src_node=None): + lvstore_stack, node, auto_fix=False, node_bdev_names=None, stack_src_node=None) -> bool: db_controller = DBController() - lvstore_check = True logger.info(f"Checking distr stack on node : {node.get_id()}") - rpc_client = RPCClient( - node.mgmt_ip, node.rpc_port, node.rpc_username, node.rpc_password, timeout=5, retry=1) cluster = db_controller.get_cluster_by_id(node.cluster_id) if cluster.status not in [Cluster.STATUS_ACTIVE, Cluster.STATUS_DEGRADED, Cluster.STATUS_READONLY]: auto_fix = False @@ -367,7 +370,12 @@ def _check_node_lvstore( node_distribs_list = bdev["distribs_list"] if not node_bdev_names: - ret = rpc_client.get_bdevs() + try: + ret = node.rpc_client().get_bdevs() + except Exception as e: + logger.info(e) + return False + if ret: node_bdev_names = [b['name'] for b in ret] else: @@ -393,22 +401,34 @@ def _check_node_lvstore( for jm in jm_names: logger.info(jm) logger.info("Checking Distr map ...") - ret = rpc_client.distr_get_cluster_map(distr) + try: + ret = node.rpc_client().distr_get_cluster_map(distr) + except Exception as e: + logger.info(f"Failed to get cluster map: {e}") + return False if not ret: logger.error("Failed to get cluster map") - lvstore_check = False + return False else: results, is_passed = distr_controller.parse_distr_cluster_map(ret, nodes, devices) if results: - logger.info(utils.print_table(results)) logger.info(f"Checking Distr map ... 
{is_passed}") - if not is_passed and auto_fix: + if is_passed: + continue + + elif not auto_fix: + return False + + else: # is_passed is False and auto_fix is True + logger.info(utils.print_table(results)) for result in results: if result['Results'] == 'failed': if result['Kind'] == "Device": if result['Found Status']: dev = db_controller.get_storage_device_by_id(result['UUID']) - if dev.status == NVMeDevice.STATUS_ONLINE: + dev_node = db_controller.get_storage_node_by_id(dev.node_id) + if dev.status == NVMeDevice.STATUS_ONLINE and dev_node.status in [ + StorageNode.STATUS_ONLINE, StorageNode.STATUS_DOWN]: try: remote_bdev = storage_node_ops.connect_device( f"remote_{dev.alceml_bdev}", dev, node, @@ -435,37 +455,51 @@ def _check_node_lvstore( distr_controller.send_dev_status_event(dev, dev.status, node) except Exception as e: logger.error(f"Failed to connect to {dev.get_id()}: {e}") + else: + distr_controller.send_dev_status_event(dev, dev.status, node) + if result['Kind'] == "Node": n = db_controller.get_storage_node_by_id(result['UUID']) distr_controller.send_node_status_event(n, n.status, node) - ret = rpc_client.distr_get_cluster_map(distr) + + try: + ret = node.rpc_client().distr_get_cluster_map(distr) + except Exception as e: + logger.error(e) + return False if not ret: logger.error("Failed to get cluster map") - lvstore_check = False + return False else: results, is_passed = distr_controller.parse_distr_cluster_map(ret, nodes, devices) logger.info(f"Checking Distr map ... {is_passed}") + if not is_passed: + return False else: logger.error("Failed to parse distr cluster map") - lvstore_check &= is_passed + return False else: logger.info(f"Checking distr bdev : {distr} ... not found") - lvstore_check = False + return False if raid: if raid in node_bdev_names: logger.info(f"Checking raid bdev: {raid} ... ok") else: logger.info(f"Checking raid bdev: {raid} ... not found") - lvstore_check = False + return False if bdev_lvstore: - ret = rpc_client.bdev_lvol_get_lvstores(bdev_lvstore) + try: + ret = node.rpc_client().bdev_lvol_get_lvstores(bdev_lvstore) + except Exception as e: + logger.error(e) + return False if ret: logger.info(f"Checking lvstore: {bdev_lvstore} ... ok") else: logger.info(f"Checking lvstore: {bdev_lvstore} ... not found") - lvstore_check = False - return lvstore_check + return False + return True def check_node(node_id, with_devices=True): db_controller = DBController() @@ -508,13 +542,19 @@ def check_node(node_id, with_devices=True): if snode.lvstore_stack_secondary_1: try: n = db_controller.get_storage_node_by_id(snode.lvstore_stack_secondary_1) - lvol_port_check = _check_port_on_node(snode, n.lvol_subsys_port) + lvol_port_check = check_port_on_node(snode, n.lvol_subsys_port) logger.info(f"Check: node {snode.mgmt_ip}, port: {n.lvol_subsys_port} ... {lvol_port_check}") except KeyError: - pass + logger.error("node not found") + except Exception: + logger.error("Check node port failed, connection error") + if not snode.is_secondary_node: - lvol_port_check = _check_port_on_node(snode, snode.lvol_subsys_port) - logger.info(f"Check: node {snode.mgmt_ip}, port: {snode.lvol_subsys_port} ... {lvol_port_check}") + try: + lvol_port_check = check_port_on_node(snode, snode.lvol_subsys_port) + logger.info(f"Check: node {snode.mgmt_ip}, port: {snode.lvol_subsys_port} ... 
{lvol_port_check}") + except Exception: + logger.error("Check node port failed, connection error") is_node_online = ping_check and node_api_check and node_rpc_check diff --git a/simplyblock_core/controllers/tasks_controller.py b/simplyblock_core/controllers/tasks_controller.py index b7c434f63..dab539943 100644 --- a/simplyblock_core/controllers/tasks_controller.py +++ b/simplyblock_core/controllers/tasks_controller.py @@ -100,11 +100,13 @@ def add_device_mig_task(device_id_list, cluster_id): device = db.get_storage_device_by_id(device_id_list[0]) tasks = db.get_job_tasks(cluster_id) + master_task = None for task in tasks: if task.function_name == JobSchedule.FN_BALANCING_AFTER_NODE_RESTART : if task.status != JobSchedule.STATUS_DONE and task.canceled is False: - logger.info(f"Task found, skip adding new task: {task.get_id()}") - return False + logger.info("Master task found, skip adding new master task") + master_task = task + break for node in db.get_storage_nodes_by_cluster_id(cluster_id): if node.status == StorageNode.STATUS_REMOVED: @@ -117,16 +119,19 @@ def add_device_mig_task(device_id_list, cluster_id): if task_id: sub_tasks.append(task_id) if sub_tasks: - task_obj = JobSchedule() - task_obj.uuid = str(uuid.uuid4()) - task_obj.cluster_id = cluster_id - task_obj.date = int(time.time()) - task_obj.function_name = JobSchedule.FN_BALANCING_AFTER_NODE_RESTART - task_obj.sub_tasks = sub_tasks - task_obj.status = JobSchedule.STATUS_NEW - task_obj.write_to_db(db.kv_store) - tasks_events.task_create(task_obj) - + if master_task: + master_task.sub_tasks.extend(sub_tasks) + master_task.write_to_db() + else: + task_obj = JobSchedule() + task_obj.uuid = str(uuid.uuid4()) + task_obj.cluster_id = cluster_id + task_obj.date = int(time.time()) + task_obj.function_name = JobSchedule.FN_BALANCING_AFTER_NODE_RESTART + task_obj.sub_tasks = sub_tasks + task_obj.status = JobSchedule.STATUS_NEW + task_obj.write_to_db(db.kv_store) + tasks_events.task_create(task_obj) return True @@ -155,13 +160,15 @@ def list_tasks(cluster_id, is_json=False, limit=50, **kwargs): return False data = [] - tasks = db.get_job_tasks(cluster_id, reverse=True, limit=limit) + tasks = db.get_job_tasks(cluster_id, reverse=True) tasks.reverse() if is_json is True: for t in tasks: if t.function_name == JobSchedule.FN_DEV_MIG: continue data.append(t.get_clean_dict()) + if len(data)+1 > limit > 0: + return json.dumps(data, indent=2) return json.dumps(data, indent=2) for task in tasks: @@ -171,7 +178,7 @@ def list_tasks(cluster_id, is_json=False, limit=50, **kwargs): retry = f"{task.retry}/{task.max_retry}" else: retry = f"{task.retry}" - + logger.debug(task) upd = task.updated_at if upd: try: @@ -197,6 +204,8 @@ def list_tasks(cluster_id, is_json=False, limit=50, **kwargs): "Result": task.function_result, "Updated At": upd or "", }) + if len(data)+1 > limit > 0: + return utils.print_table(data) return utils.print_table(data) @@ -239,6 +248,7 @@ def get_subtasks(master_task_id): except Exception as e: logger.error(e) + logger.debug(sub_task) data.append({ "Task ID": sub_task.uuid, "Node ID / Device ID": f"{sub_task.node_id}\n{sub_task.device_id}", @@ -308,7 +318,8 @@ def add_new_device_mig_task(device_id): def add_node_add_task(cluster_id, function_params): - return _add_task(JobSchedule.FN_NODE_ADD, cluster_id, "", "", function_params=function_params) + return _add_task(JobSchedule.FN_NODE_ADD, cluster_id, "", "", + function_params=function_params, max_retry=11) def get_active_node_tasks(cluster_id, node_id): @@ -339,7 +350,7 @@ def 
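
add_device_mig_task() now folds fresh sub-tasks into a still-running master task instead of spawning a duplicate; a standalone sketch of that upsert, with plain dicts standing in for JobSchedule objects (field names follow the hunk, the function name is hypothetical):

    import time
    import uuid

    def upsert_master_task(tasks, sub_tasks, cluster_id):
        for task in tasks:
            if (task['function_name'] == 'balancing_after_node_restart'
                    and task['status'] != 'done' and not task['canceled']):
                task['sub_tasks'].extend(sub_tasks)  # reuse the running master task
                return task
        master = {
            'uuid': str(uuid.uuid4()),
            'cluster_id': cluster_id,
            'date': int(time.time()),
            'function_name': 'balancing_after_node_restart',
            'status': 'new',
            'canceled': False,
            'sub_tasks': list(sub_tasks),
        }
        tasks.append(master)
        return master
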
get_new_device_mig_task(cluster_id, node_id, distr_name, dev_id=None): def get_device_mig_task(cluster_id, node_id, device_id, distr_name): tasks = db.get_job_tasks(cluster_id) for task in tasks: - if task.function_name == JobSchedule.FN_DEV_MIG and task.node_id == node_id and task.device_id == device_id: + if task.function_name == JobSchedule.FN_DEV_MIG and task.node_id == node_id: if task.status != JobSchedule.STATUS_DONE and task.canceled is False \ and "distr_name" in task.function_params and task.function_params["distr_name"] == distr_name: return task.uuid diff --git a/simplyblock_core/distr_controller.py b/simplyblock_core/distr_controller.py index 57203ebb1..5a169cbd8 100644 --- a/simplyblock_core/distr_controller.py +++ b/simplyblock_core/distr_controller.py @@ -46,8 +46,9 @@ def send_node_status_event(node, node_status, target_node=None): continue logger.info(f"Sending to: {node.get_id()}") rpc_client = RPCClient(node.mgmt_ip, node.rpc_port, node.rpc_username, node.rpc_password, timeout=3, retry=1) - ret = rpc_client.distr_status_events_update(events) - if not ret: + try: + rpc_client.distr_status_events_update(events) + except Exception: logger.warning("Failed to send event update") @@ -95,9 +96,9 @@ def send_dev_status_event(device, status, target_node=None): "storage_ID": storage_ID, "status": dev_status}]} logger.debug(f"Sending event updates, device: {storage_ID}, status: {dev_status}, node: {node.get_id()}") - rpc_client = RPCClient(node.mgmt_ip, node.rpc_port, node.rpc_username, node.rpc_password, timeout=3, retry=1) - ret = rpc_client.distr_status_events_update(events) - if not ret: + try: + node.rpc_client(timeout=3, retry=1).distr_status_events_update(events) + except Exception: logger.warning("Failed to send event update") @@ -259,38 +260,26 @@ def parse_distr_cluster_map(map_string, nodes=None, devices=None): return results, passed -def send_cluster_map_to_node(node): +def send_cluster_map_to_node(node: StorageNode): db_controller = DBController() snodes = db_controller.get_storage_nodes_by_cluster_id(node.cluster_id) - rpc_client = RPCClient(node.mgmt_ip, node.rpc_port, node.rpc_username, node.rpc_password, timeout=10) - - # if node.lvstore_stack_secondary_1: - # for snode in db_controller.get_primary_storage_nodes_by_secondary_node_id(node.get_id()): - # for bdev in snode.lvstore_stack: - # if bdev['type'] == "bdev_distr": - # cluster_map_data = get_distr_cluster_map(snodes, node, bdev["name"]) - # ret = rpc_client.distr_send_cluster_map(cluster_map_data) - # if not ret: - # logger.error("Failed to send cluster map") - # return False - # return True - # else: cluster_map_data = get_distr_cluster_map(snodes, node) - ret = rpc_client.distr_send_cluster_map(cluster_map_data) - if not ret: + try: + node.rpc_client(timeout=10).distr_send_cluster_map(cluster_map_data) + except Exception: logger.error("Failed to send cluster map") logger.info(cluster_map_data) return False return True -def send_cluster_map_to_distr(node, distr_name): +def send_cluster_map_to_distr(node: StorageNode, distr_name: str): db_controller = DBController() snodes = db_controller.get_storage_nodes_by_cluster_id(node.cluster_id) - rpc_client = RPCClient(node.mgmt_ip, node.rpc_port, node.rpc_username, node.rpc_password, timeout=10) cluster_map_data = get_distr_cluster_map(snodes, node, distr_name) - ret = rpc_client.distr_send_cluster_map(cluster_map_data) - if not ret: + try: + node.rpc_client(timeout=10).distr_send_cluster_map(cluster_map_data) + except Exception: logger.error("Failed to send 
cluster map") logger.info(cluster_map_data) return False @@ -301,14 +290,13 @@ def send_cluster_map_add_node(snode, target_node): if target_node.status != StorageNode.STATUS_ONLINE: return False logger.info(f"Sending to: {target_node.get_id()}") - rpc_client = RPCClient(target_node.mgmt_ip, target_node.rpc_port, target_node.rpc_username, target_node.rpc_password, timeout=5) - cluster_map_data = get_distr_cluster_map([snode], target_node) cl_map = { "map_cluster": cluster_map_data['map_cluster'], "map_prob": cluster_map_data['map_prob']} - ret = rpc_client.distr_add_nodes(cl_map) - if not ret: + try: + target_node.rpc_client(timeout=10).distr_add_nodes(cl_map) + except Exception: logger.error("Failed to send cluster map") return False return True @@ -362,8 +350,9 @@ def send_cluster_map_add_device(device: NVMeDevice, target_node: StorageNode): "weight": dev_w_gib, }} } - ret = rpc_client.distr_add_devices(cl_map) - if not ret: + try: + rpc_client.distr_add_devices(cl_map) + except Exception: logger.error("Failed to send cluster map") return False return True diff --git a/simplyblock_core/fw_api_client.py b/simplyblock_core/fw_api_client.py index d17255c80..074bcc3dc 100644 --- a/simplyblock_core/fw_api_client.py +++ b/simplyblock_core/fw_api_client.py @@ -41,7 +41,7 @@ def _request(self, method, path, payload=None): response = self.session.request(method, self.url+path, data=data, timeout=self.timeout, params=params) except Exception as e: - raise e + raise FirewallClientException(str(e)) logger.debug("Response: status_code: %s, content: %s", response.status_code, response.content) diff --git a/simplyblock_core/rpc_client.py b/simplyblock_core/rpc_client.py index ce48e1796..d8f51fb43 100644 --- a/simplyblock_core/rpc_client.py +++ b/simplyblock_core/rpc_client.py @@ -922,7 +922,7 @@ def distr_migration_status(self, name): params = {"name": name} return self._request("distr_migration_status", params) - def distr_migration_failure_start(self, name, storage_ID, qos_high_priority=False, job_size=64, jobs=64): + def distr_migration_failure_start(self, name, storage_ID, qos_high_priority=False, job_size=constants.MIG_JOB_SIZE, jobs=constants.MIG_PARALLEL_JOBS): params = { "name": name, "storage_ID": storage_ID, @@ -935,7 +935,7 @@ def distr_migration_failure_start(self, name, storage_ID, qos_high_priority=Fals params["jobs"] = jobs return self._request("distr_migration_failure_start", params) - def distr_migration_expansion_start(self, name, qos_high_priority=False, job_size=64, jobs=64): + def distr_migration_expansion_start(self, name, qos_high_priority=False, job_size=constants.MIG_JOB_SIZE, jobs=constants.MIG_PARALLEL_JOBS): params = { "name": name, } diff --git a/simplyblock_core/services/capacity_and_stats_collector.py b/simplyblock_core/services/capacity_and_stats_collector.py index 022dd84b5..07a850edd 100644 --- a/simplyblock_core/services/capacity_and_stats_collector.py +++ b/simplyblock_core/services/capacity_and_stats_collector.py @@ -4,7 +4,6 @@ from simplyblock_core import constants, db_controller, utils from simplyblock_core.models.nvme_device import NVMeDevice from simplyblock_core.models.storage_node import StorageNode -from simplyblock_core.rpc_client import RPCClient from simplyblock_core.models.stats import DeviceStatObject, NodeStatObject, ClusterStatObject logger = utils.get_logger(__name__) @@ -62,17 +61,17 @@ def add_device_stats(cl, device, capacity_dict, stats_dict): if last_record: time_diff = (now - last_record.date) if time_diff > 0: - data['read_bytes_ps'] = 
int((data['read_bytes'] - last_record['read_bytes']) / time_diff) - data['read_io_ps'] = int((data['read_io'] - last_record['read_io']) / time_diff) - data['read_latency_ps'] = int((data['read_latency_ticks'] - last_record['read_latency_ticks']) / time_diff) + data['read_bytes_ps'] = abs(int((data['read_bytes'] - last_record['read_bytes']) / time_diff)) + data['read_io_ps'] = abs(int((data['read_io'] - last_record['read_io']) / time_diff)) + data['read_latency_ps'] = abs(int((data['read_latency_ticks'] - last_record['read_latency_ticks']) / time_diff)) - data['write_bytes_ps'] = int((data['write_bytes'] - last_record['write_bytes']) / time_diff) - data['write_io_ps'] = int((data['write_io'] - last_record['write_io']) / time_diff) - data['write_latency_ps'] = int((data['write_latency_ticks'] - last_record['write_latency_ticks']) / time_diff) + data['write_bytes_ps'] = abs(int((data['write_bytes'] - last_record['write_bytes']) / time_diff)) + data['write_io_ps'] = abs(int((data['write_io'] - last_record['write_io']) / time_diff)) + data['write_latency_ps'] = abs(int((data['write_latency_ticks'] - last_record['write_latency_ticks']) / time_diff)) - data['unmap_bytes_ps'] = int((data['unmap_bytes'] - last_record['unmap_bytes']) / time_diff) - data['unmap_io_ps'] = int((data['unmap_io'] - last_record['unmap_io']) / time_diff) - data['unmap_latency_ps'] = int((data['unmap_latency_ticks'] - last_record['unmap_latency_ticks']) / time_diff) + data['unmap_bytes_ps'] = abs(int((data['unmap_bytes'] - last_record['unmap_bytes']) / time_diff)) + data['unmap_io_ps'] = abs(int((data['unmap_io'] - last_record['unmap_io']) / time_diff)) + data['unmap_latency_ps'] = abs(int((data['unmap_latency_ticks'] - last_record['unmap_latency_ticks']) / time_diff)) else: logger.warning("last record not found") @@ -188,15 +187,15 @@ def add_cluster_stats(cl, records): logger.error("No devices found in node: %s", node.get_id()) continue - rpc_client = RPCClient( - node.mgmt_ip, node.rpc_port, - node.rpc_username, node.rpc_password, - timeout=5, retry=2) - + rpc_client = node.rpc_client(timeout=5, retry=2) node_devs_stats = {} - ret = rpc_client.get_lvol_stats() - if ret: - node_devs_stats = {b['name']: b for b in ret['bdevs']} + try: + ret = rpc_client.get_lvol_stats() + if ret: + node_devs_stats = {b['name']: b for b in ret['bdevs']} + except Exception as e: + logger.error(e) + continue devices_records = [] for device in node.nvme_devices: @@ -204,7 +203,11 @@ def add_cluster_stats(cl, records): if device.status not in [NVMeDevice.STATUS_ONLINE, NVMeDevice.STATUS_READONLY, NVMeDevice.STATUS_CANNOT_ALLOCATE]: logger.info(f"Device is skipped: {device.get_id()} status: {device.status}") continue - capacity_dict = rpc_client.alceml_get_capacity(device.alceml_name) + try: + capacity_dict = rpc_client.alceml_get_capacity(device.alceml_name) + except Exception as e: + logger.error(e) + continue if device.nvme_bdev in node_devs_stats: stats_dict = node_devs_stats[device.nvme_bdev] record = add_device_stats(cl, device, capacity_dict, stats_dict) diff --git a/simplyblock_core/services/health_check_service.py b/simplyblock_core/services/health_check_service.py index f822e199f..fc7be7fb1 100644 --- a/simplyblock_core/services/health_check_service.py +++ b/simplyblock_core/services/health_check_service.py @@ -45,6 +45,7 @@ def set_device_health_check(cluster_id, device, health_check_status): def check_node(snode, logger): + snode = db.get_storage_node_by_id(snode.get_id()) logger.info("Node: %s, status %s", snode.get_id(), 
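
The abs() wrappers above keep the per-second rates non-negative when a bdev counter resets (for example after an SPDK restart) and the new sample lands below the previous one; a minimal illustration:

    def per_second(curr: int, prev: int, dt: float) -> int:
        # Counter delta per second, clamped with abs() as in the hunk above.
        return abs(int((curr - prev) / dt))

    # A reset mid-interval (500 -> 120 over 10s) still yields a positive rate.
    assert per_second(120, 500, 10) == 38
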
snode.status) if snode.status not in [StorageNode.STATUS_ONLINE, StorageNode.STATUS_UNREACHABLE, @@ -139,7 +140,7 @@ def check_node(snode, logger): for remote_device in snode.remote_devices: org_dev = db.get_storage_device_by_id(remote_device.get_id()) org_node = db.get_storage_node_by_id(remote_device.node_id) - if org_dev.status == NVMeDevice.STATUS_ONLINE and org_node.status == StorageNode.STATUS_ONLINE: + if org_dev.status == NVMeDevice.STATUS_ONLINE and org_node.status in [StorageNode.STATUS_ONLINE, StorageNode.STATUS_DOWN]: if health_controller.check_bdev(remote_device.remote_bdev, bdev_names=node_bdev_names): connected_devices.append(remote_device.get_id()) continue @@ -242,11 +243,14 @@ def check_node(snode, logger): ports.append(second_node_1.lvol_subsys_port) for port in ports: - lvol_port_check = health_controller._check_port_on_node(snode, port) - logger.info( - f"Check: node {snode.mgmt_ip}, port: {port} ... {lvol_port_check}") - if not lvol_port_check and snode.status != StorageNode.STATUS_SUSPENDED: - tasks_controller.add_port_allow_task(snode.cluster_id, snode.get_id(), port) + try: + lvol_port_check = health_controller.check_port_on_node(snode, port) + logger.info( + f"Check: node {snode.mgmt_ip}, port: {port} ... {lvol_port_check}") + if not lvol_port_check and snode.status != StorageNode.STATUS_SUSPENDED: + tasks_controller.add_port_allow_task(snode.cluster_id, snode.get_id(), port) + except Exception: + logger.error("Check node port failed, connection error") health_check_status = is_node_online and node_devices_check and node_remote_devices_check and lvstore_check set_node_health_check(snode, bool(health_check_status)) @@ -260,7 +264,10 @@ def loop_for_node(snode): logger_handler.setFormatter(logging.Formatter(f'%(asctime)s: node:{snode.mgmt_ip} %(levelname)s: %(message)s')) logger.addHandler(logger_handler) while True: - check_node(snode, logger) + try: + check_node(snode, logger) + except Exception as e: + logger.error(e) time.sleep(constants.HEALTH_CHECK_INTERVAL_SEC) diff --git a/simplyblock_core/services/lvol_monitor.py b/simplyblock_core/services/lvol_monitor.py index 8486f3a32..ddb845169 100644 --- a/simplyblock_core/services/lvol_monitor.py +++ b/simplyblock_core/services/lvol_monitor.py @@ -159,195 +159,201 @@ def process_lvol_delete_try_again(lvol): lvol.write_to_db() -# get DB controller -db = db_controller.DBController() +def check_node(snode): + node_bdev_names = [] + node_lvols_nqns = {} + sec_node_bdev_names = {} + sec_node_lvols_nqns = {} + sec_node = None -logger.info("Starting LVol monitor...") -while True: + if snode.status in [StorageNode.STATUS_ONLINE, StorageNode.STATUS_SUSPENDED, StorageNode.STATUS_DOWN]: + node_bdevs = snode.rpc_client().get_bdevs() + if node_bdevs: + node_bdev_names = [b['name'] for b in node_bdevs] + for bdev in node_bdevs: + if "aliases" in bdev and bdev["aliases"]: + node_bdev_names.extend(bdev['aliases']) + ret = snode.rpc_client().subsystem_list() + if ret: + for sub in ret: + node_lvols_nqns[sub['nqn']] = sub - for cluster in db.get_clusters(): + if snode.secondary_node_id: + sec_node = db.get_storage_node_by_id(snode.secondary_node_id) + if sec_node and sec_node.status == StorageNode.STATUS_ONLINE: + sec_rpc_client = RPCClient( + sec_node.mgmt_ip, sec_node.rpc_port, + sec_node.rpc_username, sec_node.rpc_password, timeout=3, retry=2) + ret = sec_rpc_client.get_bdevs() + if ret: + for bdev in ret: + sec_node_bdev_names[bdev['name']] = bdev + + ret = sec_rpc_client.subsystem_list() + if ret: + for sub in ret: + 
sec_node_lvols_nqns[sub['nqn']] = sub + + for lvol in db.get_lvols_by_node_id(snode.get_id()): + + if lvol.status == LVol.STATUS_IN_CREATION: + continue + + if lvol.status == lvol.STATUS_IN_DELETION: + # check leadership + leader_node = None + snode = db.get_storage_node_by_id(snode.get_id()) + if snode.status in [StorageNode.STATUS_ONLINE, StorageNode.STATUS_SUSPENDED, StorageNode.STATUS_DOWN]: + ret = snode.rpc_client().bdev_lvol_get_lvstores(snode.lvstore) + if not ret: + raise Exception("Failed to get LVol info") + lvs_info = ret[0] + if "lvs leadership" in lvs_info and lvs_info['lvs leadership']: + leader_node = snode + + if not leader_node and sec_node: + ret = sec_node.rpc_client().bdev_lvol_get_lvstores(snode.lvstore) + if not ret: + raise Exception("Failed to get LVol info") + lvs_info = ret[0] + if "lvs leadership" in lvs_info and lvs_info['lvs leadership']: + leader_node = sec_node + + if not leader_node: + raise Exception("Failed to get leader node") + + if lvol.deletion_status == "" or lvol.deletion_status != leader_node.get_id(): + lvol_controller.delete_lvol_from_node(lvol.get_id(), leader_node.get_id()) + time.sleep(3) + + try: + ret = leader_node.rpc_client().bdev_lvol_get_lvol_delete_status( + f"{lvol.lvs_name}/{lvol.lvol_bdev}") + except Exception as e: + logger.error(e) + # timeout detected, check other node + break + + if ret == 0 or ret == 2: # Lvol may have already been deleted (not found) or delete completed + process_lvol_delete_finish(lvol) + + elif ret == 1: # Async lvol deletion is in progress or queued + logger.info(f"LVol deletion in progress, id: {lvol.get_id()}") + pre_lvol_delete_rebalance() + + elif ret == 3: # Async deletion is done, but leadership has changed (sync deletion is now blocked) + logger.info(f"LVol deletion error, id: {lvol.get_id()}, error code: {ret}") + logger.error("Async deletion is done, but leadership has changed (sync deletion is now blocked)") + + elif ret == 4: # No async delete request exists for this lvol + logger.info(f"LVol deletion error, id: {lvol.get_id()}, error code: {ret}") + logger.error("No async delete request exists for this lvol") + lvol = db.get_lvol_by_id(lvol.get_id()) + lvol.io_error = True + lvol.write_to_db() + set_lvol_status(lvol, LVol.STATUS_OFFLINE) + + elif ret == -1: # Operation not permitted + logger.info(f"LVol deletion error, id: {lvol.get_id()}, error code: {ret}") + logger.error("Operation not permitted") + lvol = db.get_lvol_by_id(lvol.get_id()) + lvol.io_error = True + lvol.write_to_db() + set_lvol_status(lvol, LVol.STATUS_OFFLINE) + + elif ret == -2: # No such file or directory + logger.info(f"LVol deletion error, id: {lvol.get_id()}, error code: {ret}") + logger.error("No such file or directory") + process_lvol_delete_finish(lvol) + + elif ret == -5: # I/O error + logger.info(f"LVol deletion error, id: {lvol.get_id()}, error code: {ret}") + logger.error("I/O error") + process_lvol_delete_try_again(lvol) + + elif ret == -11: # Try again + logger.info(f"LVol deletion error, id: {lvol.get_id()}, error code: {ret}") + logger.error("Try again") + process_lvol_delete_try_again(lvol) + + elif ret == -12: # Out of memory + logger.info(f"LVol deletion error, id: {lvol.get_id()}, error code: {ret}") + logger.error("Out of memory") + process_lvol_delete_try_again(lvol) + + elif ret == -16: # Device or resource busy + logger.info(f"LVol deletion error, id: {lvol.get_id()}, error code: {ret}") + logger.error("Device or resource busy") + process_lvol_delete_try_again(lvol) + + elif ret == -19: # No such device 
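
Taken together, the error-code branches in this cascade reduce to a small dispatch table over the errno-style codes returned by bdev_lvol_get_lvol_delete_status(); a condensed sketch, with the action labels standing in for the handlers used above:

    RETRY = {-5, -11, -12, -16, -35, -36}  # EIO, EAGAIN, ENOMEM, EBUSY, leadership, update failed
    FINISH = {0, 2, -2, -19}               # not found, completed, ENOENT, ENODEV
    OFFLINE = {4, -1}                      # no async delete request, EPERM

    def classify_delete_status(ret: int) -> str:
        if ret == 1:
            return 'in_progress'
        if ret == 3:
            return 'log_only'   # async done, but leadership changed
        if ret in FINISH:
            return 'finish'
        if ret in OFFLINE:
            return 'offline'
        if ret in RETRY:
            return 'try_again'
        return 'error'          # unknown code: log it and leave the lvol as-is
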
+ logger.info(f"LVol deletion error, id: {lvol.get_id()}, error code: {ret}") + logger.error("Finishing lvol delete") + process_lvol_delete_finish(lvol) + + elif ret == -35: # Leadership changed + logger.info(f"LVol deletion error, id: {lvol.get_id()}, error code: {ret}") + logger.error("Leadership changed") + process_lvol_delete_try_again(lvol) + + elif ret == -36: # Failed to update lvol for deletion + logger.info(f"LVol deletion error, id: {lvol.get_id()}, error code: {ret}") + logger.error("Failed to update lvol for deletion") + process_lvol_delete_try_again(lvol) + + else: # Failed to update lvol for deletion + logger.info(f"LVol deletion error, id: {lvol.get_id()}, error code: {ret}") + logger.error("Failed to update lvol for deletion") - if cluster.status in [Cluster.STATUS_INACTIVE, Cluster.STATUS_UNREADY, Cluster.STATUS_IN_ACTIVATION]: - logger.warning(f"Cluster {cluster.get_id()} is in {cluster.status} state, skipping") continue - for snode in db.get_storage_nodes_by_cluster_id(cluster.get_id()): - node_bdev_names = [] - node_lvols_nqns = {} - sec_node_bdev_names = {} - sec_node_lvols_nqns = {} - sec_node = None + passed = True + ret = health_controller.check_lvol_on_node( + lvol.get_id(), lvol.node_id, node_bdev_names, node_lvols_nqns) + if not ret: + passed = False - if snode.status in [StorageNode.STATUS_ONLINE, StorageNode.STATUS_SUSPENDED, StorageNode.STATUS_DOWN]: - node_bdevs = snode.rpc_client().get_bdevs() - if node_bdevs: - node_bdev_names = [b['name'] for b in node_bdevs] - for bdev in node_bdevs: - if "aliases" in bdev and bdev["aliases"]: - node_bdev_names.extend(bdev['aliases']) - ret = snode.rpc_client().subsystem_list() - if ret: - for sub in ret: - node_lvols_nqns[sub['nqn']] = sub - - if snode.secondary_node_id: - sec_node = db.get_storage_node_by_id(snode.secondary_node_id) - if sec_node and sec_node.status==StorageNode.STATUS_ONLINE: - sec_rpc_client = RPCClient( - sec_node.mgmt_ip, sec_node.rpc_port, - sec_node.rpc_username, sec_node.rpc_password, timeout=3, retry=2) - ret = sec_rpc_client.get_bdevs() - if ret: - for bdev in ret: - sec_node_bdev_names[bdev['name']] = bdev - - ret = sec_rpc_client.subsystem_list() - if ret: - for sub in ret: - sec_node_lvols_nqns[sub['nqn']] = sub - - for lvol in db.get_lvols_by_node_id(snode.get_id()): - - if lvol.status == LVol.STATUS_IN_CREATION: - continue - - if lvol.status == lvol.STATUS_IN_DELETION: - # check leadership - leader_node = None - snode = db.get_storage_node_by_id(snode.get_id()) - if snode.status in [StorageNode.STATUS_ONLINE, StorageNode.STATUS_SUSPENDED, StorageNode.STATUS_DOWN]: - ret = snode.rpc_client().bdev_lvol_get_lvstores(snode.lvstore) - if not ret: - raise Exception("Failed to get LVol info") - lvs_info = ret[0] - if "lvs leadership" in lvs_info and lvs_info['lvs leadership']: - leader_node = snode - - if not leader_node and sec_node: - ret = sec_node.rpc_client().bdev_lvol_get_lvstores(snode.lvstore) - if not ret: - raise Exception("Failed to get LVol info") - lvs_info = ret[0] - if "lvs leadership" in lvs_info and lvs_info['lvs leadership']: - leader_node = sec_node - - if not leader_node: - raise Exception("Failed to get leader node") - - if lvol.deletion_status == "" or lvol.deletion_status != leader_node.get_id(): - lvol_controller.delete_lvol_from_node(lvol.get_id(), leader_node.get_id()) - time.sleep(3) - - try: - ret = leader_node.rpc_client().bdev_lvol_get_lvol_delete_status( - f"{lvol.lvs_name}/{lvol.lvol_bdev}") - except Exception as e: - logger.error(e) - # timeout detected, check 
other node - break - - if ret == 0 or ret == 2: # Lvol may have already been deleted (not found) or delete completed - process_lvol_delete_finish(lvol) - - elif ret == 1: # Async lvol deletion is in progress or queued - logger.info(f"LVol deletion in progress, id: {lvol.get_id()}") - pre_lvol_delete_rebalance() - - elif ret == 3: # Async deletion is done, but leadership has changed (sync deletion is now blocked) - logger.info(f"LVol deletion error, id: {lvol.get_id()}, error code: {ret}") - logger.error("Async deletion is done, but leadership has changed (sync deletion is now blocked)") - - elif ret == 4: # No async delete request exists for this lvol - logger.info(f"LVol deletion error, id: {lvol.get_id()}, error code: {ret}") - logger.error("No async delete request exists for this lvol") - lvol = db.get_lvol_by_id(lvol.get_id()) - lvol.io_error = True - lvol.write_to_db() - set_lvol_status(lvol, LVol.STATUS_OFFLINE) - - elif ret == -1: # Operation not permitted - logger.info(f"LVol deletion error, id: {lvol.get_id()}, error code: {ret}") - logger.error("Operation not permitted") - lvol = db.get_lvol_by_id(lvol.get_id()) - lvol.io_error = True - lvol.write_to_db() - set_lvol_status(lvol, LVol.STATUS_OFFLINE) - - elif ret == -2: # No such file or directory - logger.info(f"LVol deletion error, id: {lvol.get_id()}, error code: {ret}") - logger.error("No such file or directory") - process_lvol_delete_finish(lvol) - - elif ret == -5: # I/O error - logger.info(f"LVol deletion error, id: {lvol.get_id()}, error code: {ret}") - logger.error("I/O error") - process_lvol_delete_try_again(lvol) - - elif ret == -11: # Try again - logger.info(f"LVol deletion error, id: {lvol.get_id()}, error code: {ret}") - logger.error("Try again") - process_lvol_delete_try_again(lvol) - - elif ret == -12: # Out of memory - logger.info(f"LVol deletion error, id: {lvol.get_id()}, error code: {ret}") - logger.error("Out of memory") - process_lvol_delete_try_again(lvol) - - elif ret == -16: # Device or resource busy - logger.info(f"LVol deletion error, id: {lvol.get_id()}, error code: {ret}") - logger.error("Device or resource busy") - process_lvol_delete_try_again(lvol) - - elif ret == -19: # No such device - logger.info(f"LVol deletion error, id: {lvol.get_id()}, error code: {ret}") - logger.error("Finishing lvol delete") - process_lvol_delete_finish(lvol) - - elif ret == -35: # Leadership changed - logger.info(f"LVol deletion error, id: {lvol.get_id()}, error code: {ret}") - logger.error("Leadership changed") - process_lvol_delete_try_again(lvol) - - elif ret == -36: # Failed to update lvol for deletion - logger.info(f"LVol deletion error, id: {lvol.get_id()}, error code: {ret}") - logger.error("Failed to update lvol for deletion") - process_lvol_delete_try_again(lvol) - - else: # Failed to update lvol for deletion - logger.info(f"LVol deletion error, id: {lvol.get_id()}, error code: {ret}") - logger.error("Failed to update lvol for deletion") - - continue - - passed = True + if lvol.ha_type == "ha": + sec_node = db.get_storage_node_by_id(snode.secondary_node_id) + if sec_node and sec_node.status == StorageNode.STATUS_ONLINE: ret = health_controller.check_lvol_on_node( - lvol.get_id(), lvol.node_id, node_bdev_names, node_lvols_nqns) + lvol.get_id(), snode.secondary_node_id, sec_node_bdev_names, sec_node_lvols_nqns) if not ret: passed = False + else: + passed = True + + if snode.lvstore_status == "ready": - if lvol.ha_type == "ha": - sec_node = db.get_storage_node_by_id(snode.secondary_node_id) - if sec_node and 
sec_node.status == StorageNode.STATUS_ONLINE: - ret = health_controller.check_lvol_on_node( - lvol.get_id(), snode.secondary_node_id, sec_node_bdev_names, sec_node_lvols_nqns) - if not ret: - passed = False - else: - passed = True + logger.info(f"LVol: {lvol.get_id()}, is healthy: {passed}") + set_lvol_health_check(lvol, passed) + if passed: + set_lvol_status(lvol, LVol.STATUS_ONLINE) - if snode.lvstore_status == "ready": + if snode.lvstore_status == "ready": - logger.info(f"LVol: {lvol.get_id()}, is healthy: {passed}") - set_lvol_health_check(lvol, passed) - if passed: - set_lvol_status(lvol, LVol.STATUS_ONLINE) + for snap in db.get_snapshots_by_node_id(snode.get_id()): + present = health_controller.check_bdev(snap.snap_bdev, bdev_names=node_bdev_names) + set_snapshot_health_check(snap, present) + + + +# get DB controller +db = db_controller.DBController() - if snode.lvstore_status == "ready": +logger.info("Starting LVol monitor...") +while True: - for snap in db.get_snapshots_by_node_id(snode.get_id()): - present = health_controller.check_bdev(snap.snap_bdev, bdev_names=node_bdev_names) - set_snapshot_health_check(snap, present) + for cluster in db.get_clusters(): + if cluster.status in [Cluster.STATUS_INACTIVE, Cluster.STATUS_UNREADY, Cluster.STATUS_IN_ACTIVATION]: + logger.warning(f"Cluster {cluster.get_id()} is in {cluster.status} state, skipping") + continue + for snode in db.get_storage_nodes_by_cluster_id(cluster.get_id()): + try: + check_node(snode) + except Exception as e: + logger.error(e) time.sleep(constants.LVOL_MONITOR_INTERVAL_SEC) diff --git a/simplyblock_core/services/lvol_stat_collector.py b/simplyblock_core/services/lvol_stat_collector.py index 1933b6703..18f09d4ce 100644 --- a/simplyblock_core/services/lvol_stat_collector.py +++ b/simplyblock_core/services/lvol_stat_collector.py @@ -7,7 +7,6 @@ from simplyblock_core.models.lvol_model import LVol from simplyblock_core.models.stats import LVolStatObject, PoolStatObject from simplyblock_core.models.storage_node import StorageNode -from simplyblock_core.rpc_client import RPCClient logger = utils.get_logger(__name__) @@ -212,68 +211,66 @@ def add_pool_stats(pool, records): continue if snode.status in [StorageNode.STATUS_ONLINE, StorageNode.STATUS_SUSPENDED, StorageNode.STATUS_DOWN]: + try: + rpc_client = snode.rpc_client(timeout=3, retry=2) + if snode.get_id() in all_node_bdev_names and all_node_bdev_names[snode.get_id()]: + node_bdev_names = all_node_bdev_names[snode.get_id()] + else: + node_bdevs = rpc_client.get_bdevs() + if node_bdevs: + node_bdev_names = {b['name']: b for b in node_bdevs} + all_node_bdev_names[snode.get_id()] = node_bdev_names - rpc_client = RPCClient( - snode.mgmt_ip, snode.rpc_port, - snode.rpc_username, snode.rpc_password, timeout=3, retry=2) - - if snode.get_id() in all_node_bdev_names and all_node_bdev_names[snode.get_id()]: - node_bdev_names = all_node_bdev_names[snode.get_id()] - else: - node_bdevs = rpc_client.get_bdevs() - if node_bdevs: - node_bdev_names = {b['name']: b for b in node_bdevs} - all_node_bdev_names[snode.get_id()] = node_bdev_names - - if snode.get_id() in all_node_lvols_nqns and all_node_lvols_nqns[snode.get_id()]: - node_lvols_nqns = all_node_lvols_nqns[snode.get_id()] - else: - ret = rpc_client.subsystem_list() - if ret: - node_lvols_nqns = {} - for sub in ret: - node_lvols_nqns[sub['nqn']] = sub - all_node_lvols_nqns[snode.get_id()] = node_lvols_nqns - - if snode.get_id() in all_node_lvols_stats and all_node_lvols_stats[snode.get_id()]: - node_lvols_stats = 
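
The all_node_* dicts used here memoize one RPC round-trip per node per collection pass; the caching step can be factored as below (get_cached is an illustrative helper, not part of the patch):

    def get_cached(cache: dict, node_id: str, fetch):
        # Reuse a previously fetched, non-empty result for this node;
        # otherwise call the RPC wrapper and remember what it returned.
        if node_id in cache and cache[node_id]:
            return cache[node_id]
        value = fetch() or {}
        if value:
            cache[node_id] = value
        return value

    # e.g. bdevs = get_cached(all_node_bdev_names, snode.get_id(),
    #                         lambda: {b['name']: b for b in rpc_client.get_bdevs() or []})
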
all_node_lvols_stats[snode.get_id()] - else: - ret = rpc_client.get_lvol_stats() - if ret: - node_lvols_stats = {} - for st in ret['bdevs']: - node_lvols_stats[st['name']] = st - all_node_lvols_stats[snode.get_id()] = node_lvols_stats - - if snode.secondary_node_id: - sec_node = db.get_storage_node_by_id(snode.secondary_node_id) - if sec_node and sec_node.status==StorageNode.STATUS_ONLINE: - sec_rpc_client = RPCClient( - sec_node.mgmt_ip, sec_node.rpc_port, - sec_node.rpc_username, sec_node.rpc_password, timeout=3, retry=2) - - if sec_node.get_id() not in all_node_bdev_names or not all_node_bdev_names[sec_node.get_id()]: - ret = sec_rpc_client.get_bdevs() - if ret: - # node_bdev_names = {} - node_bdev_names = {b['name']: b for b in ret} - all_node_bdev_names[sec_node.get_id()] = node_bdev_names - - if sec_node.get_id() not in all_node_lvols_nqns or not all_node_lvols_nqns[sec_node.get_id()]: - ret = sec_rpc_client.subsystem_list() + if snode.get_id() in all_node_lvols_nqns and all_node_lvols_nqns[snode.get_id()]: + node_lvols_nqns = all_node_lvols_nqns[snode.get_id()] + else: + ret = rpc_client.subsystem_list() if ret: node_lvols_nqns = {} for sub in ret: node_lvols_nqns[sub['nqn']] = sub - all_node_lvols_nqns[sec_node.get_id()] = node_lvols_nqns + all_node_lvols_nqns[snode.get_id()] = node_lvols_nqns - if sec_node.get_id() not in all_node_lvols_stats or not all_node_lvols_stats[sec_node.get_id()]: - ret = sec_rpc_client.get_lvol_stats() + if snode.get_id() in all_node_lvols_stats and all_node_lvols_stats[snode.get_id()]: + node_lvols_stats = all_node_lvols_stats[snode.get_id()] + else: + ret = rpc_client.get_lvol_stats() if ret: - sec_node_lvols_stats = {} + node_lvols_stats = {} for st in ret['bdevs']: - sec_node_lvols_stats[st['name']] = st - all_node_lvols_stats[sec_node.get_id()] = sec_node_lvols_stats + node_lvols_stats[st['name']] = st + all_node_lvols_stats[snode.get_id()] = node_lvols_stats + except Exception as e: + logger.error(e) + + if snode.secondary_node_id: + sec_node = db.get_storage_node_by_id(snode.secondary_node_id) + if sec_node and sec_node.status==StorageNode.STATUS_ONLINE: + try: + sec_rpc_client = sec_node.rpc_client(timeout=3, retry=2) + if sec_node.get_id() not in all_node_bdev_names or not all_node_bdev_names[sec_node.get_id()]: + ret = sec_rpc_client.get_bdevs() + if ret: + # node_bdev_names = {} + node_bdev_names = {b['name']: b for b in ret} + all_node_bdev_names[sec_node.get_id()] = node_bdev_names + if sec_node.get_id() not in all_node_lvols_nqns or not all_node_lvols_nqns[sec_node.get_id()]: + ret = sec_rpc_client.subsystem_list() + if ret: + node_lvols_nqns = {} + for sub in ret: + node_lvols_nqns[sub['nqn']] = sub + all_node_lvols_nqns[sec_node.get_id()] = node_lvols_nqns + + if sec_node.get_id() not in all_node_lvols_stats or not all_node_lvols_stats[sec_node.get_id()]: + ret = sec_rpc_client.get_lvol_stats() + if ret: + sec_node_lvols_stats = {} + for st in ret['bdevs']: + sec_node_lvols_stats[st['name']] = st + all_node_lvols_stats[sec_node.get_id()] = sec_node_lvols_stats + except Exception as e: + logger.error(e) for lvol in lvol_list: if lvol.status in [LVol.STATUS_IN_CREATION, LVol.STATUS_IN_DELETION]: diff --git a/simplyblock_core/services/snapshot_monitor.py b/simplyblock_core/services/snapshot_monitor.py index a99ed89f3..5006eb431 100644 --- a/simplyblock_core/services/snapshot_monitor.py +++ b/simplyblock_core/services/snapshot_monitor.py @@ -8,7 +8,6 @@ from simplyblock_core.controllers import health_controller, snapshot_events, 
tasks_controller
 from simplyblock_core.models.snapshot import SnapShot
 from simplyblock_core.models.storage_node import StorageNode
-from simplyblock_core.rpc_client import RPCClient
 
 logger = utils.get_logger(__name__)
 
@@ -95,6 +94,115 @@ def set_snap_offline(snap):
         sn.write_to_db()
 
 
+def process_snap_delete(snap, snode, sec_node):
+    # check leadership
+    leader_node = None
+    if snode.status in [StorageNode.STATUS_ONLINE, StorageNode.STATUS_SUSPENDED,
+                        StorageNode.STATUS_DOWN]:
+        ret = snode.rpc_client().bdev_lvol_get_lvstores(snode.lvstore)
+        if not ret:
+            raise Exception("Failed to get LVol store info")
+        lvs_info = ret[0]
+        if "lvs leadership" in lvs_info and lvs_info['lvs leadership']:
+            leader_node = snode
+
+    if not leader_node and sec_node:
+        ret = sec_node.rpc_client().bdev_lvol_get_lvstores(sec_node.lvstore)
+        if not ret:
+            raise Exception("Failed to get LVol store info")
+        lvs_info = ret[0]
+        if "lvs leadership" in lvs_info and lvs_info['lvs leadership']:
+            leader_node = sec_node
+
+    if not leader_node:
+        raise Exception("Failed to get leader node")
+
+    if snap.deletion_status == "" or snap.deletion_status != leader_node.get_id():
+
+        ret, _ = leader_node.rpc_client().delete_lvol(snap.snap_bdev)
+        if not ret:
+            logger.error(f"Failed to delete snap from node: {snode.get_id()}")
+            return False
+        snap = db.get_snapshot_by_id(snap.get_id())
+        snap.deletion_status = leader_node.get_id()
+        snap.write_to_db()
+
+    time.sleep(3)
+
+    try:
+        ret = leader_node.rpc_client().bdev_lvol_get_lvol_delete_status(snap.snap_bdev)
+    except Exception as e:
+        logger.error(e)
+        # timeout detected, check other node
+        return False
+
+    if ret == 0 or ret == 2: # Lvol may have already been deleted (not found) or delete completed
+        process_snap_delete_finish(snap, leader_node)
+
+    elif ret == 1: # Async lvol deletion is in progress or queued
+        logger.info(f"Snap deletion in progress, id: {snap.get_id()}")
+
+    elif ret == 3: # Async deletion is done, but leadership has changed (sync deletion is now blocked)
+        logger.info(f"Snap deletion error, id: {snap.get_id()}, error code: {ret}")
+        logger.error(
+            "Async deletion is done, but leadership has changed (sync deletion is now blocked)")
+
+    elif ret == 4: # No async delete request exists for this Snap
+        logger.info(f"Snap deletion error, id: {snap.get_id()}, error code: {ret}")
+        logger.error("No async delete request exists for this snap")
+        set_snap_offline(snap)
+
+    elif ret == -1: # Operation not permitted
+        logger.info(f"Snap deletion error, id: {snap.get_id()}, error code: {ret}")
+        logger.error("Operation not permitted")
+        process_snap_delete_try_again(snap)
+
+    elif ret == -2: # No such file or directory
+        logger.info(f"Snap deletion error, id: {snap.get_id()}, error code: {ret}")
+        logger.error("No such file or directory")
+        process_snap_delete_finish(snap, leader_node)
+
+    elif ret == -5: # I/O error
+        logger.info(f"Snap deletion error, id: {snap.get_id()}, error code: {ret}")
+        logger.error("I/O error")
+        process_snap_delete_try_again(snap)
+
+    elif ret == -11: # Try again
+        logger.info(f"Snap deletion error, id: {snap.get_id()}, error code: {ret}")
+        logger.error("Try again")
+        process_snap_delete_try_again(snap)
+
+    elif ret == -12: # Out of memory
+        logger.info(f"Snap deletion error, id: {snap.get_id()}, error code: {ret}")
+        logger.error("Out of memory")
+        process_snap_delete_try_again(snap)
+
+    elif ret == -16: # Device or resource busy
+        logger.info(f"Snap deletion error, id: {snap.get_id()}, error code: {ret}")
+        logger.error("Device or resource busy")
+        process_snap_delete_try_again(snap)
+
+    elif ret == -19: # No such device
+        logger.info(f"Snap deletion error, id: {snap.get_id()}, error code: {ret}")
+        logger.error("No such device")
+        set_snap_offline(snap)
+
+    elif ret == -35: # Leadership changed
+        logger.info(f"Snap deletion error, id: {snap.get_id()}, error code: {ret}")
+        logger.error("Leadership changed")
+        process_snap_delete_try_again(snap)
+
+    elif ret == -36: # Failed to update lvol for deletion
+        logger.info(f"Snap deletion error, id: {snap.get_id()}, error code: {ret}")
+        logger.error("Failed to update snapshot for deletion")
+        process_snap_delete_try_again(snap)
+
+    else: # Unknown error code
+        logger.info(f"Snap deletion error, id: {snap.get_id()}, error code: {ret}")
+        logger.error("Failed to update snapshot for deletion")
+
+
 # get DB controller
 db = db_controller.DBController()
 
@@ -109,159 +217,46 @@ def set_snap_offline(snap):
 
     for snode in db.get_storage_nodes_by_cluster_id(cluster.get_id()):
 
         node_bdev_names = []
-        node_lvols_nqns = {}
         sec_node_bdev_names = {}
-        sec_node_lvols_nqns = {}
         sec_node = None
         if snode.status in [StorageNode.STATUS_ONLINE, StorageNode.STATUS_SUSPENDED,
                             StorageNode.STATUS_DOWN]:
-
-            rpc_client = RPCClient(
-                snode.mgmt_ip, snode.rpc_port,
-                snode.rpc_username, snode.rpc_password, timeout=3, retry=2)
-            node_bdevs = rpc_client.get_bdevs()
+            rpc_client = snode.rpc_client(timeout=3, retry=2)
+            try:
+                node_bdevs = rpc_client.get_bdevs()
+            except Exception as e:
+                logger.error(e)
+                continue
             if node_bdevs:
                 node_bdev_names = [b['name'] for b in node_bdevs]
                 for bdev in node_bdevs:
                     if "aliases" in bdev and bdev["aliases"]:
                         node_bdev_names.extend(bdev['aliases'])
 
-            ret = rpc_client.subsystem_list()
-            if ret:
-                for sub in ret:
-                    node_lvols_nqns[sub['nqn']] = sub
-
             if snode.secondary_node_id:
                 sec_node = db.get_storage_node_by_id(snode.secondary_node_id)
-                if sec_node and sec_node.status==StorageNode.STATUS_ONLINE:
-                    sec_rpc_client = RPCClient(
-                        sec_node.mgmt_ip, sec_node.rpc_port,
-                        sec_node.rpc_username, sec_node.rpc_password, timeout=3, retry=2)
-                    ret = sec_rpc_client.get_bdevs()
+                if sec_node and sec_node.status in [
+                    StorageNode.STATUS_ONLINE, StorageNode.STATUS_SUSPENDED, StorageNode.STATUS_DOWN]:
+                    sec_rpc_client = sec_node.rpc_client(timeout=3, retry=2)
+                    try:
+                        ret = sec_rpc_client.get_bdevs()
+                    except Exception as e:
+                        logger.error(e)
+                        continue
                     if ret:
                         for bdev in ret:
                             sec_node_bdev_names[bdev['name']] = bdev
 
-                    ret = sec_rpc_client.subsystem_list()
-                    if ret:
-                        for sub in ret:
-                            sec_node_lvols_nqns[sub['nqn']] = sub
-
-        if snode.lvstore_status == "ready":
-
-            for snap in db.get_snapshots_by_node_id(snode.get_id()):
-                if snap.status == SnapShot.STATUS_ONLINE:
-
-                    present = health_controller.check_bdev(snap.snap_bdev, bdev_names=node_bdev_names)
+        for snap in db.get_snapshots_by_node_id(snode.get_id()):
+            if snap.status == SnapShot.STATUS_ONLINE:
+                present = health_controller.check_bdev(snap.snap_bdev, bdev_names=node_bdev_names)
+                if snode.lvstore_status == "ready":
                     set_snapshot_health_check(snap, present)
-
-                elif snap.status == SnapShot.STATUS_IN_DELETION:
-
-                    # check leadership
-                    leader_node = None
-                    if snode.status in [StorageNode.STATUS_ONLINE, StorageNode.STATUS_SUSPENDED,
-                                        StorageNode.STATUS_DOWN]:
-                        ret = snode.rpc_client().bdev_lvol_get_lvstores(snode.lvstore)
-                        if not ret:
-                            raise Exception("Failed to get LVol store info")
-                        lvs_info = ret[0]
-                        if "lvs leadership" in lvs_info and lvs_info['lvs leadership']:
-                            leader_node = snode
-
-                    if not leader_node and sec_node:
-                        ret 
= sec_node.rpc_client().bdev_lvol_get_lvstores(sec_node.lvstore) - if not ret: - raise Exception("Failed to get LVol store info") - lvs_info = ret[0] - if "lvs leadership" in lvs_info and lvs_info['lvs leadership']: - leader_node = sec_node - - if not leader_node: - raise Exception("Failed to get leader node") - - if snap.deletion_status == "" or snap.deletion_status != leader_node.get_id(): - - ret, _ = leader_node.rpc_client().delete_lvol(snap.snap_bdev) - if not ret: - logger.error(f"Failed to delete snap from node: {snode.get_id()}") - continue - snap = db.get_snapshot_by_id(snap.get_id()) - snap.deletion_status = leader_node.get_id() - snap.write_to_db() - - time.sleep(3) - - try: - ret = leader_node.rpc_client().bdev_lvol_get_lvol_delete_status(snap.snap_bdev) - except Exception as e: - logger.error(e) - # timeout detected, check other node - break - - if ret == 0 or ret == 2: # Lvol may have already been deleted (not found) or delete completed - process_snap_delete_finish(snap, leader_node) - - elif ret == 1: # Async lvol deletion is in progress or queued - logger.info(f"Snap deletion in progress, id: {snap.get_id()}") - - elif ret == 3: # Async deletion is done, but leadership has changed (sync deletion is now blocked) - logger.info(f"Snap deletion error, id: {snap.get_id()}, error code: {ret}") - logger.error( - "Async deletion is done, but leadership has changed (sync deletion is now blocked)") - - elif ret == 4: # No async delete request exists for this Snap - logger.info(f"Snap deletion error, id: {snap.get_id()}, error code: {ret}") - logger.error("No async delete request exists for this snap") - set_snap_offline(snap) - - elif ret == -1: # Operation not permitted - logger.info(f"Snap deletion error, id: {snap.get_id()}, error code: {ret}") - logger.error("Operation not permitted") - process_snap_delete_try_again(snap) - - elif ret == -2: # No such file or directory - logger.info(f"Snap deletion error, id: {snap.get_id()}, error code: {ret}") - logger.error("No such file or directory") - process_snap_delete_finish(snap, leader_node) - - elif ret == -5: # I/O error - logger.info(f"Snap deletion error, id: {snap.get_id()}, error code: {ret}") - logger.error("I/O error") - process_snap_delete_try_again(snap) - - elif ret == -11: # Try again - logger.info(f"Snap deletion error, id: {snap.get_id()}, error code: {ret}") - logger.error("Try again") - process_snap_delete_try_again(snap) - - elif ret == -12: # Out of memory - logger.info(f"Snap deletion error, id: {snap.get_id()}, error code: {ret}") - logger.error("Out of memory") - process_snap_delete_try_again(snap) - - elif ret == -16: # Device or resource busy - logger.info(f"Snap deletion error, id: {snap.get_id()}, error code: {ret}") - logger.error("Device or resource busy") - process_snap_delete_try_again(snap) - - elif ret == -19: # No such device - logger.info(f"Snap deletion error, id: {snap.get_id()}, error code: {ret}") - logger.error("No such device") - set_snap_offline(snap) - - elif ret == -35: # Leadership changed - logger.info(f"Snap deletion error, id: {snap.get_id()}, error code: {ret}") - logger.error("Leadership changed") - process_snap_delete_try_again(snap) - - elif ret == -36: # Failed to update lvol for deletion - logger.info(f"Snap deletion error, id: {snap.get_id()}, error code: {ret}") - logger.error("Failed to update snapshot for deletion") - process_snap_delete_try_again(snap) - - else: # Failed to update lvol for deletion - logger.info(f"Snap deletion error, id: {snap.get_id()}, error code: {ret}") - 
logger.error("Failed to update snapshot for deletion") - + elif snap.status == SnapShot.STATUS_IN_DELETION: + try: + process_snap_delete(snap, snode) + except Exception as e: + logger.error(e) time.sleep(constants.LVOL_MONITOR_INTERVAL_SEC) diff --git a/simplyblock_core/services/storage_node_monitor.py b/simplyblock_core/services/storage_node_monitor.py index d1417ed3f..fffd346e5 100644 --- a/simplyblock_core/services/storage_node_monitor.py +++ b/simplyblock_core/services/storage_node_monitor.py @@ -72,13 +72,15 @@ def get_next_cluster_status(cluster_id): continue online_nodes += 1 # check for jm rep tasks: - ret = node.rpc_client().jc_get_jm_status(node.jm_vuid) - if ret: + try: + ret = node.rpc_client().jc_get_jm_status(node.jm_vuid) for jm in ret: if ret[jm] is False: # jm is not ready (has active replication task) jm_replication_tasks = True logger.warning("Replication task found!") break + except Exception: + logger.warning("Failed to get replication task!") elif node.status == StorageNode.STATUS_REMOVED: pass else: @@ -131,7 +133,7 @@ def update_cluster_status(cluster_id): for task in db.get_job_tasks(cluster_id): if task.status != JobSchedule.STATUS_DONE and task.function_name in [ JobSchedule.FN_DEV_MIG, JobSchedule.FN_NEW_DEV_MIG, JobSchedule.FN_FAILED_DEV_MIG]: - if task.retry == 0: + if "migration" not in task.function_params: first_iter_task_pending += 1 cluster = db.get_cluster_by_id(cluster_id) @@ -280,13 +282,6 @@ def node_port_check_fun(snode): node_port_check = True if snode.lvstore_status == "ready": ports = [snode.nvmf_port] - if snode.lvstore_stack_secondary_1: - for n in db.get_primary_storage_nodes_by_secondary_node_id(snode.get_id()): - if n.lvstore_status == "ready": - ports.append(n.lvol_subsys_port) - if not snode.is_secondary_node: - ports.append(snode.lvol_subsys_port) - ports = [snode.nvmf_port] if snode.lvstore_stack_secondary_1: for n in db.get_primary_storage_nodes_by_secondary_node_id(snode.get_id()): if n.lvstore_status == "ready": @@ -295,9 +290,12 @@ def node_port_check_fun(snode): ports.append(snode.lvol_subsys_port) for port in ports: - ret = health_controller._check_port_on_node(snode, port) - logger.info(f"Check: node port {snode.mgmt_ip}, {port} ... {ret}") - node_port_check &= ret + try: + ret = health_controller.check_port_on_node(snode, port) + logger.info(f"Check: node port {snode.mgmt_ip}, {port} ... {ret}") + node_port_check &= ret + except Exception: + logger.error("Check node port failed, connection error") node_data_nic_ping_check = False for data_nic in snode.data_nics: @@ -325,6 +323,7 @@ def check_node(snode): logger.info(f"Checking node {snode.hostname}") + # 1- check node ping ping_check = health_controller._check_node_ping(snode.mgmt_ip) logger.info(f"Check: ping mgmt ip {snode.mgmt_ip} ... 
{ping_check}") @@ -358,7 +357,6 @@ def check_node(snode): return False except Exception as e: logger.debug(e) - set_node_unreachable(snode) return False # 4- check node rpc interface @@ -419,5 +417,8 @@ def loop_for_node(snode): t.start() threads_maps[node_id] = t - time.sleep(constants.NODE_MONITOR_INTERVAL_SEC) - update_cluster_status(cluster_id) + try: + update_cluster_status(cluster_id) + except Exception: + logger.error("Error while updating cluster status") + time.sleep(constants.NODE_MONITOR_INTERVAL_SEC) diff --git a/simplyblock_core/services/tasks_runner_failed_migration.py b/simplyblock_core/services/tasks_runner_failed_migration.py index 7d0b3e89f..e3baeb7f0 100644 --- a/simplyblock_core/services/tasks_runner_failed_migration.py +++ b/simplyblock_core/services/tasks_runner_failed_migration.py @@ -87,8 +87,12 @@ def task_runner(task): qos_high_priority = False if db.get_cluster_by_id(snode.cluster_id).is_qos_set(): qos_high_priority = True - rsp = rpc_client.distr_migration_failure_start( - distr_name, device.cluster_device_order, qos_high_priority, job_size=64, jobs=constants.MIG_PARALLEL_JOBS) + try: + rsp = rpc_client.distr_migration_failure_start( + distr_name, device.cluster_device_order, qos_high_priority, job_size=constants.MIG_JOB_SIZE, jobs=constants.MIG_PARALLEL_JOBS) + except Exception as e: + logger.error(e) + rsp = False if not rsp: logger.error(f"Failed to start device migration task, storage_ID: {device.cluster_device_order}") task.function_result = "Failed to start device migration task" diff --git a/simplyblock_core/services/tasks_runner_jc_comp.py b/simplyblock_core/services/tasks_runner_jc_comp.py index 676156af3..738b588b1 100644 --- a/simplyblock_core/services/tasks_runner_jc_comp.py +++ b/simplyblock_core/services/tasks_runner_jc_comp.py @@ -86,7 +86,11 @@ jm_vuid = node.jm_vuid if "jm_vuid" in task.function_params: jm_vuid = task.function_params["jm_vuid"] - ret, err = rpc_client.jc_compression_start(jm_vuid=jm_vuid) + try: + ret, err = rpc_client.jc_compression_start(jm_vuid=jm_vuid) + except Exception as e: + logger.error(e) + continue if ret: task.function_result = f"JC {node.jm_vuid} compression resumed on node" task.status = JobSchedule.STATUS_DONE diff --git a/simplyblock_core/services/tasks_runner_migration.py b/simplyblock_core/services/tasks_runner_migration.py index e325e3d7e..c1abf823c 100644 --- a/simplyblock_core/services/tasks_runner_migration.py +++ b/simplyblock_core/services/tasks_runner_migration.py @@ -62,16 +62,6 @@ def task_runner(task): except Exception as e: logger.error(f"Failed to get online since: {e}") - for dev in node.nvme_devices: - if dev.status not in [NVMeDevice.STATUS_ONLINE, - NVMeDevice.STATUS_FAILED_AND_MIGRATED, - NVMeDevice.STATUS_CANNOT_ALLOCATE]: - task.function_result = f"Some dev status is {dev.status }, retrying" - task.status = JobSchedule.STATUS_SUSPENDED - task.retry += 1 - task.write_to_db(db.kv_store) - return False - task.status = JobSchedule.STATUS_RUNNING task.function_result = "" task.write_to_db(db.kv_store) @@ -93,8 +83,12 @@ def task_runner(task): qos_high_priority = False if db.get_cluster_by_id(snode.cluster_id).is_qos_set(): qos_high_priority = True - rsp = rpc_client.distr_migration_expansion_start(distr_name, qos_high_priority, job_size=64, - jobs=constants.MIG_PARALLEL_JOBS) + try: + rsp = rpc_client.distr_migration_expansion_start(distr_name, qos_high_priority, job_size=constants.MIG_JOB_SIZE, + jobs=constants.MIG_PARALLEL_JOBS) + except Exception as e: + logger.error(e) + rsp = False if not 
rsp:
         logger.error(f"Failed to start device migration task, storage_ID: {device.cluster_device_order}")
         task.function_result = "Failed to start device migration task, retry later"
@@ -219,9 +213,12 @@ def _set_master_task_status(master_task, status):
             continue
         rpc_client = RPCClient(
             node.mgmt_ip, node.rpc_port, node.rpc_username, node.rpc_password, timeout=5, retry=2)
-        ret, err = rpc_client.jc_compression_start(jm_vuid=node.jm_vuid)
-        if err and "code" in err and err["code"] != -2:
-            logger.info("Failed to resume JC compression adding task...")
-            tasks_controller.add_jc_comp_resume_task(task.cluster_id, task.node_id, node.jm_vuid)
+        try:
+            ret, err = rpc_client.jc_compression_start(jm_vuid=node.jm_vuid)
+            if err and "code" in err and err["code"] != -2:
+                logger.info("Failed to resume JC compression adding task...")
+                tasks_controller.add_jc_comp_resume_task(task.cluster_id, task.node_id, node.jm_vuid)
+        except Exception as e:
+            logger.error(e)
 
     time.sleep(3)
 
diff --git a/simplyblock_core/services/tasks_runner_new_dev_migration.py b/simplyblock_core/services/tasks_runner_new_dev_migration.py
index 9feec7a56..db4143eec 100644
--- a/simplyblock_core/services/tasks_runner_new_dev_migration.py
+++ b/simplyblock_core/services/tasks_runner_new_dev_migration.py
@@ -98,8 +98,12 @@ def task_runner(task):
     qos_high_priority = False
     if db.get_cluster_by_id(snode.cluster_id).is_qos_set():
         qos_high_priority = True
-    rsp = rpc_client.distr_migration_expansion_start(distr_name, qos_high_priority, job_size=64,
-                                                     jobs=constants.MIG_PARALLEL_JOBS)
+    try:
+        rsp = rpc_client.distr_migration_expansion_start(
+            distr_name, qos_high_priority, job_size=constants.MIG_JOB_SIZE, jobs=constants.MIG_PARALLEL_JOBS)
+    except Exception as e:
+        logger.error(f"Failed to start migration: {e}")
+        rsp = False
     if not rsp:
         logger.error(f"Failed to start device migration task, storage_ID: {device.cluster_device_order}")
         task.function_result = "Failed to start device migration task"
diff --git a/simplyblock_core/services/tasks_runner_node_add.py b/simplyblock_core/services/tasks_runner_node_add.py
index daeba918e..819e611d7 100644
--- a/simplyblock_core/services/tasks_runner_node_add.py
+++ b/simplyblock_core/services/tasks_runner_node_add.py
@@ -2,7 +2,7 @@
 
 import time
 
-from simplyblock_core import db_controller, storage_node_ops, utils
+from simplyblock_core import db_controller, storage_node_ops, utils, constants
 from simplyblock_core.models.job_schedule import JobSchedule
 from simplyblock_core.models.cluster import Cluster
 
@@ -13,46 +13,67 @@
 db = db_controller.DBController()
 
 
-logger.info("Starting Tasks runner...")
-while True:
+def process_task(task, cl):
+    if task.canceled:
+        task.function_result = "canceled"
+        task.status = JobSchedule.STATUS_DONE
+        task.write_to_db(db.kv_store)
+        return False
+
+    if task.retry >= task.max_retry:
+        task.function_result = "max retry reached"
+        task.status = JobSchedule.STATUS_DONE
+        task.write_to_db(db.kv_store)
+        return True
+
+    if db.get_cluster_by_id(cl.get_id()).status == Cluster.STATUS_IN_ACTIVATION:
+        task.function_result = "Cluster is in_activation, waiting"
+        task.status = JobSchedule.STATUS_NEW
+        task.write_to_db(db.kv_store)
+        return False
+
+    if task.status != JobSchedule.STATUS_RUNNING:
+        task.status = JobSchedule.STATUS_RUNNING
+        task.write_to_db(db.kv_store)
+
+    try:
+        res = storage_node_ops.add_node(**task.function_params)
+        msg = f"Node add result: {res}"
+        logger.info(msg)
+        task.function_result = msg
+        if res:
+            task.status = JobSchedule.STATUS_DONE
+        else:
+            task.retry += 1
+            task.status = JobSchedule.STATUS_SUSPENDED
+        task.write_to_db(db.kv_store)
+        return True
+    except Exception as e:
+        logger.error(e)
+        return False
+
+
+logger.info("Starting Tasks runner node add...")
+while True:
     clusters = db.get_clusters()
     if not clusters:
         logger.error("No clusters found!")
     else:
         for cl in clusters:
-            if cl.status == Cluster.STATUS_IN_ACTIVATION:
-                continue
-
             tasks = db.get_job_tasks(cl.get_id(), reverse=False)
             for task in tasks:
-
+                delay_seconds = constants.TASK_EXEC_INTERVAL_SEC
                 if task.function_name == JobSchedule.FN_NODE_ADD:
-                    if task.status != JobSchedule.STATUS_DONE:
-
+                    while task.status != JobSchedule.STATUS_DONE:
                         # get new task object because it could be changed from cancel task
                         task = db.get_task_by_id(task.uuid)
-
-                        if task.canceled:
-                            task.function_result = "canceled"
-                            task.status = JobSchedule.STATUS_DONE
-                            task.write_to_db(db.kv_store)
-                            continue
-
-                        if db.get_cluster_by_id(cl.get_id()).status == Cluster.STATUS_IN_ACTIVATION:
-                            task.function_result = "Cluster is in_activation, waiting"
-                            task.status = JobSchedule.STATUS_NEW
-                            task.write_to_db(db.kv_store)
-                            continue
-
-                        if task.status != JobSchedule.STATUS_RUNNING:
-                            task.status = JobSchedule.STATUS_RUNNING
-                            task.write_to_db(db.kv_store)
-
-                        res = storage_node_ops.add_node(**task.function_params)
-                        logger.info(f"Node add result: {res}")
-                        task.function_result = str(res)
-                        task.status = JobSchedule.STATUS_DONE
-                        task.write_to_db(db.kv_store)
-
-            time.sleep(5)
+                        res = process_task(task, cl)
+                        if res:
+                            if task.status == JobSchedule.STATUS_DONE:
+                                break
+                        else:
+                            delay_seconds *= 2
+                        time.sleep(delay_seconds)
+
+    time.sleep(constants.TASK_EXEC_INTERVAL_SEC)
diff --git a/simplyblock_core/services/tasks_runner_port_allow.py b/simplyblock_core/services/tasks_runner_port_allow.py
index d49d6c19b..5ca9b7fdd 100644
--- a/simplyblock_core/services/tasks_runner_port_allow.py
+++ b/simplyblock_core/services/tasks_runner_port_allow.py
@@ -9,7 +9,6 @@
 from simplyblock_core.models.cluster import Cluster
 from simplyblock_core.models.nvme_device import NVMeDevice, RemoteDevice
 from simplyblock_core.models.storage_node import StorageNode
-from simplyblock_core.snode_client import SNodeClient
 
 logger = utils.get_logger(__name__)
 
@@ -17,9 +16,226 @@
 db = db_controller.DBController()
 
 
+def exec_port_allow_task(task):
+    # get new task object because it could be changed from cancel task
+    task = db.get_task_by_id(task.uuid)
+
+    if task.canceled:
+        task.function_result = "canceled"
+        task.status = JobSchedule.STATUS_DONE
+        task.write_to_db(db.kv_store)
+        return
+
+    node = db.get_storage_node_by_id(task.node_id)
+
+    if not node:
+        task.function_result = "node not found"
+        task.status = JobSchedule.STATUS_DONE
+        task.write_to_db(db.kv_store)
+        return
+
+    if node.status not in [StorageNode.STATUS_DOWN, StorageNode.STATUS_ONLINE]:
+        msg = f"Node is {node.status}, retry task"
+        logger.info(msg)
+        task.function_result = msg
+        task.status = JobSchedule.STATUS_SUSPENDED
+        task.write_to_db(db.kv_store)
+        return
+
+    # check node ping
+    ping_check = health_controller._check_node_ping(node.mgmt_ip)
+    logger.info(f"Check: ping mgmt ip {node.mgmt_ip} ... {ping_check}")
+    if not ping_check:
+        time.sleep(1)
+        ping_check = health_controller._check_node_ping(node.mgmt_ip)
+        logger.info(f"Check 2: ping mgmt ip {node.mgmt_ip} ... 
{ping_check}") + + if not ping_check: + msg = "Node ping is false, retry task" + logger.info(msg) + task.function_result = msg + task.status = JobSchedule.STATUS_SUSPENDED + task.write_to_db(db.kv_store) + return + + # check node ping + logger.info("connect to remote devices") + nodes = db.get_storage_nodes_by_cluster_id(node.cluster_id) + # connect to remote devs + try: + node_bdevs = node.rpc_client().get_bdevs() + logger.debug(node_bdevs) + if node_bdevs: + node_bdev_names = {} + for b in node_bdevs: + node_bdev_names[b['name']] = b + for al in b['aliases']: + node_bdev_names[al] = b + else: + node_bdev_names = {} + remote_devices = [] + for nd in nodes: + if nd.get_id() == node.get_id() or nd.status not in [StorageNode.STATUS_ONLINE, StorageNode.STATUS_DOWN]: + continue + logger.info(f"Connecting to node {nd.get_id()}") + for index, dev in enumerate(nd.nvme_devices): + + if dev.status not in [NVMeDevice.STATUS_ONLINE, NVMeDevice.STATUS_READONLY, + NVMeDevice.STATUS_CANNOT_ALLOCATE]: + logger.debug(f"Device is not online: {dev.get_id()}, status: {dev.status}") + continue + + if not dev.alceml_bdev: + raise ValueError(f"device alceml bdev not found!, {dev.get_id()}") + + remote_device = RemoteDevice() + remote_device.uuid = dev.uuid + remote_device.alceml_name = dev.alceml_name + remote_device.node_id = dev.node_id + remote_device.size = dev.size + remote_device.nvmf_multipath = dev.nvmf_multipath + remote_device.status = NVMeDevice.STATUS_ONLINE + remote_device.remote_bdev = storage_node_ops.connect_device( + f"remote_{dev.alceml_bdev}", dev, node, + bdev_names=list(node_bdev_names), reattach=False) + + remote_devices.append(remote_device) + if not remote_devices: + msg = "Node unable to connect to remote devs, retry task" + logger.info(msg) + task.function_result = msg + task.status = JobSchedule.STATUS_SUSPENDED + task.write_to_db(db.kv_store) + return + else: + node = db.get_storage_node_by_id(task.node_id) + node.remote_devices = remote_devices + node.write_to_db() + + logger.info("connect to remote JM devices") + remote_jm_devices = storage_node_ops._connect_to_remote_jm_devs(node) + if not remote_jm_devices or len(remote_jm_devices) < 2: + msg = "Node unable to connect to remote JMs, retry task" + logger.info(msg) + task.function_result = msg + task.status = JobSchedule.STATUS_SUSPENDED + task.write_to_db(db.kv_store) + return + else: + node = db.get_storage_node_by_id(task.node_id) + node.remote_jm_devices = remote_jm_devices + node.write_to_db() + + + except Exception as e: + logger.error(e) + msg = "Error when connect to remote devs, retry task" + logger.info(msg) + task.function_result = msg + task.status = JobSchedule.STATUS_SUSPENDED + task.write_to_db(db.kv_store) + return + + logger.info("Sending device status event") + for db_dev in node.nvme_devices: + distr_controller.send_dev_status_event(db_dev, db_dev.status, node) + + logger.info("Finished sending device status and now waiting 5s for JMs to connect") + time.sleep(5) + + sec_node = db.get_storage_node_by_id(node.secondary_node_id) + snode = db.get_storage_node_by_id(node.get_id()) + if sec_node and sec_node.status == StorageNode.STATUS_ONLINE: + try: + ret = sec_node.rpc_client().bdev_lvol_get_lvstores(snode.lvstore) + if ret: + lvs_info = ret[0] + if "lvs leadership" in lvs_info and lvs_info['lvs leadership']: + # is_sec_node_leader = True + # check jc_compression status + jc_compression_is_active = sec_node.rpc_client().jc_compression_get_status(snode.jm_vuid) + retries = 10 + while jc_compression_is_active: + if 
retries <= 0: + logger.warning("Timeout waiting for JC compression task to finish") + break + retries -= 1 + logger.info( + f"JC compression task found on node: {sec_node.get_id()}, retrying in 60 seconds") + time.sleep(60) + jc_compression_is_active = sec_node.rpc_client().jc_compression_get_status( + snode.jm_vuid) + except Exception as e: + logger.error(e) + return + + if node.lvstore_status == "ready": + lvstore_check = health_controller._check_node_lvstore(node.lvstore_stack, node, auto_fix=True) + if not lvstore_check: + msg = "Node LVolStore check fail, retry later" + logger.warning(msg) + task.function_result = msg + task.status = JobSchedule.STATUS_SUSPENDED + task.write_to_db(db.kv_store) + return + + if node.secondary_node_id: + primary_hublvol_check = health_controller._check_node_hublvol(node) + if not primary_hublvol_check: + msg = "Node hublvol check fail, retry later" + logger.warning(msg) + task.function_result = msg + task.status = JobSchedule.STATUS_SUSPENDED + task.write_to_db(db.kv_store) + return + + sec_node = db.get_storage_node_by_id(node.secondary_node_id) + if sec_node and sec_node.status == StorageNode.STATUS_ONLINE: + secondary_hublvol_check = health_controller._check_sec_node_hublvol(sec_node, auto_fix=True) + if not secondary_hublvol_check: + msg = "Secondary node hublvol check fail, retry later" + logger.warning(msg) + task.function_result = msg + task.status = JobSchedule.STATUS_SUSPENDED + task.write_to_db(db.kv_store) + return + + if task.status != JobSchedule.STATUS_RUNNING: + task.status = JobSchedule.STATUS_RUNNING + task.write_to_db(db.kv_store) + + try: + # wait for lvol sync delete + lvol_sync_del_found = tasks_controller.get_lvol_sync_del_task(task.cluster_id, task.node_id) + while lvol_sync_del_found: + logger.info("Lvol sync delete task found, waiting") + time.sleep(3) + lvol_sync_del_found = tasks_controller.get_lvol_sync_del_task(task.cluster_id, task.node_id) + + if sec_node and sec_node.status == StorageNode.STATUS_ONLINE: + sec_rpc_client = sec_node.rpc_client() + sec_rpc_client.bdev_lvol_set_leader(node.lvstore, leader=False, bs_nonleadership=True) + + except Exception as e: + logger.error(e) + return + + port_number = task.function_params["port_number"] + logger.info(f"Allow port {port_number} on node {node.get_id()}") + fw_api = FirewallClient(snode, timeout=5, retry=2) + port_type = "tcp" + if node.active_rdma: + port_type = "udp" + fw_api.firewall_set_port(port_number, port_type, "allow", node.rpc_port) + tcp_ports_events.port_allowed(node, port_number) + + task.function_result = f"Port {port_number} allowed on node" + task.status = JobSchedule.STATUS_DONE + task.write_to_db(db.kv_store) + + logger.info("Starting Tasks runner...") while True: - clusters = db.get_clusters() if not clusters: logger.error("No clusters found!") @@ -27,208 +243,10 @@ for cl in clusters: if cl.status == Cluster.STATUS_IN_ACTIVATION: continue - tasks = db.get_job_tasks(cl.get_id(), reverse=False) for task in tasks: - if task.function_name == JobSchedule.FN_PORT_ALLOW: if task.status != JobSchedule.STATUS_DONE: - - # get new task object because it could be changed from cancel task - task = db.get_task_by_id(task.uuid) - - if task.canceled: - task.function_result = "canceled" - task.status = JobSchedule.STATUS_DONE - task.write_to_db(db.kv_store) - continue - - node = db.get_storage_node_by_id(task.node_id) - - if not node: - task.function_result = "node not found" - task.status = JobSchedule.STATUS_DONE - task.write_to_db(db.kv_store) - continue - - if 
node.status not in [StorageNode.STATUS_DOWN, StorageNode.STATUS_ONLINE]: - msg = f"Node is {node.status}, retry task" - logger.info(msg) - task.function_result = msg - task.status = JobSchedule.STATUS_SUSPENDED - task.write_to_db(db.kv_store) - continue - - # check node ping - ping_check = health_controller._check_node_ping(node.mgmt_ip) - logger.info(f"Check: ping mgmt ip {node.mgmt_ip} ... {ping_check}") - if not ping_check: - time.sleep(1) - ping_check = health_controller._check_node_ping(node.mgmt_ip) - logger.info(f"Check 2: ping mgmt ip {node.mgmt_ip} ... {ping_check}") - - if not ping_check: - msg = "Node ping is false, retry task" - logger.info(msg) - task.function_result = msg - task.status = JobSchedule.STATUS_SUSPENDED - task.write_to_db(db.kv_store) - continue - - # check node ping - logger.info("connect to remote devices") - nodes = db.get_storage_nodes_by_cluster_id(node.cluster_id) - # connect to remote devs - try: - node_bdevs = node.rpc_client().get_bdevs() - logger.debug(node_bdevs) - if node_bdevs: - node_bdev_names = {} - for b in node_bdevs: - node_bdev_names[b['name']] = b - for al in b['aliases']: - node_bdev_names[al] = b - else: - node_bdev_names = {} - remote_devices = [] - for nd in nodes: - if nd.get_id() == node.get_id() or nd.status not in [StorageNode.STATUS_ONLINE, StorageNode.STATUS_DOWN]: - continue - logger.info(f"Connecting to node {nd.get_id()}") - for index, dev in enumerate(nd.nvme_devices): - - if dev.status not in [NVMeDevice.STATUS_ONLINE, NVMeDevice.STATUS_READONLY, - NVMeDevice.STATUS_CANNOT_ALLOCATE]: - logger.debug(f"Device is not online: {dev.get_id()}, status: {dev.status}") - continue - - if not dev.alceml_bdev: - raise ValueError(f"device alceml bdev not found!, {dev.get_id()}") - - remote_device = RemoteDevice() - remote_device.uuid = dev.uuid - remote_device.alceml_name = dev.alceml_name - remote_device.node_id = dev.node_id - remote_device.size = dev.size - remote_device.nvmf_multipath = dev.nvmf_multipath - remote_device.status = NVMeDevice.STATUS_ONLINE - remote_device.remote_bdev = storage_node_ops.connect_device( - f"remote_{dev.alceml_bdev}", dev, node, - bdev_names=list(node_bdev_names), reattach=False) - - remote_devices.append(remote_device) - if not remote_devices: - msg = "Node unable to connect to remote devs, retry task" - logger.info(msg) - task.function_result = msg - task.status = JobSchedule.STATUS_SUSPENDED - task.write_to_db(db.kv_store) - continue - else: - node = db.get_storage_node_by_id(task.node_id) - node.remote_devices = remote_devices - node.write_to_db() - - logger.info("connect to remote JM devices") - remote_jm_devices = storage_node_ops._connect_to_remote_jm_devs(node) - if not remote_jm_devices or len(remote_jm_devices) < 2: - msg = "Node unable to connect to remote JMs, retry task" - logger.info(msg) - task.function_result = msg - task.status = JobSchedule.STATUS_SUSPENDED - task.write_to_db(db.kv_store) - continue - else: - node = db.get_storage_node_by_id(task.node_id) - node.remote_jm_devices = remote_jm_devices - node.write_to_db() - - - except Exception as e: - logger.error(e) - msg = "Error when connect to remote devs, retry task" - logger.info(msg) - task.function_result = msg - task.status = JobSchedule.STATUS_SUSPENDED - task.write_to_db(db.kv_store) - continue - - logger.info("Sending device status event") - for db_dev in node.nvme_devices: - distr_controller.send_dev_status_event(db_dev, db_dev.status) - - logger.info("Finished sending device status and now waiting 5s for JMs to connect") - 
time.sleep(5) - - sec_node = db.get_storage_node_by_id(node.secondary_node_id) - snode = db.get_storage_node_by_id(node.get_id()) - if sec_node and sec_node.status == StorageNode.STATUS_ONLINE: - ret = sec_node.rpc_client().bdev_lvol_get_lvstores(snode.lvstore) - if ret: - lvs_info = ret[0] - if "lvs leadership" in lvs_info and lvs_info['lvs leadership']: - # is_sec_node_leader = True - # check jc_compression status - jc_compression_is_active = sec_node.rpc_client().jc_compression_get_status(snode.jm_vuid) - retries = 10 - while jc_compression_is_active: - if retries <= 0: - logger.warning("Timeout waiting for JC compression task to finish") - break - retries -= 1 - logger.info( - f"JC compression task found on node: {sec_node.get_id()}, retrying in 60 seconds") - time.sleep(60) - jc_compression_is_active = sec_node.rpc_client().jc_compression_get_status( - snode.jm_vuid) - - lvstore_check = True - if node.lvstore_status == "ready": - lvstore_check &= health_controller._check_node_lvstore(node.lvstore_stack, node, auto_fix=True) - if node.secondary_node_id: - lvstore_check &= health_controller._check_node_hublvol(node) - sec_node = db.get_storage_node_by_id(node.secondary_node_id) - if sec_node and sec_node.status == StorageNode.STATUS_ONLINE: - lvstore_check &= health_controller._check_sec_node_hublvol(sec_node, auto_fix=True) - - if lvstore_check is False: - msg = "Node LVolStore check fail, retry later" - logger.warning(msg) - task.function_result = msg - task.status = JobSchedule.STATUS_SUSPENDED - task.write_to_db(db.kv_store) - continue - - if task.status != JobSchedule.STATUS_RUNNING: - task.status = JobSchedule.STATUS_RUNNING - task.write_to_db(db.kv_store) - - # wait for lvol sync delete - lvol_sync_del_found = tasks_controller.get_lvol_sync_del_task(task.cluster_id, task.node_id) - while lvol_sync_del_found: - logger.info("Lvol sync delete task found, waiting") - can_continue = False - time.sleep(3) - lvol_sync_del_found = tasks_controller.get_lvol_sync_del_task(task.cluster_id, task.node_id) - - if sec_node and sec_node.status == StorageNode.STATUS_ONLINE: - sec_rpc_client = sec_node.rpc_client() - sec_rpc_client.bdev_lvol_set_leader(node.lvstore, leader=False, bs_nonleadership=True) - - port_number = task.function_params["port_number"] - snode_api = SNodeClient(f"{node.mgmt_ip}:5000", timeout=3, retry=2) - - logger.info(f"Allow port {port_number} on node {node.get_id()}") - - fw_api = FirewallClient(snode, timeout=5, retry=2) - port_type = "tcp" - if node.active_rdma: - port_type = "udp" - fw_api.firewall_set_port(port_number, port_type, "allow", node.rpc_port) - tcp_ports_events.port_allowed(node, port_number) - - task.function_result = f"Port {port_number} allowed on node" - task.status = JobSchedule.STATUS_DONE - task.write_to_db(db.kv_store) + exec_port_allow_task(task) time.sleep(5) diff --git a/simplyblock_core/services/tasks_runner_restart.py b/simplyblock_core/services/tasks_runner_restart.py index 2cfc82a53..e816f2b80 100644 --- a/simplyblock_core/services/tasks_runner_restart.py +++ b/simplyblock_core/services/tasks_runner_restart.py @@ -191,19 +191,26 @@ def task_runner_node(task): return False - # shutting down node - logger.info(f"Shutdown node {node.get_id()}") - ret = storage_node_ops.shutdown_storage_node(node.get_id(), force=True) - if ret: - logger.info("Node shutdown succeeded") - - time.sleep(3) + try: + # shutting down node + logger.info(f"Shutdown node {node.get_id()}") + ret = storage_node_ops.shutdown_storage_node(node.get_id(), force=True) + if ret: + 
logger.info("Node shutdown succeeded") + time.sleep(3) + except Exception as e: + logger.error(e) + return False - # resetting node - logger.info(f"Restart node {node.get_id()}") - ret = storage_node_ops.restart_storage_node(node.get_id(), force=True) - if ret: - logger.info("Node restart succeeded") + try: + # resetting node + logger.info(f"Restart node {node.get_id()}") + ret = storage_node_ops.restart_storage_node(node.get_id(), force=True) + if ret: + logger.info("Node restart succeeded") + except Exception as e: + logger.error(e) + return False time.sleep(3) node = db.get_storage_node_by_id(task.node_id) diff --git a/simplyblock_core/snode_client.py b/simplyblock_core/snode_client.py index 6f1bee0db..f51742c68 100644 --- a/simplyblock_core/snode_client.py +++ b/simplyblock_core/snode_client.py @@ -40,8 +40,7 @@ def _request(self, method, path, payload=None): response = self.session.request(method, self.url+path, data=data, timeout=self.timeout, params=params) except Exception as e: - logger.error("Request failed: %s", e) - raise e + raise SNodeClientException(str(e)) logger.debug("Response: status_code: %s, content: %s", response.status_code, response.content) @@ -69,11 +68,15 @@ def _request(self, method, path, payload=None): if ret_code == 422: raise SNodeClientException(f"Request validation failed: '{response.text}'") - logger.error("Unknown http status: %s", ret_code) - return None, None + raise SNodeClientException(f"Unknown http status: {ret_code}") def is_live(self): - return self._request("GET", "check") + try: + return self._request("GET", "check") + except SNodeClientException: + logger.warning("Failed to call snode/check, trying snode/info") + return self.info() + def info(self): return self._request("GET", "info") diff --git a/simplyblock_core/storage_node_ops.py b/simplyblock_core/storage_node_ops.py index d026677d3..23e057777 100644 --- a/simplyblock_core/storage_node_ops.py +++ b/simplyblock_core/storage_node_ops.py @@ -222,9 +222,9 @@ def _create_jm_stack_on_raid(rpc_client, jm_nvme_bdevs, snode, after_restart): return False for iface in snode.data_nics: - logger.info(f"adding {iface.trtype} listener for %s on IP %s" % (subsystem_nqn, iface.ip4_address)) - ret = rpc_client.listeners_create(subsystem_nqn, iface.trtype, iface.ip4_address, snode.nvmf_port) - ip_list.append(iface.ip4_address) + logger.info(f"adding {iface.trtype} listener for %s on IP %s" % (subsystem_nqn, iface.ip4_address)) + ret = rpc_client.listeners_create(subsystem_nqn, iface.trtype, iface.ip4_address, snode.nvmf_port) + ip_list.append(iface.ip4_address) if len(ip_list) > 1: IP = ",".join(ip_list) @@ -788,6 +788,10 @@ def _connect_to_remote_jm_devs(this_node, jm_ids=None): if jm_dev and jm_dev not in remote_devices: remote_devices.append(jm_dev) + logger.debug(f"remote_devices: {remote_devices}") + allowed_node_statuses = [StorageNode.STATUS_ONLINE, StorageNode.STATUS_DOWN, StorageNode.STATUS_RESTARTING] + allowed_dev_statuses = [NVMeDevice.STATUS_ONLINE] + new_devs = [] for jm_dev in remote_devices: if not jm_dev.jm_bdev: @@ -804,6 +808,14 @@ def _connect_to_remote_jm_devs(this_node, jm_ids=None): if not org_dev or org_dev in new_devs or org_dev_node and org_dev_node.get_id() == this_node.get_id(): continue + if org_dev_node is not None and org_dev_node.status not in allowed_node_statuses: + logger.warning(f"Skipping node:{org_dev_node.get_id()} with status: {org_dev_node.status}") + continue + + if org_dev is not None and org_dev.status not in allowed_dev_statuses: + logger.warning(f"Skipping 
device:{org_dev.get_id()} with status: {org_dev.status}") + continue + remote_device = RemoteJMDevice() remote_device.uuid = org_dev.uuid remote_device.alceml_name = org_dev.alceml_name @@ -814,8 +826,8 @@ def _connect_to_remote_jm_devs(this_node, jm_ids=None): remote_device.nvmf_multipath = org_dev.nvmf_multipath try: remote_device.remote_bdev = connect_device( - f"remote_{org_dev.jm_bdev}", org_dev, this_node, - bdev_names=node_bdev_names, reattach=True, + f"remote_{org_dev.jm_bdev}", org_dev, this_node, + bdev_names=node_bdev_names, reattach=True, ) except RuntimeError: logger.error(f'Failed to connect to {org_dev.get_id()}') @@ -1058,12 +1070,12 @@ def add_node(cluster_id, node_addr, iface_name,data_nics_list, logger.debug(f"Data nics ports are: {names}") for nic in names: device = node_info['network_interface'][nic] - base_ifc_cfg={ - 'uuid': str(uuid.uuid4()), - 'if_name': nic, - 'ip4_address': device['ip'], - 'status': device['status'], - 'net_type': device['net_type'],} + base_ifc_cfg = { + 'uuid': str(uuid.uuid4()), + 'if_name': nic, + 'ip4_address': device['ip'], + 'status': device['status'], + 'net_type': device['net_type'], } if fabric_rdma and snode_api.ifc_is_roce(nic): cfg = base_ifc_cfg.copy() cfg['trtype'] = "RDMA" @@ -1299,8 +1311,8 @@ def add_node(cluster_id, node_addr, iface_name,data_nics_list, logger.info("Setting Alcemls QOS weights") ret = rpc_client.alceml_set_qos_weights(qos_controller.get_qos_weights_list(cluster_id)) if not ret: - logger.error("Failed to set Alcemls QOS") - return False + logger.error("Failed to set Alcemls QOS") + return False logger.info("Connecting to remote devices") remote_devices = _connect_to_remote_devs(snode) @@ -2154,8 +2166,8 @@ def list_storage_devices(node_id, is_json): "Health": snode.jm_device.health_check }) - for device in snode.remote_devices: - logger.debug(device) + for remote_device in snode.remote_devices: + logger.debug(remote_device) logger.debug("*" * 20) name = remote_device.alceml_name status = remote_device.status @@ -2360,34 +2372,32 @@ def suspend_storage_node(node_id, force=False): if snode.lvstore_stack_secondary_1: nodes = db_controller.get_primary_storage_nodes_by_secondary_node_id(node_id) if nodes: - for node in nodes: + for node in nodes: try: fw_api.firewall_set_port( node.hublvol.nvmf_port, port_type, "block", snode.rpc_port, is_reject=True) fw_api.firewall_set_port( node.lvol_subsys_port, port_type, "block", snode.rpc_port, is_reject=True) + time.sleep(0.5) + rpc_client.bdev_lvol_set_leader(node.lvstore, leader=False) + rpc_client.bdev_distrib_force_to_non_leader(node.jm_vuid) except Exception as e: logger.error(e) return False - time.sleep(0.5) - rpc_client.bdev_lvol_set_leader(node.lvstore, leader=False) - rpc_client.bdev_distrib_force_to_non_leader(node.jm_vuid) try: fw_api.firewall_set_port( snode.hublvol.nvmf_port, port_type, "block", snode.rpc_port, is_reject=True) fw_api.firewall_set_port( snode.lvol_subsys_port, port_type, "block", snode.rpc_port, is_reject=True) + time.sleep(0.5) + rpc_client.bdev_lvol_set_leader(snode.lvstore, leader=False) + rpc_client.bdev_distrib_force_to_non_leader(snode.jm_vuid) + time.sleep(1) except Exception as e: logger.error(e) return False - time.sleep(0.5) - rpc_client.bdev_lvol_set_leader(snode.lvstore, leader=False) - rpc_client.bdev_distrib_force_to_non_leader(snode.jm_vuid) - time.sleep(1) - - logger.info("Done") return True @@ -2439,7 +2449,7 @@ def resume_storage_node(node_id): port_type = "udp" nodes = 
db_controller.get_primary_storage_nodes_by_secondary_node_id(node_id)
     if nodes:
-      for node in nodes:
+        for node in nodes:
             try:
                 fw_api.firewall_set_port(
                     node.lvol_subsys_port, port_type, "allow", snode.rpc_port)
@@ -3374,7 +3384,7 @@ def get_sorted_ha_jms(current_node):
             continue
         mgmt_ips.append(jm_dev_to_mgmt_ip[jm_id])
         out.append(jm_id)
-    return out[:constants.HA_JM_COUNT-1]
+    return out[:current_node.ha_jm_count - 1]
 
 
 def get_node_jm_names(current_node, remote_node=None):
@@ -3405,7 +3415,7 @@ def get_node_jm_names(current_node, remote_node=None):
             if jm_dev.get_id() == jm_id:
                 jm_list.append(jm_dev.remote_bdev)
                 break
-    return jm_list[:constants.HA_JM_COUNT]
+    return jm_list[:current_node.ha_jm_count]
 
 
 def get_secondary_nodes(current_node):
diff --git a/simplyblock_core/utils/__init__.py b/simplyblock_core/utils/__init__.py
index 2bc006b56..4ac48a8e3 100644
--- a/simplyblock_core/utils/__init__.py
+++ b/simplyblock_core/utils/__init__.py
@@ -514,15 +514,15 @@ def generate_mask(cores):
 def calculate_pool_count(alceml_count, number_of_distribs, cpu_count, poller_count):
     '''
         Small pool count                Large pool count
-    Create JM                           32          For each JM
+    Create JM               256         32          For each JM
 
-    RAID                                32      2   one for raid of JM and one for raid of ditribs
+    RAID                    256         32      2   one for raid of JM and one for raid of distribs
 
-    Create Alceml                       32          For each Alceml
+    Create Alceml           256         32          For each Alceml
 
-    Create Distrib                      32          For each distrib
+    Create Distrib          256         32          For each distrib
 
-    First Send cluster map              32          Calculated or one time
+    First Send cluster map  256         32          Calculated or one time
 
     NVMF transport TCP  127 * poll_groups_mask||CPUCount + 384     15 * poll_groups_mask||CPUCount + 384    Calculated or one time
@@ -530,23 +530,30 @@ def calculate_pool_count(alceml_count, number_of_distribs, cpu_count, poller_cou
 
     ####Create snapshot     512     64      For each snapshot
 
-    ####Clone lvol          32      For each clone
+    ####Clone lvol  256     32      For each clone
 
     '''
     poller_number = poller_count if poller_count else cpu_count
     small_pool_count = 384 * (alceml_count + number_of_distribs + 3 + poller_count) + (
-            6 + alceml_count + number_of_distribs) * + poller_number * 127 + 384 + 128 * poller_number + constants.EXTRA_SMALL_POOL_COUNT
+            6 + alceml_count + number_of_distribs) * 256 + poller_number * 127 + 384 + 128 * poller_number + constants.EXTRA_SMALL_POOL_COUNT
     large_pool_count = 48 * (alceml_count + number_of_distribs + 3 + poller_count) + (
             6 + alceml_count + number_of_distribs) * 32 + poller_number * 15 + 384 + 16 * poller_number + constants.EXTRA_LARGE_POOL_COUNT
-    return int(small_pool_count), int(large_pool_count)
+    return int(4.0 * small_pool_count), int(2.5 * large_pool_count)
 
 
 def calculate_minimum_hp_memory(small_pool_count, large_pool_count, lvol_count, max_prov, cpu_count):
-
-    pool_consumption = (small_pool_count * 8 + large_pool_count * 128) / 1024
-    memory_consumption = (4 * cpu_count + 1.1 * pool_consumption + 22 * lvol_count) * (1024 * 1024) + constants.EXTRA_HUGE_PAGE_MEMORY
+    '''
+    1092 (initial consumption) + 4 * CPU + 1.0277 * POOL_COUNT(Sum in MB) + (25) * lvol_count
+    then amend the expected memory need for the creation of lvols (6MB each),
+    connections over lvols (7MB per connection), and creation of snaps (12MB each),
+    plus an extra 2GB buffer
+    return: minimum_hp_memory in bytes
+    '''
+    pool_consumption = (small_pool_count * 8 + large_pool_count * 128) / 1024 + 1092
+    memory_consumption = (4 * cpu_count + 1.0277 * pool_consumption + 25 * lvol_count) * (1024 * 1024) + (
+            250 * 1024 * 1024) * 1.1 * convert_size(max_prov, 'TiB') + constants.EXTRA_HUGE_PAGE_MEMORY
return int(1.2 * memory_consumption)
diff --git a/simplyblock_web/api/internal/storage_node/docker.py b/simplyblock_web/api/internal/storage_node/docker.py
index 68a2fb10a..d1ee4f9f0 100644
--- a/simplyblock_web/api/internal/storage_node/docker.py
+++ b/simplyblock_web/api/internal/storage_node/docker.py
@@ -155,8 +155,7 @@ def spdk_process_start(body: SPDKParams):
     ssd_pcie_list = " ".join(body.ssd_pcie) if body.ssd_pcie else "none"
     spdk_debug = '1' if body.spdk_debug else ''
     total_mem_mib = core_utils.convert_size(core_utils.parse_size(body.total_mem), 'MiB') if body.total_mem else ''
-    # spdk_mem_mib = core_utils.convert_size(body.spdk_mem, 'MiB')
-    spdk_mem_mib = 0
+    spdk_mem_mib = core_utils.convert_size(body.spdk_mem, 'MiB')
 
     node_docker = get_docker_client(timeout=60 * 3)
     for name in {f"/spdk_{body.rpc_port}", f"/spdk_proxy_{body.rpc_port}"}:

From 25a4becea99d2f450ccb5de7a6555c9c5029d94c Mon Sep 17 00:00:00 2001
From: hamdykhader
Date: Tue, 2 Dec 2025 01:38:14 +0300
Subject: [PATCH 5/5] fix type issues

---
 simplyblock_core/controllers/health_controller.py | 4 ++--
 simplyblock_core/services/storage_node_monitor.py | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/simplyblock_core/controllers/health_controller.py b/simplyblock_core/controllers/health_controller.py
index 7c8b95036..0dde91243 100644
--- a/simplyblock_core/controllers/health_controller.py
+++ b/simplyblock_core/controllers/health_controller.py
@@ -128,10 +128,10 @@ def _check_node_api(ip):
         return False
 
 
-def _check_spdk_process_up(ip, rpc_port):
+def _check_spdk_process_up(ip, rpc_port, cluster_id):
     snode_api = SNodeClient(f"{ip}:5000", timeout=90, retry=2)
     logger.debug(f"Node API={ip}:5000")
-    is_up, _ = snode_api.spdk_process_is_up(rpc_port)
+    is_up, _ = snode_api.spdk_process_is_up(rpc_port, cluster_id)
     logger.debug(f"SPDK is {is_up}")
     return is_up
 
diff --git a/simplyblock_core/services/storage_node_monitor.py b/simplyblock_core/services/storage_node_monitor.py
index fffd346e5..b3c04d27a 100644
--- a/simplyblock_core/services/storage_node_monitor.py
+++ b/simplyblock_core/services/storage_node_monitor.py
@@ -349,7 +349,7 @@ def check_node(snode):
     # 3- check spdk process through node API
     try:
         snode_api = SNodeClient(f"{snode.mgmt_ip}:5000", timeout=20, retry=2)
-        is_up, _ = snode_api.spdk_process_is_up( snode.rpc_port)
+        is_up, _ = snode_api.spdk_process_is_up(snode.rpc_port, snode.cluster_id)
         logger.info(f"Check: spdk process {snode.mgmt_ip}:5000 ... {bool(is_up)}")
         if not is_up:
             logger.info("Check: node API failed, setting node offline")