diff --git a/README.md b/README.md index b20e760..d6a1ec8 100644 --- a/README.md +++ b/README.md @@ -152,17 +152,33 @@ $ xe host-call-plugin host-uuid= plugin=lsblk.py fn=list_block_devices ## Smartctl parser -A xapi plugin to get information and health of physical disks on the host +This XAPI plugin provides information and health details for the physical disks on the host. + +It uses the `smartctl --scan` command to retrieve the list of devices. For devices managed by +MegaRAID, the device names may be identical. To handle this, the plugin returns information +for each unique "name:type" pair. + +The plugin parses the JSON output from the `smartctl` command to gather information and health +data. As a result, it requires a version of `smartctl` capable of producing JSON output. +This functionality is available in **XCP-ng 8.3**, but not in **XCP-ng 8.2**. + ### `information`: + +This function returns information about all detected devices. The JSON can be quite big. + ``` xe host-call-plugin host-uuid= plugin=smartctl.py fn=information -{"/dev/sdf": {"power_on_time": {"hours": 9336}, "ata_version": {"minor_value": 94, "string": "ACS-4 T13/BSR INCITS 529 revision 5", "major_value": 2556}, "form_factor": {"ata_value": 3, "name": "2.5 inches"}, "firmware_version": "SVQ02B6Q", "wwn": {"oui": 9528, "naa": 5, "id": 65536604056}, "smart_status": {"passed": true}, "smartctl": {"build_info": "(local build)", "exit_status": 0, "argv": ["smartctl", "-j", "-a", "/dev/sdf"], "version": [7, 0], "svn_revision": "4883", "platform_info": "x86_64-linux-4.19.0+1"}, "temperature": {"current": 35}, "rotation_rate": 0, "interface_speed": {"current": {"sata_value": 3, "units_per_second": 60, "string": "6.0 Gb/s", "bits_per_unit": 100000000}, [...] 
} +{"/dev/nvme1:nvme": {"smart_status": {"nvme": {"value": 0}, "passed": true}, "nvme_controller_id": 0, "smartctl": {"build_info": "(local build)", "exit_status": 0, "argv": ["smartctl", "-j", "-a", "-d", "nvme", + "/dev/nvme1"], "version": [7, 0], "svn_revision": "4883", "platform_info": "x86_64-linux-4.19.0+1"}, "temperature": {"current": 32}, ... ``` ### `health`: + +This function returns the health status of each detected device. + ``` xe host-call-plugin host-uuid= plugin=smartctl.py fn=health -{"/dev/sdf": "PASSED", "/dev/sdg": "PASSED", "/dev/sdd": "PASSED", "/dev/sde": "PASSED", "/dev/sdb": "PASSED", "/dev/sdc": "PASSED", "/dev/sda": "PASSED"} +{"/dev/nvme1:nvme": "PASSED", "/dev/sda:scsi": "PASSED", "/dev/nvme0:nvme": "PASSED", "/dev/bus/0:megaraid,1": "PASSED", "/dev/bus/0:megaraid,0": "PASSED"} ``` ## Netdata diff --git a/SOURCES/etc/xapi.d/plugins/smartctl.py b/SOURCES/etc/xapi.d/plugins/smartctl.py index 053f223..d4c55bb 100755 --- a/SOURCES/etc/xapi.d/plugins/smartctl.py +++ b/SOURCES/etc/xapi.d/plugins/smartctl.py @@ -10,36 +10,37 @@ from xcpngutils.operationlocker import OperationLocker @error_wrapped -def _list_disks(): - disks = [] +def _list_devices(): + devices = [] result = run_command(['smartctl', '--scan']) for line in result['stdout'].splitlines(): - if line.startswith('/dev/') and not line.startswith('/dev/bus/'): - disks.append(line.split()[0]) - return disks + devices.append({'name': line.split()[0], 'type': line.split()[2]}) + return devices @error_wrapped def get_information(session, args): results = {} with OperationLocker(): - disks = _list_disks() - for disk in disks: - cmd = run_command(["smartctl", "-j", "-a", disk], check=False) - results[disk] = json.loads(cmd['stdout']) + devices = _list_devices() + for device in devices: + cmd = run_command(["smartctl", "-j", "-a", "-d", device['type'], device['name']], check=False) + # We use the name + type as a key because we can have several megaraid with the same name. 
+ # So we use the type to differentiate them. + results[device['name'] + ":" + device['type']] = json.loads(cmd['stdout']) return json.dumps(results) @error_wrapped def get_health(session, args): results = {} with OperationLocker(): - disks = _list_disks() - for disk in disks: - cmd = run_command(["smartctl", "-j", "-H", disk]) + devices = _list_devices() + for device in devices: + cmd = run_command(["smartctl", "-j", "-H", "-d", device['type'], device['name']], check=False) json_output = json.loads(cmd['stdout']) if json_output['smart_status']['passed']: - results[disk] = "PASSED" + results[device['name'] + ":" + device['type']] = "PASSED" else: - results[disk] = "FAILED" + results[device['name'] + ":" + device['type']] = "FAILED" return json.dumps(results) diff --git a/tests/smartctl_outputs/__init__.py b/tests/smartctl_outputs/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/smartctl_outputs/smartctl_expected_output.py b/tests/smartctl_outputs/smartctl_expected_output.py new file mode 100644 index 0000000..192327d --- /dev/null +++ b/tests/smartctl_outputs/smartctl_expected_output.py @@ -0,0 +1,30 @@ +import json +from smartctl_outputs.smartctl_sda import INFO_SDA +from smartctl_outputs.smartctl_nvme1 import INFO_NVME1 +from smartctl_outputs.smartctl_megaraid0 import INFO_MEGARAID0 +from smartctl_outputs.smartctl_megaraid1 import INFO_MEGARAID1 + +# Parse the INFO JSON string for each device +info_sda_dict = json.loads(INFO_SDA) +info_nvme1_dict = json.loads(INFO_NVME1) +info_megaraid0_dict = json.loads(INFO_MEGARAID0) +info_megaraid1_dict = json.loads(INFO_MEGARAID1) + +expected_info_dict = { + "/dev/sda:sat": info_sda_dict, + "/dev/nvme1:nvme": info_nvme1_dict, + "/dev/bus/0:megaraid,0": info_megaraid0_dict, + "/dev/bus/0:megaraid,1": info_megaraid1_dict, +} + +# Convert the result back to a JSON string +EXPECTED_INFO = json.dumps(expected_info_dict, indent=2) + +expected_health_dict = { + "/dev/sda:sat": "PASSED", + 
"/dev/nvme1:nvme": "PASSED", + "/dev/bus/0:megaraid,0": "PASSED", + "/dev/bus/0:megaraid,1": "PASSED", +} + +EXPECTED_HEALTH = json.dumps(expected_health_dict, indent=2) diff --git a/tests/smartctl_outputs/smartctl_megaraid0.py b/tests/smartctl_outputs/smartctl_megaraid0.py new file mode 100644 index 0000000..c41e5f3 --- /dev/null +++ b/tests/smartctl_outputs/smartctl_megaraid0.py @@ -0,0 +1,774 @@ +# Outputs are from real hardware: +# INFO -> smartctl -j -a -d megaraid,0 /dev/bus/0 | jq . +# HEALTH -> smartctl -j -H -d megaraid,0 /dev/bus/0 | jq . + +INFO_MEGARAID0 = """{ + "json_format_version": [ + 1, + 0 + ], + "smartctl": { + "version": [ + 7, + 0 + ], + "svn_revision": "4883", + "platform_info": "x86_64-linux-4.19.0+1", + "build_info": "(local build)", + "argv": [ + "smartctl", + "-j", + "-a", + "-d", + "megaraid,0", + "/dev/bus/0" + ], + "messages": [ + { + "string": "Warning: This result is based on an Attribute check.", + "severity": "warning" + } + ], + "exit_status": 4 + }, + "device": { + "name": "/dev/bus/0", + "info_name": "/dev/bus/0 [megaraid_disk_00] [SAT]", + "type": "sat+megaraid,0", + "protocol": "ATA" + }, + "model_family": "Phison Driven SSDs", + "model_name": "KINGSTON SA400S37240G", + "serial_number": "50026B73815A6C11", + "wwn": { + "naa": 5, + "oui": 9911, + "id": 15055088657 + }, + "firmware_version": "SAI20102", + "user_capacity": { + "blocks": 468862128, + "bytes": 240057409536 + }, + "logical_block_size": 512, + "physical_block_size": 512, + "rotation_rate": 0, + "in_smartctl_database": true, + "ata_version": { + "string": "ACS-3 T13/2161-D revision 4", + "major_value": 2040, + "minor_value": 283 + }, + "sata_version": { + "string": "SATA 3.2", + "value": 255 + }, + "interface_speed": { + "max": { + "sata_value": 14, + "string": "6.0 Gb/s", + "units_per_second": 60, + "bits_per_unit": 100000000 + }, + "current": { + "sata_value": 3, + "string": "6.0 Gb/s", + "units_per_second": 60, + "bits_per_unit": 100000000 + } + }, + "local_time": 
{ + "time_t": 1738053887, + "asctime": "Tue Jan 28 09:44:47 2025 CET" + }, + "smart_status": { + "passed": true + }, + "ata_smart_data": { + "offline_data_collection": { + "status": { + "value": 0, + "string": "was never started" + }, + "completion_seconds": 120 + }, + "self_test": { + "status": { + "value": 0, + "string": "completed without error", + "passed": true + }, + "polling_minutes": { + "short": 2, + "extended": 10 + } + }, + "capabilities": { + "values": [ + 17, + 2 + ], + "exec_offline_immediate_supported": true, + "offline_is_aborted_upon_new_cmd": false, + "offline_surface_scan_supported": false, + "self_tests_supported": true, + "conveyance_self_test_supported": false, + "selective_self_test_supported": false, + "attribute_autosave_enabled": false, + "error_logging_supported": true, + "gp_logging_supported": true + } + }, + "ata_smart_attributes": { + "revision": 1, + "table": [ + { + "id": 1, + "name": "Raw_Read_Error_Rate", + "value": 100, + "worst": 100, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 100, + "string": "100" + } + }, + { + "id": 9, + "name": "Power_On_Hours", + "value": 100, + "worst": 100, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 23872, + "string": "23872" + } + }, + { + "id": 12, + "name": "Power_Cycle_Count", + "value": 100, + "worst": 100, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 10, + "string": "10" + } + }, + 
{ + "id": 148, + "name": "Unknown_Attribute", + "value": 100, + "worst": 100, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 0, + "string": "------ ", + "prefailure": false, + "updated_online": false, + "performance": false, + "error_rate": false, + "event_count": false, + "auto_keep": false + }, + "raw": { + "value": 0, + "string": "0" + } + }, + { + "id": 149, + "name": "Unknown_Attribute", + "value": 100, + "worst": 100, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 0, + "string": "------ ", + "prefailure": false, + "updated_online": false, + "performance": false, + "error_rate": false, + "event_count": false, + "auto_keep": false + }, + "raw": { + "value": 0, + "string": "0" + } + }, + { + "id": 167, + "name": "Unknown_Attribute", + "value": 100, + "worst": 100, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 0, + "string": "------ ", + "prefailure": false, + "updated_online": false, + "performance": false, + "error_rate": false, + "event_count": false, + "auto_keep": false + }, + "raw": { + "value": 0, + "string": "0" + } + }, + { + "id": 168, + "name": "SATA_Phy_Error_Count", + "value": 100, + "worst": 100, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 18, + "string": "-O--C- ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": false + }, + "raw": { + "value": 0, + "string": "0" + } + }, + { + "id": 169, + "name": "Unknown_Attribute", + "value": 100, + "worst": 100, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 0, + "string": "------ ", + "prefailure": false, + "updated_online": false, + "performance": false, + "error_rate": false, + "event_count": false, + "auto_keep": false + }, + "raw": { + "value": 0, + "string": "0" + } + }, + { + "id": 170, + "name": "Bad_Blk_Ct_Erl/Lat", + "value": 100, + "worst": 100, + "thresh": 10, + "when_failed": "", + "flags": { + "value": 0, + "string": "------ ", + "prefailure": 
false, + "updated_online": false, + "performance": false, + "error_rate": false, + "event_count": false, + "auto_keep": false + }, + "raw": { + "value": 0, + "string": "0/0" + } + }, + { + "id": 172, + "name": "Unknown_Attribute", + "value": 100, + "worst": 100, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 0, + "string": "0" + } + }, + { + "id": 173, + "name": "MaxAvgErase_Ct", + "value": 100, + "worst": 100, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 0, + "string": "------ ", + "prefailure": false, + "updated_online": false, + "performance": false, + "error_rate": false, + "event_count": false, + "auto_keep": false + }, + "raw": { + "value": 0, + "string": "0" + } + }, + { + "id": 181, + "name": "Program_Fail_Cnt_Total", + "value": 100, + "worst": 100, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 0, + "string": "0" + } + }, + { + "id": 182, + "name": "Erase_Fail_Count_Total", + "value": 100, + "worst": 100, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 0, + "string": "------ ", + "prefailure": false, + "updated_online": false, + "performance": false, + "error_rate": false, + "event_count": false, + "auto_keep": false + }, + "raw": { + "value": 0, + "string": "0" + } + }, + { + "id": 187, + "name": "Reported_Uncorrect", + "value": 100, + "worst": 100, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 0, + "string": "0" + } + }, 
+ { + "id": 192, + "name": "Unsafe_Shutdown_Count", + "value": 100, + "worst": 100, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 18, + "string": "-O--C- ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": false + }, + "raw": { + "value": 8, + "string": "8" + } + }, + { + "id": 194, + "name": "Temperature_Celsius", + "value": 26, + "worst": 37, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 34, + "string": "-O---K ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": false, + "auto_keep": true + }, + "raw": { + "value": 81606803482, + "string": "26 (Min/Max 19/37)" + } + }, + { + "id": 196, + "name": "Not_In_Use", + "value": 100, + "worst": 100, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 0, + "string": "0" + } + }, + { + "id": 199, + "name": "CRC_Error_Count", + "value": 100, + "worst": 100, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 0, + "string": "0" + } + }, + { + "id": 218, + "name": "CRC_Error_Count", + "value": 100, + "worst": 100, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 0, + "string": "0" + } + }, + { + "id": 231, + "name": "SSD_Life_Left", + "value": 97, + "worst": 97, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 0, + "string": "------ ", + 
"prefailure": false, + "updated_online": false, + "performance": false, + "error_rate": false, + "event_count": false, + "auto_keep": false + }, + "raw": { + "value": 97, + "string": "97" + } + }, + { + "id": 233, + "name": "Flash_Writes_GiB", + "value": 100, + "worst": 100, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 1171, + "string": "1171" + } + }, + { + "id": 241, + "name": "Lifetime_Writes_GiB", + "value": 100, + "worst": 100, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 2168, + "string": "2168" + } + }, + { + "id": 242, + "name": "Lifetime_Reads_GiB", + "value": 100, + "worst": 100, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 1360, + "string": "1360" + } + }, + { + "id": 244, + "name": "Average_Erase_Count", + "value": 100, + "worst": 100, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 0, + "string": "------ ", + "prefailure": false, + "updated_online": false, + "performance": false, + "error_rate": false, + "event_count": false, + "auto_keep": false + }, + "raw": { + "value": 35, + "string": "35" + } + }, + { + "id": 245, + "name": "Max_Erase_Count", + "value": 100, + "worst": 100, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 0, + "string": "------ ", + "prefailure": false, + "updated_online": false, + "performance": false, + "error_rate": false, + "event_count": false, + "auto_keep": false + }, + "raw": { + "value": 
118, + "string": "118" + } + }, + { + "id": 246, + "name": "Total_Erase_Count", + "value": 100, + "worst": 100, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 0, + "string": "------ ", + "prefailure": false, + "updated_online": false, + "performance": false, + "error_rate": false, + "event_count": false, + "auto_keep": false + }, + "raw": { + "value": 58523, + "string": "58523" + } + } + ] + }, + "power_on_time": { + "hours": 23872 + }, + "power_cycle_count": 10, + "temperature": { + "current": 26 + }, + "ata_smart_error_log": { + "summary": { + "revision": 1, + "count": 0 + } + }, + "ata_smart_self_test_log": { + "standard": { + "revision": 1, + "table": [ + { + "type": { + "value": 1, + "string": "Short offline" + }, + "status": { + "value": 0, + "string": "Completed without error", + "passed": true + }, + "lifetime_hours": 3618 + } + ], + "count": 1, + "error_count_total": 0, + "error_count_outdated": 0 + } + } +}""" + +HEALTH_MEGARAID0 = """{ + "json_format_version": [ + 1, + 0 + ], + "smartctl": { + "version": [ + 7, + 0 + ], + "svn_revision": "4883", + "platform_info": "x86_64-linux-4.19.0+1", + "build_info": "(local build)", + "argv": [ + "smartctl", + "-j", + "-H", + "-d", + "megaraid,0", + "/dev/bus/0" + ], + "messages": [ + { + "string": "Warning: This result is based on an Attribute check.", + "severity": "warning" + } + ], + "exit_status": 4 + }, + "device": { + "name": "/dev/bus/0", + "info_name": "/dev/bus/0 [megaraid_disk_00] [SAT]", + "type": "sat+megaraid,0", + "protocol": "ATA" + }, + "smart_status": { + "passed": true + } +}""" diff --git a/tests/smartctl_outputs/smartctl_megaraid1.py b/tests/smartctl_outputs/smartctl_megaraid1.py new file mode 100644 index 0000000..727a2ba --- /dev/null +++ b/tests/smartctl_outputs/smartctl_megaraid1.py @@ -0,0 +1,774 @@ +# Outputs are from real hardware: +# INFO -> smartctl -j -a -d megaraid,1 /dev/bus/0 | jq . +# HEALTH -> smartctl -j -H -d megaraid,1 /dev/bus/0 | jq . 
+ +INFO_MEGARAID1 = """{ + "json_format_version": [ + 1, + 0 + ], + "smartctl": { + "version": [ + 7, + 0 + ], + "svn_revision": "4883", + "platform_info": "x86_64-linux-4.19.0+1", + "build_info": "(local build)", + "argv": [ + "smartctl", + "-j", + "-a", + "-d", + "megaraid,1", + "/dev/bus/0" + ], + "messages": [ + { + "string": "Warning: This result is based on an Attribute check.", + "severity": "warning" + } + ], + "exit_status": 4 + }, + "device": { + "name": "/dev/bus/0", + "info_name": "/dev/bus/0 [megaraid_disk_01] [SAT]", + "type": "sat+megaraid,1", + "protocol": "ATA" + }, + "model_family": "Phison Driven SSDs", + "model_name": "KINGSTON SA400S37240G", + "serial_number": "50026B73815A7309", + "wwn": { + "naa": 5, + "oui": 9911, + "id": 15055090441 + }, + "firmware_version": "SAI20102", + "user_capacity": { + "blocks": 468862128, + "bytes": 240057409536 + }, + "logical_block_size": 512, + "physical_block_size": 512, + "rotation_rate": 0, + "in_smartctl_database": true, + "ata_version": { + "string": "ACS-3 T13/2161-D revision 4", + "major_value": 2040, + "minor_value": 283 + }, + "sata_version": { + "string": "SATA 3.2", + "value": 255 + }, + "interface_speed": { + "max": { + "sata_value": 14, + "string": "6.0 Gb/s", + "units_per_second": 60, + "bits_per_unit": 100000000 + }, + "current": { + "sata_value": 3, + "string": "6.0 Gb/s", + "units_per_second": 60, + "bits_per_unit": 100000000 + } + }, + "local_time": { + "time_t": 1738053894, + "asctime": "Tue Jan 28 09:44:54 2025 CET" + }, + "smart_status": { + "passed": true + }, + "ata_smart_data": { + "offline_data_collection": { + "status": { + "value": 0, + "string": "was never started" + }, + "completion_seconds": 120 + }, + "self_test": { + "status": { + "value": 0, + "string": "completed without error", + "passed": true + }, + "polling_minutes": { + "short": 2, + "extended": 10 + } + }, + "capabilities": { + "values": [ + 17, + 2 + ], + "exec_offline_immediate_supported": true, + 
"offline_is_aborted_upon_new_cmd": false, + "offline_surface_scan_supported": false, + "self_tests_supported": true, + "conveyance_self_test_supported": false, + "selective_self_test_supported": false, + "attribute_autosave_enabled": false, + "error_logging_supported": true, + "gp_logging_supported": true + } + }, + "ata_smart_attributes": { + "revision": 1, + "table": [ + { + "id": 1, + "name": "Raw_Read_Error_Rate", + "value": 100, + "worst": 100, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 100, + "string": "100" + } + }, + { + "id": 9, + "name": "Power_On_Hours", + "value": 100, + "worst": 100, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 23892, + "string": "23892" + } + }, + { + "id": 12, + "name": "Power_Cycle_Count", + "value": 100, + "worst": 100, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 10, + "string": "10" + } + }, + { + "id": 148, + "name": "Unknown_Attribute", + "value": 100, + "worst": 100, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 0, + "string": "------ ", + "prefailure": false, + "updated_online": false, + "performance": false, + "error_rate": false, + "event_count": false, + "auto_keep": false + }, + "raw": { + "value": 0, + "string": "0" + } + }, + { + "id": 149, + "name": "Unknown_Attribute", + "value": 100, + "worst": 100, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 0, + "string": "------ ", + 
"prefailure": false, + "updated_online": false, + "performance": false, + "error_rate": false, + "event_count": false, + "auto_keep": false + }, + "raw": { + "value": 0, + "string": "0" + } + }, + { + "id": 167, + "name": "Unknown_Attribute", + "value": 100, + "worst": 100, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 0, + "string": "------ ", + "prefailure": false, + "updated_online": false, + "performance": false, + "error_rate": false, + "event_count": false, + "auto_keep": false + }, + "raw": { + "value": 0, + "string": "0" + } + }, + { + "id": 168, + "name": "SATA_Phy_Error_Count", + "value": 100, + "worst": 100, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 18, + "string": "-O--C- ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": false + }, + "raw": { + "value": 0, + "string": "0" + } + }, + { + "id": 169, + "name": "Unknown_Attribute", + "value": 100, + "worst": 100, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 0, + "string": "------ ", + "prefailure": false, + "updated_online": false, + "performance": false, + "error_rate": false, + "event_count": false, + "auto_keep": false + }, + "raw": { + "value": 0, + "string": "0" + } + }, + { + "id": 170, + "name": "Bad_Blk_Ct_Erl/Lat", + "value": 100, + "worst": 100, + "thresh": 10, + "when_failed": "", + "flags": { + "value": 0, + "string": "------ ", + "prefailure": false, + "updated_online": false, + "performance": false, + "error_rate": false, + "event_count": false, + "auto_keep": false + }, + "raw": { + "value": 0, + "string": "0/0" + } + }, + { + "id": 172, + "name": "Unknown_Attribute", + "value": 100, + "worst": 100, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 0, + 
"string": "0" + } + }, + { + "id": 173, + "name": "MaxAvgErase_Ct", + "value": 100, + "worst": 100, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 0, + "string": "------ ", + "prefailure": false, + "updated_online": false, + "performance": false, + "error_rate": false, + "event_count": false, + "auto_keep": false + }, + "raw": { + "value": 0, + "string": "0" + } + }, + { + "id": 181, + "name": "Program_Fail_Cnt_Total", + "value": 100, + "worst": 100, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 0, + "string": "0" + } + }, + { + "id": 182, + "name": "Erase_Fail_Count_Total", + "value": 100, + "worst": 100, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 0, + "string": "------ ", + "prefailure": false, + "updated_online": false, + "performance": false, + "error_rate": false, + "event_count": false, + "auto_keep": false + }, + "raw": { + "value": 0, + "string": "0" + } + }, + { + "id": 187, + "name": "Reported_Uncorrect", + "value": 100, + "worst": 100, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 0, + "string": "0" + } + }, + { + "id": 192, + "name": "Unsafe_Shutdown_Count", + "value": 100, + "worst": 100, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 18, + "string": "-O--C- ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": false + }, + "raw": { + "value": 8, + "string": "8" + } + }, + { + "id": 194, + "name": "Temperature_Celsius", + "value": 23, + "worst": 36, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 34, + 
"string": "-O---K ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": false, + "auto_keep": true + }, + "raw": { + "value": 77311770647, + "string": "23 (Min/Max 18/36)" + } + }, + { + "id": 196, + "name": "Not_In_Use", + "value": 100, + "worst": 100, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 0, + "string": "0" + } + }, + { + "id": 199, + "name": "CRC_Error_Count", + "value": 100, + "worst": 100, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 0, + "string": "0" + } + }, + { + "id": 218, + "name": "CRC_Error_Count", + "value": 100, + "worst": 100, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 0, + "string": "0" + } + }, + { + "id": 231, + "name": "SSD_Life_Left", + "value": 97, + "worst": 97, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 0, + "string": "------ ", + "prefailure": false, + "updated_online": false, + "performance": false, + "error_rate": false, + "event_count": false, + "auto_keep": false + }, + "raw": { + "value": 97, + "string": "97" + } + }, + { + "id": 233, + "name": "Flash_Writes_GiB", + "value": 100, + "worst": 100, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + 
"value": 1173, + "string": "1173" + } + }, + { + "id": 241, + "name": "Lifetime_Writes_GiB", + "value": 100, + "worst": 100, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 2168, + "string": "2168" + } + }, + { + "id": 242, + "name": "Lifetime_Reads_GiB", + "value": 100, + "worst": 100, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 50, + "string": "-O--CK ", + "prefailure": false, + "updated_online": true, + "performance": false, + "error_rate": false, + "event_count": true, + "auto_keep": true + }, + "raw": { + "value": 1630, + "string": "1630" + } + }, + { + "id": 244, + "name": "Average_Erase_Count", + "value": 100, + "worst": 100, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 0, + "string": "------ ", + "prefailure": false, + "updated_online": false, + "performance": false, + "error_rate": false, + "event_count": false, + "auto_keep": false + }, + "raw": { + "value": 35, + "string": "35" + } + }, + { + "id": 245, + "name": "Max_Erase_Count", + "value": 100, + "worst": 100, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 0, + "string": "------ ", + "prefailure": false, + "updated_online": false, + "performance": false, + "error_rate": false, + "event_count": false, + "auto_keep": false + }, + "raw": { + "value": 124, + "string": "124" + } + }, + { + "id": 246, + "name": "Total_Erase_Count", + "value": 100, + "worst": 100, + "thresh": 0, + "when_failed": "", + "flags": { + "value": 0, + "string": "------ ", + "prefailure": false, + "updated_online": false, + "performance": false, + "error_rate": false, + "event_count": false, + "auto_keep": false + }, + "raw": { + "value": 58560, + "string": "58560" + } + } + ] + }, + "power_on_time": { + "hours": 23892 + }, + "power_cycle_count": 10, + "temperature": { + "current": 23 + 
}, + "ata_smart_error_log": { + "summary": { + "revision": 1, + "count": 0 + } + }, + "ata_smart_self_test_log": { + "standard": { + "revision": 1, + "table": [ + { + "type": { + "value": 1, + "string": "Short offline" + }, + "status": { + "value": 0, + "string": "Completed without error", + "passed": true + }, + "lifetime_hours": 3612 + } + ], + "count": 1, + "error_count_total": 0, + "error_count_outdated": 0 + } + } +}""" + +HEALTH_MEGARAID1 = """{ + "json_format_version": [ + 1, + 0 + ], + "smartctl": { + "version": [ + 7, + 0 + ], + "svn_revision": "4883", + "platform_info": "x86_64-linux-4.19.0+1", + "build_info": "(local build)", + "argv": [ + "smartctl", + "-j", + "-H", + "-d", + "megaraid,1", + "/dev/bus/0" + ], + "messages": [ + { + "string": "Warning: This result is based on an Attribute check.", + "severity": "warning" + } + ], + "exit_status": 4 + }, + "device": { + "name": "/dev/bus/0", + "info_name": "/dev/bus/0 [megaraid_disk_01] [SAT]", + "type": "sat+megaraid,1", + "protocol": "ATA" + }, + "smart_status": { + "passed": true + } +}""" diff --git a/tests/smartctl_outputs/smartctl_nvme1.py b/tests/smartctl_outputs/smartctl_nvme1.py new file mode 100644 index 0000000..ea1338c --- /dev/null +++ b/tests/smartctl_outputs/smartctl_nvme1.py @@ -0,0 +1,137 @@ +# Outputs are from real hardware: +# INFO -> smartctl -j -a -d nvme /dev/nvme1 | jq . +# HEALTH -> smartctl -j -H -d nvme /dev/nvme1 | jq . 
+INFO_NVME1 = """{ + "json_format_version": [ + 1, + 0 + ], + "smartctl": { + "version": [ + 7, + 0 + ], + "svn_revision": "4883", + "platform_info": "x86_64-linux-4.19.0+1", + "build_info": "(local build)", + "argv": [ + "smartctl", + "-j", + "-a", + "-d", + "nvme", + "/dev/nvme1" + ], + "exit_status": 0 + }, + "device": { + "name": "/dev/nvme1", + "info_name": "/dev/nvme1", + "type": "nvme", + "protocol": "NVMe" + }, + "model_name": "INTEL SSDPED1D280GA", + "serial_number": "PHMB7466015Y280CGN", + "firmware_version": "E2010325", + "nvme_pci_vendor": { + "id": 32902, + "subsystem_id": 32902 + }, + "nvme_ieee_oui_identifier": 6083300, + "nvme_controller_id": 0, + "nvme_number_of_namespaces": 1, + "nvme_namespaces": [ + { + "id": 1, + "size": { + "blocks": 547002288, + "bytes": 280065171456 + }, + "capacity": { + "blocks": 547002288, + "bytes": 280065171456 + }, + "utilization": { + "blocks": 547002288, + "bytes": 280065171456 + }, + "formatted_lba_size": 512 + } + ], + "user_capacity": { + "blocks": 547002288, + "bytes": 280065171456 + }, + "logical_block_size": 512, + "local_time": { + "time_t": 1738053854, + "asctime": "Tue Jan 28 09:44:14 2025 CET" + }, + "smart_status": { + "passed": true, + "nvme": { + "value": 0 + } + }, + "nvme_smart_health_information_log": { + "critical_warning": 0, + "temperature": 31, + "available_spare": 100, + "available_spare_threshold": 0, + "percentage_used": 0, + "data_units_read": 67343004, + "data_units_written": 65516455, + "host_reads": 3265697818, + "host_writes": 2714560339, + "controller_busy_time": 628, + "power_cycles": 26, + "power_on_hours": 42772, + "unsafe_shutdowns": 14, + "media_errors": 0, + "num_err_log_entries": 4 + }, + "temperature": { + "current": 31 + }, + "power_cycle_count": 26, + "power_on_time": { + "hours": 42772 + } +}""" + +HEALTH_NVME1 = """{ + "json_format_version": [ + 1, + 0 + ], + "smartctl": { + "version": [ + 7, + 0 + ], + "svn_revision": "4883", + "platform_info": "x86_64-linux-4.19.0+1", + 
"build_info": "(local build)", + "argv": [ + "smartctl", + "-j", + "-H", + "-d", + "nvme", + "/dev/nvme1" + ], + "exit_status": 0 + }, + "device": { + "name": "/dev/nvme1", + "info_name": "/dev/nvme1", + "type": "nvme", + "protocol": "NVMe" + }, + "smart_status": { + "passed": true, + "nvme": { + "value": 0 + } + } +}""" diff --git a/tests/smartctl_outputs/smartctl_sda.py b/tests/smartctl_outputs/smartctl_sda.py new file mode 100644 index 0000000..7427c9b --- /dev/null +++ b/tests/smartctl_outputs/smartctl_sda.py @@ -0,0 +1,56 @@ +# Unlike other logs this one has been truncated. +# But it corresponds to reality +INFO_SDA = """{ + "json_format_version": [ + 1, + 0 + ], + "smartctl": { + "version": [ + 7, + 0 + ], + "svn_revision": "4883", + "platform_info": "x86_64-linux-4.19.0+1", + "build_info": "(local build)", + "argv": [ + "smartctl", + "-j", + "-a", + "/dev/sda" + ], + "exit_status": 0 + } +}""" + +HEALTH_SDA = """{ + "json_format_version": [ + 1, + 0 + ], + "smartctl": { + "version": [ + 7, + 0 + ], + "svn_revision": "4883", + "platform_info": "x86_64-linux-4.19.0+1", + "build_info": "(local build)", + "argv": [ + "smartctl", + "-j", + "-H", + "/dev/sda" + ], + "exit_status": 0 + }, + "device": { + "name": "/dev/sda", + "info_name": "/dev/sda [SAT]", + "type": "sat", + "protocol": "ATA" + }, + "smart_status": { + "passed": true + } +}""" diff --git a/tests/test_smartctl.py b/tests/test_smartctl.py index 92209e8..ccc2bf4 100644 --- a/tests/test_smartctl.py +++ b/tests/test_smartctl.py @@ -4,101 +4,50 @@ import XenAPIPlugin from smartctl import get_information, get_health - -SMARTCTL_HEALTH = """{ - "json_format_version": [ - 1, - 0 - ], - "smartctl": { - "version": [ - 7, - 0 - ], - "svn_revision": "4883", - "platform_info": "x86_64-linux-4.19.0+1", - "build_info": "(local build)", - "argv": [ - "smartctl", - "-j", - "-H", - "/dev/sda" - ], - "exit_status": 0 - }, - "device": { - "name": "/dev/sda", - "info_name": "/dev/sda [SAT]", - "type": "sat", - 
"protocol": "ATA" - }, - "smart_status": { - "passed": true - } -}""" - -SMARTCTL_HEALTH_EXPECTED = """{ - "/dev/sda": "PASSED" -}""" - -SMARTCTL_INFO = """{ - "json_format_version": [ - 1, - 0 - ], - "smartctl": { - "version": [ - 7, - 0 - ], - "svn_revision": "4883", - "platform_info": "x86_64-linux-4.19.0+1", - "build_info": "(local build)", - "argv": [ - "smartctl", - "-j", - "-a", - "/dev/sda" - ], - "exit_status": 0 - } -}""" - -SMARTCTL_INFO_EXPECTED = """{ - "/dev/sda": { - "json_format_version": [1, 0], - "smartctl": { - "argv": ["smartctl", "-j", "-a", "/dev/sda"], - "build_info": "(local build)", - "exit_status": 0, - "platform_info": "x86_64-linux-4.19.0+1", - "svn_revision": "4883", - "version": [7, 0] - } - } -}""" +from smartctl_outputs.smartctl_sda import INFO_SDA, HEALTH_SDA +from smartctl_outputs.smartctl_nvme1 import INFO_NVME1, HEALTH_NVME1 +from smartctl_outputs.smartctl_megaraid0 import INFO_MEGARAID0, HEALTH_MEGARAID0 +from smartctl_outputs.smartctl_megaraid1 import INFO_MEGARAID1, HEALTH_MEGARAID1 +from smartctl_outputs.smartctl_expected_output import EXPECTED_INFO, EXPECTED_HEALTH + +LIST_OF_DEVICES = [ + {"name": "/dev/sda", "type": "sat"}, + {"name": "/dev/nvme1", "type": "nvme"}, + {"name": "/dev/bus/0", "type": "megaraid,0"}, + {"name": "/dev/bus/0", "type": "megaraid,1"}, +] @mock.patch("smartctl.run_command", autospec=True) -@mock.patch("smartctl._list_disks", autospec=True) +@mock.patch("smartctl._list_devices", autospec=True) class TestSmartctl: - def test_smartctl_error(self, _list_disks, run_command, fs): - _list_disks.side_effect = Exception("Error!") + def test_smartctl_error(self, _list_devices, run_command, fs): + _list_devices.side_effect = Exception("Error!") with pytest.raises(XenAPIPlugin.Failure) as e: get_health(None, None) assert e.value.params[0] == '-1' assert e.value.params[1] == 'Error!' 
- def test_smartctl_information(self, _list_disks, run_command, fs): - _list_disks.return_value = ["/dev/sda"] - run_command.return_value = {"stdout": SMARTCTL_INFO} + def test_smartctl_information(self, _list_devices, run_command, fs): + _list_devices.return_value = LIST_OF_DEVICES + run_command.side_effect = [ + {"stdout": INFO_SDA}, + {"stdout": INFO_NVME1}, + {"stdout": INFO_MEGARAID0}, + {"stdout": INFO_MEGARAID1}, + ] res = get_information(None, None) - assert json.loads(res) == json.loads(SMARTCTL_INFO_EXPECTED) + assert json.loads(res) == json.loads(EXPECTED_INFO) - def test_smartctl_health(self, _list_disks, run_command, fs): - _list_disks.return_value = ["/dev/sda"] - run_command.return_value = {"stdout": SMARTCTL_HEALTH} + def test_smartctl_health(self, _list_devices, run_command, fs): + _list_devices.return_value = LIST_OF_DEVICES + run_command.side_effect = [ + {"stdout": HEALTH_SDA}, + {"stdout": HEALTH_NVME1}, + {"stdout": HEALTH_MEGARAID0}, + {"stdout": HEALTH_MEGARAID1}, + ] res = get_health(None, None) - assert json.loads(res) == json.loads(SMARTCTL_HEALTH_EXPECTED) + assert json.loads(res) == json.loads(EXPECTED_HEALTH)