DAOS-17120 test: replace pcmd with run_remote #15915

Open · wants to merge 12 commits into base: master
26 changes: 12 additions & 14 deletions src/tests/ftest/control/dmg_storage_scan_scm.py
@@ -1,12 +1,13 @@
"""
(C) Copyright 2020-2022 Intel Corporation.
(C) Copyright 2025 Hewlett Packard Enterprise Development LP

SPDX-License-Identifier: BSD-2-Clause-Patent
"""
import os

from control_test_base import ControlTestBase
from general_utils import pcmd, run_pcmd
from run_utils import run_remote


class DmgStorageScanSCMTest(ControlTestBase):
@@ -42,21 +43,18 @@ def verify_storage_scan_scm(self, storage_dict):
for scm_namespace in storage_dict["scm_namespaces"]:
# Verify that all namespaces exist under /dev.
pmem_name = scm_namespace["blockdev"]
lscmd = "{} {}".format("ls", os.path.join("/dev", pmem_name))
# rc is a dictionary where return code is the key.
rc = pcmd(hosts=self.hostlist_servers, command=lscmd)

if 0 not in rc:
errors.append("{} didn't exist under /dev!".format(pmem_name))
ls_cmd = f"ls {os.path.join('/dev', pmem_name)}"
if not run_remote(self.log, self.hostlist_servers, ls_cmd).passed:
errors.append(f"{pmem_name} didn't exist under /dev!")

# Verify the Socket ID.
numa_node_path = "/sys/class/block/{}/device/numa_node".format(
pmem_name)
command = "cat {}".format(numa_node_path)
out_list = run_pcmd(hosts=self.hostlist_servers, command=command)

# This one is in str.
expected_numa_node = out_list[0]["stdout"][0]
numa_node_path = os.path.join(
os.sep, "sys", "class", "block", pmem_name, "device", "numa_node")
command = f"cat {numa_node_path}"
result = run_remote(self.log, self.hostlist_servers, command)
if not result.passed:
errors.append(f"{command} failed on {result.failed_hosts}")
expected_numa_node = result.joined_stdout
actual_numa_node = str(scm_namespace["numa_node"])

if expected_numa_node != actual_numa_node:
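For reference, a minimal standalone sketch of the result handling this file migrates to, assuming the DAOS ftest helpers (`run_utils.run_remote`, ClusterShell's `NodeSet`) are importable; the logger, host set, and device name are placeholders:

```python
import logging

from ClusterShell.NodeSet import NodeSet
from run_utils import run_remote

log = logging.getLogger()
hosts = NodeSet("server-[1-2]")  # hypothetical host set

# run_remote returns a result object; .passed is True only when the command
# succeeded on every host, and per-host output is kept on the result.
result = run_remote(log, hosts, "ls /dev/pmem0")
if not result.passed:
    print(f"command failed on {result.failed_hosts}")
print(result.joined_stdout)  # stdout from all hosts, joined
```

Unlike `pcmd`, which returned a dictionary keyed by exit status, the `run_remote` result carries per-host stdout alongside a single `passed` flag, so the return-code bookkeeping in the old code goes away.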
2 changes: 1 addition & 1 deletion src/tests/ftest/control/log_entry.py
@@ -158,7 +158,7 @@ def test_control_log_entry(self):
self.log_step('Restart server')
expected = [r'Starting I/O Engine instance', r'Listening on']
with self.verify_journalctl(expected):
self.server_managers[0].restart(list(kill_host), wait=True)
self.server_managers[0].restart(kill_host, wait=True)

self.log_step('Reintegrate all ranks and wait for rebuild')
expected = [fr'rank {rank}.*start reintegration' for rank in kill_ranks] \
38 changes: 19 additions & 19 deletions src/tests/ftest/control/ssd_socket.py
@@ -1,13 +1,15 @@
"""
(C) Copyright 2020-2022 Intel Corporation.
(C) Copyright 2025 Hewlett Packard Enterprise Development LP

SPDX-License-Identifier: BSD-2-Clause-Patent
"""
import os
from textwrap import wrap

from ClusterShell.NodeSet import NodeSet
from control_test_base import ControlTestBase
from general_utils import pcmd, run_pcmd
from run_utils import run_remote


class SSDSocketTest(ControlTestBase):
@@ -36,10 +38,10 @@ def debug_numa_node(self, pci_addr_heads):
for pci_addr_head in pci_addr_heads:
self.log.debug(
"----- Search PCI Addr Head %s in /sys -----", pci_addr_head)
run_pcmd(
hosts=self.hostlist_servers,
command="find /sys -name \"{}\"".format(pci_addr_head),
verbose=True)
run_remote(
self.log,
self.hostlist_servers,
f'find /sys -name "{pci_addr_head}"')

# Another way to obtain the Socket ID is to use hwloc-ls --whole-io
# --verbose. It contains something like:
@@ -55,9 +57,10 @@ def debug_numa_node(self, pci_addr_heads):
# much more cumbersome than reading the numa_node, so it's called here
# mainly for debugging purposes.
self.log.debug("----- Show PCI Address in hwloc-ls -----")
pcmd(
hosts=self.hostlist_servers,
command="hwloc-ls --whole-io --verbose")
run_remote(
self.log,
self.hostlist_servers,
"hwloc-ls --whole-io --verbose")

def verify_ssd_sockets(self, storage_dict):
"""Verify SSD sockets.
@@ -98,17 +101,14 @@ def verify_ssd_sockets(self, storage_dict):
pci_addr_heads.append(pci_addr_head)

# Call cat on the server host, not necessarily the local test host.
results = run_pcmd(
hosts=self.hostlist_servers[0:1], command="cat {}".format(numa_node_path))

# Obtain the numa_node content.
fs_socket_id = ""
for result in results:
# Test that the content is expected.
fs_socket_id = result["stdout"][-1]
if fs_socket_id != str(cmd_socket_id):
errors.append(
"Unexpected socket ID! Cmd: {}; FS: {}".format(cmd_socket_id, fs_socket_id))
command = f"cat {numa_node_path}"
result = run_remote(
self.log, NodeSet(self.hostlist_servers[0]), command)
if not result.passed:
errors.append(f"{command} failed on {result.failed_hosts}")
fs_socket_id = result.joined_stdout
if fs_socket_id != str(cmd_socket_id):
errors.append(f"Unexpected socket ID! Cmd: {cmd_socket_id}; FS: {fs_socket_id}")

if errors:
# Since we're dealing with system files and we don't have access to
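As an aside, the single-host `cat` above relies on wrapping one hostname in a `NodeSet`; a small sketch of that pattern under the same assumptions (placeholder hosts, logger, and device path):

```python
import logging

from ClusterShell.NodeSet import NodeSet
from run_utils import run_remote

log = logging.getLogger()
server_list = ["server-1", "server-2"]  # hypothetical list of server hostnames

# Wrap only the first host in a NodeSet so the command runs on a single node.
result = run_remote(log, NodeSet(server_list[0]),
                    "cat /sys/class/block/pmem0/device/numa_node")
if result.passed:
    print(result.joined_stdout.strip())  # e.g. "0"
```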
17 changes: 8 additions & 9 deletions src/tests/ftest/control/super_block_versioning.py
@@ -1,14 +1,15 @@
"""
(C) Copyright 2020-2023 Intel Corporation.
(C) Copyright 2025 Hewlett Packard Enterprise Development LP

SPDX-License-Identifier: BSD-2-Clause-Patent
"""


import os

from apricot import TestWithServers
from general_utils import check_file_exists, pcmd
from command_utils import command_as_user
from general_utils import check_file_exists
from run_utils import run_remote


class SuperBlockVersioning(TestWithServers):
@@ -39,9 +40,7 @@ def test_super_block_version_basic(self):
self.fail("{}: {} not found".format(check_result[1], fname))

# Make sure that 'version' is in the file, run task to check
cmd = "sudo cat {} | grep -F \"version\"".format(fname)
result = pcmd(self.hostlist_servers, cmd, timeout=20)

# Determine if the command completed successfully across all the hosts
if len(result) > 1 or 0 not in result:
self.fail("Was not able to find version in {} file".format(fname))
cmd = command_as_user(f'cat {fname} | grep -F "version"', "root")
result = run_remote(self.log, self.hostlist_servers, cmd, timeout=20)
if not result.passed:
self.fail(f"Was not able to find version in {fname} file")
53 changes: 18 additions & 35 deletions src/tests/ftest/control/version.py
@@ -1,13 +1,16 @@
'''
(C) Copyright 2018-2023 Intel Corporation.
(C) Copyright 2025 Hewlett Packard Enterprise Development LP

SPDX-License-Identifier: BSD-2-Clause-Patent
'''
import json
import re

from apricot import TestWithServers
from general_utils import append_error, report_errors, run_pcmd
from ClusterShell.NodeSet import NodeSet
from general_utils import append_error, report_errors
from run_utils import run_remote
from server_utils_base import DaosServerCommandRunner


@@ -37,31 +40,17 @@ def test_version(self):
"""
# Get RPM version.
rpm_command = "rpm -qa | grep daos-server"
output = run_pcmd(hosts=self.hostlist_servers, command=rpm_command)
self.log.debug("RPM output = %s", output)
rc = output[0]["exit_status"]
stdout = output[0]["stdout"]
if rc != 0:
report_errors(self, ["DAOS RPMs not properly installed: rc={}".format(rc)])
rpm_version = None
for rpm in stdout:
result = re.findall(r"daos-server-[tests-|tests_openmpi-]*([\d.]+)", rpm)
if result:
rpm_version = result[0]
break
if not result:
report_errors(self, ["RPM version could not be defined"])
result = run_remote(self.log, self.hostlist_servers, rpm_command)
if not result.passed:
self.fail("Failed to list daos-server RPMs")
if not result.homogeneous:
self.fail("Non-homogenous daos-server RPMs")
match = re.findall(r"daos-server-[tests-|tests_openmpi-]*([\d.]+)", result.joined_stdout)
if not match:
self.fail("Failed to get version from daos-server RPMs")
rpm_version = match[0]
self.log.info("RPM version = %s", rpm_version)

# Remove configuration files
cleanup_cmds = [
"sudo find /etc/daos/certs -type f -delete -print",
"sudo rm -fv /etc/daos/daos_server.yml /etc/daos/daos_control.yml"
" /etc/daos/daos_agent.yml",
]
for cmd in cleanup_cmds:
run_pcmd(hosts=self.hostlist_servers, command=cmd)

# Get dmg version.
dmg_version = self.get_dmg_command().version()["response"]["version"]
self.log.info("dmg version = %s", dmg_version)
@@ -75,17 +64,11 @@
# Get daos_agent version.
daos_agent_version = None
daos_agent_cmd = "daos_agent --json version"
output = run_pcmd(hosts=self.hostlist_servers, command=daos_agent_cmd)
self.log.debug("DAOS Agent output = %s", output)
rc = output[0]["exit_status"]
stdout = output[0]["stdout"]
if rc != 0:
msg = "DAOS Agent not properly installed: rc={}".format(rc)
append_error(errors, msg, stdout)
else:
self.log.info("DAOS Agent stdout = %s", "".join(stdout))
daos_agent_version = json.loads("".join(stdout))["response"]["version"]
self.log.info("daos_agent version = %s", daos_agent_version)
result = run_remote(self.log, NodeSet(self.hostlist_servers[0]), daos_agent_cmd)
if not result.passed:
self.fail("Failed to get daos_agent version")
daos_agent_version = json.loads(result.joined_stdout)["response"]["version"]
self.log.info("daos_agent version = %s", daos_agent_version)

# Get daos_server version
daos_server_cmd = DaosServerCommandRunner(path=self.bin)
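For reference, a condensed sketch of the RPM-version parsing shown above, reusing the regex from the diff; hosts and logger are placeholders, and the sketch assumes `result.homogeneous` reports whether all hosts returned identical output:

```python
import logging
import re

from ClusterShell.NodeSet import NodeSet
from run_utils import run_remote

log = logging.getLogger()
hosts = NodeSet("server-[1-2]")

# List installed daos-server RPMs on every server and parse one version string.
result = run_remote(log, hosts, "rpm -qa | grep daos-server")
if not result.passed or not result.homogeneous:
    raise RuntimeError("daos-server RPMs missing or mixed across hosts")
match = re.findall(r"daos-server-[tests-|tests_openmpi-]*([\d.]+)", result.joined_stdout)
rpm_version = match[0] if match else None
```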
6 changes: 4 additions & 2 deletions src/tests/ftest/daos_racer/parallel.py
@@ -1,6 +1,7 @@
#!/usr/bin/python3
"""
(C) Copyright 2021-2022 Intel Corporation.
(C) Copyright 2025 Hewlett Packard Enterprise Development LP

SPDX-License-Identifier: BSD-2-Clause-Patent
"""
@@ -52,7 +53,8 @@ def test_daos_racer_parallel(self):
job_manager.run()

except CommandFailure as error:
self.log.error("DAOS Racer Failed: %s", str(error))
self.fail("Test was expected to pass but it failed.\n")
msg = f"daos_racer failed: {error}"
self.log.error(msg)
self.fail(msg)

self.log.info("Test passed!")
5 changes: 3 additions & 2 deletions src/tests/ftest/deployment/agent_failure.py
@@ -1,5 +1,6 @@
"""
(C) Copyright 2022-2024 Intel Corporation.
(C) Copyright 2025 Hewlett Packard Enterprise Development LP

SPDX-License-Identifier: BSD-2-Clause-Patent
"""
@@ -241,7 +242,7 @@ def test_agent_failure_isolation(self):
# 6. On the killed client, verify journalctl shows the log that the agent is
# stopped.
results = get_journalctl(
hosts=[agent_host_kill], since=since, until=until,
hosts=NodeSet(agent_host_kill), since=since, until=until,
journalctl_type="daos_agent")
self.log.info("journalctl results (kill) = %s", results)
if "shutting down" not in results[0]["data"]:
@@ -252,7 +253,7 @@
# 7. On the other client where agent is still running, verify that the journalctl
# in the previous step doesn't show that the agent is stopped.
results = get_journalctl(
hosts=[agent_host_keep], since=since, until=until,
hosts=NodeSet(agent_host_keep), since=since, until=until,
journalctl_type="daos_agent")
self.log.info("journalctl results (keep) = %s", results)
if "shutting down" in results[0]["data"]:
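The only functional change in this file is the type of the host argument passed to `get_journalctl`; a tiny illustration of the conversion, with a placeholder hostname:

```python
from ClusterShell.NodeSet import NodeSet

agent_host_kill = "client-1"      # placeholder hostname string
hosts = NodeSet(agent_host_kill)  # single-host NodeSet instead of a one-element list
assert str(hosts) == "client-1" and len(hosts) == 1
```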
16 changes: 8 additions & 8 deletions src/tests/ftest/deployment/network_failure.py
@@ -11,11 +11,12 @@
from ClusterShell.NodeSet import NodeSet
from command_utils_base import CommandFailure
from dmg_utils import check_system_query_status
from general_utils import report_errors, run_pcmd
from general_utils import report_errors
from ior_test_base import IorTestBase
from ior_utils import IorCommand
from job_manager_utils import get_job_manager
from network_utils import NetworkInterface
from run_utils import run_remote


class NetworkFailureTest(IorTestBase):
@@ -98,16 +99,15 @@ def create_ip_to_host(self):

"""
command = "hostname -i"
results = run_pcmd(hosts=self.hostlist_servers, command=command)
self.log.info("hostname -i results = %s", results)
result = run_remote(self.log, self.hostlist_servers, command)
if not result.passed:
self.fail("Failed to get hostname on servers")

ip_to_host = {}
for result in results:
ips_str = result["stdout"][0]
for hosts, stdout in result.all_stdout.items():
# There may be multiple IP addresses for one host.
ip_addresses = ips_str.split()
for ip_address in ip_addresses:
ip_to_host[ip_address] = NodeSet(str(result["hosts"]))
for ip_address in stdout.split():
ip_to_host[ip_address] = NodeSet(hosts)

return ip_to_host

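A short sketch of the per-host stdout iteration used above to build the IP-to-host map; hosts and logger are placeholders, and `all_stdout` is assumed to map a host-set string to that set's stdout, as the hunk implies:

```python
import logging

from ClusterShell.NodeSet import NodeSet
from run_utils import run_remote

log = logging.getLogger()
servers = NodeSet("server-[1-2]")

result = run_remote(log, servers, "hostname -i")
ip_to_host = {}
if result.passed:
    for hosts, stdout in result.all_stdout.items():
        # A host may report multiple IP addresses on one line.
        for ip_address in stdout.split():
            ip_to_host[ip_address] = NodeSet(hosts)
```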
28 changes: 13 additions & 15 deletions src/tests/ftest/dfuse/posix_stat.py
@@ -1,11 +1,13 @@
"""
(C) Copyright 2018-2023 Intel Corporation.
(C) Copyright 2025 Hewlett Packard Enterprise Development LP

SPDX-License-Identifier: BSD-2-Clause-Patent
"""

from general_utils import get_remote_file_size, run_pcmd
from general_utils import get_remote_file_size
from ior_test_base import IorTestBase
from run_utils import run_remote


class POSIXStatTest(IorTestBase):
@@ -55,31 +57,27 @@ def test_stat_parameters(self):
create_cont=False, test_file_suffix=test_file_suffix)

# Get current epoch.
current_epoch = -1
output = run_pcmd(hosts=self.hostlist_clients, command="date +%s")
stdout = output[0]["stdout"]
self.log.info("date stdout = %s", stdout)
current_epoch = stdout[-1]
result = run_remote(self.log, self.hostlist_clients, "date +%s")
if not result.passed:
self.fail("Failed to get date on clients")
current_epoch = int(result.output[0].stdout[-1])

# Get epoch of the created file. (technically %Z is for last status
# change. %W is file birth, but it returns 0.)
creation_epoch = -1
# As in date command, run stat command in the client node.
stat_command = "stat -c%Z {}".format(self.ior_cmd.test_file.value)
output = run_pcmd(hosts=self.hostlist_clients, command=stat_command)
stdout = output[0]["stdout"]
self.log.info("stat stdout = %s", stdout)
creation_epoch = stdout[-1]
result = run_remote(self.log, self.hostlist_clients, stat_command)
if not result.passed:
self.fail(f"{stat_command} failed on clients")
creation_epoch = int(result.output[0].stdout[-1])

# Calculate the epoch difference between the creation time and the
# value in the file metadata. They're usually 2 sec apart.
creation_epoch_int = int(creation_epoch)
current_epoch_int = int(current_epoch)
diff_epoch = creation_epoch_int - current_epoch_int
diff_epoch = creation_epoch - current_epoch
if diff_epoch > 10:
msg = "Unexpected creation time! Expected = {}; Actual = {}"
error_list.append(
msg.format(current_epoch_int, creation_epoch_int))
msg.format(current_epoch, creation_epoch))

# 2. Verify file size.
# Get file size.
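Finally, a sketch of pulling a single value from the first host's stdout, as the epoch comparison above does; the command, hosts, and logger are placeholders:

```python
import logging

from ClusterShell.NodeSet import NodeSet
from run_utils import run_remote

log = logging.getLogger()
clients = NodeSet("client-[1-2]")

result = run_remote(log, clients, "date +%s")
if not result.passed:
    raise RuntimeError("date failed on clients")
# result.output is a list of per-command results; stdout is a list of lines,
# so the last line of the first host's output holds the epoch value.
current_epoch = int(result.output[0].stdout[-1])
```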