Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DAOS-16072 test: Support non-root user client commands #14661

Draft
wants to merge 7 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions src/tests/ftest/harness/advanced.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""
(C) Copyright 2021-2023 Intel Corporation.
(C) Copyright 2021-2024 Intel Corporation.

SPDX-License-Identifier: BSD-2-Clause-Patent
"""
Expand Down Expand Up @@ -102,11 +102,11 @@ def test_launch_failures(self):
host = NodeSet(choice(self.server_managers[0].hosts)) # nosec
self.log.info("Creating launch.py failure trigger files on %s", host)
failure_trigger = "00_trigger-launch-failure_00"
failure_trigger_dir = os.path.join(self.base_test_dir, failure_trigger)
failure_trigger_dir = os.path.join(self.test_env.log_dir, failure_trigger)
failure_trigger_files = [
os.path.join(self.base_test_dir, "{}_local.yaml".format(failure_trigger)),
os.path.join(self.test_env.log_dir, "{}_local.yaml".format(failure_trigger)),
os.path.join(os.sep, "etc", "daos", "daos_{}.yml".format(failure_trigger)),
os.path.join(self.base_test_dir, "{}.log".format(failure_trigger)),
os.path.join(self.test_env.log_dir, "{}.log".format(failure_trigger)),
os.path.join(failure_trigger_dir, "{}.log".format(failure_trigger)),
os.path.join(os.sep, "tmp", "daos_dump_{}.txt".format(failure_trigger)),
os.path.join(self.tmp, "valgrind_{}".format(failure_trigger)),
Expand Down
53 changes: 43 additions & 10 deletions src/tests/ftest/launch.py
Original file line number Diff line number Diff line change
Expand Up @@ -280,7 +280,7 @@ def _run(self, args):
else:
set_test_environment(
logger, test_env, args.test_servers, args.test_clients, args.provider,
args.insecure_mode, self.details)
args.insecure_mode, self.details, args.agent_user, args.test_log_dir)
except TestEnvironmentException as error:
message = f"Error setting up test environment: {str(error)}"
return self.get_exit_status(1, message, "Setup", sys.exc_info())
Expand Down Expand Up @@ -318,12 +318,13 @@ def _run(self, args):
return self.get_exit_status(0, "Listing tests complete")

# Setup the fuse configuration
try:
setup_fuse_config(logger, args.test_servers | args.test_clients)
except LaunchException:
# Warn but don't fail
message = "Issue detected setting up the fuse configuration"
setup_result.warn_test(logger, "Setup", message, sys.exc_info())
if args.fuse_setup:
try:
setup_fuse_config(logger, args.test_servers | args.test_clients)
except LaunchException:
# Warn but don't fail
message = "Issue detected setting up the fuse configuration"
setup_result.warn_test(logger, "Setup", message, sys.exc_info())

# Get the core file pattern information
core_files = {}
Expand All @@ -347,8 +348,8 @@ def _run(self, args):
group.update_test_yaml(
logger, args.scm_size, args.scm_mount, args.extra_yaml,
args.timeout_multiplier, args.override, args.verbose, args.include_localhost)
except (RunException, YamlException) as e:
message = "Error modifying the test yaml files: {}".format(e)
except (RunException, YamlException) as error:
message = f"Error modifying the test yaml files: {str(error)}"
status |= self.get_exit_status(1, message, "Setup", sys.exc_info())
except StorageException:
message = "Error detecting storage information for test yaml files"
Expand Down Expand Up @@ -529,6 +530,12 @@ def main():
"-a", "--archive",
action="store_true",
help="archive host log files in the avocado job-results directory")
parser.add_argument(
"-au", "--agent_user",
action="store",
default=None,
type=str,
help="user account to use when running the daos_agent")
parser.add_argument(
"-c", "--clear_mounts",
action="append",
Expand All @@ -551,6 +558,10 @@ def main():
"--failfast",
action="store_true",
help="stop the test suite after the first failure")
parser.add_argument(
"-fs", "--fuse_setup",
action="store_true",
help="enable setting up fuse configuration files")
parser.add_argument(
"-i", "--include_localhost",
action="store_true",
Expand All @@ -573,7 +584,7 @@ def main():
help="modify the test yaml files but do not run the tests")
parser.add_argument(
"-mo", "--mode",
choices=['normal', 'manual', 'ci'],
choices=['normal', 'manual', 'ci', 'agent_user'],
default='normal',
help="provide the mode of test to be run under. Default is normal, "
"in which the final return code of launch.py is still zero if "
Expand Down Expand Up @@ -670,6 +681,12 @@ def main():
default=NodeSet(),
help="comma-separated list of hosts to use as replacement values for "
"client placeholders in each test's yaml file")
parser.add_argument(
"-tld", "--test_log_dir",
action="store",
default=None,
type=str,
help="test log directory base path")
parser.add_argument(
"-th", "--logs_threshold",
action="store",
Expand Down Expand Up @@ -733,10 +750,26 @@ def main():
args.slurm_install = True
args.slurm_setup = True
args.user_create = True
args.fuse_setup = True
args.clear_mounts.append("/mnt/daos")
args.clear_mounts.append("/mnt/daos0")
args.clear_mounts.append("/mnt/daos1")

elif args.mode == "agent_user":
if args.agent_user is None:
# Run the agent with the current user by default
args.agent_user = getpass.getuser()
if os.environ.get("DAOS_TEST_LOG_DIR", args.test_log_dir) is None:
# Use a user-specific test log dir by default
args.test_log_dir = os.path.join(
os.sep, "var", "tmp", f"daos_testing_{args.agent_user}")
args.process_cores = False
args.logs_threshold = None
args.slurm_install = False
args.slurm_setup = False
args.user_create = False
args.fuse_setup = False

# Setup the Launch object
launch = Launch(args.name, args.mode, args.slurm_install, args.slurm_setup)

Expand Down
20 changes: 10 additions & 10 deletions src/tests/ftest/util/agent_utils.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
"""
(C) Copyright 2019-2023 Intel Corporation.
(C) Copyright 2019-2024 Intel Corporation.

SPDX-License-Identifier: BSD-2-Clause-Patent
"""
import os
import re
import socket
from getpass import getuser

from agent_utils_params import DaosAgentTransportCredentials, DaosAgentYamlParameters
from ClusterShell.NodeSet import NodeSet
Expand Down Expand Up @@ -65,7 +64,7 @@ def get_agent_command(group, cert_dir, bin_dir, config_file, config_temp=None):
class DaosAgentCommand(YamlCommand):
"""Defines an object representing a daos_agent command."""

def __init__(self, path="", yaml_cfg=None, timeout=15):
def __init__(self, path="", yaml_cfg=None, timeout=15, run_user=None):
"""Create a daos_agent command object.

Args:
Expand All @@ -74,10 +73,12 @@ def __init__(self, path="", yaml_cfg=None, timeout=15):
parameters. Defaults to None.
timeout (int, optional): number of seconds to wait for patterns to
appear in the subprocess output. Defaults to 15 seconds.
run_user (str, optional): user to run as. Defaults to None, which uses the current user.
"""
super().__init__(
"/run/agent_config/*", "daos_agent", path, yaml_cfg, timeout)
"/run/agent_config/*", "daos_agent", path, yaml_cfg, timeout, run_user)
self.pattern = "listening on "
self.run_user = run_user

# If specified use the configuration file from the YamlParameters object
default_yaml_file = None
Expand Down Expand Up @@ -206,7 +207,7 @@ class DaosAgentManager(SubprocessManager):
"""Manages the daos_agent execution on one or more hosts."""

def __init__(self, group, bin_dir, cert_dir, config_file, config_temp=None,
manager="Orterun", outputdir=None):
manager="Orterun", outputdir=None, run_user=None):
"""Initialize a DaosAgentManager object.

Args:
Expand All @@ -223,8 +224,8 @@ def __init__(self, group, bin_dir, cert_dir, config_file, config_temp=None,
outputdir (str, optional): path to avocado test outputdir. Defaults
to None.
"""
agent_command = get_agent_command(
group, cert_dir, bin_dir, config_file, config_temp)
agent_command = get_agent_command(group, cert_dir, bin_dir, config_file, config_temp)
agent_command.run_user = 'root'
super().__init__(agent_command, manager)

# Set the correct certificate file ownership
Expand Down Expand Up @@ -265,11 +266,10 @@ def start(self):
self._hosts, self.manager.command)

# Copy certificates
self.manager.job.copy_certificates(
get_log_file("daosCA/certs"), self._hosts)
self.manager.job.copy_certificates(get_log_file("daosCA/certs"), self._hosts)

# Verify the socket directory exists when using a non-systemctl manager
self.verify_socket_directory(getuser())
self.verify_socket_directory(self.manager.job.certificate_owner)

super().start()

Expand Down
23 changes: 9 additions & 14 deletions src/tests/ftest/util/apricot/apricot/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,8 @@ def __init__(self, *args, **kwargs):
self.test_id = self.get_test_name()

# Define a test unique temporary directory
self.base_test_dir = os.getenv("DAOS_TEST_LOG_DIR", "/tmp")
self.test_dir = os.path.join(self.base_test_dir, self.test_id)
self.test_env = TestEnvironment()
self.test_dir = os.path.join(self.test_env.log_dir, self.test_id)
if not os.path.exists(self.test_dir):
os.makedirs(self.test_dir)

Expand Down Expand Up @@ -545,7 +545,8 @@ def tearDown(self):
"""Tear down after each test case."""
self.report_timeout()
if self.fault_injection:
self._teardown_errors.extend(self.fault_injection.stop())
self._teardown_errors.extend(
self.fault_injection.stop(self.test_env.agent_user or "root"))
super().tearDown()

def stop_leftover_processes(self, processes, hosts):
Expand Down Expand Up @@ -744,12 +745,6 @@ def setUp(self):
self.__dump_engine_ult_on_failure = self.params.get(
"dump_engine_ult_on_failure", "/run/setup/*", True)

# # Find a configuration that meets the test requirements
# self.config = Configuration(
# self.params, self.hostlist_servers, debug=self.debug)
# if not self.config.set_config(self):
# self.cancel("Test requirements not met!")

# Create host files - In the future this should be the responsibility of
# tests/classes that need a host file and hostfile_clients should not be
# a property of this class.
Expand All @@ -769,8 +764,8 @@ def setUp(self):
hosts.add(self.hostlist_clients)
# Copy the fault injection files to the hosts.
self.fault_injection.copy_fault_files(hosts)
lines = get_file_listing(hosts, self.test_dir).stdout_text.splitlines()
for line in lines:
listing = get_file_listing(hosts, self.test_dir, self.test_env.agent_user or "root")
for line in listing.stdout_text.splitlines():
self.log.debug(" %s", line)

if not self.start_servers_once or self.name.uid == 1:
Expand Down Expand Up @@ -1117,8 +1112,7 @@ def add_server_manager(self, group=None, svr_config_file=None,
group = self.server_group
if svr_config_file is None and self.server_manager_class == "Systemctl":
svr_config_file = get_default_config_file("server")
svr_config_temp = self.get_config_file(
group, "server", self.test_dir)
svr_config_temp = self.get_config_file(group, "server", self.test_dir)
elif svr_config_file is None:
svr_config_file = self.get_config_file(group, "server")
svr_config_temp = None
Expand Down Expand Up @@ -1345,7 +1339,8 @@ def remove_temp_test_dir(self):
"Removing temporary test files in %s from %s",
self.test_dir, str(NodeSet.fromlist(all_hosts)))
result = run_remote(
self.log, all_hosts, command_as_user("rm -fr {}".format(self.test_dir), "root"))
self.log, all_hosts,
command_as_user("rm -fr {}".format(self.test_dir), self.test_env.agent_user or "root"))
if not result.passed:
errors.append("Error removing temporary test files on {}".format(result.failed_hosts))
return errors
Expand Down
Loading