Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

aurora/2.6 test branch #14539

Draft
wants to merge 9 commits into
base: release/2.6
Choose a base branch
from
9 changes: 5 additions & 4 deletions src/tests/ftest/deployment/agent_failure.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""
(C) Copyright 2022-2024 Intel Corporation.

Check failure on line 2 in src/tests/ftest/deployment/agent_failure.py

View workflow job for this annotation

GitHub Actions / Copyright check

Copyright out of date

SPDX-License-Identifier: BSD-2-Clause-Patent
"""
Expand Down Expand Up @@ -122,7 +122,7 @@
# 5. Verify journalctl shows the log that the agent is stopped.
results = get_journalctl(
hosts=self.hostlist_clients, since=since, until=until,
journalctl_type="daos_agent")
journalctl_type="daos_agent", run_user=self.test_env.agent_user)
self.log.info("journalctl results = %s", results)
if "shutting down" not in results[0]["data"]:
msg = "Agent shut down message not found in journalctl! Output = {}".format(
Expand Down Expand Up @@ -209,7 +209,8 @@
since = journalctl_time()
self.log.info("Stopping agent on %s", agent_host_kill)
pattern = self.agent_managers[0].manager.job.command_regex
detected, running = stop_processes(self.log, hosts=agent_host_kill, pattern=pattern)
detected, running = stop_processes(self.log, hosts=agent_host_kill, pattern=pattern,
user=self.agent_managers[0].manager.job.run_user)
if not detected:
msg = "No daos_agent process killed on {}!".format(agent_host_kill)
errors.append(msg)
Expand Down Expand Up @@ -240,7 +241,7 @@
# stopped.
results = get_journalctl(
hosts=[agent_host_kill], since=since, until=until,
journalctl_type="daos_agent")
journalctl_type="daos_agent", run_user=self.test_env.agent_user)
self.log.info("journalctl results (kill) = %s", results)
if "shutting down" not in results[0]["data"]:
msg = ("Agent shut down message not found in journalctl on killed client! "
Expand All @@ -251,7 +252,7 @@
# in the previous step doesn't show that the agent is stopped.
results = get_journalctl(
hosts=[agent_host_keep], since=since, until=until,
journalctl_type="daos_agent")
journalctl_type="daos_agent", run_user=self.test_env.agent_user)
self.log.info("journalctl results (keep) = %s", results)
if "shutting down" in results[0]["data"]:
msg = ("Agent shut down message found in journalctl on keep client! "
Expand Down
74 changes: 63 additions & 11 deletions src/tests/ftest/launch.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/usr/bin/env python3
"""
(C) Copyright 2018-2024 Intel Corporation.

Check failure on line 3 in src/tests/ftest/launch.py

View workflow job for this annotation

GitHub Actions / Copyright check

Copyright out of date

SPDX-License-Identifier: BSD-2-Clause-Patent
"""
Expand Down Expand Up @@ -282,7 +282,8 @@
else:
set_test_environment(
logger, test_env, args.test_servers, args.test_clients, args.provider,
args.insecure_mode, self.details)
args.insecure_mode, self.details, args.agent_user, args.test_log_dir,
args.server_ld_lib)
except TestEnvironmentException as error:
message = f"Error setting up test environment: {str(error)}"
return self.get_exit_status(1, message, "Setup", sys.exc_info())
Expand Down Expand Up @@ -320,12 +321,13 @@
return self.get_exit_status(0, "Listing tests complete")

# Setup the fuse configuration
try:
setup_fuse_config(logger, args.test_servers | args.test_clients)
except LaunchException:
# Warn but don't fail
message = "Issue detected setting up the fuse configuration"
setup_result.warn_test(logger, "Setup", message, sys.exc_info())
if args.fuse_setup:
try:
setup_fuse_config(logger, args.test_servers | args.test_clients)
except LaunchException:
# Warn but don't fail
message = "Issue detected setting up the fuse configuration"
setup_result.warn_test(logger, "Setup", message, sys.exc_info())

# Setup override systemctl files
try:
Expand All @@ -351,15 +353,15 @@
# Determine if bullseye code coverage collection is enabled
code_coverage = CodeCoverage(test_env)
# pylint: disable=unsupported-binary-operation
code_coverage.check(logger, args.test_servers | self.local_host)
# code_coverage.check(logger, args.test_servers | self.local_host)

# Update the test yaml files for the tests in this test group
try:
group.update_test_yaml(
logger, args.scm_size, args.scm_mount, args.extra_yaml,
args.timeout_multiplier, args.override, args.verbose, args.include_localhost)
except (RunException, YamlException) as e:
message = "Error modifying the test yaml files: {}".format(e)
except (RunException, YamlException) as error:
message = f"Error modifying the test yaml files: {str(error)}"
status |= self.get_exit_status(1, message, "Setup", sys.exc_info())
except StorageException:
message = "Error detecting storage information for test yaml files"
Expand Down Expand Up @@ -540,6 +542,12 @@
"-a", "--archive",
action="store_true",
help="archive host log files in the avocado job-results directory")
parser.add_argument(
"-au", "--agent_user",
action="store",
default=None,
type=str,
help="user account to use when running the daos_agent")
parser.add_argument(
"-c", "--clear_mounts",
action="append",
Expand All @@ -562,6 +570,10 @@
"--failfast",
action="store_true",
help="stop the test suite after the first failure")
parser.add_argument(
"-fs", "--fuse_setup",
action="store_true",
help="enable setting up fuse configuration files")
parser.add_argument(
"-i", "--include_localhost",
action="store_true",
Expand All @@ -584,7 +596,7 @@
help="modify the test yaml files but do not run the tests")
parser.add_argument(
"-mo", "--mode",
choices=['normal', 'manual', 'ci'],
choices=['normal', 'manual', 'ci', 'custom_a'],
default='normal',
help="provide the mode of test to be run under. Default is normal, "
"in which the final return code of launch.py is still zero if "
Expand Down Expand Up @@ -649,6 +661,12 @@
"-si", "--slurm_install",
action="store_true",
help="enable installing slurm RPMs if required by the tests")
parser.add_argument(
"-sl", "--server_ld_lib",
action="store",
default=None,
type=str,
help="LD_LIBRARY_PATH environment variable to use in the daos_server config file")
parser.add_argument(
"--scm_mount",
action="store",
Expand Down Expand Up @@ -681,6 +699,12 @@
default=NodeSet(),
help="comma-separated list of hosts to use as replacement values for "
"client placeholders in each test's yaml file")
parser.add_argument(
"-tld", "--test_log_dir",
action="store",
default=None,
type=str,
help="test log directory base path")
parser.add_argument(
"-th", "--logs_threshold",
action="store",
Expand Down Expand Up @@ -744,10 +768,38 @@
args.slurm_install = True
args.slurm_setup = True
args.user_create = True
args.fuse_setup = True
args.clear_mounts.append("/mnt/daos")
args.clear_mounts.append("/mnt/daos0")
args.clear_mounts.append("/mnt/daos1")

elif args.mode == "custom_a":
if args.agent_user is None:
# Run the agent with the current user by default
args.agent_user = getpass.getuser()
if os.environ.get("DAOS_TEST_LOG_DIR", args.test_log_dir) is None:
# Use a user-specific test log dir by default
args.test_log_dir = os.path.join(
os.sep, "var", "tmp", f"daos_testing_{args.agent_user}")
if os.environ.get("DAOS_TEST_CONTROL_CONFIG") is None:
os.environ["DAOS_TEST_CONTROL_CONFIG"] = os.path.join(
os.environ.get("DAOS_TEST_LOG_DIR", args.test_log_dir),
"daos_control.yml")
if os.environ.get("DAOS_TEST_AGENT_CONFIG") is None:
os.environ["DAOS_TEST_AGENT_CONFIG"] = os.path.join(
os.environ.get("DAOS_TEST_LOG_DIR", args.test_log_dir),
"daos_agent.yml")
if os.environ.get("DAOS_TEST_SERVER_CONFIG") is None:
os.environ["DAOS_TEST_SERVER_CONFIG"] = os.path.join(
os.environ.get("DAOS_TEST_LOG_DIR", args.test_log_dir),
"daos_server.yml")
args.process_cores = False
args.logs_threshold = None
args.slurm_install = False
args.slurm_setup = False
args.user_create = False
args.fuse_setup = False

# Setup the Launch object
launch = Launch(args.name, args.mode, args.slurm_install, args.slurm_setup)

Expand Down
5 changes: 4 additions & 1 deletion src/tests/ftest/network/cart_self_test.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
"""
(C) Copyright 2018-2024 Intel Corporation.

Check failure on line 2 in src/tests/ftest/network/cart_self_test.py

View workflow job for this annotation

GitHub Actions / Copyright check

Copyright out of date

SPDX-License-Identifier: BSD-2-Clause-Patent
"""
import os

from apricot import TestWithServers
from command_utils import ExecutableCommand
from command_utils_base import EnvironmentVariables, FormattedParameter
Expand Down Expand Up @@ -70,7 +72,8 @@
self.server_managers[0].get_config_value("provider")
self.cart_env["D_INTERFACE"] = \
self.server_managers[0].get_config_value("fabric_iface")
self.cart_env["DAOS_AGENT_DRPC_DIR"] = "/var/run/daos_agent/"
self.cart_env["DAOS_AGENT_DRPC_DIR"] = os.environ.get(
"DAOS_AGENT_DRPC_DIR", "/var/run/daos_agent/")

self.server_managers[0].manager.assign_environment(self.cart_env, True)
self.server_managers[0].detect_start_via_dmg = True
Expand Down
2 changes: 1 addition & 1 deletion src/tests/ftest/soak/harassers.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ hosts:
test_servers: 8
# servers if a server partition is defined
# server_partition: daos_server
client_partition: daos_client
# client_partition: daos_client
# client_reservation: daos-test
orterun:
allow_run_as_root: true
Expand Down
2 changes: 1 addition & 1 deletion src/tests/ftest/soak/smoke.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ hosts:
test_servers: 4
# servers if a server partition is defined
# server_partition: daos_server
client_partition: daos_client
# client_partition: daos_client
# client_reservation: daos-test
orterun:
allow_run_as_root: true
Expand Down
7 changes: 1 addition & 6 deletions src/tests/ftest/soak/stress.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ hosts:
test_servers: 8
# servers if a server partition is defined
# server_partition: daos_server
client_partition: daos_client
# client_partition: daos_client
# client_reservation: daos-test
orterun:
allow_run_as_root: true
Expand Down Expand Up @@ -138,7 +138,6 @@ ior_stress:
mount_dir: "/tmp/soak_dfuse_ior/"
disable_caching: true
thread_count: 8
cores: '0-7'
fio_stress:
api:
- POSIX
Expand Down Expand Up @@ -180,7 +179,6 @@ fio_stress:
mount_dir: "/tmp/soak_dfuse_fio/"
disable_caching: true
thread_count: 8
cores: '0-7'
daos_racer:
runtime: 120
vpic_stress:
Expand Down Expand Up @@ -217,7 +215,6 @@ lammps_stress:
mount_dir: "/tmp/soak_dfuse_lammps/"
disable_caching: true
thread_count: 8
cores: '0-7'
oclass:
- ["EC_2P1GX", "RP_2GX"]
mdtest_stress:
Expand Down Expand Up @@ -258,7 +255,6 @@ mdtest_stress:
mount_dir: "/tmp/soak_dfuse_mdtest/"
disable_caching: true
thread_count: 8
cores: '0-7'
macsio_stress:
job_timeout: 30
nodesperjob:
Expand Down Expand Up @@ -289,7 +285,6 @@ macsio_stress:
mount_dir: "/tmp/soak_dfuse_macsio/"
disable_caching: true
thread_count: 8
cores: '0-7'
datamover_stress:
job_timeout: 10
nodesperjob:
Expand Down
8 changes: 3 additions & 5 deletions src/tests/ftest/util/agent_utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""
(C) Copyright 2019-2024 Intel Corporation.

Check failure on line 2 in src/tests/ftest/util/agent_utils.py

View workflow job for this annotation

GitHub Actions / Copyright check

Copyright out of date

SPDX-License-Identifier: BSD-2-Clause-Patent
"""
Expand Down Expand Up @@ -229,9 +229,8 @@
the hosts using the config_file specification. Defaults to None.
manager (str, optional): the name of the JobManager class used to
manage the YamlCommand defined through the "job" attribute.
Defaults to "Orterun".
outputdir (str, optional): path to avocado test outputdir. Defaults
to None.
Defaults to "Systemctl".
outputdir (str, optional): path to avocado test outputdir. Defaults to None.
"""
agent_command = get_agent_command(
group, cert_dir, bin_dir, config_file, run_user, config_temp)
Expand Down Expand Up @@ -283,8 +282,7 @@
self._hosts, self.manager.command)

# Copy certificates
self.manager.job.copy_certificates(
get_log_file("daosCA/certs"), self._hosts)
self.manager.job.copy_certificates(get_log_file("daosCA/certs"), self._hosts)

# Verify the socket directory exists when using a non-systemctl manager
if self.verify_socket_dir:
Expand Down
43 changes: 26 additions & 17 deletions src/tests/ftest/util/apricot/apricot/test.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""
(C) Copyright 2020-2024 Intel Corporation.

Check failure on line 2 in src/tests/ftest/util/apricot/apricot/test.py

View workflow job for this annotation

GitHub Actions / Copyright check

Copyright out of date

SPDX-License-Identifier: BSD-2-Clause-Patent
"""
Expand All @@ -10,6 +10,7 @@
import re
import sys
from ast import literal_eval
from getpass import getuser
from time import time

from agent_utils import DaosAgentManager, include_local_host
Expand Down Expand Up @@ -743,7 +744,7 @@

# Toggle whether to dump server ULT stacks on failure
self.__dump_engine_ult_on_failure = self.params.get(
"dump_engine_ult_on_failure", "/run/setup/*", True)
"dump_engine_ult_on_failure", "/run/setup/*", self.__dump_engine_ult_on_failure)

# # Find a configuration that meets the test requirements
# self.config = Configuration(
Expand Down Expand Up @@ -1082,12 +1083,10 @@
"""
if group is None:
group = self.server_group
if config_file is None and self.agent_manager_class == "Systemctl":

if config_file is None:
config_file = self.test_env.agent_config
config_temp = self.get_config_file(group, "agent", self.test_dir)
elif config_file is None:
config_file = self.get_config_file(group, "agent")
config_temp = None

# Verify the correct configuration files have been provided
if self.agent_manager_class == "Systemctl" and config_temp is None:
Expand All @@ -1096,10 +1095,12 @@
"file provided for the Systemctl manager class!")

# Define the location of the certificates
if self.agent_manager_class == "Systemctl":
if self.agent_manager_class == "Systemctl" and self.test_env.agent_user != getuser():
# Default directory requiring privileged access
cert_dir = os.path.join(os.sep, "etc", "daos", "certs")
else:
cert_dir = self.workdir
# Test-specific directory not requiring privileged access
cert_dir = os.path.join(self.test_dir, "certs")

self.agent_managers.append(
DaosAgentManager(
Expand Down Expand Up @@ -1132,33 +1133,41 @@
"""
if group is None:
group = self.server_group

# Set default server config files
if svr_config_file is None and self.server_manager_class == "Systemctl":
svr_config_file = self.test_env.server_config
svr_config_temp = self.get_config_file(
group, "server", self.test_dir)
elif svr_config_file is None:
svr_config_file = self.get_config_file(group, "server")
svr_config_temp = None
if dmg_config_file is None and self.server_manager_class == "Systemctl":
dmg_config_file = self.test_env.control_config
dmg_config_temp = self.get_config_file(group, "dmg", self.test_dir)
elif dmg_config_file is None:
dmg_config_file = self.get_config_file(group, "dmg")
dmg_config_temp = None

# Verify the correct configuration files have been provided
if self.server_manager_class == "Systemctl" and svr_config_temp is None:
self.fail(
"Error adding a DaosServerManager: no temporary configuration "
"file provided for the Systemctl manager class!")

# Define the location of the certificates
# Set default dmg config files
if dmg_config_file is None:
if self.server_manager_class == "Systemctl":
dmg_config_file = self.test_env.control_config
dmg_config_temp = self.get_config_file(group, "dmg", self.test_dir)
else:
dmg_config_file = os.path.join(self.test_dir, "daos_control.yml")

# Define server certificate directory
if self.server_manager_class == "Systemctl":
svr_cert_dir = os.path.join(os.sep, "etc", "daos", "certs")
dmg_cert_dir = os.path.join(os.sep, "etc", "daos", "certs")
else:
svr_cert_dir = self.workdir
dmg_cert_dir = self.workdir

# Define dmg certificate directory
if self.server_manager_class == "Systemctl" and self.test_env.agent_user != getuser():
dmg_cert_dir = os.path.join(os.sep, "etc", "daos", "certs")
else:
dmg_cert_dir = os.path.join(self.test_dir, "certs")

self.server_managers.append(
DaosServerManager(
Expand Down Expand Up @@ -1683,7 +1692,7 @@

dmg_cmd = get_dmg_command(
self.server_group, dmg_cert_dir, self.bin, dmg_config_file,
dmg_config_temp, self.access_points_suffix)
dmg_config_temp, self.access_points_suffix, getuser())
dmg_cmd.hostlist = self.access_points
return dmg_cmd

Expand Down
Loading
Loading