Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DAOS-16834 test: Support testing MD on SSD Phase 2 #15767

Open
wants to merge 8 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 17 additions & 3 deletions src/tests/ftest/ior/small.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
hosts:
test_servers: 2
test_clients: 2

timeout: 700

server_config:
name: daos_server
engines_per_host: 2
Expand All @@ -23,19 +25,29 @@ server_config:
storage: auto
transport_config:
allow_insecure: true

agent_config:
transport_config:
allow_insecure: true

dmg:
transport_config:
allow_insecure: true
pool:
scm_size: 3000000000
nvme_size: 9000000000

pool: !mux
default:
size: 50G
!filter-only : /run/launch/nvme/default # yamllint disable-line rule:colons
md_on_ssd_p2:
size: 50G
mem_ratio: 50
!filter-only : /run/launch/nvme/md_on_ssd_p2 # yamllint disable-line rule:colons

container:
type: POSIX
properties: cksum:crc16,cksum_size:16384,srv_cksum:on
control_method: daos

ior:
env_vars:
- D_IL_REPORT=1
Expand All @@ -62,7 +74,9 @@ ior:
obj_class:
- SX
- RP_2GX

dfuse:
disable_caching: true

hdf5_vol:
plugin_path: /usr/lib64/mpich/lib
20 changes: 17 additions & 3 deletions src/tests/ftest/mdtest/small.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
hosts:
test_servers: 2
test_clients: 2

timeout: 360

server_config:
name: daos_server
engines_per_host: 2
Expand All @@ -23,19 +25,31 @@ server_config:
storage: auto
transport_config:
allow_insecure: True

agent_config:
transport_config:
allow_insecure: True

dmg:
transport_config:
allow_insecure: True
pool:
size: 50G
svcn: 1

pool: !mux
default:
size: 50G
svcn: 1
!filter-only : /run/launch/nvme/default # yamllint disable-line rule:colons
md_on_ssd_p2:
size: 50G
svcn: 1
mem_ratio: 50
!filter-only : /run/launch/nvme/md_on_ssd_p2 # yamllint disable-line rule:colons

container:
type: POSIX
properties: cksum:crc16,cksum_size:16384,srv_cksum:on
control_method: daos

mdtest:
client_processes:
ppn: 32
Expand Down
8 changes: 6 additions & 2 deletions src/tests/ftest/util/dmg_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -537,7 +537,8 @@ def support_collect_log(self, stop_on_error=None, target_folder=None, archive=No

def pool_create(self, scm_size, uid=None, gid=None, nvme_size=None,
target_list=None, svcn=None, acl_file=None, size=None,
tier_ratio=None, properties=None, label=None, nranks=None):
tier_ratio=None, properties=None, label=None, nranks=None,
mem_ratio=None):
# pylint: disable=too-many-arguments
"""Create a pool with the dmg command.

Expand All @@ -563,6 +564,8 @@ def pool_create(self, scm_size, uid=None, gid=None, nvme_size=None,
Defaults to None
label (str, optional): Pool label. Defaults to None.
nranks (str, optional): Number of ranks to use. Defaults to None
mem_ratio (str, optional): memory file to metadata storage size ratio.
Defaults to None.

Raises:
CommandFailure: if the 'dmg pool create' command fails and
Expand All @@ -584,7 +587,8 @@ def pool_create(self, scm_size, uid=None, gid=None, nvme_size=None,
"acl_file": acl_file,
"properties": properties,
"label": label,
"nranks": nranks
"nranks": nranks,
"mem_ratio": mem_ratio
}

if target_list is not None:
Expand Down
1 change: 1 addition & 0 deletions src/tests/ftest/util/dmg_utils_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -431,6 +431,7 @@ def __init__(self):
self.sys = FormattedParameter("--sys={}", None)
self.properties = FormattedParameter("--properties={}", None)
self.nranks = FormattedParameter("--nranks={}", None)
self.mem_ratio = FormattedParameter("--mem-ratio={}", None)

class DeleteAclSubCommand(CommandWithParameters):
"""Defines an object for the dmg pool delete-acl command."""
Expand Down
31 changes: 30 additions & 1 deletion src/tests/ftest/util/launch_utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""
(C) Copyright 2022-2024 Intel Corporation.
(C) Copyright 2025 Hewlett Packard Enterprise Development LP

SPDX-License-Identifier: BSD-2-Clause-Patent
"""
Expand All @@ -25,7 +26,7 @@
from util.storage_utils import StorageException, StorageInfo
from util.systemctl_utils import SystemctlFailure, create_override_config
from util.user_utils import get_group_id, get_user_groups, groupadd, useradd, userdel
from util.yaml_utils import YamlUpdater, get_yaml_data
from util.yaml_utils import YamlUpdater, get_yaml_data, write_yaml_file

D_TM_SHARED_MEMORY_KEY = 0x10242048

Expand Down Expand Up @@ -1071,6 +1072,9 @@
logger, storage_info, self._yaml_directory, tier_0_type, scm_size, scm_mount,
max_nvme_tiers, control_metadata)

# Generate launch parameter branch extra yaml file
self._add_launch_param_yaml(logger, self._yaml_directory)

# Replace any placeholders in the test yaml file
for test in self.tests:
new_yaml_file = updater.update(test.yaml_file, self._yaml_directory)
Expand All @@ -1097,6 +1101,7 @@
"""Add extra storage yaml definitions for tests requesting automatic storage configurations.

Args:
logger (Logger): logger for the messages produced by this method
storage_info (StorageInfo): the collected storage information from the hosts
yaml_dir (str): path in which to create the extra storage yaml files
tier_0_type (str): storage tier 0 type to define; 'pmem' or 'ram'
Expand Down Expand Up @@ -1137,6 +1142,30 @@
# Allow extra yaml files to override the generated storage yaml
test.extra_yaml.insert(0, engine_storage_yaml[engines])

def _add_launch_param_yaml(self, logger, yaml_dir):
"""Add extra yaml for multiplexed launch parameter branches.

Check warning on line 1146 in src/tests/ftest/util/launch_utils.py

View workflow job for this annotation

GitHub Actions / Pylint check

wrong-spelling-in-docstring, Wrong spelling of a word 'multiplexation' in a docstring:

Args:
logger (Logger): logger for the messages produced by this method
yaml_dir (str): path in which to create the extra launch parameter yaml file

Raises:
YamlException: if there was an error writing the yaml file
"""
yaml_file = os.path.join(yaml_dir, "extra_yaml_launch_params.yaml")
lines = ['launch:']
lines.append(' nvme: !mux')
labels = ['default']
if self._nvme.startswith("auto_md_on_ssd"):
labels.append('md_on_ssd_p2')
for label in labels:
lines.append(f' {label}:')
lines.append(' on: true')
write_yaml_file(logger, yaml_file, lines)
Comment on lines +1155 to +1164
Copy link
Contributor

@daltonbohning daltonbohning Mar 3, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Doesn't this mean that when we use "auto_md_on_ssd", EVERY test has its variants doubled? And then specifically ior and mdtest small run a further additional variant?
For this to work as intended I think we should not use nvme: !mux

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

then specifically ior and mdtest small run a further additional variant

Correction, they still have their variants doubled. But the mux in their configs filters out variants so in total it's still doubled.

But this means other tests will have their variants doubled by the extra yaml, and they will not filter them out

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Running a test other than ior/mdtest small in the md on ssd stage would highlight this

Copy link
Contributor Author

@phender phender Mar 5, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Correct. We should only add the extra_yaml_launch_params.yaml (with the mux) if we detect it being used as a filter, e.g. search the test yaml file for !filter-only : /run/launch/nvme. I'll also add running an additional test on the next push.


for test in self.tests:
test.extra_yaml.insert(0, yaml_file)

def setup_slurm(self, logger, setup, install, user, result):
"""Set up slurm on the hosts if any tests are using partitions.

Expand Down
12 changes: 4 additions & 8 deletions src/tests/ftest/util/storage_utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""
(C) Copyright 2022-2023 Intel Corporation.
(C) Copyright 2025 Hewlett Packard Enterprise Development LP

SPDX-License-Identifier: BSD-2-Clause-Patent
"""
Expand All @@ -14,6 +15,7 @@
from ClusterShell.NodeSet import NodeSet
# pylint: disable=import-error,no-name-in-module
from util.run_utils import run_remote
from util.yaml_utils import write_yaml_file


def find_pci_address(value, *flags):
Expand Down Expand Up @@ -648,6 +650,7 @@ def write_storage_yaml(self, yaml_file, engines, tier_0_type, scm_size=0,

Raises:
StorageException: if an invalid storage type was specified
YamlException: if there was an error writing the yaml file

"""
tiers = 1
Expand Down Expand Up @@ -730,14 +733,7 @@ def write_storage_yaml(self, yaml_file, engines, tier_0_type, scm_size=0,
lines.append(
f' bdev_roles: [{", ".join(get_tier_roles(tier, tiers))}]')

self._log.debug(' Creating %s', yaml_file)
for line in lines:
self._log.debug(' %s', line)
try:
with open(yaml_file, "w", encoding="utf-8") as config_handle:
config_handle.writelines(f'{line}\n' for line in lines)
except IOError as error:
self._raise_error(f"Error writing avocado config file {yaml_file}", error)
write_yaml_file(self._log, yaml_file, lines)

@staticmethod
def _get_numa_devices(devices):
Expand Down
2 changes: 2 additions & 0 deletions src/tests/ftest/util/test_utils_pool.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,7 @@ def __init__(self, context, dmg_command, label_generator=None, namespace=POOL_NA
self.nranks = BasicParameter(None)
self.size = BasicParameter(None)
self.tier_ratio = BasicParameter(None)
self.mem_ratio = BasicParameter(None)
self.scm_size = BasicParameter(None)
self.nvme_size = BasicParameter(None)
self.prop_name = BasicParameter(None) # name of property to be set
Expand Down Expand Up @@ -404,6 +405,7 @@ def create(self):
"gid": self.gid,
"size": self.size.value,
"tier_ratio": self.tier_ratio.value,
"mem_ratio": self.mem_ratio.value,
"scm_size": self.scm_size.value,
"nranks": self.nranks.value,
"properties": self.properties.value,
Expand Down
24 changes: 24 additions & 0 deletions src/tests/ftest/util/yaml_utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""
(C) Copyright 2020-2024 Intel Corporation.
(C) Copyright 2025 Hewlett Packard Enterprise Development LP

SPDX-License-Identifier: BSD-2-Clause-Patent
"""
Expand Down Expand Up @@ -70,6 +71,29 @@ def ignore_unknown(self, node): # pylint: disable=no-self-use,unused-argument
raise YamlException(f"Error reading {yaml_file}") from error


def write_yaml_file(logger, yaml_file, lines):
    """Write the contents to a yaml file.

    Each entry is logged at debug level and then written to the file followed by a newline.
    The file is opened in write mode, so any existing contents are replaced.

    Args:
        logger (Logger): logger for the messages produced by this method
        yaml_file (str): yaml file to write
        lines (list): yaml file contents, one entry per line without a trailing newline. Any
            iterable of str is accepted.

    Raises:
        YamlException: if there is an error writing the yaml file.
    """
    # The contents are traversed twice (logging, then writing). Materialize them once so a
    # one-shot iterable such as a generator is not exhausted before the file is written.
    lines = list(lines)

    logger.debug(' Creating %s', yaml_file)
    for line in lines:
        logger.debug(' %s', line)

    try:
        with open(yaml_file, "w", encoding="utf-8") as yaml_handle:
            yaml_handle.writelines(f'{line}\n' for line in lines)
    except OSError as error:
        # IOError is an alias of OSError in Python 3; callers only ever see YamlException.
        message = f"Error writing yaml file {yaml_file}"
        logger.error(message)
        raise YamlException(message) from error


class YamlUpdater():
"""A class for updating placeholders in test yaml files."""

Expand Down
Loading