Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Replace run_somd.sh with Pydantic configuration class #25

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,15 @@ python -m pip install --no-deps .
- Activate your a3fe conda environment
- Create a base directory for the calculation and create a directory called `input` within this
- Move your input files into the input directory. For example, if you have parameterised AMBER-format input files, name these bound_param.rst7, bound_param.prm7, free_param.rst7, and free_param.prm7. For more details see the documentation. Alternatively, copy the example input files from a3fe/a3fe/data/example_run_dir to your input directory.
- Copy run_somd.sh and template_config.sh from a3fe/a3fe/data/example_run_dir to your `input` directory, making sure to edit the SLURM options in run_somd.sh so that the jobs will run on your cluster
- Copy template_config.cfg from a3fe/a3fe/data/example_run_dir to your `input` directory.
- In the calculation base directory, run the following python code, either through ipython or as a python script (you will likely want to run the script with `nohup` or use ipython through tmux to ensure that the calculation is not killed when you lose connection)

```python
import a3fe as a3
calc = a3.Calculation(ensemble_size=5)
calc = a3.Calculation(
ensemble_size=5, # Use 5 (independently equilibrated) replicate runs
slurm_config=a3.SlurmConfig(partition="<desired partition>"), # Set your desired partition!
)
calc.setup()
calc.get_optimal_lam_vals()
calc.run(adaptive=False, runtime = 5) # Run non-adaptively for 5 ns per replicate
Expand Down
2 changes: 1 addition & 1 deletion a3fe/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
enums,
)

from .configuration import SystemPreparationConfig
from .configuration import SystemPreparationConfig, SlurmConfig

_sys.modules["EnsEquil"] = _sys.modules["a3fe"]

Expand Down
74 changes: 33 additions & 41 deletions a3fe/analyse/mbar.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@

import numpy as _np

from ..read._process_slurm_files import get_slurm_file_base as _get_slurm_file_base
from ..read._process_somd_files import read_mbar_gradients as _read_mbar_gradients
from ..read._process_somd_files import read_mbar_result as _read_mbar_result
from ..read._process_somd_files import (
Expand All @@ -24,6 +23,8 @@
from ..run._virtual_queue import Job as _Job
from ..run._virtual_queue import VirtualQueue as _VirtualQueue

from ..configuration import SlurmConfig as _SlurmConfig


def run_mbar(
output_dir: str,
Expand Down Expand Up @@ -127,8 +128,8 @@ def run_mbar(
def submit_mbar_slurm(
output_dir: str,
virtual_queue: _VirtualQueue,
slurm_config: _SlurmConfig,
run_nos: _List[int],
run_somd_dir: str,
percentage_end: float = 100,
percentage_start: float = 0,
subsampling: bool = False,
Expand All @@ -144,11 +145,10 @@ def submit_mbar_slurm(
The path to the output directory
virtual_queue : VirtualQueue
The virtual queue to submit the MBAR jobs to.
slurm_config: SlurmConfig
The SLURM configuration to use for the jobs.
run_nos : List[int]
The run numbers to use for MBAR.
run_somd_dir : str
The directory in which to find the `run_somd.sh` script, from
which the slurm header will be copied.
percentage_end : float, Optional, default: 100
The percentage of data after which to truncate the datafiles.
For example, if 100, the full datafile will be used. If 50, only
Expand All @@ -175,35 +175,29 @@ def submit_mbar_slurm(
The paths to the MBAR output files, which will be created
once the jobs complete.

tmp_simfiles : List[str]
The paths to the temporary truncated simfiles, so that they can be
cleaned up later.
tmp_files : List[str]
The paths to temporary files (truncated simfiles and submission scripts),
so that they can be cleaned up later.
"""
tmp_simfiles = _prepare_simfiles(
tmp_files = _prepare_simfiles(
output_dir=output_dir,
run_nos=run_nos,
percentage_end=percentage_end,
percentage_start=percentage_start,
equilibrated=equilibrated,
)

# Read the slurm header
# Get the header from run_somd.sh
header_lines = []
with open(f"{run_somd_dir}/run_somd.sh", "r") as file:
for line in file.readlines():
if line.startswith("#SBATCH") or line.startswith("#!/bin/bash"):
header_lines.append(line)
else:
break

# Add MBAR command and run for each run.
mbar_out_files = []
jobs = []

for run_no in run_nos:
# Get the name of the output file
# Get the name of the output files - the first for the MBAR output, and the second for the SLURM output
outfile = f"{output_dir}/freenrg-MBAR-run_{str(run_no).zfill(2)}_{round(percentage_end, 3)}_end_{round(percentage_start, 3)}_start.dat"
mbar_out_files.append(outfile)
slurm_outfile = f"slurm_freenrg-MBAR-run_{str(run_no).zfill(2)}_{round(percentage_end, 3)}_end_{round(percentage_start, 3)}_start.out"
slurm_config.output = slurm_outfile

# Create the command.
cmd_list = [
"analyse_freenrg",
Expand All @@ -219,21 +213,19 @@ def submit_mbar_slurm(
if subsampling:
cmd_list.append("--subsampling")
slurm_cmd = " ".join(cmd_list)
slurm_lines = header_lines + [slurm_cmd]
# Write the slurm file
slurm_file = f"{output_dir}/freenrg-MBAR-run_{str(run_no).zfill(2)}_{round(percentage_end, 3)}_end_{round(percentage_start, 3)}_start.sh"
with open(slurm_file, "w") as file:
file.writelines(slurm_lines)

# Submit to the virtual queue
cmd_list = [
"--chdir",
f"{output_dir}",
f"{slurm_file}",
] # The virtual queue adds sbatch
slurm_file_base = _get_slurm_file_base(slurm_file)
job = virtual_queue.submit(cmd_list, slurm_file_base=slurm_file_base)
# Update the virtual queue to submit the job
# Create and submit the job
script_name = f"{output_dir}/freenrg-MBAR-run_{str(run_no).zfill(2)}_{round(percentage_end, 3)}_end_{round(percentage_start, 3)}_start"
submission_args = slurm_config.get_submission_cmds(
slurm_cmd, output_dir, script_name
)
job = virtual_queue.submit(
submission_args,
slurm_file_base=slurm_config.get_slurm_output_file_base(output_dir),
)
tmp_files += [script_name + ".sh", _os.path.join(output_dir, slurm_outfile)]

# Update the virtual queue to submit the job to the real queue
virtual_queue.update()
jobs.append(job)

Expand All @@ -244,7 +236,7 @@ def submit_mbar_slurm(
_sleep(30)
virtual_queue.update()

return jobs, mbar_out_files, tmp_simfiles
return jobs, mbar_out_files, tmp_files


def collect_mbar_slurm(
Expand All @@ -254,7 +246,7 @@ def collect_mbar_slurm(
mbar_out_files: _List[str],
virtual_queue: _VirtualQueue,
delete_outfiles: bool = False,
tmp_simfiles: _List[str] = [],
tmp_files: _List[str] = [],
) -> _Tuple[_np.ndarray, _np.ndarray, _List[str], _Dict[str, _Dict[str, _np.ndarray]]]:
"""
Collect the results from MBAR slurm jobs.
Expand All @@ -274,9 +266,9 @@ def collect_mbar_slurm(
delete_outfiles : bool, Optional, default: False
Whether to delete the MBAR analysis output files after the free
energy change and errors have been extracted.
tmp_simfiles : List[str], Optional, default: []
The paths to the temporary truncated simfiles, so that they can be
cleaned up later.
tmp_files : List[str], Optional, default: []
The paths to temporary files (truncated simfiles and submission scripts),
so that they can be cleaned up later.

Returns
-------
Expand Down Expand Up @@ -324,8 +316,8 @@ def collect_mbar_slurm(
mbar_out_files = []

# Clean up temporary simfiles
for tmp_simfile in tmp_simfiles:
_subprocess.run(["rm", tmp_simfile])
for tmp_file in tmp_files:
_subprocess.run(["rm", tmp_file])

return free_energies, errors, mbar_out_files, mbar_grads

Expand Down
2 changes: 1 addition & 1 deletion a3fe/analyse/process_grads.py
Original file line number Diff line number Diff line change
Expand Up @@ -750,8 +750,8 @@ def get_time_series_multiwindow_mbar(
_submit_mbar_slurm(
output_dir=output_dir,
virtual_queue=lambda_windows[0].virtual_queue,
slurm_config=lambda_windows[0].analysis_slurm_config,
run_nos=run_nos,
run_somd_dir=lambda_windows[0].input_dir,
percentage_end=end_frac * 100,
percentage_start=start_frac * 100,
subsampling=False,
Expand Down
1 change: 1 addition & 0 deletions a3fe/configuration/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
"""Pydantic configuration classes for the a3fe package."""

from .system_prep_config import SystemPreparationConfig
from .slurm_config import SlurmConfig
149 changes: 149 additions & 0 deletions a3fe/configuration/slurm_config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
"""Configuration classes for SLURM configuration."""

__all__ = [
"SlurmConfig",
]

import yaml as _yaml
import subprocess as _subprocess
import re as _re

from pydantic import BaseModel as _BaseModel
from pydantic import Field as _Field
from pydantic import ConfigDict as _ConfigDict

import os as _os

from typing import List as _List, Dict as _Dict


class SlurmConfig(_BaseModel):
    """
    Pydantic model for holding a SLURM configuration.

    The fields map directly onto ``#SBATCH`` header options written to the
    generated submission script. Less common options can be supplied via
    ``extra_options``, e.g. ``{"account": "qt"}`` -> ``#SBATCH --account=qt``.
    """

    partition: str = _Field("default", description="SLURM partition to submit to.")
    time: str = _Field("24:00:00", description="Time limit for the SLURM job.")
    gres: str = _Field("gpu:1", description="Resources to request - normally one GPU.")
    nodes: int = _Field(1, ge=1)
    ntasks_per_node: int = _Field(1, ge=1)
    output: str = _Field(
        "slurm-%A.%a.out", description="Output file for the SLURM job."
    )
    extra_options: _Dict[str, str] = _Field(
        # default_factory avoids sharing one dict between instances
        default_factory=dict,
        description="Extra options to pass to SLURM. For example, {'account': 'qt'}",
    )

    # Re-validate on attribute assignment (e.g. when `output` is updated
    # per-job before submission).
    model_config = _ConfigDict(validate_assignment=True)

    def get_submission_cmds(
        self, cmd: str, run_dir: str, script_name: str = "a3fe"
    ) -> _List[str]:
        """
        Generates the SLURM submission commands list based on the configuration.

        Writes a submission script (``<run_dir>/<script_name>.sh``) containing
        the ``#SBATCH`` header derived from this configuration followed by
        ``cmd``, and returns the command-line arguments needed to submit it.

        Parameters
        ----------
        cmd : str
            Command to run during the SLURM job.

        run_dir : str
            Directory to submit the SLURM job from.

        script_name : str, optional, default="a3fe"
            Name of the script file to write. Note that when running many jobs from the
            same directory, this should be unique to avoid overwriting the script file.

        Returns
        -------
        List[str]
            The list of SLURM arguments, starting with the submission command.
        """
        # First, write the script to a file
        script_path = _os.path.join(run_dir, f"{script_name}.sh")

        script_lines = [
            "#!/bin/bash",
            f"#SBATCH --partition={self.partition}",
            f"#SBATCH --time={self.time}",
            f"#SBATCH --gres={self.gres}",
            f"#SBATCH --nodes={self.nodes}",
            f"#SBATCH --ntasks-per-node={self.ntasks_per_node}",
            f"#SBATCH --output={self.output}",
        ]

        # Any additional options the user has requested.
        for key, value in self.extra_options.items():
            script_lines.append(f"#SBATCH --{key}={value}")

        script_lines.append(f"\n{cmd}")

        with open(script_path, "w") as f:
            f.write("\n".join(script_lines) + "\n")

        # "sbatch" is the SLURM batch-submission command (was "rbatch",
        # which is not a SLURM command and would fail on submission).
        return ["sbatch", f"--chdir={run_dir}", script_path]

    def get_slurm_output_file_base(self, run_dir: str) -> str:
        """
        Get the base name of the SLURM output file.

        This is the configured output pattern truncated at the first SLURM
        substitution token (e.g. "%A"), joined onto the run directory.

        Parameters
        ----------
        run_dir : str
            Directory the job was submitted from.

        Returns
        -------
        str
            The base name of the SLURM output file.
        """
        return _os.path.join(run_dir, self.output.split("%")[0])

    @classmethod
    def get_default_partition(cls) -> str:
        """
        Get the default SLURM partition.

        Returns
        -------
        str
            The name of the partition which `sinfo` marks as the default
            (with a trailing "*").

        Raises
        ------
        subprocess.CalledProcessError
            If `sinfo` exits with a non-zero status.
        ValueError
            If no default partition can be found in the `sinfo` output.
        """
        sinfo = _subprocess.run(
            ["sinfo", "-o", "%P", "-h"],
            stdout=_subprocess.PIPE,
            text=True,
            check=True,  # Fail loudly if sinfo is unavailable/errors
        )
        # Search for the default queue (marked with "*", then throw away the "*")
        match = _re.search(r"([^\s]+)(?=\*)", sinfo.stdout)
        if match is None:
            raise ValueError(
                "Could not determine the default SLURM partition from "
                f"sinfo output: {sinfo.stdout!r}"
            )
        return match.group(1)

    def dump(self, save_dir: str) -> None:
        """
        Dumps the configuration to a YAML file.

        Parameters
        ----------
        save_dir : str
            Directory to save the YAML file to.
        """
        model_dict = self.model_dump()

        save_path = _os.path.join(save_dir, self.get_file_name())
        with open(save_path, "w") as f:
            _yaml.dump(model_dict, f, default_flow_style=False)

    @classmethod
    def load(cls, load_dir: str) -> "SlurmConfig":
        """
        Loads the configuration from a YAML file.

        Parameters
        ----------
        load_dir : str
            Directory to load the YAML file from.

        Returns
        -------
        SlurmConfig
            The loaded configuration.
        """
        with open(_os.path.join(load_dir, cls.get_file_name()), "r") as f:
            model_dict = _yaml.safe_load(f)
        return cls(**model_dict)

    @staticmethod
    def get_file_name() -> str:
        """
        Get the name of the SLURM configuration file.
        """
        return "slurm_config.yaml"
16 changes: 0 additions & 16 deletions a3fe/data/alternative_input/alternative_run_somd.sh

This file was deleted.

This file was deleted.

Loading