Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Replace run_somd.sh with Pydantic configuration class #25

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,15 @@ python -m pip install --no-deps .
- Activate your a3fe conda environment
- Create a base directory for the calculation and create a directory called `input` within this
- Move your input files into the input directory. For example, if you have parameterised AMBER-format input files, name these bound_param.rst7, bound_param.prm7, free_param.rst7, and free_param.prm7. For more details see the documentation. Alternatively, copy the example input files from a3fe/a3fe/data/example_run_dir to your input directory.
- Copy run_somd.sh and template_config.sh from a3fe/a3fe/data/example_run_dir to your `input` directory, making sure to edit the SLURM options in run_somd.sh so that the jobs will run on your cluster
- Copy template_config.cfg from a3fe/a3fe/data/example_run_dir to your `input` directory.
- In the calculation base directory, run the following python code, either through ipython or as a python script (you will likely want to run the script with `nohup` or use ipython through tmux to ensure that the calculation is not killed when you lose connection)

```python
import a3fe as a3
calc = a3.Calculation(ensemble_size=5)
calc = a3.Calculation(
ensemble_size=5, # Use 5 (independently equilibrated) replicate runs
slurm_config=a3.SlurmConfig(partition="<desired partition>"), # Set your desired partition!
)
calc.setup()
calc.get_optimal_lam_vals()
calc.run(adaptive=False, runtime = 5) # Run non-adaptively for 5 ns per replicate
Expand Down
2 changes: 1 addition & 1 deletion a3fe/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
enums,
)

from .configuration import SystemPreparationConfig
from .configuration import SystemPreparationConfig, SlurmConfig

_sys.modules["EnsEquil"] = _sys.modules["a3fe"]

Expand Down
74 changes: 33 additions & 41 deletions a3fe/analyse/mbar.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@

import numpy as _np

from ..read._process_slurm_files import get_slurm_file_base as _get_slurm_file_base
from ..read._process_somd_files import read_mbar_gradients as _read_mbar_gradients
from ..read._process_somd_files import read_mbar_result as _read_mbar_result
from ..read._process_somd_files import (
Expand All @@ -24,6 +23,8 @@
from ..run._virtual_queue import Job as _Job
from ..run._virtual_queue import VirtualQueue as _VirtualQueue

from ..configuration import SlurmConfig as _SlurmConfig


def run_mbar(
output_dir: str,
Expand Down Expand Up @@ -127,8 +128,8 @@ def run_mbar(
def submit_mbar_slurm(
output_dir: str,
virtual_queue: _VirtualQueue,
slurm_config: _SlurmConfig,
run_nos: _List[int],
run_somd_dir: str,
percentage_end: float = 100,
percentage_start: float = 0,
subsampling: bool = False,
Expand All @@ -144,11 +145,10 @@ def submit_mbar_slurm(
The path to the output directory
virtual_queue : VirtualQueue
The virtual queue to submit the MBAR jobs to.
slurm_config: SlurmConfig
The SLURM configuration to use for the jobs.
run_nos : List[int]
The run numbers to use for MBAR.
run_somd_dir : str
The directory in which to find the `run_somd.sh` script, from
which the slurm header will be copied.
percentage_end : float, Optional, default: 100
The percentage of data after which to truncate the datafiles.
For example, if 100, the full datafile will be used. If 50, only
Expand All @@ -175,35 +175,29 @@ def submit_mbar_slurm(
The paths to the MBAR output files, which will be created
once the jobs complete.

tmp_simfiles : List[str]
The paths to the temporary truncated simfiles, so that they can be
cleaned up later.
tmp_files : List[str]
The paths to temporary files (truncated simfiles and submission scripts),
so that they can be cleaned up later.
"""
tmp_simfiles = _prepare_simfiles(
tmp_files = _prepare_simfiles(
output_dir=output_dir,
run_nos=run_nos,
percentage_end=percentage_end,
percentage_start=percentage_start,
equilibrated=equilibrated,
)

# Read the slurm header
# Get the header from run_somd.sh
header_lines = []
with open(f"{run_somd_dir}/run_somd.sh", "r") as file:
for line in file.readlines():
if line.startswith("#SBATCH") or line.startswith("#!/bin/bash"):
header_lines.append(line)
else:
break

# Add MBAR command and run for each run.
mbar_out_files = []
jobs = []

for run_no in run_nos:
# Get the name of the output file
# Get the name of the output files - the first for the MBAR output, and the second for the SLURM output
outfile = f"{output_dir}/freenrg-MBAR-run_{str(run_no).zfill(2)}_{round(percentage_end, 3)}_end_{round(percentage_start, 3)}_start.dat"
mbar_out_files.append(outfile)
slurm_outfile = f"slurm_freenrg-MBAR-run_{str(run_no).zfill(2)}_{round(percentage_end, 3)}_end_{round(percentage_start, 3)}_start.out"
slurm_config.output = slurm_outfile

# Create the command.
cmd_list = [
"analyse_freenrg",
Expand All @@ -219,21 +213,19 @@ def submit_mbar_slurm(
if subsampling:
cmd_list.append("--subsampling")
slurm_cmd = " ".join(cmd_list)
slurm_lines = header_lines + [slurm_cmd]
# Write the slurm file
slurm_file = f"{output_dir}/freenrg-MBAR-run_{str(run_no).zfill(2)}_{round(percentage_end, 3)}_end_{round(percentage_start, 3)}_start.sh"
with open(slurm_file, "w") as file:
file.writelines(slurm_lines)

# Submit to the virtual queue
cmd_list = [
"--chdir",
f"{output_dir}",
f"{slurm_file}",
] # The virtual queue adds sbatch
slurm_file_base = _get_slurm_file_base(slurm_file)
job = virtual_queue.submit(cmd_list, slurm_file_base=slurm_file_base)
# Update the virtual queue to submit the job
# Create and submit the job
script_name = f"{output_dir}/freenrg-MBAR-run_{str(run_no).zfill(2)}_{round(percentage_end, 3)}_end_{round(percentage_start, 3)}_start"
submission_args = slurm_config.get_submission_cmds(
slurm_cmd, output_dir, script_name
)
job = virtual_queue.submit(
submission_args,
slurm_file_base=slurm_config.get_slurm_output_file_base(output_dir),
)
tmp_files += [script_name + ".sh", _os.path.join(output_dir, slurm_outfile)]

# Update the virtual queue to submit the job to the real queue
virtual_queue.update()
jobs.append(job)

Expand All @@ -244,7 +236,7 @@ def submit_mbar_slurm(
_sleep(30)
virtual_queue.update()

return jobs, mbar_out_files, tmp_simfiles
return jobs, mbar_out_files, tmp_files


def collect_mbar_slurm(
Expand All @@ -254,7 +246,7 @@ def collect_mbar_slurm(
mbar_out_files: _List[str],
virtual_queue: _VirtualQueue,
delete_outfiles: bool = False,
tmp_simfiles: _List[str] = [],
tmp_files: _List[str] = [],
) -> _Tuple[_np.ndarray, _np.ndarray, _List[str], _Dict[str, _Dict[str, _np.ndarray]]]:
"""
Collect the results from MBAR slurm jobs.
Expand All @@ -274,9 +266,9 @@ def collect_mbar_slurm(
delete_outfiles : bool, Optional, default: False
Whether to delete the MBAR analysis output files after the free
energy change and errors have been extracted.
tmp_simfiles : List[str], Optional, default: []
The paths to the temporary truncated simfiles, so that they can be
cleaned up later.
tmp_files : List[str], Optional, default: []
The paths to temporary files (truncated simfiles and submission scripts),
so that they can be cleaned up later.

Returns
-------
Expand Down Expand Up @@ -324,8 +316,8 @@ def collect_mbar_slurm(
mbar_out_files = []

# Clean up temporary simfiles
for tmp_simfile in tmp_simfiles:
_subprocess.run(["rm", tmp_simfile])
for tmp_file in tmp_files:
_subprocess.run(["rm", tmp_file])

return free_energies, errors, mbar_out_files, mbar_grads

Expand Down
2 changes: 1 addition & 1 deletion a3fe/analyse/process_grads.py
Original file line number Diff line number Diff line change
Expand Up @@ -750,8 +750,8 @@ def get_time_series_multiwindow_mbar(
_submit_mbar_slurm(
output_dir=output_dir,
virtual_queue=lambda_windows[0].virtual_queue,
slurm_config=lambda_windows[0].analysis_slurm_config,
run_nos=run_nos,
run_somd_dir=lambda_windows[0].input_dir,
percentage_end=end_frac * 100,
percentage_start=start_frac * 100,
subsampling=False,
Expand Down
1 change: 1 addition & 0 deletions a3fe/configuration/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
"""Pydantic configuration classes for the a3fe package."""

from .system_prep_config import SystemPreparationConfig
from .slurm_config import SlurmConfig
149 changes: 149 additions & 0 deletions a3fe/configuration/slurm_config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
"""Configuration classes for SLURM configuration."""

__all__ = [
"SlurmConfig",
]

import yaml as _yaml
import subprocess as _subprocess
import re as _re

from pydantic import BaseModel as _BaseModel
from pydantic import Field as _Field
from pydantic import ConfigDict as _ConfigDict

import os as _os

from typing import List as _List, Dict as _Dict


class SlurmConfig(_BaseModel):
    """
    Pydantic model for holding a SLURM configuration.

    The fields map directly onto ``#SBATCH`` header options written to the
    generated submission script. Less common options can be supplied via
    ``extra_options``, e.g. ``{"account": "qt"}`` -> ``#SBATCH --account=qt``.
    """

    partition: str = _Field("default", description="SLURM partition to submit to.")
    time: str = _Field("24:00:00", description="Time limit for the SLURM job.")
    gres: str = _Field("gpu:1", description="Resources to request - normally one GPU.")
    nodes: int = _Field(1, ge=1)
    ntasks_per_node: int = _Field(1, ge=1)
    output: str = _Field(
        "slurm-%A.%a.out", description="Output file for the SLURM job."
    )
    extra_options: _Dict[str, str] = _Field(
        # default_factory avoids sharing one dict between instances
        default_factory=dict,
        description="Extra options to pass to SLURM. For example, {'account': 'qt'}",
    )

    # Re-validate on attribute assignment (e.g. when `output` is updated
    # per-job before submission).
    model_config = _ConfigDict(validate_assignment=True)

    def get_submission_cmds(
        self, cmd: str, run_dir: str, script_name: str = "a3fe"
    ) -> _List[str]:
        """
        Generates the SLURM submission commands list based on the configuration.

        Writes a submission script (``<run_dir>/<script_name>.sh``) containing
        the ``#SBATCH`` header derived from this configuration followed by
        ``cmd``, and returns the command-line arguments needed to submit it.

        Parameters
        ----------
        cmd : str
            Command to run during the SLURM job.

        run_dir : str
            Directory to submit the SLURM job from.

        script_name : str, optional, default="a3fe"
            Name of the script file to write. Note that when running many jobs from the
            same directory, this should be unique to avoid overwriting the script file.

        Returns
        -------
        List[str]
            The list of SLURM arguments, starting with the submission command.
        """
        # First, write the script to a file
        script_path = _os.path.join(run_dir, f"{script_name}.sh")

        script_lines = [
            "#!/bin/bash",
            f"#SBATCH --partition={self.partition}",
            f"#SBATCH --time={self.time}",
            f"#SBATCH --gres={self.gres}",
            f"#SBATCH --nodes={self.nodes}",
            f"#SBATCH --ntasks-per-node={self.ntasks_per_node}",
            f"#SBATCH --output={self.output}",
        ]

        # Any additional options the user has requested.
        for key, value in self.extra_options.items():
            script_lines.append(f"#SBATCH --{key}={value}")

        script_lines.append(f"\n{cmd}")

        with open(script_path, "w") as f:
            f.write("\n".join(script_lines) + "\n")

        # "sbatch" is the SLURM batch-submission command (was "rbatch",
        # which is not a SLURM command and would fail on submission).
        return ["sbatch", f"--chdir={run_dir}", script_path]

    def get_slurm_output_file_base(self, run_dir: str) -> str:
        """
        Get the base name of the SLURM output file.

        This is the configured output pattern truncated at the first SLURM
        substitution token (e.g. "%A"), joined onto the run directory.

        Parameters
        ----------
        run_dir : str
            Directory the job was submitted from.

        Returns
        -------
        str
            The base name of the SLURM output file.
        """
        return _os.path.join(run_dir, self.output.split("%")[0])

    @classmethod
    def get_default_partition(cls) -> str:
        """
        Get the default SLURM partition.

        Returns
        -------
        str
            The name of the partition which `sinfo` marks as the default
            (with a trailing "*").

        Raises
        ------
        subprocess.CalledProcessError
            If `sinfo` exits with a non-zero status.
        ValueError
            If no default partition can be found in the `sinfo` output.
        """
        sinfo = _subprocess.run(
            ["sinfo", "-o", "%P", "-h"],
            stdout=_subprocess.PIPE,
            text=True,
            check=True,  # Fail loudly if sinfo is unavailable/errors
        )
        # Search for the default queue (marked with "*", then throw away the "*")
        match = _re.search(r"([^\s]+)(?=\*)", sinfo.stdout)
        if match is None:
            raise ValueError(
                "Could not determine the default SLURM partition from "
                f"sinfo output: {sinfo.stdout!r}"
            )
        return match.group(1)

    def dump(self, save_dir: str) -> None:
        """
        Dumps the configuration to a YAML file.

        Parameters
        ----------
        save_dir : str
            Directory to save the YAML file to.
        """
        model_dict = self.model_dump()

        save_path = _os.path.join(save_dir, self.get_file_name())
        with open(save_path, "w") as f:
            _yaml.dump(model_dict, f, default_flow_style=False)

    @classmethod
    def load(cls, load_dir: str) -> "SlurmConfig":
        """
        Loads the configuration from a YAML file.

        Parameters
        ----------
        load_dir : str
            Directory to load the YAML file from.

        Returns
        -------
        SlurmConfig
            The loaded configuration.
        """
        with open(_os.path.join(load_dir, cls.get_file_name()), "r") as f:
            model_dict = _yaml.safe_load(f)
        return cls(**model_dict)

    @staticmethod
    def get_file_name() -> str:
        """
        Get the name of the SLURM configuration file.
        """
        return "slurm_config.yaml"
16 changes: 0 additions & 16 deletions a3fe/data/alternative_input/alternative_run_somd.sh

This file was deleted.

This file was deleted.

Loading