Skip to content

Commit

Permalink
Add SLURM Pytorch extra hook
Browse files Browse the repository at this point in the history
  • Loading branch information
3XX0 committed Aug 28, 2019
1 parent b7f34e2 commit a5dd946
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 1 deletion.
3 changes: 2 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,8 @@ HOOKS := conf/hooks/10-cgroups.sh \

CONFIG_EXTRA := conf/enroot.bash_completion

HOOKS_EXTRA := conf/hooks/50-slurm-pmi.sh
HOOKS_EXTRA := conf/hooks/50-slurm-pmi.sh \
conf/hooks/50-slurm-pytorch.sh

MOUNTS := conf/mounts/10-system.fstab \
conf/mounts/20-config.fstab
Expand Down
37 changes: 37 additions & 0 deletions conf/hooks/50-slurm-pytorch.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
#! /bin/bash

# Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

set -eu

# Enroot hook: when the container environment file (${ENROOT_ENVIRON}) declares
# PYTORCH_VERSION and the process runs under SLURM, append MASTER_ADDR,
# MASTER_PORT, WORLD_SIZE, RANK and LOCAL_RANK derived from the SLURM_* job
# variables. A variable that already has an entry in the environment file is
# never overridden.

# Succeeds when the environment file already contains a line starting with "$1=".
environ_has() {
    grep -q "^$1=" "${ENROOT_ENVIRON}"
}

# Appends a "$1=$2" line to the environment file.
environ_append() {
    printf "%s=%s\n" "$1" "$2" >> "${ENROOT_ENVIRON}"
}

# Nothing to do for containers that do not declare PYTORCH_VERSION.
if ! environ_has PYTORCH_VERSION; then
    exit 0
fi

if [ -n "${SLURM_NODELIST-}" ] && ! environ_has MASTER_ADDR && command -v scontrol > /dev/null; then
    # Capture scontrol's output on its own: inside a pipeline or a command
    # substitution used as an argument its failure goes unnoticed, which would
    # result in an empty "MASTER_ADDR=" entry being written.
    hostnames="$(scontrol show hostname "${SLURM_NODELIST}")" || hostnames=""
    # Keep only the first line of the expanded node list.
    master_addr=${hostnames%%$'\n'*}
    if [ -n "${master_addr}" ]; then
        environ_append MASTER_ADDR "${master_addr}"
    fi
fi
if [ -n "${SLURM_JOB_ID-}" ] && ! environ_has MASTER_PORT; then
    # Maps the job ID onto a port in the range 49152-65535.
    environ_append MASTER_PORT "$((${SLURM_JOB_ID} % 16384 + 49152))"
fi
if [ -n "${SLURM_NTASKS-}" ] && ! environ_has WORLD_SIZE; then
    environ_append WORLD_SIZE "${SLURM_NTASKS}"
fi
if [ -n "${SLURM_PROCID-}" ] && ! environ_has RANK; then
    environ_append RANK "${SLURM_PROCID}"
fi
if [ -n "${SLURM_LOCALID-}" ] && ! environ_has LOCAL_RANK; then
    environ_append LOCAL_RANK "${SLURM_LOCALID}"
fi

0 comments on commit a5dd946

Please sign in to comment.