Skip to content

Commit

Permalink
Merge branch 'vlasiator_gpu' of https://github.com/fmihpc/vlasiator i…
Browse files Browse the repository at this point in the history
…nto vlasiator_gpu
  • Loading branch information
hokkanen committed Apr 2, 2024
2 parents 251bb7a + 91c46a6 commit 28fe8be
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 25 deletions.
28 changes: 17 additions & 11 deletions MAKE/Makefile.lumi_hipcc
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,18 @@ CMP = hipcc
LNK = clang++

# Modules loaded (after clean shell, no module purging, one-by-one not oneline)
# module load LUMI/22.08
# module load LUMI/23.09
# module load partition/G
# module load cpeAMD
# module load rocm/5.3.3
# module load Boost/1.79.0-cpeAMD-22.08
# module load rocm/5.6.1
# module load Boost/1.82.0-cpeAMD-23.09
# module load papi/7.0.1.1
# module load Eigen/3.4.0
# one-liner:
# module load LUMI/22.08; module load partition/G; module load cpeAMD; module load rocm/5.3.3; module load Boost/1.79.0-cpeAMD-22.08

# module load LUMI/23.09; module load partition/G; module load cpeAMD; module load rocm/5.6.1; module load Boost/1.82.0-cpeAMD-23.09; module load papi/7.0.1.1; module load Eigen/3.4.0
# clang++ linking also requires:
# export PATH=$PATH:/appl/lumi/SW/LUMI-23.09/G/EB/rocm/5.6.1/llvm/bin/

#======== Vectorization ==========
#Set vector backend type for vlasov solvers, sets precision and length.
#Options:
Expand All @@ -35,9 +39,9 @@ USE_HIP=1
# LDFLAGS flags for linker
# Important note: Do not edit COMPFLAGS in this file!

CXXFLAGS += -g -ggdb -O3 -x hip --offload-arch=gfx90a:xnack- -march=znver3 -std=c++17 -funroll-loops -fopenmp -I. -Ihip -Iomp -I${CRAY_MPICH_DIR}/include -W -Wall -Wno-unused-parameter -Wno-unused-result -Wno-unused-function -Wno-unused-variable -Wno-unknown-pragmas -Wno-deprecated-register -Wno-unused-but-set-variable
CXXFLAGS += -g -ggdb -O3 -x hip --offload-arch=gfx90a:xnack- -march=znver3 -std=c++17 -funroll-loops -fopenmp -I. -Ihip -Iomp -I${CRAY_MPICH_DIR}/include -W -Wall -Wno-unused-parameter -Wno-unused-result -Wno-unused-function -Wno-unused-variable -Wno-unknown-pragmas -Wno-deprecated-register -Wno-unused-but-set-variable -Wno-ignored-attributes

testpackage: CXXFLAGS = -g -ggdb -O2 -x hip --offload-arch=gfx90a:xnack- -march=znver3 -std=c++17 -fopenmp -I. -Ihip -Iomp -I${CRAY_MPICH_DIR}/include -fgpu-sanitize -W -Wall -Wno-unused-parameter -Wno-unused-result -Wno-unused-function -Wno-unused-variable -Wno-unknown-pragmas -Wno-deprecated-register -Wno-unused-but-set-variable
testpackage: CXXFLAGS = -g -ggdb -O2 -x hip --offload-arch=gfx90a:xnack- -march=znver3 -std=c++17 -fopenmp -I. -Ihip -Iomp -I${CRAY_MPICH_DIR}/include -fgpu-sanitize -W -Wall -Wno-unused-parameter -Wno-unused-result -Wno-unused-function -Wno-unused-variable -Wno-unknown-pragmas -Wno-deprecated-register -Wno-unused-but-set-variable -Wno-ignored-attributes

LDFLAGS = -fopenmp -lrt -lpthread -L${CRAY_MPICH_DIR}/lib ${PE_MPICH_GTL_DIR_amd_gfx90a} -L${ROCM_PATH}/lib -lamdhip64
LIB_MPI = -lmpi ${PE_MPICH_GTL_LIBS_amd_gfx90a}
Expand All @@ -63,7 +67,7 @@ testpackage: CXXFLAGS += -DPAPI_MEM
#======== Libraries ===========
# Select the base directory based on which project you are using:
# LUMILAPIO
LIBRARY_PREFIX = /projappl/project_462000358/libraries
LIBRARY_PREFIX = /projappl/project_462000358/libraries/23.09

# Compiled libraries
#INC_BOOST = -isystem $(LIBRARY_PREFIX)/boost/include
Expand All @@ -76,8 +80,10 @@ LIB_ZOLTAN = -L$(LIBRARY_PREFIX)/zoltan/lib -lzoltan -Wl,-rpath=$(LIBRARY_PREFIX
#INC_JEMALLOC = -I$(LIBRARY_PREFIX)/jemalloc/include
#LIB_JEMALLOC = -L$(LIBRARY_PREFIX)/jemalloc/lib -ljemalloc -Wl,-rpath=$(LIBRARY_PREFIX)/jemalloc/lib

INC_PAPI = -isystem $(LIBRARY_PREFIX)/papi/include
LIB_PAPI = -lpapi -L$(LIBRARY_PREFIX)/papi/lib -Wl,-rpath=$(LIBRARY_PREFIX)/papi/lib
#INC_PAPI = -isystem $(LIBRARY_PREFIX)/papi/include
#LIB_PAPI = -lpapi -L$(LIBRARY_PREFIX)/papi/lib -Wl,-rpath=$(LIBRARY_PREFIX)/papi/lib
INC_PAPI = -isystem /opt/cray/pe/papi/7.0.1.1/include/
LIB_PAPI = -lpapi -L/opt/cray/pe/papi/7.0.1.1/lib -Wl,-rpath=/opt/cray/pe/papi/7.0.1.1/lib

INC_VLSV = -isystem $(LIBRARY_PREFIX)/vlsv
LIB_VLSV = -L$(LIBRARY_PREFIX)/vlsv -lvlsv -Wl,-rpath=$(LIBRARY_PREFIX)/vlsv
Expand All @@ -87,7 +93,7 @@ LIB_PROFILE = -L$(LIBRARY_PREFIX)/phiprof/lib -lphiprof -lgfortran -Wl,-rpath=$(

#header libraries

INC_EIGEN = -isystem $(LIBRARY_PREFIX)/eigen/
#INC_EIGEN = -isystem $(LIBRARY_PREFIX)/eigen/
INC_FSGRID = -I./submodules/fsgrid
INC_DCCRG = -I./submodules/dccrg
# Vectorclass only for CPU mode
Expand Down
6 changes: 3 additions & 3 deletions grid.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -295,9 +295,9 @@ void initializeGrids(
for (size_t i=0; i<cells.size(); ++i) {
mpiGrid[cells[i]]->parameters[CellParams::LBWEIGHTCOUNTER] = 0;
#ifdef USE_GPU
SpatialCell* cell = mpiGrid[cells[i]];
cell->prefetchDevice(); // Currently projects still init on host
cell->gpu_advise();
// SpatialCell* cell = mpiGrid[cells[i]];
// cell->prefetchDevice();
// cell->gpu_advise();
#endif
}

Expand Down
36 changes: 25 additions & 11 deletions testpackage/small_test_lumi_gpu.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,13 @@
#SBATCH --partition=small-g

#SBATCH --nodes=1
#SBATCH --ntasks-per-node=8
#SBATCH --gpus-per-node=8
#SBATCH --ntasks-per-node=1
#SBATCH --gpus-per-node=1
##SBATCH --ntasks-per-node=8
##SBATCH --gpus-per-node=8

#SBATCH --time=24:00:00
##SBATCH --time=3:00:00
#SBATCH --account=project_462000358
#SBATCH --exclusive
#SBATCH --mem=0
Expand All @@ -21,18 +24,21 @@ create_verification_files=0
reference_dir="/scratch/project_462000358/testpackage/"
cd $SLURM_SUBMIT_DIR

bin="/scratch/project_462000358/testpackage/vlasiator_gpu_wid4_tp"
bin="/scratch/project_462000358/testpackage/vlasiator_gpu_2309_tp"
diffbin="/scratch/project_462000358/testpackage/vlsvdiff_DP_gpu"

# compare against which revision?
reference_revision="current"

# LD_PRELOAD assignment to place before exec in the wrapper script
#LD_PRELOAD=/users/marbat/git/vlasiator-mempool/libpreload-me-2309.so

# set up GPU/CPU bindings
cat << EOF > select_gpu_${SLURM_JOB_ID}
#!/bin/bash
export ROCR_VISIBLE_DEVICES=\$SLURM_LOCALID
export OMP_NUM_THREADS=7
exec \$*
LD_PRELOAD=/users/marbat/git/vlasiator-mempool/libpreload-me-2309.so exec \$*
EOF
chmod +x ./select_gpu_${SLURM_JOB_ID}
# this should set the ordering correctly: "4 5 2 3 6 7 0 1"
Expand All @@ -41,11 +47,18 @@ CPU_BIND="${CPU_BIND},7e0000,7e000000"
CPU_BIND="${CPU_BIND},7e,7e00"
CPU_BIND="${CPU_BIND},7e00000000,7e0000000000"

module load LUMI/22.08
# module load LUMI/22.08
# module load partition/G
# module load cpeAMD
# module load rocm/5.3.3
# module load Boost/1.79.0-cpeAMD-22.08
module load LUMI/23.09
module load partition/G
module load cpeAMD
module load rocm/5.3.3
module load Boost/1.79.0-cpeAMD-22.08
module load rocm/5.6.1
module load Boost/1.82.0-cpeAMD-23.09
module load papi/7.0.1.1
module load Eigen/3.4.0
module list

export OMP_PLACES=cores
Expand All @@ -56,13 +69,14 @@ export MPICH_GPU_SUPPORT_ENABLED=1
export HSA_XNACK=0
# use extra threads for MPI in background
export MPICH_ASYNC_PROGRESS=1

# allow up to 16 hardware queues in parallel
export GPU_MAX_HW_QUEUES=16
# Command for running tests and diffs with MPI
# run_command="srun --cpu-bind=${CPU_BIND} ${SLURM_SUBMIT_DIR}/select_gpu "
# No MPI testing for now
run_command="srun -n 1 --cpu-bind=${CPU_BIND} ${SLURM_SUBMIT_DIR}/select_gpu "
small_run_command="srun -n 1 --cpu-bind=${CPU_BIND} ${SLURM_SUBMIT_DIR}/select_gpu "
run_command_tools="srun -n 1 --cpu-bind=${CPU_BIND} ${SLURM_SUBMIT_DIR}/select_gpu"
run_command="srun -n 1 --cpu-bind=${CPU_BIND} ${SLURM_SUBMIT_DIR}/select_gpu_${SLURM_JOB_ID} "
small_run_command="srun -n 1 --cpu-bind=${CPU_BIND} ${SLURM_SUBMIT_DIR}/select_gpu_${SLURM_JOB_ID} "
run_command_tools="srun -n 1 --cpu-bind=${CPU_BIND} ${SLURM_SUBMIT_DIR}/select_gpu_${SLURM_JOB_ID}"

umask 007

Expand Down

0 comments on commit 28fe8be

Please sign in to comment.