-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
17 changed files
with
1,123 additions
and
1 deletion.
There are no files selected for viewing
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,132 @@ | ||
# NOTE(review): CentOS 7 reached end of life on 2024-06-30; confirm that the
# yum repositories used by the install steps below are still reachable before
# rebuilding this image.
FROM centos:7

LABEL desc="Slurm simulator made ready"

# Helper scripts used to start/stop services: cmd_start/cmd_stop drive mysqld
# during the build, and cmd_start is the image ENTRYPOINT.
COPY cmd_start /sbin/
COPY cmd_stop /sbin/

# Make the helpers executable, create the staging directory for the install
# scripts, create the slurm user (member of wheel) and install git.
# The helpers are 0755 instead of world-writable: cmd_start is the ENTRYPOINT,
# so unprivileged users must not be able to modify it.
# NOTE(review): the slurm account gets the fixed password "slurm" together with
# wheel membership - confirm this is acceptable for how the image is used.
RUN \
    chmod 0755 /sbin/cmd_start /sbin/cmd_stop && \
    mkdir /install_files && \
    useradd -d /home/slurm -ms /bin/bash slurm && \
    usermod -aG wheel slurm && \
    echo "slurm:slurm" | chpasswd && \
    echo "Added slurm user" && \
    yum -y install git && \
    yum clean all
|
||
# R script that installs the required R packages; it is executed later with
# Rscript. The trailing slash makes it explicit that the target is a directory.
COPY ./package_install.R /install_files/

# Create the simulator workspace as the slurm user so that the directories and
# the slurm_sim_tools checkout are owned by slurm.
USER slurm
RUN \
    mkdir -p /home/slurm/slurm_sim_ws/sim && \
    git clone https://github.com/ubccr-slurm-simulator/slurm_sim_tools.git \
        /home/slurm/slurm_sim_ws/slurm_sim_tools
|
||
USER root

# Install MariaDB (server + development headers), the C++ compiler and
# Python 3.6 with numpy/scipy/pip.
# epel-release is installed in its own yum transaction before the python36
# packages; presumably some of them are only available from EPEL - confirm.
# The yum cache is cleaned in the same layer so it does not bloat the image.
RUN \
    yum -y install mariadb-server mariadb-devel && \
    echo "Done installing Mariadb" && \
    yum -y install gcc-c++ && \
    yum -y install epel-release && \
    yum -y install \
        python36 \
        python36-devel \
        python36-libs \
        python36-numpy \
        python36-pip \
        python36-scipy && \
    yum clean all
|
||
# Install the Python DB/analysis packages, R with TeX Live, the project's R
# packages, RStudio Server and OpenSSH, then initialise MariaDB and create the
# 'slurm' database account.
#
# Notes on non-obvious choices:
#  - pip runs with --no-cache-dir so the download cache is not stored in the layer.
#  - 'texlive-*' is quoted so the shell passes the pattern to yum unchanged.
#  - This RUN executes as root, so the RStudio RPM is installed without sudo
#    (sudo itself is still installed for use inside the container), and the
#    downloaded RPM is removed in the same layer.
#  - SSH host keys are generated at build time, so every container started from
#    this image shares the same host keys.
#  - mysqld is started through cmd_start only long enough to create the account
#    and is stopped again with cmd_stop before the layer is committed.
RUN \
    pip3 install --no-cache-dir pymysql && \
    pip3 install --no-cache-dir pandas && \
    echo "Python all installed" && \
    yum -y install R R-Rcpp R-Rcpp-devel && \
    yum -y install python-devel && \
    yum -y install 'texlive-*' && \
    echo "R all installed" && \
    Rscript /install_files/package_install.R && \
    echo "Installed R packages" && \
    yum -y install sudo && \
    yum -y install wget && \
    echo "Sudo, git, wget installed" && \
    wget https://download2.rstudio.org/server/centos6/x86_64/rstudio-server-rhel-1.2.5042-x86_64.rpm && \
    yum -y install rstudio-server-rhel-1.2.5042-x86_64.rpm && \
    rm -f rstudio-server-rhel-1.2.5042-x86_64.rpm && \
    yum -y install initscripts && \
    echo "Rstudio server installed" && \
    yum -y install openssh openssh-server openssh-clients openssl-libs && \
    mkdir /var/run/sshd && \
    ssh-keygen -t rsa -f /etc/ssh/ssh_host_rsa_key -N '' && \
    ssh-keygen -t ecdsa -f /etc/ssh/ssh_host_ecdsa_key -N '' && \
    ssh-keygen -t ed25519 -f /etc/ssh/ssh_host_ed25519_key -N '' && \
    echo "Ssh installed" && \
    chmod g+rw /var/lib/mysql /var/log/mariadb /var/run/mariadb && \
    mysql_install_db && \
    chown -R mysql:mysql /var/lib/mysql && \
    cmd_start mysqld && \
    mysql -e "create user 'slurm'@'localhost' identified by 'slurm';" && \
    mysql -e "GRANT ALL PRIVILEGES ON *.* TO 'slurm'@'localhost' IDENTIFIED BY 'slurm';" && \
    cmd_stop mysqld && \
    yum clean all
|
||
|
||
# Switch to the slurm user so the checkout, the build tree and the install
# prefix created below are owned by slurm.
USER slurm

# Clone the Slurm simulator and do an out-of-tree optimised build in bld_opt,
# installing into /home/slurm/slurm_sim_ws/slurm_opt.
# make is limited to the number of available CPUs: a bare `make -j` places no
# limit on parallel jobs and can exhaust memory on the build host.
RUN \
    cd /home/slurm/slurm_sim_ws && \
    git clone https://github.com/ubccr-slurm-simulator/slurm_simulator.git && \
    mkdir bld_opt && \
    cd bld_opt && \
    ../slurm_simulator/configure \
        --prefix=/home/slurm/slurm_sim_ws/slurm_opt \
        --enable-simulator \
        --enable-pam \
        --without-munge \
        --enable-front-end \
        --with-mysql-config=/usr/bin/ \
        --disable-debug \
        CFLAGS="-g -O3 -D NDEBUG=1" && \
    make -j"$(nproc)" install
|
||
|
||
# Ports documented for the services installed in this image:
#   22   - sshd
#   3306 - MariaDB/MySQL
#   8787 - RStudio Server (its default port)
EXPOSE 22 3306 8787

# Back to root for the file copies and the permission change below.
# NOTE(review): root stays the active user for the ENTRYPOINT; presumably
# cmd_start needs root to start the services - confirm before changing.
USER root

# Startup, test and simulation helper scripts, copied in a single layer.
COPY \
    ./check_results.R \
    ./generate_job_trace.sh \
    ./initial_test.sh \
    ./micro_cluster_setup.py \
    ./micro_ws_config.sh \
    ./populate_slurmdb.sh \
    ./run_sim.sh \
    ./startup_file.sh \
    /install_files/

# Let every user read, write and execute the install/test scripts.
# NOTE(review): this leaves /install_files world-writable; tighten it once it
# is confirmed that the slurm user does not need to write there at runtime.
RUN chmod -R a+rwx /install_files

# cmd_start is the entrypoint; the CMD entries are handed to it as its default
# arguments (the startup script and the initial test script) and can be
# overridden on `docker run`.
ENTRYPOINT ["/sbin/cmd_start"]
CMD ["/install_files/startup_file.sh","/install_files/initial_test.sh"]
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
# Slurm Simulator Docker and Apptainer Containers | ||
|
||
```bash | ||
docker build -f docker/slurm_sim/slurm_sim.Dockerfile -t nsimakov/slurm_sim:v3.0 . | ||
|
||
# debug mode | ||
# older docker uses --invoke | ||
# BUILDX_EXPERIMENTAL=1 docker buildx build --invoke /bin/bash -f docker/slurm_sim/slurm_sim.Dockerfile -t nsimakov/slurm_sim:v3.0 . | ||
BUILDX_EXPERIMENTAL=1 docker buildx debug build -f docker/slurm_sim/slurm_sim.Dockerfile -t nsimakov/slurm_sim:v3.0 . | ||
|
||
# run | ||
docker run -p 0.0.0.0:8888:8888 -it --rm \ | ||
--name slurmsim -h slurmsim \ | ||
-v $PWD/tutorials:/home/jovyan/work -v $PWD:/opt/slurm_sim_tools \ | ||
-v /tmp/.X11-unix:/tmp/.X11-unix -v /mnt/wslg:/mnt/wslg \ | ||
-e DISPLAY=$DISPLAY -e WAYLAND_DISPLAY=$WAYLAND_DISPLAY \ | ||
-e XDG_RUNTIME_DIR=$XDG_RUNTIME_DIR -e PULSE_SERVER=$PULSE_SERVER \ | ||
nsimakov/slurm_sim:v3.0 | ||
|
||
docker run -it -v /tmp/.X11-unix:/tmp/.X11-unix \ | ||
-e DISPLAY=$DISPLAY -e WAYLAND_DISPLAY=$WAYLAND_DISPLAY \ | ||
-e XDG_RUNTIME_DIR=$XDG_RUNTIME_DIR -e PULSE_SERVER=$PULSE_SERVER xclock | ||
|
||
docker run -p 0.0.0.0:8888:8888 -it --rm \ | ||
--name slurmsim -h slurmsim \ | ||
-v $PWD/tutorials:/home/jovyan/work -v $PWD:/opt/slurm_sim_tools \ | ||
-v /tmp/.X11-unix:/tmp/.X11-unix -v /mnt/wslg:/mnt/wslg \ | ||
-e DISPLAY=$DISPLAY -e WAYLAND_DISPLAY=$WAYLAND_DISPLAY \ | ||
-e XDG_RUNTIME_DIR=$XDG_RUNTIME_DIR -e PULSE_SERVER=$PULSE_SERVER \ | ||
-e NB_USER="slurm" \ | ||
-e NB_UID="1000" \ | ||
-e NB_GROUP="slurm" \ | ||
-e NB_GID="1000" \ | ||
-e CHOWN_HOME=yes \ | ||
nsimakov/slurm_sim:v3.0 bash | ||
|
||
|
||
``` | ||
Done running hooks in: /usr/local/bin/start-notebook.d | ||
Update jovyan's UID:GID to 1000:100 | ||
Running hooks in: /usr/local/bin/before-notebook.d as uid: 0 gid: 0 | ||
Done running hooks in: /usr/local/bin/before-notebook.d | ||
|
||
|
||
To set the time zone, run the following inside the container:
|
||
``` | ||
Run 'dpkg-reconfigure tzdata' if you wish to change it. | ||
``` | ||
|
||
# Slurm Simulator Docker and Singularity Containers v1.2 | ||
|
||
```bash | ||
docker run -p 0.0.0.0:8888:8888 -it --rm -h slurmsim nsimakov/slurm_sim:v3.0 | ||
|
||
``` | ||
|
||
Building | ||
## from repo root | ||
docker build -f ./docker/slurm_sim/Dockerfile -t nsimakov/ub-slurm-sim:v1.2 . | ||
docker push nsimakov/ub-slurm-sim:v1.2 | ||
Running | ||
|
||
|
||
## Singularity Installation | ||
|
||
```bash | ||
export VERSION=1.18 OS=linux ARCH=amd64 &&                      # Replace the values as needed
wget https://dl.google.com/go/go$VERSION.$OS-$ARCH.tar.gz &&    # Downloads the required Go package
sudo tar -C /usr/local -xzvf go$VERSION.$OS-$ARCH.tar.gz &&     # Extracts the archive
rm go$VERSION.$OS-$ARCH.tar.gz                                  # Deletes the tar file
|
||
echo 'export PATH=/usr/local/go/bin:$PATH' >> ~/.bashrc && \ | ||
source ~/.bashrc | ||
|
||
# adjust VERSION as necessary
export VERSION=3.9.7 && \
wget https://github.com/sylabs/singularity/releases/download/v${VERSION}/singularity-ce-${VERSION}.tar.gz && \
tar -xzf singularity-ce-${VERSION}.tar.gz && \
cd singularity-ce-${VERSION} && \
./mconfig && \
make -C builddir && \
sudo make -C builddir install
|
||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
#!/usr/bin/env Rscript | ||
|
||
# This script gets the results from the simulation and runs some tests on them | ||
# It tests if the requested features were given to the jobs | ||
# Features: cpu type, gpu, big mem | ||
# How implemented - each feature corresponds to a different type of node | ||
|
||
|
||
library(RSlurmSimTools) # needs these libraries to run the tests | ||
library(dplyr) | ||
|
||
# Checks, for a single job, whether a requested feature was honoured by the
# simulator.
#
# Arguments:
#   df.joined   - data frame joining the job trace with the sacct output
#   row_num     - index of the row (job) to inspect
#   trace_col   - name of the trace column holding the requested feature
#   trace_check - feature value this check applies to
#   sacct_col   - name of the sacct column holding the assigned node list
#   sacct_check - pattern (passed to grepl) that the node list must match when
#                 the feature was requested
#
# Returns TRUE when the job did not request the feature (trace value is NA or
# differs from trace_check) or when the assigned nodes match sacct_check;
# returns FALSE when the feature was requested but the nodes do not match.
# The check values are parameters so the function can be reused for several
# kinds of feature test.
check_nodes <- function(df.joined, row_num, trace_col, trace_check, sacct_col, sacct_check){
    requested <- df.joined[row_num, trace_col]  # feature requested in the trace
    assigned <- df.joined[row_num, sacct_col]   # nodes the simulator assigned

    # an NA trace value means no feature was requested: nothing to verify
    if (is.na(requested)) {
        return(TRUE)
    }

    # a different feature was requested: this check does not apply
    if (requested != trace_check) {
        return(TRUE)
    }

    # the feature was requested: the node list must contain a matching node
    grepl(sacct_check, assigned)
}
|
||
|
||
# Job trace that was submitted to the simulator (one row per job).
job_trace <- read.csv(file="/home/slurm/slurm_sim_ws/slurm_sim_tools/reg_testing/micro_cluster/test_trace.csv")

# Job completion log written by the simulation (which nodes each job received, etc.).
sacct_base <- read_sacct_out("/home/slurm/slurm_sim_ws/sim/micro/baseline/results/jobcomp.log")

# Join trace and results on the job id so each row carries both the request
# and the assignment for one job.
joined <- left_join(job_trace, sacct_base, by = c("sim_job_id" = "local_job_id") )

done_well = TRUE  # assume success until a mismatch is found

# Walk over every job. seq_len() yields an empty sequence for a zero-row join,
# whereas 1:nrow(joined) would iterate over rows 1 and 0 and fail.
for(row in seq_len(nrow(joined)))
{
    # every requested feature must be satisfied (or not requested at all)
    done_well = check_nodes(joined, row, "sim_req_mem", 500000, "NodeList", "b") &&  # big mem
        check_nodes(joined, row, "sim_features", "CPU-M", "NodeList", "m") &&        # M cpu
        check_nodes(joined, row, "sim_features", "CPU-N", "NodeList", "n") &&        # N cpu
        check_nodes(joined, row, "sim_gres", "gpu:1", "NodeList", "g") &&            # 1 gpu
        check_nodes(joined, row, "sim_gres", "gpu:2", "NodeList", "g")               # 2 gpu

    # stop at the first job whose assignment does not match its request
    if(!done_well)
    {
        # report the job id so the failure can be traced back
        jobid = joined[row, "sim_job_id"]
        print(paste("Id of incorrectly assigned job:", jobid))
        break
    }
}

# overall result
print("Did the simulator do well?.....")
print(done_well)
Oops, something went wrong.