Skip to content

Commit

Permalink
GPU MPC (#214)
Browse files Browse the repository at this point in the history
* Adding GPU-MPC

* Adding gitignore

* Added weights as a submodule

* Added mnist as a submodule

* Added cutlass as a submodule

* Added SEAL as a submodule

---------

Co-authored-by: Neha J <[email protected]>
  • Loading branch information
neha-jawalkar and Neha J authored May 17, 2024
1 parent 133464a commit 473eb34
Show file tree
Hide file tree
Showing 498 changed files with 115,885 additions and 0 deletions.
12 changes: 12 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,15 @@
# NOTE(review): this section is named "CrypTFlow2/extern/eigen" but its path is
# SCI/extern/eigen — confirm the name/path mismatch is intentional.
[submodule "CrypTFlow2/extern/eigen"]
path = SCI/extern/eigen
url = https://gitlab.com/libeigen/eigen
# Submodules added for GPU-MPC. Each one is checked out at the `path` below.
# Presumably the model weights for the Orca experiments (going by the path) — confirm.
[submodule "GPU-MPC/experiments/orca/weights"]
path = GPU-MPC/experiments/orca/weights
url = https://github.com/neha-jawalkar/weights.git
# Presumably the MNIST dataset for the Orca experiments (going by the path) — confirm.
[submodule "GPU-MPC/experiments/orca/datasets/mnist"]
path = GPU-MPC/experiments/orca/datasets/mnist
url = https://github.com/neha-jawalkar/mnist.git
# NVIDIA CUTLASS; GPU-MPC/Makefile puts ext/cutlass/include on the include path.
[submodule "GPU-MPC/ext/cutlass"]
path = GPU-MPC/ext/cutlass
url = https://github.com/NVIDIA/cutlass.git
# Microsoft SEAL, vendored under sytorch's SCI tree.
[submodule "GPU-MPC/ext/sytorch/ext/sci/extern/SEAL"]
path = GPU-MPC/ext/sytorch/ext/sci/extern/SEAL
url = https://github.com/microsoft/SEAL.git
34 changes: 34 additions & 0 deletions GPU-MPC/Dockerfile_Gen
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Author: Tanmay Rajore,Neha Jawalkar
#
# Copyright:
# Copyright (c) 2024 Microsoft Research
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
# Build environment for GPU-MPC: CUDA 11.8 toolkit on Ubuntu 22.04 with gcc/g++ 9
# selected as the default host compiler.
FROM nvidia/cuda:11.8.0-devel-ubuntu22.04

WORKDIR /home

# Point /bin/sh at bash, so RUN steps and anything started as `sh <script>`
# are executed by bash.
RUN ln -sf /bin/bash /bin/sh

# Install the toolchain and library dependencies in one layer.
# - Steps are chained with `&&` so a failed install fails the image build
#   instead of being silently ignored.
# - DEBIAN_FRONTEND=noninteractive keeps package configuration from prompting.
# - The build already runs as root, so `sudo` is not needed here; the package is
#   still installed in case scripts run inside the container call it
#   (NOTE(review): confirm whether setup.sh relies on sudo).
# - `update-alternatives --set` is the non-interactive way to select gcc-9
#   (`--config` is the interactive menu).
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get upgrade -y && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y \
        git apt-utils sudo \
        gcc-9 g++-9 \
        libssl-dev cmake make python3-pip libgmp-dev libmpfr-dev libeigen3-dev && \
    update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-9 60 \
        --slave /usr/bin/g++ g++ /usr/bin/g++-9 && \
    update-alternatives --set gcc /usr/bin/gcc-9

# Trust every directory for git operations. The `*` is quoted so the shell
# passes it to git literally instead of expanding it against the contents of
# the working directory.
RUN git config --global --add safe.directory '*'
#RUN git submodule update --init --recursive
132 changes: 132 additions & 0 deletions GPU-MPC/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
# ---- Build configuration ----------------------------------------------------
#
# CUDA_VERSION  Toolkit version used to locate nvcc under
#               /usr/local/cuda-<version>. Taken from the environment or the
#               command line when set; defaults to 11.7 when unset or empty.
#               (The previous `CUDA_VERSION ?= $(value CUDA_VERSION)` expanded
#               to the literal text "$(value CUDA_VERSION)" when the variable
#               was unset, so the default below was never applied.)
ifeq ($(strip $(CUDA_VERSION)),)
CUDA_VERSION := 11.7
endif

# Locations of the vendored dependencies, relative to this Makefile.
CUTLASS_PATH := ./ext/cutlass
SYTORCH_PATH := ./ext/sytorch
SYTORCH_BUILD_PATH := $(SYTORCH_PATH)/build
LLAMA_PATH := $(SYTORCH_PATH)/ext/llama

# GPU_ARCH      Compute capability to compile for (e.g. 86). It has no default.
#               The check is deferred: CUDA_ARCH and FLAGS are recursively
#               expanded, so the error fires only when a compile recipe
#               actually runs and `make clean` keeps working without GPU_ARCH.
CUDA_ARCH = $(if $(strip $(GPU_ARCH)),$(strip $(GPU_ARCH)),$(error GPU_ARCH is not set. Export it before building. Example: export GPU_ARCH=86))

CXX := /usr/local/cuda-$(CUDA_VERSION)/bin/nvcc

# Compiler flags. The -gencode argument is quoted so the shell never treats the
# [...] part as a glob pattern.
FLAGS = -O3 -gencode "arch=compute_$(CUDA_ARCH),code=[sm_$(CUDA_ARCH),compute_$(CUDA_ARCH)]" -std=c++17 -m64 -Xcompiler="-O3,-w,-std=c++17,-fpermissive,-fpic,-pthread,-fopenmp,-march=native"

# Libraries linked into every binary, plus the extra SecFloat set used by a few targets.
LIBS := -lsytorch -lcryptoTools -lLLAMA -lbitpack -lcuda -lcudart -lcurand
SECFLOAT_LIBS := -lSCI-FloatML -lSCI-FloatingPoint -lSCI-BuildingBlocks -lSCI-LinearOT -lSCI-GC -lcrypto

# Utility sources compiled into every binary.
UTIL_FILES := ./utils/gpu_mem.cu ./utils/gpu_file_utils.cpp ./utils/sigma_comms.cpp

# Header search paths (OBJ_INCLUDES) and, in INCLUDES, the same paths followed
# by the library search paths.
OBJ_INCLUDES := -I '$(CUTLASS_PATH)/include' -I '$(CUTLASS_PATH)/tools/util/include' -I '$(SYTORCH_PATH)/include' -I '$(LLAMA_PATH)/include' -I '$(SYTORCH_PATH)/ext/cryptoTools' -I '.'
INCLUDES := $(OBJ_INCLUDES) -L$(CUTLASS_PATH)/build/tools/library -L$(SYTORCH_BUILD_PATH) -L$(SYTORCH_BUILD_PATH)/ext/cryptoTools -L$(SYTORCH_BUILD_PATH)/ext/llama -L$(SYTORCH_BUILD_PATH)/ext/bitpack -L$(SYTORCH_BUILD_PATH)/lib

# ---- Build rules ------------------------------------------------------------
#
# Every target below is a convenience name: the binary is written next to its
# source (`make dpf` produces tests/fss/dpf), never to a file named after the
# target. Declaring the names phony keeps a stray file or directory called
# `dpf`, `relu`, `orca`, ... from making a target look up to date.
.PHONY: dpf dpf_eval_all dpf_drelu dpf_lut gelu relu rmsnorm softmax fc \
        layernorm silu truncate mha secfloat_softmax piranha_softmax \
        orca_dealer orca_evaluator \
        dcf aes dcf_relu_extend dcf_stochastic_truncate dcf_relu \
        orca_conv2d orca_maxpool orca_relu_extend orca_fc orca_relu \
        orca_inference orca_inference_u32 sigma piranha share_data \
        model_accuracy orca

# Canned compile-and-link command shared by all rules.
#   $(1)  path of the binary to produce
#   $(2)  optional extra libraries, appended after $(LIBS)
#   $(3)  optional extra compiler flags, inserted after $(FLAGS)
# All prerequisites of the rule ($^) are compiled together with $(UTIL_FILES).
nvcc_link = $(CXX) $(FLAGS) $(3) $(INCLUDES) $^ $(UTIL_FILES) $(LIBS) $(2) -o $(1)

# -- FSS primitive tests (tests/fss) --
dpf: tests/fss/dpf.cu
	$(call nvcc_link,tests/fss/dpf)

dpf_eval_all: tests/fss/dpf_eval_all.cu
	$(call nvcc_link,tests/fss/dpf_eval_all)

dpf_drelu: tests/fss/dpf_drelu.cu
	$(call nvcc_link,tests/fss/dpf_drelu)

dpf_lut: tests/fss/dpf_lut.cu
	$(call nvcc_link,tests/fss/dpf_lut)

gelu: tests/fss/gelu.cu
	$(call nvcc_link,tests/fss/gelu)

relu: tests/fss/relu.cu
	$(call nvcc_link,tests/fss/relu)

rmsnorm: tests/fss/rmsnorm.cu
	$(call nvcc_link,tests/fss/rmsnorm)

softmax: tests/fss/softmax.cu
	$(call nvcc_link,tests/fss/softmax)

fc: tests/fss/fc.cu
	$(call nvcc_link,tests/fss/fc)

layernorm: tests/fss/layernorm.cu
	$(call nvcc_link,tests/fss/layernorm)

silu: tests/fss/silu.cu
	$(call nvcc_link,tests/fss/silu)

truncate: tests/fss/truncate.cu
	$(call nvcc_link,tests/fss/truncate)

mha: tests/fss/mha.cu
	$(call nvcc_link,tests/fss/mha)

# Links the SecFloat libraries in addition to the common set.
secfloat_softmax: tests/fss/secfloat_softmax.cu
	$(call nvcc_link,tests/fss/secfloat_softmax,$(SECFLOAT_LIBS))

piranha_softmax: tests/fss/piranha_softmax.cu
	$(call nvcc_link,tests/fss/piranha_softmax)

# -- Orca dealer / evaluator (both also link the SecFloat libraries) --
orca_dealer: experiments/orca/orca_dealer.cu
	$(call nvcc_link,experiments/orca/orca_dealer,$(SECFLOAT_LIBS))

orca_evaluator: experiments/orca/orca_evaluator.cu experiments/orca/datasets/mnist.cpp
	$(call nvcc_link,experiments/orca/orca_evaluator,$(SECFLOAT_LIBS))

# -- DCF tests (tests/fss/dcf) --
dcf: tests/fss/dcf/dcf.cu
	$(call nvcc_link,tests/fss/dcf/dcf)

aes: tests/fss/dcf/aes.cu
	$(call nvcc_link,tests/fss/dcf/aes)

dcf_relu_extend: tests/fss/dcf/relu_extend.cu
	$(call nvcc_link,tests/fss/dcf/relu_extend)

dcf_stochastic_truncate: tests/fss/dcf/stochastic_truncate.cu
	$(call nvcc_link,tests/fss/dcf/stochastic_truncate)

dcf_relu: tests/fss/dcf/relu.cu
	$(call nvcc_link,tests/fss/dcf/relu)

# -- Orca layer tests (tests/nn/orca); binaries drop the `_test` suffix --
orca_conv2d: tests/nn/orca/conv2d_test.cu
	$(call nvcc_link,tests/nn/orca/conv2d)

orca_maxpool: tests/nn/orca/maxpool_test.cu
	$(call nvcc_link,tests/nn/orca/maxpool)

orca_relu_extend: tests/nn/orca/relu_extend_test.cu
	$(call nvcc_link,tests/nn/orca/relu_extend)

orca_fc: tests/nn/orca/fc_test.cu
	$(call nvcc_link,tests/nn/orca/fc)

orca_relu: tests/nn/orca/relu_test.cu
	$(call nvcc_link,tests/nn/orca/relu)

# -- Experiments --
orca_inference: experiments/orca/orca_inference.cu
	$(call nvcc_link,experiments/orca/orca_inference)

# Same source as orca_inference, compiled with InfType defined as u32.
orca_inference_u32: experiments/orca/orca_inference.cu
	$(call nvcc_link,experiments/orca/orca_inference_u32,,-DInfType=u32)

sigma: experiments/sigma/sigma.cu
	$(call nvcc_link,experiments/sigma/sigma)

piranha: experiments/orca/piranha.cu
	$(call nvcc_link,experiments/orca/piranha)

share_data: experiments/orca/share_data.cpp experiments/orca/datasets/mnist.cpp
	$(call nvcc_link,experiments/orca/share_data)

model_accuracy: experiments/orca/model_accuracy.cu experiments/orca/datasets/mnist.cpp
	$(call nvcc_link,experiments/orca/model_accuracy)

# Aggregate target: builds the binaries listed as its prerequisites.
orca: orca_dealer orca_evaluator orca_inference orca_inference_u32 piranha

# Remove the dependency build trees, experiment output and the experiment
# binaries.
# - Declared phony so a file named `clean` cannot disable the target.
# - $(RM) is `rm -f`, so binaries that were never built do not abort the clean
#   part-way through (plain `rm` stopped at the first missing file).
# - The output directories follow the experiments/<name>/ layout used by every
#   other path in this Makefile; the previous `orca/experiments/output` and
#   `sigma/experiments/output` had the components swapped.
#   NOTE(review): confirm that experiments/<name>/output is where the
#   experiment scripts write their results.
.PHONY: clean
clean:
	$(RM) -r ext/cutlass/build ext/sytorch/build
	$(RM) -r experiments/orca/output experiments/sigma/output
	$(RM) experiments/orca/orca_dealer \
	      experiments/orca/orca_evaluator \
	      experiments/orca/orca_inference \
	      experiments/orca/orca_inference_u32 \
	      experiments/orca/piranha \
	      experiments/sigma/sigma

123 changes: 123 additions & 0 deletions GPU-MPC/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@

# Orca: FSS-based Secure Training and Inference with GPUs

Implementation of protocols from the paper [Orca](https://eprint.iacr.org/2023/206).

**Warning**: This is an academic proof-of-concept prototype and has not received careful code review. This implementation is NOT ready for production use.

## Build

This project requires NVIDIA GPUs, and assumes that GPU drivers and the [NVIDIA CUDA Toolkit](https://docs.nvidia.com/cuda/) are already installed. The following has been tested on Ubuntu 20.04 with CUDA 11.7, CMake 3.27.2 and g++-9.

Please note that Sytorch requires CMake version >= 3.17, and the build will fail if this dependency is not met.

The code uses CUTLASS version 2.11 by default, so if you change the CUDA version, please make sure that the CUTLASS version being built is compatible with the new CUDA version. To change the version of CUTLASS being built, add `git checkout <branch>;` after line 31 (`cd ext/cutlass;`) of setup.sh.

The last line of `setup.sh` tries to install `matplotlib`, which is needed for generating Figures 5a and 5b. In our experience, the installation fails if the versions of Python and `pip` do not match. In case the installation fails, please install `matplotlib` manually before running `run_experiment.py`.

1. Export environment variables

```
export CUDA_VERSION=11.7
export GPU_ARCH=86
```

2. Set up the environment

```
sh setup.sh
```

3. Make Orca

```
make orca
```

## Run

1. Each party runs two processes: a dealer and an evaluator. The configuration needs to define the GPU on which the dealer will run, and the directory in which it will store FSS keys. This is done in `config.json` as:

```javascript
"dealer" :
{ "gpu": <The ID of the GPU to use>,
"key_dir": <The directory in which the dealer will store keys>
}
```

FSS keys tend to be quite large so please make sure that the key directory has at least 500GB of free space. Please also ensure that it is writeable.

Similarly, the configuration also needs to define the GPU on which the evaluator will run, and the IP address of its peer, i.e., the address of the remote party the evaluator will communicate with for secure training or inference. This is done in `config.json` as:

```javascript
"evaluator" :
{ "gpu": <The ID of the GPU to use>,
"peer": <The address of the remote peer>
}
```

You can run Orca to generate Figures 5a and 5b, as well as Tables 3, 4, 6, 7, 8 and 9. Table 5 can be generated by throttling the network bandwidth (with `tc`, for example) and regenerating Table 4. The script reports numbers for Tables 4, 6, 7 and 9 as the average of 10 iterations.

Figure 5b and Table 3 run end-to-end training and so can take a couple of days to finish.

Evaluation runs through `experiments/orca/run_experiment.py`. Here are the relevant options:

```
usage: run_experiment.py [-h] [--figure FIGURE] [--table TABLE] --party 0/1
optional arguments:
--figure FIGURE Figure # to run.
--table TABLE Table # to run.
--all true Run all the experiments.
```

Results are stored in the `output/P<party-number>/Table<table-number>` or `output/P<party-number>/Fig<figure-number>` folders.

Log files (which might help with debugging) are stored in the corresponding experiment folders, i.e., in `output/P<party-number>/Table<table-number>/logs` and `output/P<party-number>/Fig<figure-number>/logs`.

## Docker Build

Alternatively, you can set up the build environment as a Docker image using the provided `Dockerfile_Gen`.

### Install Nvidia Container Toolkit
- Configure the repository:
```
curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey |sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg \
&& curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list | sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' | sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list \
&& sudo apt-get update
```

- Install the NVIDIA Container Toolkit packages:
```
sudo apt-get install -y nvidia-container-toolkit
sudo nvidia-ctk runtime configure --runtime=docker
sudo systemctl restart docker
```
### Build the Docker Image / pull the image from Docker Hub
```
# Local Build
docker build -t gpu_mpc -f Dockerfile_Gen .
# Pull from Docker Hub (Cuda 11.8)
docker pull trajore/gpu_mpc
```
### Run the Docker Container
```
sudo docker run --gpus all --network host -v /home/$USER/path_to_GPU-MPC/:/home -it container_name /bin/bash
```
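Here `container_name` stands for the image to run: `gpu_mpc` if you built the image locally, or `trajore/gpu_mpc` if you pulled it from Docker Hub. Inside the container, export `CUDA_VERSION` and `GPU_ARCH`, run `setup.sh`, and then run `make orca` as described in the Build section above.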

## Citation

You can cite the paper using the following BibTeX entry:

```
@INPROCEEDINGS{jawalkar2024orca,
author = {N. Jawalkar and K. Gupta and A. Basu and N. Chandran and D. Gupta and R. Sharma},
booktitle = {2024 IEEE Symposium on Security and Privacy (SP)},
title = {Orca: FSS-based Secure Training and Inference with GPUs},
year = {2024}
}
```

Loading

0 comments on commit 473eb34

Please sign in to comment.