Minor Changes Timing improvements with std::thread (#191)

* Minor Changes Timing improvements with std::thread * Update sytorchBackendRep.py * module.h fix * fix module.h bug type casting * fixed bug in llama_base.h, input_prng.cpp, module.h, tensor.h * ramdisk_path option added to llama config file * added the ramdrive scripts * Update Toy example- single inference.md
mpc-msri · Sep 7, 2023 · 81e566c · 81e566c
1 parent c6e60b1
commit 81e566c
Show file tree

Hide file tree

Showing 22 changed files with 295 additions and 143 deletions.
diff --git a/OnnxBridge/LLAMA/sytorchBackendRep.py b/OnnxBridge/LLAMA/sytorchBackendRep.py
@@ -108,7 +108,9 @@ def cleartext_post(code_list, program, scale, mode, indent):
         f"""
 
 int main(int argc, char**__argv){'{'}
-
+    std::ios::sync_with_stdio(false);
+    std::cin.tie(NULL);
+    std::cout.tie(NULL);
     prngWeights.SetSeed(osuCrypto::toBlock(0, 0));
     prngStr.SetSeed(osuCrypto::toBlock(time(NULL)));
 
@@ -147,6 +149,9 @@ def cleartext_fp_post(code_list, program, scale, mode, indent):
         f"""
 
 int main(int argc, char**__argv){'{'}
+    std::ios::sync_with_stdio(false);
+    std::cin.tie(NULL);
+    std::cout.tie(NULL);
 
     prngWeights.SetSeed(osuCrypto::toBlock(0, 0));
     prngStr.SetSeed(osuCrypto::toBlock(time(NULL)));
@@ -193,11 +198,14 @@ def llama_post(code_list, program, scale, mode, bitlength, indent):
         f"""
     
 int main(int __argc, char**__argv){'{'}
-    
+    std::ios::sync_with_stdio(false);
+    std::cin.tie(NULL);
+    std::cout.tie(NULL);
     prngWeights.SetSeed(osuCrypto::toBlock(0, 0));
     prngStr.SetSeed(osuCrypto::toBlock(time(NULL)));
 
     int party = atoi(__argv[1]);
+    bool ramdisk_path = false;
     std::string ip = "127.0.0.1";
     int nt=4;
     std::string weights_file = "";
@@ -206,20 +214,32 @@ def llama_post(code_list, program, scale, mode, bitlength, indent):
         weights_file = __argv[2];
     {'}'}
     else if(party == DEALER){'{'}
-        if(__argc > 2){'{'}
+        if(__argc == 3){'{'}
+            nt = atoi(__argv[2]);
+        {'}'}
+        if(__argc > 3){'{'}
             nt = atoi(__argv[2]);
+            ramdisk_path = __argv[3];
         {'}'}
     {'}'}
     else if(party == SERVER){'{'}
         weights_file = __argv[2];
-        if(__argc > 3){'{'}
+        if(__argc == 4){'{'}
             nt = atoi(__argv[3]);
         {'}'}
+        if(__argc > 4){'{'}
+            nt = atoi(__argv[3]);
+            ramdisk_path = __argv[4];
+        {'}'}
     {'}'}
     else if(party == CLIENT){'{'}
         ip = __argv[2];
-        if(__argc > 3){'{'}
+        if(__argc == 4){'{'}
+            nt = atoi(__argv[3]);
+        {'}'}
+        if(__argc > 4){'{'}
             nt = atoi(__argv[3]);
+            ramdisk_path = __argv[4];
         {'}'}
     {'}'}
 
@@ -247,8 +267,13 @@ def llama_post(code_list, program, scale, mode, bitlength, indent):
     LlamaConfig::stochasticT = true;
     LlamaConfig::stochasticRT = true;
     LlamaConfig::num_threads = nt;
+    LlamaConfig::ramdisk_path = ramdisk_path;
 
-    llama->init(ip, true);
+    if(ramdisk_path){'{'}
+    llama->init(ip, true,true);
+    {'}'}else{'{'}
+    llama->init(ip,true,false);
+    {'}'}   
 
     Net<u64> net;
     net.init(scale);

diff --git a/OnnxBridge/README.md b/OnnxBridge/README.md
@@ -84,12 +84,19 @@ python3 main.py --path "/path/to/onnx-file" --generate "code" --backend LLAMA --
 # compile secure code
 LLAMA/compile_llama.sh "/path/to/file.cpp"
 
+# Enable and mount Ramdisk on client and server machines
+./ramdrive.sh <ramdisk_size>
 # generate LLAMA keys on client and server machines
 ./<network> 1 <num_threads>
 
+
+
 # start inference on server and client machines
-./<network> 2 <model_weights_file> <num_threads>// Server
-./<network> 3 <server-ip> <num_threads> < <image_file> // Client
+./<network> 2 <model_weights_file> <num_threads> <ramdisk_path "true/false">// Server
+./<network> 3 <server-ip> <num_threads> <ramdisk_path "true/false"> < <image_file> // Client
+
+# Disable and unmount Ramdisk on client and server machines
+./unmount_ramdrive.sh
 ```
 
 #### **LLAMA Cleartext**

diff --git a/sytorch/Toy example- single inference.md b/sytorch/Toy example- single inference.md
@@ -94,5 +94,33 @@ chmod +x client-offline.sh client-online.sh
 (on client)
 ./client-online.sh
 ```
+8. To use a ramdisk, follow the steps below:
+```
+# Enable and mount Ramdisk on client and server machines
+./ramdrive.sh <ramdisk_size>
+
+ramdisk_size must be at least the sum of the server and client key sizes.
+(example)
+Lenet server key size = 9.5 MB
+Lenet client key size = 9.5 MB
+Lenet Total key size = 19 MB
+So, ramdisk_size >= 19 MB
+
+command: ./ramdrive.sh 20m
+
+chexpert server key size = 87.5 GB
+chexpert client key size = 87.5 GB
+chexpert Total key size = 175 GB
+So ramdisk_size >= 175 GB
+
+command: ./ramdrive.sh 200g
+
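+// Change the commands in the server and client shell scripts to use the ramdisk by appending <num_threads> and "true", e.g.:
+ ./lenet_LLAMA_15 1  ->> ./lenet_LLAMA_15 1 4 true
+// TODO: a sed command to automate this change is yet to be added.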
+
+# Disable and unmount Ramdisk on client and server machines after inference
+./unmount_ramdrive.sh
+```
 
 In this particular example, you should get a score array of `[-2.71362 1.06747 4.43045 0.795044 -3.21173 -2.39871 -8.49094 10.3443 1.0567 -0.694458]`, which is maximum at index 7, which is indeed expected as the [input.jpg](https://github.com/kanav99/models/raw/main/input.jpg) file contains an image of handwritten 7.
diff --git a/sytorch/ext/cryptoTools/cryptoTools/Common/Log.h b/sytorch/ext/cryptoTools/cryptoTools/Common/Log.h
@@ -41,7 +41,7 @@ namespace osuCrypto
         std::lock_guard<std::mutex>l(log.mLock);
         for (u64 i = 0; i < log.mMessages.size(); ++i)
         {
-            o << "[" << i << ", " << log.mMessages[i].first / 1000.0 << "ms ]  " << log.mMessages[i].second << std::endl;
+            o << "[" << i << ", " << log.mMessages[i].first / 1000.0 << "ms ]  " << log.mMessages[i].second << "\n";
         }
 
         return o;
@@ -219,4 +219,4 @@ namespace osuCrypto
     void setThreadName(const std::string name);
     void setThreadName(const char* name);
 
-}
+}