This repository has been archived by the owner on Oct 25, 2024. It is now read-only.

Update python packaging (#967)
DDEle authored Jun 7, 2023
1 parent bfcb2e4 commit d1e636f
Showing 29 changed files with 394 additions and 359 deletions.
11 changes: 9 additions & 2 deletions .gitignore
@@ -5,6 +5,7 @@
*/__pycache__
*.snapshot
*.so
*.so.*
*.pb
*.ckpt
*.log
@@ -14,7 +15,14 @@ tags
build/
_build
dist/
intel_extension_for_transformers.egg-info/

# build / dist files
/intel_extension_for_transformers/intel_extension_for_transformers[.-]*/
/intel_extension_for_transformers/neural_engine_*
/intel_extension_for_transformers/*.dll
/intel_extension_for_transformers/_version.py
/intel_extension_for_transformers[.-]*
/neural_engine_py*
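(The `[.-]*` suffix in these patterns is a character class plus wildcard: one rule catches generated artifacts such as `intel_extension_for_transformers.egg-info` or version-suffixed build directories, while the bare source directory `intel_extension_for_transformers` itself stays tracked.)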

/.vs

@@ -27,7 +35,6 @@ intel_extension_for_transformers.egg-info/
/examples/**/tmp/
/examples/**/model_and_tokenizer/
/examples/**/*ir*/
version.txt
build_tmp

/workflows/chatbot/demo/front_end/node_modules
5 changes: 5 additions & 0 deletions MANIFEST.in
@@ -0,0 +1,5 @@
include .gitmodules

global-exclude torchoutput.pkl
prune .github
prune docker
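These rules shape the source distribution: `include .gitmodules` keeps the submodule spec, `global-exclude` drops a stray artifact anywhere in the tree, and `prune` removes whole directories. As a hedged sketch (assuming an sdist has already been built into `dist/`, e.g. with `python -m build --sdist`), the result can be inspected like this:

```python
# Sketch: list what the MANIFEST.in rules actually let into the sdist.
import glob
import tarfile

sdist = sorted(glob.glob("dist/*.tar.gz"))[-1]  # pick one sdist tarball
with tarfile.open(sdist) as tf:
    for name in tf.getnames():
        print(name)  # .github/ and docker/ entries should be absent
```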
73 changes: 17 additions & 56 deletions docs/installation.md
@@ -1,40 +1,24 @@
# Installation

1. [Linux Installation](#linux-installation)
1. [Install from Pypi](#install-from-pypi)

1.1. [Prerequisites](#prerequisites)
2. [Install from Source](#install-from-source)

1.2. [Install from Pypi](#install-from-pypi)
2.1. [Prerequisites](#prerequisites)

1.3. [Install from Source](#install-from-source)

2. [Windows Installation](#windows-installation)

2.1. [Prerequisites](#prerequisites-1)

2.2. [Install from Pypi](#install-from-pypi-1)

2.3. [Install from Source](#install-from-source-1)
2.2. [Install Intel Extension for Transformers](#install-intel-extension-for-transformers)

3. [System Requirements](#system-requirements)

3.1. [Validated Hardware Environment](#validated-hardware-environment)

3.2. [Validated Software Environment](#validated-software-environment)

## Linux Installation
### Prerequisites
The following prerequisites and requirements must be satisfied for a successful installation:
## Install from Pypi
Binary builds for Python 3.7, 3.8, 3.9, and 3.10 are available on PyPI

- Python version: 3.7 or 3.8 or 3.9 or 3.10
```
# Install Dependency
pip install -r requirements.txt
```
>**Note**: We recommend installing protobuf <= 3.20.0 when using onnxruntime <= 1.11
### Install from Pypi

```Bash
# install stable basic version from pypi
pip install intel-extension-for-transformers
@@ -45,48 +29,25 @@ pip install -i https://test.pypi.org/simple/ intel-extension-for-transformers
# or install nightly version with only backend
pip install -i https://test.pypi.org/simple/ intel-extension-for-transformers-backend
```
```Shell
```Bash
# install stable basic version from conda
conda install -c intel intel_extension_for_transformers
```

### Install from Source
```Bash
git clone https://github.com/intel/intel-extension-for-transformers.git intel_extension_for_transformers
cd intel_extension_for_transformers
# Install Dependency
pip install -r requirements.txt
git submodule update --init --recursive
# Install intel_extension_for_transformers
python setup.py install
```

## Windows Installation
## Install from Source

### Prerequisites

The following prerequisites and requirements must be satisfied for a successful installation:
- Python version: 3.7 or 3.8 or 3.9 or 3.10
- GCC (on Linux) or Visual Studio (on Windows)

- Python version: 3.7 or 3.8 or 3.9
- Visual Studio

### Install from Pypi

```Bat
pip install intel-extension-for-transformers
```

### Install from Source

```Bat
git clone https://github.com/intel/intel-extension-for-transformers.git intel_extension_for_transformers
### Install Intel Extension for Transformers
```Bash
git clone https://github.com/intel/intel-extension-for-transformers.git
cd intel_extension_for_transformers
# Install Dependency
pip install -r requirements.txt
git submodule update --init --recursive
# Install intel_extension_for_transformers
python setup.py install
```
pip install -v .
```
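Note that the new `pip install -v .` (the `-v` flag only makes build output visible) drives the build through pip and the project's build backend, replacing the deprecated `python setup.py install` invocation removed above.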

## System Requirements
### Validated Hardware Environment
5 changes: 4 additions & 1 deletion intel_extension_for_transformers/__init__.py
@@ -15,4 +15,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from .version import __version__
try:
from ._version import __version__ # load _version file generated by setuptools_scm
except ModuleNotFoundError:
__version__ = "0.0.0"
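As the inline comment says, `_version.py` is generated by setuptools_scm at build time (the new `.gitignore` entry above keeps it out of the repo), with a `0.0.0` fallback for source trees that were never built. A minimal sketch of how setuptools_scm derives that version in a git checkout; the exact options the project's build config passes are not shown in this diff:

```python
# Hedged sketch: compute the version string setuptools_scm would write
# into _version.py; real builds configure this via setup.py/pyproject.
from setuptools_scm import get_version

version = get_version(root=".")  # e.g. "1.0.1.dev7+gd1e636f" from git tags
print(version)
```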
(another changed file)
@@ -8,6 +8,7 @@ set(NE_THIRD_PARTY_DIR "${NE_ROOT}/third_party")
set(NE_CMAKE_DIR "${NE_ROOT}/cmake")
list(APPEND CMAKE_MODULE_PATH ${NE_CMAKE_DIR})

set(NE_VERSION_STRING "Unknown" CACHE STRING "The version string used by neural engine C++ interface")
option(NE_WITH_SPARSELIB "Enable sparselib with sparse gemm ops" ON)
option(NE_WITH_SPARSELIB_ONLY "Only for sparselib" OFF)
option(NE_WITH_SPARSELIB_BENCHMARK "Enable sparselib sparse benchmark" OFF)
(another changed file)
@@ -0,0 +1,30 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (c) 2021 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""The module of Neural Engine."""

import subprocess
import sys
import intel_extension_for_transformers
import os.path as path


def neural_engine_bin():
''' Entry point for C++ interface '''
neural_engine_bin = path.join(
intel_extension_for_transformers.__path__[0], 'neural_engine_bin')
raise SystemExit(subprocess.call(
[neural_engine_bin] + sys.argv[1:], close_fds=False))
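`neural_engine_bin()` forwards its arguments to the compiled `neural_engine_bin` executable shipped inside the installed package, so the C++ tool can be exposed as a console command. A sketch of how such a wrapper is typically wired up with a setuptools entry point; the script name and module path below are assumptions for illustration, not taken from this commit:

```python
# Hypothetical setup.py fragment; the project's real packaging config may differ.
from setuptools import setup

setup(
    name="intel-extension-for-transformers",
    entry_points={
        "console_scripts": [
            # assumed location of the neural_engine_bin() wrapper above
            "neural_engine = intel_extension_for_transformers.backends.neural_engine:neural_engine_bin",
        ],
    },
)
```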

This file was deleted.

(another changed file)
@@ -60,7 +60,8 @@ def execution_options(self):
"Please reset the execution_option property if you want change some "
"options when inference, like 'graph.execution_options = your_new_options'. "
"Do not use 'graph.execution_options.some_option = value' directly!")
import neural_engine_py as dp
# pylint: disable=E0611
import intel_extension_for_transformers.neural_engine_py as dp
options = dp.ExecutionOptions()
options_list = [
option for option in dir(options)
@@ -408,7 +409,8 @@ def dump_tensor(self, tensor_list=[]):

def engine_init(self, net_info={}, weight_data=b""):
"""Pybind engine executor."""
import neural_engine_py as dp
# pylint: disable=E0611
import intel_extension_for_transformers.neural_engine_py as dp
if not weight_data:
weight_data = self.weight_data
if not net_info:
@@ -584,7 +586,8 @@ def graph_init(self, config, weight_data=None, load_weight=False):
copy.deepcopy(output_tensors), attr)
self.insert_nodes(len(self.nodes), [op])
if not load_weight and weight_data:
import neural_engine_py as dp
# pylint: disable=E0611
import intel_extension_for_transformers.neural_engine_py as dp
model = dp.Model(config, weight_data)
self._engine = [model, output_list]
if bin_file:
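Throughout this file the pybind11 module `neural_engine_py` is now imported from inside the package instead of as a top-level module. For downstream code written against the old layout, a backward-compatible import could look like the following sketch (illustrative only, not part of this commit):

```python
try:
    # new layout: the extension module lives inside the package
    import intel_extension_for_transformers.neural_engine_py as dp
except ImportError:
    import neural_engine_py as dp  # fall back to the old top-level module
```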
(another changed file)
@@ -60,7 +60,7 @@ model.graph_init(conf.yaml, model.bin)
# or get model from compile
from intel_extension_for_transformers.backends.neural_engine.compile import compile
model = compile(model.onnx)
import neural_engine_py as dp
import intel_extension_for_transformers.neural_engine_py as dp
options = dp.ExecutionOptions()
options.enable_op_tuning = True
model.execution_options = options
@@ -90,7 +90,7 @@ InnerProduct 14124194128933833351 SparseLib 4,1024,384 0 2
You can set the table file path and tuning warmup iterations if you want to simulate real deployment conditions.

```python
import neural_engine_py as dp
import intel_extension_for_transformers.neural_engine_py as dp
options = dp.ExecutionOptions()
options.enable_op_tuning = True
# set tuning warmup iterations
@@ -116,7 +116,7 @@ for i in range(iterations):
# get performance here

# 2. tuning
import neural_engine_py as dp
import intel_extension_for_transformers.neural_engine_py as dp
options = dp.ExecutionOptions()
options.enable_op_tuning = True
options.warmup_iter = num_iterations
(another changed file)
@@ -126,6 +126,7 @@ target_link_libraries(neural_engine_py
add_executable(neural_engine_bin
src/nlp_executor.cc
)
target_compile_definitions(neural_engine_bin PRIVATE NE_VERSION_STRING=${NE_VERSION_STRING})

add_dependencies(neural_engine_bin
neural_engine
(another changed file)
@@ -17,7 +17,7 @@

import argparse
import numpy as np
from neural_engine_py import Model
from intel_extension_for_transformers.neural_engine_py import Model

parser = argparse.ArgumentParser(description='Deep engine Model Executor')
parser.add_argument('--weight', default='', type=str, help='weight of the model')
(another changed file)
@@ -808,9 +808,14 @@ __m256i cvt_fp32_to_bf16(const __m512 src) {
#endif
}
#elif __AVX2__
const uint8_t avx2_bf16_convert_magic_num[32] = {0x02, 0x03, 0x06, 0x07, 0x0a, 0x0b, 0x0e, 0x0f, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x02, 0x03, 0x06, 0x07, 0x0a, 0x0b,
0x0e, 0x0f, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80};
__m128i cvt_fp32_to_bf16(const __m256 src) {
auto y = _mm256_bsrli_epi128(_mm256_castps_si256(src), 2);
return _mm256_cvtepi32_epi16(y);
auto shuffle_v = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(avx2_bf16_convert_magic_num));
__m256i trunc_elements = _mm256_shuffle_epi8(_mm256_castps_si256(src), shuffle_v);
__m256i ordered = _mm256_permute4x64_epi64(trunc_elements, 0x58);
return _mm256_castsi256_si128(ordered);
}
#endif
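This rewrite matters because `_mm256_cvtepi32_epi16` is an AVX-512VL intrinsic and does not belong in the AVX2-only path. The replacement achieves the same truncation with plain AVX2 ops: the `_mm256_shuffle_epi8` mask copies bytes 2-3 of each 32-bit float (its high half) and zeroes the rest (the `0x80` entries), and `_mm256_permute4x64_epi64` gathers the two lanes' results into the low 128 bits. A scalar Python sketch of the same fp32-to-bf16 truncation, for reference:

```python
import struct

def fp32_to_bf16_truncate(x: float) -> int:
    """bf16 keeps the high 16 bits of the fp32 bit pattern (truncate, no rounding)."""
    (bits,) = struct.unpack("<I", struct.pack("<f", x))
    return bits >> 16

assert fp32_to_bf16_truncate(1.0) == 0x3F80   # 1.0f is 0x3F800000
assert fp32_to_bf16_truncate(-2.0) == 0xC000  # -2.0f is 0xC0000000
```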

@@ -974,6 +979,7 @@ void runtime_minmax(const float* data, size_t length, float* min_num, float* max
*max_num = *std::max_element(block_maxs.begin(), block_maxs.end());
}

#ifdef __AVX512F__
void block_minmax_avx512(const float* Input, size_t N, float* Min, float* Max) {
float tmp_min = std::numeric_limits<float>::max();
float tmp_max = std::numeric_limits<float>::lowest();
@@ -1058,6 +1064,7 @@ void block_minmax_avx512(const float* Input, size_t N, float* Min, float* Max) {
*Min = tmp_min;
*Max = tmp_max;
}
#else
void block_minmax(const float* Input, size_t N, float* Min, float* Max) {
float tmp_min = std::numeric_limits<float>::max();
float tmp_max = std::numeric_limits<float>::lowest();
@@ -1142,6 +1149,7 @@ void block_minmax(const float* Input, size_t N, float* Min, float* Max) {
*Min = tmp_min;
*Max = tmp_max;
}
#endif
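With this guard in place, only one of the two implementations is compiled: AVX-512F builds get `block_minmax_avx512`, while other targets get the generic `block_minmax`, avoiding intrinsics the compiler cannot emit.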

/************ InnerProductPrimitiveFwdFactory member function ************/
size_t InnerProductPrimitiveFwdFactory::GenKey(const string& src0_dtype, const string& src1_dtype,
(another changed file)
@@ -15,6 +15,10 @@
#include "executor.hpp"
#include "gflags/gflags.h"

#ifndef NE_VERSION_STRING // Release version passed from cmake
#define NE_VERSION_STRING Unknown
#endif

DEFINE_int32(batch_size, 1, "image batch sizes");
DEFINE_int32(seq_len, 384, "default seq len");
DEFINE_int32(iterations, 10, "iterations");
@@ -91,7 +95,11 @@ void run_net() {
std::cout << " Throughput is " << 1000. / latency << std::endl;
}

// Use double macro to convert macro to string. ref: https://stackoverflow.com/a/6852934/21847662
#define STR_LITERAL(str) #str
#define SET_VERSION(x) gflags::SetVersionString(STR_LITERAL(x))
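// Without the extra layer, SET_VERSION(NE_VERSION_STRING) would stringize the
// macro name itself ("NE_VERSION_STRING"); routing through STR_LITERAL lets
// NE_VERSION_STRING expand to its value before # stringizes it.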
int main(int argc, char** argv) {
SET_VERSION(NE_VERSION_STRING);
gflags::ParseCommandLineFlags(&argc, &argv, true);
executor::GlobalInit(argv[0]);
run_net();
(another changed file)
@@ -1,17 +1,16 @@
# How to visualize the weights distribution of a sparse model
## Introduction
sparse model is one type of compressed model, which contains un-structure information more than pre-trained model in the same weights tensor. visualization these sparse models could help us design a better kernel level computation pattern in specific hardware platform. this script currently supports both neural engine model and pytorch model.
A sparse model is a type of compressed model whose weight tensors carry more unstructured (sparsity) information than those of the pre-trained model. Visualizing these sparse models can help us design better kernel-level computation patterns for a specific hardware platform. This script currently supports both Neural Engine and PyTorch models.
## Setups
### 1. Prepare the sparse model
you have to prepare a model.bin file and its conf.yaml file for neural engine model in the same IR directory, but just one model.bin for pytorch model.
You have to prepare a model.bin file and its conf.yaml file in the same IR directory for a Neural Engine model, but just a model.bin file for a PyTorch model.
### 2. Run the commands below
| | |
|---|---|
A. Neural Engine model | python sparsity_all.py --mt 1 --path /path to IR directory
B. Pytorch model | python sparsity_all.py --mt 0 --path /path to pytorch model directory
`pip install -r requirements.txt`
1. Neural Engine model: `python sparsity_all.py --mt 1 --path /path to IR directory`
2. Pytorch model: `python sparsity_all.py --mt 0 --path /path to pytorch model directory`

### 3. Analyze the results
you could see visualization one of results as below hotmap figure after running above command in the same directory, and it could generate JPG format pictures for each weight tensor, the name of pictures corresponding with tensor name.
![Image text](sample_figure.jpg)
After running the above command, you will see one of the visualized results as a heatmap figure like the one below; a JPG picture is generated in the same directory for each weight tensor, named after the corresponding tensor.
![Sample figure](sample_figure.jpg)


(another changed file)
@@ -0,0 +1,4 @@
matplotlib
numpy
torch~=1.13
pyyaml
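(`torch~=1.13` is a compatible-release pin: it allows any torch 1.x release at or above 1.13, i.e. `>=1.13,<2.0`.)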