diff --git a/.github/license_template.txt b/.github/license_template.txt new file mode 100644 index 00000000000..4987549106e --- /dev/null +++ b/.github/license_template.txt @@ -0,0 +1,13 @@ +Copyright (c) 2024 Intel Corporation + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. \ No newline at end of file diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index 9cd9f284854..c28d1c790c0 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -18,4 +18,4 @@ how to reproduce the test (including hardware information) ## Dependency Change? -any library dependency introduced or removed \ No newline at end of file +any library dependency introduced or removed diff --git a/.github/workflows/README.md b/.github/workflows/README.md index f031af5a0d6..709255337c6 100644 --- a/.github/workflows/README.md +++ b/.github/workflows/README.md @@ -33,6 +33,3 @@ If you want to run tests locally, please refer to test yml and corresponding scr | Kernel Benchmark | [sparse_lib_CI.yml](./sparse_lib_CI.yml) | [kernel-benchmark](./script/SparseLibCI/) | | NeuralChat Workflows | [chatbot-test.yml](./chatbot-test.yml) | [chatbot](./script/chatbot/) | |Copyright Check | [copyright_check.yml](./copyright_check.yml) | N/A | - - - diff --git a/.github/workflows/chatbot-finetune-mpt-7b-chat-hpu.yml b/.github/workflows/chatbot-finetune-mpt-7b-chat-hpu.yml index 5cc9da539cc..8faff4d8a5f 100644 --- a/.github/workflows/chatbot-finetune-mpt-7b-chat-hpu.yml +++ b/.github/workflows/chatbot-finetune-mpt-7b-chat-hpu.yml @@ -14,7 +14,7 @@ jobs: steps: - name: Clean Up Working Directory run: sudo rm -rf ~/itrex-actions-runner/_work/intel-extension-for-transformers/intel-extension-for-transformers/* - + - uses: actions/checkout@v3 with: submodules: "recursive" @@ -59,8 +59,8 @@ jobs: --device hpu \ --use_habana \ --use_lazy_mode " - - docker exec "chatbotfinetune-hpu-s0" bash -c "$cmd" + + docker exec "chatbotfinetune-hpu-s0" bash -c "$cmd" - name: Stop Container if: success() || failure() diff --git a/.github/workflows/chatbot-finetune-mpt-7b-chat.yml b/.github/workflows/chatbot-finetune-mpt-7b-chat.yml index 69aeec8a291..b7af3a95401 100644 --- a/.github/workflows/chatbot-finetune-mpt-7b-chat.yml +++ b/.github/workflows/chatbot-finetune-mpt-7b-chat.yml @@ -16,13 +16,13 @@ jobs: uses: actions/checkout@v2 - name: Load environment variables - run: + run: cat ~/itrex-actions-runner/.env >> $GITHUB_ENV - name: Prepare Cache run: cp ${{ env.CACHE }}/torch/* ./ - name: Build Docker Image - run: + run: if [[ $(docker images | grep chatbotfinetune-mpi | wc -l) == 0 ]]; then docker build ./ --target cpu --build-arg REPO=${{ github.server_url }}/${{ github.repository }}.git --build-arg ITREX_VER=${{ github.head_ref }} --build-arg REPO_PATH="." 
--build-arg http_proxy="${{ env.HTTP_PROXY_IMAGE_BUILD }}" --build-arg https_proxy="${{ env.HTTPS_PROXY_IMAGE_BUILD }}" -f intel_extension_for_transformers/neural_chat/docker/Dockerfile -t chatbotfinetune-mpi:latest && yes | docker container prune && yes | docker image prune; fi @@ -55,7 +55,7 @@ jobs: - name: Print Logs and Check Finetuning Status if: success() || failure() run: | - sh .github/workflows/script/chatbot/finish_ft_mpt-7b-chat_mpi.sh + sh .github/workflows/script/chatbot/finish_ft_mpt-7b-chat_mpi.sh - name: Stop Container if: success() || failure() diff --git a/.github/workflows/chatbot-inference-llama-2-7b-chat-hf.yml b/.github/workflows/chatbot-inference-llama-2-7b-chat-hf.yml index cd8d6497acd..08c866c7904 100644 --- a/.github/workflows/chatbot-inference-llama-2-7b-chat-hf.yml +++ b/.github/workflows/chatbot-inference-llama-2-7b-chat-hf.yml @@ -19,7 +19,7 @@ jobs: run: cat ~/actions-runner/.env >> $GITHUB_ENV - name: Build Docker Image - run: + run: if [ $(docker images | grep chatbotinfer-1-gha | wc -l) == 0 ]; then docker build --no-cache ./ --target cpu --build-arg REPO=${{ github.server_url }}/${{ github.repository }}.git --build-arg ITREX_VER=${{ github.head_ref }} --build-arg REPO_PATH="." --build-arg http_proxy="${{ env.HTTP_PROXY_IMAGE_BUILD }}" --build-arg https_proxy="${{ env.HTTPS_PROXY_IMAGE_BUILD }}" -f intel_extension_for_transformers/neural_chat/docker/Dockerfile -t chatbotinfer-1-gha:latest && yes | docker container prune && yes | docker image prune; fi diff --git a/.github/workflows/chatbot-inference-mpt-7b-chat-hpu.yml b/.github/workflows/chatbot-inference-mpt-7b-chat-hpu.yml index 407084adc36..9f086589a72 100644 --- a/.github/workflows/chatbot-inference-mpt-7b-chat-hpu.yml +++ b/.github/workflows/chatbot-inference-mpt-7b-chat-hpu.yml @@ -20,9 +20,9 @@ jobs: submodules: "recursive" - name: Load environment variables - run: + run: cat ~/itrex-actions-runner/.env >> $GITHUB_ENV - + - name: Build Docker Image run: docker build --no-cache ./ --target hpu --build-arg ITREX_VER=${{ github.event.pull_request.head.ref }} --build-arg REPO=${{ github.server_url }}/${{ github.event.pull_request.head.repo.full_name }}.git --build-arg REPO_PATH="." --build-arg http_proxy="${{ env.HTTP_PROXY_IMAGE_BUILD }}" --build-arg https_proxy="${{ env.HTTPS_PROXY_IMAGE_BUILD }}" -f intel_extension_for_transformers/neural_chat/docker/Dockerfile -t chatbotinfer-hpu:latest && yes | docker container prune && yes | docker image prune @@ -44,4 +44,3 @@ jobs: - name: Test Summary run: echo "Inference completed successfully" - diff --git a/.github/workflows/chatbot-inference-mpt-7b-chat.yml b/.github/workflows/chatbot-inference-mpt-7b-chat.yml index 76a59d6c489..aaa4226d7d0 100644 --- a/.github/workflows/chatbot-inference-mpt-7b-chat.yml +++ b/.github/workflows/chatbot-inference-mpt-7b-chat.yml @@ -16,11 +16,11 @@ jobs: uses: actions/checkout@v2 - name: Load environment variables - run: + run: cat ~/actions-runner/.env >> $GITHUB_ENV - name: Build Docker Image - run: + run: if [ $(docker images | grep chatbotinfer-1-gha | wc -l) == 0 ]; then docker build --no-cache ./ --target cpu --build-arg REPO=${{ github.server_url }}/${{ github.repository }}.git --build-arg ITREX_VER=${{ github.head_ref }} --build-arg REPO_PATH="." 
--build-arg http_proxy="${{ env.HTTP_PROXY_IMAGE_BUILD }}" --build-arg https_proxy="${{ env.HTTPS_PROXY_IMAGE_BUILD }}" -f intel_extension_for_transformers/neural_chat/docker/Dockerfile -t chatbotinfer-1-gha:latest && yes | docker container prune && yes | docker image prune; fi diff --git a/.github/workflows/chatbot-test.yml b/.github/workflows/chatbot-test.yml index 5f482abd39a..4b49381773b 100644 --- a/.github/workflows/chatbot-test.yml +++ b/.github/workflows/chatbot-test.yml @@ -50,4 +50,3 @@ jobs: # # call-finetune-mpt-7b-chat-hpu: # uses: ./.github/workflows/chatbot-finetune-mpt-7b-chat-hpu.yml - diff --git a/.github/workflows/chatbot_finetuning.yml b/.github/workflows/chatbot_finetuning.yml index de3c3805f66..fe22ba3d385 100644 --- a/.github/workflows/chatbot_finetuning.yml +++ b/.github/workflows/chatbot_finetuning.yml @@ -11,4 +11,3 @@ concurrency: jobs: call-finetune-mpt-7b-chat: uses: ./.github/workflows/chatbot-finetune-mpt-7b-chat.yml - diff --git a/.github/workflows/copyright_check.yml b/.github/workflows/copyright_check.yml deleted file mode 100644 index 0701faa35a0..00000000000 --- a/.github/workflows/copyright_check.yml +++ /dev/null @@ -1,80 +0,0 @@ -name: Copyright Check - -on: - pull_request: - branches: [main] - paths: - - intel_extension_for_transformers/** - - setup.py - - .github/workflows/format_scan.yml - workflow_dispatch: - -# If there is a new commit, the previous jobs will be canceled -concurrency: - group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} - cancel-in-progress: true - -env: - CODE_SCAN_LOG_PATH: "${{ github.workspace }}/log" - CONTAINER_NAME: "codeScan" - -jobs: - format-scan: - runs-on: itrex-node-spell - strategy: - matrix: - job_name: ["copyright"] - fail-fast: false - steps: - - name: Docker Clean Up - run: | - docker ps -a - if [[ $(docker ps -a | grep -i '${{ env.CONTAINER_NAME }}-${{ runner.name }}'$) ]]; then - docker start ${{ env.CONTAINER_NAME }}-${{ runner.name }} - echo "remove left files through container ..." - docker exec ${{ env.CONTAINER_NAME }}-${{ runner.name }} bash -c "ls -a /intel-extension-for-transformers && rm -fr /intel-extension-for-transformers/* && rm -fr /intel-extension-for-transformers/.* || true" - fi - - - name: Checkout out Repo - uses: actions/checkout@v3 - - - name: CopyRight check - run: | - source ${{ github.workspace }}/.github/workflows/script/change_color.sh - set -e - mkdir -p ${{ env.CODE_SCAN_LOG_PATH }} - supported_extensions=(py, sh, yaml) - git fetch - git --no-pager diff --name-only remotes/origin/${{ github.base_ref }} ${{ github.workspace }}/intel_extension_for_transformers> ${{ env.CODE_SCAN_LOG_PATH }}/diff.log - files=$(cat ${{ env.CODE_SCAN_LOG_PATH }}/diff.log | awk '!a[$0]++') - $LIGHT_PURPLE && echo " ----------------- checking ... --------------------------" && $RESET - if [[ -f ${{ env.CODE_SCAN_LOG_PATH }}/copyright_issue_summary.log ]]; then - rm -f ${{ env.CODE_SCAN_LOG_PATH }}/copyright_issue_summary.log - fi - for file in ${files} - do - if [[ "${supported_extensions[@]}" =~ "${file##*.}" ]]; then - if [ $(grep -E -c "Copyright \\(c\\) ([0-9]{4})(-[0-9]{4})? 
Intel Corporation" ${file}) = 0 ]; then - echo ${file} >> ${{ env.CODE_SCAN_LOG_PATH }}/copyright_issue_summary.log - $BOLD_YELLOW && echo " ----------------- Current log file output start --------------------------" - cat ${{ env.CODE_SCAN_LOG_PATH }}/copyright_issue_summary.log - $BOLD_YELLOW && echo " ----------------- Current log file output end --------------------------" && $RESET - $BOLD_RED && echo "CopyRight has something wrong! Please click on the artifact button to download and view the error log!" && $RESET - fi - else - $LIGHT_PURPLE && echo "Skipping ${file}" && $RESET - fi - done - if [[ -f ${{ env.CODE_SCAN_LOG_PATH }}/copyright_issue_summary.log ]]; then - $BOLD_YELLOW && echo " ----------------- Current log file output start --------------------------" - cat ${{ env.CODE_SCAN_LOG_PATH }}/copyright_issue_summary.log - $BOLD_YELLOW && echo " ----------------- Current log file output end --------------------------" && $RESET - $BOLD_RED && echo "CopyRight has something wrong! Please click on the artifact button to download and view the error log!" && $RESET && exit 1 - fi - - - name: Publish pipeline artifact - if: ${{ failure() }} - uses: actions/upload-artifact@v3 - with: - name: ${{ matrix.job_name }} - path: ${{ env.CODE_SCAN_LOG_PATH }}.* diff --git a/.github/workflows/cpp-graph-test.yml b/.github/workflows/cpp-graph-test.yml index d61496c0da5..3beb0807e4b 100644 --- a/.github/workflows/cpp-graph-test.yml +++ b/.github/workflows/cpp-graph-test.yml @@ -63,7 +63,7 @@ jobs: env: WORKSPACE: ${{ env.WORKING_DIR }} GRAPH_DIR: ${{ env.WORKING_DIR }}/intel_extension_for_transformers/llm/runtime/graph - + - name: Rename summary run: | cd ${{ github.workspace }} @@ -97,7 +97,7 @@ jobs: uses: actions/download-artifact@v3 with: path: ${{ env.OUT_SCRIPT_PATH }}/generated/log - + - name: Merge CPP Graph Summary Log run: | cd ${{ env.OUT_SCRIPT_PATH }}/generated/log/cpp_graph diff --git a/.github/workflows/llm-test.yml b/.github/workflows/llm-test.yml index c497ed263ad..d8ad320e5cb 100644 --- a/.github/workflows/llm-test.yml +++ b/.github/workflows/llm-test.yml @@ -50,7 +50,7 @@ jobs: - name: Binary build run: | cd ${{ github.workspace }} - conda activate llm-test || source activate llm-test + conda activate llm-test || source activate llm-test pip install build --upgrade pip install -r requirements.txt python setup.py sdist bdist_wheel @@ -66,13 +66,13 @@ jobs: run: | cd ${{ github.workspace }}/.github/workflows/script/models bash run_llm.sh --model=${{ matrix.modelName }} --framework=${{ matrix.framework }} --mode=${{ matrix.mode }} --conda_env_name=llm-test --precision=int8 - + - name: FP8 Benchmark run: | cd ${{ github.workspace }}/.github/workflows/script/models bash run_llm.sh --model=${{ matrix.modelName }} --framework=${{ matrix.framework }} --mode=${{ matrix.mode }} --conda_env_name=llm-test --precision=fp8 - - + + - name: Publish pipeline artifact uses: actions/upload-artifact@v3 if: ${{ !cancelled() }} @@ -145,4 +145,3 @@ jobs: echo "[Performance Regression] Some model performance regression occurred, please check artifacts and reports." 
exit 1 fi - \ No newline at end of file diff --git a/.github/workflows/optimize-test.yml b/.github/workflows/optimize-test.yml index 151620052c2..6ea06aea1ba 100644 --- a/.github/workflows/optimize-test.yml +++ b/.github/workflows/optimize-test.yml @@ -104,13 +104,13 @@ jobs: docker exec ${{ env.CONTAINER_NAME }} \ bash -c "cd /intel-extension-for-transformers/.github/workflows/script/models \ && bash run_optimize.sh --model=${{ matrix.modelName }} --framework=${{ matrix.framework }} --mode='tuning'" - + - name: INT8 Benchmark run: | docker exec ${{ env.CONTAINER_NAME }} \ bash -c "cd /intel-extension-for-transformers/.github/workflows/script/models \ && bash run_optimize.sh --model=${{ matrix.modelName }} --framework=${{ matrix.framework }} --mode=${{ matrix.mode }} --precision=int8 --PERF_STABLE_CHECK=${{ vars.PERF_STABLE_CHECK }}" - + - name: FP32 Benchmark run: | docker exec ${{ env.CONTAINER_NAME }} \ @@ -127,7 +127,7 @@ jobs: --output_dir=/intel-extension-for-transformers/${{matrix.framework}}_${{matrix.modelName}} \ --build_id=${{ github.run_id }} \ --model_test_type=optimize" - + - name: Publish pipeline artifact uses: actions/upload-artifact@v3 if: ${{ !cancelled() }} @@ -136,7 +136,7 @@ jobs: path: ${{ github.workspace }}/${{ matrix.framework }}_${{ matrix.modelName }} if-no-files-found: ignore # 'warn' or 'ignore' are also available, defaults to `warn` retention-days: 60 # 1 <= retention-days <= 90 - + Genreate-Report: runs-on: itrex-node-spell needs: [Optimize-Workflow] @@ -156,7 +156,7 @@ jobs: uses: actions/download-artifact@v3 with: path: ${{ env.OUT_SCRIPT_PATH }}/log - + - name: Analysis Summary run: | cd ${{ env.OUT_SCRIPT_PATH }} @@ -178,7 +178,7 @@ jobs: search_artifacts: false skip_unpack: false if_no_artifact_found: warn - + - name: Display structure of downloaded files run: cd ${{ env.OUT_SCRIPT_PATH }}/log && ls -R @@ -194,7 +194,7 @@ jobs: MR_source_branch: ${{ github.head_ref }} ghprbActualCommit: ${{ github.event.pull_request.head.sha }} - + - name: Publish Report uses: actions/upload-artifact@v3 if: ${{ !cancelled() }} diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 0fb5d489d54..876e2176237 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -18,11 +18,10 @@ jobs: git config --local --get remote.origin.url cd docs/build_docs bash build.sh latest - + - name: Push to github uses: peaceiris/actions-gh-pages@v3 with: github_token: ${{ secrets.GITHUB_TOKEN }} publish_dir: ./build_tmp/gh-pages publish_branch: gh-pages - diff --git a/.github/workflows/script/chatbot/hpu_check/to_hpu.py b/.github/workflows/script/chatbot/hpu_check/to_hpu.py index ee7d2218c9b..1a1b7b4a66f 100644 --- a/.github/workflows/script/chatbot/hpu_check/to_hpu.py +++ b/.github/workflows/script/chatbot/hpu_check/to_hpu.py @@ -3,4 +3,3 @@ ly = torch.nn.Linear(2, 4) ly.to("hpu") print("hpu is available") - diff --git a/.github/workflows/script/py_task_injection.py b/.github/workflows/script/py_task_injection.py index afcdffbe0c1..8e03a140850 100644 --- a/.github/workflows/script/py_task_injection.py +++ b/.github/workflows/script/py_task_injection.py @@ -14,7 +14,7 @@ def __init__(self, file_name: str, code_content: str, pattern: str) -> None: self.file_path = file_name self.code_content = code_content self.pattern = pattern - + def insert(self) -> None: original_code = self.get_source_code(self.file_path) @@ -70,4 +70,3 @@ def exists(target: str) -> bool: code_inserter.insert() else: raise ValueError(f"invalid task: {args.task}") - diff 
--git a/.github/workflows/unit-test-engine.yml b/.github/workflows/unit-test-engine.yml index 2a26547706b..14c0d8ccb88 100644 --- a/.github/workflows/unit-test-engine.yml +++ b/.github/workflows/unit-test-engine.yml @@ -52,7 +52,7 @@ jobs: echo "remove left files through container ..." docker exec ${{ env.EXTRA_CONTAINER_NAME }} bash -c "ls -a /intel-extension-for-transformers && rm -fr /intel-extension-for-transformers/* && rm -fr /intel-extension-for-transformers/.* || true" fi - + - name: Checkout out Repo uses: actions/checkout@v3 with: @@ -111,7 +111,7 @@ jobs: name: "UnitTest${{ matrix.test_name }}" path: ${{ github.workspace }}/log_dir retention-days: 5 - + Genreate-Report: runs-on: itrex-node-spell needs: [unit-test] @@ -126,15 +126,15 @@ jobs: fi - name: Checkout out Repo uses: actions/checkout@v3 - + - name: Download UT PR Log uses: actions/download-artifact@v3 with: path: ${{ github.workspace }}/log_dir - + - name: Display structure of downloaded files run: cd ${{ github.workspace }}/log_dir && ls -R - + - name: Calculate coverage run: | cd ${{ github.workspace }}/.github/workflows/script/unitTest/coverage @@ -146,4 +146,4 @@ jobs: with: name: Engine Unit Test path: ${{ github.workspace }}/log_dir - retention-days: 5 \ No newline at end of file + retention-days: 5 diff --git a/.github/workflows/unit-test-llmruntime.yml b/.github/workflows/unit-test-llmruntime.yml index f4d7171e6de..ceca740b9b5 100644 --- a/.github/workflows/unit-test-llmruntime.yml +++ b/.github/workflows/unit-test-llmruntime.yml @@ -30,7 +30,7 @@ jobs: steps: - name: Load environment variables run: cat ~/actions-runner2/.env >> $GITHUB_ENV - + - name: Docker Clean Up run: | docker ps -a diff --git a/.github/workflows/unit-test-neuralchat.yml b/.github/workflows/unit-test-neuralchat.yml index 0c820009b01..c07a1d45f3b 100644 --- a/.github/workflows/unit-test-neuralchat.yml +++ b/.github/workflows/unit-test-neuralchat.yml @@ -108,15 +108,15 @@ jobs: docker exec ${{ env.CONTAINER_NAME }} \ bash -c "cd /intel-extension-for-transformers && \ mv /log_dir . 
" - + - name: Publish pipeline artifact uses: actions/upload-artifact@v3 if: ${{ !cancelled() }} with: name: "UnitTest${{ matrix.test_name }}" path: ${{ github.workspace }}/log_dir - - + + Generate-Report: runs-on: itrex-node-spell needs: [unit-test] @@ -131,15 +131,15 @@ jobs: fi - name: Checkout out Repo uses: actions/checkout@v3 - + - name: Download UT PR Log uses: actions/download-artifact@v3 with: path: ${{ github.workspace }}/log_dir - + - name: Display structure of downloaded files run: cd ${{ github.workspace }}/log_dir && ls -R - + - name: Calculate coverage run: | cd ${{ github.workspace }}/.github/workflows/script/unitTest/coverage diff --git a/.github/workflows/unit-test-optimize.yml b/.github/workflows/unit-test-optimize.yml index 30e74c0b58a..524a7045d70 100644 --- a/.github/workflows/unit-test-optimize.yml +++ b/.github/workflows/unit-test-optimize.yml @@ -98,7 +98,7 @@ jobs: bash -c "cd /intel-extension-for-transformers/.github/workflows/script/unitTest \ && export HF_HOME=/dataset/cache/ \ && bash run_unit_test_optimize.sh --test_name=${{ matrix.test_name }}" - + - name: Collect log if: ${{ !cancelled() }} run: | @@ -127,15 +127,15 @@ jobs: fi - name: Checkout out Repo uses: actions/checkout@v3 - + - name: Download UT PR Log uses: actions/download-artifact@v3 with: path: ${{ github.workspace }}/log_dir - + - name: Display structure of downloaded files run: cd ${{ github.workspace }}/log_dir && ls -R - + - name: Calculate coverage run: | cd ${{ github.workspace }}/.github/workflows/script/unitTest/coverage @@ -148,8 +148,3 @@ jobs: name: Optimize Unit Test path: ${{ github.workspace }}/log_dir retention-days: 5 - - - - - diff --git a/.github/workflows/windows-test.yml b/.github/workflows/windows-test.yml index 07076388f1f..d1c684ca438 100644 --- a/.github/workflows/windows-test.yml +++ b/.github/workflows/windows-test.yml @@ -23,11 +23,11 @@ on: concurrency: group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} cancel-in-progress: true - + env: SCRIPT_PATH: ${{ github.workspace }}\.github\workflows\script WORKING_DIR: ${{ github.workspace }} - + jobs: Windows-Binary-Test: runs-on: 'Windows' @@ -38,7 +38,7 @@ jobs: submodules: "recursive" fetch-tags: true path: "a" - + - name: Binary build shell: cmd run: | @@ -46,5 +46,3 @@ jobs: SET HTTPS_PROXY=${{ vars.HTTP_PROXY_WINDOWS }} cd ${{ github.workspace }}\a\.github\workflows\script prepare_env_with_conda.bat - - \ No newline at end of file diff --git a/.gitpod.yml b/.gitpod.yml index 34fa666ecb7..399faa10854 100644 --- a/.gitpod.yml +++ b/.gitpod.yml @@ -6,4 +6,4 @@ tasks: - name: Main terminal before: echo 'start intel extension for transformers' init: echo 'install requirements.txt' - command: pip install -r requirements.txt + command: pip install -r requirements.txt diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index c411e070306..5024d33bb7f 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -13,6 +13,33 @@ repos: .github/workflows/script/formatScan/nlp_dict.txt )$ args: [--unique] + - id: end-of-file-fixer + files: (.*\.(py|md|rst|yaml|yml))$ + exclude: | + (?x)^( + examples/.+ + )$ + - id: check-json + - id: check-yaml + exclude: | + (?x)^( + conda_meta/meta.yaml| + docker/chart/templates/pytorchjob.yaml| + docker/chart/templates/pvc.yaml| + workflows/compression_aware_training/chart/templates/workflowTemplate.yaml| + .gitpod.yml + )$ + - id: requirements-txt-fixer + exclude: | + (?x)^( + examples/.+ + )$ + - id: trailing-whitespace + files: 
(.*\.(py|rst|cmake|yaml|yml))$ + exclude: | + (?x)^( + examples/.+ + )$ - repo: https://github.com/codespell-project/codespell rev: v2.2.6 @@ -31,3 +58,83 @@ repos: .github/workflows/sample_data/alpaca_data_sample_45.json| intel_extension_for_transformers/neural_chat/assets/docs/4th\ Generation\ Intel®\ Xeon®\ Scalable\ Processors\ Product\ Specifications.html )$ + + - repo: https://github.com/Lucas-C/pre-commit-hooks + rev: v1.5.4 + hooks: + - id: insert-license + files: | + (?x)^( + intel_extension_for_transformers/.*(py|yaml|yml|sh)| + workflows/.*(py|yaml|yml|sh)| + tests/.*(py|yaml|yml|sh) + )$ + args: + [ + --license-filepath=.github/license_template.txt, + --use-current-year, + --detect-license-in-X-top-lines=40, + --skip-license-insertion-comment=Copyright, + ] + +# - repo: https://github.com/asottile/yesqa +# rev: v1.5.0 +# hooks: +# - id: yesqa +# name: Unused noqa +# +# - repo: https://github.com/pycqa/isort +# rev: 5.13.2 +# hooks: +# - id: isort +# exclude: | +# (?x)^( +# examples/.+ +# )$ +# +# - repo: https://github.com/PyCQA/docformatter +# rev: v1.7.5 +# hooks: +# - id: docformatter +# args: [ +# --in-place, +# --wrap-summaries=0, # 0 means disable wrap +# --wrap-descriptions=0, # 0 means disable wrap +# --black, +# --style=google, +# ] +# exclude: | +# (?x)^( +# examples/.+ +# )$ +# +# - repo: https://github.com/psf/black.git +# rev: 23.12.1 +# hooks: +# - id: black +# files: (.*\.py)$ +# exclude: | +# (?x)^( +# examples/.+ +# )$ +# +# - repo: https://github.com/asottile/blacken-docs +# rev: 1.16.0 +# hooks: +# - id: blacken-docs +# args: [--line-length=120, --skip-errors] +# exclude: | +# (?x)^( +# examples/.+| +# docs/source-app +# )$ +# +# - repo: https://github.com/astral-sh/ruff-pre-commit +# rev: v0.1.9 +# hooks: +# - id: ruff +# args: [--fix, --exit-non-zero-on-fix, --no-cache] +# exclude: | +# (?x)^( +# examples/.+ +# )$ diff --git a/docker/chart/templates/pvc.yaml b/docker/chart/templates/pvc.yaml index fbaf9c5c2d8..812fb086507 100644 --- a/docker/chart/templates/pvc.yaml +++ b/docker/chart/templates/pvc.yaml @@ -7,6 +7,6 @@ spec: storageClassName: {{ .Values.pvc.scn }} # nfs-client accessModes: - "ReadWriteOnce" - resources: + resources: requests: storage: {{ .Values.pvc.resources }} diff --git a/docker/chart/values.yaml b/docker/chart/values.yaml index 5929120a96c..8ea221a64bb 100644 --- a/docker/chart/values.yaml +++ b/docker/chart/values.yaml @@ -22,4 +22,3 @@ pvc: name: itrex scn: nil resources: 2Gi - diff --git a/docs/api_doc/engine/api_py_engine.rst b/docs/api_doc/engine/api_py_engine.rst index 834b76f31a2..c69591d2550 100644 --- a/docs/api_doc/engine/api_py_engine.rst +++ b/docs/api_doc/engine/api_py_engine.rst @@ -8,4 +8,3 @@ The following API information is available: compile.rst graph.rst - diff --git a/docs/api_doc/engine/graph.rst b/docs/api_doc/engine/graph.rst index 1a07b1db73c..4f62a310089 100644 --- a/docs/api_doc/engine/graph.rst +++ b/docs/api_doc/engine/graph.rst @@ -4,4 +4,3 @@ Graph .. autoapisummary:: intel_extension_for_transformers.llm.runtime.deprecated.compile.graph.graph - diff --git a/docs/api_doc/kernel/types.rst b/docs/api_doc/kernel/types.rst index 59b7e095f59..4b6a56cd3a6 100644 --- a/docs/api_doc/kernel/types.rst +++ b/docs/api_doc/kernel/types.rst @@ -18,4 +18,3 @@ Operator Specific Types :project: Intel® Extension for Transformers .. 
doxygenfile:: kernels/transpose_mha_types.hpp :project: Intel® Extension for Transformers - diff --git a/docs/api_doc/kernel_api.rst b/docs/api_doc/kernel_api.rst index ae3bea70e05..f32e542a597 100644 --- a/docs/api_doc/kernel_api.rst +++ b/docs/api_doc/kernel_api.rst @@ -10,4 +10,3 @@ The following C++ API information is available: kernel/engine.rst kernel/operator_desc.rst kernel/types.rst - diff --git a/docs/api_doc/optimization/config.rst b/docs/api_doc/optimization/config.rst index 79a715cd2d1..d15479fcccf 100644 --- a/docs/api_doc/optimization/config.rst +++ b/docs/api_doc/optimization/config.rst @@ -8,4 +8,4 @@ Config intel_extension_for_transformers.transformers.quantization intel_extension_for_transformers.transformers.distillation intel_extension_for_transformers.transformers.pruning - intel_extension_for_transformers.transformers.mixture.auto_distillation \ No newline at end of file + intel_extension_for_transformers.transformers.mixture.auto_distillation diff --git a/docs/api_doc/optimization/model.rst b/docs/api_doc/optimization/model.rst index a686bffaed2..366e789cc19 100644 --- a/docs/api_doc/optimization/model.rst +++ b/docs/api_doc/optimization/model.rst @@ -3,4 +3,4 @@ Model .. autoapisummary:: - intel_extension_for_transformers.transformers.modeling \ No newline at end of file + intel_extension_for_transformers.transformers.modeling diff --git a/docs/api_doc/optimization/tf_optimization.rst b/docs/api_doc/optimization/tf_optimization.rst index 54ed8339661..3aa7cb7864a 100644 --- a/docs/api_doc/optimization/tf_optimization.rst +++ b/docs/api_doc/optimization/tf_optimization.rst @@ -3,4 +3,4 @@ TensorFlow Optimizer .. autoapisummary:: - intel_extension_for_transformers.transformers.optimizer_tf \ No newline at end of file + intel_extension_for_transformers.transformers.optimizer_tf diff --git a/docs/api_doc/optimization/trainer.rst b/docs/api_doc/optimization/trainer.rst index 443fa9fa7dd..aefc4cbb3db 100644 --- a/docs/api_doc/optimization/trainer.rst +++ b/docs/api_doc/optimization/trainer.rst @@ -3,4 +3,4 @@ Trainer .. autoapisummary:: - intel_extension_for_transformers.transformers.trainer \ No newline at end of file + intel_extension_for_transformers.transformers.trainer diff --git a/docs/architecture.md b/docs/architecture.md index 0e5c72a7c02..4ebbabba6ed 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -1,4 +1,4 @@ # Architecture of Intel® Extension for Transformers arch -
\ No newline at end of file +
diff --git a/docs/autodistillation.md b/docs/autodistillation.md index b1a771a48fe..985c634ed98 100644 --- a/docs/autodistillation.md +++ b/docs/autodistillation.md @@ -519,4 +519,4 @@ auto_distillation: ... ``` -Please refer to [example](../examples/huggingface/pytorch/text-classification/distillation/run_glue.py) for the details \ No newline at end of file +Please refer to [example](../examples/huggingface/pytorch/text-classification/distillation/run_glue.py) for the details diff --git a/docs/build_docs/source/example.rst b/docs/build_docs/source/example.rst index 809a31b9396..727ee7e6e57 100644 --- a/docs/build_docs/source/example.rst +++ b/docs/build_docs/source/example.rst @@ -6,5 +6,3 @@ Example docs/examples.md docs/intel_extension_for_transformers/llm/runtime/deprecated/docs/validated_model.md - - diff --git a/docs/build_docs/source/feature.rst b/docs/build_docs/source/feature.rst index 7a5e4f60225..85a2e455738 100644 --- a/docs/build_docs/source/feature.rst +++ b/docs/build_docs/source/feature.rst @@ -15,4 +15,3 @@ Features docs/metrics.md docs/objectives.md docs/pipeline.md - diff --git a/docs/build_docs/source/index.rst b/docs/build_docs/source/index.rst index 8d93104b721..fe9a5b90ece 100644 --- a/docs/build_docs/source/index.rst +++ b/docs/build_docs/source/index.rst @@ -16,5 +16,3 @@ Sections docs/release.md docs/legal.md Repo - - diff --git a/docs/build_docs/source/kernel.rst b/docs/build_docs/source/kernel.rst index dd0e29f668a..4c140108af2 100644 --- a/docs/build_docs/source/kernel.rst +++ b/docs/build_docs/source/kernel.rst @@ -8,4 +8,3 @@ Transformers-accelerated Libraries (formerly known as SparseLib) is a high-perfo docs/intel_extension_for_transformers/llm/runtime/deprecated/kernels/README.md kernel_perf.rst kernel_desc.rst - diff --git a/docs/build_docs/source/kernel_desc.rst b/docs/build_docs/source/kernel_desc.rst index e5ed728bb52..f35ebd37047 100644 --- a/docs/build_docs/source/kernel_desc.rst +++ b/docs/build_docs/source/kernel_desc.rst @@ -4,14 +4,13 @@ Implementation Details .. 
toctree:: :maxdepth: 1 - docs/intel_extension_for_transformers/llm/runtime/deprecated/kernels/docs/kernel_desc/3D_inference.md - docs/intel_extension_for_transformers/llm/runtime/deprecated/kernels/docs/kernel_desc/binaryop_injector.md - docs/intel_extension_for_transformers/llm/runtime/deprecated/kernels/docs/kernel_desc/eltwise_injector.md + docs/intel_extension_for_transformers/llm/runtime/deprecated/kernels/docs/kernel_desc/3D_inference.md + docs/intel_extension_for_transformers/llm/runtime/deprecated/kernels/docs/kernel_desc/binaryop_injector.md + docs/intel_extension_for_transformers/llm/runtime/deprecated/kernels/docs/kernel_desc/eltwise_injector.md docs/intel_extension_for_transformers/llm/runtime/deprecated/kernels/docs/kernel_desc/kernel_vnni.md - docs/intel_extension_for_transformers/llm/runtime/deprecated/kernels/docs/kernel_desc/kernel_amx.md - docs/intel_extension_for_transformers/llm/runtime/deprecated/kernels/docs/kernel_desc/kernel_avx512f.md - docs/intel_extension_for_transformers/llm/runtime/deprecated/kernels/docs/kernel_desc/kernel_layernormalized_spmm.md - docs/intel_extension_for_transformers/llm/runtime/deprecated/kernels/docs/kernel_desc/kernel_transpose_matmul.md - docs/intel_extension_for_transformers/llm/runtime/deprecated/kernels/docs/kernel_desc/kernel_transpose_mha.md - docs/intel_extension_for_transformers/llm/runtime/deprecated/kernels/docs/kernel_desc/kernel_dynamic_quant_matmul.md - + docs/intel_extension_for_transformers/llm/runtime/deprecated/kernels/docs/kernel_desc/kernel_amx.md + docs/intel_extension_for_transformers/llm/runtime/deprecated/kernels/docs/kernel_desc/kernel_avx512f.md + docs/intel_extension_for_transformers/llm/runtime/deprecated/kernels/docs/kernel_desc/kernel_layernormalized_spmm.md + docs/intel_extension_for_transformers/llm/runtime/deprecated/kernels/docs/kernel_desc/kernel_transpose_matmul.md + docs/intel_extension_for_transformers/llm/runtime/deprecated/kernels/docs/kernel_desc/kernel_transpose_mha.md + docs/intel_extension_for_transformers/llm/runtime/deprecated/kernels/docs/kernel_desc/kernel_dynamic_quant_matmul.md diff --git a/docs/build_docs/source/kernel_perf.rst b/docs/build_docs/source/kernel_perf.rst index 34e575163a3..11d1dadcda2 100644 --- a/docs/build_docs/source/kernel_perf.rst +++ b/docs/build_docs/source/kernel_perf.rst @@ -8,4 +8,3 @@ Here we introduce performance relates issues for users who might want detailed docs/intel_extension_for_transformers/llm/runtime/deprecated/kernels/docs/profiling.md docs/intel_extension_for_transformers/llm/runtime/deprecated/kernels/docs/validated_data.md - diff --git a/docs/build_docs/source/user_guide.rst b/docs/build_docs/source/user_guide.rst index 429574312f6..4e3335dbf02 100644 --- a/docs/build_docs/source/user_guide.rst +++ b/docs/build_docs/source/user_guide.rst @@ -7,7 +7,3 @@ User Guide feature.rst neural_engine.rst kernel.rst - - - - diff --git a/docs/build_docs/sphinx-requirements.txt b/docs/build_docs/sphinx-requirements.txt index 320b8149028..fd3250494a7 100644 --- a/docs/build_docs/sphinx-requirements.txt +++ b/docs/build_docs/sphinx-requirements.txt @@ -1,8 +1,8 @@ -sphinx -sphinx_rtd_theme +breathe recommonmark +setuptools_scm[toml]>=6.2 +sphinx +sphinx-autoapi sphinx-markdown-tables sphinx-md -sphinx-autoapi -breathe -setuptools_scm[toml]>=6.2 +sphinx_rtd_theme diff --git a/docs/code_of_conduct.md b/docs/code_of_conduct.md index 506329e7363..15635ed3df8 100644 --- a/docs/code_of_conduct.md +++ b/docs/code_of_conduct.md @@ -83,4 +83,4 @@ This Code of Conduct is 
adapted from the [Contributor Covenant][homepage], [vers For answers to common questions about this code of conduct, see the [FAQ][FAQ-page] page. -[FAQ-page]: https://www.contributor-covenant.org/faq \ No newline at end of file +[FAQ-page]: https://www.contributor-covenant.org/faq diff --git a/docs/component_owner.md b/docs/component_owner.md index def3055c9eb..82ed4c109ed 100644 --- a/docs/component_owner.md +++ b/docs/component_owner.md @@ -11,4 +11,4 @@ Contributions to Intel Extension for Transformers specific component must get ap | Tensorflow | lvliang-intel | PenghuiCheng | | Neuralchat | lvliang-intel | lkk12014402 | | Workflow | lvliang-intel | kevinintel | -| Tests | VincyZhang | XuehaoSun | \ No newline at end of file +| Tests | VincyZhang | XuehaoSun | diff --git a/docs/contributors.md b/docs/contributors.md index 1f7cc5fc5cd..4b92fd91bd0 100644 --- a/docs/contributors.md +++ b/docs/contributors.md @@ -131,4 +131,3 @@ All Contributors:
- diff --git a/docs/devcatalog.md b/docs/devcatalog.md index bdcc86c00e0..40c90abcb65 100644 --- a/docs/devcatalog.md +++ b/docs/devcatalog.md @@ -186,4 +186,4 @@ For more information about or to read about other relevant workflow examples, se Submit your questions, feature requests, and bug reports to the [GitHub issues](https://github.com/intel/intel-extension-for-transformers/issues) page. You may also reach out to [Maintainers](inc.maintainers@intel.com). -*Other names and brands may be claimed as the property of others. [Trademarks](http://www.intel.com/content/www/us/en/legal/trademarks.html) \ No newline at end of file +*Other names and brands may be claimed as the property of others. [Trademarks](http://www.intel.com/content/www/us/en/legal/trademarks.html) diff --git a/docs/metrics.md b/docs/metrics.md index 0052a395017..c1e81bde32f 100644 --- a/docs/metrics.md +++ b/docs/metrics.md @@ -26,4 +26,4 @@ All metrics be provide by [Huggingface datasets](https://github.com/huggingface/ ```python from intel_extension_for_transformers.transformers import metric metric.Metric(name="eval_f1", greater_is_better=True, is_relative=True, criterion=0.01, weight_ratio=None) - ``` \ No newline at end of file + ``` diff --git a/docs/papers/efficient_LLM_inference_on_cpus.md b/docs/papers/efficient_LLM_inference_on_cpus.md index 42c2207f71b..bb0506fa4a4 100644 --- a/docs/papers/efficient_LLM_inference_on_cpus.md +++ b/docs/papers/efficient_LLM_inference_on_cpus.md @@ -126,4 +126,3 @@ python run_generation.py \ --batch_size 56 \ --tasks "lambada_openai", "piqa", "hellaswag", "winogrande" ``` - diff --git a/docs/pipeline.md b/docs/pipeline.md index 0deb987137b..54cc209038d 100644 --- a/docs/pipeline.md +++ b/docs/pipeline.md @@ -60,4 +60,3 @@ For executor, we only accept ONNX model now for pipeline. Users can get ONNX mod ) # output: [{'label': 'POSITIVE', 'score': 0.9998886585235596}] ``` - diff --git a/docs/smoothquant.md b/docs/smoothquant.md index f58f3eadc51..d7c03234a18 100644 --- a/docs/smoothquant.md +++ b/docs/smoothquant.md @@ -118,4 +118,3 @@ A list of models that achieved a <1% accuracy drop is shown below. 
|:---------:|--------------|------------| | PyTorch | [0-1] / 'auto' | False | | IPEX | [0-1] / 'auto' | True / False(Version>2.1) | - diff --git a/docs/tutorials/pytorch/question-answering/benchmark.py b/docs/tutorials/pytorch/question-answering/benchmark.py index 98832f1172c..b98b4f5ef39 100644 --- a/docs/tutorials/pytorch/question-answering/benchmark.py +++ b/docs/tutorials/pytorch/question-answering/benchmark.py @@ -586,4 +586,4 @@ def compute_metrics(p: EvalPrediction): print('Batch size = {}'.format(training_args.per_device_eval_batch_size)) print("Finally Eval eval_f1 Accuracy: {}".format(eval_f1_static)) print("Latency: {:.3f} ms".format(evalTime / samples * 1000)) -print("Throughput: {} samples/sec".format(samples/evalTime)) \ No newline at end of file +print("Throughput: {} samples/sec".format(samples/evalTime)) diff --git a/docs/tutorials/pytorch/summarization/benchmark.py b/docs/tutorials/pytorch/summarization/benchmark.py index b8f16f1fe50..90331d206dd 100644 --- a/docs/tutorials/pytorch/summarization/benchmark.py +++ b/docs/tutorials/pytorch/summarization/benchmark.py @@ -1,7 +1,7 @@ import logging import os import numpy as np -import nltk +import nltk from datasets import load_dataset, load_metric from intel_extension_for_transformers.transformers import metrics, OptimizedModel from intel_extension_for_transformers.transformers.trainer import NLPSeq2SeqTrainer diff --git a/docs/tutorials/pytorch/text-classification/utils/temperature_scaling.py b/docs/tutorials/pytorch/text-classification/utils/temperature_scaling.py index ba06ede88b2..1e967fc02f2 100644 --- a/docs/tutorials/pytorch/text-classification/utils/temperature_scaling.py +++ b/docs/tutorials/pytorch/text-classification/utils/temperature_scaling.py @@ -1,7 +1,7 @@ # # # This code is based on https://github.com/gpleiss/temperature_scaling/blob/master/temperature_scaling.py, # Under the MIT license: -# +# # MIT License # # Copyright (c) 2017 Geoff Pleiss diff --git a/examples/.config/engine_deploy.json b/examples/.config/engine_deploy.json index ebdf67028de..35c5083e029 100755 --- a/examples/.config/engine_deploy.json +++ b/examples/.config/engine_deploy.json @@ -459,7 +459,7 @@ "sequence_len": 128, "mode": "accuracy/throughput", "warm_up": 100, - "iteration": 1000, + "iteration": 1000 } }, "launcher": { diff --git a/examples/.config/onnx_optimize.json b/examples/.config/onnx_optimize.json index 7e1221bf238..916f5854dfd 100644 --- a/examples/.config/onnx_optimize.json +++ b/examples/.config/onnx_optimize.json @@ -48,5 +48,5 @@ "int8": "false" } } - }, + } } diff --git a/intel_extension_for_transformers/langchain/__init__.py b/intel_extension_for_transformers/langchain/__init__.py index 2823243c0bb..18896e7b549 100644 --- a/intel_extension_for_transformers/langchain/__init__.py +++ b/intel_extension_for_transformers/langchain/__init__.py @@ -13,4 +13,4 @@ # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and -# limitations under the License. \ No newline at end of file +# limitations under the License. 
diff --git a/intel_extension_for_transformers/langchain/embeddings/__init__.py b/intel_extension_for_transformers/langchain/embeddings/__init__.py index d1c0f087a0c..95fc2cacabb 100644 --- a/intel_extension_for_transformers/langchain/embeddings/__init__.py +++ b/intel_extension_for_transformers/langchain/embeddings/__init__.py @@ -19,4 +19,4 @@ HuggingFaceEmbeddings, HuggingFaceBgeEmbeddings, HuggingFaceInstructEmbeddings -) \ No newline at end of file +) diff --git a/intel_extension_for_transformers/langchain/embeddings/embeddings.py b/intel_extension_for_transformers/langchain/embeddings/embeddings.py index 1c21331c454..a8d80011c60 100644 --- a/intel_extension_for_transformers/langchain/embeddings/embeddings.py +++ b/intel_extension_for_transformers/langchain/embeddings/embeddings.py @@ -62,7 +62,7 @@ class HuggingFaceEmbeddings(langchain_core.pydantic_v1.BaseModel, langchain_core model_name: str = DEFAULT_MODEL_NAME """Model name to use.""" cache_folder: Optional[str] = None - """Path to store models. + """Path to store models. Can be also set by SENTENCE_TRANSFORMERS_HOME environment variable.""" model_kwargs: Dict[str, Any] = langchain_core.pydantic_v1.Field(default_factory=dict) """Keyword arguments to pass to the model.""" @@ -113,7 +113,7 @@ def embed_documents(self, texts: List[str]) -> List[List[float]]: return embeddings.tolist() - def embed_query(self, text: str) -> List[float]: + def embed_query(self, text: str) -> List[float]: """Compute query embeddings using a HuggingFace transformer model. Args: @@ -234,7 +234,7 @@ class HuggingFaceInstructEmbeddings(langchain_core.pydantic_v1.BaseModel, langch model_name: str = DEFAULT_INSTRUCT_MODEL """Model name to use.""" cache_folder: Optional[str] = None - """Path to store models. + """Path to store models. Can be also set by SENTENCE_TRANSFORMERS_HOME environment variable.""" model_kwargs: Dict[str, Any] = langchain_core.pydantic_v1.Field(default_factory=dict) """Keyword arguments to pass to the model.""" @@ -248,7 +248,7 @@ class HuggingFaceInstructEmbeddings(langchain_core.pydantic_v1.BaseModel, langch def __init__(self, **kwargs: Any): """Initialize the sentence_transformer.""" super().__init__(**kwargs) - + # check sentence_transformers python package try: import sentence_transformers @@ -258,7 +258,7 @@ def __init__(self, **kwargs: Any): "Could not import sentence_transformers python package. " "Please install it with `pip install sentence_transformers`." ) from exc - + # check InstructorEmbedding python package try: import InstructorEmbedding @@ -268,7 +268,7 @@ def __init__(self, **kwargs: Any): "Could not import InstructorEmbedding python package. " "Please install it with `pip install InstructorEmbedding`." 
) from exc - + self.client = OptimizedInstructor( self.model_name, cache_folder=self.cache_folder, **self.model_kwargs ) diff --git a/intel_extension_for_transformers/langchain/embeddings/optimized_instructor_embedding.py b/intel_extension_for_transformers/langchain/embeddings/optimized_instructor_embedding.py index 20078ded4f2..bfaf15b4267 100644 --- a/intel_extension_for_transformers/langchain/embeddings/optimized_instructor_embedding.py +++ b/intel_extension_for_transformers/langchain/embeddings/optimized_instructor_embedding.py @@ -35,7 +35,7 @@ class OptimizedInstructorTransformer(InstructorEmbedding.INSTRUCTOR_Transformer) def __init__(self, *args, **kwargs): """Initialize the OptimizedInstructorTransformer.""" super().__init__(*args, **kwargs) - + def _load_model(self, model_name_or_path, config, cache_dir, **model_args): """Loads the transformer model""" if isinstance(config, T5Config): @@ -43,9 +43,9 @@ def _load_model(self, model_name_or_path, config, cache_dir, **model_args): elif isinstance(config, MT5Config): self._load_mt5_model(model_name_or_path, config, cache_dir, **model_args) else: - self.auto_model = OptimizedModel.from_pretrained(model_name_or_path, - config=config, - cache_dir=cache_dir, + self.auto_model = OptimizedModel.from_pretrained(model_name_or_path, + config=config, + cache_dir=cache_dir, **model_args) class OptimizedInstructor(InstructorEmbedding.INSTRUCTOR): @@ -53,24 +53,24 @@ def __init__(self, *args, **kwargs): """Initialize the OptimizedInstructor.""" super().__init__(*args, **kwargs) - def _load_auto_model(self, - model_name_or_path, - token: Optional[Union[bool, str]], + def _load_auto_model(self, + model_name_or_path, + token: Optional[Union[bool, str]], cache_folder: Optional[str], trust_remote_code: bool = False): # pragma: no cover """Creates a simple Transformer + Mean Pooling model and returns the modules.""" logger.warning("No sentence-transformers model found with name {}." 
\ "Creating a new one with MEAN pooling.".format(model_name_or_path)) transformer_model = OptimzedTransformer( - model_name_or_path, cache_dir=cache_folder, model_args={"token": token, + model_name_or_path, cache_dir=cache_folder, model_args={"token": token, "trust_remote_code": trust_remote_code}) pooling_model = sentence_transformers.models.Pooling( transformer_model.get_word_embedding_dimension(), 'mean') return [transformer_model, pooling_model] - - def _load_sbert_model(self, - model_name_or_path: str, - token: Optional[Union[bool, str]], + + def _load_sbert_model(self, + model_name_or_path: str, + token: Optional[Union[bool, str]], cache_folder: Optional[str], trust_remote_code: bool = False): """Loads a full sentence-transformers model.""" @@ -112,9 +112,9 @@ def _load_sbert_model(self, if module_config['idx']==0: logger.info('load Optimized InstructorTransformer') kwargs = {} - for config_name in ['sentence_bert_config.json', 'sentence_roberta_config.json', - 'sentence_distilbert_config.json', 'sentence_camembert_config.json', - 'sentence_albert_config.json', 'sentence_xlm-roberta_config.json', + for config_name in ['sentence_bert_config.json', 'sentence_roberta_config.json', + 'sentence_distilbert_config.json', 'sentence_camembert_config.json', + 'sentence_albert_config.json', 'sentence_xlm-roberta_config.json', 'sentence_xlnet_config.json']: config_path = sentence_transformers.util.load_file_path( model_name_or_path, config_name, token=token, cache_folder=cache_folder) @@ -139,5 +139,5 @@ def _load_sbert_model(self, model_name_or_path, module_config['path'], token, cache_folder) module = module_class.load(module_path) modules[module_config['name']] = module - + return modules diff --git a/intel_extension_for_transformers/langchain/embeddings/optimized_sentence_transformers.py b/intel_extension_for_transformers/langchain/embeddings/optimized_sentence_transformers.py index d56edb34b48..d78757576cd 100644 --- a/intel_extension_for_transformers/langchain/embeddings/optimized_sentence_transformers.py +++ b/intel_extension_for_transformers/langchain/embeddings/optimized_sentence_transformers.py @@ -44,9 +44,9 @@ def _load_model(self, model_name_or_path, config, cache_dir, **model_args): elif isinstance(config, MT5Config): # pragma: no cover self._load_mt5_model(model_name_or_path, config, cache_dir, **model_args) else: - self.auto_model = OptimizedModel.from_pretrained(model_name_or_path, - config=config, - cache_dir=cache_dir, + self.auto_model = OptimizedModel.from_pretrained(model_name_or_path, + config=config, + cache_dir=cache_dir, **model_args) class OptimizedSentenceTransformer(sentence_transformers.SentenceTransformer): @@ -55,9 +55,9 @@ def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) def _load_auto_model( - self, - model_name_or_path: str, - token: Optional[Union[bool, str]], + self, + model_name_or_path: str, + token: Optional[Union[bool, str]], cache_folder: Optional[str], trust_remote_code: bool = False): """ @@ -66,17 +66,17 @@ def _load_auto_model( logger.warning("No sentence-transformers model found with name {}." 
\ "Creating a new one with MEAN pooling.".format(model_name_or_path)) transformer_model = OptimzedTransformer( - model_name_or_path, cache_dir=cache_folder, model_args={"token": token, + model_name_or_path, cache_dir=cache_folder, model_args={"token": token, "trust_remote_code": trust_remote_code}) pooling_model = sentence_transformers.models.Pooling( transformer_model.get_word_embedding_dimension(), 'mean') return [transformer_model, pooling_model] - + def _load_sbert_model( - self, - model_name_or_path: str, - token: Optional[Union[bool, str]], - cache_folder: Optional[str], + self, + model_name_or_path: str, + token: Optional[Union[bool, str]], + cache_folder: Optional[str], trust_remote_code: bool = False): """ Loads a full sentence-transformers model @@ -121,9 +121,9 @@ def _load_sbert_model( # But, do load the config file first. if module_class == sentence_transformers.models.Transformer and module_config['path'] == "": kwargs = {} - for config_name in ['sentence_bert_config.json', 'sentence_roberta_config.json', - 'sentence_distilbert_config.json', 'sentence_camembert_config.json', - 'sentence_albert_config.json', 'sentence_xlm-roberta_config.json', + for config_name in ['sentence_bert_config.json', 'sentence_roberta_config.json', + 'sentence_distilbert_config.json', 'sentence_camembert_config.json', + 'sentence_albert_config.json', 'sentence_xlm-roberta_config.json', 'sentence_xlnet_config.json']: config_path = sentence_transformers.util.load_file_path( model_name_or_path, config_name, token=token, cache_folder=cache_folder) diff --git a/intel_extension_for_transformers/langchain/embeddings/requirements.txt b/intel_extension_for_transformers/langchain/embeddings/requirements.txt index 50df36aa266..70da9b406af 100644 --- a/intel_extension_for_transformers/langchain/embeddings/requirements.txt +++ b/intel_extension_for_transformers/langchain/embeddings/requirements.txt @@ -1,3 +1,3 @@ -langchain_core -InstructorEmbedding git+https://github.com/UKPLab/sentence-transformers.git@5c838a705c24c2dfd151a71674c99d09d014c1a9 +InstructorEmbedding +langchain_core diff --git a/intel_extension_for_transformers/langchain/embeddings/utils.py b/intel_extension_for_transformers/langchain/embeddings/utils.py index 497bfb88cca..b646b8d66ae 100644 --- a/intel_extension_for_transformers/langchain/embeddings/utils.py +++ b/intel_extension_for_transformers/langchain/embeddings/utils.py @@ -20,9 +20,9 @@ from intel_extension_for_transformers.transformers.utils.utility import LazyImport sentence_transformers = LazyImport("sentence_transformers") -def get_module_path(model_name_or_path: str, +def get_module_path(model_name_or_path: str, path: str, - token: Optional[Union[bool, str]], + token: Optional[Union[bool, str]], cache_folder: Optional[str]): is_local = os.path.isdir(model_name_or_path) if is_local: diff --git a/intel_extension_for_transformers/langchain/retrievers/vectorstore_retriever.py b/intel_extension_for_transformers/langchain/retrievers/vectorstore_retriever.py index 0bbc31fba46..c5cbbf3ff19 100644 --- a/intel_extension_for_transformers/langchain/retrievers/vectorstore_retriever.py +++ b/intel_extension_for_transformers/langchain/retrievers/vectorstore_retriever.py @@ -24,7 +24,7 @@ class VectorStoreRetriever(VectorRetriever): def __init__(self, document_store=None, **kwargs): super().__init__(**kwargs) - + def get_context(self, query): context = '' links = [] @@ -32,4 +32,4 @@ def get_context(self, query): for doc in retrieved_documents: context = context + doc.page_content + " " 
links.append(doc.metadata['source']) - return context.strip(), links \ No newline at end of file + return context.strip(), links diff --git a/intel_extension_for_transformers/langchain/vectorstores/chroma.py b/intel_extension_for_transformers/langchain/vectorstores/chroma.py index e5c7daa8ba6..de98db2cf65 100644 --- a/intel_extension_for_transformers/langchain/vectorstores/chroma.py +++ b/intel_extension_for_transformers/langchain/vectorstores/chroma.py @@ -51,7 +51,7 @@ class Chroma(Chroma_origin): def __init__(self, **kwargs): super().__init__(**kwargs) - + @classmethod def from_texts( cls: Type[Chroma], @@ -149,7 +149,7 @@ def from_documents( """ texts = [doc.page_content for doc in documents] metadatas = [doc.metadata for doc in documents] - if 'doc_id' in metadatas[0]: + if 'doc_id' in metadatas[0]: ids = [doc.metadata['doc_id'] for doc in documents] if sign == 'child': persist_directory = persist_directory + "_child" @@ -213,7 +213,7 @@ def build( **kwargs, ) return chroma_collection - + @classmethod def reload( @@ -226,7 +226,7 @@ def reload( client: Optional[chromadb.Client] = None, relevance_score_fn: Optional[Callable[[float], float]] = None, ) -> Chroma: - + if not persist_directory: persist_directory = _DEFAULT_PERSIST_DIR chroma_collection = cls( @@ -238,4 +238,3 @@ def reload( collection_metadata=collection_metadata, ) return chroma_collection - diff --git a/intel_extension_for_transformers/langchain/vectorstores/qdrant.py b/intel_extension_for_transformers/langchain/vectorstores/qdrant.py index aebb1d1d8dd..82c31558863 100644 --- a/intel_extension_for_transformers/langchain/vectorstores/qdrant.py +++ b/intel_extension_for_transformers/langchain/vectorstores/qdrant.py @@ -40,7 +40,7 @@ class Qdrant(Qdrant_origin): _LANGCHAIN_DEFAULT_COLLECTION_NAME = "langchain" - + @classmethod def from_documents( cls, @@ -62,10 +62,10 @@ def from_documents( documents (List[Document]): List of documents to add to the vectorstore. embedding (Optional[Embeddings]): A subclass of `Embeddings`, responsible for text vectorization. sign (Optional[str], optional): sign for retrieval_type of 'child_parent'. Defaults to None. - location (Optional[str], optional): + location (Optional[str], optional): If `:memory:` - use in-memory Qdrant instance. If `str` - use it as a `url` parameter. - If `None` - fallback to relying on `host` and `port` parameters. + If `None` - fallback to relying on `host` and `port` parameters. Defaults to None. url (Optional[str], optional): either host or str of "Optional[scheme], host, Optional[port], Optional[prefix]". Defaults to None. @@ -74,7 +74,7 @@ def from_documents( 'localhost'. Defaults to None. persist_directory (Optional[str], optional): Path in which the vectors will be stored while using local mode. Defaults to None. - collection_name (Optional[str], optional): Name of the Qdrant collection to be used. + collection_name (Optional[str], optional): Name of the Qdrant collection to be used. Defaults to _LANGCHAIN_DEFAULT_COLLECTION_NAME. force_recreate (bool, optional): _description_. Defaults to False. 
""" @@ -86,18 +86,18 @@ def from_documents( texts = [d.page_content for d in documents] metadatas = [d.metadata for d in documents] return cls.from_texts( - texts, - embedding, - metadatas=metadatas, + texts, + embedding, + metadatas=metadatas, location=location, url=url, api_key=api_key, host=host, - path=persist_directory, + path=persist_directory, collection_name=collection_name, force_recreate=force_recreate, **kwargs) - + @classmethod def build( cls, @@ -119,10 +119,10 @@ def build( documents (List[Document]): List of documents to add to the vectorstore. embedding (Optional[Embeddings]): A subclass of `Embeddings`, responsible for text vectorization. sign (Optional[str], optional): sign for retrieval_type of 'child_parent'. Defaults to None. - location (Optional[str], optional): + location (Optional[str], optional): If `:memory:` - use in-memory Qdrant instance. If `str` - use it as a `url` parameter. - If `None` - fallback to relying on `host` and `port` parameters. + If `None` - fallback to relying on `host` and `port` parameters. Defaults to None. url (Optional[str], optional): either host or str of "Optional[scheme], host, Optional[port], Optional[prefix]". Defaults to None. @@ -131,37 +131,37 @@ def build( 'localhost'. Defaults to None. persist_directory (Optional[str], optional): Path in which the vectors will be stored while using local mode. Defaults to None. - collection_name (Optional[str], optional): Name of the Qdrant collection to be used. + collection_name (Optional[str], optional): Name of the Qdrant collection to be used. Defaults to _LANGCHAIN_DEFAULT_COLLECTION_NAME. force_recreate (bool, optional): _description_. Defaults to False. - kwargs: + kwargs: Current used: port (Optional[int], optional): Port of the REST API interface. Defaults to 6333. grpc_port (int, optional): Port of the gRPC interface. Defaults to 6334. - prefer_grpc (bool, optional): If true - use gPRC interface whenever possible in custom methods. + prefer_grpc (bool, optional): If true - use gPRC interface whenever possible in custom methods. Defaults to False. https (Optional[bool], optional): If true - use HTTPS(SSL) protocol. - prefix (Optional[str], optional): + prefix (Optional[str], optional): If not None - add prefix to the REST URL path. Example: service/v1 will result in http://localhost:6333/service/v1/{qdrant-endpoint} for REST API. - timeout (Optional[float], optional): + timeout (Optional[float], optional): Timeout for REST and gRPC API requests. - + distance_func (str, optional): Distance function. One of: "Cosine" / "Euclid" / "Dot". Defaults to "Cosine". - content_payload_key (str, optional): A payload key used to store the content of the document. + content_payload_key (str, optional): A payload key used to store the content of the document. Defaults to CONTENT_KEY. metadata_payload_key (str, optional): A payload key used to store the metadata of the document. Defaults to METADATA_KEY. vector_name (Optional[str], optional): Name of the vector to be used internally in Qdrant. Defaults to VECTOR_NAME. shard_number (Optional[int], optional): Number of shards in collection. - replication_factor (Optional[int], optional): + replication_factor (Optional[int], optional): Replication factor for collection. Defines how many copies of each shard will be created. Have effect only in distributed mode. - write_consistency_factor (Optional[int], optional): + write_consistency_factor (Optional[int], optional): Write consistency factor for collection. 
Defines how many replicas should apply the operation for us to consider it successful. Increasing this number will make the collection more @@ -178,9 +178,9 @@ def build( hnsw_config (Optional[common_types.HnswConfigDiff], optional): Params for HNSW index. optimizers_config (Optional[common_types.OptimizersConfigDiff], optional): Params for optimizer. wal_config (Optional[common_types.WalConfigDiff], optional): Params for Write-Ahead-Log. - quantization_config (Optional[common_types.QuantizationConfig], optional): + quantization_config (Optional[common_types.QuantizationConfig], optional): Params for quantization; if None, quantization will be disabled. - init_from (Optional[common_types.InitFrom], optional): + init_from (Optional[common_types.InitFrom], optional): Use data stored in another collection to initialize this collection. on_disk (Optional[bool], optional): if True, vectors will be stored on disk. If None, default value will be used. @@ -222,8 +222,8 @@ def build( **kwargs, ) return qdrant_collection - - + + @classmethod def reload( cls, @@ -241,10 +241,10 @@ def reload( Args: embedding (Optional[Embeddings]): A subclass of `Embeddings`, responsible for text vectorization. - location (Optional[str], optional): + location (Optional[str], optional): If `:memory:` - use in-memory Qdrant instance. If `str` - use it as a `url` parameter. - If `None` - fallback to relying on `host` and `port` parameters. + If `None` - fallback to relying on `host` and `port` parameters. Defaults to None. url (Optional[str], optional): either host or str of "Optional[scheme], host, Optional[port], Optional[prefix]". Defaults to None. @@ -253,7 +253,7 @@ def reload( 'localhost'. Defaults to None. persist_directory (Optional[str], optional): Path in which the vectors will be stored while using local mode. Defaults to None. - collection_name (Optional[str], optional): Name of the Qdrant collection to be used. + collection_name (Optional[str], optional): Name of the Qdrant collection to be used. Defaults to _LANGCHAIN_DEFAULT_COLLECTION_NAME. force_recreate (bool, optional): Whether to force recreating the collection. Defaults to False. """ @@ -263,7 +263,7 @@ def reload( # for a single quick embedding to get vector size tmp_texts = ["foo"] - + qdrant_collection = cls.construct_instance( texts=tmp_texts, embedding=embedding, @@ -277,8 +277,8 @@ def reload( **kwargs ) return qdrant_collection - - + + def is_local( self, ): @@ -287,4 +287,4 @@ def is_local( isinstance(self.client._client, qdrant_client.local.qdrant_local.QdrantLocal): return True else: - return False \ No newline at end of file + return False diff --git a/intel_extension_for_transformers/llm/__init__.py b/intel_extension_for_transformers/llm/__init__.py index 1be1d8a9c5b..5193c828c62 100644 --- a/intel_extension_for_transformers/llm/__init__.py +++ b/intel_extension_for_transformers/llm/__init__.py @@ -13,4 +13,4 @@ # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and -# limitations under the License. \ No newline at end of file +# limitations under the License.
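The Chroma and Qdrant wrappers patched above share the same build/reload surface, so a minimal usage sketch may help while reviewing. It is not part of the diff: the embedding backend and model name are illustrative assumptions, and only arguments documented in the docstrings above are used.

from langchain.schema import Document
from langchain.embeddings import HuggingFaceEmbeddings  # assumed embedding backend
from intel_extension_for_transformers.langchain.vectorstores.qdrant import Qdrant

docs = [Document(page_content="hello qdrant", metadata={"source": "demo.txt"})]
embedding = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# build() embeds the documents and persists a local-mode collection.
db = Qdrant.build(documents=docs, embedding=embedding,
                  persist_directory="./qdrant_db", collection_name="langchain")

# reload() re-opens the persisted collection later; as the code above shows,
# it only embeds the throwaway text "foo" to recover the vector size.
db = Qdrant.reload(embedding=embedding,
                   persist_directory="./qdrant_db", collection_name="langchain")
assert db.is_local()  # True when backed by persist_directory (QdrantLocal client)

The Chroma wrapper follows the same build/reload pattern, with chromadb-specific arguments (client, collection_metadata, relevance_score_fn) in place of the Qdrant connection options.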
diff --git a/intel_extension_for_transformers/llm/amp/__init__.py b/intel_extension_for_transformers/llm/amp/__init__.py index 1be1d8a9c5b..5193c828c62 100644 --- a/intel_extension_for_transformers/llm/amp/__init__.py +++ b/intel_extension_for_transformers/llm/amp/__init__.py @@ -13,4 +13,4 @@ # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and -# limitations under the License. \ No newline at end of file +# limitations under the License. diff --git a/intel_extension_for_transformers/llm/evaluation/lm_code_eval/evaluator.py b/intel_extension_for_transformers/llm/evaluation/lm_code_eval/evaluator.py index 2435d7f8723..19246d632ce 100644 --- a/intel_extension_for_transformers/llm/evaluation/lm_code_eval/evaluator.py +++ b/intel_extension_for_transformers/llm/evaluation/lm_code_eval/evaluator.py @@ -105,5 +105,5 @@ def evaluate(model, with open(args.metric_output_path, "w") as f: f.write(dumped) - + return results diff --git a/intel_extension_for_transformers/llm/evaluation/lm_eval/models/__init__.py b/intel_extension_for_transformers/llm/evaluation/lm_eval/models/__init__.py index d50c735ebce..9e023a95d47 100644 --- a/intel_extension_for_transformers/llm/evaluation/lm_eval/models/__init__.py +++ b/intel_extension_for_transformers/llm/evaluation/lm_eval/models/__init__.py @@ -14,4 +14,3 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - diff --git a/intel_extension_for_transformers/llm/evaluation/lm_eval/models/huggingface.py b/intel_extension_for_transformers/llm/evaluation/lm_eval/models/huggingface.py index e3e64e2595f..86e13a573e8 100644 --- a/intel_extension_for_transformers/llm/evaluation/lm_eval/models/huggingface.py +++ b/intel_extension_for_transformers/llm/evaluation/lm_eval/models/huggingface.py @@ -351,7 +351,7 @@ def _create_auto_model( load_in_8bit=load_in_8bit, trust_remote_code=trust_remote_code, torch_dtype=torch_dtype - ) + ) else: if load_in_4bit: assert ( @@ -468,7 +468,7 @@ def add_special_tokens(self) -> bool: elif self.model_format == "runtime": return True elif self.AUTO_MODEL_CLASS is transformers.AutoModelForCausalLM: - return False + return False elif self.AUTO_MODEL_CLASS is transformers.AutoModel: return False elif self.AUTO_MODEL_CLASS is transformers.AutoModelForSeq2SeqLM: @@ -624,7 +624,7 @@ def __init__(self, *args, pretrained, model_format, **kwargs): from transformers import AutoTokenizer, TextStreamer from intel_extension_for_transformers.transformers import AutoModelForCausalLM self.runtime_model = AutoModelForCausalLM.from_pretrained(pretrained, quantization_config=self.woq_config) - + if self.model_format == "onnx": if not os.path.exists(os.path.join(pretrained, "decoder_model.onnx")) and \ not os.path.exists(os.path.join(pretrained, "decoder_with_past_model.onnx")) and \ diff --git a/intel_extension_for_transformers/llm/finetuning/data_utils.py b/intel_extension_for_transformers/llm/finetuning/data_utils.py index 5a4029aa39d..f2303d1147d 100644 --- a/intel_extension_for_transformers/llm/finetuning/data_utils.py +++ b/intel_extension_for_transformers/llm/finetuning/data_utils.py @@ -310,7 +310,7 @@ def create_data(self, examples): start = 1 for j in range(start, len(conv) - 1, 2): - + u = conv[j]["value"] ass = conv[j+1]["value"] prompt = prompt + self.user + u + 
self.end + '\n' + self.assistant @@ -475,4 +475,3 @@ def preprocess_dataset(raw_datasets, tokenizer, data_args, finetune_args): raise NotImplementedError(f'finetune task data preprocessing is not supported currently.') return raw_datasets, preprocess_fn - diff --git a/intel_extension_for_transformers/llm/finetuning/eval_utils.py b/intel_extension_for_transformers/llm/finetuning/eval_utils.py index a866ae43efe..abaf53d258c 100644 --- a/intel_extension_for_transformers/llm/finetuning/eval_utils.py +++ b/intel_extension_for_transformers/llm/finetuning/eval_utils.py @@ -91,4 +91,4 @@ def postprocess_text(preds, labels): result = metric.compute(use_stemmer=True) result = {k: round(v * 100, 4) for k, v in result.items()} - return result \ No newline at end of file + return result diff --git a/intel_extension_for_transformers/llm/finetuning/finetuning.py b/intel_extension_for_transformers/llm/finetuning/finetuning.py index 4e55cd3c500..28bd7f55970 100644 --- a/intel_extension_for_transformers/llm/finetuning/finetuning.py +++ b/intel_extension_for_transformers/llm/finetuning/finetuning.py @@ -70,8 +70,8 @@ class Finetuning: def __init__(self, finetuning_config: BaseFinetuningConfig): self.model_args, self.data_args, self.training_args, self.finetune_args = ( - finetuning_config.model_args, - finetuning_config.data_args, + finetuning_config.model_args, + finetuning_config.data_args, finetuning_config.training_args, finetuning_config.finetune_args ) @@ -781,7 +781,7 @@ def preprocess_logits_for_metrics(logits, labels): # like past_key_values, but logits always come first logits = logits[0] return logits.argmax(dim=-1) - + if training_args.do_train: # download model & vocab. diff --git a/intel_extension_for_transformers/llm/quantization/__init__.py b/intel_extension_for_transformers/llm/quantization/__init__.py index 2823243c0bb..18896e7b549 100644 --- a/intel_extension_for_transformers/llm/quantization/__init__.py +++ b/intel_extension_for_transformers/llm/quantization/__init__.py @@ -13,4 +13,4 @@ # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and -# limitations under the License. \ No newline at end of file +# limitations under the License. diff --git a/intel_extension_for_transformers/llm/quantization/autograd/__init__.py b/intel_extension_for_transformers/llm/quantization/autograd/__init__.py index 55c2040a698..f0f7c1bfcc8 100644 --- a/intel_extension_for_transformers/llm/quantization/autograd/__init__.py +++ b/intel_extension_for_transformers/llm/quantization/autograd/__init__.py @@ -16,4 +16,4 @@ # limitations under the License.
-from .functions import matmul_kbit \ No newline at end of file +from .functions import matmul_kbit diff --git a/intel_extension_for_transformers/llm/quantization/gptq_utils.py b/intel_extension_for_transformers/llm/quantization/gptq_utils.py index c4899b2fea5..53807988b55 100644 --- a/intel_extension_for_transformers/llm/quantization/gptq_utils.py +++ b/intel_extension_for_transformers/llm/quantization/gptq_utils.py @@ -34,4 +34,3 @@ def unpack_weight(qweight, scales, qzeros, q_config): torch.bitwise_and(weight, (2**bits) - 1, out=weight) return weight, scales, zeros - diff --git a/intel_extension_for_transformers/llm/quantization/nn/__init__.py b/intel_extension_for_transformers/llm/quantization/nn/__init__.py index 683cb2c77cc..0d198d95a2e 100644 --- a/intel_extension_for_transformers/llm/quantization/nn/__init__.py +++ b/intel_extension_for_transformers/llm/quantization/nn/__init__.py @@ -15,4 +15,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -from .modules import QuantizedLinearQBits \ No newline at end of file +from .modules import QuantizedLinearQBits diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/compile/extractors/onnx_extractor.py b/intel_extension_for_transformers/llm/runtime/deprecated/compile/extractors/onnx_extractor.py index a06c75f0432..f121a36a3c4 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/compile/extractors/onnx_extractor.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/compile/extractors/onnx_extractor.py @@ -35,7 +35,7 @@ class ONNXExtractor(object): """The ONNXExtractor class. Decorate the node in model.graph_def, and the new node has the attributes like input_tensors - and output_tensors, these tensors record the source/dest op name. All of these nodes + and output_tensors, these tensors record the source/dest op name. All of these nodes (in a list) will compose a graph, which is Graph class, as the return object. Args: diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/compile/extractors/tf_extractor.py b/intel_extension_for_transformers/llm/runtime/deprecated/compile/extractors/tf_extractor.py index 9f51f794f09..9c3674c255e 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/compile/extractors/tf_extractor.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/compile/extractors/tf_extractor.py @@ -36,7 +36,7 @@ class TensorflowExtractor(object): Decorate the node in model.graph_def, and the new node has the attributes like input_tensors and output_tensors, these tensors record the source/dest op name. All of these nodes (in a list) will compose a graph, which is Graph class, as the return object. 
- + Args: model: TensorflowModel diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/compile/extractors/torch_extractor.py b/intel_extension_for_transformers/llm/runtime/deprecated/compile/extractors/torch_extractor.py index 147ffc30c1b..2cf5a9cb236 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/compile/extractors/torch_extractor.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/compile/extractors/torch_extractor.py @@ -170,7 +170,7 @@ def __call__(self, model): fuse_position_ids(graph) fuse_view(graph) fuse_gather_indices(graph) - + new_graph = Graph() new_graph.framework_modeling_config['framework'] = 'torch' graph_nodes_dict = {} diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/compile/loaders/loader.py b/intel_extension_for_transformers/llm/runtime/deprecated/compile/loaders/loader.py index 5ddb30e48d6..762a82d6b1b 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/compile/loaders/loader.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/compile/loaders/loader.py @@ -46,7 +46,7 @@ def __call__(self, model, pattern_config=None): if framework == 'onnxruntime': if isinstance(model, str): model = onnx.load(model) - + try: from ..onnx_utils import ONNX_OPTIMIZER_PASS optimize_level = os.getenv('ONNX_OPTIMIZER_LEVEL', 1) diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/compile/logger.py b/intel_extension_for_transformers/llm/runtime/deprecated/compile/logger.py index 0c8a73a77bc..00478394034 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/compile/logger.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/compile/logger.py @@ -138,4 +138,4 @@ def warning(msg, *args, **kwargs): for _, line in enumerate(_pretty_dict(msg).split('\n')): Logger().get_logger().warning(line, *args, **kwargs) else: - Logger().get_logger().warning(msg, *args, **kwargs) \ No newline at end of file + Logger().get_logger().warning(msg, *args, **kwargs) diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/compile/onnx_utils.py b/intel_extension_for_transformers/llm/runtime/deprecated/compile/onnx_utils.py index 9b69b2d8d78..b759bcb778b 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/compile/onnx_utils.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/compile/onnx_utils.py @@ -80,7 +80,7 @@ def graph_node_names_details(model): is the node output name list; outputs in value is for output_tensor dest op Args: model: ONNXModel - + Returns: node_names_details: the graph node info dict @@ -274,7 +274,7 @@ def onnx_extract_operator(node, framework_model, nodes_dict, engine_graph=None): except BaseException: # if origin_tensor_name in nodes_dict: pre_node = nodes_dict[origin_tensor_name].node - + data = None if pre_node in framework_model.graph.initializer: if pre_node.data_type == TensorProto.BFLOAT16: @@ -310,7 +310,7 @@ def onnx_extract_operator(node, framework_model, nodes_dict, engine_graph=None): input_names.append(node.name) """Output_tensors. 
- + Note: in onnx, NodeProto has the output attribute """ diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/compile/ops/binary_op.py b/intel_extension_for_transformers/llm/runtime/deprecated/compile/ops/binary_op.py index ff44eb0b4e6..d388db193b0 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/compile/ops/binary_op.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/compile/ops/binary_op.py @@ -81,4 +81,4 @@ def set_attr(self, framework, node): self._input_tensors.append(Tensor(name=self._name + "_mul_val", data=np.array([-1]).astype(np.float32), shape=[1], - dest_op=[self._name])) \ No newline at end of file + dest_op=[self._name])) diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/compile/ops/concat.py b/intel_extension_for_transformers/llm/runtime/deprecated/compile/ops/concat.py index 28e83c9c0de..16e25e9fff6 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/compile/ops/concat.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/compile/ops/concat.py @@ -37,4 +37,3 @@ def set_attr(self, framework, node): self._attr['axis'] = node.attribute[0].ints if framework == "torch": self._attr['axis'] = node.inputsAt(node.inputsSize() - 1).toIValue() - \ No newline at end of file diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/compile/ops/conv.py b/intel_extension_for_transformers/llm/runtime/deprecated/compile/ops/conv.py index f9ce8ae4933..1fb94904789 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/compile/ops/conv.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/compile/ops/conv.py @@ -69,4 +69,3 @@ def set_attr(self, framework, node): self._attr['benchmark'] = node.inputsAt(9).toIValue() self._attr['deterministic'] = node.inputsAt(10).toIValue() self._attr['cudnn_enabled'] = node.inputsAt(11).toIValue() - diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/compile/ops/empty_ops.py b/intel_extension_for_transformers/llm/runtime/deprecated/compile/ops/empty_ops.py index c623c349d5d..ff430ee9a32 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/compile/ops/empty_ops.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/compile/ops/empty_ops.py @@ -595,4 +595,3 @@ class Zeros(Operator): def __init__(self): """The init function of this operator.""" super().__init__() - diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/compile/ops/gather.py b/intel_extension_for_transformers/llm/runtime/deprecated/compile/ops/gather.py index 34c61a3facd..eea650acb1c 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/compile/ops/gather.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/compile/ops/gather.py @@ -39,7 +39,7 @@ def set_attr(self, framework, node): except BaseException: axis = 0 self._attr['axis'] = axis - + if framework == 'onnxruntime': # idx_axis self._attr['axis'] = 0 @@ -54,7 +54,7 @@ def set_attr(self, framework, node): self._attr['axis'] = 0 - + if framework == 'torch': if node.kind() == 'aten::embedding': # indices: bs x seq_len diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/compile/ops/gather_elements.py b/intel_extension_for_transformers/llm/runtime/deprecated/compile/ops/gather_elements.py index 17c2f048f57..6bc91571bc1 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/compile/ops/gather_elements.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/compile/ops/gather_elements.py @@ 
-34,6 +34,3 @@ def set_attr(self, framework, node): for attribute in node.attribute: if attribute.name == 'axis': self._attr['axis'] = attribute.i - - - diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/compile/ops/layer_normalization.py b/intel_extension_for_transformers/llm/runtime/deprecated/compile/ops/layer_normalization.py index 28bb1ffbe04..ecbec021f18 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/compile/ops/layer_normalization.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/compile/ops/layer_normalization.py @@ -51,4 +51,4 @@ def set_attr(self, framework, node): """Extract the node attr from torchscript.""" if node.inputsSize() > 4: self._attr['epsilon'] = node.inputsAt(4).toIValue() - self._attr['normalized_shape'] = list2str(parseTorchListConstruct(node.inputsAt(1))) \ No newline at end of file + self._attr['normalized_shape'] = list2str(parseTorchListConstruct(node.inputsAt(1))) diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/compile/ops/pow.py b/intel_extension_for_transformers/llm/runtime/deprecated/compile/ops/pow.py index 5325ccbfa5b..11d1f887b34 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/compile/ops/pow.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/compile/ops/pow.py @@ -43,4 +43,4 @@ def set_attr(self, framework, node): data=data, dtype="fp32" ) - self.input_tensors.append(input_tensor) \ No newline at end of file + self.input_tensors.append(input_tensor) diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/compile/ops/reduce_mean.py b/intel_extension_for_transformers/llm/runtime/deprecated/compile/ops/reduce_mean.py index 0807e1942f3..3800fa376ec 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/compile/ops/reduce_mean.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/compile/ops/reduce_mean.py @@ -47,4 +47,4 @@ def set_attr(self, framework, node): if len(axis) == 1: self._attr['axis'] = axis[0] else: - self._attr['axis'] = list2str(axis) \ No newline at end of file + self._attr['axis'] = list2str(axis) diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/compile/ops/scatter_elements.py b/intel_extension_for_transformers/llm/runtime/deprecated/compile/ops/scatter_elements.py index 2770ab81f3e..a5eade0a2db 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/compile/ops/scatter_elements.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/compile/ops/scatter_elements.py @@ -34,6 +34,3 @@ def set_attr(self, framework, node): for attribute in node.attribute: if attribute.name == 'axis': self._attr['axis'] = attribute.i - - - diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/compile/ops/size.py b/intel_extension_for_transformers/llm/runtime/deprecated/compile/ops/size.py index c630e00dc37..2309e23876b 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/compile/ops/size.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/compile/ops/size.py @@ -31,4 +31,4 @@ def __init__(self): def set_attr(self, framework, node): """Extract the node attr from torchscript.""" - self._attr['dim'] = node.inputsAt(1).toIValue() \ No newline at end of file + self._attr['dim'] = node.inputsAt(1).toIValue() diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/compile/ops/slice_position_ids.py b/intel_extension_for_transformers/llm/runtime/deprecated/compile/ops/slice_position_ids.py index 0f02fd85e23..49af6e47dc6 100644 
--- a/intel_extension_for_transformers/llm/runtime/deprecated/compile/ops/slice_position_ids.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/compile/ops/slice_position_ids.py @@ -38,4 +38,3 @@ def set_attr(self, framework, node): self._attr['starts'] = node.inputsAt(2).toIValue() self._attr['ends_with_tensor'] = 1 self._attr['steps'] = node.inputsAt(4).toIValue() - diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/compile/ops/squeeze.py b/intel_extension_for_transformers/llm/runtime/deprecated/compile/ops/squeeze.py index aa46ac91364..3968fdc5615 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/compile/ops/squeeze.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/compile/ops/squeeze.py @@ -53,4 +53,3 @@ def set_attr(self, framework, node): self._input_tensors.pop() if framework == 'torch': self._attr['axes'] = node.inputsAt(1).toIValue() - diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/compile/ops/top_k.py b/intel_extension_for_transformers/llm/runtime/deprecated/compile/ops/top_k.py index 8868355b389..ee6c3b95010 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/compile/ops/top_k.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/compile/ops/top_k.py @@ -44,4 +44,4 @@ def set_attr(self, framework, node): self._attr['k'] = int(self._input_tensors[1].data) self._input_tensors.pop() # remove the data output_tensor, just keep indices - self._output_tensors = [self._output_tensors[1]] \ No newline at end of file + self._output_tensors = [self._output_tensors[1]] diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/compile/ops/transpose.py b/intel_extension_for_transformers/llm/runtime/deprecated/compile/ops/transpose.py index ebdeb1fcfe7..0ac445188a9 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/compile/ops/transpose.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/compile/ops/transpose.py @@ -60,4 +60,4 @@ def set_attr(self, framework, node): if node.kind() == 'aten::transpose': dim0 = node.inputsAt(1).toIValue() dim1 = node.inputsAt(2).toIValue() - self._attr['transpose_dims'] = list2str([dim0, dim1]) \ No newline at end of file + self._attr['transpose_dims'] = list2str([dim0, dim1]) diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/compile/ops/view.py b/intel_extension_for_transformers/llm/runtime/deprecated/compile/ops/view.py index 2387bb0c0bf..e9a30e63ca7 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/compile/ops/view.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/compile/ops/view.py @@ -38,4 +38,4 @@ def set_attr(self, framework, node): for i in range(1, node.inputsSize()): shape_list.append(node.inputsAt(i).toIValue()) shape_list = [-1 if x is None else x for x in shape_list] - self._attr['shape'] = list2str(shape_list) \ No newline at end of file + self._attr['shape'] = list2str(shape_list) diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/compile/optimizer.py b/intel_extension_for_transformers/llm/runtime/deprecated/compile/optimizer.py index b6c7b73ed75..acfee5e88ac 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/compile/optimizer.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/compile/optimizer.py @@ -28,7 +28,7 @@ class Optimizer: def __init__(self, graph, input_shape=None, *args, **kwargs): """The optimizer initialization. 
- + Args: graph: neural engine Graph class input_shape: list of list, model input data shape list diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/InnerproductReshapeFusion.py b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/InnerproductReshapeFusion.py index 5cd5a9a5e83..f37b92caeaf 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/InnerproductReshapeFusion.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/InnerproductReshapeFusion.py @@ -63,7 +63,7 @@ def __call__(self, model): }, 'returns': [3, 0] }, - + { 'patterns': { 'in': [[(0, 'InnerProduct'), (1, 'View'), (2, 'Reorder')], @@ -84,7 +84,7 @@ def __call__(self, model): 'input_data': [0] } ], [[0, 1, 2], 3]], - + 1: [[], [[], 1]] }, 'output_tensors': { @@ -116,7 +116,7 @@ def _set_attr1(new_node_names, ret_old_nodes, model): attr1['src_perm'] = "0,1,2,3" attr1['dst_perm'] = "0,2,1,3" reorder_node_idx.attr = attr1 - + def _set_attr(new_node_names, ret_old_nodes, model): for i in range(len(new_node_names)): mat_node_idx = model.get_node_id(new_node_names[i][0]) @@ -129,7 +129,7 @@ def _set_attr(new_node_names, ret_old_nodes, model): attr['reshape'] = ret_old_nodes[i][0].attr['shape'] attr['reshape_dims'] = '0' model.nodes[mat_node_idx].attr = attr - + if model.framework_modeling_config['framework'] == 'torch': pattern_dict = pattern_mapping_config['InnerproductReshapeFusion'][0] model, new_node_names, ret_old_nodes = util.pattern_mapping("InnerproductReshapeFusion", @@ -139,7 +139,7 @@ def _set_attr(new_node_names, ret_old_nodes, model): pattern_dict = pattern_mapping_config['InnerproductReshapeFusion'][1] - model, new_node_names, ret_old_nodes = util.pattern_mapping("InnerproductReshapeFusion", + model, new_node_names, ret_old_nodes = util.pattern_mapping("InnerproductReshapeFusion", pattern_dict, model) if len(new_node_names) != 0: _set_attr1(new_node_names, ret_old_nodes, model) diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/add_cls_token.py b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/add_cls_token.py index 15bff9b6d46..479596d11e8 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/add_cls_token.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/add_cls_token.py @@ -143,7 +143,7 @@ def __call__(self, model): model.nodes[transpose_node_idx].attr = ret_old_nodes[j][0].attr concat_node_idx = model.get_node_id(new_node_names[j][1]) model.nodes[concat_node_idx].attr = ret_old_nodes[j][1].attr - + return model return model diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/add_embeddings.py b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/add_embeddings.py index 415b94901e6..0dd8199c07d 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/add_embeddings.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/add_embeddings.py @@ -231,7 +231,7 @@ def _set_attr(hidden_size, epsilon, node_names, model, is_vit = False): # shape = [bs, seq_len, hidden_size] after embeddings for i in range(len(pattern_mapping_config['AddEmbeddings']) - 1): pattern_dict = pattern_mapping_config['AddEmbeddings'][i] - model, new_node_names, ret_old_nodes = util.pattern_mapping("AddEmbeddings", + model, new_node_names, ret_old_nodes = util.pattern_mapping("AddEmbeddings", 
pattern_dict, model) if len(new_node_names) != 0: for j in range(len(new_node_names)): @@ -246,9 +246,9 @@ def _set_attr(hidden_size, epsilon, node_names, model, is_vit = False): if len(pattern_dict['patterns']['in'][0]) == 2: binary_add_node_idx = model.get_node_id(new_node_names[j][0]) model.nodes[binary_add_node_idx].attr = OrderedDict() - + return model - + # shape = [seq_len, bs, hidden_size] after embeddings for pattern_dict in pattern_mapping_config['AddEmbeddings'][-1:]: model, new_node_names, ret_old_nodes = util.pattern_mapping("AddEmbeddings", diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/arangewithreciprocal.py b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/arangewithreciprocal.py index a32da4fb8f4..b5f0a11401d 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/arangewithreciprocal.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/arangewithreciprocal.py @@ -37,10 +37,10 @@ def __call__(self, model): 'ArangewithReciprocal': [ { 'patterns': { - 'in': [[(0, 'Shape'), (1, 'Arange'), (2, 'Div'), (3, 'Pow'), + 'in': [[(0, 'Shape'), (1, 'Arange'), (2, 'Div'), (3, 'Pow'), (4, 'Reciprocal'), (5, 'Mul')] ], - 'out': [[(0, 'Range'), (1, 'Div'), (2, 'Pow'), + 'out': [[(0, 'Range'), (1, 'Div'), (2, 'Pow'), (3, 'Div')]] }, 'search_mode': 'op_type', @@ -49,7 +49,7 @@ def __call__(self, model): 1: 2, 2: 3, 3: 5 - + }, 'input_tensors': { 0: [[{ @@ -75,16 +75,16 @@ def __call__(self, model): }, 'returns': [0] }, - - + + ] } def _set_attr(new_node_names, ret_old_nodes, model): for i in range(len(new_node_names)): - + slice_node = model.get_node_by_name(ret_old_nodes[i][0].input_tensors[0].source_op[0]) fixed_pos_embedding_dim = int(slice_node.attr['ends']) range_node_idx = model.get_node_id(new_node_names[i][0]) @@ -106,10 +106,10 @@ def _set_attr(new_node_names, ret_old_nodes, model): OrderedDict({'algorithm': 'div'}) reciprocal_node = model.get_node_by_name(new_node_names[i][3]) reciprocal_node.input_tensors[0].data = np.array([1], dtype=np.float32) - + if model.framework_modeling_config['framework'] == 'torch': pattern_dict = pattern_mapping_config['ArangewithReciprocal'][0] - model, new_node_names, ret_old_nodes = util.pattern_mapping("ArangewithReciprocal", + model, new_node_names, ret_old_nodes = util.pattern_mapping("ArangewithReciprocal", pattern_dict, model) if len(new_node_names) != 0: _set_attr(new_node_names, ret_old_nodes, model) diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/attentionBlock_WeightReshapeTo4D.py b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/attentionBlock_WeightReshapeTo4D.py index d9a046560fb..f3321d48457 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/attentionBlock_WeightReshapeTo4D.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/attentionBlock_WeightReshapeTo4D.py @@ -54,4 +54,3 @@ def __call__(self, model): add_node.input_tensors[1].shape = [1, 512, 1, 1] return model - diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/attention_output_layer_norm_length_adaptive_keep_indices.py b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/attention_output_layer_norm_length_adaptive_keep_indices.py index f72373425f7..3c8f00d7e60 100644 --- 
a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/attention_output_layer_norm_length_adaptive_keep_indices.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/attention_output_layer_norm_length_adaptive_keep_indices.py @@ -159,7 +159,7 @@ def __call__(self, model): }, 'returns': [5,6] }, - + # int8 lat { 'patterns': { diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/attention_reshape.py b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/attention_reshape.py index 974e8179cdc..c583fcca68e 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/attention_reshape.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/attention_reshape.py @@ -92,7 +92,7 @@ def __call__(self, model): # Lat_int8 { 'patterns': { - 'in': [ [(0, 'Shape'), (1, 'Gather'), (2, 'Gather'), (3, 'Unsqueeze'), + 'in': [ [(0, 'Shape'), (1, 'Gather'), (2, 'Gather'), (3, 'Unsqueeze'), (4, 'Concat'), (6, 'Reshape'),(7, 'MatMulWithBias')], [(),(5, 'Transpose'), (6, 'Reshape')]], 'out': [[(0, 'Transpose'), (1, 'Reshape'), (2, 'MatMulWithBias')]] diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/collect_quant_info.py b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/collect_quant_info.py index b6aaf86c416..f4fcb5b8133 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/collect_quant_info.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/collect_quant_info.py @@ -52,7 +52,7 @@ def __call__(self, model): 'patterns': { 'in': [[(0, 'Quantize'), (1, ['Dequantize'])]], }, - + }, { 'patterns': { diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/conv_reshape.py b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/conv_reshape.py index f7d193a2c0b..50b7daef2c2 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/conv_reshape.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/conv_reshape.py @@ -127,5 +127,5 @@ def _set_attr(channel, node_names, model): conv_node_idx = model.get_node_id(new_node_names[i][0]) model.nodes[conv_node_idx].attr = ret_old_nodes[i][1].attr model.nodes[conv_node_idx].attr['src_perm'] = ret_old_nodes[i][0].attr['dst_perm'] - + return model diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/einsumwitharange.py b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/einsumwitharange.py index 3cd27381619..ddb9173081d 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/einsumwitharange.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/einsumwitharange.py @@ -66,7 +66,7 @@ def __call__(self, model): }, 'returns': [0, 1, 2] }, - + { 'patterns': { 'in': [[(0, 'Shape'), (2, 'Add'), (3, 'Arange'), (4, 'Einsum')], @@ -90,8 +90,8 @@ def __call__(self, model): 2: [[{ 4: [1] }], [[1], 2]] - - + + }, 'output_tensors': { 0: [[], [[], 1]], @@ -102,7 +102,7 @@ def __call__(self, model): }, 'returns': [0] } - + ] } if model.framework_modeling_config['framework'] != 'torch': @@ -114,7 +114,7 @@ def _set_attr(new_node_names, ret_old_nodes, model): if 'end' in ret_old_nodes[i][0].attr.keys(): attr['end_with_shape'] = ret_old_nodes[i][0].attr['end'] model.nodes[range_node_idx].attr = 
attr - + matmul_node = model.get_node_by_name(new_node_names[i][2]) reshape_node = model.get_node_by_name(new_node_names[i][1]) reshape_node.attr = OrderedDict({'dst_shape': '-1, 1'}) @@ -131,13 +131,13 @@ def _set_attr(new_node_names, ret_old_nodes, model): input_tensors=[matmul_node.input_tensors[1]], output_tensors=[reshape_output], attr=OrderedDict({'dst_shape': '1, -1'})) - + matmul_node.input_tensors[1] = reshape_output insert_idx = model.get_node_id(new_node_names[i][2]) model.insert_nodes(insert_idx, [reshape_op]) - + pattern_dict = pattern_mapping_config['EinsumwithArange'][0] - model, new_node_names, ret_old_nodes = util.pattern_mapping("EinsumwithArange", + model, new_node_names, ret_old_nodes = util.pattern_mapping("EinsumwithArange", pattern_dict, model) if len(new_node_names) != 0: _set_attr(new_node_names, ret_old_nodes, model) @@ -148,8 +148,8 @@ def _set_attr1(new_node_names, ret_old_nodes, model): attr = OrderedDict() attr['algorithm'] = "add" attr['end_with_shape'] = 1 - model.nodes[range_node_idx].attr = attr - + model.nodes[range_node_idx].attr = attr + matmul_node = model.get_node_by_name(new_node_names[i][2]) reshape_node = model.get_node_by_name(new_node_names[i][1]) reshape_node.attr = OrderedDict({'dst_shape': '-1, 1'}) @@ -169,9 +169,9 @@ def _set_attr1(new_node_names, ret_old_nodes, model): matmul_node.input_tensors[1] = reshape_output insert_idx = model.get_node_id(new_node_names[i][2]) model.insert_nodes(insert_idx, [reshape_op]) - + pattern_dict = pattern_mapping_config['EinsumwithArange'][1] - model, new_node_names, ret_old_nodes = util.pattern_mapping("EinsumwithArange", + model, new_node_names, ret_old_nodes = util.pattern_mapping("EinsumwithArange", pattern_dict, model) if len(new_node_names) != 0: _set_attr1(new_node_names, ret_old_nodes, model) diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/embeddingbag.py b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/embeddingbag.py index 5b86713cfb8..443649abcf9 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/embeddingbag.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/embeddingbag.py @@ -103,7 +103,7 @@ def _set_attr(hidden_size, axis, batch_dims, node_names, model): for i in range(len(pattern_mapping_config['EmbeddingBag'])): pattern_dict = pattern_mapping_config['EmbeddingBag'][i] - model, new_node_names, ret_old_nodes = util.pattern_mapping("EmbeddingBag", + model, new_node_names, ret_old_nodes = util.pattern_mapping("EmbeddingBag", pattern_dict, model) if len(new_node_names) != 0 and i == 0: diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/gelu.py b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/gelu.py index 44ca0173216..26be23ce16b 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/gelu.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/gelu.py @@ -110,7 +110,7 @@ def __call__(self, model): for i in range(len(pattern_mapping_config['Gelu'])): pattern_dict = pattern_mapping_config['Gelu'][i] - model, new_node_names, ret_old_nodes = util.pattern_mapping("Gelu", + model, new_node_names, ret_old_nodes = util.pattern_mapping("Gelu", pattern_dict, model) if len(new_node_names) != 0: for j in range(len(new_node_names)): diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/generate_sequence.py 
b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/generate_sequence.py index 8b61c08ae8e..4c766cb049a 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/generate_sequence.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/generate_sequence.py @@ -81,7 +81,7 @@ def __call__(self, model): }], [[0], 1]], }, 'returns': [5, 0] - } + } ] } collect_node = [] @@ -98,7 +98,7 @@ def __call__(self, model): attr["step"] = int(old_node.input_tensors[2].data) new_node_idx = model.get_node_id(new_node_names[j][0]) model.nodes[new_node_idx].attr = attr - + if i == 1: collect_node.append(ret_old_nodes[j][1]) model.insert_nodes(10, collect_node) diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/groupNorm.py b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/groupNorm.py index 67bedbd5376..4ab29e13106 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/groupNorm.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/groupNorm.py @@ -24,10 +24,10 @@ @pattern_registry(pattern_type='GroupNorm') class GroupNorm(Pattern): """ - The input channels are separated into num_groups groups, each containing num_channels / + The input channels are separated into num_groups groups, each containing num_channels / num_groups channels. Each group is calculated as: y = (x - E(x)) / sqrt(Var(x) + epsilon) * gamma + beta - For more info, see: https://pytorch.org/docs/stable/generated/torch.nn.GroupNorm.html + For more info, see: https://pytorch.org/docs/stable/generated/torch.nn.GroupNorm.html """ def __call__(self, model): @@ -86,4 +86,4 @@ def _set_attr(group, channels, epsilon, node_names, model): return model - return model \ No newline at end of file + return model diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/groupNormSwish.py b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/groupNormSwish.py index 9face170578..1a296516af1 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/groupNormSwish.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/groupNormSwish.py @@ -24,10 +24,10 @@ @pattern_registry(pattern_type='GroupNormSwish') class GroupNormSwish(Pattern): """ - The input channels are separated into num_groups groups, each containing num_channels / + The input channels are separated into num_groups groups, each containing num_channels / num_groups channels.
Each group is calculated as: y = (x - E(x)) / sqrt(Var(x) + epsilon) * gamma + beta - For more info, see: https://pytorch.org/docs/stable/generated/torch.nn.GroupNorm.html + For more info, see: https://pytorch.org/docs/stable/generated/torch.nn.GroupNorm.html """ def __call__(self, model): diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/innerproductwithbiasgelu.py b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/innerproductwithbiasgelu.py index 76f1f5408e8..26e88a8b706 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/innerproductwithbiasgelu.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/innerproductwithbiasgelu.py @@ -36,7 +36,7 @@ def __call__(self, model): 'InnerproductWithBiasGelu': [ { 'patterns': { - 'in': [[(0, 'InnerProduct'), (1, 'Pow'), (3, 'Mul'), + 'in': [[(0, 'InnerProduct'), (1, 'Pow'), (3, 'Mul'), (4, 'Add'), (5, 'Mul'),(6, 'Tanh'), (7, 'Add'), (8, 'Mul')], [(0, 'InnerProduct'), (2, 'Mul'), (8, 'Mul')] ], @@ -74,7 +74,7 @@ def _set_attr(new_node_names, ret_old_nodes, model): #attr['src1_perm'] = '1, 0' model.nodes[mat_node_idx].attr = attr pattern_dict = pattern_mapping_config['InnerproductWithBiasGelu'][0] - model, new_node_names, ret_old_nodes = util.pattern_mapping("InnerproductWithBiasGelu", + model, new_node_names, ret_old_nodes = util.pattern_mapping("InnerproductWithBiasGelu", pattern_dict, model) if len(new_node_names) != 0: _set_attr(new_node_names, ret_old_nodes, model) diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/innerproductwithslice.py b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/innerproductwithslice.py index eae1ea0fd4a..d0d2962ed63 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/innerproductwithslice.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/innerproductwithslice.py @@ -66,7 +66,7 @@ def __call__(self, model): if model.framework_modeling_config['framework'] != 'torch': return model - + def _set_attr(new_node_names, ret_old_nodes, model): for i in range(len(new_node_names)): mat_node_idx = model.get_node_id(new_node_names[i][0]) diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/innerproductwithswish.py b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/innerproductwithswish.py index 071ab06e2f7..f5dee1036de 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/innerproductwithswish.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/innerproductwithswish.py @@ -72,7 +72,7 @@ def _set_attr(new_node_names, ret_old_nodes, model): attr['append_op'] = 'swish' model.nodes[mat_node_idx].attr = attr pattern_dict = pattern_mapping_config['InnerproductWithSwish'][0] - model, new_node_names, ret_old_nodes = util.pattern_mapping("InnerproductWithSwish", + model, new_node_names, ret_old_nodes = util.pattern_mapping("InnerproductWithSwish", pattern_dict, model) if len(new_node_names) != 0: _set_attr(new_node_names, ret_old_nodes, model) diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/input_data.py b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/input_data.py index b1d11de299f..8c38d08c7e0 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/input_data.py +++
b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/input_data.py @@ -57,7 +57,7 @@ def __call__(self, model): }, 'returns': [0, 1, 2] }, - + # onnx model from huggingface { 'patterns': { @@ -128,7 +128,7 @@ def __call__(self, model): 1: [0] }, { 2: [0] - }], + }], [[0, 1, 2], 3]] }, 'returns': [0, 1, 2] @@ -199,7 +199,7 @@ def __call__(self, model): 1: [0] }, { 2: [0] - }], + }], [[0, 1, 2], 3]] }, 'returns': [0, 1, 2] @@ -225,7 +225,7 @@ def __call__(self, model): }, 'returns': [] }, - + # minilmv2-lat-roberta { 'patterns': { diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/insert_quant_node.py b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/insert_quant_node.py index 9e6b7676e10..3eab3f8e7ca 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/insert_quant_node.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/insert_quant_node.py @@ -127,7 +127,7 @@ def get_scale_zp(tensor_min_data, tensor_max_data, dtype): quant_max = Tensor( name=output_name + "_max", shape=[quant_info[input_name][4].size], - data=np.array(quant_info[input_name][4].astype("float32")), + data=np.array(quant_info[input_name][4].astype("float32")), dtype="fp32") # insert output min and max tensor model.change_node_input_tensors(node.name, insert_offset + 4, @@ -214,7 +214,7 @@ def get_scale_zp(tensor_min_data, tensor_max_data, dtype): quant_max = Tensor( name=output_name + "_max", shape=[quant_info[input_name][4].size], - data=np.array(quant_info[input_name][4].astype("float32")), + data=np.array(quant_info[input_name][4].astype("float32")), dtype="fp32") # insert output min and max tensor model.change_node_input_tensors(node.name, insert_offset + 7, @@ -241,7 +241,7 @@ def get_scale_zp(tensor_min_data, tensor_max_data, dtype): node.input_tensors[0] = \ model.get_node_by_name(src0_source_op[0]).input_tensors[0] node.input_tensors[0].dest_op = [node.name] - else: + else: remove_list.append(src1_source_op[0]) node.input_tensors[1] = \ model.get_node_by_name(src1_source_op[0]).input_tensors[0] diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/int8_bf16_mixed_precision_checker.py b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/int8_bf16_mixed_precision_checker.py index 421786d14e1..af99174642c 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/int8_bf16_mixed_precision_checker.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/int8_bf16_mixed_precision_checker.py @@ -61,7 +61,7 @@ def _all_dst_ops(node, model, checker, output_dtype): op.attr.get('output_dtype', 'fp32') != output_dtype: return False return True - + def _insert_bf16_quant_node(pre_node, model): output_tensor = copy.deepcopy(pre_node.output_tensors[0]) output_tensor.dtype = 'bf16' @@ -146,7 +146,7 @@ def _revert_logits_output_dtype(model, output_dtype): non_quantized_patterns = [[(0, 'Range'), (1, ['Div', 'BinaryOp']), (2, 'Pow'), (3, ['Div', 'BinaryOp']), (4, 'Reshape'), (7, ['Matmul', 'Einsum', 'BatchMatmul'])], - [(), (5, 'Range'), (6, 'Reshape'), + [(), (5, 'Range'), (6, 'Reshape'), (7, ['Matmul', 'Einsum', 'BatchMatMul'])]] match_ret = util.search_pattern(non_quantized_patterns, model) for ret in match_ret: diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/interact_features.py 
b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/interact_features.py index f027c009cce..7188a113145 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/interact_features.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/interact_features.py @@ -36,7 +36,7 @@ def __call__(self, model): # DLRM onnx model { 'patterns': { - 'in': [[(0, 'Relu'), (1, 'Shape'), (2, 'Gather'), (3, 'Unsqueeze'), + 'in': [[(0, 'Relu'), (1, 'Shape'), (2, 'Gather'), (3, 'Unsqueeze'), (7, 'Concat'), (9, 'Reshape')], [(0, 'Relu'), (4, 'Shape'), (5, 'Gather'), (6, 'Unsqueeze'), (7, 'Concat')], @@ -52,7 +52,7 @@ def __call__(self, model): 'input_tensors': { 0: [[{0: [0]}], [[0], 1]], 1: [[], [[], 1]], - 2: [[{1: [0]}], [[1], 2]], + 2: [[{1: [0]}], [[1], 2]], }, 'output_tensors': { 0 : [[{0: [0]}], [[0], 1]], @@ -124,7 +124,7 @@ def _set_attr(concat_num, node_names, model): ret_old_nodes[j][2].name) model.nodes[relu_node_idx].output_tensors[0].dest_op.remove( ret_old_nodes[j][3].name) - + elif len(new_node_names) != 0 and i == 1: for j in range(len(new_node_names)): matmul_attr = OrderedDict() diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/last_layer_shape.py b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/last_layer_shape.py index c3edf693460..0921b2a925b 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/last_layer_shape.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/last_layer_shape.py @@ -141,7 +141,7 @@ def __call__(self, model): # bert_mlperf pattern_dict = pattern_mapping_config['LastLayerShape'][0] - model, new_node_names, ret_old_nodes = util.pattern_mapping("LastLayerShape", + model, new_node_names, ret_old_nodes = util.pattern_mapping("LastLayerShape", pattern_dict, model) if len(new_node_names) != 0: for i in range(len(new_node_names)): @@ -163,7 +163,7 @@ def __call__(self, model): # bert_base_mrpc pattern_dict = pattern_mapping_config['LastLayerShape'][1] - model, new_node_names, ret_old_nodes = util.pattern_mapping("LastLayerShape", + model, new_node_names, ret_old_nodes = util.pattern_mapping("LastLayerShape", pattern_dict, model) if len(new_node_names) != 0: for i in range(len(new_node_names)): @@ -190,7 +190,7 @@ def __call__(self, model): # roberta_base / distil_bert pattern_dict = pattern_mapping_config['LastLayerShape'][2] - model, new_node_names, ret_old_nodes = util.pattern_mapping("LastLayerShape", + model, new_node_names, ret_old_nodes = util.pattern_mapping("LastLayerShape", pattern_dict, model) if len(new_node_names) != 0: for i in range(len(new_node_names)): diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/layer_norm.py b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/layer_norm.py index 3069569678a..949cb294528 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/layer_norm.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/layer_norm.py @@ -208,14 +208,14 @@ def _set_attr(epsilon, node_names, model): # bert_base layer_norm patterns pattern_dict = pattern_mapping_config['LayerNorm'][0] - model, new_node_names, ret_old_nodes = util.pattern_mapping("LayerNorm", + model, new_node_names, ret_old_nodes = util.pattern_mapping("LayerNorm", pattern_dict, model) if len(new_node_names) != 0: for i in range(len(new_node_names)): epsilon = 
ret_old_nodes[i][0].attr['epsilon'] _set_attr(epsilon, new_node_names[i], model) pattern_dict = pattern_mapping_config['LayerNorm'][1] - model, new_node_names, ret_old_nodes = util.pattern_mapping("LayerNorm", + model, new_node_names, ret_old_nodes = util.pattern_mapping("LayerNorm", pattern_dict, model) assert len(new_node_names) != 0 for i in range(len(new_node_names)): @@ -226,7 +226,7 @@ def _set_attr(epsilon, node_names, model): for i in range(2, len(pattern_mapping_config['LayerNorm'])): pattern_dict = pattern_mapping_config['LayerNorm'][i] - model, new_node_names, ret_old_nodes = util.pattern_mapping("LayerNorm", + model, new_node_names, ret_old_nodes = util.pattern_mapping("LayerNorm", pattern_dict, model) if len(new_node_names) != 0: for j in range(len(new_node_names)): diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/layer_norm_with_reduce_mean.py b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/layer_norm_with_reduce_mean.py index a029237587e..54a938c194d 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/layer_norm_with_reduce_mean.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/layer_norm_with_reduce_mean.py @@ -97,7 +97,7 @@ def _set_attr(hidden_size, epsilon, reduce_mean_attr, node_names, model): pattern_dict = pattern_mapping_config['LayerNormWithReduceMean'][0] - model, new_node_names, ret_old_nodes = util.pattern_mapping("LayerNormWithReduceMean", + model, new_node_names, ret_old_nodes = util.pattern_mapping("LayerNormWithReduceMean", pattern_dict, model) if len(new_node_names) != 0: for j in range(len(new_node_names)): diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/llama_embeding.py b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/llama_embeding.py index c869177cdec..b473b8f55d0 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/llama_embeding.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/llama_embeding.py @@ -75,7 +75,7 @@ def __call__(self, model): }, { 'patterns': { - 'in': [[(0, 'Slice'),(1, 'Unsqueeze'), (2, 'Unsqueeze'), (3, 'Slice'), (7, 'Expand'), + 'in': [[(0, 'Slice'),(1, 'Unsqueeze'), (2, 'Unsqueeze'), (3, 'Slice'), (7, 'Expand'), (8,'Rsub'), (9,'ConstantOfShape')], [(), (4, 'Shape'), (7, 'Expand')], [(), (5, 'Shape'), (7, 'Expand')], @@ -107,11 +107,11 @@ def __call__(self, model): }, 'returns': [1] }, - + { 'patterns': { 'in': [[(0, 'Arange'), (1, 'Less'), (3, 'ConstantOfShape'), - (5,'Concat'), (6,'Unsqueeze'), (7,'Unsqueeze'), (8,'Slice'), (9, 'Slice'), + (5,'Concat'), (6,'Unsqueeze'), (7,'Unsqueeze'), (8,'Slice'), (9, 'Slice'), (11, 'Expand'), (13, 'Add')], [(), (2, 'Full'), (3, 'ConstantOfShape')], [(), (4, 'Zeros'), (5, 'Concat')], @@ -136,7 +136,7 @@ def __call__(self, model): }, 'returns': [0, 12] }, - + ] } @@ -184,4 +184,4 @@ def __call__(self, model): model.remove_nodes(remove_add.output_tensors[0].dest_op) model.remove_nodes([ret_old_nodes[i][0].output_tensors[0].dest_op[0]]) model._framework_modeling_config['architecture'] = 'decoder_only' - return model \ No newline at end of file + return model diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/llama_matmulwithtranspose.py b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/llama_matmulwithtranspose.py index 5483dc3d1cd..4f4edbf0d93 100644 --- 
a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/llama_matmulwithtranspose.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/llama_matmulwithtranspose.py @@ -35,7 +35,7 @@ def __call__(self, model): """The __call__ function of this pattern class.""" pattern_mapping_config = { 'LlamaMatMulWithTranspose': [ - # llama + # llama { 'patterns': { 'in': [[(0, 'Reorder'), (1, 'Matmul')]], @@ -82,7 +82,7 @@ def __call__(self, model): }, 'returns': [1] }, - + { 'patterns': { 'in': [[(0, 'Add'), (1, 'Max'), (2, 'Softmax')]], @@ -113,7 +113,7 @@ def __call__(self, model): } pattern_dict = pattern_mapping_config['LlamaMatMulWithTranspose'][0] - model, new_node_names, ret_old_nodes = util.pattern_mapping("LlamaMatMulWithTranspose", + model, new_node_names, ret_old_nodes = util.pattern_mapping("LlamaMatMulWithTranspose", pattern_dict, model) if len(new_node_names) != 0: for i in range(len(new_node_names)): @@ -123,7 +123,7 @@ def __call__(self, model): mat_node_idx.attr = attr pattern_dict = pattern_mapping_config['LlamaMatMulWithTranspose'][1] - model, new_node_names, ret_old_nodes = util.pattern_mapping("LlamaMatMulWithTranspose", + model, new_node_names, ret_old_nodes = util.pattern_mapping("LlamaMatMulWithTranspose", pattern_dict, model) if len(new_node_names) != 0: for i in range(len(new_node_names)): @@ -134,7 +134,7 @@ def __call__(self, model): mat_node_idx.attr = attr pattern_dict = pattern_mapping_config['LlamaMatMulWithTranspose'][2] - model, new_node_names, ret_old_nodes = util.pattern_mapping("LlamaMatMulWithTranspose", + model, new_node_names, ret_old_nodes = util.pattern_mapping("LlamaMatMulWithTranspose", pattern_dict, model) return model diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/llama_postprocess.py b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/llama_postprocess.py index 1bfe92f1e44..9e44b4ec830 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/llama_postprocess.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/llama_postprocess.py @@ -114,7 +114,7 @@ def __call__(self, model): model.insert_nodes(insert_idx, [reshape_op]) node.input_tensors[0] = reshape_output break - + if node.op_type == 'RmsNorm': pre_node = model.get_node_by_name(node.input_tensors[0].source_op[0]) if 'output_dtype' in pre_node.attr: @@ -126,8 +126,8 @@ def __call__(self, model): prepre_node = model.get_node_by_name(prepre_node.input_tensors[0].source_op[0]) prepre_node.attr = None prepre_node.attr = OrderedDict({'dst_shape': '-1'}) - + if node.op_type == 'Shape': remove_shape.append(node.name) - model.remove_nodes(remove_shape) - return model \ No newline at end of file + model.remove_nodes(remove_shape) + return model diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/llama_rotary_pos_emb.py b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/llama_rotary_pos_emb.py index 659c0cb9f39..4986bc0ad21 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/llama_rotary_pos_emb.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/llama_rotary_pos_emb.py @@ -68,7 +68,7 @@ def __call__(self, model): 'in': [[(0, 'Shape'), (1, 'Add')]], }, }, - + { 'patterns': { 'in': [[(0, 'Shape'), (1, 'Div'), (2, 'Slice'), (4, 'Neg'), (5, 'Concat')], @@ -77,7 +77,7 @@ def __call__(self, model): }, ] } - + def 
_set_attr(new_node_names, ret_old_nodes, model): remove_shape_list = [] for i in range(len(new_node_names)): @@ -88,12 +88,12 @@ def _set_attr(new_node_names, ret_old_nodes, model): attr_slice['ends_with_tensor'] = 1 slice_node = model.get_node_by_name(new_node_names[i][0]) slice_node.attr = attr_slice - + if model.framework_modeling_config['framework'] != 'torch': return model - + pattern_dict = pattern_mapping_config['LlamaRoraryPosEmb'][0] - model, new_node_names, ret_old_nodes = util.pattern_mapping("LlamaRoraryPosEmb", + model, new_node_names, ret_old_nodes = util.pattern_mapping("LlamaRoraryPosEmb", pattern_dict, model) if len(new_node_names) != 0: _set_attr(new_node_names, ret_old_nodes, model) @@ -113,7 +113,7 @@ def _set_attr(new_node_names, ret_old_nodes, model): remove_node_list.extend([pattern_nodes_name[0], pattern_nodes_name[1]]) model.remove_nodes(remove_node_list) - # rotate_half pattern for llama + # rotate_half pattern for llama pattern = pattern_mapping_config['LlamaRoraryPosEmb'][2]['patterns']['in'] patterns_nodes_name = util.search_pattern(pattern, model) remove_node_list = [] diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/lower_all_tuples.py b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/lower_all_tuples.py index d7bc9bab378..5fffd442c28 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/lower_all_tuples.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/lower_all_tuples.py @@ -49,7 +49,7 @@ def __call__(self, model): for i in range(len(dest_node.input_tensors)): if dest_node.input_tensors[i].name == node.output_tensors[0].name: del dest_node.input_tensors[i] - idx = i + idx = i for tensor in node.input_tensors: if node.name in tensor.dest_op: tensor.dest_op.remove(node.name) diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/matmul_with_bias.py b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/matmul_with_bias.py index 43a6c70d4c0..767f84c57d4 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/matmul_with_bias.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/matmul_with_bias.py @@ -79,7 +79,7 @@ def _set_attr(new_node_names, ret_old_nodes, model): model.nodes[mat_node_idx].attr = attr pattern_dict = pattern_mapping_config['MatMulWithBias'][0] - model, new_node_names, ret_old_nodes = util.pattern_mapping("MatMulWithBias", + model, new_node_names, ret_old_nodes = util.pattern_mapping("MatMulWithBias", pattern_dict, model) if len(new_node_names) != 0: _set_attr(new_node_names, ret_old_nodes, model) diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/matmul_with_bias_gelu.py b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/matmul_with_bias_gelu.py index ce06ff59189..057f8e23adc 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/matmul_with_bias_gelu.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/matmul_with_bias_gelu.py @@ -74,7 +74,7 @@ def _set_attr(new_node_names, ret_old_nodes, model): model.nodes[mat_node_idx].attr = attr pattern_dict = pattern_mapping_config['MatMulWithBiasGelu'][0] - model, new_node_names, ret_old_nodes = util.pattern_mapping("MatMulWithBiasGelu", + model, new_node_names, ret_old_nodes = util.pattern_mapping("MatMulWithBiasGelu", pattern_dict, model) 
if len(new_node_names) != 0: _set_attr(new_node_names, ret_old_nodes, model) diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/matmul_with_bias_relu.py b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/matmul_with_bias_relu.py index e3a1f390e56..1bcaa2976db 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/matmul_with_bias_relu.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/matmul_with_bias_relu.py @@ -73,7 +73,7 @@ def _set_attr(new_node_names, ret_old_nodes, model): model.nodes[mat_node_idx].attr = attr pattern_dict = pattern_mapping_config['MatMulWithBiasRelu'][0] - model, new_node_names, ret_old_nodes = util.pattern_mapping("MatMulWithBiasRelu", + model, new_node_names, ret_old_nodes = util.pattern_mapping("MatMulWithBiasRelu", pattern_dict, model) if len(new_node_names) != 0: _set_attr(new_node_names, ret_old_nodes, model) diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/matmul_with_bias_tanh.py b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/matmul_with_bias_tanh.py index f6035f6ae45..8292ff9fbb9 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/matmul_with_bias_tanh.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/matmul_with_bias_tanh.py @@ -73,7 +73,7 @@ def _set_attr(new_node_names, ret_old_nodes, model): model.nodes[mat_node_idx].attr = attr pattern_dict = pattern_mapping_config['MatMulWithBiasTanh'][0] - model, new_node_names, ret_old_nodes = util.pattern_mapping("MatMulWithBiasTanh", + model, new_node_names, ret_old_nodes = util.pattern_mapping("MatMulWithBiasTanh", pattern_dict, model) if len(new_node_names) != 0: _set_attr(new_node_names, ret_old_nodes, model) diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/matmul_with_transpose.py b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/matmul_with_transpose.py index 67e4c57fe53..d6e3e7c5dc5 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/matmul_with_transpose.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/matmul_with_transpose.py @@ -60,7 +60,7 @@ def __call__(self, model): }, 'returns': [0, 1, 2] }, - + { 'patterns': { 'in': [[(0, 'Reorder'), (1, 'Matmul'), (2, 'Reorder'), (3, 'Shape'), (5, 'View')], @@ -86,7 +86,7 @@ def __call__(self, model): }, 'returns': [0, 2, 5] }, - + { 'patterns': { 'in': [[(0, 'Reorder'), (4, 'Matmul')], @@ -176,23 +176,23 @@ def _set_attr(new_node_names, ret_old_nodes, model): if transpose_b: attr['src0_perm'] = transpose_b model.nodes[mat_node_idx].attr = attr - + concat_node = model.get_node_by_name(model.nodes[mat_node_idx].input_tensors[0].source_op[0]) concat1_node = model.get_node_by_name(model.nodes[mat_node_idx].input_tensors[1].source_op[0]) if concat_node.op_type == "Concat": concat_node.attr = OrderedDict({'axis': '3'}) if concat1_node.op_type == "Concat": - concat1_node.attr = OrderedDict({'axis': '3'}) + concat1_node.attr = OrderedDict({'axis': '3'}) pattern_dict = pattern_mapping_config['MatMulWithTranspose'][0] - model, new_node_names, ret_old_nodes = util.pattern_mapping("MatMulWithTranspose", + model, new_node_names, ret_old_nodes = util.pattern_mapping("MatMulWithTranspose", pattern_dict, model) if len(new_node_names) != 0: _set_attr(new_node_names, ret_old_nodes, model) 
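The hunks above and below all repeat one idiom: fetch a `pattern_dict` from the pass's `pattern_mapping_config`, hand it to `util.pattern_mapping`, and, if any matches were rewritten, copy attributes from the returned old nodes onto the newly fused nodes. The following is a minimal, self-contained sketch of that control flow only; `pattern_mapping` here is a toy stand-in for the real `compile.graph_utils` helper, and the pass name, node names, and attributes are all made up for illustration.
```python
from collections import OrderedDict

def pattern_mapping(pass_name, pattern_dict, model):
    """Toy stand-in for util.pattern_mapping: pretend exactly one match was fused."""
    new_node_names = [["fused_matmul_0"]]  # per match: names of the nodes created
    ret_old_nodes = [[{"attr": OrderedDict(src1_perm="0,2,1,3")}]]  # per match: replaced nodes
    model["nodes"]["fused_matmul_0"] = {"attr": OrderedDict()}
    return model, new_node_names, ret_old_nodes

def run_pass(model, pattern_mapping_config):
    # Apply each registered pattern variant in order, as the real passes do.
    for pattern_dict in pattern_mapping_config["MatMulFusion"]:
        model, new_node_names, ret_old_nodes = pattern_mapping(
            "MatMulFusion", pattern_dict, model)
        if len(new_node_names) != 0:
            for i in range(len(new_node_names)):
                # Carry attributes from the matched old node onto the fused node.
                model["nodes"][new_node_names[i][0]]["attr"].update(
                    ret_old_nodes[i][0]["attr"])
    return model

model = run_pass({"nodes": {}},
                 {"MatMulFusion": [{"patterns": {"in": [[(0, "Reorder"), (1, "Matmul")]]}}]})
print(model["nodes"]["fused_matmul_0"]["attr"])  # OrderedDict([('src1_perm', '0,2,1,3')])
```
The real helper additionally wires input and output tensors according to the pattern dict's 'input_tensors'/'output_tensors' maps; the sketch shows only the shared apply-then-set-attr loop.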
pattern_dict = pattern_mapping_config['MatMulWithTranspose'][1] - model, new_node_names, ret_old_nodes = util.pattern_mapping("MatMulWithTranspose", + model, new_node_names, ret_old_nodes = util.pattern_mapping("MatMulWithTranspose", pattern_dict, model) if len(new_node_names) != 0: for i in range(len(new_node_names)): @@ -208,9 +208,9 @@ def _set_attr(new_node_names, ret_old_nodes, model): if reshape_attr: attr['reshape'] = '-1, ' + str(reshape_attr[-1]) model.nodes[mat_node_idx].attr = attr - - - + + + def _set_attr1(new_node_names, ret_old_nodes, model): for i in range(len(new_node_names)): transpose_a = ret_old_nodes[i][0].attr['dst_perm'] @@ -236,7 +236,7 @@ def _set_attr1(new_node_names, ret_old_nodes, model): if transpose_b: attr['src1_perm'] = transpose_b model.nodes[mat_node_idx].attr = attr - + concat_node = model.get_node_by_name(model.nodes[mat_node_idx].input_tensors[0].source_op[0]) concat1_node = model.get_node_by_name(model.nodes[mat_node_idx].input_tensors[1].source_op[0]) if concat_node.op_type == "Concat": @@ -246,16 +246,16 @@ def _set_attr1(new_node_names, ret_old_nodes, model): concat2 = model.get_node_by_name(concat1_node.input_tensors[1].source_op[0]) # concat2 = model.get_node_by_name(reorder_node1.input_tensors[0].source_op[0]) concat2.attr = OrderedDict({'axis': '3'}) - + pattern_dict = pattern_mapping_config['MatMulWithTranspose'][2] - model, new_node_names, ret_old_nodes = util.pattern_mapping("MatMulWithTranspose", + model, new_node_names, ret_old_nodes = util.pattern_mapping("MatMulWithTranspose", pattern_dict, model) if len(new_node_names) != 0: _set_attr1(new_node_names, ret_old_nodes, model) pattern_dict = pattern_mapping_config['MatMulWithTranspose'][3] - model, new_node_names, ret_old_nodes = util.pattern_mapping("MatMulWithTranspose", + model, new_node_names, ret_old_nodes = util.pattern_mapping("MatMulWithTranspose", pattern_dict, model) if len(new_node_names) != 0: for i in range(len(new_node_names)): @@ -274,6 +274,6 @@ def _set_attr1(new_node_names, ret_old_nodes, model): model.nodes[mat_node_idx].attr = attr concat_node = model.get_node_by_name(model.nodes[mat_node_idx].input_tensors[1].source_op[0]) concat_node.attr = OrderedDict({'axis': '1'}) - + return model return model diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/matmul_with_transpose_scale_add.py b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/matmul_with_transpose_scale_add.py index d794d9a6a18..627c1f0584b 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/matmul_with_transpose_scale_add.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/matmul_with_transpose_scale_add.py @@ -65,7 +65,7 @@ def __call__(self, model): if model.framework_modeling_config['framework'] != 'torch': return model - + def _set_attr(node_names, old_nodes, scale, binary_add=True): mat_node_idx = model.get_node_id(node_names[0]) attr = copy.deepcopy(old_nodes[0].attr) diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/neox_rotary_pos_emb.py b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/neox_rotary_pos_emb.py index 7e5cc060c68..a902f1c42f0 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/neox_rotary_pos_emb.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/neox_rotary_pos_emb.py @@ -66,7 +66,7 @@ def __call__(self, model): 'in': [[(0, 'Shape'), (1, 
'Add')]], }, }, - + { 'patterns': { 'in': [[(0, 'Shape'), (1, 'Div'), (2, 'Slice'), (4, 'Neg'), (5, 'Concat')], @@ -99,7 +99,7 @@ def __call__(self, model): }, ] } - + def _set_attr(new_node_names, ret_old_nodes, model): remove_shape_list = [] for i in range(len(new_node_names)): @@ -121,12 +121,12 @@ def _set_attr(new_node_names, ret_old_nodes, model): mask_tensor = copy.deepcopy(tensor) break slice_node.input_tensors.append(mask_tensor) - + if model.framework_modeling_config['framework'] != 'torch': return model - + pattern_dict = pattern_mapping_config['NeoxRoraryPosEmb'][0] - model, new_node_names, ret_old_nodes = util.pattern_mapping("NeoxRoraryPosEmb", + model, new_node_names, ret_old_nodes = util.pattern_mapping("NeoxRoraryPosEmb", pattern_dict, model) if len(new_node_names) != 0: _set_attr(new_node_names, ret_old_nodes, model) @@ -146,7 +146,7 @@ def _set_attr(new_node_names, ret_old_nodes, model): remove_node_list.extend([pattern_nodes_name[0], pattern_nodes_name[1]]) model.remove_nodes(remove_node_list) - # rotate_half pattern for llama + # rotate_half pattern for llama pattern = pattern_mapping_config['NeoxRoraryPosEmb'][2]['patterns']['in'] patterns_nodes_name = util.search_pattern(pattern, model) remove_node_list = [] @@ -166,7 +166,7 @@ def _set_attr(new_node_names, ret_old_nodes, model): pattern_dict = pattern_mapping_config['NeoxRoraryPosEmb'][3] - model, new_node_names, ret_old_nodes = util.pattern_mapping("NeoxRoraryPosEmb", + model, new_node_names, ret_old_nodes = util.pattern_mapping("NeoxRoraryPosEmb", pattern_dict, model) remove_node_list = [] for i in range(len(new_node_names)): diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/position_embeddings.py b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/position_embeddings.py index e501804a41b..2461142a044 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/position_embeddings.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/position_embeddings.py @@ -271,7 +271,7 @@ def _remove_assert(pattern, model): for i in range(0, len(pattern_mapping_config['PositionEmbeddings'])-1): pattern_dict = pattern_mapping_config['PositionEmbeddings'][i] - model, new_node_names, ret_old_nodes = util.pattern_mapping("PositionEmbeddings", + model, new_node_names, ret_old_nodes = util.pattern_mapping("PositionEmbeddings", pattern_dict, model) if len(new_node_names) != 0: for j in range(len(new_node_names)): @@ -287,7 +287,7 @@ def _remove_assert(pattern, model): # bert_base_mrpc pattern_dict = pattern_mapping_config['PositionEmbeddings'][-1] - model, new_node_names, ret_old_nodes = util.pattern_mapping("PositionEmbeddings", + model, new_node_names, ret_old_nodes = util.pattern_mapping("PositionEmbeddings", pattern_dict, model) if len(new_node_names) != 0: for i in range(len(new_node_names)): diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/position_embeddings_v1.py b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/position_embeddings_v1.py index 23f8e6665a8..33559a5c3ba 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/position_embeddings_v1.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/position_embeddings_v1.py @@ -36,7 +36,7 @@ def __call__(self, model): # roberta_base { 'patterns': { - 'in': [[(0, 'Equal'), (1, 'Not'), (2, 'Cast'), (3, 'CumSum'), + 'in': [[(0, 'Equal'), (1, 
'Not'), (2, 'Cast'), (3, 'CumSum'), (4, 'Add'), (5, 'Mul'), (6, 'Cast'), (7, 'Add'), (8, 'Gather')]], 'out': [[(0, 'PositionIds'), (1, 'Reshape'), (2, 'Gather'), (3, 'Reshape'), (4, 'Reshape')]] @@ -78,7 +78,7 @@ def __call__(self, model): # roberta_base int8 { 'patterns': { - 'in': [[(0, 'Equal'), (1, 'Not'), (2, 'Cast'), (3, 'CumSum'), + 'in': [[(0, 'Equal'), (1, 'Not'), (2, 'Cast'), (3, 'CumSum'), (4, 'Mul'), (5, 'Cast'), (6, 'Add'), (7, 'Gather')]], 'out': [[(0, 'PositionIds'), (1, 'Reshape'), (2, 'Gather'), (3, 'Reshape'), (4, 'Reshape')]] @@ -135,7 +135,7 @@ def _set_attr(hidden_size, axis, batch_dims, node_names, model): attr4['dst_shape'] = '-1,-1,' + str(hidden_size) attr4['dims'] = '0,1' attr4['mul'] = '1,2' - + position_node_idx = model.get_node_id(node_names[0]) model.nodes[position_node_idx].attr = attr0 diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/quantize_fusion.py b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/quantize_fusion.py index 2eae493bfc7..9dc52e2b3d6 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/quantize_fusion.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/quantize_fusion.py @@ -49,7 +49,7 @@ def search_quant_fusion(node): if pre_node.input_tensors[0].name in quant_info and len(pre_node.input_tensors) >= 6 \ or (pre_node.op_type == "Softmax") \ or (EXECUTOR_TYPE.get(pre_node.op_type, pre_node.op_type) in \ - ["InnerProduct", "Matmul"] and (not quant_info or is_from_quant)): + ["InnerProduct", "Matmul"] and (not quant_info or is_from_quant)): return (pre_node, True) elif pre_node.op_type == "Reshape": return search_quant_fusion(pre_node) @@ -65,7 +65,7 @@ def search_quant_fusion(node): # fuse quant nodes to previous innerproduct or matmul output dtype to enhance perf for node in model.nodes: if node.op_type == "Quantize": - dtype = node.attr['output_dtype'] + dtype = node.attr['output_dtype'] quant_node, can_fuse = search_quant_fusion(node) if can_fuse: if dtype == 'u8' or dtype == 's8': @@ -113,4 +113,3 @@ def is_lat_model(model, p=None): model.remove_nodes(remove_node_name) return model - \ No newline at end of file diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/remove_range.py b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/remove_range.py index 47a0be29fc2..e3243497513 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/remove_range.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/remove_range.py @@ -43,22 +43,22 @@ def __call__(self, model): ], }, }, - + { 'patterns': { - 'in': [[(0, 'Cos'), (1, 'Reshape'), (2, 'Gather'), (3, 'Reshape'), + 'in': [[(0, 'Cos'), (1, 'Reshape'), (2, 'Gather'), (3, 'Reshape'), (4, 'Slice')] ], }, }, { 'patterns': { - 'in': [[(0, 'Sin'), (1, 'Reshape'), (2, 'Gather'), (3, 'Reshape'), + 'in': [[(0, 'Sin'), (1, 'Reshape'), (2, 'Gather'), (3, 'Reshape'), (4, 'Slice')] ], }, - }, - + }, + ], } @@ -77,11 +77,11 @@ def __call__(self, model): if not (isinstance(node, list)): remove_list.append(node) model.remove_nodes(remove_list) - + pattern = pattern_mapping_config['RemoveRange'][1]['patterns']['in'] patterns_nodes_name = util.search_pattern(pattern, model) once_flag2 = False - + keep_list = [] for pattern_nodes_name in patterns_nodes_name: if once_flag2 == False : @@ -90,18 +90,18 @@ def __call__(self, model): continue remove = 
model.get_node_by_name(pattern_nodes_name[-2]) keep = model.get_node_by_name(keep_list[-2]) - + next_node = model.get_node_by_name(remove.output_tensors[0].dest_op[0]) keep.output_tensors[0].dest_op.append(next_node.name) next_node.input_tensors[1] = keep.output_tensors[0] for node_name in pattern_nodes_name: if node_name not in keep_list and not (isinstance(node_name, list)): model.remove_nodes([node_name]) - + pattern = pattern_mapping_config['RemoveRange'][2]['patterns']['in'] patterns_nodes_name = util.search_pattern(pattern, model) once_flag2 = False - + keep_list = [] for pattern_nodes_name in patterns_nodes_name: if once_flag2 == False : @@ -110,12 +110,12 @@ def __call__(self, model): continue remove = model.get_node_by_name(pattern_nodes_name[-2]) keep = model.get_node_by_name(keep_list[-2]) - + next_node = model.get_node_by_name(remove.output_tensors[0].dest_op[0]) keep.output_tensors[0].dest_op.append(next_node.name) next_node.input_tensors[1] = keep.output_tensors[0] for node_name in pattern_nodes_name: if node_name not in keep_list and not (isinstance(node_name, list)): model.remove_nodes([node_name]) - + return model diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/removeslice.py b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/removeslice.py index f4a0e9f7d87..db340270676 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/removeslice.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/removeslice.py @@ -35,7 +35,7 @@ def __call__(self, model): return model remove_list = [] - first_one = False + first_one = False output_tensor = None for node in model.nodes: if node.op_type == 'SliceMask': @@ -48,9 +48,9 @@ def __call__(self, model): if next_node.input_tensors[i].name == node.output_tensors[0].name: next_node.input_tensors[i] = output_tensor output_tensor.dest_op.append(model.get_node_by_name(node.output_tensors[0].dest_op[0]).name) - + remove_list.append(node.name) - + model.remove_nodes(remove_list) return model diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/reshape_after_restore_hidden_states.py b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/reshape_after_restore_hidden_states.py index 1f594afa5df..c5025906fbe 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/reshape_after_restore_hidden_states.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/reshape_after_restore_hidden_states.py @@ -47,7 +47,7 @@ def __call__(self, model): 'node_names': { 0: 0, 1: 'reshape_to_2d_before_inner_product_in_last_restoration', - 2: 1, + 2: 1, }, 'input_tensors': { 0: [[{0: [0]}, {0: [1]}, {0: [2]}], [[0, 1, 2], 3]], @@ -67,14 +67,14 @@ def __call__(self, model): def _set_attr(se_attr, mat_attr, hidden_size, node_names, model): attr1 = OrderedDict() attr1['dst_shape'] = '-1,' + str(hidden_size) - + scatter_elements_node_idx = model.get_node_id(node_names[0]) model.nodes[scatter_elements_node_idx].attr = se_attr reshape_2d_node_idx = model.get_node_id(node_names[1]) model.nodes[reshape_2d_node_idx].attr = attr1 mat_node_idx = model.get_node_id(node_names[2]) model.nodes[mat_node_idx].attr = mat_attr - + # minilmv2-lat-roberta pattern_dict = pattern_mapping_config['ReshapeAfterRestoreHiddenStates'][0] model, new_node_names, ret_old_nodes = util.pattern_mapping( @@ -89,7 +89,7 @@ def _set_attr(se_attr, mat_attr, hidden_size, 
node_names, model): mat_node.input_tensors[0].name = ret_old_nodes[i][1].input_tensors[0].name reshape_node.output_tensors[0].name = mat_node.input_tensors[0].name _set_attr(se_attr, mat_attr, hidden_size, new_node_names[i], model) - + return model - - return model \ No newline at end of file + + return model diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/reshape_before_restore_hidden_states.py b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/reshape_before_restore_hidden_states.py index 3766db1f2a6..3d1413ba7b8 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/reshape_before_restore_hidden_states.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/reshape_before_restore_hidden_states.py @@ -67,14 +67,14 @@ def _set_attr(ln_attr, se_attr, hidden_size, node_names, model): attr1 = OrderedDict() attr1['dst_shape'] = '-1,-1,' + str(hidden_size) attr1['dims'] = 0 - + ln_node_idx = model.get_node_id(node_names[0]) model.nodes[ln_node_idx].attr = ln_attr reshape_3d_node_idx = model.get_node_id(node_names[1]) model.nodes[reshape_3d_node_idx].attr = attr1 scatter_elements_node_idx = model.get_node_id(node_names[2]) model.nodes[scatter_elements_node_idx].attr = se_attr - + # minilmv2-lat-roberta layer_norm_idx = [] remove_list = [] @@ -95,11 +95,11 @@ def _set_attr(ln_attr, se_attr, hidden_size, node_names, model): ln_node = copy.deepcopy(model.get_node_by_name(new_node_names[i][0])) model.remove_nodes([new_node_names[i][0]]) model.insert_nodes(layer_norm_idx[i] + i, [ln_node]) - + remove_list.append(new_node_names[i][0]) - - + + # model.remove_nodes(remove_list) return model - - return model \ No newline at end of file + + return model diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/rms_norm.py b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/rms_norm.py index 654ce4a6372..93250daf89c 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/rms_norm.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/rms_norm.py @@ -69,14 +69,14 @@ def _set_attr(epsilon, node_names, model): attr['epsilon'] = float(epsilon.input_tensors[1].data) ln_node_idx = model.get_node_id(node_names[0]) model.nodes[ln_node_idx].attr = attr - + if len(model.nodes[ln_node_idx].input_tensors) == 2: hidden_size = model.nodes[ln_node_idx].input_tensors[1].data.shape[0] model.add_config_item("hidden_size", hidden_size) # import pdb;pdb.set_trace() pattern_dict = pattern_mapping_config['RmsNorm'][0] - model, new_node_names, ret_old_nodes = util.pattern_mapping("RmsNorm", + model, new_node_names, ret_old_nodes = util.pattern_mapping("RmsNorm", pattern_dict, model) if len(new_node_names) != 0: for i in range(len(new_node_names)): diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/rotary_pos_emb.py b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/rotary_pos_emb.py index 8d54ca5a69e..5d16fa1a6e4 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/rotary_pos_emb.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/rotary_pos_emb.py @@ -64,7 +64,7 @@ def __call__(self, model): }, { 'patterns': { - 'in': [[(0, 'Add'), (1, 'Slice'), (2, 'Unsqueeze'), + 'in': [[(0, 'Add'), (1, 'Slice'), (2, 'Unsqueeze'), (3, 'Slice')] ], 'out': [[(0, 'Reshape'), (1, 'Slice')]] 
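For readers following the pattern dicts in these files: each 'in' entry is a list of node chains, a chain element is a `(pattern_index, op_type)` tuple, 'out' describes the replacement subgraph, and 'returns' selects which matched original nodes are handed back to the pass. A rough illustration of how one single-chain 'in' pattern can be matched against a flat sequence of op types follows; this is an assumption-laden toy, since the real `util.search_pattern` walks the full graph with multiple chains, which this version does not attempt.
```python
def match_chain(chain, op_types, start):
    """Return matched positions if every (idx, op_type) in `chain` lines up
    with consecutive ops starting at `start`, else None."""
    matched = []
    pos = start
    for _pattern_idx, op_type in chain:
        if pos >= len(op_types) or op_types[pos] != op_type:
            return None
        matched.append(pos)
        pos += 1
    return matched

pattern_in = [(0, "Reorder"), (1, "Matmul")]      # one chain of an 'in' pattern
ops = ["Gather", "Reorder", "Matmul", "Softmax"]  # toy op-type sequence
hits = [match_chain(pattern_in, ops, s) for s in range(len(ops))]
print([h for h in hits if h])                     # [[1, 2]]
```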
@@ -96,7 +96,7 @@ def __call__(self, model): { 'patterns': { - 'in': [[(0, 'Slice'), (1, 'Slice'), (2, 'Slice'), (3, 'Slice'), + 'in': [[(0, 'Slice'), (1, 'Slice'), (2, 'Slice'), (3, 'Slice'), (4, 'Neg'), (5, 'Stack'), (6, 'Flatten')] ], 'out': [[(0, 'Slice'), (1, 'Mul'), (2, 'Reshape'), (3, 'Concat'), (4, 'Reshape')]] @@ -131,7 +131,7 @@ def __call__(self, model): }, 'returns': [0, 3, 5] }, - + { 'patterns': { 'in': [[(0, 'Shape'), (2, 'Add'), (3, 'Arange')], @@ -175,15 +175,15 @@ def _set_attr(new_node_names, ret_old_nodes, model): shape = [2], dtype="int32") gather_node.input_tensors.insert(0, idx_tensor) - + # batch_dims: 0 - + pattern_dict = pattern_mapping_config['RoraryPosEmb'][0] - model, new_node_names, ret_old_nodes = util.pattern_mapping("RoraryPosEmb", + model, new_node_names, ret_old_nodes = util.pattern_mapping("RoraryPosEmb", pattern_dict, model) if len(new_node_names) != 0: _set_attr(new_node_names, ret_old_nodes, model) - + def _set_attr1(new_node_names, ret_old_nodes, model): remove_shape_list = [] @@ -198,9 +198,9 @@ def _set_attr1(new_node_names, ret_old_nodes, model): slice_node.attr = OrderedDict({"starts_with_tensor" : "1", "ends_add" : "1", "axes" : "1", "steps" : "1"}) model.remove_nodes(remove_shape_list) - + pattern_dict = pattern_mapping_config['RoraryPosEmb'][1] - model, new_node_names, ret_old_nodes = util.pattern_mapping("RoraryPosEmb", + model, new_node_names, ret_old_nodes = util.pattern_mapping("RoraryPosEmb", pattern_dict, model) if len(new_node_names) != 0: _set_attr1(new_node_names, ret_old_nodes, model) @@ -227,12 +227,12 @@ def _set_attr2(new_node_names, ret_old_nodes, model): concat_node.attr = OrderedDict({'axis': '4'}) reshape_node = model.get_node_by_name(new_node_names[i][2]) reshape_node.attr = OrderedDict({'unsqueeze': '-1'}) - + reshape_output = Tensor(name=concat_node.input_tensors[1].name + "_reshape", source_op=[concat_node.name + "_reshape"], dest_op=[concat_node.name], dtype=concat_node.input_tensors[1].dtype) - + reshape_op = util.construct_node( node_name=concat_node.name + "_reshape", op_type='Reshape', @@ -243,13 +243,13 @@ def _set_attr2(new_node_names, ret_old_nodes, model): concat_node.input_tensors[1] = reshape_output insert_idx = model.get_node_id(new_node_names[i][3]) model.insert_nodes(insert_idx, [reshape_op]) - + last_reshape_node = model.get_node_by_name(new_node_names[i][4]) last_reshape_node.attr = OrderedDict({'mul': '3, 4'}) pattern_dict = pattern_mapping_config['RoraryPosEmb'][2] - model, new_node_names, ret_old_nodes = util.pattern_mapping("RoraryPosEmb", + model, new_node_names, ret_old_nodes = util.pattern_mapping("RoraryPosEmb", pattern_dict, model) if len(new_node_names) != 0: _set_attr2(new_node_names, ret_old_nodes, model) - + return model diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/slicemask.py b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/slicemask.py index faae1ef5843..99bd4930248 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/slicemask.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/slicemask.py @@ -36,7 +36,7 @@ def __call__(self, model): 'SliceMask': [ { 'patterns': { - 'in': [[(0, 'Shape'), (2, 'Sub'), (3, 'Int'), (6, 'Slice'), + 'in': [[(0, 'Shape'), (2, 'Sub'), (3, 'Int'), (6, 'Slice'), (7, 'Slice'), (8, 'Where'), (9, 'Add')], [(), (1, 'Shape'), (2, 'Sub')], [(), (4, 'Slice'), (5, 'Slice'), (6, 'Slice')] @@ -70,10 +70,10 @@ def __call__(self, model): }, 
'returns': [] }, - + { 'patterns': { - 'in': [[(0, 'Shape'), (2, 'Sub'), (3, 'Slice'), (4, 'Slice'), + 'in': [[(0, 'Shape'), (2, 'Sub'), (3, 'Slice'), (4, 'Slice'), (5, 'Where')], [(), (1, 'Shape'), (2, 'Sub')], ], @@ -118,7 +118,7 @@ def _set_attr(new_node_names, ret_old_nodes, model): model.nodes[binary_node_idx].attr = attr slice_node = model.get_node_by_name(new_node_names[i][0]) import numpy as np - + slice_node.input_tensors[0].data = np.array(slice_node.input_tensors[0].data, dtype=np.float32) slice_node.input_tensors[0].data.dtype = np.float32 slice_node.input_tensors[0].data[np.where(slice_node.input_tensors[0].data==0)] = -10000 @@ -135,7 +135,7 @@ def _set_attr(new_node_names, ret_old_nodes, model): attr_slice2['steps'] = 1 model.get_node_by_name(new_node_names[i][1]).attr = attr_slice2 pattern_dict = pattern_mapping_config['SliceMask'][0] - model, new_node_names, ret_old_nodes = util.pattern_mapping("SliceMask", + model, new_node_names, ret_old_nodes = util.pattern_mapping("SliceMask", pattern_dict, model) if len(new_node_names) != 0: _set_attr(new_node_names, ret_old_nodes, model) @@ -148,10 +148,10 @@ def _set_attr1(new_node_names, ret_old_nodes, model): model.nodes[binary_node_idx].attr = attr slice_node = model.get_node_by_name(new_node_names[i][0]) import numpy as np - + slice_node.input_tensors[0].data = np.array(slice_node.input_tensors[0].data, dtype=np.float32) slice_node.input_tensors[0].data.dtype = np.float32 - + slice_node.input_tensors[0].data[np.where(slice_node.input_tensors[0].data==0)] = -1600000 slice_node.input_tensors[0].data[np.where(slice_node.input_tensors[0].data==1)] = 0 attr_slice1 = OrderedDict() @@ -162,10 +162,10 @@ def _set_attr1(new_node_names, ret_old_nodes, model): attr_slice1['steps'] = 1 slice_node.attr = attr_slice1 pattern_dict = pattern_mapping_config['SliceMask'][1] - model, new_node_names, ret_old_nodes = util.pattern_mapping("SliceMask", + model, new_node_names, ret_old_nodes = util.pattern_mapping("SliceMask", pattern_dict, model) if len(new_node_names) != 0: _set_attr1(new_node_names, ret_old_nodes, model) - - return model \ No newline at end of file + + return model diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/stableDiffusion_ExplicitNHWCTransposeQAT.py b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/stableDiffusion_ExplicitNHWCTransposeQAT.py index 4c43f250220..d632f48220a 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/stableDiffusion_ExplicitNHWCTransposeQAT.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/stableDiffusion_ExplicitNHWCTransposeQAT.py @@ -183,7 +183,7 @@ def __call__(self, model): activation_scale = ((activation_max - activation_min) / 255).astype(float) weight_scale = (np.maximum(abs(weight_max), abs(weight_min)) / 128).astype(float) - bias_fp32 = (bias_s32 * activation_scale * weight_scale).astype(np.float32) + bias_fp32 = (bias_s32 * activation_scale * weight_scale).astype(np.float32) compensation = 0 node.input_tensors[2].data = copy.deepcopy((bias_fp32 + compensation).astype(np.float32)) diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/stableDiffusion_QuantizeFusion.py b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/stableDiffusion_QuantizeFusion.py index 90842eae63b..4a277c90f4a 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/stableDiffusion_QuantizeFusion.py +++ 
b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/stableDiffusion_QuantizeFusion.py @@ -74,7 +74,7 @@ def search_quant_fusion(node): # fuse quant nodes to previous innerproduct or matmul output dtype to enhance perf for node in model.nodes: if node.op_type == "Quantize": - dtype = node.attr['output_dtype'] + dtype = node.attr['output_dtype'] quant_node, can_fuse = search_quant_fusion(node) if can_fuse: if dtype == 'u8' or dtype == 's8': @@ -132,4 +132,3 @@ def is_lat_model(model, p=None): model.remove_nodes(remove_node_name) return model - \ No newline at end of file diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/stableDiffusion_insertQuantNode.py b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/stableDiffusion_insertQuantNode.py index 4f4fe85369e..48d13d0ef6f 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/stableDiffusion_insertQuantNode.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/stableDiffusion_insertQuantNode.py @@ -96,9 +96,9 @@ def __call__(self, model): model.change_node_input_tensors(node.name, insert_offset + 2 * idx + 3, quant_min, 'insert') model.change_node_input_tensors(node.name, insert_offset + 2 * idx + 4, - quant_max, 'insert') + quant_max, 'insert') if "output" in quant_info[input_name][2]: - + #import pdb;pdb.set_trace() output_name = node.output_tensors[0].name quant_min = Tensor( name=output_name + "_min", shape=[quant_info[input_name][4].size], data=np.array(quant_info[input_name][3].astype("float32")), dtype="fp32") quant_max = Tensor( name=output_name + "_max", shape=[quant_info[input_name][4].size], - data=np.array(quant_info[input_name][4].astype("float32")), + data=np.array(quant_info[input_name][4].astype("float32")), dtype="fp32") # insert output min and max tensor model.change_node_input_tensors(node.name, insert_offset + 7, @@ -169,7 +169,7 @@ def __call__(self, model): quant_max = Tensor( name=output_name + "_max", shape=[quant_info[input_name][4].size], - data=np.array(quant_info[input_name][4].astype("float32")), + data=np.array(quant_info[input_name][4].astype("float32")), dtype="fp32") # insert output min and max tensor model.change_node_input_tensors(node.name, insert_offset + 7, @@ -209,5 +209,5 @@ def __call__(self, model): for remove_idx in remove_tensors_list: model.change_node_input_tensors(node.name, remove_tensors_list[0], None, 'remove') - + return model diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/start_end_logits.py b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/start_end_logits.py index 1dd6305294e..b859c0fd8b1 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/start_end_logits.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/start_end_logits.py @@ -41,11 +41,11 @@ def __call__(self, model): [[(0, 'Split'), (1, 'Squeeze')]], ] } - + # tf has reshape operations before splitting the logits, so we just remove these split ops. # But in onnx, MatMul has an implicit broadcasting mechanism, like [M K N] * [N K]. - # So there are no reshape operations before splitting. We need to insert a reshape op - # when we remove the split pattern in onnx, since the engine emits a tensor of size + # So there are no reshape operations before splitting. We need to insert a reshape op + # when we remove the split pattern in onnx, since the engine emits a tensor of size # [bs*seq_len, hidden_size] like tf.
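To make the shape argument in that comment concrete, here is a small numpy illustration; all sizes are made up for the example. ONNX's MatMul broadcasts a 3-D activation against the 2-D logits weight directly, while an engine that keeps activations flattened as [bs*seq_len, hidden_size] needs the inserted Reshape (the `dst_shape` '-1,-1,last_dim' attribute seen in this pass) before the start/end logits can be split.
```python
import numpy as np

bs, seq_len, hidden = 2, 4, 8                      # made-up sizes
weight = np.ones((hidden, 2), dtype=np.float32)    # start/end logits weight

# onnx-style: MatMul broadcasts the 3-D activation against the 2-D weight.
act_3d = np.random.rand(bs, seq_len, hidden).astype(np.float32)
logits_3d = act_3d @ weight                        # shape (2, 4, 2), no reshape needed

# engine/tf-style: activations arrive flattened as [bs*seq_len, hidden_size],
# so a Reshape with dst_shape '-1,-1,last_dim' must come before the split.
act_2d = act_3d.reshape(bs * seq_len, hidden)
logits_restored = (act_2d @ weight).reshape(bs, seq_len, -1)

start_logits, end_logits = np.split(logits_restored, 2, axis=-1)
assert np.allclose(logits_3d, logits_restored)
print(start_logits.shape, end_logits.shape)        # (2, 4, 1) (2, 4, 1)
```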
for i in range(len(patterns['StartEndLogits'])): in_pattern = patterns['StartEndLogits'][i] @@ -64,7 +64,7 @@ def __call__(self, model): last_dim = pre_first_node.input_tensors[1].shape[-1] attr = OrderedDict({'dst_shape': '-1,-1,' + str(last_dim), 'dims': '0,1'}) - reshape_node = util.construct_node(first_node.name, 'Reshape', + reshape_node = util.construct_node(first_node.name, 'Reshape', input_tensors=input_tensors, output_tensors=output_tensors, attr=attr) model.insert_nodes(first_node_idx, [reshape_node]) diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/subgraph_matcher.py b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/subgraph_matcher.py index 12aa3a73783..e8eb5f87e8f 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/subgraph_matcher.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/subgraph_matcher.py @@ -38,7 +38,7 @@ "Conv": "Convolution", "QuantizedMatMulWithBiasAndDequantize": "InnerProduct", "TransposeBatchMatMul": "Matmul", - "MatmulwithTranspose" : "Matmul", + "MatmulwithTranspose" : "Matmul", "BatchMatMul": "Matmul", "BatchMatMulV2": "Matmul", "Add": "BinaryAdd", @@ -136,7 +136,7 @@ 'StableDiffusion_InsertQuantNode': False, 'StableDiffusion_CollectQuantInfo': False, 'StableDiffusion_QuantizeFusion': False, - + #GPT-J 'TorchEmbedding': True, 'InnerproductReshapeFusion': True, @@ -150,7 +150,7 @@ 'RemoveSlice': True, 'RemoveRange': True, 'RemoveLastView': True, - + 'MatMulWithTransposeScaleAdd': True, 'EmbeddingsTo2DBeforeInnerProduct': True, 'QuantGatherToBF16': False, @@ -158,7 +158,7 @@ 'MultiHeadAttention': True, 'Int8BF16MixedPrecisionChecker': False, 'QuantizedGraphDtypeRefactor': True, - + #LLAMA 'LlamaEmbeddings': False, 'LlamaMatMulWithTranspose': False, @@ -174,7 +174,7 @@ class SubGraphMatcher(object): """The SubGraphMatcher class.""" def __call__(self, model, tune = False, pattern_config = None): """The __call__ function of SubGraphMatcher class.""" - logger.info('Start to implement Sub-Graph matching and replacing...') + logger.info('Start to implement Sub-Graph matching and replacing...') if tune: model = self._tune_patterns(model) else: @@ -185,7 +185,7 @@ def __call__(self, model, tune = False, pattern_config = None): def _fuse_patterns(self, model, supported_patterns=supported_patterns, pattern_mask=None, pattern_config=None): pattern_mask = [True for _ in range(len(supported_patterns))] \ if pattern_mask == None else pattern_mask - + for index in range(len(supported_patterns)): pattern_name = supported_patterns[index] if pattern_name in pattern_default_setting: @@ -207,7 +207,7 @@ def _fuse_patterns(self, model, supported_patterns=supported_patterns, pattern_m return model def _tune_patterns(self, model, iterations = 10, warm_up = 5): - # pattern tuning strategy(for superbert): + # pattern tuning strategy(for superbert): # 1. only one pattern off/on each time (pruning) # 2. check accuracy with framework # 3. 
and only save min latency config @@ -239,8 +239,8 @@ def _tune_patterns(self, model, iterations = 10, warm_up = 5): if off_latency < on_latency and off_latency < min_latency: min_latency = off_latency pattern_mask = off_pattern_mask - - # generate model according pattern mask + + # generate model according pattern mask self._fuse_patterns(model, all_patterns, pattern_mask) logger.info('End tuning pattern...') return model @@ -257,9 +257,8 @@ def _remove_identity(self, model): if node.op_type == "Cos": node.attr = OrderedDict({'algorithm': 'cos'}) if node.op_type == "Sin": - node.attr = OrderedDict({'algorithm': 'sin'}) + node.attr = OrderedDict({'algorithm': 'sin'}) op_type = EXECUTOR_TYPE[node.op_type] model.nodes[i].op_type = op_type model.remove_nodes(rm_node_names) return model - diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/textEncoder_QReshape.py b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/textEncoder_QReshape.py index e45e3607d7d..8228db63261 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/textEncoder_QReshape.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/textEncoder_QReshape.py @@ -104,7 +104,7 @@ def __call__(self, model): attr['dst_shape'] = ','.join(attr['dst_shape']) attr['dims'] = '0,1' - + reshape_node_idx = model.get_node_id(new_node_names[j][0]) model.nodes[reshape_node_idx].attr = attr diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/token_type_embeddings.py b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/token_type_embeddings.py index 0eb16a09cc3..a31643209d3 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/token_type_embeddings.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/token_type_embeddings.py @@ -249,7 +249,7 @@ def _set_attr_with_gather(hidden_size, axis, batch_dims, node_names, model): return model pattern_dict = pattern_mapping_config['TokenTypeEmbeddings'][0] - model, new_node_names, ret_old_nodes = util.pattern_mapping("TokenTypeEmbeddings", + model, new_node_names, ret_old_nodes = util.pattern_mapping("TokenTypeEmbeddings", pattern_dict, model) if len(new_node_names) != 0: for i in range(len(new_node_names)): @@ -291,7 +291,7 @@ def _set_attr_with_onehot(hidden_size, one_hot_attr, mat_attr, node_names, model # bert_base onehot+matmul embeddings pattern_dict = pattern_mapping_config['TokenTypeEmbeddings'][1] - model, new_node_names, ret_old_nodes = util.pattern_mapping("TokenTypeEmbeddings", + model, new_node_names, ret_old_nodes = util.pattern_mapping("TokenTypeEmbeddings", pattern_dict, model) if len(new_node_names) != 0: for i in range(len(new_node_names)): @@ -315,7 +315,7 @@ def _set_attr_with_onehot(hidden_size, one_hot_attr, mat_attr, node_names, model # geminet pattern_dict = pattern_mapping_config['TokenTypeEmbeddings'][2] - model, new_node_names, ret_old_nodes = util.pattern_mapping("TokenTypeEmbeddings", + model, new_node_names, ret_old_nodes = util.pattern_mapping("TokenTypeEmbeddings", pattern_dict, model) if len(new_node_names) != 0: for i in range(len(new_node_names)): diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/token_type_embeddings_v1.py b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/token_type_embeddings_v1.py index ceec41a1249..0fec2b11e00 100644 --- 
a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/token_type_embeddings_v1.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/token_type_embeddings_v1.py @@ -38,11 +38,11 @@ def __call__(self, model): { 'patterns': { 'in': [[(2, 'Shape'), (3, 'Gather'), (7, 'Unsqueeze'), (8, 'Concat'), - (9, 'Reshape'), (10, 'Shape'), (11, 'ConstantOfShape'), + (9, 'Reshape'), (10, 'Shape'), (11, 'ConstantOfShape'), (14, 'Where'), (15, 'Expand'), (16, 'Gather')], [(), (0, 'Shape'), (1, 'Gather'), (6, 'Unsqueeze'), (8, 'Concat')], [(3, 'Gather'), (4, 'Unsqueeze'), (5, 'Slice'), (15, 'Expand')], - [(11, 'ConstantOfShape'), (12, 'Mul'), (13, 'Equal'), + [(11, 'ConstantOfShape'), (12, 'Mul'), (13, 'Equal'), (14, 'Where')]], 'out': [[(0, 'TokenTypeIds'), (1, 'Reshape'), (2, 'Gather'), (3, 'Reshape'), (4, 'Reshape')]] @@ -88,7 +88,7 @@ def __call__(self, model): (8, 'Reshape'), (10, 'Where'), (11, 'Expand'), (12, 'Gather')], [(0, 'Shape'), (1, 'Gather'), (6, 'Unsqueeze'), (7, 'Concat')], [(2, 'Gather'), (4, 'Unsqueeze'), (5, 'Slice'), (11, 'Expand')], - [(8, 'Reshape'), (9, 'Equal'), + [(8, 'Reshape'), (9, 'Equal'), (10, 'Where')]], 'out': [[(0, 'TokenTypeIds'), (1, 'Reshape'), (2, 'Gather'), (3, 'Reshape'), (4, 'Reshape')]] @@ -145,7 +145,7 @@ def _set_attr(hidden_size, axis, batch_dims, node_names, model): attr4['dst_shape'] = '-1,-1,' + str(hidden_size) attr4['dims'] = '0,1' attr4['mul'] = '1,2' - + tokentype_node_idx = model.get_node_id(node_names[0]) slice_data = model.nodes[tokentype_node_idx].input_tensors[1].data model.nodes[tokentype_node_idx].input_tensors[1].data = slice_data.astype(np.int32) @@ -163,7 +163,7 @@ def _set_attr(hidden_size, axis, batch_dims, node_names, model): reshape_2_node_idx = model.get_node_id(node_names[4]) model.nodes[reshape_2_node_idx].attr = attr4 - + # roberta_base for i in range(len(pattern_mapping_config['TokenTypeEmbeddingsV1'])): pattern_dict = pattern_mapping_config['TokenTypeEmbeddingsV1'][i] @@ -177,5 +177,5 @@ def _set_attr(hidden_size, axis, batch_dims, node_names, model): batch_dims = gatherv2_node.attr['batch_dims'] _set_attr(hidden_size, axis, batch_dims, new_node_names[i], model) - - return model \ No newline at end of file + + return model diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/torch_unpack_baddbmm.py b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/torch_unpack_baddbmm.py index c09ba34b7e7..6934be80a02 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/torch_unpack_baddbmm.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/torch_unpack_baddbmm.py @@ -86,4 +86,4 @@ def _set_attr(new_node_names, ret_old_nodes, model): model, new_node_names, ret_old_nodes = \ util.pattern_mapping('TorchUnpackBaddbmm', pattern_dict, model) _set_attr(new_node_names, ret_old_nodes, model) - return model \ No newline at end of file + return model diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/torchpaddingsquence.py b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/torchpaddingsquence.py index 6e913ffb586..f5d055ed142 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/torchpaddingsquence.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/torchpaddingsquence.py @@ -99,7 +99,7 @@ def get_hidden_size(model, p=None, mat_idx=0): # import pdb;pdb.set_trace() 
hidden_size = 4096 #hidden_size = int(mat_node.input_tensors[1].shape[-1]) - + #hidden_size = 768 else: hidden_size = -1 diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/transformer2Dmodel_FFNSlice.py b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/transformer2Dmodel_FFNSlice.py index 532b75b29d3..a0cd8373404 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/transformer2Dmodel_FFNSlice.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/transformer2Dmodel_FFNSlice.py @@ -69,4 +69,4 @@ def __call__(self, model): attr['steps'] = 1 slice_node.attr = attr - return model \ No newline at end of file + return model diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/transformer2Dmodel_encoderHiddenStatesReshape.py b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/transformer2Dmodel_encoderHiddenStatesReshape.py index e212b6304e6..01d6613f214 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/transformer2Dmodel_encoderHiddenStatesReshape.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/transformer2Dmodel_encoderHiddenStatesReshape.py @@ -90,7 +90,7 @@ def __call__(self, model): matmul_node.input_tensors[0] = new_node.output_tensors[0] all_dest_op.append(matmul_node.name) - + # only insert one node to reshape the encoder_hidden_states. new_node.output_tensors[0].dest_op = all_dest_op assert first_matmul_node_idx != -1 @@ -139,7 +139,7 @@ def __call__(self, model): matmul_node.input_tensors[0] = new_node.output_tensors[0] all_dest_op.append(matmul_node.name) - + # only insert one node to reshape the encoder_hidden_states.
new_node.output_tensors[0].dest_op = all_dest_op assert first_matmul_node_idx != -1 diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/transpose_batch_matmul.py b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/transpose_batch_matmul.py index 23be0333a3c..e38feb326e1 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/transpose_batch_matmul.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/transpose_batch_matmul.py @@ -382,7 +382,7 @@ def _adj_perm(src_perm): for i in range(0, len(pattern_mapping_config['TransposeBatchMatMul'])-5): pattern_dict = pattern_mapping_config['TransposeBatchMatMul'][i] - model, new_node_names, ret_old_nodes = util.pattern_mapping("TransposeBatchMatMul", + model, new_node_names, ret_old_nodes = util.pattern_mapping("TransposeBatchMatMul", pattern_dict, model) if len(new_node_names) != 0: for j in range(len(new_node_names)): diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/word_embeddings.py b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/word_embeddings.py index aa60a999f68..b2c32080cb3 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/word_embeddings.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/compile/sub_graph/word_embeddings.py @@ -276,7 +276,7 @@ def _set_attr(hidden_size, axis, batch_dims, node_names, model, batch_idx=0): return model for i in range(len(pattern_mapping_config['WordEmbeddings'])): pattern_dict = pattern_mapping_config['WordEmbeddings'][i] - model, new_node_names, ret_old_nodes = util.pattern_mapping("WordEmbeddings", + model, new_node_names, ret_old_nodes = util.pattern_mapping("WordEmbeddings", pattern_dict, model) if len(new_node_names) != 0: for j in range(len(new_node_names)): diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/compile/tf_utils.py b/intel_extension_for_transformers/llm/runtime/deprecated/compile/tf_utils.py index c41a5424d59..db509969aa4 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/compile/tf_utils.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/compile/tf_utils.py @@ -166,8 +166,8 @@ def tf_extract_operator(node, framework_model, nodes_dict, engine_graph=None): input_tensors.append(input_tensor) """ output_tensors - Almost every op generates one tensor in deep learning, so we just give one tensor in - output_tensors (list). However, this tensor may be delivered to several nodes, so the + Almost every op generates one tensor in deep learning, so we just give one tensor in + output_tensors (list). However, this tensor may be delivered to several nodes, so the dest_op should have several strings.
""" if node.op not in MULTI_OUTPUT_OP.keys(): diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/compile/torch_utils.py b/intel_extension_for_transformers/llm/runtime/deprecated/compile/torch_utils.py index 3554dfe2270..b6943f97165 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/compile/torch_utils.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/compile/torch_utils.py @@ -111,4 +111,3 @@ def torch_extract_operator(node, model, nodes_dict, engine_graph=None): output_tensors.append(output_tensor) return op_maps.get(op_type, op_type), input_tensors, output_tensors - diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/docs/operator_register.md b/intel_extension_for_transformers/llm/runtime/deprecated/docs/operator_register.md index 9930ea66f50..eadf866efad 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/docs/operator_register.md +++ b/intel_extension_for_transformers/llm/runtime/deprecated/docs/operator_register.md @@ -127,4 +127,3 @@ After creating the customized operator, finally register it to operator class as ``` REGISTER_OPERATOR_CLASS(Gelu); ``` - diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/docs/validated_model.md b/intel_extension_for_transformers/llm/runtime/deprecated/docs/validated_model.md index f3abfab3103..118789b6d03 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/docs/validated_model.md +++ b/intel_extension_for_transformers/llm/runtime/deprecated/docs/validated_model.md @@ -1504,4 +1504,4 @@ Performance varies by use, configuration and other factors. See platform configu 9 - \ No newline at end of file + diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/kernels/README.md b/intel_extension_for_transformers/llm/runtime/deprecated/kernels/README.md index c1d72445bcc..5b21d01f692 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/kernels/README.md +++ b/intel_extension_for_transformers/llm/runtime/deprecated/kernels/README.md @@ -56,4 +56,4 @@ Refer to corresponding [unit test](../test/gtest/kernels/) for examples. 
| MHA | Dense | AMX-INT8 | | [Transposed MHA](docs/kernel_desc/kernel_transpose_mha.md) | Sparse | AMX-INT8, VNNI | | Transposed Layernorm | Sparse | AVX512F | -| [Dynamic Quant Matmul](docs/kernel_desc/kernel_dynamic_quant_matmul.md) | Dense | AMX-INT8 | \ No newline at end of file +| [Dynamic Quant Matmul](docs/kernel_desc/kernel_dynamic_quant_matmul.md) | Dense | AMX-INT8 | diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/kernels/docs/kernel_desc/gpu/sparse_gemm_gpu.md b/intel_extension_for_transformers/llm/runtime/deprecated/kernels/docs/kernel_desc/gpu/sparse_gemm_gpu.md index b2db2ca3fa0..3ca4a982f2d 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/kernels/docs/kernel_desc/gpu/sparse_gemm_gpu.md +++ b/intel_extension_for_transformers/llm/runtime/deprecated/kernels/docs/kernel_desc/gpu/sparse_gemm_gpu.md @@ -336,4 +336,4 @@ __kernel void gemm(const int M, const int N, const int K, } } } -``` \ No newline at end of file +``` diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/kernels/docs/kernel_desc/kernel_dynamic_quant_matmul.md b/intel_extension_for_transformers/llm/runtime/deprecated/kernels/docs/kernel_desc/kernel_dynamic_quant_matmul.md index ca204325bf8..8e563061a67 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/kernels/docs/kernel_desc/kernel_dynamic_quant_matmul.md +++ b/intel_extension_for_transformers/llm/runtime/deprecated/kernels/docs/kernel_desc/kernel_dynamic_quant_matmul.md @@ -59,4 +59,4 @@ Finally, we can intuitively feel the performance benefits of kernel executing on
![perf_chat](../imgs/kernel_dynamic_quant_matmul_perf_chat.png) -
\ No newline at end of file + diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/kernels/docs/profiling.md b/intel_extension_for_transformers/llm/runtime/deprecated/kernels/docs/profiling.md index de3a0977d64..e3299f95af1 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/kernels/docs/profiling.md +++ b/intel_extension_for_transformers/llm/runtime/deprecated/kernels/docs/profiling.md @@ -167,4 +167,3 @@ XDIS 2a5: RET BASE C3 ret ``` - diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/kernels/scripts/README.md b/intel_extension_for_transformers/llm/runtime/deprecated/kernels/scripts/README.md index f1530316772..ee1ffa4a3f1 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/kernels/scripts/README.md +++ b/intel_extension_for_transformers/llm/runtime/deprecated/kernels/scripts/README.md @@ -12,5 +12,3 @@ You have to prepare a model.bin file and its conf.yaml file for neural engine mo ### 3. Analysis results After running the above command, you can see one of the visualized results in the heatmap figure below; the script generates a JPG picture in the same directory for each weight tensor, and each picture is named after its tensor. ![Sample figure](sample_figure.jpg) - - diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/kernels/scripts/requirements.txt b/intel_extension_for_transformers/llm/runtime/deprecated/kernels/scripts/requirements.txt index 120bc061201..9aa3581de8b 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/kernels/scripts/requirements.txt +++ b/intel_extension_for_transformers/llm/runtime/deprecated/kernels/scripts/requirements.txt @@ -1,4 +1,4 @@ matplotlib numpy -torch~=1.13 pyyaml +torch~=1.13 diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/kernels/scripts/sparsity_all.py b/intel_extension_for_transformers/llm/runtime/deprecated/kernels/scripts/sparsity_all.py index e60fb540098..a2329dba547 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/kernels/scripts/sparsity_all.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/kernels/scripts/sparsity_all.py @@ -50,7 +50,7 @@ def cal_sparse_pytorch(tensor, block): type=int, default=0, help="modeltype: 0 pytorch model, 1 nn model" - + ) parser.add_argument( '--path', @@ -64,13 +64,13 @@ def cal_sparse_pytorch(tensor, block): print("-"*100+"visualize pytorch model"+"-"*100) print("model path:"+str(args.path)) model_file='' - for root, dirs, files in os.walk(args.path): + for root, dirs, files in os.walk(args.path): for file_name in files: #search .bin file if '.bin' in file_name: model_file=file_name if model_file=='': - assert "Must contain .bin file in this directory!" - path_model=args.path+"/"+model_file + assert "Must contain .bin file in this directory!"
+ path_model=args.path+"/"+model_file checkpoint=torch.load(path_model,map_location="cpu") offset_path=args.path+"/pytorch_model_hotmaps/" if not os.path.exists(offset_path): @@ -81,7 +81,7 @@ def cal_sparse_pytorch(tensor, block): for name in checkpoint: if "weight" in name and len((checkpoint[name]).shape)>1: #and name.find("weight") > 0 tensor = checkpoint[name] - tensor_size = int(tensor.numel()) #get number of element + tensor_size = int(tensor.numel()) #get number of element elt_zero = tensor_size - tensor.nonzero().size(0) elt_sparse = elt_zero / tensor_size if len(tensor.size()) > 1: @@ -106,7 +106,7 @@ def cal_sparse_pytorch(tensor, block): print(name, elt_zero, elt_sparse, tensor.size()) f.write("{}:{},{}\t{}".format(name, elt_zero, elt_sparse, tensor.size())) - + if args.modeltype==1: print("-"*100+"visualize nn model"+"-"*100) print("model path:"+str(args.path)) diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/test/gtest/kernels/FindISA.cmake b/intel_extension_for_transformers/llm/runtime/deprecated/test/gtest/kernels/FindISA.cmake index 7d6745a3f98..caa8573aa76 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/test/gtest/kernels/FindISA.cmake +++ b/intel_extension_for_transformers/llm/runtime/deprecated/test/gtest/kernels/FindISA.cmake @@ -47,10 +47,10 @@ endmacro() macro(check_isa isa) if(WIN32) - string(TOUPPER "${isa}" UPPER_ISA) + string(TOUPPER "${isa}" UPPER_ISA) check_isa_win("${UPPER_ISA}") endif() - if(UNIX) + if(UNIX) check_isa_unix("${isa}") endif() endmacro() diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/requirements.txt b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/requirements.txt index e577ddd6a67..22240c8f888 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/requirements.txt +++ b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/requirements.txt @@ -1,8 +1,8 @@ +datasets +intel-extension-for-pytorch==2.0.0 +intel-tensorflow==2.12.0 onnx onnxruntime -datasets -transformers onnxruntime_extensions torch==2.0 -intel-tensorflow==2.12.0 -intel-extension-for-pytorch==2.0.0 +transformers diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_add_embeddings.py b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_add_embeddings.py index cd2baccf7e8..cacdf7e76de 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_add_embeddings.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_add_embeddings.py @@ -39,20 +39,20 @@ def test_add_embeddings_with_seq_len_first(self): input_data_node = OPERATORS['Input']() input_tensors = [] output_tensors = [Tensor(), Tensor(), Tensor()] - input_data_node.construct('input_data', 'Input', input_tensors=input_tensors, + input_data_node.construct('input_data', 'Input', input_tensors=input_tensors, output_tensors=output_tensors) add_node = OPERATORS['Add']() input_tensors = [Tensor(name='add_src0'), Tensor(name='add_src1')] output_tensors = [Tensor(name='add:0', source_op=['add'], dest_op=['transpose'])] - add_node.construct('add', 'Add', input_tensors=input_tensors, + add_node.construct('add', 'Add', input_tensors=input_tensors, output_tensors=output_tensors) - + transpose_node = OPERATORS['Transpose']() input_tensors = [Tensor(name='add:0', source_op=['add'], dest_op=['transpose'])] output_tensors = [Tensor(name='transpose:0', source_op=['transpose'], dest_op=['layernorm'])] - 
transpose_node.construct('transpose', 'Transpose', input_tensors=input_tensors, + transpose_node.construct('transpose', 'Transpose', input_tensors=input_tensors, output_tensors=output_tensors, attr=OrderedDict({ 'src_perm': '0,1,2', 'dst_perm': '1,0,2'})) @@ -65,7 +65,7 @@ def test_add_embeddings_with_seq_len_first(self): shape=[1024])] output_tensors = [Tensor(name='layernorm:0', source_op=['layernorm'], dest_op=[])] - ln_node.construct('layernorm', 'LayerNorm', input_tensors=input_tensors, + ln_node.construct('layernorm', 'LayerNorm', input_tensors=input_tensors, output_tensors=output_tensors, attr=OrderedDict({'epsilon': 0.009})) graph.insert_nodes(len(graph.nodes), [input_data_node, add_node, transpose_node, ln_node]) diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_binary_op.py b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_binary_op.py index aed849ebd3f..c00921289e6 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_binary_op.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_binary_op.py @@ -70,4 +70,4 @@ def test_binary_op(self): self.assertEqual(id_0, id_1) if __name__ == "__main__": - unittest.main() \ No newline at end of file + unittest.main() diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_cast_to.py b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_cast_to.py index c1280986b96..88aef522ef0 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_cast_to.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_cast_to.py @@ -39,20 +39,20 @@ def test_softmax_cast(self): input_data_node = OPERATORS['Input']() input_tensors = [] output_tensors = [Tensor(), Tensor(), Tensor()] - input_data_node.construct('input_data', 'Input', input_tensors=input_tensors, + input_data_node.construct('input_data', 'Input', input_tensors=input_tensors, output_tensors=output_tensors) softmax_node = OPERATORS['Softmax']() input_tensors = [Tensor(name='s_src0')] output_tensors = [Tensor(name='softmax:0', source_op=['softmax'], dest_op=['cast'])] - softmax_node.construct('softmax', 'Softmax', input_tensors=input_tensors, + softmax_node.construct('softmax', 'Softmax', input_tensors=input_tensors, output_tensors=output_tensors) - + cast_node = OPERATORS['Cast']() input_tensors = [Tensor(name='softmax:0', source_op=['softmax'], dest_op=['cast'])] output_tensors = [Tensor(name='cast:0', source_op=['cast'], dest_op=[])] - cast_node.construct('cast', 'Cast', input_tensors=input_tensors, + cast_node.construct('cast', 'Cast', input_tensors=input_tensors, output_tensors=output_tensors, attr=OrderedDict({ 'DstT': 'fp32'})) @@ -67,20 +67,20 @@ def test_greater_cast_reducesum(self): input_data_node = OPERATORS['Input']() input_tensors = [] output_tensors = [Tensor(), Tensor(), Tensor()] - input_data_node.construct('input_data', 'Input', input_tensors=input_tensors, + input_data_node.construct('input_data', 'Input', input_tensors=input_tensors, output_tensors=output_tensors) greater_node = OPERATORS['Greater']() input_tensors = [Tensor(name='g_src0'), Tensor(name='g_src1', data=np.array([1]).astype("int64"), shape=[1])] output_tensors = [Tensor(name='greater:0', source_op=['greater'], dest_op=['cast'])] - greater_node.construct('greater', 'Greater', input_tensors=input_tensors, + greater_node.construct('greater', 'Greater', input_tensors=input_tensors, 
output_tensors=output_tensors) cast_node = OPERATORS['Cast']() input_tensors = [Tensor(name='greater:0', source_op=['greater'], dest_op=['cast'])] output_tensors = [Tensor(name='cast:0', source_op=['cast'], dest_op=['reducesum'])] - cast_node.construct('cast', 'Cast', input_tensors=input_tensors, + cast_node.construct('cast', 'Cast', input_tensors=input_tensors, output_tensors=output_tensors, attr=OrderedDict({ 'DstT': 'int64'})) @@ -88,7 +88,7 @@ def test_greater_cast_reducesum(self): input_tensors = [Tensor(name='cast:0', source_op=['cast'], dest_op=['reducesum'])] output_tensors = [Tensor(name='reducesum:0', source_op=['reducesum'], dest_op=[])] - rs_node.construct('reducesum', 'ReduceSum', input_tensors=input_tensors, + rs_node.construct('reducesum', 'ReduceSum', input_tensors=input_tensors, output_tensors=output_tensors) graph.insert_nodes(len(graph.nodes), [input_data_node, greater_node, cast_node, rs_node]) @@ -102,20 +102,20 @@ def test_range_cast_less(self): input_data_node = OPERATORS['Input']() input_tensors = [] output_tensors = [Tensor(), Tensor(), Tensor()] - input_data_node.construct('input_data', 'Input', input_tensors=input_tensors, + input_data_node.construct('input_data', 'Input', input_tensors=input_tensors, output_tensors=output_tensors) cast_node = OPERATORS['Cast']() input_tensors = [Tensor(name='c_src0')] output_tensors = [Tensor(name='cast:0', source_op=['cast'], dest_op=['range'])] - cast_node.construct('cast', 'Cast', input_tensors=input_tensors, + cast_node.construct('cast', 'Cast', input_tensors=input_tensors, output_tensors=output_tensors, attr=OrderedDict({ 'DstT': 'int64'})) range_node = OPERATORS['Range']() input_tensors = [Tensor(name='cast:0', source_op=['cast'], dest_op=['range'])] output_tensors = [Tensor(name='range:0', source_op=['range'], dest_op=['less'])] - range_node.construct('range', 'Range', input_tensors=input_tensors, + range_node.construct('range', 'Range', input_tensors=input_tensors, output_tensors=output_tensors) less_node = OPERATORS['Less']() @@ -123,7 +123,7 @@ def test_range_cast_less(self): Tensor(name='less_val', data=np.array([3]).astype("int64"))] output_tensors = [Tensor(name='less:0', source_op=['less'], dest_op=[])] - less_node.construct('less', 'Less', input_tensors=input_tensors, + less_node.construct('less', 'Less', input_tensors=input_tensors, output_tensors=output_tensors) graph.insert_nodes(len(graph.nodes), [input_data_node, cast_node, range_node, less_node]) diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_decoder_attn_reshape.py b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_decoder_attn_reshape.py index e45f8957874..fcd37d8bbd7 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_decoder_attn_reshape.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_decoder_attn_reshape.py @@ -39,54 +39,54 @@ def test_decoder_attn_reshape(self): input_data_node = OPERATORS['Input']() input_tensors = [] output_tensors = [Tensor(), Tensor(), Tensor()] - input_data_node.construct('input_data', 'Input', input_tensors=input_tensors, + input_data_node.construct('input_data', 'Input', input_tensors=input_tensors, output_tensors=output_tensors) unsqueeze1_node = OPERATORS['Unsqueeze']() input_tensors = [Tensor(name='unsqueeze1_src0')] output_tensors = [Tensor(name='unsqueeze1:0', source_op=['unsqueeze1'], dest_op=['concat'])] - unsqueeze1_node.construct('unsqueeze1', 'Unsqueeze', input_tensors=input_tensors, + 
unsqueeze1_node.construct('unsqueeze1', 'Unsqueeze', input_tensors=input_tensors, output_tensors=output_tensors) shape1_node = OPERATORS['Shape']() input_tensors = [Tensor(name='s1_src0')] output_tensors = [Tensor(name='shape1:0', source_op=['shape1'], dest_op=['gather1'])] - shape1_node.construct('shape1', 'Shape', input_tensors=input_tensors, + shape1_node.construct('shape1', 'Shape', input_tensors=input_tensors, output_tensors=output_tensors) gather1_node = OPERATORS['Gather']() input_tensors = [Tensor(name='shape1:0', source_op=['shape1'], dest_op=['gather1']), Tensor(name='gather1_idx:0', data=np.array([0]).astype("int32"))] output_tensors = [Tensor(name='gather1:0', source_op=['gather1'], dest_op=['unsqueeze2'])] - gather1_node.construct('gather1', 'Gather', input_tensors=input_tensors, + gather1_node.construct('gather1', 'Gather', input_tensors=input_tensors, output_tensors=output_tensors) unsqueeze2_node = OPERATORS['Unsqueeze']() input_tensors = [Tensor(name='gather1:0', source_op=['gather1'], dest_op=['unsqueeze2'])] output_tensors = [Tensor(name='unsqueeze2:0', source_op=['unsqueeze2'], dest_op=['concat'])] - unsqueeze2_node.construct('unsqueeze2', 'Unsqueeze', input_tensors=input_tensors, + unsqueeze2_node.construct('unsqueeze2', 'Unsqueeze', input_tensors=input_tensors, output_tensors=output_tensors) shape2_node = OPERATORS['Shape']() input_tensors = [Tensor(name='s2_src0')] output_tensors = [Tensor(name='shape2:0', source_op=['shape2'], dest_op=['gather2'])] - shape2_node.construct('shape2', 'Shape', input_tensors=input_tensors, + shape2_node.construct('shape2', 'Shape', input_tensors=input_tensors, output_tensors=output_tensors) gather2_node = OPERATORS['Gather']() input_tensors = [Tensor(name='shape2:0', source_op=['shape2'], dest_op=['gather2']), Tensor(name='gather2_idx:0', data=np.array([0]).astype("int32"))] output_tensors = [Tensor(name='gather2:0', source_op=['gather2'], dest_op=['unsqueeze3'])] - gather2_node.construct('gather2', 'Gather', input_tensors=input_tensors, + gather2_node.construct('gather2', 'Gather', input_tensors=input_tensors, output_tensors=output_tensors) unsqueeze3_node = OPERATORS['Unsqueeze']() input_tensors = [Tensor(name='gather2:0', source_op=['gather2'], dest_op=['unsqueeze3'])] output_tensors = [Tensor(name='unsqueeze3:0', source_op=['unsqueeze3'], dest_op=['concat'])] - unsqueeze3_node.construct('unsqueeze3', 'Unsqueeze', input_tensors=input_tensors, + unsqueeze3_node.construct('unsqueeze3', 'Unsqueeze', input_tensors=input_tensors, output_tensors=output_tensors) concat_node = OPERATORS['Concat']() @@ -96,27 +96,27 @@ def test_decoder_attn_reshape(self): Tensor(name='unsqueeze3:0', source_op=['unsqueeze3'], dest_op=['concat'])] output_tensors = [Tensor(name='concat:0', source_op=['concat'], dest_op=['reshape'])] - concat_node.construct('concat', 'Concat', input_tensors=input_tensors, + concat_node.construct('concat', 'Concat', input_tensors=input_tensors, output_tensors=output_tensors) reshape_node = OPERATORS['Reshape']() input_tensors = [Tensor(name='concat:0', source_op=['concat'], dest_op=['reshape'])] output_tensors = [Tensor(name='reshape:0', source_op=['reshape'], dest_op=['gather3'])] - reshape_node.construct('reshape', 'Reshape', input_tensors=input_tensors, + reshape_node.construct('reshape', 'Reshape', input_tensors=input_tensors, output_tensors=output_tensors) gather3_node = OPERATORS['Gather']() input_tensors = [Tensor(name='reshape:0', source_op=['reshape'], dest_op=['gather3']), Tensor(name='gather3_idx:0', 
data=np.array([0]).astype("int32"))] output_tensors = [Tensor(name='gather3:0', source_op=['gather3'], dest_op=['transpose'])] - gather3_node.construct('gather3', 'Gather', input_tensors=input_tensors, + gather3_node.construct('gather3', 'Gather', input_tensors=input_tensors, output_tensors=output_tensors, attr=OrderedDict({'axis': 1})) transpose_node = OPERATORS['Transpose']() input_tensors = [Tensor(name='gather3:0', source_op=['gather3'], dest_op=['transpose'])] output_tensors = [Tensor(name='transpose:0', source_op=['transpose'], dest_op=[])] - transpose_node.construct('transpose', 'Transpose', input_tensors=input_tensors, + transpose_node.construct('transpose', 'Transpose', input_tensors=input_tensors, output_tensors=output_tensors) graph.insert_nodes(len(graph.nodes), [input_data_node, unsqueeze1_node, shape1_node, diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_embeddingbag.py b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_embeddingbag.py index bf6842046ea..77bb90a0de0 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_embeddingbag.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_embeddingbag.py @@ -32,37 +32,37 @@ def setUpClass(self): @classmethod def tearDownClass(self): pass - + def test_embeddingbag_1(self): graph = Graph() graph.framework_modeling_config['framework'] = 'onnxruntime' input_data_node = OPERATORS['Input']() input_tensors = [] output_tensors = [Tensor(), Tensor(), Tensor()] - input_data_node.construct('input_data', 'Input', input_tensors=input_tensors, + input_data_node.construct('input_data', 'Input', input_tensors=input_tensors, output_tensors=output_tensors) squeeze_node = OPERATORS['Squeeze']() - input_tensors = [Tensor(data=np.array(1)), Tensor(data=np.array(1)), + input_tensors = [Tensor(data=np.array(1)), Tensor(data=np.array(1)), Tensor(data=np.array(1))] - output_tensors = [Tensor(name='matmul:0', source_op=['matmul'], + output_tensors = [Tensor(name='matmul:0', source_op=['matmul'], dest_op=['relu'])] - squeeze_node.construct('matmul', 'Squeeze', input_tensors=input_tensors, + squeeze_node.construct('matmul', 'Squeeze', input_tensors=input_tensors, output_tensors=output_tensors, attr=OrderedDict({ 'src1_perm': '1,0'})) - + embeddingbag_node = OPERATORS['EmbeddingBag']() - input_tensors = [Tensor(name='matmul:0', source_op=['matmul'], + input_tensors = [Tensor(name='matmul:0', source_op=['matmul'], dest_op=['relu']), - Tensor(name='matmul1:0', source_op=['matmul'], + Tensor(name='matmul1:0', source_op=['matmul'], dest_op=['relu']), - Tensor(name='matmul2:0', source_op=['matmul'], + Tensor(name='matmul2:0', source_op=['matmul'], dest_op=['relu'])] output_tensors = [Tensor(name='relu:0', source_op=['relu'], dest_op=[])] - embeddingbag_node.construct('relu', 'EmbeddingBag', input_tensors=input_tensors, + embeddingbag_node.construct('relu', 'EmbeddingBag', input_tensors=input_tensors, output_tensors=output_tensors) - + graph.insert_nodes(len(graph.nodes), [input_data_node, squeeze_node, embeddingbag_node]) graph = EmbeddingBag()(graph) self.assertEqual(3, len(graph.nodes)) diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_execution_options.py b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_execution_options.py index 047b34be526..dcda3a545a0 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_execution_options.py +++ 
b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_execution_options.py @@ -39,7 +39,7 @@ def test_execution_options(self): input_data_node = OPERATORS['Input']() input_tensors = [] output_tensors = [Tensor(name="activation", shape=[-1, -1], dtype="fp32")] - input_data_node.construct('input_data', 'Input', input_tensors=input_tensors, + input_data_node.construct('input_data', 'Input', input_tensors=input_tensors, output_tensors=output_tensors) ip_node = OPERATORS['InnerProduct']() input_tensors = [Tensor(name="activation", shape=[-1, -1], dtype="fp32"), diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_gelu.py b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_gelu.py index 75e0431565d..b015d5d48ed 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_gelu.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_gelu.py @@ -31,46 +31,46 @@ def setUpClass(self): @classmethod def tearDownClass(self): pass - + def test_gelu_1(self): graph = Graph() graph.framework_modeling_config['framework'] = 'onnxruntime' input_data_node = OPERATORS['Input']() input_tensors = [] output_tensors = [Tensor(), Tensor(), Tensor()] - input_data_node.construct('input_data', 'Input', input_tensors=input_tensors, + input_data_node.construct('input_data', 'Input', input_tensors=input_tensors, output_tensors=output_tensors) div_node = OPERATORS['Div']() input_tensors = [Tensor()] output_tensors = [Tensor(name='div:0', source_op=['div'], dest_op=['erf'])] - div_node.construct('div', 'Div', input_tensors=input_tensors, + div_node.construct('div', 'Div', input_tensors=input_tensors, output_tensors=output_tensors) - + erf_node = OPERATORS['Erf']() input_tensors = [Tensor(name='div:0', source_op=['div'], dest_op=['erf'])] output_tensors = [Tensor(name='erf:0', source_op=['erf'], dest_op=['add'])] - erf_node.construct('erf', 'Erf', input_tensors=input_tensors, + erf_node.construct('erf', 'Erf', input_tensors=input_tensors, output_tensors=output_tensors) - + add_node = OPERATORS['Add']() input_tensors = [Tensor(name='erf:0', source_op=['erf'], dest_op=['add'])] output_tensors = [Tensor(name='add:0', source_op=['add'], dest_op=['mul_1'])] - add_node.construct('add', 'Add', input_tensors=input_tensors, + add_node.construct('add', 'Add', input_tensors=input_tensors, output_tensors=output_tensors) - + mul_1_node = OPERATORS['Mul']() input_tensors = [Tensor(name='add:0', source_op=['add'], dest_op=['mul_1'])] output_tensors = [Tensor(name='mul_1:0', source_op=['mul_1'], dest_op=['mul_2'])] - mul_1_node.construct('mul_1', 'Mul', input_tensors=input_tensors, + mul_1_node.construct('mul_1', 'Mul', input_tensors=input_tensors, output_tensors=output_tensors) - + mul_2_node = OPERATORS['Mul']() input_tensors = [Tensor(name='mul_1:0', source_op=['mul_1'], dest_op=['mul_2'])] output_tensors = [Tensor(name='mul_2:0', source_op=['mul_2'], dest_op=[])] - mul_2_node.construct('mul_2', 'Mul', input_tensors=input_tensors, + mul_2_node.construct('mul_2', 'Mul', input_tensors=input_tensors, output_tensors=output_tensors) - + graph.insert_nodes(len(graph.nodes), [input_data_node, div_node, erf_node, add_node, mul_1_node, mul_2_node]) graph = Gelu()(graph) diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_graph_dispatch.py b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_graph_dispatch.py index 3ffcd68f33d..a415bbf56e5 100644 --- 
a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_graph_dispatch.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_graph_dispatch.py @@ -42,7 +42,7 @@ def test_graph_dispatch(self): input_0 = np.random.uniform(low=0, high=128, size=shape).astype('int32') input_1 = np.random.uniform(low=0, high=1, size=shape).astype('int32') input_2 = np.random.uniform(low=0, high=1, size=shape).astype('int32') - + # validate int8 sparse graph tuning int8_model_path = "/tf_dataset2/inc-ut/nlptoolkit_ut_model/bert_mini_int8_original_IR" if is_win(): diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_is_supported_onnx_graph.py b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_is_supported_onnx_graph.py index 3e4df2646d2..af36efe0265 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_is_supported_onnx_graph.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_is_supported_onnx_graph.py @@ -35,7 +35,7 @@ def tearDownClass(self): def test_is_supported_onnx_graph(self): '''test is_supported_onnx_graph''' # create onnx matmul + bias graph - # input and output + # input and output a = helper.make_tensor_value_info('a', TensorProto.FLOAT, [10, 10]) x = helper.make_tensor_value_info('x', TensorProto.FLOAT, [10, 10]) b = helper.make_tensor_value_info('b', TensorProto.FLOAT, [10, 10]) @@ -49,7 +49,7 @@ def test_is_supported_onnx_graph(self): # test API is_supported = is_supported_onnx_graph(graph) self.assertEqual(is_supported, True) - + '''test is_supported_onnx_node''' ops_type = ["Add", "Softmax", "Slice", "ReduceMean", "Reshape", "Concat", "Gather", "QuantizeLinear", "Transpose", "MatMul", diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_last_layer_shape.py b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_last_layer_shape.py index 4c394811f06..eb750a1c728 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_last_layer_shape.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_last_layer_shape.py @@ -31,47 +31,47 @@ def setUpClass(self): @classmethod def tearDownClass(self): pass - + def test_last_layer_shape_1(self): graph = Graph() graph.framework_modeling_config['framework'] = 'onnxruntime' input_data_node = OPERATORS['Input']() input_tensors = [] output_tensors = [Tensor(), Tensor(), Tensor()] - input_data_node.construct('input_data', 'Input', input_tensors=input_tensors, + input_data_node.construct('input_data', 'Input', input_tensors=input_tensors, output_tensors=output_tensors) pack_node = OPERATORS['Pack']() input_tensors = [Tensor(), Tensor(), Tensor(data=768)] output_tensors = [Tensor(name='pack:0', source_op=['pack'], dest_op=['reshape'])] - pack_node.construct('pack', 'Pack', input_tensors=input_tensors, + pack_node.construct('pack', 'Pack', input_tensors=input_tensors, output_tensors=output_tensors) - + reshape_node = OPERATORS['Reshape']() input_tensors = [Tensor(name='pack:0', source_op=['pack'], dest_op=['reshape'])] - output_tensors = [Tensor(name='reshape:0', source_op=['reshape'], + output_tensors = [Tensor(name='reshape:0', source_op=['reshape'], dest_op=['strided_slice'])] - reshape_node.construct('reshape', 'Reshape', input_tensors=input_tensors, + reshape_node.construct('reshape', 'Reshape', input_tensors=input_tensors, output_tensors=output_tensors) strided_slice_node = 
OPERATORS['StridedSlice']() - input_tensors = [Tensor(name='reshape:0', source_op=['reshape'], + input_tensors = [Tensor(name='reshape:0', source_op=['reshape'], dest_op=['strided_slice'])] - output_tensors = [Tensor(name='strided_slice:0', source_op=['strided_slice'], + output_tensors = [Tensor(name='strided_slice:0', source_op=['strided_slice'], dest_op=['squeeze'])] - strided_slice_node.construct('strided_slice', 'StridedSlice', input_tensors=input_tensors, + strided_slice_node.construct('strided_slice', 'StridedSlice', input_tensors=input_tensors, output_tensors=output_tensors, attr=OrderedDict({'test': 1})) - + squeeze_node = OPERATORS['Squeeze']() - input_tensors = [Tensor(name='strided_slice:0', source_op=['strided_slice'], + input_tensors = [Tensor(name='strided_slice:0', source_op=['strided_slice'], dest_op=['squeeze'])] output_tensors = [Tensor(name='squeeze:0', dest_op=[])] - squeeze_node.construct('squeeze', 'Squeeze', input_tensors=input_tensors, + squeeze_node.construct('squeeze', 'Squeeze', input_tensors=input_tensors, output_tensors=output_tensors) - - graph.insert_nodes(len(graph.nodes), [input_data_node, pack_node, reshape_node, + + graph.insert_nodes(len(graph.nodes), [input_data_node, pack_node, reshape_node, strided_slice_node, squeeze_node]) - + graph = LastLayerShape()(graph) self.assertEqual(4, len(graph.nodes)) self.assertEqual('-1,-1,768', graph.nodes[1].attr['dst_shape']) diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_layer_norm_with_reduce_mean.py b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_layer_norm_with_reduce_mean.py index 4cbf142ea17..ed43b9dc8ce 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_layer_norm_with_reduce_mean.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_layer_norm_with_reduce_mean.py @@ -31,33 +31,33 @@ def setUpClass(self): @classmethod def tearDownClass(self): pass - + def test_layer_norm_with_reduce_mean_1(self): graph = Graph() graph.framework_modeling_config['framework'] = 'onnxruntime' input_data_node = OPERATORS['Input']() input_tensors = [] output_tensors = [Tensor(), Tensor(), Tensor()] - input_data_node.construct('input_data', 'Input', input_tensors=input_tensors, + input_data_node.construct('input_data', 'Input', input_tensors=input_tensors, output_tensors=output_tensors) ln_node = OPERATORS['LayerNorm']() input_tensors = [Tensor(), Tensor(shape=[768]), Tensor(shape=[768])] - output_tensors = [Tensor(name='layer_norm:0', source_op=['layer_norm'], + output_tensors = [Tensor(name='layer_norm:0', source_op=['layer_norm'], dest_op=['reduce_mean'])] - ln_node.construct('layer_norm', 'LayerNorm', input_tensors=input_tensors, + ln_node.construct('layer_norm', 'LayerNorm', input_tensors=input_tensors, output_tensors=output_tensors, attr=OrderedDict({ 'epsilon': 0.009})) - + reduce_mean_node = OPERATORS['ReduceMean']() - input_tensors = [Tensor(name='layer_norm:0', source_op=['layer_norm'], + input_tensors = [Tensor(name='layer_norm:0', source_op=['layer_norm'], dest_op=['reduce_mean'])] output_tensors = [Tensor(name='reduce_mean:0', source_op=['reduce_mean'], dest_op=[])] - reduce_mean_node.construct('reduce_mean', 'ReduceMean', input_tensors=input_tensors, + reduce_mean_node.construct('reduce_mean', 'ReduceMean', input_tensors=input_tensors, output_tensors=output_tensors, attr=OrderedDict( {'axis': 1, 'keep_dims': False})) - + graph.insert_nodes(len(graph.nodes), [input_data_node, ln_node, 
reduce_mean_node]) graph = LayerNormWithReduceMean()(graph) self.assertEqual(5, len(graph.nodes)) diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_layer_norm_with_transpose.py b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_layer_norm_with_transpose.py index fc83828e6f7..e0fb037026f 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_layer_norm_with_transpose.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_layer_norm_with_transpose.py @@ -38,31 +38,31 @@ def test_layer_norm_with_transpose(self): input_data_node = OPERATORS['Input']() input_tensors = [] output_tensors = [Tensor(), Tensor(), Tensor()] - input_data_node.construct('input_data', 'Input', input_tensors=input_tensors, + input_data_node.construct('input_data', 'Input', input_tensors=input_tensors, output_tensors=output_tensors) mat_node = OPERATORS['MatMulWithBiasAdd']() input_tensors = [Tensor(), Tensor(shape=[768]), Tensor(shape=[768]), Tensor()] output_tensors = [Tensor(name='mat:0', source_op=['mat'], dest_op=['layer_norm'])] - mat_node.construct('mat', 'MatMulWithBiasAdd', input_tensors=input_tensors, + mat_node.construct('mat', 'MatMulWithBiasAdd', input_tensors=input_tensors, output_tensors=output_tensors, attr=OrderedDict({ 'append_op': 'sum'})) ln_node = OPERATORS['LayerNorm']() input_tensors = [Tensor(name='mat:0', source_op=['mat'], dest_op=['layer_norm']), Tensor(shape=[768]), Tensor(shape=[768])] - output_tensors = [Tensor(name='layer_norm:0', source_op=['layer_norm'], + output_tensors = [Tensor(name='layer_norm:0', source_op=['layer_norm'], dest_op=['transpose'])] - ln_node.construct('layer_norm', 'LayerNorm', input_tensors=input_tensors, + ln_node.construct('layer_norm', 'LayerNorm', input_tensors=input_tensors, output_tensors=output_tensors, attr=OrderedDict({ 'epsilon': 0.009})) - + transpose_node = OPERATORS['Transpose']() - input_tensors = [Tensor(name='layer_norm:0', source_op=['layer_norm'], + input_tensors = [Tensor(name='layer_norm:0', source_op=['layer_norm'], dest_op=['transpose'])] output_tensors = [Tensor(name='transpose:0', source_op=['transpose'], dest_op=[])] - transpose_node.construct('transpose', 'Transpose', input_tensors=input_tensors, + transpose_node.construct('transpose', 'Transpose', input_tensors=input_tensors, output_tensors=output_tensors, attr=OrderedDict( {'src_perm': '0,1,2', 'dst_perm': '1,0,2'})) diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_matmul_with_bias_relu.py b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_matmul_with_bias_relu.py index cd0be0b6872..737d486dea4 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_matmul_with_bias_relu.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_matmul_with_bias_relu.py @@ -32,33 +32,33 @@ def setUpClass(self): @classmethod def tearDownClass(self): pass - + def test_matmul_with_bias_relu_1(self): graph = Graph() graph.framework_modeling_config['framework'] = 'onnxruntime' input_data_node = OPERATORS['Input']() input_tensors = [] output_tensors = [Tensor(), Tensor(), Tensor()] - input_data_node.construct('input_data', 'Input', input_tensors=input_tensors, + input_data_node.construct('input_data', 'Input', input_tensors=input_tensors, output_tensors=output_tensors) mat_node = OPERATORS['MatMulWithBias']() - input_tensors = [Tensor(data=np.array(1)), Tensor(data=np.array(1)), + 
input_tensors = [Tensor(data=np.array(1)), Tensor(data=np.array(1)), Tensor(data=np.array(1))] - output_tensors = [Tensor(name='matmul:0', source_op=['matmul'], + output_tensors = [Tensor(name='matmul:0', source_op=['matmul'], dest_op=['relu'])] - mat_node.construct('matmul', 'MatMulWithBias', input_tensors=input_tensors, + mat_node.construct('matmul', 'MatMulWithBias', input_tensors=input_tensors, output_tensors=output_tensors, attr=OrderedDict({ 'src1_perm': '1,0'})) - + tanh_node = OPERATORS['Relu']() - input_tensors = [Tensor(name='matmul:0', source_op=['matmul'], + input_tensors = [Tensor(name='matmul:0', source_op=['matmul'], dest_op=['relu'])] output_tensors = [Tensor(name='relu:0', source_op=['relu'], dest_op=[])] - tanh_node.construct('relu', 'Relu', input_tensors=input_tensors, + tanh_node.construct('relu', 'Relu', input_tensors=input_tensors, output_tensors=output_tensors) - + graph.insert_nodes(len(graph.nodes), [input_data_node, mat_node, tanh_node]) graph = MatMulWithBiasRelu()(graph) self.assertEqual(2, len(graph.nodes)) diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_matmul_with_bias_sigmoid.py b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_matmul_with_bias_sigmoid.py index 2ad9b097a4b..b278d6e3421 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_matmul_with_bias_sigmoid.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_matmul_with_bias_sigmoid.py @@ -32,33 +32,33 @@ def setUpClass(self): @classmethod def tearDownClass(self): pass - + def test_matmul_with_bias_relu_1(self): graph = Graph() graph.framework_modeling_config['framework'] = 'onnxruntime' input_data_node = OPERATORS['Input']() input_tensors = [] output_tensors = [Tensor(), Tensor(), Tensor()] - input_data_node.construct('input_data', 'Input', input_tensors=input_tensors, + input_data_node.construct('input_data', 'Input', input_tensors=input_tensors, output_tensors=output_tensors) mat_node = OPERATORS['MatMulWithBias']() - input_tensors = [Tensor(data=np.array(1)), Tensor(data=np.array(1)), + input_tensors = [Tensor(data=np.array(1)), Tensor(data=np.array(1)), Tensor(data=np.array(1))] - output_tensors = [Tensor(name='matmul:0', source_op=['matmul'], + output_tensors = [Tensor(name='matmul:0', source_op=['matmul'], dest_op=['sigmoid'])] - mat_node.construct('matmul', 'MatMulWithBias', input_tensors=input_tensors, + mat_node.construct('matmul', 'MatMulWithBias', input_tensors=input_tensors, output_tensors=output_tensors, attr=OrderedDict({ 'src1_perm': '1,0'})) - + tanh_node = OPERATORS['Sigmoid']() - input_tensors = [Tensor(name='matmul:0', source_op=['matmul'], + input_tensors = [Tensor(name='matmul:0', source_op=['matmul'], dest_op=['sigmoid'])] output_tensors = [Tensor(name='sigmoid:0', source_op=['sigmoid'], dest_op=[])] - tanh_node.construct('sigmoid', 'Sigmoid', input_tensors=input_tensors, + tanh_node.construct('sigmoid', 'Sigmoid', input_tensors=input_tensors, output_tensors=output_tensors) - + graph.insert_nodes(len(graph.nodes), [input_data_node, mat_node, tanh_node]) graph = MatMulWithBiasSigmoid()(graph) self.assertEqual(2, len(graph.nodes)) diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_matmul_with_bias_tanh.py b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_matmul_with_bias_tanh.py index 7c806213375..8176cde449a 100644 --- 
a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_matmul_with_bias_tanh.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_matmul_with_bias_tanh.py @@ -32,33 +32,33 @@ def setUpClass(self): @classmethod def tearDownClass(self): pass - + def test_matmul_with_bias_tanh_1(self): graph = Graph() graph.framework_modeling_config['framework'] = 'onnxruntime' input_data_node = OPERATORS['Input']() input_tensors = [] output_tensors = [Tensor(), Tensor(), Tensor()] - input_data_node.construct('input_data', 'Input', input_tensors=input_tensors, + input_data_node.construct('input_data', 'Input', input_tensors=input_tensors, output_tensors=output_tensors) mat_node = OPERATORS['MatMulWithBias']() - input_tensors = [Tensor(data=np.array(1)), Tensor(data=np.array(1)), + input_tensors = [Tensor(data=np.array(1)), Tensor(data=np.array(1)), Tensor(data=np.array(1))] - output_tensors = [Tensor(name='matmul:0', source_op=['matmul'], + output_tensors = [Tensor(name='matmul:0', source_op=['matmul'], dest_op=['tanh'])] - mat_node.construct('matmul', 'MatMulWithBias', input_tensors=input_tensors, + mat_node.construct('matmul', 'MatMulWithBias', input_tensors=input_tensors, output_tensors=output_tensors, attr=OrderedDict({ 'src1_perm': '1,0'})) - + tanh_node = OPERATORS['Tanh']() - input_tensors = [Tensor(name='matmul:0', source_op=['matmul'], + input_tensors = [Tensor(name='matmul:0', source_op=['matmul'], dest_op=['tanh'])] output_tensors = [Tensor(name='tanh:0', source_op=['tanh'], dest_op=[])] - tanh_node.construct('tanh', 'Tanh', input_tensors=input_tensors, + tanh_node.construct('tanh', 'Tanh', input_tensors=input_tensors, output_tensors=output_tensors) - + graph.insert_nodes(len(graph.nodes), [input_data_node, mat_node, tanh_node]) graph = MatmulWithBiasTanh()(graph) self.assertEqual(2, len(graph.nodes)) diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_onnx_utils.py b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_onnx_utils.py index 05109b645fe..bd773735bf2 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_onnx_utils.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_onnx_utils.py @@ -35,11 +35,11 @@ def tearDownClass(self): def test_change_num_name(self): out = compile.onnx_utils.change_num_name(1) self.assertEqual(1, out) - + def test_change_num_namei_same(self): out = compile.onnx_utils.change_num_name('1') self.assertEqual('1_tensor', out) - + def test_bias_to_int32_if1(self): fake_input_tensors = [ Tensor(data=np.array([[1,2],[3,4]], dtype=np.int8)), @@ -50,12 +50,12 @@ def test_bias_to_int32_if1(self): Tensor(data=None), ] fake_bias_node = OPERATORS['Add']() - fake_bias_node.construct('bias_add', 'Add', + fake_bias_node.construct('bias_add', 'Add', input_tensors=fake_input_tensors) out = compile.onnx_utils.bias_to_int32(fake_bias_node, 0.3, 0.4) golden_out = np.array([[1,2],[2,3]]) self.assertSequenceEqual(golden_out.tolist(), out.tolist()) - + def test_bias_to_int32_else(self): fake_input_tensors = [ Tensor(data=None, source_op=[None]), @@ -66,12 +66,12 @@ def test_bias_to_int32_else(self): Tensor(data=np.array(0.2, dtype=np.float32)), ] fake_bias_node = OPERATORS['Add']() - fake_bias_node.construct('bias_add', 'Add', + fake_bias_node.construct('bias_add', 'Add', input_tensors=fake_input_tensors) out = compile.onnx_utils.bias_to_int32(fake_bias_node, 0.3, 0.4) golden_out = np.array([[1,2],[2,3]]) 
self.assertSequenceEqual(golden_out.tolist(), out.tolist()) - + def test_bias_to_int32_if2(self): fake_input_tensors = [ Tensor(data=np.array([[1,2],[3,4]], dtype=np.int64)), @@ -82,7 +82,7 @@ def test_bias_to_int32_if2(self): Tensor(data=None), ] fake_bias_node = OPERATORS['Add']() - fake_bias_node.construct('bias_add', 'Add', + fake_bias_node.construct('bias_add', 'Add', input_tensors=fake_input_tensors) out = compile.onnx_utils.bias_to_int32(fake_bias_node, 0.3, 0.4) self.assertEqual(None, out) @@ -90,7 +90,7 @@ def test_bias_to_int32_if2(self): def test_bf16_tensor_to_array(self): numpy_array = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]], dtype=np.float16) print(f"Original Numpy array:\n{numpy_array}\n") - # Convert the Numpy array to a TensorProto + # Convert the Numpy array to a TensorProto tensor = onnx.helper.make_tensor('bf16_tensor', onnx.TensorProto.BFLOAT16, [2, 3], [1.0,2.0,3.0,4.0,5.0,6.0], False) print(f"TensorProto:\n{tensor}") out = compile.onnx_utils._bf16_tensor_to_array(tensor) diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_operator_adaptor.py b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_operator_adaptor.py index 4a05c5c9c12..f4ee539a3c0 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_operator_adaptor.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_operator_adaptor.py @@ -39,14 +39,14 @@ def test_gather_sweep(self): input_data_node = OPERATORS['Input']() input_tensors = [] output_tensors = [Tensor(), Tensor(), Tensor()] - input_data_node.construct('input_data', 'Input', input_tensors=input_tensors, + input_data_node.construct('input_data', 'Input', input_tensors=input_tensors, output_tensors=output_tensors) gather1_node = OPERATORS['Gather']() input_tensors = [Tensor(name='g1_src0'), Tensor(name='g1_idx:0', data=np.array(0).astype("int32"))] output_tensors = [Tensor(name='gather1:0', source_op=['gather1'], dest_op=[])] - gather1_node.construct('gather1', 'Gather', input_tensors=input_tensors, + gather1_node.construct('gather1', 'Gather', input_tensors=input_tensors, output_tensors=output_tensors) gather1_node._filling_method = 'extract_from_onnxruntime' @@ -54,7 +54,7 @@ def test_gather_sweep(self): input_tensors = [Tensor(name='g2_idx:0', data=np.array(0).astype("int32")), Tensor(name='g2_src1')] output_tensors = [Tensor(name='gather2:0', source_op=['gather2'], dest_op=[])] - gather2_node.construct('gather2', 'Gather', input_tensors=input_tensors, + gather2_node.construct('gather2', 'Gather', input_tensors=input_tensors, output_tensors=output_tensors) gather2_node._filling_method = 'extract_from_onnxruntime' @@ -70,13 +70,13 @@ def test_reshape_non_2d_src_before_inner_product(self): input_data_node = OPERATORS['Input']() input_tensors = [] output_tensors = [Tensor(), Tensor(), Tensor()] - input_data_node.construct('input_data', 'Input', input_tensors=input_tensors, + input_data_node.construct('input_data', 'Input', input_tensors=input_tensors, output_tensors=output_tensors) transpose_node = OPERATORS['Transpose']() input_tensors = [Tensor(name='t_src0')] output_tensors = [Tensor(name='transpose:0', source_op=['transpose'], dest_op=['matmul'])] - transpose_node.construct('transpose', 'Transpose', input_tensors=input_tensors, + transpose_node.construct('transpose', 'Transpose', input_tensors=input_tensors, output_tensors=output_tensors, attr=OrderedDict({ 'dst_perm': '1,0,2'})) @@ -84,7 +84,7 @@ def 
test_reshape_non_2d_src_before_inner_product(self): input_tensors = [Tensor(name='transpose:0', source_op=['transpose'], dest_op=['matmul']), Tensor(name='m_src1', data=np.random.randn(768, 768).astype("float32"))] output_tensors = [Tensor(name='matmul:0', source_op=['matmul'], dest_op=[])] - matmul_node.construct('matmul', 'MatMul', input_tensors=input_tensors, + matmul_node.construct('matmul', 'MatMul', input_tensors=input_tensors, output_tensors=output_tensors) graph.insert_nodes(len(graph.nodes), [input_data_node, transpose_node, matmul_node]) diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_ops.py b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_ops.py index 1a2297fdc17..a3c156442c8 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_ops.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_ops.py @@ -35,7 +35,7 @@ def setUpClass(self): @classmethod def tearDownClass(self): pass - + def test_all(self): all_node = node_def_pb2.NodeDef() all_node.name = 'all' @@ -58,7 +58,7 @@ def test_assert(self): assert_node_test.set_attr('tensorflow', assert_node) summarize = assert_node_test.attr['summarize'] self.assertEqual(3, summarize) - + def test_batch_matmul(self): batch_matmul_node = node_def_pb2.NodeDef() batch_matmul_node.name = 'batch_matmul' @@ -72,7 +72,7 @@ def test_batch_matmul(self): transpose_b = batch_matmul_node_test.attr['transpose_b'] self.assertFalse(transpose_a) self.assertTrue(transpose_b) - + def test_cast(self): cast_node = NodeProto() cast_node.name = 'cast' @@ -83,7 +83,7 @@ def test_cast(self): cast_node_test.set_attr('onnxruntime', cast_node) dst_dtype = cast_node_test.attr['DstT'] self.assertEqual('fp32', dst_dtype) - + def test_concat(self): concat_node = NodeProto() concat_node.name = 'concat' @@ -94,7 +94,7 @@ def test_concat(self): concat_node_test.set_attr('onnxruntime', concat_node) axis = concat_node_test.attr['axis'] self.assertSequenceEqual([1, 2], axis) - + def test_add(self): add_node = NodeProto() add_node.name = 'add' @@ -104,7 +104,7 @@ def test_add(self): add_node_test.name = add_node.name name = add_node_test.name self.assertEqual('add', name) - + def test_constant_of_shape(self): constant_of_shape_node = NodeProto() constant_of_shape_node.name = 'constant_of_shape' @@ -124,7 +124,7 @@ def test_dequantize_linear(self): dequantize_linear_node_test.name = dequantize_linear_node.name name = dequantize_linear_node_test.name self.assertEqual('dequantize_linear', name) - + def test_div(self): div_node = NodeProto() div_node.name = 'div' @@ -134,7 +134,7 @@ def test_div(self): div_node_test.name = div_node.name name = div_node_test.name self.assertEqual('div', name) - + def test_equal(self): equal_node = NodeProto() equal_node.name = 'equal' @@ -144,7 +144,7 @@ def test_equal(self): equal_node_test.name = equal_node.name name = equal_node_test.name self.assertEqual('equal', name) - + def test_expand(self): expand_node = NodeProto() expand_node.name = 'expand' @@ -154,7 +154,7 @@ def test_expand(self): expand_node_test.name = expand_node.name name = expand_node_test.name self.assertEqual('expand', name) - + def test_non_zero(self): non_zero_node = NodeProto() non_zero_node.name = 'non_zero' @@ -164,7 +164,7 @@ def test_non_zero(self): non_zero_node_test.name = non_zero_node.name name = non_zero_node_test.name self.assertEqual('non_zero', name) - + def test_qlinear_matmul(self): qlinear_matmul_node = NodeProto() qlinear_matmul_node.name = 
'qlinear_matmul' @@ -174,7 +174,7 @@ def test_qlinear_matmul(self): qlinear_matmul_node_test.name = qlinear_matmul_node.name name = qlinear_matmul_node_test.name self.assertEqual('qlinear_matmul', name) - + def test_qlinear_add(self): qlinear_add_node = NodeProto() qlinear_add_node.name = 'qlinear_add' @@ -184,7 +184,7 @@ def test_qlinear_add(self): qlinear_add_node_test.name = qlinear_add_node.name name = qlinear_add_node_test.name self.assertEqual('qlinear_add', name) - + def test_qlinear_mul(self): qlinear_mul_node = NodeProto() qlinear_mul_node.name = 'qlinear_mul' @@ -194,7 +194,7 @@ def test_qlinear_mul(self): qlinear_mul_node_test.name = qlinear_mul_node.name name = qlinear_mul_node_test.name self.assertEqual('qlinear_mul', name) - + def test_where(self): where_node = NodeProto() where_node.name = 'where' @@ -214,7 +214,7 @@ def test_erf(self): erf_node_test.name = erf_node.name name = erf_node_test.name self.assertEqual('erf', name) - + def test_fill(self): fill_node = node_def_pb2.NodeDef() fill_node.name = 'fill' @@ -224,7 +224,7 @@ def test_fill(self): fill_node_test.name = fill_node.name name = fill_node_test.name self.assertEqual('fill', name) - + def test_flat_map_dataset(self): flat_map_dataset_node = node_def_pb2.NodeDef() flat_map_dataset_node.name = 'flat_map_dataset' @@ -234,7 +234,7 @@ def test_flat_map_dataset(self): flat_map_dataset_node_test.name = flat_map_dataset_node.name name = flat_map_dataset_node_test.name self.assertEqual('flat_map_dataset', name) - + def test_identity(self): identity_node = node_def_pb2.NodeDef() identity_node.name = 'identity' @@ -244,7 +244,7 @@ def test_identity(self): identity_node_test.name = identity_node.name name = identity_node_test.name self.assertEqual('identity', name) - + def test_innerproduct(self): innerproduct_node = node_def_pb2.NodeDef() innerproduct_node.name = 'innerproduct' @@ -254,7 +254,7 @@ def test_innerproduct(self): innerproduct_node_test.name = innerproduct_node.name name = innerproduct_node_test.name self.assertEqual('innerproduct', name) - + def test_less_equal(self): less_equal_node = node_def_pb2.NodeDef() less_equal_node.name = 'less_equal' @@ -264,7 +264,7 @@ def test_less_equal(self): less_equal_node_test.name = less_equal_node.name name = less_equal_node_test.name self.assertEqual('less_equal', name) - + def test_make_iterator(self): make_iterator_node = node_def_pb2.NodeDef() make_iterator_node.name = 'make_iterator' @@ -284,7 +284,7 @@ def test_matmul_with_bias_tanh(self): matmul_with_bias_tanh_node_test.name = matmul_with_bias_tanh_node.name name = matmul_with_bias_tanh_node_test.name self.assertEqual('matmul_with_bias_tanh', name) - + def test_pow(self): pow_node = node_def_pb2.NodeDef() pow_node.name = 'pow' @@ -294,7 +294,7 @@ def test_pow(self): pow_node_test.name = pow_node.name name = pow_node_test.name self.assertEqual('pow', name) - + def test_real_div(self): real_div_node = node_def_pb2.NodeDef() real_div_node.name = 'real_div' @@ -304,7 +304,7 @@ def test_real_div(self): real_div_node_test.name = real_div_node.name name = real_div_node_test.name self.assertEqual('real_div', name) - + def test_sqrt(self): sqrt_node = node_def_pb2.NodeDef() sqrt_node.name = 'sqrt' @@ -314,7 +314,7 @@ def test_sqrt(self): sqrt_node_test.name = sqrt_node.name name = sqrt_node_test.name self.assertEqual('sqrt', name) - + def test_square(self): square_node = node_def_pb2.NodeDef() square_node.name = 'square' @@ -324,7 +324,7 @@ def test_square(self): square_node_test.name = square_node.name name = 
square_node_test.name self.assertEqual('square', name) - + def test_stop_gradient(self): stop_gradient_node = node_def_pb2.NodeDef() stop_gradient_node.name = 'stop_gradient' @@ -334,7 +334,7 @@ def test_stop_gradient(self): stop_gradient_node_test.name = stop_gradient_node.name name = stop_gradient_node_test.name self.assertEqual('stop_gradient', name) - + def test_tanh(self): tanh_node = node_def_pb2.NodeDef() tanh_node.name = 'tanh' @@ -344,7 +344,7 @@ def test_tanh(self): tanh_node_test.name = tanh_node.name name = tanh_node_test.name self.assertEqual('tanh', name) - + def test_tensor_slice_dataset(self): tensor_slice_dataset_node = node_def_pb2.NodeDef() tensor_slice_dataset_node.name = 'tensor_slice_dataset' @@ -354,7 +354,7 @@ def test_tensor_slice_dataset(self): tensor_slice_dataset_node_test.name = tensor_slice_dataset_node.name name = tensor_slice_dataset_node_test.name self.assertEqual('tensor_slice_dataset', name) - + def test_fused_batch_matmul_v2(self): fused_batch_matmul_v2_node = node_def_pb2.NodeDef() fused_batch_matmul_v2_node.name = 'fused_batch_matmul_v2' @@ -368,7 +368,7 @@ def test_fused_batch_matmul_v2(self): adj_y = fused_batch_matmul_v2_node_test.attr['transpose_b'] self.assertFalse(adj_x) self.assertTrue(adj_y) - + def test_fused_batch_norm_v3(self): fused_batch_norm_v3_node = node_def_pb2.NodeDef() fused_batch_norm_v3_node.name = 'fused_batch_norm_v3' @@ -387,7 +387,7 @@ def test_fused_batch_norm_v3(self): self.assertEqual(0.0010000000474974513, epsilon) self.assertEqual(1, exponential_avg_factor) self.assertTrue(is_training) - + def test_fused_gemm(self): fused_gemm_node = NodeProto() fused_gemm_node.name = 'fused_gemm' @@ -412,7 +412,7 @@ def test_fused_gemm(self): self.assertEqual(2, alpha) self.assertEqual(2, beta) self.assertEqual('InnerProduct', op_type) - + def test_gemm(self): gemm_node = NodeProto() gemm_node.name = 'gemm' @@ -434,12 +434,12 @@ def test_gemm(self): self.assertEqual(2, alpha) self.assertEqual(2, beta) self.assertEqual('MatMulWithBias', op_type) - + def test_quantize_linear(self): quantize_linear_node = NodeProto() quantize_linear_node.name = 'quantize_linear' quantize_linear_node.op_type = 'QuantizeLinear' - + quantize_linear_node_test = OPERATORS['QuantizeLinear']() quantize_linear_node_test.set_attr('onnxruntime', quantize_linear_node) output_dtype = quantize_linear_node_test.attr['output_dtype'] @@ -465,7 +465,7 @@ def test_fused_matmul_onnx(self): self.assertTrue(transpose_a) self.assertFalse(transpose_b) self.assertEqual(0.125, alpha) - + def test_fused_matmul_tensorflow(self): fused_matmul_node = node_def_pb2.NodeDef() fused_matmul_node.name = 'fused_matmul' @@ -480,7 +480,7 @@ def test_fused_matmul_tensorflow(self): src1_perm = fused_matmul_node_test.attr['src1_perm'] self.assertEqual('1,0', src0_perm) self.assertEqual('0,1', src1_perm) - + def test_gather_onnx(self): gather_node = NodeProto() gather_node.name = 'gather' @@ -495,7 +495,7 @@ def test_gather_onnx(self): axis = gather_node_test.attr['axis'] self.assertEqual(0, batch_dims) self.assertEqual(0, axis) - + def test_gather_tensorflow(self): gather_node = node_def_pb2.NodeDef() gather_node.name = 'gather' @@ -508,7 +508,7 @@ def test_gather_tensorflow(self): axis = gather_node_test.attr['axis'] self.assertEqual(0, batch_dims) self.assertEqual(0, axis) - + def test_reduce_mean_onnx(self): reduce_mean_node = NodeProto() reduce_mean_node.name = 'reduce_mean' @@ -522,7 +522,7 @@ def test_reduce_mean_onnx(self): axis = reduce_mean_node_test.attr['axis'] 
self.assertFalse(keep_dims) self.assertEqual(0, axis) - + def test_reduce_mean_tensorflow(self): reduce_mean_node = node_def_pb2.NodeDef() reduce_mean_node.name = 'reduce_mean' @@ -536,7 +536,7 @@ def test_reduce_mean_tensorflow(self): axis = reduce_mean_node_test.attr['axis'] self.assertFalse(keep_dims) self.assertEqual(0, axis) - + def test_squeeze_onnx(self): squeeze_node = NodeProto() squeeze_node.name = 'squeeze' @@ -547,7 +547,7 @@ def test_squeeze_onnx(self): squeeze_node_test.set_attr('onnxruntime', squeeze_node) axis = squeeze_node_test.attr['axis'] self.assertEqual('0,1,2', axis) - + def test_squeeze_tensorflow(self): squeeze_node = node_def_pb2.NodeDef() squeeze_node.name = 'squeeze' @@ -571,7 +571,7 @@ def test_iter_dataset_related(self): attr = {'output_shapes': a(b(shape_list)), 'output_types': a(e([3]))} fake_node = f(attr) - + op_type_list = ['IteratorGetNext', 'IteratorV2', 'OptimizeDataset', 'MapAndBatchDataset'] for op_type in op_type_list: iterator_get_next_node_test = OPERATORS[op_type]() @@ -581,7 +581,7 @@ def test_iter_dataset_related(self): output_types = iterator_get_next_node_test.attr['output_types'] self.assertSequenceEqual([[-1, 128]], output_shapes) self.assertSequenceEqual(['int32'], output_types) - + def test_quantize_v2(self): quantize_v2_node = node_def_pb2.NodeDef() quantize_v2_node.name = 'quantize_v2' @@ -593,7 +593,7 @@ def test_quantize_v2(self): op_type = quantize_v2_node_test.op_type self.assertEqual('u8', output_dtype) self.assertEqual('Quantize', op_type) - + def test_quantized_fused_matmul_and_dequantize(self): quantized_fused_matmul_and_dequantize_node = node_def_pb2.NodeDef() quantized_fused_matmul_and_dequantize_node.name = 'quantized_fused_matmul_and_dequantize' @@ -610,7 +610,7 @@ def test_quantized_fused_matmul_and_dequantize(self): quantized_fused_matmul_and_dequantize_node_test = OPERATORS[ '_QuantizedFusedMatMulAndDequantize']() - quantized_fused_matmul_and_dequantize_node_test.set_attr('tensorflow', + quantized_fused_matmul_and_dequantize_node_test.set_attr('tensorflow', quantized_fused_matmul_and_dequantize_node) src0_perm = quantized_fused_matmul_and_dequantize_node_test.attr['src0_perm'] src1_perm = quantized_fused_matmul_and_dequantize_node_test.attr['src1_perm'] @@ -622,7 +622,7 @@ def test_quantized_fused_matmul_and_dequantize(self): self.assertEqual(1.0, epsilon) self.assertEqual(['add'], fused_ops) self.assertEqual('fp32', output_dtype) - + def test_quantized_matmul_with_bias_and_dequantize(self): quantized_matmul_with_bias_and_dequantize_node = node_def_pb2.NodeDef() quantized_matmul_with_bias_and_dequantize_node.name = \ @@ -632,11 +632,11 @@ def test_quantized_matmul_with_bias_and_dequantize(self): attr_value_pb2.AttrValue(b=True)) quantized_matmul_with_bias_and_dequantize_node.attr['transpose_b'].CopyFrom( attr_value_pb2.AttrValue(b=False)) - + quantized_matmul_with_bias_and_dequantize_node_test = OPERATORS[ 'QuantizedMatMulWithBiasAndDequantize']() - quantized_matmul_with_bias_and_dequantize_node_test.set_attr('tensorflow', + quantized_matmul_with_bias_and_dequantize_node_test.set_attr('tensorflow', quantized_matmul_with_bias_and_dequantize_node) transpose_a = quantized_matmul_with_bias_and_dequantize_node_test.attr['transpose_a'] transpose_b = quantized_matmul_with_bias_and_dequantize_node_test.attr['transpose_b'] @@ -652,7 +652,7 @@ def test_layer_normalization(self): layer_normalization_node.attribute.append(make_attribute('stash_type', 1)) layer_normalization_node.attribute.append(make_attribute('axis', 3)) 
layer_normalization_node.attribute.append(make_attribute('epsilon', 1.0)) - + op_type_list = ['LayerNormalization', '_MklLayerNorm'] for op_type in op_type_list: layer_normalization_node_test = OPERATORS[op_type]() @@ -663,7 +663,7 @@ def test_layer_normalization(self): self.assertEqual(3, axis) self.assertEqual(1.0, epsilon) self.assertEqual('LayerNorm', op_type) - + def test_one_hot(self): one_hot_node = node_def_pb2.NodeDef() one_hot_node.name = 'one_hot' @@ -683,7 +683,7 @@ def test_one_hot(self): self.assertEqual(1, on_value) self.assertEqual(2, off_value) self.assertEqual(1, len(one_hot_node_test.input_tensors)) - + def test_onnx_input(self): a = namedtuple('fake_node', ['name', 'type']) b = namedtuple('type', ['tensor_type']) @@ -720,7 +720,7 @@ def test_transpose(self): dst_perm = transpose_node_test.attr['dst_perm'] self.assertEqual('0,1,2,3', src_perm) self.assertEqual('0,2,3,1', dst_perm) - + def test_unpack(self): unpack_node = node_def_pb2.NodeDef() unpack_node.name = 'unpack' @@ -734,7 +734,7 @@ def test_unpack(self): num = unpack_node_test.attr['num'] self.assertEqual(0, axis) self.assertEqual(2, num) - + def test_unsqueeze(self): unsqueeze_node = NodeProto() unsqueeze_node.name = 'unsqueeze' @@ -745,7 +745,7 @@ def test_unsqueeze(self): unsqueeze_node_test.set_attr('onnxruntime', unsqueeze_node) axes = unsqueeze_node_test.attr['axes'] self.assertEqual('0,2,3,1', axes) - + def test_range(self): range_node = NodeProto() range_node.name = 'range' @@ -755,7 +755,7 @@ def test_range(self): range_node_test.name = range_node.name name = range_node_test.name self.assertEqual('range', name) - + def test_relu(self): relu_node = NodeProto() relu_node.name = 'relu' @@ -765,7 +765,7 @@ def test_relu(self): relu_node_test.name = relu_node.name name = relu_node_test.name self.assertEqual('relu', name) - + def test_matmul_with_bias_relu(self): mat_node = NodeProto() mat_node.name = 'matmul_with_bias_relu' @@ -775,7 +775,7 @@ def test_matmul_with_bias_relu(self): mat_node_test.name = mat_node.name name = mat_node_test.name self.assertEqual('matmul_with_bias_relu', name) - + def test_matmul(self): mat_node = NodeProto() mat_node.name = 'matmul' @@ -785,7 +785,7 @@ def test_matmul(self): mat_node_test.name = mat_node.name name = mat_node_test.name self.assertEqual('matmul', name) - + def test_quantize(self): qat_node = NodeProto() qat_node.name = 'quantize' @@ -795,7 +795,7 @@ def test_quantize(self): qat_node_test.name = qat_node.name name = qat_node_test.name self.assertEqual('quantize', name) - + def test_not(self): not_node = NodeProto() not_node.name = 'not' @@ -805,7 +805,7 @@ def test_not(self): not_node_test.name = not_node.name name = not_node_test.name self.assertEqual('not', name) - + def test_cumsum(self): cumsum_node = NodeProto() cumsum_node.name = 'cumsum' @@ -815,7 +815,7 @@ def test_cumsum(self): cumsum_node_test.name = cumsum_node.name name = cumsum_node_test.name self.assertEqual('cumsum', name) - + def test_onehot(self): onehot_node = NodeProto() onehot_node.name = 'onehot' @@ -825,7 +825,7 @@ def test_onehot(self): onehot_node_test.name = onehot_node.name name = onehot_node_test.name self.assertEqual('onehot', name) - + def test_toke_type_ids(self): token_node = NodeProto() token_node.name = 'toke_type_ids' @@ -835,7 +835,7 @@ def test_toke_type_ids(self): token_node_test.name = token_node.name name = token_node_test.name self.assertEqual('toke_type_ids', name) - + def test_positio_ids(self): position_node = NodeProto() position_node.name = 'position_ids' @@ -845,7 +845,7 
@@ def test_positio_ids(self): position_node_test.name = position_node.name name = position_node_test.name self.assertEqual('position_ids', name) - + def test_loop(self): loop_node = NodeProto() loop_node.name = 'loop' @@ -855,7 +855,7 @@ def test_loop(self): loop_node_test.name = loop_node.name name = loop_node_test.name self.assertEqual('loop', name) - + def test_sigmoid(self): sigmoid_node = NodeProto() sigmoid_node.name = 'sigmoid' @@ -865,7 +865,7 @@ def test_sigmoid(self): sigmoid_node_test.name = sigmoid_node.name name = sigmoid_node_test.name self.assertEqual('sigmoid', name) - + def test_matmul_with_bias_sigmoid(self): mat_node = NodeProto() mat_node.name = 'matmul_with_bias_sigmoid' @@ -875,7 +875,7 @@ def test_matmul_with_bias_sigmoid(self): mat_node_test.name = mat_node.name name = mat_node_test.name self.assertEqual('matmul_with_bias_sigmoid', name) - + def test_embedding_bag(self): eb_node = NodeProto() eb_node.name = 'embedding_bag' @@ -885,7 +885,7 @@ def test_embedding_bag(self): eb_node_test.name = eb_node.name name = eb_node_test.name self.assertEqual('embedding_bag', name) - + def test_flatten(self): flatten_node = NodeProto() flatten_node.name = 'flatten' diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_padding_sequence.py b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_padding_sequence.py index a313436b971..378b848cc38 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_padding_sequence.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_padding_sequence.py @@ -31,86 +31,86 @@ def setUpClass(self): @classmethod def tearDownClass(self): pass - + def test_padding_sequence_1(self): graph = Graph() graph.framework_modeling_config['framework'] = 'onnxruntime' input_data_node = OPERATORS['Input']() input_tensors = [] output_tensors = [Tensor(), Tensor(), Tensor()] - input_data_node.construct('input_data', 'Input', input_tensors=input_tensors, + input_data_node.construct('input_data', 'Input', input_tensors=input_tensors, output_tensors=output_tensors) - + shape_node = OPERATORS['Shape']() input_tensors = [Tensor()] output_tensors = [Tensor(name='shape:0', source_op=['shape'], dest_op=['strided_slice'])] - shape_node.construct('shape', 'Shape', input_tensors=input_tensors, + shape_node.construct('shape', 'Shape', input_tensors=input_tensors, output_tensors=output_tensors) - + strided_slice_node = OPERATORS['StridedSlice']() input_tensors = [Tensor(name='shape:0', source_op=['shape'], dest_op=['strided_slice'])] output_tensors = [Tensor(name='strided_slice:0', source_op=['strided_slice'], dest_op=['pack_0', 'pack_1'])] - strided_slice_node.construct('strided_slice', 'StridedSlice', input_tensors=input_tensors, + strided_slice_node.construct('strided_slice', 'StridedSlice', input_tensors=input_tensors, output_tensors=output_tensors) - + pack_0_node = OPERATORS['Pack']() input_tensors = [Tensor(name='strided_slice:0', source_op=['strided_slice'], dest_op=['pack_0'])] output_tensors = [Tensor(name='pack_0:0', source_op=['pack_0'], dest_op=['fill'])] - pack_0_node.construct('pack_0', 'Pack', input_tensors=input_tensors, + pack_0_node.construct('pack_0', 'Pack', input_tensors=input_tensors, output_tensors=output_tensors) - + fill_node = OPERATORS['Fill']() input_tensors = [Tensor(name='pack_0:0', source_op=['pack_0'], dest_op=['fill'])] output_tensors = [Tensor(name='fill:0', source_op=['fill'], dest_op=['mul_1'])] - fill_node.construct('fill', 'Fill', 
input_tensors=input_tensors, + fill_node.construct('fill', 'Fill', input_tensors=input_tensors, output_tensors=output_tensors) - + pack_1_node = OPERATORS['Pack']() input_tensors = [Tensor(name='strided_slice:0', source_op=['strided_slice'], dest_op=['pack_1'])] output_tensors = [Tensor(name='pack_1:0', source_op=['pack_1'], dest_op=['reshape'])] - pack_1_node.construct('pack_1', 'Pack', input_tensors=input_tensors, + pack_1_node.construct('pack_1', 'Pack', input_tensors=input_tensors, output_tensors=output_tensors) - + reshape_node = OPERATORS['Reshape']() input_tensors = [Tensor(name='pack_1:0', source_op=['pack_1'], dest_op=['reshape'])] output_tensors = [Tensor(name='reshape:0', source_op=['reshape'], dest_op=['cast'])] - reshape_node.construct('reshape', 'Reshape', input_tensors=input_tensors, + reshape_node.construct('reshape', 'Reshape', input_tensors=input_tensors, output_tensors=output_tensors) - + cast_node = OPERATORS['Cast']() input_tensors = [Tensor(name='reshape:0', source_op=['reshape'], dest_op=['cast'])] output_tensors = [Tensor(name='cast:0', source_op=['cast'], dest_op=['mul_1'])] - cast_node.construct('cast', 'Cast', input_tensors=input_tensors, + cast_node.construct('cast', 'Cast', input_tensors=input_tensors, output_tensors=output_tensors) - + mul_1_node = OPERATORS['Mul']() input_tensors = [Tensor(name='fill:0', source_op=['fill'], dest_op=['mul_1']), Tensor(name='cast:0', source_op=['cast'], dest_op=['mul_1'])] output_tensors = [Tensor(name='mul_1:0', source_op=['mul_1'], dest_op=['expand_dims'])] - mul_1_node.construct('mul_1', 'Mul', input_tensors=input_tensors, + mul_1_node.construct('mul_1', 'Mul', input_tensors=input_tensors, output_tensors=output_tensors) - + expand_dims_node = OPERATORS['ExpandDims']() input_tensors = [Tensor(name='mul_1:0', source_op=['mul_1'], dest_op=['expand_dims'])] output_tensors = [Tensor(name='expand_dims:0', source_op=['expand_dims'], dest_op=['sub'])] - expand_dims_node.construct('expand_dims', 'ExpandDims', input_tensors=input_tensors, + expand_dims_node.construct('expand_dims', 'ExpandDims', input_tensors=input_tensors, output_tensors=output_tensors) sub_node = OPERATORS['Sub']() @@ -118,7 +118,7 @@ def test_padding_sequence_1(self): dest_op=['sub'])] output_tensors = [Tensor(name='sub:0', source_op=['sub'], dest_op=['mul_2'])] - sub_node.construct('sub', 'Sub', input_tensors=input_tensors, + sub_node.construct('sub', 'Sub', input_tensors=input_tensors, output_tensors=output_tensors) mul_2_node = OPERATORS['Mul']() @@ -126,16 +126,16 @@ def test_padding_sequence_1(self): dest_op=['mul_2'])] output_tensors = [Tensor(name='mul_2:0', source_op=['mul_2'], dest_op=['add'])] - mul_2_node.construct('mul_2', 'Mul', input_tensors=input_tensors, - output_tensors=output_tensors) - + mul_2_node.construct('mul_2', 'Mul', input_tensors=input_tensors, + output_tensors=output_tensors) + add_node = OPERATORS['Add']() input_tensors = [Tensor(name='mul_2:0', source_op=['mul_2'], dest_op=['add'])] output_tensors = [Tensor(name='add:0', source_op=['add'], dest_op=[])] - add_node.construct('add', 'Add', input_tensors=input_tensors, - output_tensors=output_tensors) - + add_node.construct('add', 'Add', input_tensors=input_tensors, + output_tensors=output_tensors) + graph.insert_nodes(len(graph.nodes), [input_data_node, shape_node, strided_slice_node, pack_0_node, fill_node, pack_1_node, reshape_node, cast_node, mul_1_node, expand_dims_node, sub_node, @@ -152,83 +152,83 @@ def test_padding_sequence_2(self): input_data_node = OPERATORS['Input']() 
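# The nodes constructed below reproduce the ONNX-exported attention-mask
# chain (Unsqueeze -> Unsqueeze -> Cast -> Sub -> Mul -> Add) feeding a
# MatMul/Add/Add/LayerNorm stem; PaddingSequence() should fold that chain
# into a single op, which the node-count (7) and '-1,12,0,-1' dst_shape
# assertions at the end of this test check.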
input_tensors = [] output_tensors = [Tensor(), Tensor(), Tensor()] - input_data_node.construct('input_data', 'Input', input_tensors=input_tensors, + input_data_node.construct('input_data', 'Input', input_tensors=input_tensors, output_tensors=output_tensors) unsqueeze_1_node = OPERATORS['Unsqueeze']() input_tensors = [Tensor()] output_tensors = [Tensor(name='unsqueeze_1:0', source_op=['unsqueeze_1'], dest_op=['unsqueeze_2'])] - unsqueeze_1_node.construct('unsqueeze_1', 'Unsqueeze', input_tensors=input_tensors, + unsqueeze_1_node.construct('unsqueeze_1', 'Unsqueeze', input_tensors=input_tensors, output_tensors=output_tensors) - + unsqueeze_2_node = OPERATORS['Unsqueeze']() input_tensors = [Tensor(name='unsqueeze_1:0', source_op=['unsqueeze_1'], dest_op=['unsqueeze_2'])] output_tensors = [Tensor(name='unsqueeze_2:0', source_op=['unsqueeze_2'], dest_op=['cast'])] - unsqueeze_2_node.construct('unsqueeze_2', 'Unsqueeze', input_tensors=input_tensors, + unsqueeze_2_node.construct('unsqueeze_2', 'Unsqueeze', input_tensors=input_tensors, output_tensors=output_tensors) - + cast_node = OPERATORS['Cast']() input_tensors = [Tensor(name='unsqueeze_2:0', source_op=['unsqueeze_2'], dest_op=['cast'])] output_tensors = [Tensor(name='cast:0', source_op=['cast'], dest_op=['sub'])] - cast_node.construct('cast', 'Cast', input_tensors=input_tensors, + cast_node.construct('cast', 'Cast', input_tensors=input_tensors, output_tensors=output_tensors) - + sub_node = OPERATORS['Sub']() input_tensors = [Tensor(name='cast:0', source_op=['cast'], dest_op=['sub'])] output_tensors = [Tensor(name='sub:0', source_op=['sub'], dest_op=['mul'])] - sub_node.construct('sub', 'Sub', input_tensors=input_tensors, + sub_node.construct('sub', 'Sub', input_tensors=input_tensors, output_tensors=output_tensors) - + mul_node = OPERATORS['Mul']() input_tensors = [Tensor(name='sub:0', source_op=['sub'], dest_op=['mul'])] output_tensors = [Tensor(name='mul:0', source_op=['mul'], dest_op=['add'])] - mul_node.construct('mul', 'Mul', input_tensors=input_tensors, + mul_node.construct('mul', 'Mul', input_tensors=input_tensors, output_tensors=output_tensors) - + add_node = OPERATORS['Add']() input_tensors = [Tensor(name='mul:0', source_op=['mul'], dest_op=['add']), Tensor(data=np.array(1))] output_tensors = [Tensor(name='add:0', source_op=['add'], dest_op=[])] - add_node.construct('add', 'Add', input_tensors=input_tensors, + add_node.construct('add', 'Add', input_tensors=input_tensors, output_tensors=output_tensors) - + mat_node = OPERATORS['MatMul']() - input_tensors = [Tensor(), + input_tensors = [Tensor(), Tensor(name='src:0', dest_op=['matmul'], shape=[768])] output_tensors = [Tensor(name='matmul:0', source_op=['matmul'], dest_op=['add_1'])] - mat_node.construct('matmul', 'MatMul', input_tensors=input_tensors, + mat_node.construct('matmul', 'MatMul', input_tensors=input_tensors, output_tensors=output_tensors) - + add_1_node = OPERATORS['Add']() input_tensors = [Tensor(name='matmul:0', source_op=['matmul'], dest_op=['add_1']), Tensor(data=np.array(1))] output_tensors = [Tensor(name='add_1:0', source_op=['add_1'], dest_op=['add_2'])] - add_1_node.construct('add_1', 'Add', input_tensors=input_tensors, + add_1_node.construct('add_1', 'Add', input_tensors=input_tensors, output_tensors=output_tensors) - + add_2_node = OPERATORS['Add']() input_tensors = [Tensor(name='add_1:0', source_op=['add_1'], dest_op=['add_2']), Tensor(data=np.array(1))] output_tensors = [Tensor(name='add_2:0', source_op=['add_2'], dest_op=['layernorm'])] - 
add_2_node.construct('add_2', 'Add', input_tensors=input_tensors, + add_2_node.construct('add_2', 'Add', input_tensors=input_tensors, output_tensors=output_tensors) - + layernorm_node = OPERATORS['LayerNorm']() input_tensors = [Tensor(name='add_2:0', source_op=['add_2'], dest_op=['layernorm']), - Tensor(data=np.array(1), shape=[768, 768]), + Tensor(data=np.array(1), shape=[768, 768]), Tensor(data=np.array(1), shape=[768])] output_tensors = [Tensor(name='layernorm:0', source_op=['layernorm'])] - layernorm_node.construct('layernorm', 'LayerNorm', input_tensors=input_tensors, + layernorm_node.construct('layernorm', 'LayerNorm', input_tensors=input_tensors, output_tensors=output_tensors) graph.insert_nodes(len(graph.nodes), [input_data_node, unsqueeze_1_node, unsqueeze_2_node, @@ -237,7 +237,7 @@ def test_padding_sequence_2(self): graph = PaddingSequence()(graph) self.assertEqual(7, len(graph.nodes)) self.assertEqual('-1,12,0,-1', graph.nodes[1].attr['dst_shape']) - + def test_padding_sequence_3(self): graph = Graph() @@ -245,86 +245,86 @@ def test_padding_sequence_3(self): input_data_node = OPERATORS['Input']() input_tensors = [] output_tensors = [Tensor(), Tensor(), Tensor()] - input_data_node.construct('input_data', 'Input', input_tensors=input_tensors, + input_data_node.construct('input_data', 'Input', input_tensors=input_tensors, output_tensors=output_tensors) - + equal_node = OPERATORS['Equal']() input_tensors = [Tensor()] output_tensors = [Tensor(name='equal:0', source_op=['equal'], dest_op=['reshape'])] - equal_node.construct('equal', 'Equal', input_tensors=input_tensors, + equal_node.construct('equal', 'Equal', input_tensors=input_tensors, output_tensors=output_tensors) shape_0_node = OPERATORS['Shape']() input_tensors = [Tensor()] output_tensors = [Tensor(name='shape_0:0', source_op=['shape_0'], dest_op=['gather'])] - shape_0_node.construct('shape_0', 'Shape', input_tensors=input_tensors, + shape_0_node.construct('shape_0', 'Shape', input_tensors=input_tensors, output_tensors=output_tensors) - + gather_node = OPERATORS['Gather']() input_tensors = [Tensor(name='shape_0:0', source_op=['shape_0'], dest_op=['gather'])] output_tensors = [Tensor(name='gather:0', source_op=['gather'], dest_op=['unsqueeze_2'])] - gather_node.construct('gather', 'Gather', input_tensors=input_tensors, + gather_node.construct('gather', 'Gather', input_tensors=input_tensors, output_tensors=output_tensors) - + unsqueeze_1_node = OPERATORS['Unsqueeze']() input_tensors = [Tensor()] output_tensors = [Tensor(name='unsqueeze_1:0', source_op=['unsqueeze_1'], dest_op=['concat'])] - unsqueeze_1_node.construct('unsqueeze_1', 'Unsqueeze', input_tensors=input_tensors, + unsqueeze_1_node.construct('unsqueeze_1', 'Unsqueeze', input_tensors=input_tensors, output_tensors=output_tensors) - + unsqueeze_2_node = OPERATORS['Unsqueeze']() input_tensors = [Tensor(name='gather:0', source_op=['gather'], dest_op=['unsqueeze_2'])] output_tensors = [Tensor(name='unsqueeze_2:0', source_op=['unsqueeze_2'], dest_op=['concat'])] - unsqueeze_2_node.construct('unsqueeze_2', 'Unsqueeze', input_tensors=input_tensors, + unsqueeze_2_node.construct('unsqueeze_2', 'Unsqueeze', input_tensors=input_tensors, output_tensors=output_tensors) concat_node = OPERATORS['Concat']() input_tensors = [Tensor(name='unsqueeze_1:0', source_op=['unsqueeze_1'], - dest_op=['concat']), Tensor(name='unsqueeze_2:0', + dest_op=['concat']), Tensor(name='unsqueeze_2:0', source_op=['unsqueeze_2'], dest_op=['concat'])] output_tensors = [Tensor(name='concat:0', 
source_op=['concat'], dest_op=['reshape'])] - concat_node.construct('concat', 'Concat', input_tensors=input_tensors, + concat_node.construct('concat', 'Concat', input_tensors=input_tensors, output_tensors=output_tensors) - + reshape_node = OPERATORS['Reshape']() input_tensors = [Tensor(name='equal:0', source_op=['equal'], - dest_op=['reshape']), Tensor(name='concat:0', + dest_op=['reshape']), Tensor(name='concat:0', source_op=['concat'], dest_op=['reshape'])] output_tensors = [Tensor(name='reshape:0', source_op=['reshape'], dest_op=['expand'])] - reshape_node.construct('reshape', 'Reshape', input_tensors=input_tensors, + reshape_node.construct('reshape', 'Reshape', input_tensors=input_tensors, output_tensors=output_tensors) - + shape_1_node = OPERATORS['Shape']() input_tensors = [Tensor()] output_tensors = [Tensor(name='shape_1:0', source_op=['shape_1'], dest_op=['expand'])] - shape_1_node.construct('shape_1', 'Shape', input_tensors=input_tensors, + shape_1_node.construct('shape_1', 'Shape', input_tensors=input_tensors, output_tensors=output_tensors) - + expand_node = OPERATORS['Expand']() input_tensors = [Tensor(name='reshape:0', source_op=['reshape'], - dest_op=['expand']), Tensor(name='shape_1:0', + dest_op=['expand']), Tensor(name='shape_1:0', source_op=['shape_1'], dest_op=['expand'])] output_tensors = [Tensor(name='expand:0', source_op=['expand'], dest_op=['cast'])] - expand_node.construct('expand', 'Expand', input_tensors=input_tensors, + expand_node.construct('expand', 'Expand', input_tensors=input_tensors, output_tensors=output_tensors) - + cast_node = OPERATORS['Cast']() input_tensors = [Tensor(name='expand:0', source_op=['expand'], dest_op=['cast'])] output_tensors = [Tensor(name='cast:0', source_op=['cast'], dest_op=['where'])] - cast_node.construct('cast', 'Cast', input_tensors=input_tensors, + cast_node.construct('cast', 'Cast', input_tensors=input_tensors, output_tensors=output_tensors) where_node = OPERATORS['Where']() @@ -332,12 +332,12 @@ def test_padding_sequence_3(self): dest_op=['where'])] output_tensors = [Tensor(name='where:0', source_op=['where'], dest_op=[])] - where_node.construct('where', 'Where', input_tensors=input_tensors, + where_node.construct('where', 'Where', input_tensors=input_tensors, output_tensors=output_tensors) - - graph.insert_nodes(len(graph.nodes), [input_data_node, shape_0_node, gather_node, - equal_node, unsqueeze_1_node, unsqueeze_2_node, - concat_node, reshape_node, shape_1_node, + + graph.insert_nodes(len(graph.nodes), [input_data_node, shape_0_node, gather_node, + equal_node, unsqueeze_1_node, unsqueeze_2_node, + concat_node, reshape_node, shape_1_node, expand_node, cast_node, where_node]) graph = PaddingSequence()(graph) self.assertEqual(3, len(graph.nodes)) @@ -351,88 +351,88 @@ def test_padding_sequence_4(self): input_tensors = [] output_tensors = [Tensor(name='src', source_op=['input_data'], dest_op=['reducemax', 'unsqueeze1']), Tensor(), Tensor()] - input_data_node.construct('input_data', 'Input', input_tensors=input_tensors, + input_data_node.construct('input_data', 'Input', input_tensors=input_tensors, output_tensors=output_tensors) reducemax_node = OPERATORS['OpAny']() input_tensors = [Tensor(name='src', source_op=['input_data'], dest_op=['reducemax'])] output_tensors = [Tensor(name='reducemax:0', source_op=['reducemax'], dest_op=['cast1'])] - reducemax_node.construct('reducemax', 'ReduceMax', input_tensors=input_tensors, + reducemax_node.construct('reducemax', 'ReduceMax', input_tensors=input_tensors, output_tensors=output_tensors) 
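# Note: ReduceMax has no dedicated operator class here, so the node is
# instantiated via OPERATORS['OpAny'] (apparently a catch-all entry) while
# still being constructed with op_type 'ReduceMax'. The chain this test
# builds (ReduceMax -> Cast -> Range -> Expand/Tile -> Less -> Unsqueeze ->
# Not -> Unsqueeze -> Cast -> Where) is the decoder-style padding-mask
# pattern the fusion pass is expected to match.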
cast1_node = OPERATORS['Cast']() input_tensors = [Tensor(name='reducemax:0', source_op=['reducemax'], dest_op=['cast1'])] output_tensors = [Tensor(name='cast1:0', source_op=['cast1'], dest_op=['range'])] - cast1_node.construct('cast1', 'Cast', input_tensors=input_tensors, + cast1_node.construct('cast1', 'Cast', input_tensors=input_tensors, output_tensors=output_tensors) range_node = OPERATORS['Range']() input_tensors = [Tensor(name='cast1:0', source_op=['cast1'], dest_op=['range'])] output_tensors = [Tensor(name='range:0', source_op=['range'], dest_op=['expand'])] - range_node.construct('range', 'Range', input_tensors=input_tensors, + range_node.construct('range', 'Range', input_tensors=input_tensors, output_tensors=output_tensors) conshape_node = OPERATORS['ConstantOfShape']() input_tensors = [Tensor(name='cons_src0:0')] output_tensors = [Tensor(name='conshape:0', source_op=['conshape'], dest_op=['expand'])] - conshape_node.construct('conshape', 'ConstantOfShape', input_tensors=input_tensors, + conshape_node.construct('conshape', 'ConstantOfShape', input_tensors=input_tensors, output_tensors=output_tensors) expand_node = OPERATORS['Expand']() input_tensors = [Tensor(name='range:0', source_op=['range'], dest_op=['expand']), Tensor(name='conshape:0', source_op=['conshape'], dest_op=['expand'])] output_tensors = [Tensor(name='expand:0', source_op=['expand'], dest_op=['tile'])] - expand_node.construct('expand', 'Expand', input_tensors=input_tensors, + expand_node.construct('expand', 'Expand', input_tensors=input_tensors, output_tensors=output_tensors) tile_node = OPERATORS['Tile']() input_tensors = [Tensor(name='expand:0', source_op=['expand'], dest_op=['tile'])] output_tensors = [Tensor(name='tile:0', source_op=['tile'], dest_op=['less'])] - tile_node.construct('tile', 'Tile', input_tensors=input_tensors, + tile_node.construct('tile', 'Tile', input_tensors=input_tensors, output_tensors=output_tensors) unsqueeze1_node = OPERATORS['Unsqueeze']() input_tensors = [Tensor(name='src', source_op=['input_data'], dest_op=['unsqueeze1'])] output_tensors = [Tensor(name='unsqueeze1:0', source_op=['unsqueeze1'], dest_op=['less'])] - unsqueeze1_node.construct('unsqueeze1', 'Unsqueeze', input_tensors=input_tensors, + unsqueeze1_node.construct('unsqueeze1', 'Unsqueeze', input_tensors=input_tensors, output_tensors=output_tensors) less_node = OPERATORS['Less']() input_tensors = [Tensor(name='tile:0', source_op=['tile'], dest_op=['less']), Tensor(name='unsqueeze1:0', source_op=['unsqueeze1'], dest_op=['less'])] output_tensors = [Tensor(name='less:0', source_op=['less'], dest_op=['unsqueeze2'])] - less_node.construct('less', 'Less', input_tensors=input_tensors, + less_node.construct('less', 'Less', input_tensors=input_tensors, output_tensors=output_tensors) unsqueeze2_node = OPERATORS['Unsqueeze']() input_tensors = [Tensor(name='less:0', source_op=['less'], dest_op=['unsqueeze2'])] output_tensors = [Tensor(name='unsqueeze2:0', source_op=['unsqueeze2'], dest_op=['not'])] - unsqueeze2_node.construct('unsqueeze2', 'Unsqueeze', input_tensors=input_tensors, + unsqueeze2_node.construct('unsqueeze2', 'Unsqueeze', input_tensors=input_tensors, output_tensors=output_tensors) not_node = OPERATORS['Not']() input_tensors = [Tensor(name='unsqueeze2:0', source_op=['unsqueeze2'], dest_op=['not'])] output_tensors = [Tensor(name='not:0', source_op=['not'], dest_op=['unsqueeze3'])] - not_node.construct('not', 'Not', input_tensors=input_tensors, + not_node.construct('not', 'Not', input_tensors=input_tensors, 
output_tensors=output_tensors) - + unsqueeze3_node = OPERATORS['Unsqueeze']() input_tensors = [Tensor(name='not:0', source_op=['not'], dest_op=['unsqueeze3'])] output_tensors = [Tensor(name='unsqueeze3:0', source_op=['unsqueeze3'], dest_op=['cast2'])] - unsqueeze3_node.construct('unsqueeze3', 'Unsqueeze', input_tensors=input_tensors, + unsqueeze3_node.construct('unsqueeze3', 'Unsqueeze', input_tensors=input_tensors, output_tensors=output_tensors) cast2_node = OPERATORS['Cast']() input_tensors = [Tensor(name='unsqueeze3:0', source_op=['unsqueeze3'], dest_op=['cast2'])] output_tensors = [Tensor(name='cast2:0', source_op=['cast2'], dest_op=['where'])] - cast2_node.construct('cast2', 'Cast', input_tensors=input_tensors, + cast2_node.construct('cast2', 'Cast', input_tensors=input_tensors, output_tensors=output_tensors) where_node = OPERATORS['Where']() input_tensors = [Tensor(name='cast2:0', source_op=['cast2'], dest_op=['where']), Tensor(name='where_src1'), Tensor(name='where_src2')] output_tensors = [Tensor(name='where:0', source_op=['where'], dest_op=[])] - where_node.construct('where', 'Where', input_tensors=input_tensors, + where_node.construct('where', 'Where', input_tensors=input_tensors, output_tensors=output_tensors) graph.insert_nodes(len(graph.nodes), [input_data_node, reducemax_node, cast1_node, diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_pattern_dispatch.py b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_pattern_dispatch.py index 249d7b403b9..21fb339583d 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_pattern_dispatch.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_pattern_dispatch.py @@ -43,7 +43,7 @@ def test_pattern_dispatch(self): input_0 = np.random.uniform(low=0, high=128, size=shape).astype('int32') input_1 = np.random.uniform(low=0, high=1, size=shape).astype('int32') input_2 = np.random.uniform(low=0, high=1, size=shape).astype('int32') - + # validate pattern tuning fp32_model_path = "/tf_dataset2/inc-ut/nlptoolkit_ut_model/bert_mini_sst2_1x4_fp32.onnx" if is_win(): diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_position_embeddings.py b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_position_embeddings.py index 7479df39703..c0d52cd7b64 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_position_embeddings.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_position_embeddings.py @@ -32,54 +32,54 @@ def setUpClass(self): @classmethod def tearDownClass(self): pass - + def test_position_embeddings_1(self): graph = Graph() graph.framework_modeling_config['framework'] = 'onnxruntime' input_data_node = OPERATORS['Input']() input_tensors = [] output_tensors = [Tensor(), Tensor(), Tensor()] - input_data_node.construct('input_data', 'Input', input_tensors=input_tensors, + input_data_node.construct('input_data', 'Input', input_tensors=input_tensors, output_tensors=output_tensors) slice_node = OPERATORS['Slice']() input_tensors = [Tensor(shape=[512, 768], data=np.array(1)), Tensor(data=np.array(1))] - output_tensors = [Tensor(name='slice:0', source_op=['slice'], + output_tensors = [Tensor(name='slice:0', source_op=['slice'], dest_op=['reshape'])] - slice_node.construct('slice', 'Slice', input_tensors=input_tensors, + slice_node.construct('slice', 'Slice', input_tensors=input_tensors, output_tensors=output_tensors) - + 
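# The [512, 768] constant fed to the Slice above stands in for a
# position-embedding table; together with the Reshape and the
# LessEqual/All/Assert side branch constructed next, it forms the pattern
# PositionEmbeddings() matches, and the fusion pass presumably drops the
# assert branch when it rewrites the graph.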
reshape_node = OPERATORS['Reshape']() - input_tensors = [Tensor(name='slice:0', source_op=['slice'], + input_tensors = [Tensor(name='slice:0', source_op=['slice'], dest_op=['reshape'])] output_tensors = [Tensor(name='reshape:0', source_op=['reshape'], dest_op=[])] - reshape_node.construct('reshape', 'Reshape', input_tensors=input_tensors, + reshape_node.construct('reshape', 'Reshape', input_tensors=input_tensors, output_tensors=output_tensors) - + less_equal_node = OPERATORS['LessEqual']() input_tensors = [Tensor()] output_tensors = [Tensor(name='less_equal:0', source_op=['less_equal'], dest_op=['all'])] - less_equal_node.construct('less_equal', 'LessEqual', input_tensors=input_tensors, + less_equal_node.construct('less_equal', 'LessEqual', input_tensors=input_tensors, output_tensors=output_tensors) - + all_node = OPERATORS['All']() input_tensors = [Tensor(name='less_equal:0', source_op=['less_equal'], dest_op=['all'])] output_tensors = [Tensor(name='all:0', source_op=['all'], dest_op=['assert'])] - all_node.construct('all', 'All', input_tensors=input_tensors, + all_node.construct('all', 'All', input_tensors=input_tensors, output_tensors=output_tensors) - + assert_node = OPERATORS['Assert']() input_tensors = [Tensor(name='all:0', source_op=['all'], dest_op=['assert'])] output_tensors = [Tensor(name='assert:0', source_op=['assert'], dest_op=[])] - assert_node.construct('assert', 'Assert', input_tensors=input_tensors, + assert_node.construct('assert', 'Assert', input_tensors=input_tensors, output_tensors=output_tensors) - + graph.insert_nodes(len(graph.nodes), [input_data_node, slice_node, reshape_node, less_equal_node, all_node, assert_node]) graph = PositionEmbeddings()(graph) diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_qkv_merge.py b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_qkv_merge.py index 3ba348142eb..9ea0169396e 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_qkv_merge.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_qkv_merge.py @@ -37,7 +37,7 @@ def tearDownClass(cls): def test_qkv_merge_1(self): model_path = "/tf_dataset2/inc-ut/nlptoolkit_ut_model/onnx_best_acc_distilbert.onnx" content = "pattern_switch:\n 'QKVMerge': True\n 'MultiHeadAttention': False" - pattern_config = "qkv_merge_pattern_config" + pattern_config = "qkv_merge_pattern_config" with open("qkv_merge_pattern_config", "w") as file: file.write(content) if is_win(): diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_qkv_reshape.py b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_qkv_reshape.py index 8c34ba431d5..da7116a9f3b 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_qkv_reshape.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_qkv_reshape.py @@ -31,32 +31,32 @@ def setUpClass(self): @classmethod def tearDownClass(self): pass - + def test_attention_reshape_1(self): graph = Graph() graph.framework_modeling_config['framework'] = 'onnxruntime' input_data_node = OPERATORS['Input']() input_tensors = [] output_tensors = [Tensor(), Tensor(), Tensor()] - input_data_node.construct('input_data', 'Input', input_tensors=input_tensors, + input_data_node.construct('input_data', 'Input', input_tensors=input_tensors, output_tensors=output_tensors) matmul_node = OPERATORS['MatMulWithBias']() input_tensors = [Tensor(), Tensor(), Tensor()] output_tensors = 
[Tensor(name='matmul:0', source_op=['matmul'], dest_op=['reshape'])] - matmul_node.construct('matmul', 'MatMulWithBias', input_tensors=input_tensors, + matmul_node.construct('matmul', 'MatMulWithBias', input_tensors=input_tensors, output_tensors=output_tensors, attr=OrderedDict( {'src1_perm': '0,1'})) - + reshape_node = OPERATORS['Reshape']() input_tensors = [Tensor(name='matmul:0', source_op=['matmul'], dest_op=['reshape'])] output_tensors = [Tensor(name='reshape:0', source_op=['reshape'], dest_op=[])] - reshape_node.construct('reshape', 'Reshape', input_tensors=input_tensors, + reshape_node.construct('reshape', 'Reshape', input_tensors=input_tensors, output_tensors=output_tensors, attr=OrderedDict({ 'dst_shape': '0,0,12,64'})) - + graph.insert_nodes(len(graph.nodes), [input_data_node, matmul_node, reshape_node]) graph = QKVReshape()(graph) self.assertEqual(3, len(graph.nodes)) diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_quant_onnx_execute.py b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_quant_onnx_execute.py index 3f0355846d5..1831828e21f 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_quant_onnx_execute.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_quant_onnx_execute.py @@ -47,7 +47,7 @@ def test_onnx_qlinear_compile(self): 'ONNX QLinear model is not found, please set your own model path!') qlinear_model = compile(qlinear_model_path) qlinear_output_dict = qlinear_model.inference([input_0, input_1, input_2]) - qlinear_output = copy.deepcopy(list(qlinear_output_dict.values())[0]) + qlinear_output = copy.deepcopy(list(qlinear_output_dict.values())[0]) # compile and execute qdq model qdq_model_path = "/tf_dataset2/inc-ut/nlptoolkit_ut_model/qlinear/bert_mini_sst2_qdq.onnx" if is_win(): diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_start_end_logits.py b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_start_end_logits.py index 3d83ec57a1c..460cc089be6 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_start_end_logits.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_start_end_logits.py @@ -31,38 +31,38 @@ def setUpClass(self): @classmethod def tearDownClass(self): pass - + def test_start_end_logits(self): graph = Graph() graph.framework_modeling_config['framework'] = 'onnxruntime' input_data_node = OPERATORS['Input']() input_tensors = [] output_tensors = [Tensor(), Tensor(), Tensor()] - input_data_node.construct('input_data', 'Input', input_tensors=input_tensors, + input_data_node.construct('input_data', 'Input', input_tensors=input_tensors, output_tensors=output_tensors) transpose_node = OPERATORS['Transpose']() input_tensors = [Tensor()] output_tensors = [Tensor(name='transpose:0', source_op=['transpose'], dest_op=['unpack'])] - transpose_node.construct('transpose', 'Transpose', input_tensors=input_tensors, + transpose_node.construct('transpose', 'Transpose', input_tensors=input_tensors, output_tensors=output_tensors) - + unpack_node = OPERATORS['Unpack']() input_tensors = [Tensor(name='transpose:0', source_op=['transpose'], dest_op=['unpack'])] output_tensors = [Tensor(name='unpack:0', source_op=['unpack'], dest_op=['identity'])] - unpack_node.construct('unpack', 'Unpack', input_tensors=input_tensors, + unpack_node.construct('unpack', 'Unpack', input_tensors=input_tensors, output_tensors=output_tensors) identity_node 
= OPERATORS['Identity']() input_tensors = [Tensor(name='unpack:0', source_op=['unpack'], dest_op=['identity'])] output_tensors = [Tensor(name='identity:0', source_op=['identity'], dest_op=[])] - identity_node.construct('identity', 'Identity', input_tensors=input_tensors, + identity_node.construct('identity', 'Identity', input_tensors=input_tensors, output_tensors=output_tensors) - + graph.insert_nodes(len(graph.nodes), [input_data_node, transpose_node, unpack_node, identity_node]) graph = StartEndLogits()(graph) diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_tf_utils.py b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_tf_utils.py index b16f9aacdaf..fa04614461a 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_tf_utils.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_tf_utils.py @@ -16,8 +16,8 @@ # limitations under the License. import unittest -from tensorflow.core.framework import node_def_pb2 -import intel_extension_for_transformers.llm.runtime.deprecated.compile.tf_utils as util +from tensorflow.core.framework import node_def_pb2 +import intel_extension_for_transformers.llm.runtime.deprecated.compile.tf_utils as util class TestTfUtils(unittest.TestCase): @@ -28,7 +28,7 @@ def setUpClass(self): @classmethod def tearDownClass(self): pass - + def test_create_tf_node(self): test_node = util.create_tf_node('Reshape', 'test_name', ['input_0']) self.assertEqual('Reshape', test_node.op) diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_token_type_embeddings.py b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_token_type_embeddings.py index 0333dcc4d02..8ec6ea97771 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_token_type_embeddings.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_token_type_embeddings.py @@ -32,76 +32,76 @@ def setUpClass(self): @classmethod def tearDownClass(self): pass - + def test_token_type_embeddings_1(self): graph = Graph() graph.framework_modeling_config['framework'] = 'onnxruntime' input_data_node = OPERATORS['Input']() input_tensors = [] output_tensors = [Tensor(), Tensor(), Tensor()] - input_data_node.construct('input_data', 'Input', input_tensors=input_tensors, + input_data_node.construct('input_data', 'Input', input_tensors=input_tensors, output_tensors=output_tensors) - + reshape_1_node = OPERATORS['Reshape']() input_tensors = [Tensor(data=np.array(1))] output_tensors = [Tensor(name='reshape_1:0', source_op=['reshape_1'], dest_op=['one_hot'])] - reshape_1_node.construct('reshape_1', 'Reshape', input_tensors=input_tensors, + reshape_1_node.construct('reshape_1', 'Reshape', input_tensors=input_tensors, output_tensors=output_tensors) - + one_hot_node = OPERATORS['OneHot']() - input_tensors = [Tensor(name='reshape_1:0', source_op=['reshape_1'], + input_tensors = [Tensor(name='reshape_1:0', source_op=['reshape_1'], dest_op=['one_hot'])] output_tensors = [Tensor(name='one_hot:0', source_op=['one_hot'], dest_op=['matmul'])] - one_hot_node.construct('one_hot', 'OneHot', input_tensors=input_tensors, + one_hot_node.construct('one_hot', 'OneHot', input_tensors=input_tensors, output_tensors=output_tensors, attr=OrderedDict({'test': 1})) - + matmul_node = OPERATORS['MatMul']() - input_tensors = [Tensor(name='one_hot:0', source_op=['one_hot'], dest_op=['matmul']), + input_tensors = [Tensor(name='one_hot:0', 
source_op=['one_hot'], dest_op=['matmul']), Tensor(name='embeddings', shape=[2, 768], dest_op=['matmul'], data=np.array(1))] output_tensors = [Tensor(name='matmul:0', source_op=['matmul'], dest_op=['reshape_2'])] - matmul_node.construct('matmul', 'MatMul', input_tensors=input_tensors, + matmul_node.construct('matmul', 'MatMul', input_tensors=input_tensors, output_tensors=output_tensors, attr=OrderedDict( {'transpose_a': False, 'transpose_b': False})) - + shape_node = OPERATORS['Shape']() input_tensors = [Tensor(np.array(1))] - output_tensors = [Tensor(name='shape:0', source_op=['shape'], + output_tensors = [Tensor(name='shape:0', source_op=['shape'], dest_op=['strided_slice'])] - shape_node.construct('shape', 'Shape', input_tensors=input_tensors, + shape_node.construct('shape', 'Shape', input_tensors=input_tensors, output_tensors=output_tensors) strided_slice_node = OPERATORS['StridedSlice']() - input_tensors = [Tensor(name='shape:0', source_op=['shape'], + input_tensors = [Tensor(name='shape:0', source_op=['shape'], dest_op=['strided_slice'])] - output_tensors = [Tensor(name='strided_slice:0', source_op=['strided_slice'], + output_tensors = [Tensor(name='strided_slice:0', source_op=['strided_slice'], dest_op=['pack'])] - strided_slice_node.construct('strided_slice', 'StridedSlice', input_tensors=input_tensors, + strided_slice_node.construct('strided_slice', 'StridedSlice', input_tensors=input_tensors, output_tensors=output_tensors) - + pack_node = OPERATORS['Pack']() - input_tensors = [Tensor(name='strided_slice:0', source_op=['strided_slice'], + input_tensors = [Tensor(name='strided_slice:0', source_op=['strided_slice'], dest_op=['pack'])] output_tensors = [Tensor(name='pack:0', source_op=['pack'], dest_op=['reshape_2'])] - pack_node.construct('pack', 'Pack', input_tensors=input_tensors, + pack_node.construct('pack', 'Pack', input_tensors=input_tensors, output_tensors=output_tensors) - + reshape_2_node = OPERATORS['Reshape']() input_tensors = [Tensor(name='matmul:0', source_op=['matmul'], dest_op=['reshape_2']), Tensor(name='pack:0', source_op=['pack'], dest_op=['reshape_2'])] output_tensors = [Tensor(name='reshape_2:0', source_op=['reshape_2'])] - reshape_2_node.construct('reshape_2', 'Reshape', input_tensors=input_tensors, + reshape_2_node.construct('reshape_2', 'Reshape', input_tensors=input_tensors, output_tensors=output_tensors) - + graph.insert_nodes(len(graph.nodes), [input_data_node, reshape_1_node, one_hot_node, matmul_node, shape_node, strided_slice_node, pack_node, reshape_2_node]) - + graph = TokenTypeEmbeddings()(graph) self.assertEqual(6, len(graph.nodes)) self.assertEqual(-1, graph.nodes[1].attr['dst_shape']) @@ -109,38 +109,38 @@ def test_token_type_embeddings_1(self): self.assertEqual('1,0', graph.nodes[3].attr['src1_perm']) self.assertEqual('-1,-1,768', graph.nodes[4].attr['dst_shape']) self.assertEqual('1,2', graph.nodes[5].attr['mul']) - - + + def test_token_type_embeddings_2(self): graph = Graph() graph.framework_modeling_config['framework'] = 'onnxruntime' input_data_node = OPERATORS['Input']() input_tensors = [] output_tensors = [Tensor(), Tensor(name='segment_ids', dest_op=['gather']), Tensor()] - input_data_node.construct('input_data', 'Input', input_tensors=input_tensors, + input_data_node.construct('input_data', 'Input', input_tensors=input_tensors, output_tensors=output_tensors) - + gather_node = OPERATORS['Gather']() - input_tensors = [Tensor(shape=[2, 768], data=np.array(1)), Tensor(name='segment_ids', + input_tensors = [Tensor(shape=[2, 768], 
data=np.array(1)), Tensor(name='segment_ids', source_op=['input_data'], dest_op=['gather'])] output_tensors = [Tensor(name='gather:0', source_op=['gather'], dest_op=['add'])] - gather_node.construct('gather', 'Gather', input_tensors=input_tensors, + gather_node.construct('gather', 'Gather', input_tensors=input_tensors, output_tensors=output_tensors, attr=OrderedDict({ 'axis': 0, 'batch_dims': 0})) - + add_node = OPERATORS['Add']() input_tensors = [Tensor(name='gather:0', source_op=['gather'], dest_op=['add']), Tensor( data=np.array(1))] output_tensors = [Tensor(name='add:0', source_op=['add'], dest_op=['layer_norm'])] - add_node.construct('add', 'Add', input_tensors=input_tensors, + add_node.construct('add', 'Add', input_tensors=input_tensors, output_tensors=output_tensors) - + ln_node = OPERATORS['LayerNorm']() - input_tensors = [Tensor(name='add:0', source_op=['add'], dest_op=['layer_norm']), - Tensor(shape=[768], data=np.array(1)), + input_tensors = [Tensor(name='add:0', source_op=['add'], dest_op=['layer_norm']), + Tensor(shape=[768], data=np.array(1)), Tensor(shape=[768], data=np.array(1))] output_tensors = [Tensor(name='layer_norm:0', source_op=['layer_norm'], dest_op=[])] - ln_node.construct('layer_norm', 'LayerNorm', input_tensors=input_tensors, + ln_node.construct('layer_norm', 'LayerNorm', input_tensors=input_tensors, output_tensors=output_tensors) graph.insert_nodes(len(graph.nodes), [input_data_node, gather_node, add_node, ln_node]) diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_transpose_batch_matmul.py b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_transpose_batch_matmul.py index f74bcba6860..b0734286892 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_transpose_batch_matmul.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_transpose_batch_matmul.py @@ -39,14 +39,14 @@ def test_transpose_batch_matmul_1(self): input_data_node = OPERATORS['Input']() input_tensors = [] output_tensors = [Tensor(), Tensor(), Tensor()] - input_data_node.construct('input_data', 'Input', input_tensors=input_tensors, + input_data_node.construct('input_data', 'Input', input_tensors=input_tensors, output_tensors=output_tensors) transpose_1_node = OPERATORS['Transpose']() input_tensors = [Tensor(data=np.array(1))] output_tensors = [Tensor(name='transpose_1:0', source_op=['transpose_1'], dest_op=['fused_matmul'])] - transpose_1_node.construct('transpose_1', 'Transpose', input_tensors=input_tensors, + transpose_1_node.construct('transpose_1', 'Transpose', input_tensors=input_tensors, output_tensors=output_tensors, attr=OrderedDict({ 'dst_perm': '0,2,1,3'})) @@ -54,17 +54,17 @@ def test_transpose_batch_matmul_1(self): input_tensors = [Tensor(data=np.array(1))] output_tensors = [Tensor(name='transpose_2:0', source_op=['transpose_2'], dest_op=['fused_matmul'])] - transpose_2_node.construct('transpose_2', 'Transpose', input_tensors=input_tensors, + transpose_2_node.construct('transpose_2', 'Transpose', input_tensors=input_tensors, output_tensors=output_tensors, attr=OrderedDict({ 'dst_perm': '0,2,3,1'})) fused_matmul_node = OPERATORS['FusedMatMul']() input_tensors = [Tensor(name='transpose_1:0', source_op=['transpose_1'], - dest_op=['fused_matmul']), Tensor(name='transpose_2:0', + dest_op=['fused_matmul']), Tensor(name='transpose_2:0', source_op=['transpose_2'], dest_op=['fused_matmul'])] output_tensors = [Tensor(name='fused_matmul:0', source_op=['fused_matmul'], 
dest_op=['add'])] - fused_matmul_node.construct('fused_matmul', 'FusedMatMul', input_tensors=input_tensors, + fused_matmul_node.construct('fused_matmul', 'FusedMatMul', input_tensors=input_tensors, output_tensors=output_tensors, attr=OrderedDict({ 'transpose_a': False, 'transpose_b': False, 'alpha': 0.125})) @@ -72,7 +72,7 @@ def test_transpose_batch_matmul_1(self): input_tensors = [Tensor(name='fused_matmul:0', source_op=['fused_matmul'], dest_op=['add']), Tensor(data=np.array(1))] output_tensors = [Tensor(name='add:0', source_op=['add'], dest_op=[])] - add_node.construct('add', 'Add', input_tensors=input_tensors, + add_node.construct('add', 'Add', input_tensors=input_tensors, output_tensors=output_tensors) graph.insert_nodes(len(graph.nodes), [input_data_node, transpose_1_node, transpose_2_node, @@ -90,14 +90,14 @@ def test_transpose_batch_matmul_2(self): input_data_node = OPERATORS['Input']() input_tensors = [] output_tensors = [Tensor(), Tensor(), Tensor()] - input_data_node.construct('input_data', 'Input', input_tensors=input_tensors, + input_data_node.construct('input_data', 'Input', input_tensors=input_tensors, output_tensors=output_tensors) transpose_1_node = OPERATORS['Transpose']() input_tensors = [Tensor(data=np.array(1))] output_tensors = [Tensor(name='transpose_1:0', source_op=['transpose_1'], dest_op=['matmul'])] - transpose_1_node.construct('transpose_1', 'Transpose', input_tensors=input_tensors, + transpose_1_node.construct('transpose_1', 'Transpose', input_tensors=input_tensors, output_tensors=output_tensors, attr=OrderedDict({ 'dst_perm': '0,2,1,3'})) @@ -105,14 +105,14 @@ def test_transpose_batch_matmul_2(self): input_tensors = [Tensor(name='transpose_1:0', source_op=['transpose_1'], dest_op=['matmul']), Tensor(name='m_src1:0')] output_tensors = [Tensor(name='matmul:0', source_op=['matmul'], dest_op=['transpose_2'])] - matmul_node.construct('matmul', 'BatchMatMul', input_tensors=input_tensors, + matmul_node.construct('matmul', 'BatchMatMul', input_tensors=input_tensors, output_tensors=output_tensors, attr=OrderedDict({ 'transpose_a': False, 'transpose_b': False})) transpose_2_node = OPERATORS['Transpose']() input_tensors = [Tensor(name='matmul:0', source_op=['matmul'], dest_op=['transpose_2'])] output_tensors = [Tensor(name='transpose_2:0', source_op=['transpose_2'])] - transpose_2_node.construct('transpose_2', 'Transpose', input_tensors=input_tensors, + transpose_2_node.construct('transpose_2', 'Transpose', input_tensors=input_tensors, output_tensors=output_tensors, attr=OrderedDict({ 'dst_perm': '0,2,3,1'})) @@ -129,20 +129,20 @@ def test_transpose_batch_matmul_3(self): input_data_node = OPERATORS['Input']() input_tensors = [] output_tensors = [Tensor(), Tensor(), Tensor()] - input_data_node.construct('input_data', 'Input', input_tensors=input_tensors, + input_data_node.construct('input_data', 'Input', input_tensors=input_tensors, output_tensors=output_tensors) matmul_node = OPERATORS['BatchMatMul']() input_tensors = [Tensor(name='m_src0:0'), Tensor(name='m_src1:0')] output_tensors = [Tensor(name='matmul:0', source_op=['matmul'], dest_op=['transpose'])] - matmul_node.construct('matmul', 'BatchMatMul', input_tensors=input_tensors, + matmul_node.construct('matmul', 'BatchMatMul', input_tensors=input_tensors, output_tensors=output_tensors, attr=OrderedDict({ 'transpose_a': False, 'transpose_b': False})) transpose_node = OPERATORS['Transpose']() input_tensors = [Tensor(name='matmul:0', source_op=['matmul'], dest_op=['transpose'])] output_tensors = 
[Tensor(name='transpose:0', source_op=['transpose'])] - transpose_node.construct('transpose', 'Transpose', input_tensors=input_tensors, + transpose_node.construct('transpose', 'Transpose', input_tensors=input_tensors, output_tensors=output_tensors, attr=OrderedDict({ 'dst_perm': '0,2,3,1'})) diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_unet_qat.py b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_unet_qat.py index 274f17192cd..8cb494a3aee 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_unet_qat.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/test_unet_qat.py @@ -119,7 +119,7 @@ def test_unet(self): self.assertTrue(os.path.exists(model_dir), 'model is not found, please set your own model path!') graph = compile(model_dir, config=qat_unet_pattern_config) - + input_0_path = root_dir + 'sample.pt' inputs_0 = torch.load(input_0_path) inputs_1 = torch.tensor([301], dtype=torch.float32) diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_add_zeros.py b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_add_zeros.py index d1ef664c9f5..bb00d4e73cd 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_add_zeros.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_add_zeros.py @@ -50,10 +50,10 @@ def test_1(self): n = Net() example_in = torch.rand(3, 256) traced_model = torch.jit.trace(n, example_in) - + torch.jit.save(traced_model, '{}.pt'.format(file_name)) ref_out = traced_model(example_in).detach().numpy() - + graph = compile('{}.pt'.format(file_name)) graph.save(file_name) newgraph = Graph() diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_baddbmm.py b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_baddbmm.py index e5efc6ff408..ffa5a793223 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_baddbmm.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_baddbmm.py @@ -51,10 +51,10 @@ def test_1(self): batch1 = torch.randn(10, 3, 4) batch2 = torch.randn(10, 4, 5) traced_model = torch.jit.trace(n, (M, batch1, batch2)) - + torch.jit.save(traced_model, '{}.pt'.format(file_name)) ref_out = traced_model(M, batch1, batch2).detach().numpy() - + graph = compile('{}.pt'.format(file_name)) graph.save(file_name) newgraph = Graph() @@ -71,10 +71,10 @@ def test_2(self): batch1 = torch.randn(10, 3, 4) batch2 = torch.randn(10, 4, 5) traced_model = torch.jit.trace(n, (M, batch1, batch2)) - + torch.jit.save(traced_model, '{}.pt'.format(file_name)) ref_out = traced_model(M, batch1, batch2).detach().numpy() - + graph = compile('{}.pt'.format(file_name)) graph.save(file_name) newgraph = Graph() diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_bert_mini_fp32.py b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_bert_mini_fp32.py index d2bb2f4406d..a4115c5cf93 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_bert_mini_fp32.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_bert_mini_fp32.py @@ -53,7 +53,7 @@ def test_1(self): example_in = torch.rand(8, 128) # TODO: enable check accuracy ref_out = 
traced_model(ids, tok, att, ids)[0].detach().numpy() - + graph = compile(pt_file) graph.save(file_name) newgraph = Graph() diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_binaryop.py b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_binaryop.py index 5fabb9c3da6..0c85756c5c7 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_binaryop.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_binaryop.py @@ -65,7 +65,7 @@ def test_1(self): traced_model = torch.jit.trace(n, (example_in, example_in2)) torch.jit.save(traced_model, '{}.pt'.format(file_name)) ref_out = traced_model(example_in, example_in2).detach().numpy() - + graph = compile('{}.pt'.format(file_name)) graph.save(file_name) newgraph = Graph() @@ -83,7 +83,7 @@ def test_2(self): traced_model = torch.jit.trace(n, (example_in, example_in2)) torch.jit.save(traced_model, '{}.pt'.format(file_name)) ref_out = traced_model(example_in, example_in2).detach().numpy() - + graph = compile('{}.pt'.format(file_name)) graph.save(file_name) newgraph = Graph() @@ -99,10 +99,10 @@ def test_2(self): # example_in = torch.rand(3, 256) # example_in2 = torch.rand(256) # traced_model = torch.jit.trace(n, (example_in, example_in2)) - # + # # torch.jit.save(traced_model, '{}.pt'.format(file_name)) # ref_out = traced_model(example_in, example_in2).detach().numpy() - + # graph = compile('{}.pt'.format(file_name)) # graph.save(file_name) # newgraph = Graph() @@ -118,10 +118,10 @@ def test_2(self): # example_in = torch.rand(3, 256) # example_in2 = torch.rand(256) # traced_model = torch.jit.trace(n, (example_in, example_in2)) - # + # # torch.jit.save(traced_model, '{}.pt'.format(file_name)) # ref_out = traced_model(example_in, example_in2).detach().numpy() - + # graph = compile('{}.pt'.format(file_name)) # graph.save(file_name) # newgraph = Graph() @@ -137,10 +137,10 @@ def test_2(self): # example_in = torch.rand(3, 256) # example_in2 = torch.rand(256) # traced_model = torch.jit.trace(n, (example_in, example_in2)) - # + # # torch.jit.save(traced_model, '{}.pt'.format(file_name)) # ref_out = traced_model(example_in, example_in2).detach().numpy() - + # graph = compile('{}.pt'.format(file_name)) # graph.save(file_name) # newgraph = Graph() @@ -156,10 +156,10 @@ def test_2(self): # example_in = torch.rand(3, 256) # example_in2 = torch.rand(256) # traced_model = torch.jit.trace(n, (example_in, example_in2)) - # + # # torch.jit.save(traced_model, '{}.pt'.format(file_name)) # ref_out = traced_model(example_in, example_in2).detach().numpy() - + # graph = compile('{}.pt'.format(file_name)) # graph.save(file_name) # newgraph = Graph() @@ -175,10 +175,10 @@ def test_2(self): # example_in = torch.rand(3, 256) # example_in2 = torch.rand(256) # traced_model = torch.jit.trace(n, (example_in, example_in2)) - # + # # torch.jit.save(traced_model, '{}.pt'.format(file_name)) # ref_out = traced_model(example_in, example_in2).detach().numpy() - + # graph = compile('{}.pt'.format(file_name)) # graph.save(file_name) # newgraph = Graph() @@ -194,10 +194,10 @@ def test_2(self): # example_in = torch.rand(3, 256) # example_in2 = torch.rand(256) # traced_model = torch.jit.trace(n, (example_in, example_in2)) - # + # # torch.jit.save(traced_model, '{}.pt'.format(file_name)) # ref_out = traced_model(example_in, example_in2).detach().numpy() - + # graph = compile('{}.pt'.format(file_name)) # graph.save(file_name) # newgraph = 
Graph() diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_concat.py b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_concat.py index 4263abda43c..2f756857d2b 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_concat.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_concat.py @@ -51,7 +51,7 @@ def test_1(self): traced_model = torch.jit.trace(n, (example_in, example_in2)) torch.jit.save(traced_model, '{}.pt'.format(file_name)) ref_out = traced_model(example_in, example_in2).detach().numpy() - + graph = compile('{}.pt'.format(file_name)) graph.save(file_name) newgraph = Graph() diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_conv.py b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_conv.py index c154711d103..6918d9d29dc 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_conv.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_conv.py @@ -57,7 +57,7 @@ def test_1(self): newgraph = Graph() newgraph.graph_init(file_name + '/conf.yaml', file_name + '/model.bin') out = newgraph.inference([example_in.numpy()]) - + np.testing.assert_almost_equal(ref_out, [*out.values()][0], decimal=5) os.remove('{}.pt'.format(file_name)) shutil.rmtree(file_name) diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_conv_int8.py b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_conv_int8.py index 98dafc56645..71406e9c94e 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_conv_int8.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_conv_int8.py @@ -59,7 +59,7 @@ def test_1(self): from torch.ao.quantization import MinMaxObserver, PerChannelMinMaxObserver, QConfig qconfig = QConfig(activation=MinMaxObserver.with_args(qscheme=torch.per_tensor_affine, dtype=torch.quint8), weight=MinMaxObserver.with_args(dtype=torch.qint8, qscheme=torch.per_tensor_symmetric)) - + n = Net().eval() n.apply(weight_init) example_in = torch.rand(3, 16, 13, 13) @@ -76,7 +76,7 @@ def test_1(self): newgraph = Graph() newgraph.graph_init(file_name + '/conf.yaml', file_name + '/model.bin') out = newgraph.inference([example_in.numpy()]) - + np.testing.assert_almost_equal(ref_out, [*out.values()][0], decimal=5) os.remove('{}.pt'.format(file_name)) shutil.rmtree(file_name) @@ -85,7 +85,7 @@ def test_2(self): from torch.ao.quantization import MinMaxObserver, PerChannelMinMaxObserver, QConfig qconfig = QConfig(activation=MinMaxObserver.with_args(qscheme=torch.per_tensor_affine, dtype=torch.quint8), weight=PerChannelMinMaxObserver.with_args(dtype=torch.qint8, qscheme=torch.per_channel_symmetric)) - + n = Net().eval() n.apply(weight_init) example_in = torch.rand(3, 16, 13, 13) @@ -102,7 +102,7 @@ def test_2(self): newgraph = Graph() newgraph.graph_init(file_name + '/conf.yaml', file_name + '/model.bin') out = newgraph.inference([example_in.numpy()]) - + np.testing.assert_almost_equal(ref_out, [*out.values()][0], decimal=5) os.remove('{}.pt'.format(file_name)) shutil.rmtree(file_name) diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_dolly_pattern.py 
b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_dolly_pattern.py index 11ab75c6793..676200c87e2 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_dolly_pattern.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_dolly_pattern.py @@ -1089,4 +1089,4 @@ def test_1(self): dollygraph = p_fusion(dollygraph) if __name__ == "__main__": - unittest.main() \ No newline at end of file + unittest.main() diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_embedding.py b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_embedding.py index 2e07d81c900..31a2f9eaa10 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_embedding.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_embedding.py @@ -53,7 +53,7 @@ def test_1(self): traced_model = torch.jit.trace(n, example_in) torch.jit.save(traced_model, '{}.pt'.format(file_name)) ref_out = traced_model(example_in).squeeze(0).detach().numpy() - + graph = compile('{}.pt'.format(file_name)) graph.save(file_name) newgraph = Graph() diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_gather.py b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_gather.py index 8db816f58d8..3675c250b95 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_gather.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_gather.py @@ -49,11 +49,11 @@ def test_1(self): n = Net() example_in = torch.randint(0, 22, (1, 10)) traced_model = torch.jit.trace(n, example_in) - + torch.jit.save(traced_model, '{}.pt'.format(file_name)) ref_out = traced_model(example_in).detach().numpy() print(ref_out.shape) - + graph = compile('{}.pt'.format(file_name)) graph.save(file_name) newgraph = Graph() diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_gelu.py b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_gelu.py index eba3026e5ae..a7530295ef5 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_gelu.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_gelu.py @@ -56,10 +56,10 @@ def test_1(self): n = Net() example_in = torch.rand(3, 256) traced_model = torch.jit.trace(n, example_in) - + torch.jit.save(traced_model, '{}.pt'.format(file_name)) ref_out = traced_model(example_in).detach().numpy() - + graph = compile('{}.pt'.format(file_name)) graph.save(file_name) newgraph = Graph() @@ -74,17 +74,17 @@ def test_2(self): n = Net2() example_in = torch.rand(3, 256) traced_model = torch.jit.trace(n, example_in) - + torch.jit.save(traced_model, '{}.pt'.format(file_name)) ref_out = traced_model(example_in).detach().numpy() - + graph = compile('{}.pt'.format(file_name)) graph.save(file_name) newgraph = Graph() newgraph.graph_init(file_name + '/conf.yaml', file_name + '/model.bin') out = newgraph.inference([example_in.numpy()]) - np.testing.assert_almost_equal(ref_out, [*out.values()][0], decimal=5) + np.testing.assert_almost_equal(ref_out, [*out.values()][0], decimal=5) os.remove('{}.pt'.format(file_name)) shutil.rmtree(file_name) diff --git 
a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_gpt_int8.py b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_gpt_int8.py index b36008632f7..989b4f39014 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_gpt_int8.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_gpt_int8.py @@ -153,4 +153,4 @@ def test_1(self): os.remove('int8_pattern.conf') if __name__ == "__main__": - unittest.main() \ No newline at end of file + unittest.main() diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_layernorm.py b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_layernorm.py index 2efc3d5fbc8..168d81e89b5 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_layernorm.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_layernorm.py @@ -49,11 +49,11 @@ def test_1(self): n = Net() example_in = torch.randn(20, 5, 10) traced_model = torch.jit.trace(n, example_in) - + torch.jit.save(traced_model, '{}.pt'.format(file_name)) # torch.onnx.export(n, example_in, '{}.onnx'.format(file_name)) ref_out = traced_model(example_in).detach().numpy() - + graph = compile('{}.pt'.format(file_name)) graph.save(file_name) newgraph = Graph() diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_linear.py b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_linear.py index 63a3635745d..fd3e122a20c 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_linear.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_linear.py @@ -57,7 +57,7 @@ def test_1(self): newgraph = Graph() newgraph.graph_init(file_name + '/conf.yaml', file_name + '/model.bin') out = newgraph.inference([example_in.numpy()]) - + np.testing.assert_almost_equal(ref_out, [*out.values()][0], decimal=5) os.remove('{}.pt'.format(file_name)) shutil.rmtree(file_name) diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_linear_int8.py b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_linear_int8.py index b501a49715f..eb2970ccab1 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_linear_int8.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_linear_int8.py @@ -88,7 +88,7 @@ def test_per_tensor(self): newgraph = Graph() newgraph.graph_init(file_name + '/conf.yaml', file_name + '/model.bin') out = newgraph.inference([example_in.numpy()]) - + self.assertTrue(cmpData(ref_out, [*out.values()][0]) < 0.01) os.remove('{}.pt'.format(file_name)) shutil.rmtree(file_name) @@ -114,7 +114,7 @@ def test_per_channel(self): newgraph = Graph() newgraph.graph_init(file_name + '/conf.yaml', file_name + '/model.bin') out = newgraph.inference([example_in.numpy()]) - + self.assertTrue(cmpData(ref_out, [*out.values()][0]) < 0.01) os.remove('{}.pt'.format(file_name)) shutil.rmtree(file_name) @@ -140,7 +140,7 @@ def test_per_tensor_wo_bias(self): newgraph = Graph() newgraph.graph_init(file_name + '/conf.yaml', file_name + '/model.bin') out = newgraph.inference([example_in.numpy()]) - + self.assertTrue(cmpData(ref_out, [*out.values()][0]) < 0.01) 
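# cmpData, a numeric-difference helper (presumably a relative-error
# metric) used throughout these int8 tests, takes the place of
# np.testing.assert_almost_equal here: quantized linear output cannot meet
# the decimal=5 tolerance of the fp32 tests, so a 1% error bound is the
# pass criterion instead.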
os.remove('{}.pt'.format(file_name)) shutil.rmtree(file_name) @@ -166,7 +166,7 @@ def test_per_channel_wo_bias(self): newgraph = Graph() newgraph.graph_init(file_name + '/conf.yaml', file_name + '/model.bin') out = newgraph.inference([example_in.numpy()]) - + self.assertTrue(cmpData(ref_out, [*out.values()][0]) < 0.01) os.remove('{}.pt'.format(file_name)) shutil.rmtree(file_name) diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_llama_pattern.py b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_llama_pattern.py index d3d22b73622..06398c328bb 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_llama_pattern.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_llama_pattern.py @@ -293,4375 +293,4375 @@ def test_1(self): '479': dtype: fp32 shape: [32000, 5120] - + '483': dtype: s64 shape: [1] - + aten::pow_2539_other: dtype: fp32 shape: [1] - + '485': dtype: fp32 shape: [1] - + '486': dtype: fp32 shape: [5120] - + '487': dtype: fp32 shape: [5120] - + '488': dtype: s8 shape: [5120, 5120] - + aten::linear_3436_bias: dtype: fp32 shape: [5120] - + '489': dtype: fp32 shape: [5120] - + '490': dtype: s8 shape: [5120, 5120] - + aten::linear_3437_bias: dtype: fp32 shape: [5120] - + '491': dtype: fp32 shape: [5120] - + '492': dtype: s8 shape: [5120, 5120] - + aten::linear_3438_bias: dtype: fp32 shape: [5120] - + '493': dtype: fp32 shape: [1, 1, 2048, 128] - + '494': dtype: fp32 shape: [1, 1, 2048, 128] - + '495': dtype: s64 shape: [1] - + aten::neg_4508_mul_val: dtype: fp32 shape: [1] - + aten::neg_4510_mul_val: dtype: fp32 shape: [1] - + '496': dtype: fp32 shape: [1] - + '497': dtype: fp32 shape: [1] - + '498': dtype: fp32 shape: [5120] - + '499': dtype: s8 shape: [5120, 5120] - + aten::linear_3439_bias: dtype: fp32 shape: [5120] - + aten::pow_2550_other: dtype: fp32 shape: [1] - + '500': dtype: fp32 shape: [5120] - + '501': dtype: fp32 shape: [5120] - + '502': dtype: s8 shape: [13824, 5120] - + aten::linear_3440_bias: dtype: fp32 shape: [13824] - + '503': dtype: fp32 shape: [5120] - + '504': dtype: s8 shape: [13824, 5120] - + aten::linear_3441_bias: dtype: fp32 shape: [13824] - + '505': dtype: fp32 shape: [13824] - + '506': dtype: s8 shape: [5120, 13824] - + aten::linear_3442_bias: dtype: fp32 shape: [5120] - + aten::pow_2551_other: dtype: fp32 shape: [1] - + '507': dtype: fp32 shape: [5120] - + '508': dtype: fp32 shape: [5120] - + '509': dtype: s8 shape: [5120, 5120] - + aten::linear_3443_bias: dtype: fp32 shape: [5120] - + '510': dtype: fp32 shape: [5120] - + '511': dtype: s8 shape: [5120, 5120] - + aten::linear_3444_bias: dtype: fp32 shape: [5120] - + '512': dtype: fp32 shape: [5120] - + '513': dtype: s8 shape: [5120, 5120] - + aten::linear_3445_bias: dtype: fp32 shape: [5120] - + aten::neg_4540_mul_val: dtype: fp32 shape: [1] - + aten::neg_4542_mul_val: dtype: fp32 shape: [1] - + '514': dtype: fp32 shape: [5120] - + '515': dtype: s8 shape: [5120, 5120] - + aten::linear_3446_bias: dtype: fp32 shape: [5120] - + aten::pow_2563_other: dtype: fp32 shape: [1] - + '516': dtype: fp32 shape: [5120] - + '517': dtype: fp32 shape: [5120] - + '518': dtype: s8 shape: [13824, 5120] - + aten::linear_3447_bias: dtype: fp32 shape: [13824] - + '519': dtype: fp32 shape: [5120] - + '520': dtype: s8 shape: [13824, 5120] - + aten::linear_3448_bias: dtype: fp32 shape: [13824] - + '521': dtype: fp32 shape: [13824] - + '522': dtype: s8 shape: [5120, 13824] 
- + [~4,200 further lines of this hunk elided: entries '523' through '1133' and aten::linear_3449_bias through aten::linear_3716_bias, plus the per-layer aten::pow_*_other and aten::neg_*_mul_val scalars, repeat the pattern established above (fp32 bias vectors of shape [5120] or [13824], s8 weight matrices of shape [5120, 5120], [13824, 5120], or [5120, 13824], and scalar constants of shape [1]), ending with the lm-head pair '1133' (s8, [32000, 5120]) and aten::linear_3716_bias (fp32, [32000]). On every entry the only change is that the blank separator line after it loses its trailing whitespace, i.e. '- ' becomes '+'. A sketch of the test flow these torch_test files share follows; the hunk then resumes at aten::size_0 below.]
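The torch_test files in this patch all exercise one flow: trace a small nn.Module to TorchScript, compile the saved .pt file into the deprecated engine's IR, reload that IR through Graph, run inference, and compare against the traced reference. A minimal sketch of that flow, assuming the import paths for compile and Graph (the hunks above show only the call sites, not the imports) and using a hypothetical GELU module as the op under test:

```python
import os
import shutil

import numpy as np
import torch

# Assumed import paths; the diffs above show only the calls.
from intel_extension_for_transformers.llm.runtime.deprecated.compile import compile
from intel_extension_for_transformers.llm.runtime.deprecated.compile.graph import Graph

file_name = 'gelu_example'  # hypothetical; each test uses its own name


class Net(torch.nn.Module):
    """Stand-in module; each test defines its own op under test."""
    def forward(self, x):
        return torch.nn.functional.gelu(x)


n = Net()
example_in = torch.rand(3, 256)

# 1. Trace the eager module and serialize it as TorchScript,
#    keeping the traced output as the reference result.
traced_model = torch.jit.trace(n, example_in)
torch.jit.save(traced_model, '{}.pt'.format(file_name))
ref_out = traced_model(example_in).detach().numpy()

# 2. Compile the TorchScript file into the engine's IR and save it
#    as a conf.yaml / model.bin pair under `file_name/`.
graph = compile('{}.pt'.format(file_name))
graph.save(file_name)

# 3. Reload the saved IR into a fresh Graph and run inference on the
#    same input (numpy arrays in, dict of output tensors out).
newgraph = Graph()
newgraph.graph_init(file_name + '/conf.yaml', file_name + '/model.bin')
out = newgraph.inference([example_in.numpy()])

# 4. Compare against the TorchScript reference and clean up on disk.
np.testing.assert_almost_equal(ref_out, [*out.values()][0], decimal=5)
os.remove('{}.pt'.format(file_name))
shutil.rmtree(file_name)
```

The int8 variants visible in these diffs (test_linear_int8, test_matmul_int8, test_two_linear) follow the same steps but quantize with prepare/convert before tracing and swap the exact-match assertion for a tolerance check, self.assertTrue(cmpData(ref_out, [*out.values()][0]) < 0.01).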
- + aten::size_0: type: Shape input: @@ -32138,7 +32138,7 @@ def test_1(self): llamagraph = p_fusion(llamagraph) newlen = len(llamagraph.nodes) self.assertTrue(oldlen != newlen) - + if __name__ == "__main__": unittest.main() diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_matmul.py b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_matmul.py index d5d5a581126..3faf4261e40 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_matmul.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_matmul.py @@ -48,11 +48,11 @@ def test_1(self): example_in = torch.rand(3, 256) example_in2 = torch.rand(256, 10) traced_model = torch.jit.trace(n, (example_in, example_in2)) - + torch.jit.save(traced_model, '{}.pt'.format(file_name)) # torch.onnx.export(n, example_in, '{}.onnx'.format(file_name)) ref_out = traced_model(example_in, example_in2).detach().numpy() - + graph = compile('{}.pt'.format(file_name)) graph.save(file_name) newgraph = Graph() diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_matmul_int8.py b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_matmul_int8.py index 3b978c8f799..13f13d9a50e 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_matmul_int8.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_matmul_int8.py @@ -65,11 +65,11 @@ def test_1(self): convert_model = convert(prepared_model) traced_model = torch.jit.trace(convert_model, (example_in, example_in2)) print(traced_model.inlined_graph) - + torch.jit.freeze(traced_model.eval()) torch.jit.save(traced_model, '{}.pt'.format(file_name)) ref_out = traced_model(example_in, example_in2).detach().numpy() - + graph = compile('{}.pt'.format(file_name)) graph.save(file_name) newgraph = Graph() diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_neox_rotary_pos_emb.py b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_neox_rotary_pos_emb.py index b55e2e769e2..42233b6646f 100755 --- a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_neox_rotary_pos_emb.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_neox_rotary_pos_emb.py @@ -751,4 +751,4 @@ def test_1(self): self.assertTrue(oldlen != newlen) if __name__ == "__main__": - unittest.main() \ No newline at end of file + unittest.main() diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_padding_seq.py b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_padding_seq.py index a81230086a8..70c90dca3c1 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_padding_seq.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_padding_seq.py @@ -55,7 +55,7 @@ def test_1(self): traced_model = torch.jit.trace(n, (example_in, example_in2)) torch.jit.save(traced_model, '{}.pt'.format(file_name)) ref_out = traced_model(example_in, example_in2).detach().numpy() - + graph = compile('{}.pt'.format(file_name)) graph.save(file_name) newgraph = Graph() diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_permute.py 
b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_permute.py index 2a491999343..76899e2e680 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_permute.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_permute.py @@ -48,11 +48,11 @@ def test_1(self): n = Net() example_in = torch.randn(20, 5, 10) traced_model = torch.jit.trace(n, example_in) - + torch.jit.save(traced_model, '{}.pt'.format(file_name)) torch.onnx.export(n, example_in, '{}.onnx'.format(file_name)) ref_out = traced_model(example_in).detach().numpy() - + graph = compile('{}.pt'.format(file_name)) graph.save(file_name) newgraph = Graph() diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_pow.py b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_pow.py index 03988318ed0..3d87a08cf1b 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_pow.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_pow.py @@ -50,7 +50,7 @@ def test_1(self): traced_model = torch.jit.trace(n, example_in) torch.jit.save(traced_model, '{}.pt'.format(file_name)) ref_out = traced_model(example_in).detach().numpy() - + graph = compile('{}.pt'.format(file_name)) graph.save(file_name) newgraph = Graph() diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_size.py b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_size.py index eb1ba91698d..faf67d1f2f9 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_size.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_size.py @@ -47,10 +47,10 @@ def test_1(self): n = Net() example_in = torch.rand(3, 30) traced_model = torch.jit.trace(n, example_in) - + torch.jit.save(traced_model, '{}.pt'.format(file_name)) ref_out = traced_model(example_in).detach().numpy() - + graph = compile('{}.pt'.format(file_name)) graph.save(file_name) newgraph = Graph() diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_slice.py b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_slice.py index a67941f0645..54e9c00b4f2 100755 --- a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_slice.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_slice.py @@ -47,10 +47,10 @@ def test_1(self): n = Net() example_in = torch.rand(4, 64, 96, 300) traced_model = torch.jit.trace(n, example_in) - + torch.jit.save(traced_model, '{}.pt'.format(file_name)) ref_out = traced_model(example_in).detach().numpy() - + graph = compile('{}.pt'.format(file_name)) graph.save(file_name) newgraph = Graph() diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_softmax.py b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_softmax.py index a059d3ab542..10ef0a7398b 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_softmax.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_softmax.py @@ -51,7 +51,7 @@ def test_1(self): traced_model = torch.jit.trace(n, example_in) torch.jit.save(traced_model, '{}.pt'.format(file_name)) 
ref_out = traced_model(example_in).detach().numpy() - + graph = compile('{}.pt'.format(file_name)) graph.save(file_name) newgraph = Graph() diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_tanh.py b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_tanh.py index b3cf3a2c8a2..1b2d860166b 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_tanh.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_tanh.py @@ -51,7 +51,7 @@ def test_1(self): traced_model = torch.jit.trace(n, example_in) torch.jit.save(traced_model, '{}.pt'.format(file_name)) ref_out = traced_model(example_in).detach().numpy() - + graph = compile('{}.pt'.format(file_name)) graph.save(file_name) newgraph = Graph() diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_transpose.py b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_transpose.py index 16014b72070..f9f519680f0 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_transpose.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_transpose.py @@ -48,12 +48,12 @@ def test_1(self): n = Net() example_in = torch.rand(3, 4, 5) traced_model = torch.jit.trace(n, example_in) - + torch.jit.save(traced_model, '{}.pt'.format(file_name)) # torch.onnx.export(n, example_in, '{}.onnx'.format(file_name)) ref_out = traced_model(example_in).detach().numpy() print(ref_out.shape) - + graph = compile('{}.pt'.format(file_name)) graph.save(file_name) newgraph = Graph() @@ -68,12 +68,12 @@ def test_2(self): n = Net() example_in = torch.rand(3, 4) traced_model = torch.jit.trace(n, example_in) - + torch.jit.save(traced_model, '{}.pt'.format(file_name)) # torch.onnx.export(n, example_in, '{}.onnx'.format(file_name)) ref_out = traced_model(example_in).detach().numpy() print(ref_out.shape) - + graph = compile('{}.pt'.format(file_name)) graph.save(file_name) newgraph = Graph() diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_two_linear.py b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_two_linear.py index 431ddc0c80e..c4d63257b86 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_two_linear.py +++ b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_two_linear.py @@ -80,7 +80,7 @@ def test_per_tensor(self): prepared_model(example_in) convert_model = convert(prepared_model) traced_model = torch.jit.trace(convert_model, example_in) - + torch.jit.freeze(traced_model.eval()) torch.jit.save(traced_model, '{}.pt'.format(file_name)) @@ -91,7 +91,7 @@ def test_per_tensor(self): newgraph = Graph() newgraph.graph_init(file_name + '/conf.yaml', file_name + '/model.bin') out = newgraph.inference([example_in.numpy()]) - + self.assertTrue(cmpData(ref_out, [*out.values()][0]) < 0.01) os.remove('{}.pt'.format(file_name)) shutil.rmtree(file_name) diff --git a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_unsqueeze.py b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_unsqueeze.py index b957431c251..ae087a164ea 100644 --- a/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_unsqueeze.py +++ 
b/intel_extension_for_transformers/llm/runtime/deprecated/test/pytest/torch_test/test_unsqueeze.py @@ -47,11 +47,11 @@ def test_1(self): n = Net() example_in = torch.randn(1, 77, 77) traced_model = torch.jit.trace(n, example_in) - + torch.jit.save(traced_model, '{}.pt'.format(file_name)) # torch.onnx.export(n, example_in, '{}.onnx'.format(file_name)) ref_out = traced_model(example_in).detach().numpy() - + graph = compile('{}.pt'.format(file_name)) graph.save(file_name) newgraph = Graph() diff --git a/intel_extension_for_transformers/llm/runtime/graph/requirements.txt b/intel_extension_for_transformers/llm/runtime/graph/requirements.txt index 1601a33c0d7..92b9b33437b 100644 --- a/intel_extension_for_transformers/llm/runtime/graph/requirements.txt +++ b/intel_extension_for_transformers/llm/runtime/graph/requirements.txt @@ -1,17 +1,17 @@ --extra-index-url https://download.pytorch.org/whl/cpu -torch==2.1.0+cpu -transformers -numpy -sentencepiece -protobuf<3.20 -einops accelerate -peft -datasets -transformers_stream_generator -tiktoken -py-cpuinfo +auto-gptq cmake +datasets +einops gguf -auto-gptq -neural-speed \ No newline at end of file +neural-speed +numpy +peft +protobuf<3.20 +py-cpuinfo +sentencepiece +tiktoken +torch==2.1.0+cpu +transformers +transformers_stream_generator diff --git a/intel_extension_for_transformers/llm/runtime/graph/tests/test_llm_runtime.py b/intel_extension_for_transformers/llm/runtime/graph/tests/test_llm_runtime.py index ea479c281ca..4b9407c1767 100644 --- a/intel_extension_for_transformers/llm/runtime/graph/tests/test_llm_runtime.py +++ b/intel_extension_for_transformers/llm/runtime/graph/tests/test_llm_runtime.py @@ -49,7 +49,7 @@ def test_llm_runtime(self): tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True) inputs = tokenizer(prompt, return_tensors="pt") - + pt_logits = torch.load("/tf_dataset2/inc-ut/nlptoolkit_ut_model/llama2_pt_logits.pth")[:,-1] pt_generate_ids = torch.load("/tf_dataset2/inc-ut/nlptoolkit_ut_model/llama2_pt_generate_ids.pth")[0].tolist() print(tokenizer.decode(pt_generate_ids)) @@ -70,7 +70,7 @@ def test_llm_runtime(self): # "jblas_int8": WeightOnlyQuantConfig(compute_dtype="bf16", weight_dtype="int8"), } for config_type in woq_configs: - itrex_model = AutoModel.from_pretrained(model_name, quantization_config=woq_configs[config_type], + itrex_model = AutoModel.from_pretrained(model_name, quantization_config=woq_configs[config_type], use_llm_runtime=True, trust_remote_code=True) itrex_logits = itrex_model(inputs.input_ids) print(config_type, cmpData(pt_logits.detach().numpy().flatten(), itrex_logits.flatten())) diff --git a/intel_extension_for_transformers/llm/utils/__init__.py b/intel_extension_for_transformers/llm/utils/__init__.py index 407b29e53be..18896e7b549 100644 --- a/intel_extension_for_transformers/llm/utils/__init__.py +++ b/intel_extension_for_transformers/llm/utils/__init__.py @@ -14,4 +14,3 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
- diff --git a/intel_extension_for_transformers/llm/utils/mpt_trace.py b/intel_extension_for_transformers/llm/utils/mpt_trace.py index f316b9e7ab4..5634025a276 100644 --- a/intel_extension_for_transformers/llm/utils/mpt_trace.py +++ b/intel_extension_for_transformers/llm/utils/mpt_trace.py @@ -85,4 +85,4 @@ def forward( logits=outputs["logits"], past_key_values=outputs["past_key_values"] if self.use_cache else None ) - return outputs \ No newline at end of file + return outputs diff --git a/intel_extension_for_transformers/neural_chat/__init__.py b/intel_extension_for_transformers/neural_chat/__init__.py index 3a3d819f5c4..263aa977940 100644 --- a/intel_extension_for_transformers/neural_chat/__init__.py +++ b/intel_extension_for_transformers/neural_chat/__init__.py @@ -29,4 +29,3 @@ from .server.neuralchat_server import NeuralChatServerExecutor from .server.neuralchat_client import TextChatClientExecutor, VoiceChatClientExecutor, FinetuningClientExecutor from .plugins import plugins - diff --git a/intel_extension_for_transformers/neural_chat/cli/README.md b/intel_extension_for_transformers/neural_chat/cli/README.md index 6a809d2c2fb..cec1cd7a3a7 100644 --- a/intel_extension_for_transformers/neural_chat/cli/README.md +++ b/intel_extension_for_transformers/neural_chat/cli/README.md @@ -33,4 +33,3 @@ neuralchat help voicechat = TextVoiceChatExecutor() voicechat(query="../../assets/audio/say_hello.wav", output_audio_path="response.wav") ``` - diff --git a/intel_extension_for_transformers/neural_chat/cli/base_executor.py b/intel_extension_for_transformers/neural_chat/cli/base_executor.py index c841ae06233..21bd560cc94 100644 --- a/intel_extension_for_transformers/neural_chat/cli/base_executor.py +++ b/intel_extension_for_transformers/neural_chat/cli/base_executor.py @@ -48,4 +48,4 @@ def __call__(self, *arg, **kwargs): """ Python API to call an executor. 
""" - pass \ No newline at end of file + pass diff --git a/intel_extension_for_transformers/neural_chat/config.py b/intel_extension_for_transformers/neural_chat/config.py index 2e00efcbb74..f2c41a551f0 100644 --- a/intel_extension_for_transformers/neural_chat/config.py +++ b/intel_extension_for_transformers/neural_chat/config.py @@ -366,7 +366,7 @@ class TTSModelArguments: step: int = field(default=0, metadata={"help": "TTS model step."}) warmup_step: int = field(default=0, metadata={"help": "TTS model warmup step."}) learning_rate: float = field(default=1e-5, metadata={"help": "Learning rate."}) - + @dataclass class BaseFinetuningConfig: model_args: ModelArguments diff --git a/intel_extension_for_transformers/neural_chat/config_logging.py b/intel_extension_for_transformers/neural_chat/config_logging.py index 6763a9c9ee3..fc37ee36d2c 100644 --- a/intel_extension_for_transformers/neural_chat/config_logging.py +++ b/intel_extension_for_transformers/neural_chat/config_logging.py @@ -50,4 +50,3 @@ def configure_logging(log_file="app.log", log_level=logging.INFO): logger.addHandler(console_handler) return logger - diff --git a/intel_extension_for_transformers/neural_chat/docker/finetuning/README.md b/intel_extension_for_transformers/neural_chat/docker/finetuning/README.md index 6fb46da43ea..711d788fa9d 100644 --- a/intel_extension_for_transformers/neural_chat/docker/finetuning/README.md +++ b/intel_extension_for_transformers/neural_chat/docker/finetuning/README.md @@ -326,4 +326,3 @@ Where the `--dataset_concatenation` argument is a way to vastly accelerate the f For finetuning on SPR, add `--bf16` argument will speedup the finetuning process without the loss of model's performance. You could also indicate `--peft` to switch peft method in P-tuning, Prefix tuning, Prompt tuning, LLama Adapter, LoRA, see https://github.com/huggingface/peft. Note for MPT, only LoRA is supported. - diff --git a/intel_extension_for_transformers/neural_chat/docker/tgi_serving/README.md b/intel_extension_for_transformers/neural_chat/docker/tgi_serving/README.md index 331534d2eac..98e8974519d 100644 --- a/intel_extension_for_transformers/neural_chat/docker/tgi_serving/README.md +++ b/intel_extension_for_transformers/neural_chat/docker/tgi_serving/README.md @@ -34,5 +34,3 @@ curl ${your_ip}:${your_port}/v1/tgi/generate \ -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \ -H 'Content-Type: application/json' ``` - - diff --git a/intel_extension_for_transformers/neural_chat/docs/advanced_features.md b/intel_extension_for_transformers/neural_chat/docs/advanced_features.md index b2e11d8c2f1..ec5eb8751ab 100644 --- a/intel_extension_for_transformers/neural_chat/docs/advanced_features.md +++ b/intel_extension_for_transformers/neural_chat/docs/advanced_features.md @@ -196,4 +196,3 @@ For detailed fine-tuning instructions, please refer to the documentation below. [Text-To-Speech (TTS) model finetuning](./examples/finetuning/tts/README.md) And NeuralChat also provides Docker file tailored for easy fine-tuning. Explore details in [finetuning with Docker](./docker/finetuning/README.md). 
- diff --git a/intel_extension_for_transformers/neural_chat/docs/neuralchat_api.md b/intel_extension_for_transformers/neural_chat/docs/neuralchat_api.md index 57c738892b9..5bcd76cb357 100644 --- a/intel_extension_for_transformers/neural_chat/docs/neuralchat_api.md +++ b/intel_extension_for_transformers/neural_chat/docs/neuralchat_api.md @@ -108,4 +108,3 @@ curl http://localhost:80/v1/completions \ "temperature": 0.5 }' ``` - diff --git a/intel_extension_for_transformers/neural_chat/examples/deployment/photo_ai/README.md b/intel_extension_for_transformers/neural_chat/examples/deployment/photo_ai/README.md index 5488ea43938..93ddd5954a8 100644 --- a/intel_extension_for_transformers/neural_chat/examples/deployment/photo_ai/README.md +++ b/intel_extension_for_transformers/neural_chat/examples/deployment/photo_ai/README.md @@ -4,5 +4,3 @@ Welcome to Photo AI! This example introduces how to deploy the Text Chatbot syst | ---------------------| --------------------------| | Backend Setup | [Backend README](./backend/README.md) | | Frontend Setup | [Frontend README](../../../ui/customized/talking_photo/README.md) | - - diff --git a/intel_extension_for_transformers/neural_chat/examples/deployment/photo_ai/backend/photoai.py b/intel_extension_for_transformers/neural_chat/examples/deployment/photo_ai/backend/photoai.py index b4e2c17db43..6c277a505d9 100644 --- a/intel_extension_for_transformers/neural_chat/examples/deployment/photo_ai/backend/photoai.py +++ b/intel_extension_for_transformers/neural_chat/examples/deployment/photo_ai/backend/photoai.py @@ -26,4 +26,4 @@ def main(): if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/intel_extension_for_transformers/neural_chat/examples/deployment/plugin/audio/README.md b/intel_extension_for_transformers/neural_chat/examples/deployment/plugin/audio/README.md index 5170ef58706..4ffee71f7f2 100644 --- a/intel_extension_for_transformers/neural_chat/examples/deployment/plugin/audio/README.md +++ b/intel_extension_for_transformers/neural_chat/examples/deployment/plugin/audio/README.md @@ -75,4 +75,4 @@ nohup bash run.sh & To call the started audio service, the APIs are listed as follows: 1. http://127.0.0.1:7777/plugin/audio/asr , upload an audio file and return the text contents. 2. http://127.0.0.1:7777/plugin/audio/tts , input text string and return the binary content of the audio. -3. http://127.0.0.1:7777/plugin/audio/create_embedding, upload an audio file and create an embedding of your voice. \ No newline at end of file +3. http://127.0.0.1:7777/plugin/audio/create_embedding, upload an audio file and create an embedding of your voice. 
diff --git a/intel_extension_for_transformers/neural_chat/examples/deployment/plugin/audio/audio_service.py b/intel_extension_for_transformers/neural_chat/examples/deployment/plugin/audio/audio_service.py index 0b538bc1777..fc1bd3a09f3 100644 --- a/intel_extension_for_transformers/neural_chat/examples/deployment/plugin/audio/audio_service.py +++ b/intel_extension_for_transformers/neural_chat/examples/deployment/plugin/audio/audio_service.py @@ -26,4 +26,4 @@ def main(): if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/intel_extension_for_transformers/neural_chat/examples/deployment/plugin/image2image/image2image_service.py b/intel_extension_for_transformers/neural_chat/examples/deployment/plugin/image2image/image2image_service.py index 8c27e9686b5..dbd0a508faf 100644 --- a/intel_extension_for_transformers/neural_chat/examples/deployment/plugin/image2image/image2image_service.py +++ b/intel_extension_for_transformers/neural_chat/examples/deployment/plugin/image2image/image2image_service.py @@ -26,4 +26,4 @@ def main(): if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/intel_extension_for_transformers/neural_chat/examples/deployment/plugin/image2image/prepare_model.py b/intel_extension_for_transformers/neural_chat/examples/deployment/plugin/image2image/prepare_model.py index 5977c186e01..9cdf50b6670 100644 --- a/intel_extension_for_transformers/neural_chat/examples/deployment/plugin/image2image/prepare_model.py +++ b/intel_extension_for_transformers/neural_chat/examples/deployment/plugin/image2image/prepare_model.py @@ -254,7 +254,7 @@ def wrapper(*args, **kwargs): return output return wrapper unet.forward = model_wrapper(unet.forward) - + torch.onnx.export( unet, args=( diff --git a/intel_extension_for_transformers/neural_chat/examples/deployment/plugin/image2image/quantization_modules.py b/intel_extension_for_transformers/neural_chat/examples/deployment/plugin/image2image/quantization_modules.py index 0470456db31..94f0c0f8043 100644 --- a/intel_extension_for_transformers/neural_chat/examples/deployment/plugin/image2image/quantization_modules.py +++ b/intel_extension_for_transformers/neural_chat/examples/deployment/plugin/image2image/quantization_modules.py @@ -160,4 +160,4 @@ def load_int8_model(fp32_model, int8_model_path, fake_quantize_model=False): print('Converted to quantized model.') if not fake_quantize_model: int8_model.load_state_dict(torch.load(int8_model_path)) - return int8_model \ No newline at end of file + return int8_model diff --git a/intel_extension_for_transformers/neural_chat/examples/deployment/plugin/image2image/requirements.txt b/intel_extension_for_transformers/neural_chat/examples/deployment/plugin/image2image/requirements.txt index 71e156b6c16..580d9b63f8d 100644 --- a/intel_extension_for_transformers/neural_chat/examples/deployment/plugin/image2image/requirements.txt +++ b/intel_extension_for_transformers/neural_chat/examples/deployment/plugin/image2image/requirements.txt @@ -1,12 +1,12 @@ -neural-compressor -transformers accelerate datasets >= 1.8.0 -sentencepiece != 0.1.92 -protobuf -torch==2.1.0 +diffusers==0.12.1 +neural-compressor onnx>=1.12 onnxruntime==1.13.1 -diffusers==0.12.1 -pytorch_fid optimum +protobuf +pytorch_fid +sentencepiece != 0.1.92 +torch==2.1.0 +transformers diff --git a/intel_extension_for_transformers/neural_chat/examples/deployment/rag/askdoc.py b/intel_extension_for_transformers/neural_chat/examples/deployment/rag/askdoc.py index 294e83b7a21..0f8079d9948 100644 --- 
a/intel_extension_for_transformers/neural_chat/examples/deployment/rag/askdoc.py +++ b/intel_extension_for_transformers/neural_chat/examples/deployment/rag/askdoc.py @@ -26,4 +26,4 @@ def main(): if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/intel_extension_for_transformers/neural_chat/examples/deployment/rag/askdoc.yaml b/intel_extension_for_transformers/neural_chat/examples/deployment/rag/askdoc.yaml index 00f3855bee6..c6b4f04a687 100644 --- a/intel_extension_for_transformers/neural_chat/examples/deployment/rag/askdoc.yaml +++ b/intel_extension_for_transformers/neural_chat/examples/deployment/rag/askdoc.yaml @@ -38,4 +38,3 @@ safety_checker: enable: true tasks_list: ['textchat', 'retrieval'] - diff --git a/intel_extension_for_transformers/neural_chat/examples/deployment/talkingbot/server/README.md b/intel_extension_for_transformers/neural_chat/examples/deployment/talkingbot/server/README.md index 1bac6791d2b..8c91d373cd0 100644 --- a/intel_extension_for_transformers/neural_chat/examples/deployment/talkingbot/server/README.md +++ b/intel_extension_for_transformers/neural_chat/examples/deployment/talkingbot/server/README.md @@ -4,4 +4,3 @@ Welcome to Talkingbot! This example introduces how to deploy the Talkingbot syst | ---------------------| --------------------------| | Backend Setup | [Backend README](./backend/README.md) | | Frontend Setup | [Frontend README](../../../../ui/customized/talkingbot/README.md) | - diff --git a/intel_extension_for_transformers/neural_chat/examples/deployment/talkingbot/server/backend/talkingbot.py b/intel_extension_for_transformers/neural_chat/examples/deployment/talkingbot/server/backend/talkingbot.py index 5985deb0d0a..1922c744cc9 100644 --- a/intel_extension_for_transformers/neural_chat/examples/deployment/talkingbot/server/backend/talkingbot.py +++ b/intel_extension_for_transformers/neural_chat/examples/deployment/talkingbot/server/backend/talkingbot.py @@ -22,4 +22,4 @@ def main(): server_executor(config_file="./talkingbot.yaml", log_file="./talkingbot.log") if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/intel_extension_for_transformers/neural_chat/examples/deployment/textbot/README.md b/intel_extension_for_transformers/neural_chat/examples/deployment/textbot/README.md index d10d313e1e6..722da719533 100644 --- a/intel_extension_for_transformers/neural_chat/examples/deployment/textbot/README.md +++ b/intel_extension_for_transformers/neural_chat/examples/deployment/textbot/README.md @@ -6,4 +6,3 @@ Welcome to the Text Chatbot! This example introduces how to deploy the Text Chat | Frontend Setup | [Frontend README](../../../ui/gradio/basic/README.md) | You can enhance the capabilities of the Text Chatbot by enabling plugins, such as the cache plugin. This plugin is designed to help you reduce costs effectively by caching query results, resulting in fewer requests and tokens sent to the Language Model service. As a result, it offers superior query throughput compared to standard Language Model services. To deploy a Text Chatbot with caching functionality, please refer to the instructions provided in the [README](./backend_with_cache/README.md) for backend setup. 
- diff --git a/intel_extension_for_transformers/neural_chat/examples/finetuning/dpo_pipeline/README.md b/intel_extension_for_transformers/neural_chat/examples/finetuning/dpo_pipeline/README.md index 7fa401327bd..0cd6232a0ae 100644 --- a/intel_extension_for_transformers/neural_chat/examples/finetuning/dpo_pipeline/README.md +++ b/intel_extension_for_transformers/neural_chat/examples/finetuning/dpo_pipeline/README.md @@ -77,5 +77,3 @@ We verify DPO training on our finetuned `mpt-7b` model [Intel/neural-chat-7b-v1- | [mosaicml/mpt-7b-chat](https://huggingface.co/mosaicml/mpt-7b-chat) | 49.95 | 46.5 | 75.55 | 37.60 | 40.17 | ours | | [Intel/neural-chat-7b-v1-1](https://huggingface.co/Intel/neural-chat-7b-v1-1) | **51.41** | 50.09 | 76.69 | 38.79 | 40.07 | ours | | **[Intel/neural-chat-7b-v1-1](https://huggingface.co/Intel/neural-chat-7b-v1-1) with DPO** | **52.39** | 51.54 | 76.45 | 39.47| 42.10 | ours | - - diff --git a/intel_extension_for_transformers/neural_chat/examples/finetuning/dpo_pipeline/dpo_clm.py b/intel_extension_for_transformers/neural_chat/examples/finetuning/dpo_pipeline/dpo_clm.py index 76ca1bbad9b..54315a73c5f 100644 --- a/intel_extension_for_transformers/neural_chat/examples/finetuning/dpo_pipeline/dpo_clm.py +++ b/intel_extension_for_transformers/neural_chat/examples/finetuning/dpo_pipeline/dpo_clm.py @@ -269,7 +269,7 @@ def return_prompt_and_responses(samples) -> Dict[str, str]: # model config config = AutoConfig.from_pretrained(model_args.model_name_or_path, **config_kwargs) torch_dtype = ( - model_args.torch_dtype if model_args.torch_dtype in ["auto", None] + model_args.torch_dtype if model_args.torch_dtype in ["auto", None] else getattr(torch, model_args.torch_dtype) ) diff --git a/intel_extension_for_transformers/neural_chat/examples/finetuning/dpo_pipeline/requirements.txt b/intel_extension_for_transformers/neural_chat/examples/finetuning/dpo_pipeline/requirements.txt index 3b5e3f073a0..573855a44e1 100644 --- a/intel_extension_for_transformers/neural_chat/examples/finetuning/dpo_pipeline/requirements.txt +++ b/intel_extension_for_transformers/neural_chat/examples/finetuning/dpo_pipeline/requirements.txt @@ -1,10 +1,10 @@ datasets -torch -transformers -sentencepiece -peft +einops evaluate +fire nltk +peft rouge_score -einops -fire +sentencepiece +torch +transformers diff --git a/intel_extension_for_transformers/neural_chat/examples/finetuning/instruction/README.md b/intel_extension_for_transformers/neural_chat/examples/finetuning/instruction/README.md index 1d5f6102048..1bec6ac3950 100644 --- a/intel_extension_for_transformers/neural_chat/examples/finetuning/instruction/README.md +++ b/intel_extension_for_transformers/neural_chat/examples/finetuning/instruction/README.md @@ -635,4 +635,3 @@ see https://github.com/huggingface/peft. Note for MPT, only LoRA is supported. 
- **rouge related metrics:** the metrics will be calculated when the finetuning task is summarization `--task summarization` - **human eval (code generation metric):** the metric will be calculated when the finetuning task is code-generation `--task code-generation` - diff --git a/intel_extension_for_transformers/neural_chat/examples/finetuning/instruction/finetune_seq2seq.py b/intel_extension_for_transformers/neural_chat/examples/finetuning/instruction/finetune_seq2seq.py index f96e79e88a2..7af300a8c2e 100644 --- a/intel_extension_for_transformers/neural_chat/examples/finetuning/instruction/finetune_seq2seq.py +++ b/intel_extension_for_transformers/neural_chat/examples/finetuning/instruction/finetune_seq2seq.py @@ -57,4 +57,4 @@ def main(): finetune_model(finetune_cfg) if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/intel_extension_for_transformers/neural_chat/examples/finetuning/instruction/gaudi_spawn.py b/intel_extension_for_transformers/neural_chat/examples/finetuning/instruction/gaudi_spawn.py index 6978f69fa73..9fd57f89886 100644 --- a/intel_extension_for_transformers/neural_chat/examples/finetuning/instruction/gaudi_spawn.py +++ b/intel_extension_for_transformers/neural_chat/examples/finetuning/instruction/gaudi_spawn.py @@ -110,4 +110,3 @@ def main(): if __name__ == "__main__": main() - diff --git a/intel_extension_for_transformers/neural_chat/examples/finetuning/instruction/requirements.txt b/intel_extension_for_transformers/neural_chat/examples/finetuning/instruction/requirements.txt index 38a77749c4c..4701e52a719 100644 --- a/intel_extension_for_transformers/neural_chat/examples/finetuning/instruction/requirements.txt +++ b/intel_extension_for_transformers/neural_chat/examples/finetuning/instruction/requirements.txt @@ -1,15 +1,15 @@ datasets -torch==2.1.0 -transformers -sentencepiece -peft +einops evaluate +fastapi nltk +peft +pydub +python-multipart rouge_score -einops +sentencepiece +shortuuid +torch==2.1.0 +transformers uvicorn yacs -fastapi -shortuuid -pydub -python-multipart diff --git a/intel_extension_for_transformers/neural_chat/examples/finetuning/multi_modal/conversation_utils.py b/intel_extension_for_transformers/neural_chat/examples/finetuning/multi_modal/conversation_utils.py index d8b4704bdef..3ec5da5e046 100644 --- a/intel_extension_for_transformers/neural_chat/examples/finetuning/multi_modal/conversation_utils.py +++ b/intel_extension_for_transformers/neural_chat/examples/finetuning/multi_modal/conversation_utils.py @@ -262,4 +262,3 @@ def dict(self): "v1": conv_llava_v1, "plain": conv_llava_plain, } - diff --git a/intel_extension_for_transformers/neural_chat/examples/finetuning/multi_modal/gaudi_spawn.py b/intel_extension_for_transformers/neural_chat/examples/finetuning/multi_modal/gaudi_spawn.py index 6978f69fa73..9fd57f89886 100644 --- a/intel_extension_for_transformers/neural_chat/examples/finetuning/multi_modal/gaudi_spawn.py +++ b/intel_extension_for_transformers/neural_chat/examples/finetuning/multi_modal/gaudi_spawn.py @@ -110,4 +110,3 @@ def main(): if __name__ == "__main__": main() - diff --git a/intel_extension_for_transformers/neural_chat/examples/finetuning/multi_modal/llava_utils.py b/intel_extension_for_transformers/neural_chat/examples/finetuning/multi_modal/llava_utils.py index 745cde58cff..d38fb918c28 100644 --- a/intel_extension_for_transformers/neural_chat/examples/finetuning/multi_modal/llava_utils.py +++ b/intel_extension_for_transformers/neural_chat/examples/finetuning/multi_modal/llava_utils.py @@ 
-686,4 +686,3 @@ def make_supervised_data_module(tokenizer: transformers.PreTrainedTokenizer, return dict(train_dataset=train_dataset, eval_dataset=None, data_collator=data_collator) - diff --git a/intel_extension_for_transformers/neural_chat/examples/finetuning/ppo_pipeline/requirements.txt b/intel_extension_for_transformers/neural_chat/examples/finetuning/ppo_pipeline/requirements.txt index 5365f82d20b..5fae17c3f2c 100644 --- a/intel_extension_for_transformers/neural_chat/examples/finetuning/ppo_pipeline/requirements.txt +++ b/intel_extension_for_transformers/neural_chat/examples/finetuning/ppo_pipeline/requirements.txt @@ -1,9 +1,9 @@ -transformers -peft accelerate -datasets bitsandbytes +datasets evaluate -scikit-learn intel-extension-for-transformers +peft +scikit-learn +transformers tyro diff --git a/intel_extension_for_transformers/neural_chat/examples/finetuning/tts/finetune.py b/intel_extension_for_transformers/neural_chat/examples/finetuning/tts/finetune.py index c073c0f7ae8..53d3ed199da 100644 --- a/intel_extension_for_transformers/neural_chat/examples/finetuning/tts/finetune.py +++ b/intel_extension_for_transformers/neural_chat/examples/finetuning/tts/finetune.py @@ -30,5 +30,3 @@ finetuned_model = tts_fintuner.finetune() torch.save(finetuned_model, "finetuned_model.pt") - - diff --git a/intel_extension_for_transformers/neural_chat/examples/finetuning/tts/inference.py b/intel_extension_for_transformers/neural_chat/examples/finetuning/tts/inference.py index dcb556c8981..223a8fddc50 100644 --- a/intel_extension_for_transformers/neural_chat/examples/finetuning/tts/inference.py +++ b/intel_extension_for_transformers/neural_chat/examples/finetuning/tts/inference.py @@ -138,4 +138,3 @@ def correct_number(text): except Exception as e: logging.info("Catch exception: %s", e) logging.info("Restarting\n") - \ No newline at end of file diff --git a/intel_extension_for_transformers/neural_chat/examples/plugins/retrieval/README.md b/intel_extension_for_transformers/neural_chat/examples/plugins/retrieval/README.md index 8e07e70ee0a..3057b8eee27 100644 --- a/intel_extension_for_transformers/neural_chat/examples/plugins/retrieval/README.md +++ b/intel_extension_for_transformers/neural_chat/examples/plugins/retrieval/README.md @@ -37,5 +37,3 @@ You can utilize the following script to execute the code on Intel® Xeon SPR pro conda install jemalloc gperftools -c conda-forge -y bash run_retrieval_on_cpu.sh ``` - - diff --git a/intel_extension_for_transformers/neural_chat/examples/plugins/video/README.md b/intel_extension_for_transformers/neural_chat/examples/plugins/video/README.md index 4632c939b96..82e7abcec3d 100644 --- a/intel_extension_for_transformers/neural_chat/examples/plugins/video/README.md +++ b/intel_extension_for_transformers/neural_chat/examples/plugins/video/README.md @@ -31,4 +31,4 @@ neuralchat_server start --config_file face_animation.yaml ## Acknowledgements -This plugin is mostly adapted from [SadTalker](https://github.com/OpenTalker/SadTalker). We thank the related authors for their great work! \ No newline at end of file +This plugin is mostly adapted from [SadTalker](https://github.com/OpenTalker/SadTalker). We thank the related authors for their great work! 
diff --git a/intel_extension_for_transformers/neural_chat/examples/plugins/video/main.py b/intel_extension_for_transformers/neural_chat/examples/plugins/video/main.py index 3316170973b..cebc92c62f8 100644 --- a/intel_extension_for_transformers/neural_chat/examples/plugins/video/main.py +++ b/intel_extension_for_transformers/neural_chat/examples/plugins/video/main.py @@ -36,4 +36,3 @@ sadtalker = SadTalker(device=device, checkpoint_dir=checkpoint_dir, bf16=True, p_num=4, enhancer=None, output_video_path=output_video_path) sadtalker.convert(source_image=source_image, driven_audio=driven_audio) - diff --git a/intel_extension_for_transformers/neural_chat/examples/serving/TGI/README.md b/intel_extension_for_transformers/neural_chat/examples/serving/TGI/README.md index 74b4ecd80ac..03c325a6652 100644 --- a/intel_extension_for_transformers/neural_chat/examples/serving/TGI/README.md +++ b/intel_extension_for_transformers/neural_chat/examples/serving/TGI/README.md @@ -89,4 +89,4 @@ curl ${your_ip}:${your_port}/v1/tgi/generate \ -H 'Content-Type: application/json' ``` -Of course, you can also consume the service via `postman`, `http request`, or other ways. \ No newline at end of file +Of course, you can also consume the service via `postman`, `http request`, or other ways. diff --git a/intel_extension_for_transformers/neural_chat/examples/serving/vllm/README.md b/intel_extension_for_transformers/neural_chat/examples/serving/vllm/README.md index 729b873c5b8..78d99a45319 100644 --- a/intel_extension_for_transformers/neural_chat/examples/serving/vllm/README.md +++ b/intel_extension_for_transformers/neural_chat/examples/serving/vllm/README.md @@ -21,4 +21,4 @@ neuralchat_server start --config_file textbot_vllm.yaml ``` curl -X POST -H "Content-Type: application/json" -d '{"prompt": "Tell me about Intel Xeon processors."}' http://localhost:8000/v1/chat/completions -``` \ No newline at end of file +``` diff --git a/intel_extension_for_transformers/neural_chat/examples/serving/vllm/textbot_vllm.yaml b/intel_extension_for_transformers/neural_chat/examples/serving/vllm/textbot_vllm.yaml index 3e78f742930..91de0d5dbf7 100644 --- a/intel_extension_for_transformers/neural_chat/examples/serving/vllm/textbot_vllm.yaml +++ b/intel_extension_for_transformers/neural_chat/examples/serving/vllm/textbot_vllm.yaml @@ -41,4 +41,4 @@ serving: max_context_len_to_capture: 8192 # task choices = ['textchat', 'voicechat', 'retrieval', 'text2image', 'finetune'] -tasks_list: ['textchat'] \ No newline at end of file +tasks_list: ['textchat'] diff --git a/intel_extension_for_transformers/neural_chat/models/__init__.py b/intel_extension_for_transformers/neural_chat/models/__init__.py index 407b29e53be..18896e7b549 100644 --- a/intel_extension_for_transformers/neural_chat/models/__init__.py +++ b/intel_extension_for_transformers/neural_chat/models/__init__.py @@ -14,4 +14,3 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - diff --git a/intel_extension_for_transformers/neural_chat/models/model_utils.py b/intel_extension_for_transformers/neural_chat/models/model_utils.py index c8db4e738c4..43ca611faac 100644 --- a/intel_extension_for_transformers/neural_chat/models/model_utils.py +++ b/intel_extension_for_transformers/neural_chat/models/model_utils.py @@ -961,7 +961,7 @@ def predict_stream(**params): `num_return_sequences` (int): Specifies the number of alternative sequences to generate. 
`bad_words_ids` (list or None): Contains a list of token IDs that should not appear in the generated text. `force_words_ids` (list or None): Contains a list of token IDs that must be included in the generated text. - `use_hpu_graphs` (bool): + `use_hpu_graphs` (bool): Determines whether to utilize Habana Processing Units (HPUs) for accelerated generation. `use_cache` (bool): Determines whether to utilize kv cache for accelerated generation. `ipex_int8` (bool): Whether to use IPEX int8 model to inference. @@ -1036,7 +1036,7 @@ def predict_stream(**params): return generate_kwargs = get_generate_kwargs( - max_new_tokens, input_token_len, + max_new_tokens, input_token_len, get_stop_token_ids(model, tokenizer), assistant_model=assistant_model ) @@ -1256,7 +1256,7 @@ def predict(**params): `num_return_sequences` (int): Specifies the number of alternative sequences to generate. `bad_words_ids` (list or None): Contains a list of token IDs that should not appear in the generated text. `force_words_ids` (list or None): Contains a list of token IDs that must be included in the generated text. - `use_hpu_graphs` (bool): + `use_hpu_graphs` (bool): Determines whether to utilize Habana Processing Units (HPUs) for accelerated generation. `use_cache` (bool): Determines whether to utilize kv cache for accelerated generation. `ipex_int8` (bool): Whether to use IPEX int8 model to inference. @@ -1321,8 +1321,8 @@ def predict(**params): input_tokens, input_token_len = tokenization(prompt, tokenizer, device) generate_kwargs = get_generate_kwargs( - max_new_tokens, input_token_len, - get_stop_token_ids(model, tokenizer), + max_new_tokens, input_token_len, + get_stop_token_ids(model, tokenizer), assistant_model=assistant_model ) diff --git a/intel_extension_for_transformers/neural_chat/models/solar_model.py b/intel_extension_for_transformers/neural_chat/models/solar_model.py index d1f29ae1028..42d54d698f7 100644 --- a/intel_extension_for_transformers/neural_chat/models/solar_model.py +++ b/intel_extension_for_transformers/neural_chat/models/solar_model.py @@ -66,4 +66,3 @@ def get_default_conv_template(self, model_path: str) -> Conversation: return get_conv_template("solar") register_model_adapter(SolarModel) - diff --git a/intel_extension_for_transformers/neural_chat/pipeline/__init__.py b/intel_extension_for_transformers/neural_chat/pipeline/__init__.py index 74a982ca49c..7df490129b8 100644 --- a/intel_extension_for_transformers/neural_chat/pipeline/__init__.py +++ b/intel_extension_for_transformers/neural_chat/pipeline/__init__.py @@ -16,4 +16,3 @@ # limitations under the License. 
"""Intel® Neural Chat: An open-source Python library supporting LLM chatbot.""" - diff --git a/intel_extension_for_transformers/neural_chat/pipeline/plugins/audio/finetuning/tts_finetuning.py b/intel_extension_for_transformers/neural_chat/pipeline/plugins/audio/finetuning/tts_finetuning.py index ebcdbfd83b1..7c108518c66 100644 --- a/intel_extension_for_transformers/neural_chat/pipeline/plugins/audio/finetuning/tts_finetuning.py +++ b/intel_extension_for_transformers/neural_chat/pipeline/plugins/audio/finetuning/tts_finetuning.py @@ -72,7 +72,7 @@ def __call__(self, features: List[Dict[str, Union[List[int], torch.Tensor]]]) -> batch["speaker_embeddings"] = torch.tensor(speaker_features) return batch - + class TTSFinetuning: def __init__(self, finetuning_config: TTSFinetuningConfig): self.dataset_args, self.model_args = ( @@ -108,7 +108,7 @@ def _construct_audio_list(self): audio_names = os.listdir(self.audio_folder_path) audio_paths = [os.path.join(self.audio_folder_path, i) for i in audio_names] return audio_paths - + def _construct_finetuning_dataset(self): raw_texts, normalized_texts = self._construct_text_list() audio_paths = self._construct_audio_list() @@ -203,4 +203,4 @@ def finetune(self): ) trainer.train() - return self.model \ No newline at end of file + return self.model diff --git a/intel_extension_for_transformers/neural_chat/pipeline/plugins/audio/requirements.txt b/intel_extension_for_transformers/neural_chat/pipeline/plugins/audio/requirements.txt index 1cfd62866c2..b7de6e9bdd6 100644 --- a/intel_extension_for_transformers/neural_chat/pipeline/plugins/audio/requirements.txt +++ b/intel_extension_for_transformers/neural_chat/pipeline/plugins/audio/requirements.txt @@ -1,11 +1,11 @@ +datasets +librosa +num2words paddlepaddle paddlespeech -transformers -soundfile -datasets pydub python-multipart +soundfile speechbrain -librosa +transformers zhconv -num2words \ No newline at end of file diff --git a/intel_extension_for_transformers/neural_chat/pipeline/plugins/audio/utils/cut_video.py b/intel_extension_for_transformers/neural_chat/pipeline/plugins/audio/utils/cut_video.py index 79a9d4c152a..b659d09052e 100644 --- a/intel_extension_for_transformers/neural_chat/pipeline/plugins/audio/utils/cut_video.py +++ b/intel_extension_for_transformers/neural_chat/pipeline/plugins/audio/utils/cut_video.py @@ -54,7 +54,7 @@ def cut_video(args, outdir): end_min = 0 end_sec = 0 mark = 0 - + for i in range(0, min + 1, delta_X): logging.info("i: %s", i) if min >= delta_X: @@ -72,7 +72,7 @@ def cut_video(args, outdir): end_min = t_min if i == t_min: - end_sec = t_second + end_sec = t_second start_hour = '0' + str(start_hour) if len(str(start_hour)) == 1 else str(start_hour) start_min = '0' + str(start_min) if len(str(start_min)) == 1 else str(start_min) @@ -96,7 +96,7 @@ def cut_video(args, outdir): start_hour = int(end_hour) start_min = int(end_min) start_sec = int(end_sec) - + if __name__ == '__main__': parser = argparse.ArgumentParser(__doc__) diff --git a/intel_extension_for_transformers/neural_chat/pipeline/plugins/audio/utils/english_normalizer.py b/intel_extension_for_transformers/neural_chat/pipeline/plugins/audio/utils/english_normalizer.py index 9b4e390b4ed..772425142df 100644 --- a/intel_extension_for_transformers/neural_chat/pipeline/plugins/audio/utils/english_normalizer.py +++ b/intel_extension_for_transformers/neural_chat/pipeline/plugins/audio/utils/english_normalizer.py @@ -54,7 +54,7 @@ def __init__(self): "Z": "zed", ".": "point", } - + def correct_abbreviation(self, text): # 
TODO mixed abbreviation or proper noun like i7, ffmpeg, BTW should be supported @@ -120,4 +120,4 @@ def correct_number(self, text): # if the text is not truncated correctly by early stop token, then manually add one. if len(results) > 0 and results[-1] not in [",", ".", "?", "!"]: results += "." - return results \ No newline at end of file + return results diff --git a/intel_extension_for_transformers/neural_chat/pipeline/plugins/audio/utils/reduce_noise.py b/intel_extension_for_transformers/neural_chat/pipeline/plugins/audio/utils/reduce_noise.py index 9f3c1c63c08..fd5aeac2b07 100644 --- a/intel_extension_for_transformers/neural_chat/pipeline/plugins/audio/utils/reduce_noise.py +++ b/intel_extension_for_transformers/neural_chat/pipeline/plugins/audio/utils/reduce_noise.py @@ -370,4 +370,3 @@ def reduce_audio_amplify(self, output_audio_path, y): rn_am_sound = rn_sound.apply_gain(original_db - rn_db) rn_am_sound.export(f"{output_audio_path}_rn_ap.wav") return f"{output_audio_path}_rn_ap.wav" - diff --git a/intel_extension_for_transformers/neural_chat/pipeline/plugins/audio/utils/split_audio.py b/intel_extension_for_transformers/neural_chat/pipeline/plugins/audio/utils/split_audio.py index be46fc15dfc..4f46ce7f7d5 100644 --- a/intel_extension_for_transformers/neural_chat/pipeline/plugins/audio/utils/split_audio.py +++ b/intel_extension_for_transformers/neural_chat/pipeline/plugins/audio/utils/split_audio.py @@ -164,7 +164,7 @@ def vad_collector(sample_rate, frame_duration_ms, def main(args): is_dir = os.path.isdir(args.in_path) - if is_dir: + if is_dir: input_dir = args.in_path path_list=os.listdir(args.in_path) else: # input is a wav file @@ -202,4 +202,4 @@ def main(args): if not is_exist: logging.info("path not existed!") else: - main(args) \ No newline at end of file + main(args) diff --git a/intel_extension_for_transformers/neural_chat/pipeline/plugins/audio/utils/video_to_wav.py b/intel_extension_for_transformers/neural_chat/pipeline/plugins/audio/utils/video_to_wav.py index db91a5406d7..782ecfd223f 100644 --- a/intel_extension_for_transformers/neural_chat/pipeline/plugins/audio/utils/video_to_wav.py +++ b/intel_extension_for_transformers/neural_chat/pipeline/plugins/audio/utils/video_to_wav.py @@ -66,7 +66,7 @@ def convert_video_to_wav(path, output_sample_rate, is_mono=True): else: # pragma: no cover logging.info("file %s format not supported!", filename) continue - + if __name__ == '__main__': # pragma: no cover parser = argparse.ArgumentParser(__doc__) diff --git a/intel_extension_for_transformers/neural_chat/pipeline/plugins/caching/cache_config.yaml b/intel_extension_for_transformers/neural_chat/pipeline/plugins/caching/cache_config.yaml index c04a71fd905..f2995339340 100644 --- a/intel_extension_for_transformers/neural_chat/pipeline/plugins/caching/cache_config.yaml +++ b/intel_extension_for_transformers/neural_chat/pipeline/plugins/caching/cache_config.yaml @@ -33,7 +33,7 @@ storage_config: sqlite,faiss vector_params: # Set vector storage related params here -evaluation: +evaluation: distance evaluation_config: max_distance: 1.0 diff --git a/intel_extension_for_transformers/neural_chat/pipeline/plugins/caching/requirements.txt b/intel_extension_for_transformers/neural_chat/pipeline/plugins/caching/requirements.txt index f92d2459735..f52106d5367 100644 --- a/intel_extension_for_transformers/neural_chat/pipeline/plugins/caching/requirements.txt +++ b/intel_extension_for_transformers/neural_chat/pipeline/plugins/caching/requirements.txt @@ -1,2 +1,2 @@ -gptcache 
git+https://github.com/UKPLab/sentence-transformers.git@5c838a705c24c2dfd151a71674c99d09d014c1a9 +gptcache diff --git a/intel_extension_for_transformers/neural_chat/pipeline/plugins/image2image/diffusion_utils.py b/intel_extension_for_transformers/neural_chat/pipeline/plugins/image2image/diffusion_utils.py index b20b83038d3..811b27ce1bf 100644 --- a/intel_extension_for_transformers/neural_chat/pipeline/plugins/image2image/diffusion_utils.py +++ b/intel_extension_for_transformers/neural_chat/pipeline/plugins/image2image/diffusion_utils.py @@ -37,4 +37,3 @@ def neural_engine_init(ir_path): vae_decoder_graph.graph_init(vae_decoder_conf, vae_decoder_bin) return [text_encoder_graph, unet_graph, vae_decoder_graph] - diff --git a/intel_extension_for_transformers/neural_chat/pipeline/plugins/image2image/instructpix2pix_pipeline.py b/intel_extension_for_transformers/neural_chat/pipeline/plugins/image2image/instructpix2pix_pipeline.py index 760c0ad3065..73e0dd95e09 100644 --- a/intel_extension_for_transformers/neural_chat/pipeline/plugins/image2image/instructpix2pix_pipeline.py +++ b/intel_extension_for_transformers/neural_chat/pipeline/plugins/image2image/instructpix2pix_pipeline.py @@ -341,7 +341,7 @@ def __call__( # Original Pytorch Diffuser Unet Code: predict the noise residual #noise_pred = self.unet(scaled_latent_model_input, t, encoder_hidden_states=prompt_embeds).sample - + # The ITREX Unet Code scaled_latent_model_input = scaled_latent_model_input.contiguous() prompt_embeds = prompt_embeds.contiguous() @@ -744,4 +744,3 @@ def bf16_to_fp32(bf16_np): int32_np = int32_np << 16 fp32_np = int32_np.view(np.float32) return fp32_np - diff --git a/intel_extension_for_transformers/neural_chat/pipeline/plugins/memory/memory.py b/intel_extension_for_transformers/neural_chat/pipeline/plugins/memory/memory.py index f19147517bf..61b8388a937 100644 --- a/intel_extension_for_transformers/neural_chat/pipeline/plugins/memory/memory.py +++ b/intel_extension_for_transformers/neural_chat/pipeline/plugins/memory/memory.py @@ -20,7 +20,7 @@ class Memory: def __init__(self): self.chat_memory = [] - + def clear(self): self.chat_memory.clear() diff --git a/intel_extension_for_transformers/neural_chat/pipeline/plugins/ner/README.md b/intel_extension_for_transformers/neural_chat/pipeline/plugins/ner/README.md index ba961cf5de0..2a5dbe5f8ff 100644 --- a/intel_extension_for_transformers/neural_chat/pipeline/plugins/ner/README.md +++ b/intel_extension_for_transformers/neural_chat/pipeline/plugins/ner/README.md @@ -98,4 +98,4 @@ threads [int]: The thread number of model inference. Set to the core number of y max_new_tokens [int]: The max generated token numbers. Default to 32. seed [int]: The random seed of llm. Default to 1234. 
-``` \ No newline at end of file +``` diff --git a/intel_extension_for_transformers/neural_chat/pipeline/plugins/ner/ner.py b/intel_extension_for_transformers/neural_chat/pipeline/plugins/ner/ner.py index ad599796375..83f1f944eed 100644 --- a/intel_extension_for_transformers/neural_chat/pipeline/plugins/ner/ner.py +++ b/intel_extension_for_transformers/neural_chat/pipeline/plugins/ner/ner.py @@ -55,4 +55,3 @@ def ner_inference(self, response): logging.info("[NER info] Inference time consumption: %s", time.time() - start_time) return result - diff --git a/intel_extension_for_transformers/neural_chat/pipeline/plugins/ner/requirements.txt b/intel_extension_for_transformers/neural_chat/pipeline/plugins/ner/requirements.txt index b642f9a3f58..04e761b52bd 100644 --- a/intel_extension_for_transformers/neural_chat/pipeline/plugins/ner/requirements.txt +++ b/intel_extension_for_transformers/neural_chat/pipeline/plugins/ner/requirements.txt @@ -1,10 +1,10 @@ +deepface +exifread +intel_extension_for_pytorch +neural-compressor +pymysql +spacy torch -torchvision torchaudio -spacy -neural-compressor +torchvision transformers -intel_extension_for_pytorch -pymysql -deepface -exifread diff --git a/intel_extension_for_transformers/neural_chat/pipeline/plugins/ner/utils/process_text.py b/intel_extension_for_transformers/neural_chat/pipeline/plugins/ner/utils/process_text.py index 2ed551c4864..b9f82a1fa5f 100644 --- a/intel_extension_for_transformers/neural_chat/pipeline/plugins/ner/utils/process_text.py +++ b/intel_extension_for_transformers/neural_chat/pipeline/plugins/ner/utils/process_text.py @@ -57,7 +57,7 @@ def process_time(result_text: str, doc) -> dict: if len(mentioned_time["period"]) % 2 != 0: mentioned_time["time"] = list(set(mentioned_time["time"]+mentioned_time["period"])) mentioned_time["period"] = [] - + return mentioned_time @@ -94,11 +94,11 @@ def process_entities(query, doc, mentioned_time: dict) -> dict: result_period = post_process_last_week() result = { - "period": result_period, - "time": mentioned_time['time'], - 'location': location, - "name": name, + "period": result_period, + "time": mentioned_time['time'], + 'location': location, + "name": name, "organization": organization } - - return result \ No newline at end of file + + return result diff --git a/intel_extension_for_transformers/neural_chat/pipeline/plugins/prompt/prompt_template.py b/intel_extension_for_transformers/neural_chat/pipeline/plugins/prompt/prompt_template.py index 14a8f24adc5..ec40cb093da 100644 --- a/intel_extension_for_transformers/neural_chat/pipeline/plugins/prompt/prompt_template.py +++ b/intel_extension_for_transformers/neural_chat/pipeline/plugins/prompt/prompt_template.py @@ -69,4 +69,4 @@ def generate_intent_prompt(query): conv = PromptTemplate("intent") conv.append_message(conv.roles[0], query) conv.append_message(conv.roles[1], None) - return conv.get_prompt() \ No newline at end of file + return conv.get_prompt() diff --git a/intel_extension_for_transformers/neural_chat/pipeline/plugins/retrieval/README.md b/intel_extension_for_transformers/neural_chat/pipeline/plugins/retrieval/README.md index bde22c5916a..bcf0291fb31 100644 --- a/intel_extension_for_transformers/neural_chat/pipeline/plugins/retrieval/README.md +++ b/intel_extension_for_transformers/neural_chat/pipeline/plugins/retrieval/README.md @@ -236,4 +236,4 @@ from intel_extension_for_transformers.langchain.retrievers import ChildParentRet from langchain.vectorstores import Chroma retriever = 
ChildParentRetriever(vectorstore=Chroma(documents=child_documents), parentstore=Chroma(documents=parent_documents), search_type=xxx, search_kwargs={...}) docs=retriever.get_relevant_documents("Intel") -``` \ No newline at end of file +``` diff --git a/intel_extension_for_transformers/neural_chat/pipeline/plugins/retrieval/detector/intent_detection.py b/intel_extension_for_transformers/neural_chat/pipeline/plugins/retrieval/detector/intent_detection.py index 8aa265e0476..895011d3e86 100644 --- a/intel_extension_for_transformers/neural_chat/pipeline/plugins/retrieval/detector/intent_detection.py +++ b/intel_extension_for_transformers/neural_chat/pipeline/plugins/retrieval/detector/intent_detection.py @@ -36,4 +36,3 @@ def intent_detection(self, model_name, query): params["max_new_tokens"] = 10 intent = predict(**params) return intent - diff --git a/intel_extension_for_transformers/neural_chat/pipeline/plugins/retrieval/parser/html_parser.py b/intel_extension_for_transformers/neural_chat/pipeline/plugins/retrieval/parser/html_parser.py index 6c6fa42e18e..3b6aa70f382 100644 --- a/intel_extension_for_transformers/neural_chat/pipeline/plugins/retrieval/parser/html_parser.py +++ b/intel_extension_for_transformers/neural_chat/pipeline/plugins/retrieval/parser/html_parser.py @@ -193,4 +193,4 @@ def load_html_data(url): # {'text': all_text, 'main_content': main_content} - return main_content \ No newline at end of file + return main_content diff --git a/intel_extension_for_transformers/neural_chat/pipeline/plugins/retrieval/parser/parser.py b/intel_extension_for_transformers/neural_chat/pipeline/plugins/retrieval/parser/parser.py index 1d879466ae6..3337470f430 100644 --- a/intel_extension_for_transformers/neural_chat/pipeline/plugins/retrieval/parser/parser.py +++ b/intel_extension_for_transformers/neural_chat/pipeline/plugins/retrieval/parser/parser.py @@ -37,8 +37,8 @@ def __init__(self, max_chuck_size=512, min_chuck_size=5, process=True): self.max_chuck_size = max_chuck_size self.min_chuck_size = min_chuck_size self.process = process - - + + def load(self, input, **kwargs): """ The API for loading the file. Support single file, batch files, and urls parsing. @@ -49,7 +49,7 @@ def load(self, input, **kwargs): self.min_chuck_size = kwargs['min_chuck_size'] if 'process' in kwargs: self.process = kwargs['process'] - + if isinstance(input, str): if os.path.isfile(input): data_collection = self.parse_document(input) @@ -64,10 +64,10 @@ def load(self, input, **kwargs): logging.error("The given link/str is unavailable. Please try another one!") else: logging.error("The input format is invalid!") - + return data_collection - - + + def parse_document(self, input): """ Parse the uploaded file. 
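The `load` hunk above documents single-file, batch, and URL inputs, with `max_chuck_size`, `min_chuck_size`, and `process` overridable per call. A usage sketch — the keyword names appear verbatim in the diff, while the import path and the batch/URL call forms (taken from the docstring) are assumptions:

```python
# Usage sketch for DocumentParser.load as modified above. The keyword names
# (max_chuck_size, min_chuck_size, process) appear verbatim in the diff;
# the import path is an assumption.
from intel_extension_for_transformers.neural_chat.pipeline.plugins.retrieval.parser.parser import DocumentParser  # assumed path

parser = DocumentParser(max_chuck_size=512, min_chuck_size=5, process=True)

docs = parser.load("./docs/guide.pdf")                        # single file
# docs = parser.load(["./docs/a.pdf", "./docs/b.txt"])        # batch of files (per the docstring)
# docs = parser.load("https://www.intel.com")                 # URL parsing (per the docstring)
# docs = parser.load("./docs/guide.pdf", max_chuck_size=256)  # per-call override
```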
diff --git a/intel_extension_for_transformers/neural_chat/pipeline/plugins/retrieval/retrieval_agent.py b/intel_extension_for_transformers/neural_chat/pipeline/plugins/retrieval/retrieval_agent.py index 4aaa8c75490..87c116867e7 100644 --- a/intel_extension_for_transformers/neural_chat/pipeline/plugins/retrieval/retrieval_agent.py +++ b/intel_extension_for_transformers/neural_chat/pipeline/plugins/retrieval/retrieval_agent.py @@ -90,12 +90,12 @@ def __init__(self, "accuracy", "general", ) - + assert self.retrieval_type in allowed_retrieval_type, "search_type of {} not allowed.".format( \ self.retrieval_type) assert self.mode in allowed_generation_mode, "generation mode of {} not allowed.".format( \ self.mode) - + if isinstance(input_path, str): if os.path.exists(input_path): self.input_path = input_path @@ -129,12 +129,12 @@ def __init__(self, except Exception as e: logging.error("Please select a proper embedding model.") logging.error(e) - + self.document_parser = DocumentParser(max_chuck_size=max_chuck_size, min_chuck_size = min_chuck_size, \ process=self.process) data_collection = self.document_parser.load(input=self.input_path, **kwargs) logging.info("The parsing for the uploaded files is finished.") - + langchain_documents = document_transfer(data_collection) logging.info("The format of parsed documents is transferred.") @@ -203,7 +203,7 @@ def create(self, input_path, **kwargs): """ data_collection = self.document_parser.load(input=input_path, **kwargs) langchain_documents = document_transfer(data_collection) - + if self.retrieval_type == 'default': knowledge_base = self.database.from_documents(documents=langchain_documents, \ embedding=self.embeddings, **kwargs) @@ -219,14 +219,14 @@ def create(self, input_path, **kwargs): self.retriever = RetrieverAdapter(retrieval_type=self.retrieval_type, document_store=knowledge_base, \ child_document_store=child_knowledge_base, **kwargs).retriever logging.info("The retriever is successfully built.") - + def append_localdb(self, append_path, **kwargs): "Append the knowledge instances into a given knowledge base." 
data_collection = self.document_parser.load(input=append_path, **kwargs) langchain_documents = document_transfer(data_collection) - + if self.retrieval_type == 'default': knowledge_base = self.database.from_documents(documents=langchain_documents, \ embedding=self.embeddings, **kwargs) @@ -242,7 +242,7 @@ def append_localdb(self, append_path, **kwargs): self.retriever = RetrieverAdapter(retrieval_type=self.retrieval_type, document_store=knowledge_base, \ child_document_store=child_knowledge_base, **kwargs).retriever logging.info("The retriever is successfully built.") - + def pre_llm_inference_actions(self, model_name, query): diff --git a/intel_extension_for_transformers/neural_chat/pipeline/plugins/retrieval/retriever_adapter.py b/intel_extension_for_transformers/neural_chat/pipeline/plugins/retrieval/retriever_adapter.py index 5aec1b1ffee..27582b9627a 100644 --- a/intel_extension_for_transformers/neural_chat/pipeline/plugins/retrieval/retriever_adapter.py +++ b/intel_extension_for_transformers/neural_chat/pipeline/plugins/retrieval/retriever_adapter.py @@ -30,7 +30,7 @@ class RetrieverAdapter(): def __init__(self, retrieval_type='default', document_store=None, child_document_store= None, **kwargs): self.retrieval_type = retrieval_type - + if self.retrieval_type == "default": self.retriever = VectorStoreRetriever(vectorstore = document_store, **kwargs) elif self.retrieval_type == "child_parent": diff --git a/intel_extension_for_transformers/neural_chat/pipeline/plugins/security/dict.py b/intel_extension_for_transformers/neural_chat/pipeline/plugins/security/dict.py index 7c0e9851087..b205c0b0168 100644 --- a/intel_extension_for_transformers/neural_chat/pipeline/plugins/security/dict.py +++ b/intel_extension_for_transformers/neural_chat/pipeline/plugins/security/dict.py @@ -1938,4 +1938,4 @@ ['政治', '司法'], ['政治', '主权'], ['政治', '北京大桥'] -] \ No newline at end of file +] diff --git a/intel_extension_for_transformers/neural_chat/pipeline/plugins/security/stopword.py b/intel_extension_for_transformers/neural_chat/pipeline/plugins/security/stopword.py index dcfc0d51ce1..9aeb3533bc1 100644 --- a/intel_extension_for_transformers/neural_chat/pipeline/plugins/security/stopword.py +++ b/intel_extension_for_transformers/neural_chat/pipeline/plugins/security/stopword.py @@ -63,4 +63,4 @@ '……', '《', '》', -] \ No newline at end of file +] diff --git a/intel_extension_for_transformers/neural_chat/pipeline/plugins/video/face_animation/README.md b/intel_extension_for_transformers/neural_chat/pipeline/plugins/video/face_animation/README.md index 33e710c6afe..ce4b2d72d0f 100644 --- a/intel_extension_for_transformers/neural_chat/pipeline/plugins/video/face_animation/README.md +++ b/intel_extension_for_transformers/neural_chat/pipeline/plugins/video/face_animation/README.md @@ -27,4 +27,4 @@ sadtalker.convert(source_image=source_image, driven_audio=driven_audio, output_v ## Acknowledgements -This plugin is mostly adapted from [SadTalker](https://github.com/OpenTalker/SadTalker). We thank the related authors for their great work! \ No newline at end of file +This plugin is mostly adapted from [SadTalker](https://github.com/OpenTalker/SadTalker). We thank the related authors for their great work! 
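For orientation, the `RetrieverAdapter` dispatch refactored above (`"default"` wraps a vector store in a `VectorStoreRetriever`; `"child_parent"` additionally takes a child document store) can be exercised roughly as follows — the adapter's import path is an assumption, and the `from_documents` construction mirrors the retrieval agent code in this diff:

```python
# Rough sketch of the RetrieverAdapter dispatch shown above: retrieval_type
# "default" wraps a vector store in a VectorStoreRetriever. The adapter's
# import path is an assumption; the from_documents construction mirrors the
# retrieval agent code in this diff.
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.schema import Document
from langchain.vectorstores import Chroma

from intel_extension_for_transformers.neural_chat.pipeline.plugins.retrieval.retriever_adapter import RetrieverAdapter  # assumed path

store = Chroma.from_documents(
    documents=[Document(page_content="Intel Xeon processors power data centers.")],
    embedding=HuggingFaceEmbeddings(),
)
retriever = RetrieverAdapter(retrieval_type="default", document_store=store).retriever
print(retriever.get_relevant_documents("Intel"))
```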
diff --git a/intel_extension_for_transformers/neural_chat/pipeline/plugins/video/face_animation/inference.py b/intel_extension_for_transformers/neural_chat/pipeline/plugins/video/face_animation/inference.py index f0e045769b5..1c2bc280417 100644 --- a/intel_extension_for_transformers/neural_chat/pipeline/plugins/video/face_animation/inference.py +++ b/intel_extension_for_transformers/neural_chat/pipeline/plugins/video/face_animation/inference.py @@ -57,7 +57,7 @@ def main(args): batch_size = args.batch_size current_root_path = os.path.split(sys.argv[0])[0] - + sadtalker_paths = init_path( args.checkpoint_dir, os.path.join(current_root_path, "src/config"), args.size, args.preprocess ) diff --git a/intel_extension_for_transformers/neural_chat/pipeline/plugins/video/face_animation/requirements.txt b/intel_extension_for_transformers/neural_chat/pipeline/plugins/video/face_animation/requirements.txt index cb3c4622e26..b59f6263562 100644 --- a/intel_extension_for_transformers/neural_chat/pipeline/plugins/video/face_animation/requirements.txt +++ b/intel_extension_for_transformers/neural_chat/pipeline/plugins/video/face_animation/requirements.txt @@ -1,21 +1,21 @@ -numpy==1.23.5 +av +basicsr==1.4.2 +dlib-bin face_alignment==1.3.5 +facexlib @ git+https://github.com/Spycsh/facexlib@master +gfpgan imageio==2.19.3 imageio-ffmpeg==0.4.7 +joblib==1.2.0 +kornia==0.6.8 librosa==0.9.2 # numba -resampy==0.3.1 +numpy==1.23.5 pydub==0.25.1 +pyyaml +resampy==0.3.1 +safetensors +scikit-image==0.19.3 scipy==1.10.1 -kornia==0.6.8 tqdm yacs==0.1.8 -pyyaml -joblib==1.2.0 -scikit-image==0.19.3 -basicsr==1.4.2 -facexlib @ git+https://github.com/Spycsh/facexlib@master -gfpgan -dlib-bin -av -safetensors diff --git a/intel_extension_for_transformers/neural_chat/pipeline/plugins/video/face_animation/src/audio2exp_models/__init__.py b/intel_extension_for_transformers/neural_chat/pipeline/plugins/video/face_animation/src/audio2exp_models/__init__.py index 2823243c0bb..18896e7b549 100644 --- a/intel_extension_for_transformers/neural_chat/pipeline/plugins/video/face_animation/src/audio2exp_models/__init__.py +++ b/intel_extension_for_transformers/neural_chat/pipeline/plugins/video/face_animation/src/audio2exp_models/__init__.py @@ -13,4 +13,4 @@ # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and -# limitations under the License. \ No newline at end of file +# limitations under the License. diff --git a/intel_extension_for_transformers/neural_chat/pipeline/plugins/video/face_animation/src/audio2pose_models/__init__.py b/intel_extension_for_transformers/neural_chat/pipeline/plugins/video/face_animation/src/audio2pose_models/__init__.py index 2823243c0bb..18896e7b549 100644 --- a/intel_extension_for_transformers/neural_chat/pipeline/plugins/video/face_animation/src/audio2pose_models/__init__.py +++ b/intel_extension_for_transformers/neural_chat/pipeline/plugins/video/face_animation/src/audio2pose_models/__init__.py @@ -13,4 +13,4 @@ # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and -# limitations under the License. \ No newline at end of file +# limitations under the License. 
diff --git a/intel_extension_for_transformers/neural_chat/pipeline/plugins/video/face_animation/src/config/__init__.py b/intel_extension_for_transformers/neural_chat/pipeline/plugins/video/face_animation/src/config/__init__.py index 2823243c0bb..18896e7b549 100644 --- a/intel_extension_for_transformers/neural_chat/pipeline/plugins/video/face_animation/src/config/__init__.py +++ b/intel_extension_for_transformers/neural_chat/pipeline/plugins/video/face_animation/src/config/__init__.py @@ -13,4 +13,4 @@ # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and -# limitations under the License. \ No newline at end of file +# limitations under the License. diff --git a/intel_extension_for_transformers/neural_chat/pipeline/plugins/video/face_animation/src/config/auido2exp.yaml b/intel_extension_for_transformers/neural_chat/pipeline/plugins/video/face_animation/src/config/auido2exp.yaml index 3640ea4a496..1691e739f76 100644 --- a/intel_extension_for_transformers/neural_chat/pipeline/plugins/video/face_animation/src/config/auido2exp.yaml +++ b/intel_extension_for_transformers/neural_chat/pipeline/plugins/video/face_animation/src/config/auido2exp.yaml @@ -27,7 +27,7 @@ DATASET: DEBUG: True NUM_REPEATS: 2 T: 40 - + MODEL: FRAMEWORK: V2 @@ -44,7 +44,7 @@ MODEL: LATENT_SIZE: 256 ENCODER_LAYER_SIZES: [192, 1024] DECODER_LAYER_SIZES: [1024, 192] - + TRAIN: MAX_EPOCH: 300 @@ -67,5 +67,3 @@ TRAIN: TAG: NAME: small_dataset - - diff --git a/intel_extension_for_transformers/neural_chat/pipeline/plugins/video/face_animation/src/config/auido2pose.yaml b/intel_extension_for_transformers/neural_chat/pipeline/plugins/video/face_animation/src/config/auido2pose.yaml index 12335c7baa5..002af7c09d1 100644 --- a/intel_extension_for_transformers/neural_chat/pipeline/plugins/video/face_animation/src/config/auido2pose.yaml +++ b/intel_extension_for_transformers/neural_chat/pipeline/plugins/video/face_animation/src/config/auido2pose.yaml @@ -25,7 +25,7 @@ DATASET: COEFF_LEN: 73 NUM_CLASSES: 46 DEBUG: True - + MODEL: AUDIOENCODER: @@ -41,7 +41,7 @@ MODEL: LATENT_SIZE: 64 ENCODER_LAYER_SIZES: [192, 128] DECODER_LAYER_SIZES: [128, 192] - + TRAIN: MAX_EPOCH: 150 @@ -59,5 +59,3 @@ TRAIN: TAG: NAME: cvae_UNET_useAudio_usewav2lipAudioEncoder - - diff --git a/intel_extension_for_transformers/neural_chat/pipeline/plugins/video/face_animation/src/config/facerender.yaml b/intel_extension_for_transformers/neural_chat/pipeline/plugins/video/face_animation/src/config/facerender.yaml index d8d7949ad1f..e827ce9fcb0 100644 --- a/intel_extension_for_transformers/neural_chat/pipeline/plugins/video/face_animation/src/config/facerender.yaml +++ b/intel_extension_for_transformers/neural_chat/pipeline/plugins/video/face_animation/src/config/facerender.yaml @@ -18,20 +18,20 @@ model_params: common_params: - num_kp: 15 - image_channel: 3 + num_kp: 15 + image_channel: 3 feature_channel: 32 estimate_jacobian: False # True kp_detector_params: temperature: 0.1 - block_expansion: 32 + block_expansion: 32 max_features: 1024 scale_factor: 0.25 # 0.25 num_blocks: 5 reshape_channel: 16384 # 16384 = 1024 * 16 reshape_depth: 16 he_estimator_params: - block_expansion: 64 + block_expansion: 64 max_features: 2048 num_bins: 66 generator_params: @@ -50,7 +50,7 @@ model_params: compress: 4 discriminator_params: scales: [1] - block_expansion: 32 + block_expansion: 32 max_features: 512 num_blocks: 4 sn: True @@ 
-60,4 +60,3 @@ model_params: layer: 3 num_kp: 15 num_bins: 66 - diff --git a/intel_extension_for_transformers/neural_chat/pipeline/plugins/video/face_animation/src/config/facerender_still.yaml b/intel_extension_for_transformers/neural_chat/pipeline/plugins/video/face_animation/src/config/facerender_still.yaml index e6cd9a48a89..ac2889f2318 100644 --- a/intel_extension_for_transformers/neural_chat/pipeline/plugins/video/face_animation/src/config/facerender_still.yaml +++ b/intel_extension_for_transformers/neural_chat/pipeline/plugins/video/face_animation/src/config/facerender_still.yaml @@ -18,20 +18,20 @@ model_params: common_params: - num_kp: 15 - image_channel: 3 + num_kp: 15 + image_channel: 3 feature_channel: 32 estimate_jacobian: False # True kp_detector_params: temperature: 0.1 - block_expansion: 32 + block_expansion: 32 max_features: 1024 scale_factor: 0.25 # 0.25 num_blocks: 5 reshape_channel: 16384 # 16384 = 1024 * 16 reshape_depth: 16 he_estimator_params: - block_expansion: 64 + block_expansion: 64 max_features: 2048 num_bins: 66 generator_params: @@ -50,7 +50,7 @@ model_params: compress: 4 discriminator_params: scales: [1] - block_expansion: 32 + block_expansion: 32 max_features: 512 num_blocks: 4 sn: True @@ -60,4 +60,3 @@ model_params: layer: 3 num_kp: 15 num_bins: 66 - diff --git a/intel_extension_for_transformers/neural_chat/pipeline/plugins/video/face_animation/src/face3d/__init__.py b/intel_extension_for_transformers/neural_chat/pipeline/plugins/video/face_animation/src/face3d/__init__.py index 2823243c0bb..18896e7b549 100644 --- a/intel_extension_for_transformers/neural_chat/pipeline/plugins/video/face_animation/src/face3d/__init__.py +++ b/intel_extension_for_transformers/neural_chat/pipeline/plugins/video/face_animation/src/face3d/__init__.py @@ -13,4 +13,4 @@ # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and -# limitations under the License. \ No newline at end of file +# limitations under the License. diff --git a/intel_extension_for_transformers/neural_chat/pipeline/plugins/video/face_animation/src/facerender/__init__.py b/intel_extension_for_transformers/neural_chat/pipeline/plugins/video/face_animation/src/facerender/__init__.py index 2823243c0bb..18896e7b549 100644 --- a/intel_extension_for_transformers/neural_chat/pipeline/plugins/video/face_animation/src/facerender/__init__.py +++ b/intel_extension_for_transformers/neural_chat/pipeline/plugins/video/face_animation/src/facerender/__init__.py @@ -13,4 +13,4 @@ # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and -# limitations under the License. \ No newline at end of file +# limitations under the License. 
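The two facerender YAML hunks above only normalize indentation and trailing blank lines; the values are unchanged. As a quick illustration, a minimal PyYAML read of such a config (pyyaml is listed in the plugin requirements earlier in this diff, though whether SadTalker loads these files with PyYAML or yacs is not shown here):

```python
# Read the facerender config reflowed above; the key names and values
# (num_kp: 15, image_channel: 3) come straight from the hunk.
import yaml

with open("src/config/facerender.yaml") as f:
    cfg = yaml.safe_load(f)

common = cfg["model_params"]["common_params"]
print(common["num_kp"], common["image_channel"])  # -> 15 3
```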
diff --git a/intel_extension_for_transformers/neural_chat/pipeline/plugins/video/face_animation/src/facerender/modules/__init__.py b/intel_extension_for_transformers/neural_chat/pipeline/plugins/video/face_animation/src/facerender/modules/__init__.py index 2823243c0bb..18896e7b549 100644 --- a/intel_extension_for_transformers/neural_chat/pipeline/plugins/video/face_animation/src/facerender/modules/__init__.py +++ b/intel_extension_for_transformers/neural_chat/pipeline/plugins/video/face_animation/src/facerender/modules/__init__.py @@ -13,4 +13,4 @@ # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and -# limitations under the License. \ No newline at end of file +# limitations under the License. diff --git a/intel_extension_for_transformers/neural_chat/pipeline/plugins/video/face_animation/src/facerender/modules/make_animation.py b/intel_extension_for_transformers/neural_chat/pipeline/plugins/video/face_animation/src/facerender/modules/make_animation.py index aebffd83ddd..2a63e08eb7c 100644 --- a/intel_extension_for_transformers/neural_chat/pipeline/plugins/video/face_animation/src/facerender/modules/make_animation.py +++ b/intel_extension_for_transformers/neural_chat/pipeline/plugins/video/face_animation/src/facerender/modules/make_animation.py @@ -261,4 +261,4 @@ def natural_keys(text): # predictions_ts = torch.stack(predictions, dim=1) predictions_ts = torch.stack(aggregated_predictions, dim=1) - return predictions_ts \ No newline at end of file + return predictions_ts diff --git a/intel_extension_for_transformers/neural_chat/pipeline/plugins/video/face_animation/src/utils/__init__.py b/intel_extension_for_transformers/neural_chat/pipeline/plugins/video/face_animation/src/utils/__init__.py index 2823243c0bb..18896e7b549 100644 --- a/intel_extension_for_transformers/neural_chat/pipeline/plugins/video/face_animation/src/utils/__init__.py +++ b/intel_extension_for_transformers/neural_chat/pipeline/plugins/video/face_animation/src/utils/__init__.py @@ -13,4 +13,4 @@ # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and -# limitations under the License. \ No newline at end of file +# limitations under the License. 
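The make_animation.py hunk above only restores the trailing newline around `torch.stack(aggregated_predictions, dim=1)`. As a quick illustration of what that call does, assuming per-frame prediction tensors of shape `(batch, channels)`:

```python
# torch.stack along dim=1 turns a length-T list of (B, C) tensors into one
# (B, T, C) tensor -- the shape convention assumed for the predictions here.
import torch

frames = [torch.zeros(2, 3) for _ in range(5)]  # T=5 tensors of shape (B=2, C=3)
predictions_ts = torch.stack(frames, dim=1)
print(predictions_ts.shape)  # torch.Size([2, 5, 3])
```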
diff --git a/intel_extension_for_transformers/neural_chat/pipeline/plugins/video/face_animation/src/utils/audio.py b/intel_extension_for_transformers/neural_chat/pipeline/plugins/video/face_animation/src/utils/audio.py index c43b74a8b0c..7af1e6fa07f 100644 --- a/intel_extension_for_transformers/neural_chat/pipeline/plugins/video/face_animation/src/utils/audio.py +++ b/intel_extension_for_transformers/neural_chat/pipeline/plugins/video/face_animation/src/utils/audio.py @@ -96,4 +96,4 @@ def _normalize(S): if hp.symmetric_mels: return (2 * hp.max_abs_value) * ((S - hp.min_level_db) / (-hp.min_level_db)) - hp.max_abs_value else: - return hp.max_abs_value * ((S - hp.min_level_db) / (-hp.min_level_db)) \ No newline at end of file + return hp.max_abs_value * ((S - hp.min_level_db) / (-hp.min_level_db)) diff --git a/intel_extension_for_transformers/neural_chat/pipeline/plugins/video/face_animation/src/utils/videoio.py b/intel_extension_for_transformers/neural_chat/pipeline/plugins/video/face_animation/src/utils/videoio.py index bbcba1e1f5a..b554d63a8b3 100644 --- a/intel_extension_for_transformers/neural_chat/pipeline/plugins/video/face_animation/src/utils/videoio.py +++ b/intel_extension_for_transformers/neural_chat/pipeline/plugins/video/face_animation/src/utils/videoio.py @@ -43,4 +43,4 @@ def save_video_with_watermark(video, audio, save_path, watermark=False): input_audio = ffmpeg.input(audio) ffmpeg.concat(input_video, input_audio, v=1, a=1).output( save_path - ).run() \ No newline at end of file + ).run() diff --git a/intel_extension_for_transformers/neural_chat/plugins.py b/intel_extension_for_transformers/neural_chat/plugins.py index 859b5cff380..624c4351212 100644 --- a/intel_extension_for_transformers/neural_chat/plugins.py +++ b/intel_extension_for_transformers/neural_chat/plugins.py @@ -72,4 +72,4 @@ def get_registered_plugins(): def get_all_plugins(): return ["tts", "tts_chinese", "asr", "asr_chinese", "retrieval", "cache", "safety_checker", "ner", "ner_int", - "face_animation"] \ No newline at end of file + "face_animation"] diff --git a/intel_extension_for_transformers/neural_chat/prompts/__init__.py b/intel_extension_for_transformers/neural_chat/prompts/__init__.py index 9d709dd806c..d3e17f82b3e 100644 --- a/intel_extension_for_transformers/neural_chat/prompts/__init__.py +++ b/intel_extension_for_transformers/neural_chat/prompts/__init__.py @@ -15,4 +15,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -from .prompt import PromptTemplate \ No newline at end of file +from .prompt import PromptTemplate diff --git a/intel_extension_for_transformers/neural_chat/prompts/prompt.py b/intel_extension_for_transformers/neural_chat/prompts/prompt.py index 6627205a2a0..2686417bd93 100644 --- a/intel_extension_for_transformers/neural_chat/prompts/prompt.py +++ b/intel_extension_for_transformers/neural_chat/prompts/prompt.py @@ -196,9 +196,9 @@ register_conv_template( Conversation( name="ner", - system_message="""Please determine the precise time mentioned in the user's query. - Your response should consist only of an accurate time in the format - 'Time: YYYY-MM-DD' or 'Period: YYYY-MM-DD to YYYY-MM-DD.' + system_message="""Please determine the precise time mentioned in the user's query. + Your response should consist only of an accurate time in the format + 'Time: YYYY-MM-DD' or 'Period: YYYY-MM-DD to YYYY-MM-DD.' 
If the user query does not include any time reference, please reply with 'None'.\n""", roles=("Current Time: ", "User Query: "), sep_style=SeparatorStyle.NO_COLON_SINGLE, diff --git a/intel_extension_for_transformers/neural_chat/requirements.txt b/intel_extension_for_transformers/neural_chat/requirements.txt index ec4654403e6..27ccc939601 100644 --- a/intel_extension_for_transformers/neural_chat/requirements.txt +++ b/intel_extension_for_transformers/neural_chat/requirements.txt @@ -1,29 +1,29 @@ -transformers>=4.35.2 -peft==0.6.2 +accelerate +cchardet +einops +evaluate +fastapi==0.103.2 fschat==0.2.32 -torch==2.1.0 -torchaudio==2.1.0 +git+https://github.com/EleutherAI/lm-evaluation-harness.git@cc9778fbe4fa1a709be2abed9deb6180fd40e7e2 +huggingface_hub intel_extension_for_pytorch +neural-compressor +neural_speed +numpy==1.23.5 onnx>=1.15.0 -shortuuid -evaluate -fastapi==0.103.2 -pydantic==1.10.13 -starlette -yacs -uvicorn optimum optimum-intel +peft==0.6.2 +pydantic==1.10.13 +python-dotenv +python-multipart rouge_score -numpy==1.23.5 -git+https://github.com/EleutherAI/lm-evaluation-harness.git@cc9778fbe4fa1a709be2abed9deb6180fd40e7e2 -accelerate -cchardet +shortuuid +starlette tensorflow>=2.13.0 -neural-compressor -einops -python-multipart -python-dotenv -huggingface_hub +torch==2.1.0 +torchaudio==2.1.0 +transformers>=4.35.2 +uvicorn vllm -neural_speed +yacs diff --git a/intel_extension_for_transformers/neural_chat/requirements_cpu.txt b/intel_extension_for_transformers/neural_chat/requirements_cpu.txt index 3e741d24572..48a5329c47a 100644 --- a/intel_extension_for_transformers/neural_chat/requirements_cpu.txt +++ b/intel_extension_for_transformers/neural_chat/requirements_cpu.txt @@ -1,26 +1,26 @@ -transformers>=4.35.2 -peft==0.6.2 -fschat==0.2.32 -intel_extension_for_pytorch==2.1.0 -shortuuid +--extra-index-url https://download.pytorch.org/whl/cpu +cchardet +einops evaluate fastapi==0.103.2 -pydantic==1.10.13 -starlette -yacs -uvicorn +fschat==0.2.32 +git+https://github.com/EleutherAI/lm-evaluation-harness.git@cc9778fbe4fa1a709be2abed9deb6180fd40e7e2 +intel_extension_for_pytorch==2.1.0 +neural-compressor +neural_speed +numpy==1.23.5 optimum optimum-intel +peft==0.6.2 +pydantic==1.10.13 +python-dotenv +python-multipart rouge_score -numpy==1.23.5 +shortuuid +starlette tiktoken==0.4.0 -git+https://github.com/EleutherAI/lm-evaluation-harness.git@cc9778fbe4fa1a709be2abed9deb6180fd40e7e2 ---extra-index-url https://download.pytorch.org/whl/cpu torch==2.1.0 torchaudio==2.1.0 -neural-compressor -einops -cchardet -python-multipart -python-dotenv -neural_speed +transformers>=4.35.2 +uvicorn +yacs diff --git a/intel_extension_for_transformers/neural_chat/requirements_hpu.txt b/intel_extension_for_transformers/neural_chat/requirements_hpu.txt index 5f1a632fb4f..6700cfa2c87 100644 --- a/intel_extension_for_transformers/neural_chat/requirements_hpu.txt +++ b/intel_extension_for_transformers/neural_chat/requirements_hpu.txt @@ -1,20 +1,20 @@ -transformers>=4.35.2 -peft -fschat==0.2.32 -shortuuid +cchardet +einops evaluate fastapi==0.103.2 -pydantic==1.10.13 -starlette -yacs -uvicorn -optimum -rouge_score -numpy==1.23.5 +fschat==0.2.32 git+https://github.com/EleutherAI/lm-evaluation-harness.git@cc9778fbe4fa1a709be2abed9deb6180fd40e7e2 -neural-compressor intel_extension_for_pytorch -einops -cchardet -python-multipart +neural-compressor +numpy==1.23.5 +optimum +peft +pydantic==1.10.13 python-dotenv +python-multipart +rouge_score +shortuuid +starlette +transformers>=4.35.2 +uvicorn +yacs diff --git 
a/intel_extension_for_transformers/neural_chat/requirements_pc.txt b/intel_extension_for_transformers/neural_chat/requirements_pc.txt index 5a5aa9001cf..84edc0c4f70 100644 --- a/intel_extension_for_transformers/neural_chat/requirements_pc.txt +++ b/intel_extension_for_transformers/neural_chat/requirements_pc.txt @@ -1,23 +1,23 @@ -transformers -peft -fschat==0.2.32 -shortuuid +--extra-index-url https://download.pytorch.org/whl/cpu +cchardet +einops evaluate fastapi==0.103.2 -pydantic==1.10.13 -starlette -yacs -uvicorn +fschat==0.2.32 +git+https://github.com/EleutherAI/lm-evaluation-harness.git@cc9778fbe4fa1a709be2abed9deb6180fd40e7e2 +neural-compressor +numpy==1.23.5 optimum optimum-intel +peft +pydantic==1.10.13 +python-dotenv +python-multipart rouge_score -numpy==1.23.5 -git+https://github.com/EleutherAI/lm-evaluation-harness.git@cc9778fbe4fa1a709be2abed9deb6180fd40e7e2 ---extra-index-url https://download.pytorch.org/whl/cpu +shortuuid +starlette torch==2.1.0 torchaudio==2.1.0 -neural-compressor -einops -cchardet -python-multipart -python-dotenv +transformers +uvicorn +yacs diff --git a/intel_extension_for_transformers/neural_chat/requirements_xpu.txt b/intel_extension_for_transformers/neural_chat/requirements_xpu.txt index c3d30f8ffca..f2a980c0e9c 100644 --- a/intel_extension_for_transformers/neural_chat/requirements_xpu.txt +++ b/intel_extension_for_transformers/neural_chat/requirements_xpu.txt @@ -1,16 +1,16 @@ -transformers>=4.35.2 -fschat==0.2.32 -shortuuid +cchardet +einops evaluate fastapi==0.103.2 +fschat==0.2.32 +neural-compressor +numpy==1.23.5 pydantic==1.10.13 +python-dotenv +python-multipart +rouge_score +shortuuid starlette -yacs +transformers>=4.35.2 uvicorn -rouge_score -numpy==1.23.5 -cchardet -neural-compressor -einops -python-multipart -python-dotenv +yacs diff --git a/intel_extension_for_transformers/neural_chat/server/restful/codegen_api.py b/intel_extension_for_transformers/neural_chat/server/restful/codegen_api.py index 22be5bc444c..d000af762ad 100644 --- a/intel_extension_for_transformers/neural_chat/server/restful/codegen_api.py +++ b/intel_extension_for_transformers/neural_chat/server/restful/codegen_api.py @@ -33,7 +33,7 @@ def check_completion_request(request: BaseModel) -> Optional[str]: logger.info(f"Checking parameters of completion request...") if request.temperature is not None and request.temperature < 0: return f"Param Error: {request.temperature} is less than the minimum of 0 --- 'temperature'" - + if request.temperature is not None and request.temperature > 2: return f"Param Error: {request.temperature} is greater than the maximum of 2 --- 'temperature'" diff --git a/intel_extension_for_transformers/neural_chat/server/restful/faceanimation_api.py b/intel_extension_for_transformers/neural_chat/server/restful/faceanimation_api.py index 8c31c8d8162..e325236d112 100644 --- a/intel_extension_for_transformers/neural_chat/server/restful/faceanimation_api.py +++ b/intel_extension_for_transformers/neural_chat/server/restful/faceanimation_api.py @@ -102,4 +102,4 @@ async def handle_talkingbot_face_animation(image: UploadFile = File(...), text=text, mode=mode, voice=voice) - return response \ No newline at end of file + return response diff --git a/intel_extension_for_transformers/neural_chat/server/restful/finetune_api.py b/intel_extension_for_transformers/neural_chat/server/restful/finetune_api.py index facb7a2c986..749d00c9170 100644 --- a/intel_extension_for_transformers/neural_chat/server/restful/finetune_api.py +++ 
b/intel_extension_for_transformers/neural_chat/server/restful/finetune_api.py @@ -36,7 +36,7 @@ def check_finetune_request(request: BaseModel) -> Optional[str]: if request.train_file is None and request.dataset_name is None: return f"Param Error: finetune dataset can not be None" return None - + class FinetuneAPIRouter(APIRouter): @@ -56,7 +56,7 @@ def get_chatbot(self): logger.error("Chatbot instance is not found.") raise RuntimeError("Chatbot instance has not been set.") return self.chatbot - + def handle_finetune_request(self, request: FinetuneRequest) -> str: try: model_args = ModelArguments(model_name_or_path=request.model_name_or_path) @@ -90,6 +90,6 @@ def handle_finetune_request(self, request: FinetuneRequest) -> str: @router.post("/v1/finetune") async def finetune_endpoint(request: FinetuneRequest): ret = check_finetune_request(request) - if ret is not None: + if ret is not None: raise RuntimeError(f"Invalid parameter: {ret}") - return router.handle_finetune_request(request) \ No newline at end of file + return router.handle_finetune_request(request) diff --git a/intel_extension_for_transformers/neural_chat/server/restful/openai_protocol.py b/intel_extension_for_transformers/neural_chat/server/restful/openai_protocol.py index 6c4883632ae..84f836bf698 100644 --- a/intel_extension_for_transformers/neural_chat/server/restful/openai_protocol.py +++ b/intel_extension_for_transformers/neural_chat/server/restful/openai_protocol.py @@ -16,7 +16,7 @@ # limitations under the License. """ -Code source from FastChat's OpenAI protocol: +Code source from FastChat's OpenAI protocol: https://github.com/lm-sys/FastChat/blob/main/fastchat/protocol/openai_api_protocol.py """ diff --git a/intel_extension_for_transformers/neural_chat/server/restful/photoai_api.py b/intel_extension_for_transformers/neural_chat/server/restful/photoai_api.py index 08474bd3a32..e433d183850 100644 --- a/intel_extension_for_transformers/neural_chat/server/restful/photoai_api.py +++ b/intel_extension_for_transformers/neural_chat/server/restful/photoai_api.py @@ -59,7 +59,7 @@ def get_chatbot(self): logger.error("Chatbot instance is not found.") raise RuntimeError("Chatbot instance has not been set.") return self.chatbot - + async def handle_voice_chat_request(self, prompt: str, audio_output_path: Optional[str]=None) -> str: chatbot = self.get_chatbot() try: @@ -120,7 +120,7 @@ async def handle_ai_photos_upload_images(request: Request, background_tasks: Bac img_path = image_path+'/'+ img_name # save exif info from origin image exif = img_obj.info.get('exif', b"") - + # save image info into db empty_tags = '{}' insert_sql = f"INSERT INTO image_info VALUES(null, '{user_id}', '{img_path}', null, '', \ @@ -164,7 +164,7 @@ def handle_ai_photos_get_all_images(request: Request): result_list = [] mysql_db = MysqlDb() image_list = mysql_db.fetch_all( - sql=f'''SELECT image_id, image_path FROM image_info + sql=f'''SELECT image_id, image_path FROM image_info WHERE user_id="{user_id}" AND exist_status="active";''') for image in image_list: image_name = image['image_path'].split('/')[-1] @@ -250,22 +250,22 @@ async def handle_ai_photos_get_image_detail(request: Request): try: mysql_db = MysqlDb() image_info = mysql_db.fetch_one( - sql=f'''SELECT * FROM image_info WHERE - image_id={image_id} AND user_id="{user_id}" AND exist_status="active";''', + sql=f'''SELECT * FROM image_info WHERE + image_id={image_id} AND user_id="{user_id}" AND exist_status="active";''', params=None) except Exception as e: logger.error(" "+str(e)) return 
JSONResponse(content=f'Exception {e} occurred when selecting image {image_id} from MySQL.') finally: mysql_db._close() - + if image_info: image_detail = format_image_info(image_info) logger.info(f' Image detail of image {image_id} is: {image_detail}') return image_detail else: return JSONResponse( - content=f"No image id: {image_id} for user {user_id}", + content=f"No image id: {image_id} for user {user_id}", status_code=status.HTTP_500_INTERNAL_SERVER_ERROR ) @@ -315,7 +315,7 @@ async def handle_ai_photos_update_label(request: Request): params = await request.json() label_list = params['label_list'] - try: + try: mysql_db = MysqlDb() for label_obj in label_list: label = label_obj['label'] @@ -324,23 +324,23 @@ async def handle_ai_photos_update_label(request: Request): if label == 'person': with mysql_db.transaction(): mysql_db.update( - sql=f'''UPDATE face_info SET face_tag="{label_to}" - WHERE face_tag="{label_from}"''', + sql=f'''UPDATE face_info SET face_tag="{label_to}" + WHERE face_tag="{label_from}"''', params=None) mysql_db.update( - sql=f"""UPDATE image_face SET face_tag='{label_to}' - WHERE user_id='{user_id}' and face_tag='{label_from}';""", + sql=f"""UPDATE image_face SET face_tag='{label_to}' + WHERE user_id='{user_id}' and face_tag='{label_from}';""", params=None) continue if label == 'address': - update_sql = f"""UPDATE image_info SET address='{label_to}' + update_sql = f"""UPDATE image_info SET address='{label_to}' WHERE user_id='{user_id}' and address LIKE '%{label_from}%';""" elif label == 'time': - update_sql = f"""UPDATE image_info SET captured_time='{label_to}' + update_sql = f"""UPDATE image_info SET captured_time='{label_to}' WHERE user_id='{user_id}' and DATEDIFF(captured_time, '{label_from}') = 0;""" else: return JSONResponse( - content=f"Illegal label name: {label}", + content=f"Illegal label name: {label}", status_code=status.HTTP_500_INTERNAL_SERVER_ERROR ) with mysql_db.transaction(): @@ -520,4 +520,3 @@ async def handle_talkingbot_llm_tts(request: Request): logger.info(f'Received prompt: {text}, and use voice: {voice} knowledge_id: {knowledge_id}') return await router.handle_voice_chat_request(text, audio_output_path) - diff --git a/intel_extension_for_transformers/neural_chat/server/restful/photoai_services.py b/intel_extension_for_transformers/neural_chat/server/restful/photoai_services.py index 9427d128dd0..d9afab9bfd3 100644 --- a/intel_extension_for_transformers/neural_chat/server/restful/photoai_services.py +++ b/intel_extension_for_transformers/neural_chat/server/restful/photoai_services.py @@ -22,8 +22,8 @@ import datetime from typing import List, Dict from .photoai_utils import ( - find_GPS_image, - get_address_from_gps, + find_GPS_image, + get_address_from_gps, generate_caption, transfer_xywh ) @@ -88,7 +88,7 @@ def update_image_tags(image): elif key == 'location' and value != image_info['address']: update_sql_list.append(f' address="{value}" ') tag_name_list.append('location') - + for tag_name in tag_name_list: tags.pop(tag_name) old_tags.update(tags) @@ -116,13 +116,13 @@ def update_image_attr(image, attr): new_checked = 1 if new_attr else 0 with mysql_db.transaction(): mysql_db.update( - sql=f"UPDATE image_info SET {attr}={new_checked} WHERE image_id={image_id}", + sql=f"UPDATE image_info SET {attr}={new_checked} WHERE image_id={image_id}", params=None ) else: with mysql_db.transaction(): mysql_db.update( - sql=f'UPDATE image_info SET {attr}="{new_attr}" WHERE image_id={image_id}', + sql=f'UPDATE image_info SET {attr}="{new_attr}" WHERE 
image_id={image_id}', params=None ) except Exception as e: @@ -163,7 +163,7 @@ def delete_single_image(user_id, image_id): logger.info(f'[Delete] Deleting image {image_id}') mysql_db = MysqlDb() image_path = mysql_db.fetch_one( - sql=f'SELECT image_path FROM image_info WHERE image_id={image_id}', + sql=f'SELECT image_path FROM image_info WHERE image_id={image_id}', params=None ) if image_path==None: @@ -171,7 +171,7 @@ def delete_single_image(user_id, image_id): logger.error(info) raise Exception(info) image_path = image_path['image_path'] - + # delete local image os.remove(image_path) logger.info(f'[Delete] Image {image_path} successfully deleted.') @@ -180,7 +180,7 @@ def delete_single_image(user_id, image_id): try: with mysql_db.transaction(): mysql_db.update( - sql=f"UPDATE image_info SET exist_status='deleted' WHERE image_id={image_id} ;", + sql=f"UPDATE image_info SET exist_status='deleted' WHERE image_id={image_id} ;", params=None ) except Exception as e: @@ -231,7 +231,7 @@ def process_single_image(img_id, img_path, user_id): longitude = gps_info['GPSLongitude'] if 'GPSAltitude' in gps_info: altitude = gps_info['GPSAltitude'] - logger.info(f'[background - single] Image is captured at: {captured_time},' + + logger.info(f'[background - single] Image is captured at: {captured_time},' + 'latitude: {latitude}, longitude: {longitude}, altitude: {altitude}') if latitude: update_image_attr(image={"image_id": img_id, "latitude": latitude}, attr='latitude') @@ -359,7 +359,7 @@ def process_face_for_single_image(image_id, image_path, db_path, user_id): continue # find faces in img2: one or many find_face_sql = f""" - SELECT face_id, face_tag, xywh FROM image_face WHERE + SELECT face_id, face_tag, xywh FROM image_face WHERE image_path='{ref_image_path}' AND user_id='{user_id}'; """ try: @@ -386,7 +386,7 @@ def process_face_for_single_image(image_id, image_path, db_path, user_id): if face_id == -1 and face_tag == None: raise Exception(f'Error occurred when verifying faces for reference image: Inconsistent face information.') # insert into image_face - insert_img_face_sql = f"""INSERT INTO image_face + insert_img_face_sql = f"""INSERT INTO image_face VALUES(null, {image_id}, '{image_path}', {face_id}, '{image_xywh}', '{user_id}', '{face_tag}');""" try: with mysql_db.transaction(): @@ -399,12 +399,12 @@ def process_face_for_single_image(image_id, image_path, db_path, user_id): if image_xywh in face_xywh_list: face_xywh_list.remove(image_xywh) logger.info(f'[background - face] current face_xywh_list: {face_xywh_list}') - + # all faces matched in db, no faces left if len(face_xywh_list) == 0: logger.info(f"[background - face] Image {image_id} face process finished.") return None - + # 3. 
add new faces for current image (no reference in db) logger.info(f'[background - face] Adding new faces for image {image_id}') for cur_xywh in face_xywh_list: @@ -437,10 +437,10 @@ def get_type_obj_from_attr(attr, user_id): logger.info(f'Geting image type of {attr}') if attr == 'time': - select_sql = f'''SELECT DATE(captured_time) AS date FROM image_info + select_sql = f'''SELECT DATE(captured_time) AS date FROM image_info WHERE user_id = "{user_id}" AND exist_status="active" GROUP BY date ORDER BY date;''' elif attr == 'address': - select_sql = f'''SELECT address FROM image_info + select_sql = f'''SELECT address FROM image_info WHERE user_id="{user_id}" AND exist_status="active" GROUP BY address;''' else: return {} @@ -456,19 +456,19 @@ def get_type_obj_from_attr(attr, user_id): if item == None: continue example_image_path = mysql_db.fetch_one( - sql=f'''SELECT image_path FROM image_info - WHERE DATEDIFF(captured_time, "{item}") = 0 and user_id="{user_id}" - and exist_status="active" LIMIT 1;''', + sql=f'''SELECT image_path FROM image_info + WHERE DATEDIFF(captured_time, "{item}") = 0 and user_id="{user_id}" + and exist_status="active" LIMIT 1;''', params=None)['image_path'] elif attr == 'address': item = item['address'] if item == None or item == 'None' or item == 'null': continue example_image_path = mysql_db.fetch_one( - sql=f'''SELECT image_path FROM image_info WHERE - address="{item}" and user_id="{user_id}" and exist_status="active" LIMIT 1;''', + sql=f'''SELECT image_path FROM image_info WHERE + address="{item}" and user_id="{user_id}" and exist_status="active" LIMIT 1;''', params=None)['image_path'] - + image_name = example_image_path.split('/')[-1] image_path = format_image_path(user_id, image_name) select_result[item] = image_path @@ -479,7 +479,7 @@ def get_type_obj_from_attr(attr, user_id): if attr == 'time': logger.info(f'type list: {select_result}') return select_result - + # check whether address simplification is needed simplify_flag = True cur_country = None @@ -488,11 +488,11 @@ def get_type_obj_from_attr(attr, user_id): country = address.split(', ')[0] if not cur_country: cur_country = country - else: + else: if country != cur_country: simplify_flag = False break - + # simplify address name dynamically if simplify_flag: logger.info(f'address need to be simplified') @@ -511,7 +511,7 @@ def get_address_list(user_id) -> list[str]: logger.info(f'Getting address list of user {user_id}') from ...utils.database.mysqldb import MysqlDb mysql_db = MysqlDb() - select_sql = f'''SELECT address FROM image_info WHERE + select_sql = f'''SELECT address FROM image_info WHERE user_id="{user_id}" AND exist_status="active" GROUP BY address;''' select_list = mysql_db.fetch_all(sql=select_sql) result_list = [] @@ -532,10 +532,10 @@ def get_process_status(user_id): from ...utils.database.mysqldb import MysqlDb mysql_db = MysqlDb() total_cnt = mysql_db.fetch_one( - sql=f"""SELECT COUNT(*) AS cnt FROM image_info WHERE + sql=f"""SELECT COUNT(*) AS cnt FROM image_info WHERE user_id='{user_id}' AND exist_status='active';""")['cnt'] processing_cnt = mysql_db.fetch_one( - sql=f"""SELECT COUNT(*) AS cnt FROM image_info WHERE + sql=f"""SELECT COUNT(*) AS cnt FROM image_info WHERE user_id='{user_id}' AND exist_status='active' AND process_status='processing';""")['cnt'] mysql_db._close() result = {} @@ -551,21 +551,21 @@ def get_images_by_type(user_id, type, subtype) -> List: if type == 'address': if subtype == 'default': subtype = 'None' - sql=f"""SELECT image_id, image_path FROM image_info WHERE + 
sql=f"""SELECT image_id, image_path FROM image_info WHERE user_id='{user_id}' AND exist_status='active' AND address LIKE '%{subtype}%';""" elif type == 'time': if subtype == 'None': - sql = f'''SELECT image_id, image_path FROM image_info + sql = f'''SELECT image_id, image_path FROM image_info WHERE captured_time is null AND user_id="{user_id}" AND exist_status="active";''' else: - sql = f'''SELECT image_id, image_path FROM image_info + sql = f'''SELECT image_id, image_path FROM image_info WHERE DATE(captured_time)="{subtype}" AND user_id="{user_id}" AND exist_status="active";''' elif type == 'person': - sql = f"""SELECT image_info.image_id, image_info.image_path FROM image_face - INNER JOIN image_info ON image_info.image_id=image_face.image_id - WHERE image_info.user_id='{user_id}' AND image_info.exist_status='active' + sql = f"""SELECT image_info.image_id, image_info.image_path FROM image_face + INNER JOIN image_info ON image_info.image_id=image_face.image_id + WHERE image_info.user_id='{user_id}' AND image_info.exist_status='active' AND image_face.face_tag='{subtype}'""" logger.info(f'sql: {sql}') @@ -589,9 +589,9 @@ def get_images_by_type(user_id, type, subtype) -> List: def get_face_list_by_user_id(user_id: str) -> List[Dict]: logger.info(f'getting face list of user {user_id}') - group_by_face_sql = f'''SELECT group_concat(image_face.image_path) AS image_path, - group_concat(image_face.face_tag) AS face_tag FROM image_face - INNER JOIN image_info ON image_info.image_id=image_face.image_id + group_by_face_sql = f'''SELECT group_concat(image_face.image_path) AS image_path, + group_concat(image_face.face_tag) AS face_tag FROM image_face + INNER JOIN image_info ON image_info.image_id=image_face.image_id WHERE image_info.user_id = "{user_id}" AND image_info.exist_status="active" GROUP BY face_id;''' try: from ...utils.database.mysqldb import MysqlDb @@ -625,9 +625,9 @@ def get_image_list_by_ner_query(ner_result: Dict, user_id: str, query: str) -> L # get person name query face_list = mysql_db.fetch_all( - sql=f"""select image_face.face_tag from image_face inner join image_info - on image_info.image_id=image_face.image_id where - image_info.user_id='{user_id}' AND exist_status='active';""", + sql=f"""select image_face.face_tag from image_face inner join image_info + on image_info.image_id=image_face.image_id where + image_info.user_id='{user_id}' AND exist_status='active';""", params=None) logger.info(f"[NER query] face list is: {face_list}") if face_list: @@ -665,7 +665,7 @@ def get_image_list_by_ner_query(ner_result: Dict, user_id: str, query: str) -> L query_sql += '('+sql+')' else: logger.info(f'[NER query] no location in query') - + # get time query if ner_result['time']: time_points = ner_result['time'] @@ -707,7 +707,7 @@ def get_image_list_by_ner_query(ner_result: Dict, user_id: str, query: str) -> L query_sql += '('+sql+')' else: logger.info(f'[NER query] no time period in query') - + if not query_flag: logger.info(f'[NER query] no compatible data for current query') return [] @@ -739,14 +739,14 @@ def delete_user_infos(user_id: str): # delete image_face logger.info(f'[delete user] delete image_face of user {user_id}.') mysql_db.delete( - sql=f"""DELETE FROM image_face WHERE user_id='{user_id}'""", + sql=f"""DELETE FROM image_face WHERE user_id='{user_id}'""", params=None) - + # delete face_info logger.info(f'[delete user] delete face_info of user {user_id}.') mysql_db.delete( - sql=f"""DELETE face_info FROM face_info LEFT JOIN image_face - ON face_info.face_id = image_face.face_id 
WHERE image_face.face_id IS NULL""", + sql=f"""DELETE face_info FROM face_info LEFT JOIN image_face + ON face_info.face_id = image_face.face_id WHERE image_face.face_id IS NULL""", params=None) # delete image_info @@ -778,7 +778,7 @@ def delete_user_infos(user_id: str): logger.info(f'[delete user] local images of user {user_id} is deleted.') except Exception as e: raise Exception(e) - + logger.info(f'[delete user] user {user_id} information all deleted.') @@ -798,4 +798,3 @@ def forward_req_to_sd_inference_runner(inputs): def stable_defusion_func(inputs): return forward_req_to_sd_inference_runner(inputs) - diff --git a/intel_extension_for_transformers/neural_chat/server/restful/photoai_utils.py b/intel_extension_for_transformers/neural_chat/server/restful/photoai_utils.py index 83493c58b6c..5f603c9c18f 100644 --- a/intel_extension_for_transformers/neural_chat/server/restful/photoai_utils.py +++ b/intel_extension_for_transformers/neural_chat/server/restful/photoai_utils.py @@ -96,7 +96,7 @@ def get_address_from_gps(latitude, longitude, api_key): return result else: return None - + def infer_image(pic_path, processor, model): raw_image = Image.open(pic_path).convert('RGB') @@ -148,4 +148,3 @@ def transfer_xywh(facial_area: dict): for item in items: result += str(facial_area[item]) + '_' return result - diff --git a/intel_extension_for_transformers/neural_chat/server/restful/plugin_audio_api.py b/intel_extension_for_transformers/neural_chat/server/restful/plugin_audio_api.py index 8bae7a89871..5c36a8c0f29 100644 --- a/intel_extension_for_transformers/neural_chat/server/restful/plugin_audio_api.py +++ b/intel_extension_for_transformers/neural_chat/server/restful/plugin_audio_api.py @@ -37,9 +37,9 @@ def handle_voice_asr_request(self, filename: str) -> str: return asr.audio2text(filename) except Exception as e: raise Exception(e) - + async def handle_voice_tts_request(self, text: str, voice: str, audio_output_path: Optional[str]=None) -> str: - + plugins.tts.args['voice'] = voice plugins.tts.args['output_audio_path'] = audio_output_path tts = get_plugin_instance("tts") diff --git a/intel_extension_for_transformers/neural_chat/server/restful/request.py b/intel_extension_for_transformers/neural_chat/server/restful/request.py index 307a381af90..d645a60a262 100644 --- a/intel_extension_for_transformers/neural_chat/server/restful/request.py +++ b/intel_extension_for_transformers/neural_chat/server/restful/request.py @@ -80,4 +80,3 @@ class TGIRequest(RequestBaseModel): inputs: str parameters: Optional[dict] = None stream: Optional[bool] = False - diff --git a/intel_extension_for_transformers/neural_chat/server/restful/retrieval_api.py b/intel_extension_for_transformers/neural_chat/server/restful/retrieval_api.py index 6551260a194..691d0f2eb38 100644 --- a/intel_extension_for_transformers/neural_chat/server/restful/retrieval_api.py +++ b/intel_extension_for_transformers/neural_chat/server/restful/retrieval_api.py @@ -124,7 +124,7 @@ async def retrieval_upload_link(request: Request): user_upload_dir.mkdir(parents=True, exist_ok=True) user_persist_dir.mkdir(parents=True, exist_ok=True) logger.info(f"[askdoc - upload_link] upload path: {user_upload_dir}") - + try: # get retrieval instance and reload db with new knowledge base logger.info("[askdoc - upload_link] starting to create local db...") @@ -156,14 +156,14 @@ async def retrieval_create(request: Request, cur_path = Path(path_prefix) / f"{user_id}-{kb_id}" os.makedirs(path_prefix, exist_ok=True) cur_path.mkdir(parents=True, exist_ok=True) - + 
user_upload_dir = Path(path_prefix) / f"{user_id}-{kb_id}/upload_dir" user_persist_dir = Path(path_prefix) / f"{user_id}-{kb_id}/persist_dir" user_upload_dir.mkdir(parents=True, exist_ok=True) user_persist_dir.mkdir(parents=True, exist_ok=True) cur_time = get_current_beijing_time() logger.info(f"[askdoc - create] upload path: {user_upload_dir}") - + # save file to local path save_file_name = str(user_upload_dir) + '/' + cur_time + '-' + filename with open(save_file_name, 'wb') as fout: @@ -286,7 +286,7 @@ async def retrieval_chat(request: Request): def stream_generator(): yield f"data: {generator}\n\n" yield f"data: [DONE]\n\n" - else: + else: def stream_generator(): for output in generator: ret = { @@ -301,7 +301,7 @@ def stream_generator(): flag = True if output.endswith('.') or output.endswith('\n'): output = output[:-1] - + if '](' in output: output = output.split('](')[-1].replace(')', '') if output.endswith('\n'): @@ -357,7 +357,7 @@ def save_chat_feedback_to_db(request: FeedbackRequest) -> None: mysql_db._set_db("fastrag") question, answer, feedback, comments = request.question, request.answer, request.feedback, request.comments feedback_str = 'dislike' if int(feedback) else 'like' - logger.info(f'''[askdoc - feedback] feedback question: [{question}], + logger.info(f'''[askdoc - feedback] feedback question: [{question}], answer: [{answer}], feedback: [{feedback_str}], comments: [{comments}]''') question = question.replace('"', "'") answer = answer.replace('"', "'") @@ -374,7 +374,7 @@ def save_chat_feedback_to_db(request: FeedbackRequest) -> None: with mysql_db.transaction(): mysql_db.insert(sql, None) except: # pragma: no cover - raise Exception("""Exception occurred when inserting data into MySQL, + raise Exception("""Exception occurred when inserting data into MySQL, please check the db session and your syntax.""") else: logger.info('[askdoc - feedback] feedback inserted.') @@ -402,7 +402,7 @@ def get_feedback_from_db(): def data_generator(): output = io.StringIO() writer = csv.DictWriter( - output, + output, csv_fields ) writer.writeheader() @@ -430,6 +430,6 @@ def data_generator(): cur_time = datetime.datetime.now() cur_time_str = cur_time.strftime("%Y%m%d") return StreamingResponse( - data_generator(), - media_type='text/csv', + data_generator(), + media_type='text/csv', headers={"Content-Disposition": f"attachment;filename=feedback{cur_time_str}.csv"}) diff --git a/intel_extension_for_transformers/neural_chat/server/restful/text2image_api.py b/intel_extension_for_transformers/neural_chat/server/restful/text2image_api.py index 48ff490c046..d02ddb66a04 100644 --- a/intel_extension_for_transformers/neural_chat/server/restful/text2image_api.py +++ b/intel_extension_for_transformers/neural_chat/server/restful/text2image_api.py @@ -49,7 +49,7 @@ async def handle_text2image_request(self, request: str) -> ImageResponse: else: logger.info('Text transferring to image finished.') return ImageResponse(image=image, response="Succeed") - + router = Text2ImageAPIRouter() diff --git a/intel_extension_for_transformers/neural_chat/server/restful/textchat_api.py b/intel_extension_for_transformers/neural_chat/server/restful/textchat_api.py index 74f908c8f7f..e08961a34b8 100644 --- a/intel_extension_for_transformers/neural_chat/server/restful/textchat_api.py +++ b/intel_extension_for_transformers/neural_chat/server/restful/textchat_api.py @@ -31,7 +31,7 @@ def check_completion_request(request: BaseModel) -> Optional[str]: logger.info(f"Checking parameters of completion request...") if 
request.temperature is not None and request.temperature < 0: return f"Param Error: {request.temperature} is less than the minimum of 0 --- 'temperature'" - + if request.temperature is not None and request.temperature > 2: return f"Param Error: {request.temperature} is greater than the maximum of 2 --- 'temperature'" diff --git a/intel_extension_for_transformers/neural_chat/server/restful/tgi_api.py b/intel_extension_for_transformers/neural_chat/server/restful/tgi_api.py index 134862f8299..364a81ef930 100644 --- a/intel_extension_for_transformers/neural_chat/server/restful/tgi_api.py +++ b/intel_extension_for_transformers/neural_chat/server/restful/tgi_api.py @@ -55,9 +55,9 @@ def handle_tgi_request(self, prompt, parameters, stream=False): top_p = parameters.get("top_p", 0.95) typical_p = parameters.get("typical_p", 0.95) res = client.text_generation( - prompt=prompt, best_of=best_of, do_sample=do_sample, - max_new_tokens=max_new_tokens, repetition_penalty=repetition_penalty, - temperature=temperature, top_k=top_k, top_p=top_p, + prompt=prompt, best_of=best_of, do_sample=do_sample, + max_new_tokens=max_new_tokens, repetition_penalty=repetition_penalty, + temperature=temperature, top_k=top_k, top_p=top_p, typical_p=typical_p, stream=stream) return res diff --git a/intel_extension_for_transformers/neural_chat/server/server_commands.py b/intel_extension_for_transformers/neural_chat/server/server_commands.py index 75bc51465b0..0ea2da6c59c 100644 --- a/intel_extension_for_transformers/neural_chat/server/server_commands.py +++ b/intel_extension_for_transformers/neural_chat/server/server_commands.py @@ -91,7 +91,7 @@ def get_client_command(name: str): def neuralchat_server_execute(): commands = neuralchat_server_commands - + idx = 0 for _argv in (['neuralchat_server'] + sys.argv[1:]): if _argv not in commands: diff --git a/intel_extension_for_transformers/neural_chat/serving/triton/text_generation/client.py b/intel_extension_for_transformers/neural_chat/serving/triton/text_generation/client.py index b3c8d3268d4..d2f88bc469a 100644 --- a/intel_extension_for_transformers/neural_chat/serving/triton/text_generation/client.py +++ b/intel_extension_for_transformers/neural_chat/serving/triton/text_generation/client.py @@ -84,4 +84,4 @@ output_data0 = results.as_numpy('OUTPUT0') print("input:",input_data0) - print("output:",output_data0) \ No newline at end of file + print("output:",output_data0) diff --git a/intel_extension_for_transformers/neural_chat/serving/triton/text_generation/model.py b/intel_extension_for_transformers/neural_chat/serving/triton/text_generation/model.py index c142e044359..50dffb2d4f4 100644 --- a/intel_extension_for_transformers/neural_chat/serving/triton/text_generation/model.py +++ b/intel_extension_for_transformers/neural_chat/serving/triton/text_generation/model.py @@ -126,4 +126,4 @@ def finalize(self): Implementing `finalize` function is OPTIONAL. This function allows the model to perform any necessary clean ups before exit. 
""" - print("Cleaning up...") \ No newline at end of file + print("Cleaning up...") diff --git a/intel_extension_for_transformers/neural_chat/tests/ci/api/test_chatbot_normal.py b/intel_extension_for_transformers/neural_chat/tests/ci/api/test_chatbot_normal.py index edf6de08b49..a541c52f8cd 100644 --- a/intel_extension_for_transformers/neural_chat/tests/ci/api/test_chatbot_normal.py +++ b/intel_extension_for_transformers/neural_chat/tests/ci/api/test_chatbot_normal.py @@ -27,7 +27,7 @@ gaudi2_content = """ Habana Gaudi2 and 4th Gen Intel Xeon Scalable processors deliver leading performance and optimal cost savings for AI training. Today, MLCommons published results of its industry AI performance benchmark, MLPerf Training 3.0, in which both the Habana® Gaudi®2 deep learning accelerator and the 4th Gen Intel® Xeon® Scalable processor delivered impressive training results. -The latest MLPerf Training 3.0 results underscore the performance of Intel's products on an array of deep learning models. The maturity of Gaudi2-based software and systems for training was demonstrated at scale on the large language model, GPT-3. Gaudi2 is one of only two semiconductor solutions to submit performance results to the benchmark for LLM training of GPT-3. +The latest MLPerf Training 3.0 results underscore the performance of Intel's products on an array of deep learning models. The maturity of Gaudi2-based software and systems for training was demonstrated at scale on the large language model, GPT-3. Gaudi2 is one of only two semiconductor solutions to submit performance results to the benchmark for LLM training of GPT-3. Gaudi2 also provides substantially competitive cost advantages to customers, both in server and system costs. The accelerator’s MLPerf-validated performance on GPT-3, computer vision and natural language models, plus upcoming software advances make Gaudi2 an extremely compelling price/performance alternative to Nvidia's H100. On the CPU front, the deep learning training performance of 4th Gen Xeon processors with Intel AI engines demonstrated that customers can build with Xeon-based servers a single universal AI system for data pre-processing, model training and deployment to deliver the right combination of AI performance, efficiency, accuracy and scalability. Gaudi2 delivered impressive time-to-train on GPT-31: 311 minutes on 384 accelerators. 
@@ -92,7 +92,7 @@ def test_valid_cpu_device(self): def test_enable_plugin_tts(self): # Test enabling Text-to-Speech plugin config = PipelineConfig(model_name_or_path="facebook/opt-125m") - config.plugins = {"tts": {"enable": True, "args": + config.plugins = {"tts": {"enable": True, "args": {"device": "cpu", "voice": "default", "stream_mode": "true", "output_audio_path": "./output_audio"}}} result = build_chatbot(config) self.assertIsNotNone(result) @@ -109,7 +109,7 @@ def test_enable_plugin_tts_chinese(self): def test_enable_plugin_asr(self): # Test enabling Audio Speech Recognition plugin config = PipelineConfig(model_name_or_path="facebook/opt-125m") - config.plugins = {"asr": {"enable": True, "args": + config.plugins = {"asr": {"enable": True, "args": {"device": "cpu", "model_name_or_path": "openai/whisper-small"}}} result = build_chatbot(config) self.assertIsNotNone(result) @@ -118,7 +118,7 @@ def test_enable_plugin_asr(self): def test_enable_plugin_retrieval(self): # Test enabling Retrieval plugin config = PipelineConfig(model_name_or_path="facebook/opt-125m") - config.plugins = {"retrieval": {"enable": True, "args": + config.plugins = {"retrieval": {"enable": True, "args": {"input_path": "./gaudi2.txt", "persist_directory": "./output"}}} result = build_chatbot(config) self.assertIsNotNone(result) diff --git a/intel_extension_for_transformers/neural_chat/tests/ci/api/test_inference.py b/intel_extension_for_transformers/neural_chat/tests/ci/api/test_inference.py index 8023b9904f7..ebdd0536fb1 100644 --- a/intel_extension_for_transformers/neural_chat/tests/ci/api/test_inference.py +++ b/intel_extension_for_transformers/neural_chat/tests/ci/api/test_inference.py @@ -79,7 +79,7 @@ def test_retrieval_with_qdrant(self): print(response) self.assertIsNotNone(response) plugins.retrieval.enable = False - + def test_retrieval_append(self): plugins.retrieval.enable = True plugins.retrieval.args["append"] = True @@ -91,7 +91,7 @@ def test_retrieval_append(self): response = chatbot.predict("Tell me about Intel Xeon Scalable Processors.") print(response) self.assertIsNotNone(response) - + plugins.retrieval.args["append"] = False config = PipelineConfig(model_name_or_path="facebook/opt-125m", plugins=plugins) @@ -115,7 +115,7 @@ def test_retrieval_append_with_qdrant(self): response = chatbot.predict("Tell me about Intel Xeon Scalable Processors.") print(response) self.assertIsNotNone(response) - + plugins.retrieval.args["append"] = False config = PipelineConfig(model_name_or_path="facebook/opt-125m", plugins=plugins) diff --git a/intel_extension_for_transformers/neural_chat/tests/ci/api/test_rag.py b/intel_extension_for_transformers/neural_chat/tests/ci/api/test_rag.py index 700cd3f6b5e..6367efaae32 100644 --- a/intel_extension_for_transformers/neural_chat/tests/ci/api/test_rag.py +++ b/intel_extension_for_transformers/neural_chat/tests/ci/api/test_rag.py @@ -58,7 +58,7 @@ def tearDown(self) -> None: if os.path.exists("test_txt"): shutil.rmtree("test_txt", ignore_errors=True) return super().tearDown() - + def test_retrieval_txt(self): plugins.retrieval.enable = True plugins.retrieval.args["input_path"] = "../assets/docs/sample.txt" @@ -83,7 +83,7 @@ def tearDown(self) -> None: if os.path.exists("test_docx"): shutil.rmtree("test_docx", ignore_errors=True) return super().tearDown() - + def test_retrieval_docx(self): plugins.retrieval.enable = True plugins.retrieval.args["input_path"] = "../assets/docs/sample.docx" @@ -108,7 +108,7 @@ def tearDown(self) -> None: if os.path.exists("test_xlsx"): 
shutil.rmtree("test_xlsx", ignore_errors=True) return super().tearDown() - + def test_retrieval_xlsx(self): plugins.retrieval.enable = True plugins.retrieval.args["input_path"] = "../assets/docs/sample.xlsx" @@ -133,7 +133,7 @@ def tearDown(self) -> None: if os.path.exists("test_xlsx_1"): shutil.rmtree("test_xlsx_1", ignore_errors=True) return super().tearDown() - + def test_retrieval_xlsx_1(self): plugins.retrieval.enable = True plugins.retrieval.args["input_path"] = "../assets/docs/sample_1.xlsx" @@ -158,7 +158,7 @@ def tearDown(self) -> None: if os.path.exists("test_xlsx_2"): shutil.rmtree("test_xlsx_2", ignore_errors=True) return super().tearDown() - + def test_retrieval_xlsx_2(self): plugins.retrieval.enable = True plugins.retrieval.args["input_path"] = "../assets/docs/sample_2.xlsx" diff --git a/intel_extension_for_transformers/neural_chat/tests/ci/cli/test_cli.py b/intel_extension_for_transformers/neural_chat/tests/ci/cli/test_cli.py index 3d9c2fd1f8e..ef1c7c2f0be 100644 --- a/intel_extension_for_transformers/neural_chat/tests/ci/cli/test_cli.py +++ b/intel_extension_for_transformers/neural_chat/tests/ci/cli/test_cli.py @@ -139,4 +139,3 @@ def test_neuralchat_execute(self): if __name__ == '__main__': unittest.main() - diff --git a/intel_extension_for_transformers/neural_chat/tests/ci/plugins/audio/test_cut_video.py b/intel_extension_for_transformers/neural_chat/tests/ci/plugins/audio/test_cut_video.py index 3c73ef750a1..3ee76c912e8 100644 --- a/intel_extension_for_transformers/neural_chat/tests/ci/plugins/audio/test_cut_video.py +++ b/intel_extension_for_transformers/neural_chat/tests/ci/plugins/audio/test_cut_video.py @@ -13,7 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - + from intel_extension_for_transformers.neural_chat.pipeline.plugins.audio.utils.cut_video import cut_video import os import argparse diff --git a/intel_extension_for_transformers/neural_chat/tests/ci/plugins/audio/test_reduce_noise.py b/intel_extension_for_transformers/neural_chat/tests/ci/plugins/audio/test_reduce_noise.py index f7409d24ce8..aefd1ca9d28 100644 --- a/intel_extension_for_transformers/neural_chat/tests/ci/plugins/audio/test_reduce_noise.py +++ b/intel_extension_for_transformers/neural_chat/tests/ci/plugins/audio/test_reduce_noise.py @@ -30,7 +30,7 @@ def setUpClass(self): self.y, sr = librosa.load(self.audio_path, 16000) self.reducer = NoiseReducer(sr=sr) self.reducer_nonstationary = NoiseReducer(sr=sr, nonstationary=True) - + @classmethod def tearDownClass(self): diff --git a/intel_extension_for_transformers/neural_chat/tests/ci/plugins/audio/test_split_audio.py b/intel_extension_for_transformers/neural_chat/tests/ci/plugins/audio/test_split_audio.py index 8a235cde9b7..80b6eba2e4b 100644 --- a/intel_extension_for_transformers/neural_chat/tests/ci/plugins/audio/test_split_audio.py +++ b/intel_extension_for_transformers/neural_chat/tests/ci/plugins/audio/test_split_audio.py @@ -13,7 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
- + from intel_extension_for_transformers.neural_chat.pipeline.plugins.audio.utils.split_audio import main import os import argparse diff --git a/intel_extension_for_transformers/neural_chat/tests/ci/plugins/cache/test_cache.py b/intel_extension_for_transformers/neural_chat/tests/ci/plugins/cache/test_cache.py index b0d855ac948..0ee08a66c3e 100644 --- a/intel_extension_for_transformers/neural_chat/tests/ci/plugins/cache/test_cache.py +++ b/intel_extension_for_transformers/neural_chat/tests/ci/plugins/cache/test_cache.py @@ -34,7 +34,7 @@ def tearDown(self) -> None: else: print(f"The directory gptcache_data does not exist.") return super().tearDown() - + def test_chat_cache(self): cache_plugin = ChatCache(embedding_model_dir="hkunlp/instructor-large") cache_plugin.init_similar_cache_from_config() diff --git a/intel_extension_for_transformers/neural_chat/tests/ci/plugins/memory/test_memory.py b/intel_extension_for_transformers/neural_chat/tests/ci/plugins/memory/test_memory.py index f24ce7d38fb..008c99b9910 100644 --- a/intel_extension_for_transformers/neural_chat/tests/ci/plugins/memory/test_memory.py +++ b/intel_extension_for_transformers/neural_chat/tests/ci/plugins/memory/test_memory.py @@ -24,7 +24,7 @@ def setUp(self): def tearDown(self) -> None: return super().tearDown() - + def test_memory(self): query ='hello' answer = "Hello! It's nice to meet you. Is there something I can help you with or would you like to chat?" diff --git a/intel_extension_for_transformers/neural_chat/tests/ci/plugins/retrieval/test_retrieval.py b/intel_extension_for_transformers/neural_chat/tests/ci/plugins/retrieval/test_retrieval.py index 16132c7b560..4b2e6059038 100644 --- a/intel_extension_for_transformers/neural_chat/tests/ci/plugins/retrieval/test_retrieval.py +++ b/intel_extension_for_transformers/neural_chat/tests/ci/plugins/retrieval/test_retrieval.py @@ -15,7 +15,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from intel_extension_for_transformers.neural_chat.pipeline.plugins.retrieval.parser.parser import DocumentParser +from intel_extension_for_transformers.neural_chat.pipeline.plugins.retrieval.parser.parser import DocumentParser import unittest diff --git a/intel_extension_for_transformers/neural_chat/tests/ci/server/test_askdoc_server.py b/intel_extension_for_transformers/neural_chat/tests/ci/server/test_askdoc_server.py index 841c04ca3ff..ef8c7fe442d 100644 --- a/intel_extension_for_transformers/neural_chat/tests/ci/server/test_askdoc_server.py +++ b/intel_extension_for_transformers/neural_chat/tests/ci/server/test_askdoc_server.py @@ -47,7 +47,7 @@ gaudi2_content = """ Habana Gaudi2 and 4th Gen Intel Xeon Scalable processors deliver leading performance and optimal cost savings for AI training. Today, MLCommons published results of its industry AI performance benchmark, MLPerf Training 3.0, in which both the Habana® Gaudi®2 deep learning accelerator and the 4th Gen Intel® Xeon® Scalable processor delivered impressive training results. -The latest MLPerf Training 3.0 results underscore the performance of Intel's products on an array of deep learning models. The maturity of Gaudi2-based software and systems for training was demonstrated at scale on the large language model, GPT-3. Gaudi2 is one of only two semiconductor solutions to submit performance results to the benchmark for LLM training of GPT-3. +The latest MLPerf Training 3.0 results underscore the performance of Intel's products on an array of deep learning models. 
The maturity of Gaudi2-based software and systems for training was demonstrated at scale on the large language model, GPT-3. Gaudi2 is one of only two semiconductor solutions to submit performance results to the benchmark for LLM training of GPT-3. Gaudi2 also provides substantially competitive cost advantages to customers, both in server and system costs. The accelerator’s MLPerf-validated performance on GPT-3, computer vision and natural language models, plus upcoming software advances make Gaudi2 an extremely compelling price/performance alternative to Nvidia's H100. On the CPU front, the deep learning training performance of 4th Gen Xeon processors with Intel AI engines demonstrated that customers can build with Xeon-based servers a single universal AI system for data pre-processing, model training and deployment to deliver the right combination of AI performance, efficiency, accuracy and scalability. diff --git a/intel_extension_for_transformers/neural_chat/tests/ci/server/test_multi_cpu_server.py b/intel_extension_for_transformers/neural_chat/tests/ci/server/test_multi_cpu_server.py index 17db6def956..552da06fad5 100644 --- a/intel_extension_for_transformers/neural_chat/tests/ci/server/test_multi_cpu_server.py +++ b/intel_extension_for_transformers/neural_chat/tests/ci/server/test_multi_cpu_server.py @@ -25,7 +25,7 @@ @patch('intel_extension_for_transformers.neural_chat.server.multi_cpu_server.build_chatbot') class TestMultiCPUServer(unittest.TestCase): - + def test_parse_args(self, mock_build_chatbot): args = parse_args() print(args) diff --git a/intel_extension_for_transformers/neural_chat/tests/ci/server/test_neuralchat_client.py b/intel_extension_for_transformers/neural_chat/tests/ci/server/test_neuralchat_client.py index c5a7e2914c8..dce6ee841ff 100644 --- a/intel_extension_for_transformers/neural_chat/tests/ci/server/test_neuralchat_client.py +++ b/intel_extension_for_transformers/neural_chat/tests/ci/server/test_neuralchat_client.py @@ -169,4 +169,3 @@ def test_execute_failed(self): if __name__ == '__main__': unittest.main() - diff --git a/intel_extension_for_transformers/neural_chat/tests/ci/server/test_neuralchat_server.py b/intel_extension_for_transformers/neural_chat/tests/ci/server/test_neuralchat_server.py index 782dd075982..1e79932d9aa 100644 --- a/intel_extension_for_transformers/neural_chat/tests/ci/server/test_neuralchat_server.py +++ b/intel_extension_for_transformers/neural_chat/tests/ci/server/test_neuralchat_server.py @@ -132,4 +132,3 @@ def test_execute_exception_call(self, mock_call): if __name__ == '__main__': unittest.main() - diff --git a/intel_extension_for_transformers/neural_chat/tests/ci/server/test_photoai_api.py b/intel_extension_for_transformers/neural_chat/tests/ci/server/test_photoai_api.py index b2d69e0bb2d..cff16d839f0 100644 --- a/intel_extension_for_transformers/neural_chat/tests/ci/server/test_photoai_api.py +++ b/intel_extension_for_transformers/neural_chat/tests/ci/server/test_photoai_api.py @@ -71,7 +71,7 @@ def mock_process(user_id, image_obj_list): mock_db2.return_value.transaction.return_value = MagicMock() mock_db2.return_value.insert.return_value = True mock_db2.return_value.fetch_one.return_value = MOCK_IMAGE_INFO - + data = { "image_list": [MOCK_IMAGE_SRC] } @@ -87,7 +87,7 @@ def mock_process(user_id, image_obj_list): def test_get_all_images(self, mock_func, mock_db): mock_func.return_value.fetch_one.return_value = MOCK_USER_INFO mock_db.return_value.fetch_all.return_value = [MOCK_IMAGE_INFO] - + response = 
client.post("/v1/aiphotos/getAllImages") self.assertEqual(response.status_code, 200) self.assertIn('image1.jpg', response.json()[0]['image_path']) @@ -167,10 +167,10 @@ def test_update_label(self, mock_func, mock_db): mock_func.return_value = True mock_db.return_value.transaction.return_value = MagicMock() mock_db.return_value.update.return_value = True - + data = { - "label_list": [{ - "label": "time", + "label_list": [{ + "label": "time", "from": "2023-10-08", "to": "2023-10-01" }] @@ -223,7 +223,7 @@ def test_chat_with_image(self, mock_func1, mock_func2, mock_func3, mock_func4, m self.assertEqual(response.status_code, 200) self.assertIn('image1.jpg', response.json()[0]['image_path']) - + @patch('intel_extension_for_transformers.neural_chat.server.restful.photoai_api.stable_defusion_func') @patch('intel_extension_for_transformers.neural_chat.server.restful.photoai_api.image_to_byte64') @patch('intel_extension_for_transformers.neural_chat.server.restful.photoai_api.get_image_root_path') diff --git a/intel_extension_for_transformers/neural_chat/tests/ci/server/test_photoai_services.py b/intel_extension_for_transformers/neural_chat/tests/ci/server/test_photoai_services.py index 12f62b34ec4..9c77ea854c2 100644 --- a/intel_extension_for_transformers/neural_chat/tests/ci/server/test_photoai_services.py +++ b/intel_extension_for_transformers/neural_chat/tests/ci/server/test_photoai_services.py @@ -23,7 +23,7 @@ from unittest.mock import patch, MagicMock from intel_extension_for_transformers.neural_chat.server.restful.photoai_services import ( check_user_ip, check_image_status, update_image_tags, update_image_attr, format_image_info, - delete_single_image, process_images_in_background, process_single_image, + delete_single_image, process_images_in_background, process_single_image, process_face_for_single_image, get_type_obj_from_attr, get_address_list, get_process_status, get_images_by_type, get_face_list_by_user_id, get_image_list_by_ner_query, delete_user_infos @@ -32,7 +32,7 @@ MOCK_USER_INFO = {'user_id': '1', 'login_time': None, 'leave_time': None, 'is_active': 1} MOCK_IMAGE_INFO = { - 'image_id': 1, + 'image_id': 1, 'user_id': '1', 'image_path': 'image1.jpg', 'captured_time': datetime.datetime.strptime('2022-02-22', '%Y-%m-%d'), @@ -113,7 +113,7 @@ def test_update_image_attr(self, mock_func, mock_db): sql='UPDATE image_info SET captured_time="2022-02-22 00:00:00" WHERE image_id=1', params=None ) - + def test_format_image_info(self, mock_db): image = MOCK_IMAGE_INFO @@ -135,7 +135,7 @@ def test_delete_single_image(self, mock_db): sql="UPDATE image_info SET exist_status='deleted' WHERE image_id=1 ;", params=None ) - + @patch('intel_extension_for_transformers.neural_chat.server.restful.photoai_services.process_single_image') def test_process_images_in_background(self, mock_func, mock_db): @@ -154,7 +154,7 @@ def test_process_images_in_background(self, mock_func, mock_db): process_images_in_background(user_id='1', image_obj_list=image_obj_list) except Exception as e: raise Exception(e) - + @patch('intel_extension_for_transformers.neural_chat.server.restful.photoai_services.process_face_for_single_image') @patch('intel_extension_for_transformers.neural_chat.server.restful.photoai_services.generate_caption') @@ -163,7 +163,7 @@ def test_process_images_in_background(self, mock_func, mock_db): @patch('intel_extension_for_transformers.neural_chat.server.restful.photoai_services.find_GPS_image') def test_process_single_image(self, mock_func1, mock_func2, mock_func3, mock_func4, mock_func5, mock_db): 
mock_func1.return_value = { - 'date_information': None, + 'date_information': None, 'GPS_information': { 'GPSLatitude': '', 'GPSLongitude': '', @@ -190,7 +190,7 @@ def test_process_single_image(self, mock_func1, mock_func2, mock_func3, mock_fun sql="UPDATE image_info SET process_status='ready' WHERE image_id=1", params=None ) - + @patch('intel_extension_for_transformers.neural_chat.server.restful.photoai_services.transfer_xywh') @patch('deepface.DeepFace.verify') @@ -234,7 +234,7 @@ def test_get_address_list(self, mock_db): res = get_address_list(user_id='1') self.assertIn('Shanghai', res) mock_db.return_value.fetch_all.assert_called_once_with( - sql='''SELECT address FROM image_info WHERE \n user_id="1" AND exist_status="active" GROUP BY address;''') + sql='''SELECT address FROM image_info WHERE\n user_id="1" AND exist_status="active" GROUP BY address;''') def test_get_process_status(self, mock_db): @@ -271,9 +271,9 @@ def test_get_image_list_by_ner_query(self, mock_func1, mock_db): }] res = get_image_list_by_ner_query( ner_result={ - 'time': ['2022-02-22'], - 'period': [{'from': '2022-02-02', 'to': '2023-02-02'}]}, - user_id='1', + 'time': ['2022-02-22'], + 'period': [{'from': '2022-02-02', 'to': '2023-02-02'}]}, + user_id='1', query='photos taken in shanghai') self.assertIn('https://test_server_ip/ai_photos/user1/image.jpg', res[0]['imgSrc']) @@ -281,7 +281,7 @@ def test_get_image_list_by_ner_query(self, mock_func1, mock_db): def test_delete_user_infos(self, mock_db): mock_db.return_value.transaction.return_value = MagicMock() mock_db.return_value.delete.return_value = True - try: + try: os.environ['IMAGE_ROOT_PATH'] = './mocked_root_path' delete_user_infos(user_id='1') except Exception as e: diff --git a/intel_extension_for_transformers/neural_chat/tests/ci/server/test_server_commands.py b/intel_extension_for_transformers/neural_chat/tests/ci/server/test_server_commands.py index 29c4a89f8b2..cc44da83fde 100644 --- a/intel_extension_for_transformers/neural_chat/tests/ci/server/test_server_commands.py +++ b/intel_extension_for_transformers/neural_chat/tests/ci/server/test_server_commands.py @@ -18,7 +18,7 @@ import unittest from intel_extension_for_transformers.neural_chat.server.server_commands import ( - get_server_command, NeuralChatServerHelpCommand, + get_server_command, NeuralChatServerHelpCommand, get_client_command, NeuralChatClientHelpCommand, neuralchat_server_execute, neuralchat_client_execute, NeuralChatClientBaseCommand @@ -30,7 +30,7 @@ class TestServerCommand(unittest.TestCase): def test_get_server_command(self): res = get_server_command('neuralchat_server.help') self.assertIs(res, NeuralChatServerHelpCommand) - + def test_get_client_command(self): res = get_client_command('neuralchat_client.help') self.assertIs(res, NeuralChatClientHelpCommand) diff --git a/intel_extension_for_transformers/neural_chat/tests/ci/utils/test_dotdict.py b/intel_extension_for_transformers/neural_chat/tests/ci/utils/test_dotdict.py index 22de1169ca4..47b13bd35c4 100644 --- a/intel_extension_for_transformers/neural_chat/tests/ci/utils/test_dotdict.py +++ b/intel_extension_for_transformers/neural_chat/tests/ci/utils/test_dotdict.py @@ -41,21 +41,21 @@ def test_DotDict(self): test_dotdict2 = DotDict( {'person':{'name':{'first':'John'}}}) test_dotdict2.__setitem__(key='person2', value=[{'sex': 'male'}]) self.assertEqual( - test_dotdict2, + test_dotdict2, {'person': {'name': {'first': 'John'}}, 'person2': {'sex': 'male'}}) test_dotdict2.__setitem__(key='person3', value=[{'sex': 'male'},{'age': 20}]) 
self.assertEqual( - test_dotdict2, + test_dotdict2, { - 'person': {'name': {'first': 'John'}}, - 'person2': {'sex': 'male'}, + 'person': {'name': {'first': 'John'}}, + 'person2': {'sex': 'male'}, 'person3': {'sex': 'male', 'age': 20}}) - + test_dotdict2.__setstate__({'test_state': 'active'}) res = test_dotdict2.__getstate__() self.assertEqual(res, {'test_state': 'active'}) - + if __name__ == "__main__": diff --git a/intel_extension_for_transformers/neural_chat/tests/nightly/models/test_chatglm.py b/intel_extension_for_transformers/neural_chat/tests/nightly/models/test_chatglm.py index 3dcd9881e08..4cb7f101c89 100644 --- a/intel_extension_for_transformers/neural_chat/tests/nightly/models/test_chatglm.py +++ b/intel_extension_for_transformers/neural_chat/tests/nightly/models/test_chatglm.py @@ -44,4 +44,4 @@ def test_get_default_conv_template(self): self.assertIn('上海', str(result)) if __name__ == "__main__": - unittest.main() \ No newline at end of file + unittest.main() diff --git a/intel_extension_for_transformers/neural_chat/tests/nightly/models/test_codellama.py b/intel_extension_for_transformers/neural_chat/tests/nightly/models/test_codellama.py index 1355f60fcdb..85e436dc610 100644 --- a/intel_extension_for_transformers/neural_chat/tests/nightly/models/test_codellama.py +++ b/intel_extension_for_transformers/neural_chat/tests/nightly/models/test_codellama.py @@ -35,4 +35,4 @@ def test_code_gen(self): self.assertIn("Hello World", str(result)) if __name__ == "__main__": - unittest.main() \ No newline at end of file + unittest.main() diff --git a/intel_extension_for_transformers/neural_chat/tests/nightly/models/test_llama.py b/intel_extension_for_transformers/neural_chat/tests/nightly/models/test_llama.py index 2c9a76fa2ce..5f8e375a5e7 100644 --- a/intel_extension_for_transformers/neural_chat/tests/nightly/models/test_llama.py +++ b/intel_extension_for_transformers/neural_chat/tests/nightly/models/test_llama.py @@ -42,4 +42,4 @@ def test_get_default_conv_template(self): self.assertIn('Intel Xeon Scalable Processors', str(result)) if __name__ == "__main__": - unittest.main() \ No newline at end of file + unittest.main() diff --git a/intel_extension_for_transformers/neural_chat/tests/nightly/models/test_mistral.py b/intel_extension_for_transformers/neural_chat/tests/nightly/models/test_mistral.py index f096c2ec935..7f1108ee754 100644 --- a/intel_extension_for_transformers/neural_chat/tests/nightly/models/test_mistral.py +++ b/intel_extension_for_transformers/neural_chat/tests/nightly/models/test_mistral.py @@ -44,4 +44,4 @@ def test_get_default_conv_template(self): self.assertIn('Intel Xeon Scalable processors', str(result)) if __name__ == "__main__": - unittest.main() \ No newline at end of file + unittest.main() diff --git a/intel_extension_for_transformers/neural_chat/tests/nightly/models/test_model_utils.py b/intel_extension_for_transformers/neural_chat/tests/nightly/models/test_model_utils.py index 37a819ff972..b5f8ce9a8d1 100644 --- a/intel_extension_for_transformers/neural_chat/tests/nightly/models/test_model_utils.py +++ b/intel_extension_for_transformers/neural_chat/tests/nightly/models/test_model_utils.py @@ -50,4 +50,4 @@ def test_load_model_on_hpu_with_deepspeed(self): self.assertTrue(MODELS[self.model_path]["model"] is not None) if __name__ == '__main__': - unittest.main() \ No newline at end of file + unittest.main() diff --git a/intel_extension_for_transformers/neural_chat/tests/nightly/models/test_mpt.py 
b/intel_extension_for_transformers/neural_chat/tests/nightly/models/test_mpt.py index ca3539bfaf6..52789ae46e6 100644 --- a/intel_extension_for_transformers/neural_chat/tests/nightly/models/test_mpt.py +++ b/intel_extension_for_transformers/neural_chat/tests/nightly/models/test_mpt.py @@ -41,4 +41,4 @@ def test_get_default_conv_template(self): self.assertIn('Intel Xeon Scalable processors', str(result)) if __name__ == "__main__": - unittest.main() \ No newline at end of file + unittest.main() diff --git a/intel_extension_for_transformers/neural_chat/tests/nightly/models/test_mpt_trace.py b/intel_extension_for_transformers/neural_chat/tests/nightly/models/test_mpt_trace.py index fff0fa6168e..dd3e2f0141f 100644 --- a/intel_extension_for_transformers/neural_chat/tests/nightly/models/test_mpt_trace.py +++ b/intel_extension_for_transformers/neural_chat/tests/nightly/models/test_mpt_trace.py @@ -24,7 +24,7 @@ def setUp(self): def tearDown(self) -> None: return super().tearDown() - + def test_mpt_trace(self): config = PipelineConfig(model_name_or_path='/tf_dataset2/models/nlp_toolkit/mpt-7b') chatbot = build_chatbot(config) diff --git a/intel_extension_for_transformers/neural_chat/tests/nightly/models/test_neuralchat.py b/intel_extension_for_transformers/neural_chat/tests/nightly/models/test_neuralchat.py index f694c4c8ada..a437a671ac8 100644 --- a/intel_extension_for_transformers/neural_chat/tests/nightly/models/test_neuralchat.py +++ b/intel_extension_for_transformers/neural_chat/tests/nightly/models/test_neuralchat.py @@ -79,4 +79,4 @@ def test_get_default_conv_template_v3_1(self): if __name__ == "__main__": - unittest.main() \ No newline at end of file + unittest.main() diff --git a/intel_extension_for_transformers/neural_chat/tests/nightly/models/test_phi2.py b/intel_extension_for_transformers/neural_chat/tests/nightly/models/test_phi2.py index 587caef9544..c4a9ccb168d 100644 --- a/intel_extension_for_transformers/neural_chat/tests/nightly/models/test_phi2.py +++ b/intel_extension_for_transformers/neural_chat/tests/nightly/models/test_phi2.py @@ -34,4 +34,4 @@ def test_code_gen(self): self.assertIn("99 plus 22 equals 121", str(result)) if __name__ == "__main__": - unittest.main() \ No newline at end of file + unittest.main() diff --git a/intel_extension_for_transformers/neural_chat/tests/nightly/models/test_qwen.py b/intel_extension_for_transformers/neural_chat/tests/nightly/models/test_qwen.py index feb868b3132..cc80743a515 100644 --- a/intel_extension_for_transformers/neural_chat/tests/nightly/models/test_qwen.py +++ b/intel_extension_for_transformers/neural_chat/tests/nightly/models/test_qwen.py @@ -47,4 +47,4 @@ def test_get_default_conv_template(self): self.assertIn('上海', str(result)) if __name__ == "__main__": - unittest.main() \ No newline at end of file + unittest.main() diff --git a/intel_extension_for_transformers/neural_chat/tests/nightly/models/test_starcoder.py b/intel_extension_for_transformers/neural_chat/tests/nightly/models/test_starcoder.py index fc1a1e69c82..17785d7dd3b 100644 --- a/intel_extension_for_transformers/neural_chat/tests/nightly/models/test_starcoder.py +++ b/intel_extension_for_transformers/neural_chat/tests/nightly/models/test_starcoder.py @@ -34,4 +34,4 @@ def test_code_gen(self): self.assertIn("Hello, world", str(result)) if __name__ == "__main__": - unittest.main() \ No newline at end of file + unittest.main() diff --git a/intel_extension_for_transformers/neural_chat/tests/nightly/plugins/test_ner.py 
b/intel_extension_for_transformers/neural_chat/tests/nightly/plugins/test_ner.py index 563ca049e0e..73e94e9dbab 100644 --- a/intel_extension_for_transformers/neural_chat/tests/nightly/plugins/test_ner.py +++ b/intel_extension_for_transformers/neural_chat/tests/nightly/plugins/test_ner.py @@ -33,14 +33,14 @@ def test_ner(self): query = "Show me photos taken in Shanghai." result = ner_obj.ner_inference(query) _result = { - 'period': [], - 'time': [], - 'location': ['Shanghai'], - 'name': [], + 'period': [], + 'time': [], + 'location': ['Shanghai'], + 'name': [], 'organization': [] } self.assertEqual(result, _result) if __name__ == "__main__": - unittest.main() \ No newline at end of file + unittest.main() diff --git a/intel_extension_for_transformers/neural_chat/tests/nightly/serving/test_tgi_serving.py b/intel_extension_for_transformers/neural_chat/tests/nightly/serving/test_tgi_serving.py index 8b3c506e592..3812b770c09 100644 --- a/intel_extension_for_transformers/neural_chat/tests/nightly/serving/test_tgi_serving.py +++ b/intel_extension_for_transformers/neural_chat/tests/nightly/serving/test_tgi_serving.py @@ -51,10 +51,10 @@ def test_tgi_root(self, mock_text_generation): best_of=1, do_sample=True, max_new_tokens=10, - repetition_penalty=1.03, - temperature=0.5, - top_k=10, - top_p=0.95, + repetition_penalty=1.03, + temperature=0.5, + top_k=10, + top_p=0.95, typical_p=0.95, stream=False ) @@ -74,10 +74,10 @@ def test_tgi_generate(self, mock_text_generation): best_of=1, do_sample=True, max_new_tokens=10, - repetition_penalty=1.03, - temperature=0.5, - top_k=10, - top_p=0.95, + repetition_penalty=1.03, + temperature=0.5, + top_k=10, + top_p=0.95, typical_p=0.95, stream=False ) @@ -102,4 +102,4 @@ def mock_generator(): if __name__ == "__main__": - unittest.main() \ No newline at end of file + unittest.main() diff --git a/intel_extension_for_transformers/neural_chat/tests/requirements.txt b/intel_extension_for_transformers/neural_chat/tests/requirements.txt index 3c9583d56c4..5ac25cea1d6 100644 --- a/intel_extension_for_transformers/neural_chat/tests/requirements.txt +++ b/intel_extension_for_transformers/neural_chat/tests/requirements.txt @@ -1,77 +1,77 @@ -transformers==4.36.2 -peft==0.6.2 +av +basicsr==1.4.2 +beautifulsoup4 +chromadb==0.4.15 +deepface +diffusers==0.12.1 +dlib-bin +einops +evaluate +exifread +face_alignment==1.3.5 +facexlib @ git+https://github.com/Spycsh/facexlib@master +fastapi==0.103.2 +ffmpeg-python==0.2.0 fschat==0.2.32 -torch==2.1.0 -torchaudio==2.1.0 -torchvision==0.16.0 +gfpgan +git+https://github.com/EleutherAI/lm-evaluation-harness.git@cc9778fbe4fa1a709be2abed9deb6180fd40e7e2 +git+https://github.com/UKPLab/sentence-transformers.git@5c838a705c24c2dfd151a71674c99d09d014c1a9 +gptcache +huggingface_hub +imageio==2.19.3 +imageio-ffmpeg==0.4.7 +InstructorEmbedding intel_extension_for_pytorch==2.1.0 +joblib==1.2.0 +kornia==0.6.8 +langchain==0.0.354 +langchain_core +langid +librosa +markdown +neural-compressor +neural_speed num2words -speechbrain -onnxruntime>=1.16.3 +numba +numpy==1.23.5 onnx>=1.15.0 +onnxruntime>=1.16.3 +openpyxl +optimum +optimum-intel==1.11.0 +optimum[habana] paddlepaddle paddlespeech -shortuuid -gptcache -evaluate +peft==0.6.2 +protobuf==3.20.2 +pydantic==1.10.13 pydub -python-multipart +pymysql PyPDF2 -langchain==0.0.354 -langchain_core python-docx +python-multipart +pyyaml +qdrant-client +resampy==0.3.1 +rouge_score +safetensors +scikit-image==0.19.3 scikit-learn -librosa -beautifulsoup4 -InstructorEmbedding -chromadb==0.4.15 -fastapi==0.103.2 
-pydantic==1.10.13 +scipy==1.10.1 +shortuuid +spacy +speechbrain starlette -yacs -uvicorn -optimum -optimum[habana] -git+https://github.com/UKPLab/sentence-transformers.git@5c838a705c24c2dfd151a71674c99d09d014c1a9 -unstructured -markdown -rouge_score -openpyxl -numpy==1.23.5 tiktoken==0.4.0 -git+https://github.com/EleutherAI/lm-evaluation-harness.git@cc9778fbe4fa1a709be2abed9deb6180fd40e7e2 -spacy -neural-compressor -pymysql -deepface -exifread -face_alignment==1.3.5 -imageio==2.19.3 -imageio-ffmpeg==0.4.7 -numba -resampy==0.3.1 -scipy==1.10.1 -kornia==0.6.8 +torch==2.1.0 +torchaudio==2.1.0 +torchvision==0.16.0 tqdm -pyyaml -joblib==1.2.0 -scikit-image==0.19.3 -basicsr==1.4.2 -facexlib @ git+https://github.com/Spycsh/facexlib@master -gfpgan -dlib-bin -av -safetensors -ffmpeg-python==0.2.0 -protobuf==3.20.2 -einops -urllib3 -langid -optimum-intel==1.11.0 -zhconv -diffusers==0.12.1 +transformers==4.36.2 transformers_stream_generator -qdrant-client -huggingface_hub +unstructured +urllib3 +uvicorn vllm -neural_speed +yacs +zhconv diff --git a/intel_extension_for_transformers/neural_chat/ui/customized/talking_photo/README.md b/intel_extension_for_transformers/neural_chat/ui/customized/talking_photo/README.md index d629add4261..2853a85bd13 100644 --- a/intel_extension_for_transformers/neural_chat/ui/customized/talking_photo/README.md +++ b/intel_extension_for_transformers/neural_chat/ui/customized/talking_photo/README.md @@ -59,4 +59,4 @@ This project is to provide instructions and guidance on how to use OpenAI. Howev 1. OpenAI Policies: OpenAI may have its own policies and regulations, such as API usage limits, pricing plans, service agreements, etc. Please make sure you are aware of and comply with OpenAI's relevant policies to avoid any violations. -2. If you have any questions or issues related to OpenAI while using this service, we take no responsibility for them.. \ No newline at end of file +2. If you have any questions or issues related to OpenAI while using this service, we take no responsibility for them.
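The tests/requirements.txt hunk above appears to be a pure alphabetical re-sort: every pin (transformers==4.36.2, torch==2.1.0, and so on) survives with its version unchanged. A quick way to confirm that a reorder like this neither adds nor drops a dependency is to compare the two versions of the file as sets. A minimal sketch, assuming hypothetical before/after copies of the file:

```python
# Minimal re-sort sanity check (illustrative, not part of this PR).
# "requirements.before.txt" / "requirements.after.txt" are hypothetical
# copies of neural_chat/tests/requirements.txt before and after the change.
def load_requirements(path):
    """Return the set of non-empty, non-comment lines in a requirements file."""
    with open(path) as f:
        return {line.strip() for line in f
                if line.strip() and not line.lstrip().startswith("#")}

before = load_requirements("requirements.before.txt")
after = load_requirements("requirements.after.txt")
# A pure reorder changes line order only, never the dependency set.
assert before == after, f"added: {after - before}, removed: {before - after}"
```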
diff --git a/intel_extension_for_transformers/neural_chat/ui/customized/talking_photo/tsconfig.json b/intel_extension_for_transformers/neural_chat/ui/customized/talking_photo/tsconfig.json index 6ae0c8c44d0..0f47472f79f 100644 --- a/intel_extension_for_transformers/neural_chat/ui/customized/talking_photo/tsconfig.json +++ b/intel_extension_for_transformers/neural_chat/ui/customized/talking_photo/tsconfig.json @@ -10,8 +10,4 @@ "sourceMap": true, "strict": true } - // Path aliases are handled by https://kit.svelte.dev/docs/configuration#alias - // - // If you want to overwrite includes/excludes, make sure to copy over the relevant includes/excludes - // from the referenced tsconfig.json - TypeScript does not merge them in } diff --git a/intel_extension_for_transformers/neural_chat/ui/customized/talkingbot/tsconfig.json b/intel_extension_for_transformers/neural_chat/ui/customized/talkingbot/tsconfig.json index 6ae0c8c44d0..0f47472f79f 100644 --- a/intel_extension_for_transformers/neural_chat/ui/customized/talkingbot/tsconfig.json +++ b/intel_extension_for_transformers/neural_chat/ui/customized/talkingbot/tsconfig.json @@ -10,8 +10,4 @@ "sourceMap": true, "strict": true } - // Path aliases are handled by https://kit.svelte.dev/docs/configuration#alias - // - // If you want to overwrite includes/excludes, make sure to copy over the relevant includes/excludes - // from the referenced tsconfig.json - TypeScript does not merge them in } diff --git a/intel_extension_for_transformers/neural_chat/ui/gradio/basic/README.md b/intel_extension_for_transformers/neural_chat/ui/gradio/basic/README.md index 5d66885d2f0..0bf55560a0a 100644 --- a/intel_extension_for_transformers/neural_chat/ui/gradio/basic/README.md +++ b/intel_extension_for_transformers/neural_chat/ui/gradio/basic/README.md @@ -72,4 +72,4 @@ You also have the option to update the backend service URL in the `app.py` file. ![Update backend URL](https://i.imgur.com/gRtZHrJ.png) ->**Note**: Please use Gradio version 3.36.0. \ No newline at end of file +>**Note**: Please use Gradio version 3.36.0. diff --git a/intel_extension_for_transformers/neural_chat/ui/gradio/basic/requirements.txt b/intel_extension_for_transformers/neural_chat/ui/gradio/basic/requirements.txt index cf638b309b3..60eb886f47f 100644 --- a/intel_extension_for_transformers/neural_chat/ui/gradio/basic/requirements.txt +++ b/intel_extension_for_transformers/neural_chat/ui/gradio/basic/requirements.txt @@ -1,10 +1,10 @@ -pip -torch==2.1.0 diffusers==0.8.1 -transformers -requests +fschat +gradio huggingface_hub markdown2 nh3 -gradio -fschat +pip +requests +torch==2.1.0 +transformers diff --git a/intel_extension_for_transformers/neural_chat/ui/gradio/side_by_side/README.md b/intel_extension_for_transformers/neural_chat/ui/gradio/side_by_side/README.md index 1e44a8e30e6..3bffa57c3a8 100644 --- a/intel_extension_for_transformers/neural_chat/ui/gradio/side_by_side/README.md +++ b/intel_extension_for_transformers/neural_chat/ui/gradio/side_by_side/README.md @@ -75,4 +75,4 @@ You also have the option to update the backend service URL in the `app.py` file. ![Update backend URL](https://i.imgur.com/j7TTYaW.png) ->**Note**: Please use Gradio version 3.34.0. \ No newline at end of file +>**Note**: Please use Gradio version 3.34.0. 
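The two Gradio README notes above pin different versions (3.36.0 for the basic UI, 3.34.0 for side_by_side), so one environment cannot satisfy both at once. A small guard at the top of each app.py would surface a mismatch early; this is only a sketch under that assumption, not code from this PR:

```python
# Hypothetical version guard (not part of this PR): fail fast when the
# installed Gradio does not match the version the README pins for this UI.
import gradio

EXPECTED_GRADIO = "3.34.0"  # side_by_side README pin; the basic UI pins 3.36.0
if gradio.__version__ != EXPECTED_GRADIO:
    raise RuntimeError(
        f"This UI is validated against Gradio {EXPECTED_GRADIO}, "
        f"but {gradio.__version__} is installed."
    )
```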
diff --git a/intel_extension_for_transformers/neural_chat/ui/gradio/side_by_side/app.py b/intel_extension_for_transformers/neural_chat/ui/gradio/side_by_side/app.py index a092101161a..298dae8f565 100644 --- a/intel_extension_for_transformers/neural_chat/ui/gradio/side_by_side/app.py +++ b/intel_extension_for_transformers/neural_chat/ui/gradio/side_by_side/app.py @@ -506,10 +506,10 @@ def http_bot(state, model_selector, temperature, max_new_tokens, topk, request: } .user, .bot { width: 80% !important; - + } .bot { - white-space: pre-wrap !important; + white-space: pre-wrap !important; line-height: 1.3 !important; display: flex; flex-direction: column; @@ -522,7 +522,7 @@ def http_bot(state, model_selector, temperature, max_new_tokens, topk, request: #btn-list-style { background: #eee0; border: 1px solid #0053f4; -} +} .title { font-size: 1.5rem; font-weight: 700; @@ -654,7 +654,7 @@ def build_single_model_ui(models): # Register listeners btn_list = [regenerate_btn, clear_btn] - + regenerate_btn.click(regenerate, state1, [state1, chatbot1, textbox] + btn_list).then( http_bot, [state1, model_selector, temperature, max_output_tokens, topk, choice_chatbot1], diff --git a/intel_extension_for_transformers/neural_chat/ui/gradio/side_by_side/requirements.txt b/intel_extension_for_transformers/neural_chat/ui/gradio/side_by_side/requirements.txt index 43673c0c18f..2896e887da8 100644 --- a/intel_extension_for_transformers/neural_chat/ui/gradio/side_by_side/requirements.txt +++ b/intel_extension_for_transformers/neural_chat/ui/gradio/side_by_side/requirements.txt @@ -1,10 +1,10 @@ -pip -torch diffusers==0.8.1 -transformers -requests +fschat +gradio huggingface_hub markdown2 nh3 -gradio -fschat +pip +requests +torch +transformers diff --git a/intel_extension_for_transformers/neural_chat/utils/__init__.py b/intel_extension_for_transformers/neural_chat/utils/__init__.py index 407b29e53be..18896e7b549 100644 --- a/intel_extension_for_transformers/neural_chat/utils/__init__.py +++ b/intel_extension_for_transformers/neural_chat/utils/__init__.py @@ -14,4 +14,3 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - diff --git a/intel_extension_for_transformers/neural_chat/utils/common.py b/intel_extension_for_transformers/neural_chat/utils/common.py index 462b89b29fb..eb8d86a1f7f 100644 --- a/intel_extension_for_transformers/neural_chat/utils/common.py +++ b/intel_extension_for_transformers/neural_chat/utils/common.py @@ -47,4 +47,4 @@ def is_audio_file(filename): if file_extension in audio_extensions: return True else: - return False \ No newline at end of file + return False diff --git a/intel_extension_for_transformers/neural_chat/utils/database/__init__.py b/intel_extension_for_transformers/neural_chat/utils/database/__init__.py index 2823243c0bb..18896e7b549 100644 --- a/intel_extension_for_transformers/neural_chat/utils/database/__init__.py +++ b/intel_extension_for_transformers/neural_chat/utils/database/__init__.py @@ -13,4 +13,4 @@ # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and -# limitations under the License. \ No newline at end of file +# limitations under the License. 
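The utils/common.py hunk above only restores the missing trailing newline, but the `if ...: return True else: return False` tail it shows is a pattern that collapses to a single boolean expression. A minimal sketch, assuming the extension is derived from the file name and that audio_extensions is the collection the module already defines (the sample set below is made up):

```python
import os

audio_extensions = {".wav", ".mp3", ".flac", ".ogg"}  # assumed sample set

def is_audio_file(filename):
    """Idiomatic equivalent of the if/else tail shown in the diff above."""
    file_extension = os.path.splitext(filename)[1].lower()  # assumed derivation
    return file_extension in audio_extensions
```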
diff --git a/intel_extension_for_transformers/neural_chat/utils/database/mysqldb.py b/intel_extension_for_transformers/neural_chat/utils/database/mysqldb.py index 181fa78ebd8..dc417f5b61d 100644 --- a/intel_extension_for_transformers/neural_chat/utils/database/mysqldb.py +++ b/intel_extension_for_transformers/neural_chat/utils/database/mysqldb.py @@ -84,4 +84,3 @@ def delete(self, sql, params): def _edit(self, sql, params): return self._cursor.execute(sql, params) - diff --git a/intel_extension_for_transformers/neural_chat/utils/dotdict.py b/intel_extension_for_transformers/neural_chat/utils/dotdict.py index 0092ca02538..2a2daa172a9 100644 --- a/intel_extension_for_transformers/neural_chat/utils/dotdict.py +++ b/intel_extension_for_transformers/neural_chat/utils/dotdict.py @@ -86,5 +86,3 @@ def __setstate__(self, d): self.__dict__.update(d) __setattr__, __getattr__ = __setitem__, __getitem__ - - diff --git a/intel_extension_for_transformers/neural_chat/utils/record_request.py b/intel_extension_for_transformers/neural_chat/utils/record_request.py index b5636dcc74d..4dd19db8203 100644 --- a/intel_extension_for_transformers/neural_chat/utils/record_request.py +++ b/intel_extension_for_transformers/neural_chat/utils/record_request.py @@ -44,4 +44,3 @@ def record_request(request_url: str, request_body: Dict, user_id: str): except Exception as e: raise Exception(f"[record request] Exception occurred: {e}") mysqldb._close() - diff --git a/intel_extension_for_transformers/setfit/__init__.py b/intel_extension_for_transformers/setfit/__init__.py index 5f72f138d9e..8427ebfbbb8 100644 --- a/intel_extension_for_transformers/setfit/__init__.py +++ b/intel_extension_for_transformers/setfit/__init__.py @@ -23,4 +23,4 @@ setfit = LazyImport("setfit") add_templated_examples = setfit.add_templated_examples get_templated_dataset = setfit.get_templated_dataset -sample_dataset = setfit.sample_dataset \ No newline at end of file +sample_dataset = setfit.sample_dataset diff --git a/intel_extension_for_transformers/setfit/modeling.py b/intel_extension_for_transformers/setfit/modeling.py index 152178c3289..09f75555216 100644 --- a/intel_extension_for_transformers/setfit/modeling.py +++ b/intel_extension_for_transformers/setfit/modeling.py @@ -22,4 +22,4 @@ class SetFitModel(setfit.SetFitModel): def __init__(self, *args, **kwargs) -> None: - super().__init__(*args, **kwargs) \ No newline at end of file + super().__init__(*args, **kwargs) diff --git a/intel_extension_for_transformers/setfit/trainer.py b/intel_extension_for_transformers/setfit/trainer.py index 0a5b0c64034..29b67c572c5 100644 --- a/intel_extension_for_transformers/setfit/trainer.py +++ b/intel_extension_for_transformers/setfit/trainer.py @@ -21,4 +21,4 @@ class SetFitTrainer(setfit.SetFitTrainer): def __init__(self, *args, **kwargs) -> None: - super().__init__(*args, **kwargs) \ No newline at end of file + super().__init__(*args, **kwargs) diff --git a/intel_extension_for_transformers/setfit/trainer_distillation.py b/intel_extension_for_transformers/setfit/trainer_distillation.py index d07a46144dc..cdb06ab536b 100644 --- a/intel_extension_for_transformers/setfit/trainer_distillation.py +++ b/intel_extension_for_transformers/setfit/trainer_distillation.py @@ -21,4 +21,4 @@ class DistillationSetFitTrainer(setfit.DistillationSetFitTrainer): def __init__(self, *args, **kwargs) -> None: - super().__init__(*args, **kwargs) \ No newline at end of file + super().__init__(*args, **kwargs) diff --git a/intel_extension_for_transformers/setfit/utils.py 
b/intel_extension_for_transformers/setfit/utils.py index 41bc6937b3b..0eb90350230 100644 --- a/intel_extension_for_transformers/setfit/utils.py +++ b/intel_extension_for_transformers/setfit/utils.py @@ -18,4 +18,4 @@ from intel_extension_for_transformers.transformers.utils.utility import LazyImport setfit = LazyImport("setfit") -LOSS_NAME_TO_CLASS = setfit.utils.LOSS_NAME_TO_CLASS \ No newline at end of file +LOSS_NAME_TO_CLASS = setfit.utils.LOSS_NAME_TO_CLASS diff --git a/intel_extension_for_transformers/transformers/config.py b/intel_extension_for_transformers/transformers/config.py index 87d1351177d..c0713c2a02e 100644 --- a/intel_extension_for_transformers/transformers/config.py +++ b/intel_extension_for_transformers/transformers/config.py @@ -1209,7 +1209,7 @@ def kwargs(self): def kwargs(self, kwargs): """Set kwargs.""" self._kwargs = kwargs - + @constructor_register class PrunerV2: """ diff --git a/intel_extension_for_transformers/transformers/distillation.py b/intel_extension_for_transformers/transformers/distillation.py index 0f2fcd449fa..9f801b9c112 100644 --- a/intel_extension_for_transformers/transformers/distillation.py +++ b/intel_extension_for_transformers/transformers/distillation.py @@ -43,7 +43,7 @@ class DistillationCriterionMode(Enum): """Criterion mode class for distillation.""" KNOWLEDGELOSS = "KnowledgeDistillationLoss" INTERMEDIATELAYERSLOSS = "IntermediateLayersKnowledgeDistillationLoss" - + SUPPORTED_DISTILLATION_CRITERION_MODE = \ diff --git a/intel_extension_for_transformers/transformers/dpo_trainer.py b/intel_extension_for_transformers/transformers/dpo_trainer.py index bf0b394fdad..46ccc127485 100644 --- a/intel_extension_for_transformers/transformers/dpo_trainer.py +++ b/intel_extension_for_transformers/transformers/dpo_trainer.py @@ -49,9 +49,9 @@ class DPOTrainer(Trainer): model (`transformers.PreTrainedModel`): The model to train, preferably an `AutoModelForSequenceClassification`. ref_model (`PreTrainedModelWrapper`): - Hugging Face transformer model with a casual language modelling head. + Hugging Face transformer model with a causal language modeling head. Used for implicit reward computation and loss. If no - reference model is provided, the trainer will + reference model is provided, the trainer will create a reference model with the same architecture as the model to be optimized. beta (`float`, defaults to 0.1): The beta factor in DPO loss. Higher beta means less divergence from the initial policy. @@ -368,4 +368,3 @@ def __init__( from habana_frameworks.torch.hpu import wrap_in_hpu_graph # pylint: disable=E0611, E0401 ref_model = self.accelerator.unwrap_model(self.ref_model) ref_model = wrap_in_hpu_graph(ref_model) - diff --git a/intel_extension_for_transformers/transformers/dynamic/drop_and_restore_utils.py b/intel_extension_for_transformers/transformers/dynamic/drop_and_restore_utils.py index 38031b04293..04252479182 100644 --- a/intel_extension_for_transformers/transformers/dynamic/drop_and_restore_utils.py +++ b/intel_extension_for_transformers/transformers/dynamic/drop_and_restore_utils.py @@ -38,7 +38,7 @@ def sample_length_configuration( min_length=2, ): """Get different sequence length for hidden layers. - + Args: max_seq_length: A number to set the max sequence length. num_hidden_layers: A number of total hidden layers. @@ -48,7 +48,7 @@ def sample_length_configuration( length_drop_ratio_bound: The max ratio to truncate the sequence. If the ratio is set, the length will not be less than max_seq_length * ratio.
min_length: The number to set the min sequence length. - + Return: (Tuple): The tuple of length configuration for different hidden layers. """ @@ -74,13 +74,13 @@ def sample_layer_configuration( layer_dropout_bound=None, ): """Get sample layers depending on the set parameters. - + Args: num_hidden_layers: The total number of hidden layers. layer_dropout_prob: Probability to dropout a layer. layer_dropout: Number of how many layers to dropout. layer_dropout_bound: The bound of how many layers to dropout. - + Return: (Tuple): The tuple indicating which layers are kept. """ diff --git a/intel_extension_for_transformers/transformers/mixture/auto_distillation.py b/intel_extension_for_transformers/transformers/mixture/auto_distillation.py index e887ed508e1..84426228123 100644 --- a/intel_extension_for_transformers/transformers/mixture/auto_distillation.py +++ b/intel_extension_for_transformers/transformers/mixture/auto_distillation.py @@ -42,19 +42,19 @@ class AutoDistillation(NASBase): """The framework class is designed for handling the whole pipeline of AutoDistillation. - AutoDistillation is composed of three major stages, i.e. Model Exploration, Flash Distillation, + AutoDistillation is composed of three major stages, i.e. Model Exploration, Flash Distillation, and Evaluation. - In Model Exploration, a search engine will search for a better compressed model from the architecture + In Model Exploration, a search engine will search for a better compressed model from the architecture design space in each iteration. Flash Distillation is the stage for training the searched model to discover its potential. - In Evaluation stage, the trained model will be evaluated to measure its performances (e.g. + In the Evaluation stage, the trained model will be evaluated to measure its performance (e.g. the prediction accuracy, the hardware performance etc.) in order to select the best model architecture. """ def __init__(self, model_builder, conf_fname_or_obj, framework='pytorch'): """Init an AutoDistillation instance based on config. Args: - model_builder (function obj): A function to build model instance with the specified + model_builder (function obj): A function to build model instance with the specified model architecture parameters. conf_fname_or_obj (string or obj): The path to the YAML configuration file or a configuration object containing search settings, flash distillation settings, etc. @@ -68,7 +68,7 @@ def __init__(self, model_builder, conf_fname_or_obj, framework='pytorch'): def search(self, res_save_path=None, model_cls=None): """Auto distillation search process. - + Returns: Best model architecture found in search process. """ @@ -129,7 +129,7 @@ def reload_tf_model(model): ) self.search_results[tuple(model_arch_paras.values())] = metrics - if (self.framework != "pytorch" or not torch.distributed.is_initialized() + if (self.framework != "pytorch" or not torch.distributed.is_initialized() or torch.distributed.get_rank() == 0): self._search_algorithm.get_feedback(sum(self.metrics_conversion(metrics))) print(f'res_save_path: {res_save_path}, save_path = {save_path}') @@ -179,7 +179,7 @@ def count_model_parameters(self, model): def load_search_results(self, path): """Load previous search results. - + Args: path: The file path which stores the previous results.
""" @@ -349,4 +349,4 @@ def eval_func(self, eval_func): def __repr__(self): # pragma: no cover """Return class name.""" - return 'AutoDistillation' \ No newline at end of file + return 'AutoDistillation' diff --git a/intel_extension_for_transformers/transformers/modeling/__init__.py b/intel_extension_for_transformers/transformers/modeling/__init__.py index 65b24287d38..5fb99065b03 100644 --- a/intel_extension_for_transformers/transformers/modeling/__init__.py +++ b/intel_extension_for_transformers/transformers/modeling/__init__.py @@ -21,4 +21,3 @@ from .model import OptimizedModel from .modeling_auto import (AutoModel, AutoModelForCausalLM, AutoModelForSeq2SeqLM) - diff --git a/intel_extension_for_transformers/transformers/modeling/gpt_bigcode/__init__.py b/intel_extension_for_transformers/transformers/modeling/gpt_bigcode/__init__.py index 96a9dc31f6b..5193c828c62 100644 --- a/intel_extension_for_transformers/transformers/modeling/gpt_bigcode/__init__.py +++ b/intel_extension_for_transformers/transformers/modeling/gpt_bigcode/__init__.py @@ -14,4 +14,3 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - diff --git a/intel_extension_for_transformers/transformers/modeling/gpt_bigcode/modeling_gpt_bigcode.py b/intel_extension_for_transformers/transformers/modeling/gpt_bigcode/modeling_gpt_bigcode.py index 167c9b3f056..1de628f50f0 100644 --- a/intel_extension_for_transformers/transformers/modeling/gpt_bigcode/modeling_gpt_bigcode.py +++ b/intel_extension_for_transformers/transformers/modeling/gpt_bigcode/modeling_gpt_bigcode.py @@ -188,9 +188,9 @@ def _attn(self, query, key, value, attention_mask=None, head_mask=None): beta = 1 else: beta = 0 - + attn_weights = scale_factor * torch.matmul(query, key) # + beta * attn_weights (not needed, it is 0) - + if upcast: # Use a fused kernel to prevent a large overhead from casting and scaling. # Sub-optimal when the key length is not a multiple of 8. diff --git a/intel_extension_for_transformers/transformers/modeling/llava_models/__init__.py b/intel_extension_for_transformers/transformers/modeling/llava_models/__init__.py index ed04d17bdbe..369707c0ef6 100644 --- a/intel_extension_for_transformers/transformers/modeling/llava_models/__init__.py +++ b/intel_extension_for_transformers/transformers/modeling/llava_models/__init__.py @@ -14,4 +14,3 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - diff --git a/intel_extension_for_transformers/transformers/modeling/llava_models/llava_arch.py b/intel_extension_for_transformers/transformers/modeling/llava_models/llava_arch.py index c343d254782..7d02a9e04df 100644 --- a/intel_extension_for_transformers/transformers/modeling/llava_models/llava_arch.py +++ b/intel_extension_for_transformers/transformers/modeling/llava_models/llava_arch.py @@ -161,7 +161,7 @@ def prepare_inputs_labels_for_multimodal( for batch_idx, cur_input_ids in enumerate(input_ids): num_images = (cur_input_ids == IMAGE_TOKEN_INDEX).sum() if num_images == 0: - # Concatenating the cur_image_features[0:0], like in the original implementation, # is removed as it causes the backpropagation to crash on the hpu.
new_input_embeds.append(self.get_model().embed_tokens(cur_input_ids)) new_labels.append(labels[batch_idx]) diff --git a/intel_extension_for_transformers/transformers/modeling/llava_models/llava_mistral.py b/intel_extension_for_transformers/transformers/modeling/llava_models/llava_mistral.py index 59761d41811..50d9f500f34 100644 --- a/intel_extension_for_transformers/transformers/modeling/llava_models/llava_mistral.py +++ b/intel_extension_for_transformers/transformers/modeling/llava_models/llava_mistral.py @@ -21,7 +21,7 @@ import torch.nn as nn from transformers import AutoConfig, AutoModelForCausalLM -from transformers import MistralConfig, MistralModel, MistralForCausalLM # pylint: disable=E0611 +from transformers import MistralConfig, MistralModel, MistralForCausalLM # pylint: disable=E0611 from transformers.modeling_outputs import CausalLMOutputWithPast from .multimodal_encoder.builder import build_vision_tower diff --git a/intel_extension_for_transformers/transformers/modeling/model.py b/intel_extension_for_transformers/transformers/modeling/model.py index b3421a88f3a..597988dd3dc 100644 --- a/intel_extension_for_transformers/transformers/modeling/model.py +++ b/intel_extension_for_transformers/transformers/modeling/model.py @@ -174,7 +174,7 @@ def from_pretrained(cls, model_name_or_path: str, **kwargs): ) raise EnvironmentError(msg) else: - from transformers.utils import cached_file + from transformers.utils import cached_file try: # Load from URL or cache if already cached resolved_weights_file = cached_file( diff --git a/intel_extension_for_transformers/transformers/modeling/modeling_bert_dynamic.py b/intel_extension_for_transformers/transformers/modeling/modeling_bert_dynamic.py index ade294158d5..6f702135eef 100644 --- a/intel_extension_for_transformers/transformers/modeling/modeling_bert_dynamic.py +++ b/intel_extension_for_transformers/transformers/modeling/modeling_bert_dynamic.py @@ -612,7 +612,7 @@ def feed_forward_chunk(self, attention_output): class BertEncoder(nn.Module): """Bert encoder.""" def __init__(self, config): - """Init an instance base on config.""" + """Init an instance based on config.""" super().__init__() self.config = config self.layer = nn.ModuleList([BertLayer(config) for _ in range(config.num_hidden_layers)]) @@ -636,17 +636,17 @@ def forward( ) -> Union[Tuple[torch.Tensor], BaseModelOutputWithPastAndCrossAttentions]: """The main entry point for the class.""" bsz, tsz, dim = hidden_states.size() - + if length_config is not None: restored_hidden_states = hidden_states remain_indices = torch.arange(tsz, device=hidden_states.device).unsqueeze(0).repeat(bsz, 1) - + all_hidden_states = () if output_hidden_states else None if output_hidden_states: all_hidden_states = all_hidden_states + (hidden_states, ) - + all_self_attentions = () if output_attentions and not length_config else None all_cross_attentions = () if output_attentions and self.config.add_cross_attention and not length_config \ else None @@ -895,13 +895,13 @@ class BertForPreTrainingOutput(ModelOutput): # pragma: no cover seq_relationship_logits (`torch.FloatTensor` of shape `(batch_size, 2)`): Prediction scores of the next sequence prediction (classification) head (scores of True/False continuation before SoftMax).
- hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or + hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`): Tuple of `torch.FloatTensor` (one for the output of the embeddings + one for the output of each layer) of shape `(batch_size, sequence_length, hidden_size)`. Hidden-states of the model at the output of each layer plus the initial embedding outputs. - attentions (`tuple(torch.FloatTensor)`, *optional*, returned when `output_attentions=True` is passed or when + attentions (`tuple(torch.FloatTensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`): Tuple of `torch.FloatTensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length, sequence_length)`. @@ -1021,9 +1021,9 @@ def get_input_embeddings(self): return self.embeddings.word_embeddings def set_input_embeddings(self, value): - """Setter of input embeddings.""" + """Setter of input embeddings.""" self.embeddings.word_embeddings = value - + def set_length_config(self, length_config): """Setter of length config.""" self.length_config = length_config @@ -2068,4 +2068,4 @@ def expand_gather(input, dim, index): """Expand gather.""" size = list(input.size()) size[dim] = -1 - return input.gather(dim, index.expand(*size)) \ No newline at end of file + return input.gather(dim, index.expand(*size)) diff --git a/intel_extension_for_transformers/transformers/modeling/modeling_roberta_dynamic.py b/intel_extension_for_transformers/transformers/modeling/modeling_roberta_dynamic.py index b18e7416d31..0d993814640 100644 --- a/intel_extension_for_transformers/transformers/modeling/modeling_roberta_dynamic.py +++ b/intel_extension_for_transformers/transformers/modeling/modeling_roberta_dynamic.py @@ -116,7 +116,7 @@ def forward( self, input_ids=None, token_type_ids=None, position_ids=None, inputs_embeds=None, past_key_values_length=0 ): """The main entry point for the class.""" - if position_ids is None: + if position_ids is None: if input_ids is not None: # Create the position ids from the input token ids. Any padded tokens remain padded. position_ids = create_position_ids_from_input_ids(input_ids, self.padding_idx, past_key_values_length) @@ -130,8 +130,8 @@ def forward( seq_length = input_shape[1] - # Setting the token_type_ids to the registered buffer in constructor where it is all zeros, which usually - # occurs when its auto-generated, registered buffer helps users when tracing the model without passing + # Setting the token_type_ids to the registered buffer in constructor where it is all zeros, which usually + # occurs when it's auto-generated, registered buffer helps users when tracing the model without passing # token_type_ids, solves issue #5664 if token_type_ids is None: # pragma: no cover if hasattr(self, "token_type_ids"): @@ -155,7 +155,7 @@ def forward( def create_position_ids_from_inputs_embeds(self, inputs_embeds): # pragma: no cover """We are provided embeddings directly. - + We cannot infer which are padded so just generate sequential position ids.
Args: @@ -207,7 +207,7 @@ def transpose_for_scores(self, x: torch.Tensor) -> torch.Tensor: """Transpose for scores.""" # new_x_shape = x.size()[:-1] + (self.num_attention_heads, self.attention_head_size) # assign -1 to bypass onnx dynamic batch issue: - new_x_shape = (-1,) + x.size()[1:-1] + (self.num_attention_heads, self.attention_head_size) + new_x_shape = (-1,) + x.size()[1:-1] + (self.num_attention_heads, self.attention_head_size) x = x.view(new_x_shape) return x.permute(0, 2, 1, 3) @@ -513,7 +513,7 @@ def forward( outputs = (layer_output,) + outputs # if decoder, return the attn key/values as the last output if self.is_decoder: - outputs = outputs + (present_key_value,) + outputs = outputs + (present_key_value,) return outputs, keep_indices @@ -552,16 +552,16 @@ def forward( ) -> Union[Tuple[torch.Tensor], BaseModelOutputWithPastAndCrossAttentions]: """The main entry point for the class.""" bsz, tsz, dim = hidden_states.size() - + if length_config is not None: restored_hidden_states = hidden_states remain_indices = torch.arange(tsz, device=hidden_states.device).unsqueeze(0).repeat(bsz, 1) - + all_hidden_states = () if output_hidden_states else None - + if output_hidden_states: all_hidden_states = all_hidden_states + (hidden_states, ) - + all_self_attentions = () if output_attentions and not length_config else None all_cross_attentions = () if output_attentions and self.config.add_cross_attention and not length_config else \ None @@ -630,14 +630,14 @@ def custom_forward(*inputs): next_decoder_cache += (layer_outputs[-1],) if not layer_output_length and output_attentions: # pragma: no cover # we're done with attentions for calculating tokens drop - + all_self_attentions = all_self_attentions + (layer_outputs[1],) if self.config.add_cross_attention: all_cross_attentions = all_cross_attentions + (layer_outputs[2],) if output_hidden_states: all_hidden_states = all_hidden_states + (hidden_states,) - + last_hidden_state = restored_hidden_states if length_config is not None else hidden_states if not return_dict: @@ -682,8 +682,8 @@ def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: class RobertaPreTrainedModel(PreTrainedModel): # pragma: no cover """Roberta pretrained model. - - An abstract class to handle weights initialization and a simple interface for + + An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained models. """ @@ -823,10 +823,10 @@ def __init__(self, config, add_pooling_layer=True): # Initialize weights and apply final processing self.post_init() - + self.length_config = eval(config.length_config) if hasattr(config, "length_config") else None self.output_attentions = self.length_config is not None - + def get_input_embeddings(self): """Getter of input embeddings.""" return self.embeddings.word_embeddings @@ -834,7 +834,7 @@ def get_input_embeddings(self): def set_input_embeddings(self, value): """Setter of input embeddings.""" self.embeddings.word_embeddings = value - + def set_length_config(self, length_config): """Setter of length config.""" self.length_config = length_config @@ -846,7 +846,7 @@ def set_output_attentions(self, value): def _prune_heads(self, heads_to_prune): # pragma: no cover """Prunes heads of the model. - + heads_to_prune: dict of {layer_num: list of heads to prune in this layer} See base class PreTrainedModel. """ for layer, heads in heads_to_prune.items(): @@ -1063,7 +1063,7 @@ def forward( Labels for computing the left-to-right language modeling loss (next word prediction). 
Indices should be in `[-100, 0, ..., config.vocab_size]` (see `input_ids` docstring) Tokens with indices set to `-100` are ignored (masked), the loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]` - past_key_values (`tuple(tuple(torch.FloatTensor))` of length `config.n_layers` with each tuple having 4 + past_key_values (`tuple(tuple(torch.FloatTensor))` of length `config.n_layers` with each tuple having 4 tensors of shape `(batch_size, num_heads, sequence_length - 1, embed_size_per_head)`): Contains precomputed key and value hidden states of the attention blocks. Can be used to speed up decoding. If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those that @@ -1088,7 +1088,7 @@ def forward( >>> prediction_logits = outputs.logits ``` - + Returns: CausalLMOutputWithCrossAttentions. """ @@ -1724,7 +1724,7 @@ def forward( def create_position_ids_from_input_ids(input_ids, padding_idx, past_key_values_length=0): """Replace non-padding symbols with their position numbers. - Position numbers begin at padding_idx+1. Padding symbols are ignored. + Position numbers begin at padding_idx+1. Padding symbols are ignored. This is modified from fairseq's `utils.make_positions`. """ @@ -1738,4 +1738,4 @@ def expand_gather(input, dim, index): size = list(input.size()) size[0] = -1 # for onnx dynamic batch size issue size[dim] = -1 - return input.gather(dim, index.expand(*size)) \ No newline at end of file + return input.gather(dim, index.expand(*size)) diff --git a/intel_extension_for_transformers/transformers/nas/basic_nas.py b/intel_extension_for_transformers/transformers/nas/basic_nas.py index da5b4d48b43..5d8cf54879e 100644 --- a/intel_extension_for_transformers/transformers/nas/basic_nas.py +++ b/intel_extension_for_transformers/transformers/nas/basic_nas.py @@ -126,4 +126,4 @@ def eval_func(self, user_eval_func): def __repr__(self): """Class representation.""" - return 'BasicNAS' # pragma: no cover \ No newline at end of file + return 'BasicNAS' # pragma: no cover diff --git a/intel_extension_for_transformers/transformers/nas/nas.py b/intel_extension_for_transformers/transformers/nas/nas.py index adc9de32b97..18433596029 100644 --- a/intel_extension_for_transformers/transformers/nas/nas.py +++ b/intel_extension_for_transformers/transformers/nas/nas.py @@ -66,4 +66,4 @@ def __new__(self, conf_fname_or_obj, *args, **kwargs): "NAS approach not set in config, use default NAS approach, i.e. Basic." ) method = 'basic' - return NASMethods[method](conf_fname_or_obj, *args, **kwargs) \ No newline at end of file + return NASMethods[method](conf_fname_or_obj, *args, **kwargs) diff --git a/intel_extension_for_transformers/transformers/optimizer.py b/intel_extension_for_transformers/transformers/optimizer.py index 750751bcffd..db1eeb6a973 100644 --- a/intel_extension_for_transformers/transformers/optimizer.py +++ b/intel_extension_for_transformers/transformers/optimizer.py @@ -103,7 +103,7 @@ def save_model(self, output_dir, tokenizer=None): Args: output_dir: the path to save config.json and pytorch_model.bin. - tokenizer (object, optional): the tokenizer object, use it if you want to + tokenizer (object, optional): the tokenizer object, use it if you want to save tokenizer.json in output_dir. Defaults to None. """ os.makedirs(shlex.quote(output_dir), exist_ok=True) @@ -176,7 +176,7 @@ def calib_dataloader(self): @eval_func.setter def eval_func(self, func: Callable): """Set the evaluation function. - + Args: func: evaluation function. 
""" @@ -185,7 +185,7 @@ def eval_func(self, func: Callable): @train_func.setter def train_func(self, func: Callable): """Set the train function. - + Args: func: train function. """ @@ -194,7 +194,7 @@ def train_func(self, func: Callable): @provider.setter def provider(self, provider): """Set the provider. - + Args: provider: optimization provider. """ @@ -203,7 +203,7 @@ def provider(self, provider): @calib_dataloader.setter def calib_dataloader(self, dataloader): """Set the calibration dataloader. - + Args: dataloader: calibration dataloader. """ @@ -220,7 +220,7 @@ def init_quantizer( provider: str = Provider.INC.value, ): """Init a Quantization object with config. - + Args: quant_config: quantization config. provider: define the quantization provider. @@ -281,7 +281,7 @@ def quantize( calib_dataloader=None, ): """Prepare for invoking the _inc_quantize function. - + Args: quant_config: quantization config. provider: define the quantization provider. @@ -313,7 +313,7 @@ def init_pruner( provider: str = Provider.INC.value, ): """Init a Pruning object with config. - + Args: pruning_config: pruning config. provider: define the pruning provider. @@ -340,7 +340,7 @@ def prune( train_func: Optional[Callable] = None, ): """Do the pruning. - + Args: pruning_config: pruning config. provider: define the pruning provider. @@ -373,7 +373,7 @@ def init_distiller( provider: str = Provider.INC.value, ): """Init a Distillation object with config and the teacher model. - + Args: distillation_config: distillation config. teacher_model: set the teacher model. @@ -403,7 +403,7 @@ def distill( train_func: Optional[Callable] = None, ): """Do the distillation. - + Args: distillation_config: distillation config. teacher_model: set the teacher model. @@ -432,7 +432,7 @@ def distill( def _save_inc_int8(self, opt_model, output_dir): """Save the optimized model in the output directory. - + Args: opt_model: optimized model. output_dir: output path. @@ -450,7 +450,7 @@ def save_model(self, output_dir, tokenizer=None): Args: output_dir: the path to save config.json and pytorch_model.bin. - tokenizer (object, optional): the tokenizer object, use it if you want to + tokenizer (object, optional): the tokenizer object, use it if you want to save tokenizer.json in output_dir. Defaults to None. """ os.makedirs(shlex.quote(output_dir), exist_ok=True) diff --git a/intel_extension_for_transformers/transformers/optimizer_tf.py b/intel_extension_for_transformers/transformers/optimizer_tf.py index 0ca78d87fe0..c13a40e9554 100644 --- a/intel_extension_for_transformers/transformers/optimizer_tf.py +++ b/intel_extension_for_transformers/transformers/optimizer_tf.py @@ -123,7 +123,7 @@ def input_names(self): @input_names.setter def input_names(self, input_names: List): """Set the input names. - + Args: input_names: the names of inputs. """ @@ -137,7 +137,7 @@ def output_names(self): @output_names.setter def output_names(self, output_names: List): """Set the output names. - + Args: output_names: the names of outputs. """ @@ -151,7 +151,7 @@ def eval_func(self): @eval_func.setter def eval_func(self, func: Callable): """Set the evaluation function. - + Args: func: evaluation function. """ @@ -165,7 +165,7 @@ def train_func(self): @train_func.setter def train_func(self, func: Callable): """Set the training function. - + Args: func: train function. """ @@ -179,7 +179,7 @@ def train_dataset(self): @train_dataset.setter def train_dataset(self, train_dataset): """Set the training dataset. - + Args: train_dataset: train dataset. 
""" @@ -195,7 +195,7 @@ def eval_dataset(self): @eval_dataset.setter def eval_dataset(self, eval_dataset): """Set the evaluation dataset. - + Args: eval_dataset: evaluation dataset. """ @@ -372,7 +372,7 @@ def init_quantizer( quant_config, ): """Init a Quantization object with config. - + Args: quant_config: quantization config. """ @@ -394,7 +394,7 @@ def _inc_quantize( quant_config, ): """Do the quantization. - + Args: quant_config: quantization config. """ @@ -438,7 +438,7 @@ def quantize( eval_dataset=None, ): """Prepare for invoking INC quantize function. - + Args: quant_config: quantization config. eval_func: evaluation function. @@ -463,7 +463,7 @@ def init_pruner( pruning_config=None, ): """Init a Pruning object with config. - + Args: pruning_config: pruning config. """ @@ -494,7 +494,7 @@ def prune( eval_dataset=None, ): """Do the pruning. - + Args: pruning_config: pruning config. eval_func: evaluation function. @@ -550,7 +550,7 @@ def init_distiller( teacher_model: PreTrainedModel, ): """Init a Distillation object with config and the teacher model. - + Args: distillation_config: distillation config. teacher_model: set the teacher model. @@ -618,7 +618,7 @@ def distill( train_func: Optional[Callable] = None, ): """Do the distillation. - + Args: distillation_config: distillation config. teacher_model: set the teacher model. @@ -651,7 +651,7 @@ def distill( def model_builder_builtin(self, arch_paras=None, model_cls=None): """Specify model_cls to use the built-in model builder. - + Args: arch_paras: architecture parameters. model_cls: model information. @@ -679,7 +679,7 @@ def autodistill( train_func: Optional[Callable] = None ): """Do the auto distillation. - + Args: autodistillation_config: autodistillation config. teacher_model: set the teacher model. @@ -837,7 +837,7 @@ def eval_func_builtin(model): def build_train_func(self, model): """Build the training function for pruning or distillation. - + Args: model (object): the input model """ @@ -900,4 +900,4 @@ def on_train_batch_end(self, batch, logs=None): epochs=epochs, callbacks=[callback()]) self.component.model._session = None - self.model.save_pretrained(get_filepath(TMPPATH, self.task_type, self.task_id), saved_model=True) \ No newline at end of file + self.model.save_pretrained(get_filepath(TMPPATH, self.task_type, self.task_id), saved_model=True) diff --git a/intel_extension_for_transformers/transformers/ppo_config.py b/intel_extension_for_transformers/transformers/ppo_config.py index a0c6198ef6a..361985a9a26 100644 --- a/intel_extension_for_transformers/transformers/ppo_config.py +++ b/intel_extension_for_transformers/transformers/ppo_config.py @@ -66,7 +66,7 @@ class PPOConfig: remove_unused_columns: bool = True """Remove unused columns from the dataset if `datasets.Dataset` is used""" tracker_kwargs: JSONDict = field(default_factory=dict) - """Keyword arguments for the tracker (e.g. python ppo.py --ppo_config.tracker_kwargs='{"wandb": {"entity": + """Keyword arguments for the tracker (e.g. 
python ppo.py --ppo_config.tracker_kwargs='{"wandb": {"entity": "my_wandb_entity", "name": "my_exp_name"}}'""" accelerator_kwargs: JSONDict = field(default_factory=dict) """Keyword arguments for the accelerator""" @@ -87,7 +87,7 @@ class PPOConfig: init_kl_coef: Optional[float] = 0.2 """Initial KL penalty coefficient (used for adaptive and linear control)""" kl_penalty: Literal["kl", "abs", "mse", "full"] = "kl" - """kl penalty options: 'kl': model_logp - ref_logp, 'abs': abs(kl), 'mse': mean squared error mse(kl) and 'full': + """kl penalty options: 'kl': model_logp - ref_logp, 'abs': abs(kl), 'mse': mean squared error mse(kl), and 'full': the actual kl for all tokens in the distribution""" target: Optional[float] = 6 """Target KL value for adaptive KL control""" diff --git a/intel_extension_for_transformers/transformers/pruning.py b/intel_extension_for_transformers/transformers/pruning.py index 79d8f7a6636..6877a8daf76 100644 --- a/intel_extension_for_transformers/transformers/pruning.py +++ b/intel_extension_for_transformers/transformers/pruning.py @@ -72,6 +72,3 @@ def __init__(self, epoch_range: List=[0, 4], initial_sparsity_ratio: float=0.0, self.method = method self.names = names self.parameters = parameters - - - diff --git a/intel_extension_for_transformers/transformers/trainer.py b/intel_extension_for_transformers/transformers/trainer.py index 466786b994d..f5f52fb2056 100644 --- a/intel_extension_for_transformers/transformers/trainer.py +++ b/intel_extension_for_transformers/transformers/trainer.py @@ -116,9 +116,9 @@ class BaseTrainer(): """The base class of trainer.""" def __init__(self, *args, **kwargs): """Initialization function. - + Args: - args: defined parameters. + args: defined parameters. kwargs: additional keyword arguments used to hide deprecated arguments. """ super().__init__(*args, **kwargs) @@ -257,15 +257,15 @@ def init_quantizer( provider: str = Provider.INC.value, ): """Initialize the quantizer. - + Args: - quant_config: The path to the YAML configuration file or QuantizationConfig class containing + quant_config: The path to the YAML configuration file or QuantizationConfig class containing accuracy goal, quantization objective and related dataloaders etc. provider: The provider used to quantize. - + Returns: - An objective of neural_compressor Quantization class, which can automativally searches for - optimal quantization recipes for low precision model inference and achieving best tuning + An object of the neural_compressor Quantization class, which can automatically search for + optimal quantization recipes for low-precision model inference and achieve the best tuning objectives. """ from neural_compressor.experimental import Quantization @@ -333,7 +333,7 @@ def quantize( """The main entry point of automatic quantization tuning. Args: - quant_config: The path to the YAML configuration file or QuantizationConfig class containing + quant_config: The path to the YAML configuration file or QuantizationConfig class containing accuracy goal, quantization objective and related dataloaders etc. provider: The provider used to quantize. eval_func (:obj:`Callable`, optional): The function used to evaluate the model. @@ -341,8 +341,8 @@ def quantize( calib_dataloader: The dataloader for calibration dataset.
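Looping back to the `PPOConfig` hunk above: the documented `kl_penalty` options map to simple tensor operations. A hedged sketch of the first three variants (`'full'` needs the whole token distribution and is omitted; the 0.5 factor for `'mse'` is an assumption about the "mean squared error" wording, not read from the diff):

```python
import torch

def kl_penalty(logp: torch.Tensor, ref_logp: torch.Tensor, kind: str = "kl") -> torch.Tensor:
    if kind == "kl":     # 'kl': model_logp - ref_logp
        return logp - ref_logp
    if kind == "abs":    # 'abs': abs(kl)
        return (logp - ref_logp).abs()
    if kind == "mse":    # 'mse': squared-error form, smoother near zero (0.5 factor assumed)
        return 0.5 * (logp - ref_logp).square()
    raise ValueError(f"unsupported kl_penalty: {kind} ('full' needs full distributions)")
```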
Returns: - An objective of neural_compressor Quantization class, which can automativally searches for - optimal quantization recipes for low precision model inference and achieving best tuning + An object of the neural_compressor Quantization class, which can automatically search for + optimal quantization recipes for low-precision model inference and achieve the best tuning objectives. """ self._eval_func = self.builtin_eval_func if eval_func is None else eval_func @@ -383,12 +383,12 @@ def init_pruner( provider: str = Provider.INC.value, ): """Initialize the pruner. - + Args: pruning_config: The path to the YAML configuration file or PruningConf class containing accuracy goal, pruning objective and related dataloaders etc. provider: The provider used to quantize. - + Returns: An object of the neural_compressor Pruning class. """ @@ -484,7 +484,7 @@ def init_distiller( accuracy goal, distillation objective and related dataloaders etc. teacher_model: The model (torch.nn.Module) that transfers knowledge to a smaller model. provider (str): The provider used to quantize. - + Returns: An object of the neural_compressor Distillation class. """ @@ -514,13 +514,13 @@ def distill( """The main entry point of automatic distillation tuning. Args: - quant_config: The path to the YAML configuration file or DistillationConfig class containing + quant_config: The path to the YAML configuration file or DistillationConfig class containing accuracy goal, distillation objective and related dataloaders etc. teacher_model: The model (torch.nn.Module) that transfers knowledge to a smaller model. provider (str): The provider used to quantize. eval_func (:obj:`Callable`, optional): The function to evaluate the model. train_func (:obj:`Callable`, optional): The function to train the model. - + Returns: An object of the neural_compressor Distillation class. """ @@ -559,7 +559,7 @@ def orchestrate_optimizations( Args: config_list: The list of configs. - teacher_model (:obj:`Callable`, optional): The model(torch.nn.Module) transfers knowledge + teacher_model (:obj:`Callable`, optional): The model (torch.nn.Module) that transfers knowledge to a smaller model. eval_func (:obj:`Callable`, optional): Evaluation function to evaluate the tuning objective. train_func (:obj:`Callable`, optional): Training function which will be combined with pruning. @@ -578,10 +578,10 @@ def orchestrate_optimizations( def create_optimizer_builtin(self, config_list, teacher_model=None): """The function to create optimizer. - + Args: config_list: The list of configs. - teacher_model (:obj:`Callable`, optional): The model(torch.nn.Module) transfers knowledge + teacher_model (:obj:`Callable`, optional): The model (torch.nn.Module) that transfers knowledge to a smaller model. """ components = [] @@ -622,11 +622,11 @@ def train( Args: component (:obj:`Component`, `optional`): Component object handling the training process. resume_from_checkpoint (:obj:`str` or :obj:`bool`, `optional`): If a :obj:`str`, local path - to a saved checkpoint as saved by a previous instance of :class:`~transformers.Trainer`. + to a saved checkpoint as saved by a previous instance of :class:`~transformers.Trainer`. If a :obj:`bool` and equals `True`, load the last checkpoint in `args.output_dir` as saved by a previous instance of :class:`~transformers.Trainer`. If present, training will resume from the model/optimizer/scheduler states loaded here.
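As a usage sketch for the `quantize` entry point documented above: the class and helper names follow ITREX's public examples, but treat the exact fields as assumptions rather than a verified recipe.

```python
# Hedged sketch: assumes an ITREX trainer instance plus the public
# QuantizationConfig/metrics helpers; field names are illustrative.
from intel_extension_for_transformers.transformers import QuantizationConfig, metrics

tune_metric = metrics.Metric(name="eval_accuracy", is_relative=True, criterion=0.01)
q_config = QuantizationConfig(approach="PostTrainingStatic", metrics=[tune_metric])
model = trainer.quantize(quant_config=q_config,
                         calib_dataloader=trainer.get_eval_dataloader())
```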
- trial (:obj:`optuna.Trial` or :obj:`Dict[str, Any]`, `optional`): The trial run or the + trial (:obj:`optuna.Trial` or :obj:`Dict[str, Any]`, `optional`): The trial run or the hyperparameter dictionary for hyperparameter search. ignore_keys_for_eval (:obj:`List[str]`, `optional`): A list of keys in the output of your model (if it is a dictionary) that should be ignored when gathering predictions for evaluation @@ -1115,8 +1115,8 @@ def training_step( Args: model (:obj:`nn.Module`): The model to train. inputs (:obj:`Dict[str, Union[torch.Tensor, Any]]`): The inputs and targets of the model. - The dictionary will be unpacked before being fed to the model. Most models expect - the targets under the argument :obj:`labels`. Check your model's documentation for + The dictionary will be unpacked before being fed to the model. Most models expect + the targets under the argument :obj:`labels`. Check your model's documentation for all accepted arguments. Return: @@ -1374,7 +1374,7 @@ def training_step_length_adaptive( # pylint: disable=E1101 def compute_loss(self, model, inputs, return_outputs=False): # pragma: no cover """How the loss is computed by Trainer. - + By default, all models return the loss in the first element. Subclass and override for custom behavior. @@ -1515,17 +1515,17 @@ def nas( NAS is composed of two major stages, Model Exploration and Evaluation. - In Model Exploration, a search engine will search for a better compressed model from the architecture + In Model Exploration, a search engine will search for a better compressed model from the architecture design space in each iteration. - In Evaluation stage, the trained model will be evaluated to measure its performances (e.g. the prediction + In the Evaluation stage, the trained model will be evaluated to measure its performance (e.g. the prediction accuracy, the hardware performance etc.) in order to select the best model architecture. Args: - nas_config: The path to the YAML configuration file or a configuration + nas_config: The path to the YAML configuration file or a configuration object containing settings for NAS, etc. provider (str): Provide the basic function. Defaults to INC. - model_builder (:obj:`Callabel`, optional): A function to build model instance with + model_builder (:obj:`Callable`, optional): A function to build a model instance with the specified model architecture parameters. model_cls (:obj:`Callable`, optional): Class of the model. eval_func (:obj:`Callable`, optional): The function to evaluate the model. @@ -1604,21 +1604,21 @@ def autodistillation( AutoDistillation is composed of three major stages, Model Exploration, Flash Distillation, and Evaluation. - In Model Exploration, a search engine will search for a better compressed model from the architecture + In Model Exploration, a search engine will search for a better compressed model from the architecture design space in each iteration. Flash Distillation is the stage for training the searched model to discover its potential. - In Evaluation stage, the trained model will be evaluated to measure its performances (e.g. the prediction + In the Evaluation stage, the trained model will be evaluated to measure its performance (e.g. the prediction accuracy, the hardware performance etc.) in order to select the best model architecture.
Args: - autodistillation_config: The path to the YAML configuration file or a configuration + autodistillation_config: The path to the YAML configuration file or a configuration object containing search settings, flash distillation settings, etc. - teacher_model: The model(torch.nn.Module or PreTrainedModel) transfers knowledge to + teacher_model: The model (torch.nn.Module or PreTrainedModel) that transfers knowledge to a smaller model. provider (str): Provide the basic function. Defaults to INC. - model_builder (:obj:`Callabel`, optional): A function to build model instance with + model_builder (:obj:`Callable`, optional): A function to build a model instance with the specified model architecture parameters. model_cls (:obj:`Callable`, optional): Class of the model. eval_func (:obj:`Callable`, optional): The function to evaluate the model. @@ -1644,7 +1644,7 @@ def take_train_steps(model, block_name=None, checkpoint=None): """A training step with automatic distillation. - + Args: trainer: define the training and evaluation loop for PyTorch. agent: distillation model. @@ -1672,7 +1672,7 @@ def take_train_steps(model, def take_eval_steps(model, trainer, metric_names, save_metrics=False): """An evaluation step with automatic distillation. - + Args: model: the target model to make evaluation. trainer: define the training and evaluation loop for PyTorch. @@ -1694,7 +1694,7 @@ def take_eval_steps(model, trainer, metric_names, save_metrics=False): def train_func_builtin(model): """The function that uses the specified method to train the model. - + Args: model: input model. """ @@ -1764,7 +1764,7 @@ def run_distillers(model, def eval_func_builtin(model): """The function that uses the specified method to evaluate the model. - + Args: model: input model. """ @@ -1782,7 +1782,7 @@ def eval_func_builtin(model): def model_builder_builtin(self, arch_paras=None, model_cls=None): """The function that uses the specified method to build the model. - + Args: arch_paras: Parameters of the architecture to build a new model. model_cls: Class for the model. @@ -2003,9 +2003,9 @@ def _save(self, output_dir: Optional[str] = None, state_dict=None): def export_to_onnx(self, *args, **kwargs): """The function to export the model to an ONNX model. - + Args: - args: defined parameters. + args: defined parameters. kwargs: additional keyword arguments used to hide deprecated arguments. """ if self.enable_bf16: @@ -2023,7 +2023,7 @@ def export_to_fp32_onnx( verbose=True, ): """The function to export the model to an fp32 ONNX model. - + Args: save_path: the save path of the exported model. opset_version: the onnx op version of the exported model. @@ -2066,7 +2066,7 @@ def export_to_bf16_onnx( verbose=True, ): """The function to export the model to a bf16 ONNX model. - + Args: save_path: the save path of the exported model. opset_version: the onnx op version of the exported model. @@ -2128,7 +2128,7 @@ def export_to_int8_onnx( scale_mapping=False, ): """The function to export the model to an int8 ONNX model. - + Args: save_path: the save path of the exported model. quant_format: quantization format.
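The three export helpers documented above share the same call shape; a hedged call-site sketch (paths and opset version are arbitrary illustrative choices, and `trainer` is an ITREX trainer constructed elsewhere):

```python
# Illustrative only; assumes a trainer exposing the export methods above.
trainer.export_to_fp32_onnx(save_path="model_fp32.onnx", opset_version=14)
if getattr(trainer, "enable_bf16", False):
    trainer.export_to_bf16_onnx(save_path="model_bf16.onnx")
```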
@@ -2322,7 +2322,7 @@ def export_to_int8_onnx( activation_type = ortq.QuantType.QInt8 weight_type = ortq.QuantType.QInt8 elif 'U8S8' in dtype: # pragma: no cover - if not self.enable_executor: + if not self.enable_executor: logger.error("Right now, we don't support dtype: {}, please use \ U8U8/S8S8 or set trainer.enable_executor=True \ for U8S8.".format(dtype)) @@ -2471,7 +2471,7 @@ def get_export_args(self, model): self._remove_label(input) # convert to a dict - input = dict(input.items()) + input = dict(input.items()) if model.__class__.__name__ == 'XLNetForSequenceClassification': # pragma: no cover input.pop('token_type_ids') @@ -2500,14 +2500,14 @@ def get_export_args(self, model): pass return input, input_names, output_names, axes_dict - + def infer_task(self, model): """Infer task.""" from optimum.exporters.tasks import TasksManager if not hasattr(model, "config"): raise ValueError("model doesn't have 'config' attribute.") - + try: # infer task from model id model_name_or_path = model.config._name_or_path @@ -2517,7 +2517,7 @@ def infer_task(self, model): # infer task from model itself task = TasksManager.infer_task_from_model(model) except: # pragma: no cover - try: + try: # infer task from model type model_type = model.config.model_type.replace("_", "-") tasks = TasksManager.get_supported_tasks_for_model_type(model_type, "onnx") @@ -2525,7 +2525,7 @@ def infer_task(self, model): task = tasks[0] except: raise ValueError("Could not infer the task.") - + return task @staticmethod @@ -2594,9 +2594,9 @@ def set_dynamic_config( dynamic_config: DynamicLengthConfig, ): """The function to set dynamic config. - + Args: - dynamic_config: the settings of the dynamic config. + dynamic_config: the settings of the dynamic config. """ self.dynamic_config = dynamic_config lc = None diff --git a/intel_extension_for_transformers/transformers/utils/metrics.py b/intel_extension_for_transformers/transformers/utils/metrics.py index 9f615d85950..d2bcc19218d 100644 --- a/intel_extension_for_transformers/transformers/utils/metrics.py +++ b/intel_extension_for_transformers/transformers/utils/metrics.py @@ -25,20 +25,20 @@ def __init__(self, name: str, greater_is_better: bool = True, is_relative: bool Args: name: Metric name which evaluates function returns, like:"eval_f1", "eval_accuracy"... - greater_is_better: Used to describe the usage of the metric, like: greater is better for f1, + greater_is_better: Used to describe the usage of the metric, like: greater is better for f1, this parameter is only used for quantization. - is_relative: Used in conjunction with "criterion". If "criterion" is 0.01, and "is_relative" - is True, it means that we want to get an optimized model which metric drop <1% relative, - if "is_relative" is False, means metric drop <1% absolute, this parameter is only used + is_relative: Used in conjunction with "criterion". If "criterion" is 0.01, and "is_relative" + is True, it means that we want to get an optimized model which metric drop <1% relative, + if "is_relative" is False, means metric drop <1% absolute, this parameter is only used for quantization. - criterion: Used in conjunction with "is_relative". If "criterion" is 0.01, and "is_relative" - is True, it means that we want to get an optimized model which metric drop <1% relative, - if "criterion" is 0.02, means metric drop <2% relative, this parameter is only used for + criterion: Used in conjunction with "is_relative". 
If "criterion" is 0.01, and "is_relative" + is True, it means that we want to get an optimized model which metric drop <1% relative, + if "criterion" is 0.02, means metric drop <2% relative, this parameter is only used for quantization. - weight_ratio: Used when there are multiple metrics, for example: you want to focus on both - f1 and accuracy, then you will create f1 instance and accuracy instance, and indicate - their weight proportion. If weight_ratio of f1 is 0.3, and weight ratio of accuracy - is 0.7, then the final metric to tune is f1*0.3 + accuracy*0.7, this parameter is only + weight_ratio: Used when there are multiple metrics, for example: you want to focus on both + f1 and accuracy, then you will create f1 instance and accuracy instance, and indicate + their weight proportion. If weight_ratio of f1 is 0.3, and weight ratio of accuracy + is 0.7, then the final metric to tune is f1*0.3 + accuracy*0.7, this parameter is only used for quantization. """ self.name = name diff --git a/intel_extension_for_transformers/transformers/utils/objectives.py b/intel_extension_for_transformers/transformers/utils/objectives.py index 7f2b3ec5d93..74ae7aad700 100644 --- a/intel_extension_for_transformers/transformers/utils/objectives.py +++ b/intel_extension_for_transformers/transformers/utils/objectives.py @@ -24,12 +24,12 @@ def __init__(self, name: str, greater_is_better: bool = True, weight_ratio: floa Args: name: Objectice name. - greater_is_better: Used to describe the usage of the metric, like: greater is better for f1, + greater_is_better: Used to describe the usage of the metric, like: greater is better for f1, this parameter is only used for quantization. - weight_ratio: Used when there are multiple metrics, for example: you want to focus on both - f1 and accuracy, then you will create f1 instance and accuracy instance, and indicate - their weight proportion. If weight_ratio of f1 is 0.3, and weight ratio of accuracy - is 0.7, then the final metric to tune is f1*0.3 + accuracy*0.7, this parameter is only + weight_ratio: Used when there are multiple metrics, for example: you want to focus on both + f1 and accuracy, then you will create f1 instance and accuracy instance, and indicate + their weight proportion. If weight_ratio of f1 is 0.3, and weight ratio of accuracy + is 0.7, then the final metric to tune is f1*0.3 + accuracy*0.7, this parameter is only used for quantization. """ self.name = name diff --git a/intel_extension_for_transformers/transformers/utils/utility_tf.py b/intel_extension_for_transformers/transformers/utils/utility_tf.py index d293d6b928e..583a83ff5a3 100644 --- a/intel_extension_for_transformers/transformers/utils/utility_tf.py +++ b/intel_extension_for_transformers/transformers/utils/utility_tf.py @@ -105,4 +105,4 @@ def get_filepath(base_dirpath, task_type, task_id): def keras2SavedModel(model): # pragma: no cover """Transfer keras model into save_model.""" model = common.Model(model) - return model.model \ No newline at end of file + return model.model diff --git a/intel_extension_for_transformers/utils/data_augmentation.py b/intel_extension_for_transformers/utils/data_augmentation.py index 75dd15f34fc..9a4598b2e0d 100644 --- a/intel_extension_for_transformers/utils/data_augmentation.py +++ b/intel_extension_for_transformers/utils/data_augmentation.py @@ -255,7 +255,7 @@ def text_generation_augmentation(self, extension, raw_datasets): """Execute the process of text generation augmentation. 
Args: - extension: No used + extension: Not used raw_datasets: The original datasets, the datasets can be from huggingface datasets (like: glue/sst2) or custom datasets, each sample should be: 'label' + '\t' + 'sentence' + EOS + '\n' diff --git a/intel_extension_for_transformers/utils/llm_carbon_calc.py b/intel_extension_for_transformers/utils/llm_carbon_calc.py index f46b1fd80b8..4d8c448e93c 100644 --- a/intel_extension_for_transformers/utils/llm_carbon_calc.py +++ b/intel_extension_for_transformers/utils/llm_carbon_calc.py @@ -22,10 +22,10 @@ JOUL_TO_KWH = 2.78e-7 def main(): - parser = argparse.ArgumentParser(description='LLM carbon calculator - ' + parser = argparse.ArgumentParser(description='LLM carbon calculator - ' 'simple estimator of LLM inference ' 'carbon emission') - parser.add_argument('-c', '--carbon-intensity', type=float, + parser.add_argument('-c', '--carbon-intensity', type=float, dest='carbon_intensity', default=WORLD_AVG_CARBON_INTENSITY, metavar='C', help='carbon intensity of electricity of your country ' @@ -64,14 +64,14 @@ def main(): m = args.mem tdp = args.tdp c = args.carbon_intensity - carbon = (tdp + m * 0.001 * POWER_PER_GB_MEM) * t * 0.001 * JOUL_TO_KWH * c + carbon = (tdp + m * 0.001 * POWER_PER_GB_MEM) * t * 0.001 * JOUL_TO_KWH * c print('TDP (W): ', tdp) print('Memory Consumption (MB): ', m) print('Output token number: ', args.token_size) print('Total time of one inference (ms): ', t) print('Carbon emission in one inference (kgCO2e): ', carbon) - return carbon + return carbon if __name__ == '__main__': - main() \ No newline at end of file + main() diff --git a/intel_extension_for_transformers/utils/llm_carbon_calc_readme.md b/intel_extension_for_transformers/utils/llm_carbon_calc_readme.md index 9b49e91993b..b0d7945d987 100644 --- a/intel_extension_for_transformers/utils/llm_carbon_calc_readme.md +++ b/intel_extension_for_transformers/utils/llm_carbon_calc_readme.md @@ -44,4 +44,4 @@ When the total time of one inference procedure is not measured but first token l Output token number: 32 Total time of one inference (ms): 6510.36 Carbon emission in one inference (kgCO2e): 0.00035751489124038457 -``` \ No newline at end of file +``` diff --git a/requirements-gpu.txt b/requirements-gpu.txt index 109cb85e7dd..9dcce32ae6d 100644 --- a/requirements-gpu.txt +++ b/requirements-gpu.txt @@ -1,11 +1,11 @@ -ninja +--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us +accelerate cmake +datasets +intel_extension_for_pytorch==2.0.110+xpu +ninja py-cpuinfo setuptools>=65 setuptools_scm[toml]>=6.2 -accelerate -datasets texttable ---extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us torch==2.0.1a0 -intel_extension_for_pytorch==2.0.110+xpu diff --git a/requirements.txt b/requirements.txt index e51a5f8682f..3a76c79a535 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,10 +1,10 @@ -ninja +--extra-index-url https://download.pytorch.org/whl/cpu +accelerate cmake>=3.16 +gguf +ninja +optimum-intel py-cpuinfo setuptools>=65 setuptools_scm[toml]>=6.2 ---extra-index-url https://download.pytorch.org/whl/cpu torch==2.1.0+cpu -accelerate -optimum-intel -gguf diff --git a/setup.py b/setup.py index 26f68033019..de78269ae34 100644 --- a/setup.py +++ b/setup.py @@ -323,4 +323,4 @@ def check_submodules(): ], setup_requires=['setuptools_scm'], use_scm_version=True, - ) \ No newline at end of file + ) diff --git a/tests/CI/requirements.txt b/tests/CI/requirements.txt index dc833dd4bef..50ef84425df 100644 ---
a/tests/CI/requirements.txt +++ b/tests/CI/requirements.txt @@ -1 +1 @@ -../requirements.txt \ No newline at end of file +../requirements.txt diff --git a/tests/CI/test_benchmark.py b/tests/CI/test_benchmark.py index ae3e532ef8a..4e12c9d392b 100644 --- a/tests/CI/test_benchmark.py +++ b/tests/CI/test_benchmark.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import os import shutil import unittest diff --git a/tests/CI/test_config.py b/tests/CI/test_config.py index 3d0f212dc71..052c68b7b0d 100644 --- a/tests/CI/test_config.py +++ b/tests/CI/test_config.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import numpy import shutil import torch diff --git a/tests/CI/test_data_augmentation.py b/tests/CI/test_data_augmentation.py index 32d263bc44c..3c1470debe4 100644 --- a/tests/CI/test_data_augmentation.py +++ b/tests/CI/test_data_augmentation.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import os import shutil import unittest diff --git a/tests/CI/test_device_utils.py b/tests/CI/test_device_utils.py index 811057b21b4..ee7647343a2 100644 --- a/tests/CI/test_device_utils.py +++ b/tests/CI/test_device_utils.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
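Most of the remaining hunks in this patch are the same fourteen-line Apache-2.0 header being prepended to test files. A small helper in that spirit follows; it is purely illustrative and is not the project's actual copyright-check tooling:

```python
# Hypothetical helper (not the repo's copyright_check workflow): prepend the
# Apache-2.0 header to Python files that lack a copyright line.
import pathlib

HEADER = (
    "# Copyright (c) 2024 Intel Corporation\n"
    "#\n"
    '# Licensed under the Apache License, Version 2.0 (the "License");\n'
    # ...the remaining header lines would follow the full template...
)

def ensure_header(path: pathlib.Path) -> bool:
    text = path.read_text()
    if text.lstrip().startswith("# Copyright"):
        return False                      # header already present
    path.write_text(HEADER + "\n" + text)
    return True

updated = [p for p in pathlib.Path("tests").rglob("*.py") if ensure_header(p)]
print(f"updated {len(updated)} files")
```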
+ import os import shutil import unittest diff --git a/tests/CI/test_dpo.py b/tests/CI/test_dpo.py index fa87b34b41f..899cfc7c546 100644 --- a/tests/CI/test_dpo.py +++ b/tests/CI/test_dpo.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import os import unittest from intel_extension_for_transformers.transformers.dpo_trainer import DPOTrainer, is_peft_available, disable_dropout_in_model diff --git a/tests/CI/test_evaluation.py b/tests/CI/test_evaluation.py index ed156978d4c..61d0598ad3f 100644 --- a/tests/CI/test_evaluation.py +++ b/tests/CI/test_evaluation.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import os import shutil import unittest @@ -90,7 +104,7 @@ def test_cnn_daily(self): model=model, tokenizer_name="t5-small", batch_size=1, limit=5 ) self.assertEqual(results["rouge2"], 9.5858) - + def test_evaluate_for_ort_Seq2SeqLM(self): from intel_extension_for_transformers.llm.evaluation.lm_eval import evaluate cmd = 'optimum-cli export onnx --model hf-internal-testing/tiny-random-t5 --task text2text-generation-with-past t5-past/' @@ -184,7 +198,7 @@ def test_evaluate_for_ort_CasualLM(self): model_format="onnx" ) self.assertEqual(results["results"]["piqa"]["acc"], 0.6) - + # test evaluate model exported with optimum >= 1.14.0 if Version(optimum.version.__version__) >= OPTIMUM114_VERSION: cmd = 'optimum-cli export onnx --model hf-internal-testing/tiny-random-gptj --task text-generation-with-past gptj-past/' diff --git a/tests/CI/test_evaluation_bigcode.py b/tests/CI/test_evaluation_bigcode.py index d87d17a5d79..d14923659d1 100644 --- a/tests/CI/test_evaluation_bigcode.py +++ b/tests/CI/test_evaluation_bigcode.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ import os import shutil import unittest @@ -8,7 +22,7 @@ class TestLmEvaluationHarness(unittest.TestCase): @classmethod def setUpClass(self): - self.starcoder = AutoModelForCausalLM.from_pretrained("bigcode/tiny_starcoder_py") + self.starcoder = AutoModelForCausalLM.from_pretrained("bigcode/tiny_starcoder_py") cmd = 'pip install git+https://github.com/bigcode-project/bigcode-evaluation-harness.git@0d84db85f9ff971fa23a187a3347b7f59af288dc' p = subprocess.Popen(cmd, preexec_fn=os.setsid, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) # nosec diff --git a/tests/CI/test_llava.py b/tests/CI/test_llava.py index a621e8fa0bb..cd5e9df809e 100644 --- a/tests/CI/test_llava.py +++ b/tests/CI/test_llava.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import os import unittest from intel_extension_for_transformers.transformers.modeling.llava_models import LlavaMistralForCausalLM diff --git a/tests/CI/test_modeling.py b/tests/CI/test_modeling.py index f8c2914d0a8..8b0d324db1b 100644 --- a/tests/CI/test_modeling.py +++ b/tests/CI/test_modeling.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import copy import os import shutil diff --git a/tests/CI/test_neural_engine.py b/tests/CI/test_neural_engine.py index b7f2bbac107..42fada36ef7 100644 --- a/tests/CI/test_neural_engine.py +++ b/tests/CI/test_neural_engine.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ from intel_extension_for_transformers.llm.runtime.deprecated.compile import compile from intel_extension_for_transformers.llm.runtime.deprecated.compile.ops.tensor import Tensor from intel_extension_for_transformers.llm.runtime.deprecated.compile.graph import Graph @@ -29,7 +43,7 @@ def test_Bert_Mini_int8_Onnx_Neural_Engine(self): "The ONNX model was not successfully downloaded, therefore the test may not run" ) return - + model = compile(filename) input_0 = np.random.randint(0, 384, (1, 32)).reshape(1, 32) input_1 = np.random.randint(1, 2, (1, 32)).reshape(1, 32) @@ -53,11 +67,11 @@ def test_torch_model_Neural_Engine(self): newgraph = Graph() newgraph.graph_init(file_name + '/conf.yaml', file_name + '/model.bin') out = newgraph.inference([example_in.numpy()]) - + np.testing.assert_almost_equal(ref_out, [*out.values()][0], decimal=5) os.remove('{}.pt'.format(file_name)) shutil.rmtree(file_name) - - + + if __name__ == "__main__": unittest.main() diff --git a/tests/CI/test_pipeline.py b/tests/CI/test_pipeline.py index 356ea777b6b..bc3a0ca629d 100644 --- a/tests/CI/test_pipeline.py +++ b/tests/CI/test_pipeline.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import os import unittest import shutil diff --git a/tests/CI/test_ppo.py b/tests/CI/test_ppo.py index 617db562625..b4e5b112804 100644 --- a/tests/CI/test_ppo.py +++ b/tests/CI/test_ppo.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import os import unittest diff --git a/tests/CI/test_pytorch_pruner.py b/tests/CI/test_pytorch_pruner.py index 5deea2c487e..88264da15f9 100644 --- a/tests/CI/test_pytorch_pruner.py +++ b/tests/CI/test_pytorch_pruner.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
+ import os import sys import unittest diff --git a/tests/CI/test_quantization.py b/tests/CI/test_quantization.py index 0348c88bfef..43c0274b150 100644 --- a/tests/CI/test_quantization.py +++ b/tests/CI/test_quantization.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from ast import LShift from math import isclose import numpy as np @@ -378,7 +392,7 @@ def test_quantization_for_llm(self): ) # amp - amp_config = MixedPrecisionConfig() + amp_config = MixedPrecisionConfig() amp_model = AutoModelForCausalLM.from_pretrained(model_name_or_path, quantization_config=amp_config, use_llm_runtime=False diff --git a/tests/CI/test_quantization_qa_ipex.py b/tests/CI/test_quantization_qa_ipex.py index fe33823d435..67e75a45bd5 100644 --- a/tests/CI/test_quantization_qa_ipex.py +++ b/tests/CI/test_quantization_qa_ipex.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import os import sys import torch @@ -43,7 +57,7 @@ def test_run_qa_ipex(self): run_qa.main() int8_model = OptimizedModel.from_pretrained("./tmp/squad_output") self.assertTrue(isinstance(int8_model, torch.jit.ScriptModule)) - + test_args = f""" run_qa.py --model_name_or_path bert-large-uncased-whole-word-masking-finetuned-squad diff --git a/tests/CI/test_weight_only.py b/tests/CI/test_weight_only.py index f21b6acd326..07a7f953aab 100644 --- a/tests/CI/test_weight_only.py +++ b/tests/CI/test_weight_only.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import copy import os import torch diff --git a/tests/CI/test_weight_only_gpu.py b/tests/CI/test_weight_only_gpu.py index abe4448729c..4880507c52b 100644 --- a/tests/CI/test_weight_only_gpu.py +++ b/tests/CI/test_weight_only_gpu.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import os import torch import unittest diff --git a/tests/Nightly/requirements.txt b/tests/Nightly/requirements.txt index dc833dd4bef..50ef84425df 100644 --- a/tests/Nightly/requirements.txt +++ b/tests/Nightly/requirements.txt @@ -1 +1 @@ -../requirements.txt \ No newline at end of file +../requirements.txt diff --git a/tests/Nightly/test_autodistillation.py b/tests/Nightly/test_autodistillation.py index 1db02ef4947..6dcd1ee780d 100644 --- a/tests/Nightly/test_autodistillation.py +++ b/tests/Nightly/test_autodistillation.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import os import shutil import torch diff --git a/tests/Nightly/test_carbon_calc.py b/tests/Nightly/test_carbon_calc.py index 1536a9303db..94b5aa6fae6 100644 --- a/tests/Nightly/test_carbon_calc.py +++ b/tests/Nightly/test_carbon_calc.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ import unittest from unittest.mock import patch from intel_extension_for_transformers.utils.llm_carbon_calc import main @@ -8,16 +22,16 @@ def test_calc_with_inference_time(self): with patch("sys.argv", ["main", "-m", "27412.98", "--tdp", "350", "-c", "0.56", "-t", "6510.3"]): ret = main() assert ret == 0.0003575115963544682 - + def test_calc_with_token_latency(self): with patch("sys.argv", ["main", "-m", "27412.98", "--tdp", "350", "-c", "0.56", "--fl", "2284.75", "--nl", "136.31", "-n", "64"]): ret = main() assert ret == 0.0005970487041784186 - + def test_calc_with_missing_arg(self): with patch("sys.argv", ["main", "-m", "27412.98", "--tdp", "350", "-c", "0.56", "--fl", "2284.75", "-n", "64"]): ret = main() assert ret == 0.0 - + if __name__ == "__main__": - unittest.main() \ No newline at end of file + unittest.main() diff --git a/tests/Nightly/test_distillation.py b/tests/Nightly/test_distillation.py index 505c2a35775..118c7bb4444 100644 --- a/tests/Nightly/test_distillation.py +++ b/tests/Nightly/test_distillation.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import copy import mlflow import numpy as np diff --git a/tests/Nightly/test_dynamic_length.py b/tests/Nightly/test_dynamic_length.py index 8fd4c8c5fad..2395e429306 100644 --- a/tests/Nightly/test_dynamic_length.py +++ b/tests/Nightly/test_dynamic_length.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
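The expected values in `test_carbon_calc.py` above follow directly from the formula in the `llm_carbon_calc.py` hunk earlier in this patch. A re-derivation of the first test case; note that `POWER_PER_GB_MEM = 0.1` W/GB is an inference from the numbers (its definition is not shown in this diff), chosen because it reproduces the asserted result:

```python
# Hedged re-derivation of test_calc_with_inference_time.
JOUL_TO_KWH = 2.78e-7     # from llm_carbon_calc.py
POWER_PER_GB_MEM = 0.1    # assumption: watts per GB of memory

tdp_w, mem_mb, intensity, time_ms = 350.0, 27412.98, 0.56, 6510.3
energy_j = (tdp_w + mem_mb * 0.001 * POWER_PER_GB_MEM) * time_ms * 0.001
carbon_kgco2e = energy_j * JOUL_TO_KWH * intensity
print(carbon_kgco2e)      # ~0.0003575115963544682, matching the test assertion
```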
+ import os import onnx import shutil @@ -61,7 +75,7 @@ def __init__(self, labels=False, type=None): if labels: self.encoded_dict['start_positions'] = [21] self.encoded_dict['end_positions'] = [25] - + def __len__(self): return 1 @@ -109,17 +123,17 @@ def tearDownClass(self): def test_dynamic_inference(self): full_output = self.dynamic_trainer.predict(self.dummy_dataset).predictions - + dynamic_length_config = DynamicLengthConfig( const_rate=0.2, max_length=MAX_LENGTH ) self.dynamic_trainer.set_dynamic_config(dynamic_length_config) - + dynamic_output = self.dynamic_trainer.predict(self.dummy_dataset).predictions - + self.assertTrue((full_output[0] != dynamic_output[0]).any()) - + #check onnx self.dynamic_trainer.export_to_onnx('dynamic-model.onnx') self.assertTrue(check_onnx('dynamic-model.onnx', self.dynamic_trainer.get_eval_dataloader())) @@ -145,17 +159,17 @@ def tearDownClass(self): def test_dynamic_inference(self): full_output = self.dynamic_trainer.predict(self.dummy_dataset).predictions - + dynamic_length_config = DynamicLengthConfig( const_rate=0.2, max_length=MAX_LENGTH ) self.dynamic_trainer.set_dynamic_config(dynamic_length_config) - + dynamic_output = self.dynamic_trainer.predict(self.dummy_dataset).predictions - + self.assertTrue((full_output[0] != dynamic_output[0]).any()) - + #check onnx self.dynamic_trainer.export_to_onnx('dynamic-model.onnx') self.assertTrue(check_onnx('dynamic-model.onnx', self.dynamic_trainer.get_eval_dataloader())) @@ -267,7 +281,7 @@ def tearDownClass(self): def test_dynamic_training(self): - + dynamic_length_config = DynamicLengthConfig( evo_iter=EVO_ITER, population_size=POPULATION_SIZE, @@ -278,7 +292,7 @@ def test_dynamic_training(self): ) self.dynamic_trainer.set_dynamic_config(dynamic_config=dynamic_length_config) - self.dynamic_trainer.run_evolutionary_search() + self.dynamic_trainer.run_evolutionary_search() class TestEvolutionarySearchBert(unittest.TestCase): @classmethod @@ -308,7 +322,7 @@ def tearDownClass(self): def test_search(self): - + dynamic_length_config = DynamicLengthConfig( evo_iter=EVO_ITER, population_size=POPULATION_SIZE, @@ -329,13 +343,13 @@ def test_search_functions(self): subs = ('MACs','score','method') self.assertTrue( all(i in res for i in subs), msg='{0}'.format(res)) evo = Evolution(self.model,MAX_LENGTH, 'cpu', None, eval_metric='eval_loss') - + class TestSampleConfiguration(unittest.TestCase): - + def test_sample_length_config(self): - + no_drop_lc = tuple( MAX_LENGTH for _ in range(NUM_LAYERS)) lc = sample_length_configuration(MAX_LENGTH, NUM_LAYERS) @@ -367,13 +381,12 @@ def test_sample_length_config(self): self.assertTrue( len(length_conf) == NUM_LAYERS , msg='{0}, {1}'.format(length_conf, layer_conf)) - - + + if __name__ == "__main__": unittest.main() - diff --git a/tests/Nightly/test_llm_runtime.py b/tests/Nightly/test_llm_runtime.py index bf29068b5f7..6e69a82e7de 100644 --- a/tests/Nightly/test_llm_runtime.py +++ b/tests/Nightly/test_llm_runtime.py @@ -47,7 +47,7 @@ def test_llm_runtime(self): tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True) inputs = tokenizer(prompt, return_tensors="pt") - + pt_logits = torch.load("/tf_dataset2/inc-ut/nlptoolkit_ut_model/llama2_pt_logits.pth")[:,-1] pt_generate_ids = torch.load("/tf_dataset2/inc-ut/nlptoolkit_ut_model/llama2_pt_generate_ids.pth")[0].tolist() print(tokenizer.decode(pt_generate_ids)) @@ -61,14 +61,14 @@ def test_llm_runtime(self): self.assertEqual(pt_generate_ids[i], itrex_generate_ids[i]) # check diff of logits - itrex_model = 
AutoModel.from_pretrained(model_name, load_in_4bit=True, + itrex_model = AutoModel.from_pretrained(model_name, load_in_4bit=True, use_llm_runtime=True, trust_remote_code=True) itrex_logits = itrex_model(inputs.input_ids) cmp = cmpData(pt_logits.detach().numpy().flatten(), itrex_logits.flatten()) print("load_in_4bit: ", cmp) self.assertTrue(cmp["diff2"] < 0.42) - itrex_model = AutoModel.from_pretrained(model_name, load_in_8bit=True, + itrex_model = AutoModel.from_pretrained(model_name, load_in_8bit=True, use_llm_runtime=True, trust_remote_code=True) itrex_logits = itrex_model(inputs.input_ids) cmp = cmpData(pt_logits.detach().numpy().flatten(), itrex_logits.flatten()) diff --git a/tests/Nightly/test_llm_smoothquant.py b/tests/Nightly/test_llm_smoothquant.py index 28c6bc9486c..2a303cba1be 100644 --- a/tests/Nightly/test_llm_smoothquant.py +++ b/tests/Nightly/test_llm_smoothquant.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from math import isclose from intel_extension_for_transformers.transformers import ( AutoModelForCausalLM, diff --git a/tests/Nightly/test_nas.py b/tests/Nightly/test_nas.py index 2f5ffcf6eb7..96bcfcec148 100644 --- a/tests/Nightly/test_nas.py +++ b/tests/Nightly/test_nas.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import os import shutil import torch diff --git a/tests/Nightly/test_orchestrate_optimization.py b/tests/Nightly/test_orchestrate_optimization.py index 32c56dc6358..422b10700a9 100644 --- a/tests/Nightly/test_orchestrate_optimization.py +++ b/tests/Nightly/test_orchestrate_optimization.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
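The nightly runtime test above reduces to a short loading recipe. A hedged sketch follows; the model id is a placeholder (the test's checkpoint paths are elided), and the `AutoModel` import path is assumed from the test's style:

```python
# Hedged sketch of the load_in_4bit path exercised above.
from transformers import AutoTokenizer
from intel_extension_for_transformers.transformers import AutoModel  # assumed import path

model_name = "meta-llama/Llama-2-7b-chat-hf"  # placeholder checkpoint
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
inputs = tokenizer("Once upon a time", return_tensors="pt")

model = AutoModel.from_pretrained(model_name, load_in_4bit=True,
                                  use_llm_runtime=True, trust_remote_code=True)
logits = model(inputs.input_ids)  # forward pass through the 4-bit runtime
```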
+ import copy import torch import numpy as np diff --git a/tests/Nightly/test_pruning.py b/tests/Nightly/test_pruning.py index e4cba056d3f..b7284ddfe6b 100644 --- a/tests/Nightly/test_pruning.py +++ b/tests/Nightly/test_pruning.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import copy import os import shutil diff --git a/tests/Nightly/test_tf_autodistillation.py b/tests/Nightly/test_tf_autodistillation.py index edb93f6943f..a7eeb2a4ccc 100644 --- a/tests/Nightly/test_tf_autodistillation.py +++ b/tests/Nightly/test_tf_autodistillation.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import shutil import numpy as np import unittest diff --git a/tests/Nightly/test_tf_distillation.py b/tests/Nightly/test_tf_distillation.py index bbcd54c5c1b..d5521845439 100644 --- a/tests/Nightly/test_tf_distillation.py +++ b/tests/Nightly/test_tf_distillation.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import shutil import numpy as np import unittest diff --git a/tests/Nightly/test_tf_pruning.py b/tests/Nightly/test_tf_pruning.py index d049109ccc1..5fa4806957a 100644 --- a/tests/Nightly/test_tf_pruning.py +++ b/tests/Nightly/test_tf_pruning.py @@ -1,8 +1,22 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ from intel_extension_for_transformers.transformers.utils.utility_tf import get_filepath import numpy as np import os import shutil -import tensorflow as tf +import tensorflow as tf import unittest from datasets import load_dataset, load_metric from intel_extension_for_transformers.transformers import ( @@ -43,7 +57,7 @@ def preprocess_function(examples): data_collator = DefaultDataCollator(return_tensors="tf") dataset = raw_datasets.select(range(10)) self.dummy_dataset = dataset.to_tf_dataset( - columns=[col for col in dataset.column_names if col not in + columns=[col for col in dataset.column_names if col not in set(non_label_column_names + ["label"])], shuffle=False, batch_size=2, @@ -123,9 +137,9 @@ def eval_func(model): def train_func(model): return model - + self.optimizer.prune(pruning_config=pruning_conf, - train_func=train_func, + train_func=train_func, eval_func=eval_func) diff --git a/tests/Nightly/test_tf_quantization.py index 5e57fae9743..3162950c68a 100644 --- a/tests/Nightly/test_tf_quantization.py +++ b/tests/Nightly/test_tf_quantization.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import numpy as np import os import shutil diff --git a/tests/README.md index 4908f82e97a..6023bef0f55 100644 --- a/tests/README.md +++ b/tests/README.md @@ -26,4 +26,4 @@ Note: it depends on IPEX 2.1.0 for GPU or newer.
conda activate env_itrex_gpu cd tests python test_weight_only_gpu.py -``` \ No newline at end of file +``` diff --git a/tests/requirements.txt index ada9fac1b7b..bb2ece4d96c 100644 --- a/tests/requirements.txt +++ b/tests/requirements.txt @@ -1,28 +1,28 @@ +--find-links https://download.pytorch.org/cpu/whl/torch_stable.html +accelerate +bitsandbytes datasets==2.14.7 +einops +evaluate +gguf +git+https://github.com/huggingface/optimum-intel.git +git+https://github.com/huggingface/optimum.git@927e94739447b13f7eefe085c8d3662654b6a11c +git+https://github.com/intel/neural-compressor.git +intel-extension-for-pytorch==2.1.0 +intel-tensorflow==2.13.0 mlflow +neural-speed nlpaug==1.1.9 -sacremoses -git+https://github.com/intel/neural-compressor.git onnx>=1.10 onnxruntime ---find-links https://download.pytorch.org/cpu/whl/torch_stable.html -torch==2.1.0 -transformers==4.36.2 -intel-tensorflow==2.13.0 -torchprofile -intel-extension-for-pytorch==2.1.0 -tokenizers -sentencepiece != 0.1.92 -accelerate -evaluate -wget -git+https://github.com/huggingface/optimum.git@927e94739447b13f7eefe085c8d3662654b6a11c -git+https://github.com/huggingface/optimum-intel.git peft==0.6.2 -tyro -bitsandbytes +sacremoses +sentencepiece != 0.1.92 tiktoken +tokenizers +torch==2.1.0 +torchprofile +transformers==4.36.2 transformers_stream_generator -einops -neural-speed -gguf \ No newline at end of file +tyro +wget diff --git a/workflows/chatbot/demo/advanced_frontend/README.md index 02a84570892..259bafe7dc3 100644 --- a/workflows/chatbot/demo/advanced_frontend/README.md +++ b/workflows/chatbot/demo/advanced_frontend/README.md @@ -57,4 +57,4 @@ This project is to provide instructions and guidance on how to use OpenAI. Howev 1. OpenAI Policies: OpenAI may have its own policies and regulations, such as API usage limits, pricing plans, service agreements, etc. Please make sure you are aware of and comply with OpenAI's relevant policies to avoid any violations. -2. If you have any questions or issues related to OpenAI while using this service, we take no responsibility for them.. \ No newline at end of file +2. If you have any questions or issues related to OpenAI while using this service, we take no responsibility for them. diff --git a/workflows/chatbot/demo/advanced_frontend/tsconfig.json index 6ae0c8c44d0..d9ca65aa92a 100644 --- a/workflows/chatbot/demo/advanced_frontend/tsconfig.json +++ b/workflows/chatbot/demo/advanced_frontend/tsconfig.json @@ -10,8 +10,4 @@ "sourceMap": true, "strict": true } - // Path aliases are handled by https://kit.svelte.dev/docs/configuration#alias - // - // If you want to overwrite includes/excludes, make sure to copy over the relevant includes/excludes - // from the referenced tsconfig.json - TypeScript does not merge them in -} +} \ No newline at end of file diff --git a/workflows/chatbot/demo/basic_frontend/README.md index 75d1a19edd8..e4d907a66ba 100644 --- a/workflows/chatbot/demo/basic_frontend/README.md +++ b/workflows/chatbot/demo/basic_frontend/README.md @@ -20,4 +20,4 @@ For detailed information about the configuration settings, please refer to the [ To set up your application, copy the code files from this directory and configure them as needed. Alternatively, you can clone the existing space from [https://huggingface.co/spaces/Intel/NeuralChat-MPT](https://huggingface.co/spaces/Intel/NeuralChat-MPT).
->**Note**: Please use Gradio version 3.36.0. \ No newline at end of file +>**Note**: Please use Gradio version 3.36.0. diff --git a/workflows/chatbot/demo/basic_frontend/app.py b/workflows/chatbot/demo/basic_frontend/app.py index a8a8c6b184c..41d883ed0ba 100644 --- a/workflows/chatbot/demo/basic_frontend/app.py +++ b/workflows/chatbot/demo/basic_frontend/app.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import argparse from collections import defaultdict import datetime @@ -431,11 +445,11 @@ def http_bot(state, model_selector, temperature, max_new_tokens, topk, request: .user, .bot { width: 80% !important; - + } .bot { - white-space: pre-wrap !important; + white-space: pre-wrap !important; line-height: 1.3 !important; display: flex; flex-direction: column; @@ -451,7 +465,7 @@ def http_bot(state, model_selector, temperature, max_new_tokens, topk, request: #btn-list-style { background: #eee0; border: 1px solid #0053f4; -} +} .title { font-size: 1.5rem; @@ -512,7 +526,7 @@ def http_bot(state, model_selector, temperature, max_new_tokens, topk, request: def build_single_model_ui(models): - + notice_markdown = """
Large Language Model

4th Gen Intel® Xeon® with Intel® AMX

- +
""" @@ -530,7 +544,7 @@ def build_single_model_ui(models):

- + """ state = gr.State() diff --git a/workflows/chatbot/demo/basic_frontend/fastchat/__init__.py b/workflows/chatbot/demo/basic_frontend/fastchat/__init__.py index d31c31eaeb0..4ce1bb159be 100644 --- a/workflows/chatbot/demo/basic_frontend/fastchat/__init__.py +++ b/workflows/chatbot/demo/basic_frontend/fastchat/__init__.py @@ -1 +1,15 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + __version__ = "0.2.3" diff --git a/workflows/chatbot/demo/basic_frontend/fastchat/client/__init__.py b/workflows/chatbot/demo/basic_frontend/fastchat/client/__init__.py index ff1f3f146bb..a0925aec00f 100644 --- a/workflows/chatbot/demo/basic_frontend/fastchat/client/__init__.py +++ b/workflows/chatbot/demo/basic_frontend/fastchat/client/__init__.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from fastchat.client.api import ChatCompletion, set_baseurl __all__ = ["ChatCompletion", "set_baseurl"] diff --git a/workflows/chatbot/demo/basic_frontend/fastchat/client/api.py b/workflows/chatbot/demo/basic_frontend/fastchat/client/api.py index 0e1eb773435..39c74c4c887 100644 --- a/workflows/chatbot/demo/basic_frontend/fastchat/client/api.py +++ b/workflows/chatbot/demo/basic_frontend/fastchat/client/api.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from typing import Dict, List, Optional import asyncio import os diff --git a/workflows/chatbot/demo/basic_frontend/fastchat/client/test_client.py b/workflows/chatbot/demo/basic_frontend/fastchat/client/test_client.py index a04197532d4..a81095911ae 100644 --- a/workflows/chatbot/demo/basic_frontend/fastchat/client/test_client.py +++ b/workflows/chatbot/demo/basic_frontend/fastchat/client/test_client.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from fastchat import client completion = client.ChatCompletion.create( diff --git a/workflows/chatbot/demo/basic_frontend/fastchat/constants.py b/workflows/chatbot/demo/basic_frontend/fastchat/constants.py index 70294c04e70..c1b01b3c316 100644 --- a/workflows/chatbot/demo/basic_frontend/fastchat/constants.py +++ b/workflows/chatbot/demo/basic_frontend/fastchat/constants.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + CONTROLLER_HEART_BEAT_EXPIRATION = 90 WORKER_HEART_BEAT_INTERVAL = 30 diff --git a/workflows/chatbot/demo/basic_frontend/fastchat/conversation.py b/workflows/chatbot/demo/basic_frontend/fastchat/conversation.py index f7fab977a9e..aea84131807 100644 --- a/workflows/chatbot/demo/basic_frontend/fastchat/conversation.py +++ b/workflows/chatbot/demo/basic_frontend/fastchat/conversation.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + """ Conversation prompt template. diff --git a/workflows/chatbot/demo/basic_frontend/fastchat/data/__init__.py b/workflows/chatbot/demo/basic_frontend/fastchat/data/__init__.py index e69de29bb2d..28f108cb636 100644 --- a/workflows/chatbot/demo/basic_frontend/fastchat/data/__init__.py +++ b/workflows/chatbot/demo/basic_frontend/fastchat/data/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
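The fastchat/constants.py hunk above carries the only functional lines in this stretch: CONTROLLER_HEART_BEAT_EXPIRATION = 90 and WORKER_HEART_BEAT_INTERVAL = 30. Read together they define a liveness contract: each worker pings the controller every 30 seconds, and the controller evicts any worker silent for 90 seconds, i.e. three missed beats. A minimal sketch of that contract, assuming illustrative class and method names rather than FastChat's actual serve code:
```
import threading
import time

CONTROLLER_HEART_BEAT_EXPIRATION = 90  # seconds before a silent worker is dropped
WORKER_HEART_BEAT_INTERVAL = 30        # seconds between worker pings

class Controller:
    def __init__(self):
        self.last_beat = {}  # worker address -> timestamp of last heartbeat

    def receive_heart_beat(self, worker_addr):
        self.last_beat[worker_addr] = time.time()

    def remove_stale_workers(self):
        now = time.time()
        for addr, ts in list(self.last_beat.items()):
            if now - ts > CONTROLLER_HEART_BEAT_EXPIRATION:
                del self.last_beat[addr]  # worker missed ~3 intervals

def heart_beat_loop(controller, worker_addr):
    # Would run in a daemon thread inside each worker process.
    while True:
        time.sleep(WORKER_HEART_BEAT_INTERVAL)
        controller.receive_heart_beat(worker_addr)
```
The 3:1 ratio between expiration and interval gives a worker two chances to recover from a dropped ping before it is deregistered.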
diff --git a/workflows/chatbot/demo/basic_frontend/fastchat/data/alpaca-converter.py b/workflows/chatbot/demo/basic_frontend/fastchat/data/alpaca-converter.py index 392ed2c2bea..47b539bbb74 100644 --- a/workflows/chatbot/demo/basic_frontend/fastchat/data/alpaca-converter.py +++ b/workflows/chatbot/demo/basic_frontend/fastchat/data/alpaca-converter.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import argparse import json import pathlib diff --git a/workflows/chatbot/demo/basic_frontend/fastchat/data/clean_sharegpt.py b/workflows/chatbot/demo/basic_frontend/fastchat/data/clean_sharegpt.py index 224b0a47007..a80b97e7d2a 100644 --- a/workflows/chatbot/demo/basic_frontend/fastchat/data/clean_sharegpt.py +++ b/workflows/chatbot/demo/basic_frontend/fastchat/data/clean_sharegpt.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + """ - Convert html to markdown with basic data cleaning. - Deduplication. diff --git a/workflows/chatbot/demo/basic_frontend/fastchat/data/hardcoded_questions.py b/workflows/chatbot/demo/basic_frontend/fastchat/data/hardcoded_questions.py index 7abaff17693..c17b99b2932 100644 --- a/workflows/chatbot/demo/basic_frontend/fastchat/data/hardcoded_questions.py +++ b/workflows/chatbot/demo/basic_frontend/fastchat/data/hardcoded_questions.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import json diff --git a/workflows/chatbot/demo/basic_frontend/fastchat/data/inspect.py b/workflows/chatbot/demo/basic_frontend/fastchat/data/inspect.py index c59e6238c3b..6911feded87 100644 --- a/workflows/chatbot/demo/basic_frontend/fastchat/data/inspect.py +++ b/workflows/chatbot/demo/basic_frontend/fastchat/data/inspect.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + """ Usage: python3 -m fastchat.data.inspect --in sharegpt_20230322_clean_lang_split.json diff --git a/workflows/chatbot/demo/basic_frontend/fastchat/data/merge.py b/workflows/chatbot/demo/basic_frontend/fastchat/data/merge.py index ea5b8a93b38..57b8b1f0f2b 100644 --- a/workflows/chatbot/demo/basic_frontend/fastchat/data/merge.py +++ b/workflows/chatbot/demo/basic_frontend/fastchat/data/merge.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + """ Merge two conversation files into one diff --git a/workflows/chatbot/demo/basic_frontend/fastchat/data/optional_clean.py b/workflows/chatbot/demo/basic_frontend/fastchat/data/optional_clean.py index bbdfb67932f..74fb6474c6d 100644 --- a/workflows/chatbot/demo/basic_frontend/fastchat/data/optional_clean.py +++ b/workflows/chatbot/demo/basic_frontend/fastchat/data/optional_clean.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + """ Do optional cleaning (e.g., remove some languages). diff --git a/workflows/chatbot/demo/basic_frontend/fastchat/data/pretty_json.py b/workflows/chatbot/demo/basic_frontend/fastchat/data/pretty_json.py index 426fadc2dd8..796a2a8ff41 100644 --- a/workflows/chatbot/demo/basic_frontend/fastchat/data/pretty_json.py +++ b/workflows/chatbot/demo/basic_frontend/fastchat/data/pretty_json.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ """ Usage: python3 pretty_json.py --in in.json --out out.json diff --git a/workflows/chatbot/demo/basic_frontend/fastchat/data/sample.py b/workflows/chatbot/demo/basic_frontend/fastchat/data/sample.py index b53df6a67d5..dc323559ce2 100644 --- a/workflows/chatbot/demo/basic_frontend/fastchat/data/sample.py +++ b/workflows/chatbot/demo/basic_frontend/fastchat/data/sample.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + """ Sample some conversations from a file. diff --git a/workflows/chatbot/demo/basic_frontend/fastchat/data/split_long_conversation.py b/workflows/chatbot/demo/basic_frontend/fastchat/data/split_long_conversation.py index 9362a922833..f63d6e034aa 100644 --- a/workflows/chatbot/demo/basic_frontend/fastchat/data/split_long_conversation.py +++ b/workflows/chatbot/demo/basic_frontend/fastchat/data/split_long_conversation.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + """ Split long conversations based on certain max length. diff --git a/workflows/chatbot/demo/basic_frontend/fastchat/eval/eval_gpt_review.py b/workflows/chatbot/demo/basic_frontend/fastchat/eval/eval_gpt_review.py index 890bca730a1..c5c130aca0b 100644 --- a/workflows/chatbot/demo/basic_frontend/fastchat/eval/eval_gpt_review.py +++ b/workflows/chatbot/demo/basic_frontend/fastchat/eval/eval_gpt_review.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ import argparse import json import os diff --git a/workflows/chatbot/demo/basic_frontend/fastchat/eval/generate_webpage_data_from_table.py b/workflows/chatbot/demo/basic_frontend/fastchat/eval/generate_webpage_data_from_table.py index e24175aa588..c26b7728de0 100644 --- a/workflows/chatbot/demo/basic_frontend/fastchat/eval/generate_webpage_data_from_table.py +++ b/workflows/chatbot/demo/basic_frontend/fastchat/eval/generate_webpage_data_from_table.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + """Generate json file for webpage.""" import json import os diff --git a/workflows/chatbot/demo/basic_frontend/fastchat/eval/get_model_answer.py b/workflows/chatbot/demo/basic_frontend/fastchat/eval/get_model_answer.py index 2e9ba0bd670..0bf5eba0b8f 100644 --- a/workflows/chatbot/demo/basic_frontend/fastchat/eval/get_model_answer.py +++ b/workflows/chatbot/demo/basic_frontend/fastchat/eval/get_model_answer.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import argparse from transformers import AutoTokenizer, AutoModelForCausalLM, LlamaForCausalLM import torch diff --git a/workflows/chatbot/demo/basic_frontend/fastchat/eval/qa_baseline_gpt35.py b/workflows/chatbot/demo/basic_frontend/fastchat/eval/qa_baseline_gpt35.py index f0f9f5fbc9a..924370ab8a7 100644 --- a/workflows/chatbot/demo/basic_frontend/fastchat/eval/qa_baseline_gpt35.py +++ b/workflows/chatbot/demo/basic_frontend/fastchat/eval/qa_baseline_gpt35.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ """Generate answers with GPT-3.5""" # Note: you need to be using OpenAI Python v0.27.0 for the code below to work import argparse diff --git a/workflows/chatbot/demo/basic_frontend/fastchat/eval/requirements.txt b/workflows/chatbot/demo/basic_frontend/fastchat/eval/requirements.txt index c2490e15ead..4a47981acfa 100644 --- a/workflows/chatbot/demo/basic_frontend/fastchat/eval/requirements.txt +++ b/workflows/chatbot/demo/basic_frontend/fastchat/eval/requirements.txt @@ -1,2 +1,2 @@ +ray shortuuid -ray \ No newline at end of file diff --git a/workflows/chatbot/demo/basic_frontend/fastchat/eval/script/run_model_qa.yaml b/workflows/chatbot/demo/basic_frontend/fastchat/eval/script/run_model_qa.yaml index 64e36560e60..d94f919eaac 100644 --- a/workflows/chatbot/demo/basic_frontend/fastchat/eval/script/run_model_qa.yaml +++ b/workflows/chatbot/demo/basic_frontend/fastchat/eval/script/run_model_qa.yaml @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + resources: accelerators: A100:4 cloud: gcp diff --git a/workflows/chatbot/demo/basic_frontend/fastchat/model/__init__.py b/workflows/chatbot/demo/basic_frontend/fastchat/model/__init__.py index e69de29bb2d..28f108cb636 100644 --- a/workflows/chatbot/demo/basic_frontend/fastchat/model/__init__.py +++ b/workflows/chatbot/demo/basic_frontend/fastchat/model/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/workflows/chatbot/demo/basic_frontend/fastchat/model/apply_delta.py b/workflows/chatbot/demo/basic_frontend/fastchat/model/apply_delta.py index 67a1c590d41..964f4dc400c 100644 --- a/workflows/chatbot/demo/basic_frontend/fastchat/model/apply_delta.py +++ b/workflows/chatbot/demo/basic_frontend/fastchat/model/apply_delta.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + """ Apply the delta weights on top of a base model. 
diff --git a/workflows/chatbot/demo/basic_frontend/fastchat/model/apply_lora.py b/workflows/chatbot/demo/basic_frontend/fastchat/model/apply_lora.py index 870e64a3b4b..184a319a079 100644 --- a/workflows/chatbot/demo/basic_frontend/fastchat/model/apply_lora.py +++ b/workflows/chatbot/demo/basic_frontend/fastchat/model/apply_lora.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + """ Apply the LoRA weights on top of a base model. diff --git a/workflows/chatbot/demo/basic_frontend/fastchat/model/convert_fp16.py b/workflows/chatbot/demo/basic_frontend/fastchat/model/convert_fp16.py index efc40aa83bf..bce2e8d574b 100644 --- a/workflows/chatbot/demo/basic_frontend/fastchat/model/convert_fp16.py +++ b/workflows/chatbot/demo/basic_frontend/fastchat/model/convert_fp16.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + """ Usage: python3 -m fastchat.model.convert_fp16 --in in-folder --out out-folder diff --git a/workflows/chatbot/demo/basic_frontend/fastchat/model/make_delta.py b/workflows/chatbot/demo/basic_frontend/fastchat/model/make_delta.py index ebaa2db62e5..d04f6a6d2f4 100644 --- a/workflows/chatbot/demo/basic_frontend/fastchat/model/make_delta.py +++ b/workflows/chatbot/demo/basic_frontend/fastchat/model/make_delta.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + """ Make the delta weights by subtracting base weights. 
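The model/ scripts touched above form an inverse pair: make_delta.py produces delta weights by subtracting base weights from a finetuned model, and apply_delta.py reconstructs the finetuned weights as base plus delta (apply_lora.py plays the analogous role for LoRA adapters). The diff only adds license headers, but the arithmetic the docstrings name is worth spelling out; a minimal sketch over plain state dicts, ignoring the real scripts' shard streaming and tokenizer handling:
```
import torch

def make_delta(base_state, target_state):
    # delta = finetuned weights minus base weights, tensor by tensor
    return {name: target_state[name] - base_state[name] for name in target_state}

def apply_delta(base_state, delta_state):
    # reconstruct the finetuned weights: base + delta
    return {name: base_state[name] + delta_state[name] for name in delta_state}

base = {"w": torch.zeros(2, 2)}
target = {"w": torch.ones(2, 2)}
delta = make_delta(base, target)
assert torch.equal(apply_delta(base, delta)["w"], target["w"])
```
The round-trip assert documents the invariant: apply_delta(base, make_delta(base, target)) recovers the target weights exactly.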
diff --git a/workflows/chatbot/demo/basic_frontend/fastchat/protocol/chat_completion.py b/workflows/chatbot/demo/basic_frontend/fastchat/protocol/chat_completion.py index 6acf682dc80..dbe45e90127 100644 --- a/workflows/chatbot/demo/basic_frontend/fastchat/protocol/chat_completion.py +++ b/workflows/chatbot/demo/basic_frontend/fastchat/protocol/chat_completion.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from typing import Optional, List, Dict, Any import time diff --git a/workflows/chatbot/demo/basic_frontend/fastchat/serve/__init__.py b/workflows/chatbot/demo/basic_frontend/fastchat/serve/__init__.py index e69de29bb2d..28f108cb636 100644 --- a/workflows/chatbot/demo/basic_frontend/fastchat/serve/__init__.py +++ b/workflows/chatbot/demo/basic_frontend/fastchat/serve/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/workflows/chatbot/demo/basic_frontend/fastchat/serve/api.py b/workflows/chatbot/demo/basic_frontend/fastchat/serve/api.py index a5aeb579e5a..92efd219ea3 100644 --- a/workflows/chatbot/demo/basic_frontend/fastchat/serve/api.py +++ b/workflows/chatbot/demo/basic_frontend/fastchat/serve/api.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + """This module provides a ChatGPT-compatible Restful API for chat completion. Usage: diff --git a/workflows/chatbot/demo/basic_frontend/fastchat/serve/cacheflow_worker.py b/workflows/chatbot/demo/basic_frontend/fastchat/serve/cacheflow_worker.py index f83b2598ec2..ed0828a14ab 100644 --- a/workflows/chatbot/demo/basic_frontend/fastchat/serve/cacheflow_worker.py +++ b/workflows/chatbot/demo/basic_frontend/fastchat/serve/cacheflow_worker.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + """ A model worker executes the model based on Cacheflow. diff --git a/workflows/chatbot/demo/basic_frontend/fastchat/serve/cli.py b/workflows/chatbot/demo/basic_frontend/fastchat/serve/cli.py index d501338ff63..7a9deea3de8 100644 --- a/workflows/chatbot/demo/basic_frontend/fastchat/serve/cli.py +++ b/workflows/chatbot/demo/basic_frontend/fastchat/serve/cli.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + """ Chat with a model with command line interface. diff --git a/workflows/chatbot/demo/basic_frontend/fastchat/serve/compression.py b/workflows/chatbot/demo/basic_frontend/fastchat/serve/compression.py index 2c1dafd3907..9663eb4e26f 100644 --- a/workflows/chatbot/demo/basic_frontend/fastchat/serve/compression.py +++ b/workflows/chatbot/demo/basic_frontend/fastchat/serve/compression.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import dataclasses import gc import glob @@ -10,7 +24,7 @@ import torch.nn as nn from torch.nn import functional as F from tqdm import tqdm -from transformers import AutoTokenizer, AutoModelForCausalLM, AutoConfig +from transformers import AutoTokenizer, AutoModelForCausalLM, AutoConfig @dataclasses.dataclass diff --git a/workflows/chatbot/demo/basic_frontend/fastchat/serve/controller.py b/workflows/chatbot/demo/basic_frontend/fastchat/serve/controller.py index d46eaa29b97..cecff95572e 100644 --- a/workflows/chatbot/demo/basic_frontend/fastchat/serve/controller.py +++ b/workflows/chatbot/demo/basic_frontend/fastchat/serve/controller.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + """ A controller manages distributed workers. It sends worker addresses to clients. diff --git a/workflows/chatbot/demo/basic_frontend/fastchat/serve/gradio_block_arena_anony.py b/workflows/chatbot/demo/basic_frontend/fastchat/serve/gradio_block_arena_anony.py index a2e71218743..f6f4e8c46e3 100644 --- a/workflows/chatbot/demo/basic_frontend/fastchat/serve/gradio_block_arena_anony.py +++ b/workflows/chatbot/demo/basic_frontend/fastchat/serve/gradio_block_arena_anony.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import json import time @@ -240,7 +254,7 @@ def http_bot_all( def build_side_by_side_ui_anony(models): notice_markdown = """ -# ⚔️ Chatbot Arena ⚔️ +# ⚔️ Chatbot Arena ⚔️ Rules: - Chat with two anonymous models side-by-side and vote for which one is better! - The names of the models will be revealed after your vote. @@ -400,5 +414,3 @@ def build_side_by_side_ui_anony(models): button_row2, parameter_row, ) - - diff --git a/workflows/chatbot/demo/basic_frontend/fastchat/serve/gradio_block_arena_named.py b/workflows/chatbot/demo/basic_frontend/fastchat/serve/gradio_block_arena_named.py index 0db977bad88..f732157ac6e 100644 --- a/workflows/chatbot/demo/basic_frontend/fastchat/serve/gradio_block_arena_named.py +++ b/workflows/chatbot/demo/basic_frontend/fastchat/serve/gradio_block_arena_named.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import json import time @@ -223,7 +237,7 @@ def http_bot_all( def build_side_by_side_ui_named(models): notice_markdown = """ -# ⚔️ Chatbot Arena ⚔️ +# ⚔️ Chatbot Arena ⚔️ Rules: - Chat with two models side-by-side and vote for which one is better! - You pick the models you want to chat with. @@ -392,4 +406,3 @@ def build_side_by_side_ui_named(models): button_row2, parameter_row, ) - diff --git a/workflows/chatbot/demo/basic_frontend/fastchat/serve/gradio_css.py b/workflows/chatbot/demo/basic_frontend/fastchat/serve/gradio_css.py index 71d79b4a4b5..7416edd771e 100644 --- a/workflows/chatbot/demo/basic_frontend/fastchat/serve/gradio_css.py +++ b/workflows/chatbot/demo/basic_frontend/fastchat/serve/gradio_css.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + code_highlight_css = """ #chatbot .hll { background-color: #ffffcc } #chatbot .c { color: #408080; font-style: italic } diff --git a/workflows/chatbot/demo/basic_frontend/fastchat/serve/gradio_patch.py b/workflows/chatbot/demo/basic_frontend/fastchat/serve/gradio_patch.py index af8731da17d..06713f51679 100644 --- a/workflows/chatbot/demo/basic_frontend/fastchat/serve/gradio_patch.py +++ b/workflows/chatbot/demo/basic_frontend/fastchat/serve/gradio_patch.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + """ Adopted from https://github.com/gradio-app/gradio/blob/main/gradio/components.py Fix a markdown render problem. diff --git a/workflows/chatbot/demo/basic_frontend/fastchat/serve/gradio_web_server.py b/workflows/chatbot/demo/basic_frontend/fastchat/serve/gradio_web_server.py index ef983254cad..cbeb8b8be4f 100644 --- a/workflows/chatbot/demo/basic_frontend/fastchat/serve/gradio_web_server.py +++ b/workflows/chatbot/demo/basic_frontend/fastchat/serve/gradio_web_server.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import argparse from collections import defaultdict import datetime diff --git a/workflows/chatbot/demo/basic_frontend/fastchat/serve/gradio_web_server_multi.py b/workflows/chatbot/demo/basic_frontend/fastchat/serve/gradio_web_server_multi.py index ad96fcc63cc..6b919267b13 100644 --- a/workflows/chatbot/demo/basic_frontend/fastchat/serve/gradio_web_server_multi.py +++ b/workflows/chatbot/demo/basic_frontend/fastchat/serve/gradio_web_server_multi.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + import argparse import gradio as gr diff --git a/workflows/chatbot/demo/basic_frontend/fastchat/serve/huggingface_api.py b/workflows/chatbot/demo/basic_frontend/fastchat/serve/huggingface_api.py index 9dd4ea466d7..6ff1348bb53 100644 --- a/workflows/chatbot/demo/basic_frontend/fastchat/serve/huggingface_api.py +++ b/workflows/chatbot/demo/basic_frontend/fastchat/serve/huggingface_api.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + """ Usage: python3 -m fastchat.serve.huggingface_api --model ~/model_weights/vicuna-7b/ diff --git a/workflows/chatbot/demo/basic_frontend/fastchat/serve/inference.py b/workflows/chatbot/demo/basic_frontend/fastchat/serve/inference.py index d4cbf2a34af..6f265555c87 100644 --- a/workflows/chatbot/demo/basic_frontend/fastchat/serve/inference.py +++ b/workflows/chatbot/demo/basic_frontend/fastchat/serve/inference.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + """Inference for FastChat models.""" import abc from typing import Optional @@ -106,7 +120,7 @@ def load_model( replace_llama_attn_with_non_inplace_operations() else: raise ValueError(f"Invalid device: {device}") - + if load_8bit: if num_gpus != 1 and num_gpus != "1": warnings.warn("8-bit quantization is not supported for multi-gpu inference.") diff --git a/workflows/chatbot/demo/basic_frontend/fastchat/serve/model_worker.py b/workflows/chatbot/demo/basic_frontend/fastchat/serve/model_worker.py index 65aa2b726fd..8633ec73983 100644 --- a/workflows/chatbot/demo/basic_frontend/fastchat/serve/model_worker.py +++ b/workflows/chatbot/demo/basic_frontend/fastchat/serve/model_worker.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + """ A model worker executes the model. 
""" @@ -252,7 +266,7 @@ async def api_get_status(request: Request): if args.num_gpus and len(args.gpus.split(",")) < int(args.num_gpus): raise ValueError(f"Larger --num-gpus ({args.num_gpus}) than --gpus {args.gpus}!") os.environ["CUDA_VISIBLE_DEVICES"] = args.gpus - + worker = ModelWorker( args.controller_address, args.worker_address, diff --git a/workflows/chatbot/demo/basic_frontend/fastchat/serve/monkey_patch_non_inplace.py b/workflows/chatbot/demo/basic_frontend/fastchat/serve/monkey_patch_non_inplace.py index 9661d707512..f18d3121f33 100644 --- a/workflows/chatbot/demo/basic_frontend/fastchat/serve/monkey_patch_non_inplace.py +++ b/workflows/chatbot/demo/basic_frontend/fastchat/serve/monkey_patch_non_inplace.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + """ Monkey patch the llama implementation in the huggingface/transformers library. Avoid bugs in mps backend by not using in-place operations. diff --git a/workflows/chatbot/demo/basic_frontend/fastchat/serve/register_worker.py b/workflows/chatbot/demo/basic_frontend/fastchat/serve/register_worker.py index 2c2c40295e0..f1e816d1f2a 100644 --- a/workflows/chatbot/demo/basic_frontend/fastchat/serve/register_worker.py +++ b/workflows/chatbot/demo/basic_frontend/fastchat/serve/register_worker.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + """ Manually register workers. diff --git a/workflows/chatbot/demo/basic_frontend/fastchat/serve/serve_chatglm.py b/workflows/chatbot/demo/basic_frontend/fastchat/serve/serve_chatglm.py index 7b7745625d9..7c86e12394c 100644 --- a/workflows/chatbot/demo/basic_frontend/fastchat/serve/serve_chatglm.py +++ b/workflows/chatbot/demo/basic_frontend/fastchat/serve/serve_chatglm.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ import torch from typing import List, Tuple diff --git a/workflows/chatbot/demo/basic_frontend/fastchat/serve/test_message.py b/workflows/chatbot/demo/basic_frontend/fastchat/serve/test_message.py index ef2a2e36ec0..4a0cabe8aed 100644 --- a/workflows/chatbot/demo/basic_frontend/fastchat/serve/test_message.py +++ b/workflows/chatbot/demo/basic_frontend/fastchat/serve/test_message.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import argparse import json diff --git a/workflows/chatbot/demo/basic_frontend/fastchat/serve/test_throughput.py b/workflows/chatbot/demo/basic_frontend/fastchat/serve/test_throughput.py index 9cc5f45c7e0..254555bfd85 100644 --- a/workflows/chatbot/demo/basic_frontend/fastchat/serve/test_throughput.py +++ b/workflows/chatbot/demo/basic_frontend/fastchat/serve/test_throughput.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + """Benchmarking script to test the throughput of serving workers.""" import argparse import json diff --git a/workflows/chatbot/demo/basic_frontend/fastchat/train/llama_flash_attn_monkey_patch.py b/workflows/chatbot/demo/basic_frontend/fastchat/train/llama_flash_attn_monkey_patch.py index 00fc39edff8..83045aab482 100644 --- a/workflows/chatbot/demo/basic_frontend/fastchat/train/llama_flash_attn_monkey_patch.py +++ b/workflows/chatbot/demo/basic_frontend/fastchat/train/llama_flash_attn_monkey_patch.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ from typing import List, Optional, Tuple import torch diff --git a/workflows/chatbot/demo/basic_frontend/fastchat/train/train_mem.py b/workflows/chatbot/demo/basic_frontend/fastchat/train/train_mem.py index e4b33528482..5be0e634e56 100644 --- a/workflows/chatbot/demo/basic_frontend/fastchat/train/train_mem.py +++ b/workflows/chatbot/demo/basic_frontend/fastchat/train/train_mem.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + # Make it more memory efficient by monkey patching the LLaMA model with FlashAttn. # Need to call this before importing transformers. diff --git a/workflows/chatbot/demo/basic_frontend/fastchat/utils.py b/workflows/chatbot/demo/basic_frontend/fastchat/utils.py index 9a6d7bffb16..8f275bc3068 100644 --- a/workflows/chatbot/demo/basic_frontend/fastchat/utils.py +++ b/workflows/chatbot/demo/basic_frontend/fastchat/utils.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import logging import logging.handlers import os diff --git a/workflows/chatbot/demo/basic_frontend/requirements.txt b/workflows/chatbot/demo/basic_frontend/requirements.txt index f2b6c5fb339..55cb9823fe4 100644 --- a/workflows/chatbot/demo/basic_frontend/requirements.txt +++ b/workflows/chatbot/demo/basic_frontend/requirements.txt @@ -1,9 +1,9 @@ -pip -torch diffusers==0.8.1 -transformers -requests +gradio huggingface_hub markdown2 nh3 -gradio +pip +requests +torch +transformers diff --git a/workflows/chatbot/demo/chatcli/__init__.py b/workflows/chatbot/demo/chatcli/__init__.py index 407b29e53be..18896e7b549 100644 --- a/workflows/chatbot/demo/chatcli/__init__.py +++ b/workflows/chatbot/demo/chatcli/__init__.py @@ -14,4 +14,3 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
- diff --git a/workflows/chatbot/demo/chatcli/model.py b/workflows/chatbot/demo/chatcli/model.py index 6e8754546bc..77d291c1cb2 100644 --- a/workflows/chatbot/demo/chatcli/model.py +++ b/workflows/chatbot/demo/chatcli/model.py @@ -97,7 +97,7 @@ def __init__(self, endpoint: str, model: str, user: str, bot: str): self.model = model self.user = user self.bot = bot - + def prompt(self): prompt = "" for ctx in self.ctx: @@ -106,7 +106,7 @@ def prompt(self): def __headers__(self): return {'Content-Type': 'application/json'} - + def __data__(self): return json.dumps({ "model": self.model, @@ -123,7 +123,7 @@ def __stream__(self, chunk, newctx) -> Tuple[str, bool]: if not text.startswith(prompt): return ("", True) - + word = text.removeprefix(prompt).removeprefix(newctx["content"]) done = word.find('\n') != -1 word = word[:word.find('\n')] diff --git a/workflows/chatbot/demo/docker/README.md b/workflows/chatbot/demo/docker/README.md index 82bc887b3e1..e45f9f44e2b 100644 --- a/workflows/chatbot/demo/docker/README.md +++ b/workflows/chatbot/demo/docker/README.md @@ -94,4 +94,3 @@ cd /itrex/workflows/chatbot/inference/backend/sd/ ``` nohup bash run.sh & ``` - diff --git a/workflows/chatbot/fine_tuning/instruction_generator/data_generation.md b/workflows/chatbot/fine_tuning/instruction_generator/data_generation.md index ee0f5222e72..5ca955b0c3f 100644 --- a/workflows/chatbot/fine_tuning/instruction_generator/data_generation.md +++ b/workflows/chatbot/fine_tuning/instruction_generator/data_generation.md @@ -113,4 +113,4 @@ Below are some examples that are generated by our method, "correct": "Popular AI models can achieve up to 19x faster performance speeds when moving from 3rd Gen to 4th Gen Intel Xeon processors using Intel AMX.", "fake": "Popular AI models experience a dramatic decrease of up to 80% in performance speeds when moving from 3rd Gen to 4th Gen Intel Xeon processors using Intel AMX." }, -``` \ No newline at end of file +``` diff --git a/workflows/chatbot/fine_tuning/instruction_tuning_pipeline/finetune_seq2seq.py b/workflows/chatbot/fine_tuning/instruction_tuning_pipeline/finetune_seq2seq.py index f96e79e88a2..7af300a8c2e 100644 --- a/workflows/chatbot/fine_tuning/instruction_tuning_pipeline/finetune_seq2seq.py +++ b/workflows/chatbot/fine_tuning/instruction_tuning_pipeline/finetune_seq2seq.py @@ -57,4 +57,4 @@ def main(): finetune_model(finetune_cfg) if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/workflows/chatbot/fine_tuning/requirements.txt b/workflows/chatbot/fine_tuning/requirements.txt index 927c9fc1773..8bdfbb97ccb 100644 --- a/workflows/chatbot/fine_tuning/requirements.txt +++ b/workflows/chatbot/fine_tuning/requirements.txt @@ -1,9 +1,9 @@ datasets -torch -transformers>=4.32.0 -sentencepiece -peft +einops evaluate nltk +peft rouge_score -einops +sentencepiece +torch +transformers>=4.32.0 diff --git a/workflows/chatbot/inference/__init__.py b/workflows/chatbot/inference/__init__.py index e69de29bb2d..28f108cb636 100644 --- a/workflows/chatbot/inference/__init__.py +++ b/workflows/chatbot/inference/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/workflows/chatbot/inference/backend/chat/README.md b/workflows/chatbot/inference/backend/chat/README.md index 4f00097a50a..e1a04df3217 100644 --- a/workflows/chatbot/inference/backend/chat/README.md +++ b/workflows/chatbot/inference/backend/chat/README.md @@ -81,4 +81,4 @@ nohup bash run_itrex.sh & pip install speechbrain pip install soundfile pip install pydub -``` \ No newline at end of file +``` diff --git a/workflows/chatbot/inference/backend/chat/__init__.py b/workflows/chatbot/inference/backend/chat/__init__.py index e69de29bb2d..28f108cb636 100644 --- a/workflows/chatbot/inference/backend/chat/__init__.py +++ b/workflows/chatbot/inference/backend/chat/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/workflows/chatbot/inference/backend/chat/asr.py b/workflows/chatbot/inference/backend/chat/asr.py index 88d005511ee..9757b809800 100644 --- a/workflows/chatbot/inference/backend/chat/asr.py +++ b/workflows/chatbot/inference/backend/chat/asr.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import torch import intel_extension_for_pytorch as ipex from transformers import WhisperForConditionalGeneration, WhisperProcessor diff --git a/workflows/chatbot/inference/backend/chat/compression.py b/workflows/chatbot/inference/backend/chat/compression.py index a839738aef4..d6bb42d429f 100644 --- a/workflows/chatbot/inference/backend/chat/compression.py +++ b/workflows/chatbot/inference/backend/chat/compression.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + import dataclasses import torch diff --git a/workflows/chatbot/inference/backend/chat/constants.py b/workflows/chatbot/inference/backend/chat/constants.py index 5a16e9f1b83..85b98ab405f 100644 --- a/workflows/chatbot/inference/backend/chat/constants.py +++ b/workflows/chatbot/inference/backend/chat/constants.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + CONTROLLER_HEART_BEAT_EXPIRATION = 2 * 60 WORKER_HEART_BEAT_INTERVAL = 30 diff --git a/workflows/chatbot/inference/backend/chat/controller.py b/workflows/chatbot/inference/backend/chat/controller.py index 383f853847b..f078f34f3e5 100644 --- a/workflows/chatbot/inference/backend/chat/controller.py +++ b/workflows/chatbot/inference/backend/chat/controller.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + """ A controller manages distributed workers. It sends worker addresses to clients. diff --git a/workflows/chatbot/inference/backend/chat/conversation.py b/workflows/chatbot/inference/backend/chat/conversation.py index 69f07c99557..dd6f232d514 100644 --- a/workflows/chatbot/inference/backend/chat/conversation.py +++ b/workflows/chatbot/inference/backend/chat/conversation.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + """ Conversation prompt templates. 
""" @@ -818,4 +832,4 @@ def get_default_conv_template(model_path: str) -> Conversation: conv.append_message(conv.roles[1], "Hi!") conv.append_message(conv.roles[0], "How are you?") conv.append_message(conv.roles[1], None) - print(conv.get_prompt()) \ No newline at end of file + print(conv.get_prompt()) diff --git a/workflows/chatbot/inference/backend/chat/inference.py b/workflows/chatbot/inference/backend/chat/inference.py index 43f1b59e1ba..13075837cef 100644 --- a/workflows/chatbot/inference/backend/chat/inference.py +++ b/workflows/chatbot/inference/backend/chat/inference.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + """Inference for LLM models.""" import abc import torch @@ -71,7 +85,7 @@ def forward(self, input_ids, attention_mask=None, past_key_values=None): # print("ipex-bf16") with torch.cpu.amp.autocast(enabled=True, dtype=torch.bfloat16): out = self.model( - input_ids, + input_ids, attention_mask=attention_mask, past_key_values=past_key_values, use_cache=True) @@ -254,7 +268,7 @@ def generate_stream(model, model_name, tokenizer, params, device, # torch.manual_seed(100) token = int(torch.multinomial(probabilities, 1)) - + output_ids.append(token) diff --git a/workflows/chatbot/inference/backend/chat/model_worker.py b/workflows/chatbot/inference/backend/chat/model_worker.py index 9d78fc56c6e..39ab38d1721 100644 --- a/workflows/chatbot/inference/backend/chat/model_worker.py +++ b/workflows/chatbot/inference/backend/chat/model_worker.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + """ A model worker executes the model. 
""" @@ -12,7 +26,7 @@ import uuid from fastapi import FastAPI, Request, BackgroundTasks -from fastapi.responses import StreamingResponse, PlainTextResponse +from fastapi.responses import StreamingResponse, PlainTextResponse import requests from transformers import AutoTokenizer, AutoModelForCausalLM, LlamaTokenizer, GenerationConfig, StoppingCriteria, StoppingCriteriaList import torch diff --git a/workflows/chatbot/inference/backend/chat/register_worker.py b/workflows/chatbot/inference/backend/chat/register_worker.py index 819a6a85dd0..569f86b372b 100644 --- a/workflows/chatbot/inference/backend/chat/register_worker.py +++ b/workflows/chatbot/inference/backend/chat/register_worker.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + """ Manually register workers. diff --git a/workflows/chatbot/inference/backend/chat/run_ipex.sh b/workflows/chatbot/inference/backend/chat/run_ipex.sh index 0dd3b87b7ba..a92927e0197 100644 --- a/workflows/chatbot/inference/backend/chat/run_ipex.sh +++ b/workflows/chatbot/inference/backend/chat/run_ipex.sh @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + # Kill the exist and re-run ps -ef |grep 'controller' |awk '{print $2}' |xargs kill -9 ps -ef |grep 'model_worker' |awk '{print $2}' |xargs kill -9 diff --git a/workflows/chatbot/inference/backend/chat/run_itrex.sh b/workflows/chatbot/inference/backend/chat/run_itrex.sh index 9aceeb2f1c4..1c0bfc1069e 100644 --- a/workflows/chatbot/inference/backend/chat/run_itrex.sh +++ b/workflows/chatbot/inference/backend/chat/run_itrex.sh @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ # Kill the existing processes and re-run ps -ef |grep 'controller' |awk '{print $2}' |xargs kill -9 ps -ef |grep 'model_worker' |awk '{print $2}' |xargs kill -9 diff --git a/workflows/chatbot/inference/backend/chat/tts.py b/workflows/chatbot/inference/backend/chat/tts.py index c713cfe7637..8b1bb3b1586 100644 --- a/workflows/chatbot/inference/backend/chat/tts.py +++ b/workflows/chatbot/inference/backend/chat/tts.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, set_seed from datasets import load_dataset, Audio, Dataset, Features, ClassLabel import os diff --git a/workflows/chatbot/inference/backend/chat/utils.py b/workflows/chatbot/inference/backend/chat/utils.py index 0964a1bf157..757ef2025ad 100644 --- a/workflows/chatbot/inference/backend/chat/utils.py +++ b/workflows/chatbot/inference/backend/chat/utils.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import datetime import logging import logging.handlers diff --git a/workflows/chatbot/inference/backend/database/db_config.py b/workflows/chatbot/inference/backend/database/db_config.py index 41a4e516af1..0190607d82c 100644 --- a/workflows/chatbot/inference/backend/database/db_config.py +++ b/workflows/chatbot/inference/backend/database/db_config.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
+ import os from dotenv import load_dotenv @@ -37,4 +51,3 @@ class Settings(BaseSettings): def get_settings() -> BaseSettings: # logger.info("Loading config settings from the environment...") return Settings() - diff --git a/workflows/chatbot/inference/backend/database/mysqldb.py b/workflows/chatbot/inference/backend/database/mysqldb.py index a67c5d9cf02..db3a0a0bf6e 100644 --- a/workflows/chatbot/inference/backend/database/mysqldb.py +++ b/workflows/chatbot/inference/backend/database/mysqldb.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from db_config import get_settings from pymysql import connect diff --git a/workflows/chatbot/inference/backend/fastrag/doc_index.py b/workflows/chatbot/inference/backend/fastrag/doc_index.py index 4a39b7d57f0..a88f038f4e4 100644 --- a/workflows/chatbot/inference/backend/fastrag/doc_index.py +++ b/workflows/chatbot/inference/backend/fastrag/doc_index.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import os import argparse from langchain.embeddings import HuggingFaceEmbeddings, HuggingFaceInstructEmbeddings @@ -357,4 +371,3 @@ def load_pdf(pdf_path): document_store.save(index_path="my_index.faiss") else: print("in memory db is done") - diff --git a/workflows/chatbot/inference/backend/fastrag/embedding_xlsx.py b/workflows/chatbot/inference/backend/fastrag/embedding_xlsx.py index 5b4cb65883b..e942e84ae99 100644 --- a/workflows/chatbot/inference/backend/fastrag/embedding_xlsx.py +++ b/workflows/chatbot/inference/backend/fastrag/embedding_xlsx.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ import os from langchain.embeddings import HuggingFaceEmbeddings, HuggingFaceInstructEmbeddings from langchain.vectorstores import Chroma diff --git a/workflows/chatbot/inference/backend/fastrag/fastrag_service.py b/workflows/chatbot/inference/backend/fastrag/fastrag_service.py index 43def493810..bb083862df3 100644 --- a/workflows/chatbot/inference/backend/fastrag/fastrag_service.py +++ b/workflows/chatbot/inference/backend/fastrag/fastrag_service.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from haystack.document_stores import InMemoryDocumentStore,ElasticsearchDocumentStore from fastapi import FastAPI, Request, BackgroundTasks @@ -150,7 +164,7 @@ def ask_gm_documents_sparse_embedding(folder_path, process_content=False): with tempfile.TemporaryDirectory(dir="/tmp/my_subdirectory") as temp_dir: english_embeddings = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-large") chinese_embeddings = HuggingFaceInstructEmbeddings(model_name="shibing624/text2vec-base-chinese") - + young_pat_vectordb = Chroma(persist_directory=temp_dir, embedding_function=english_embeddings) young_pat_dense_retriever = young_pat_vectordb.as_retriever(search_type="mmr", @@ -664,5 +678,3 @@ def _format_filters(filters): put("test","test") uvicorn.run(app, host=args.host, port=args.port, log_level="info") - - diff --git a/workflows/chatbot/inference/backend/fastrag/inc_document.py b/workflows/chatbot/inference/backend/fastrag/inc_document.py index a15db1c8383..bdbc14b8bf4 100644 --- a/workflows/chatbot/inference/backend/fastrag/inc_document.py +++ b/workflows/chatbot/inference/backend/fastrag/inc_document.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + inc_examples = [{"doc": "## FX Introduction\nFX is a PyTorch toolkit for developers to use to transform nn.Module instance. FX consists of three main components: a symbolic tracer, an intermediate representation, and Python code generation.\n\nWith converted torch.fx.GraphModule, we can resolve three problems in quantization:\n1. Automatically insert quant/dequant operation within PyTorch.\n2. Use FloatFunctional to wrap tensor operations that require special handling for quantization into modules. Examples are operations like add and cat which require special handling to determine output quantization parameters.\n3. Fuse modules: combine operations/modules into a single module to obtain higher accuracy and performance. 
This is done using the fuse_modules() API, which takes in lists of modules to be fused. We currently support the following fusions: [Conv, Relu], [Conv, BatchNorm], [Conv, BatchNorm, Relu], [Linear, Relu].\n\nFor detailed description, please refer to [PyTorch FX](https://pytorch.org/docs/stable/fx.html) and [FX Graph Mode Quantization](https://pytorch.org/docs/master/quantization.html#prototype-fx-graph-mode-quantization)", "doc_id": 0}, {"doc": "## FX Mode Support Matrix in Neural Compressor\n\n|quantization |FX |\n|-----------------------|:-----------:|\n|Static Quantization |✔ |\n|Dynamic Quantization |✔ |\n|Quantization-Aware Training |✔ |", "doc_id": 1}, {"doc": "## Get Start with FX in Neural Compressor\n\n**Note:** \"backend\" field indicates the backend used by the user in configure. And the \"default\" value means it will quantization model with fx backend for PyTorch model.\n\n### Post Training Static Quantization\n\n```\n from neural_compressor import quantization, PostTrainingQuantConfig\n conf = PostTrainingQuantConfig(backend=\"default\")\n model.eval()\n q_model = quantization.fit(model, conf, calib_dataloader=dataloader, eval_func=eval_func)\n q_model.save(\"save/to/path\")\n```", "doc_id": 2}, @@ -201,4 +215,3 @@ {"doc": "# Intel Neural Compressor User YAML Configuration Files\n## Introduction\n\nIntel\u00ae Neural Compressor uses YAML files for quick \nand user-friendly configurations. There are two types of YAML files - \nuser YAML files and framework YAML files, which are used in \nrunning user cases and setting up framework capabilities, respectively.\n\nFirst, let's take a look at a user YAML file, It defines the model, tuning\nstrategies, tuning calibrations and evaluations, and performance benchmarking\nof the passing model vs. original model.", "doc_id": 200}, {"doc": "# Intel Neural Compressor User YAML Configuration Files\n## Supported Feature Matrix\n\n| Optimization Techniques | YAML Configuration Files |\n|-------------------------|:------------------------:|\n| Quantization | ✔ |\n| Pruning | ✔ |\n| Distillation | ✔ |", "doc_id": 201}, {"doc": "# Intel Neural Compressor User YAML Configuration Files\n## Get started with User YAML Files\nA complete user YAML file is organized logically into several sections: \n* ***model***: The model specifications define a user model's name, inputs, outputs and framework.\n\n```yaml\nmodel: # mandatory. used to specify model specific information.\n name: mobilenet_v1 \n framework: tensorflow # mandatory. supported values are tensorflow, pytorch, pytorch_ipex, onnxrt_integer, onnxrt_qlinear or mxnet; allow new framework backend extension.\n inputs: image_tensor # optional. inputs field is only required in tensorflow.\n outputs: num_detections,detection_boxes,detection_scores,detection_classes # optional. outputs field is only required in tensorflow.\n```\n* ***quantization***: The quantization specifications define quantization tuning space and related calibrations. To calibrate, users can \nspecify *sampling_size* (optional) and use the subsection *dataloader* to specify\nthe dataset location using *root* and transformation using *transform*. To \nimplement tuning space constraints, users can use the subsection *model_wise* and *op_wise* for specific configurations.\n \n```yaml\nquantization: # optional. tuning constraints on model-wise for advance user to reduce tuning space.\n calibration:\n sampling_size: 20 # optional. default value is 100. 
used to set how many samples should be used in calibration.\n dataloader:\n dataset:\n ImageRecord:\n root: /path/to/imagenet/ # NOTE: modify to calibration dataset location if needed\n transform:\n BilinearImagenet: \n height: 224\n width: 224\n model_wise: # optional. tuning constraints on model-wise for advance user to reduce tuning space.\n weight:\n granularity: per_channel\n scheme: asym\n dtype: int8\n algorithm: minmax\n activation:\n granularity: per_tensor\n scheme: asym\n dtype: int8, fp32\n algorithm: minmax, kl\n op_wise: { # optional. tuning constraints on op-wise for advance user to reduce tuning space. \n 'conv1': {\n 'activation': {'dtype': ['uint8', 'fp32'], \n 'algorithm': ['minmax', 'kl'], \n 'scheme':['sym']},\n 'weight': {'dtype': ['int8', 'fp32'], \n 'algorithm': ['minmax']}\n }\n }\n```\n\n* ***pruning***: The pruning specifications define pruning tuning space. To define the training behavior, uses can \nuse the subsection *train* to specify the training hyper-parameters and the training dataloader. \nTo define the pruning approach, users can use the subsection *approach* to specify \npruning target, choose the type of pruning algorithm, and the way to apply it \nduring training process. \n\n```yaml\npruning:\n train:\n dataloader:\n ... \n epoch: 40\n optimizer:\n Adam:\n learning_rate: 1e-06\n beta_1: 0.9\n beta_2: 0.999\n epsilon: 1e-07\n criterion:\n SparseCategoricalCrossentropy:\n reduction: sum_over_batch_size\n from_logits: False\n approach:\n weight_compression:\n initial_sparsity: 0.0\n target_sparsity: 0.54\n start_epoch: 0\n end_epoch: 19\n pruners:\n - !Pruner\n start_epoch: 0\n end_epoch: 19\n prune_type: basic_magnitude\n```\n* ***distillation***: The distillation specifications define distillation's tuning\nspace. Similar to pruning, to define the training behavior, users can use the \nsubsection *train* to specify the training hyper-parameters and the training \ndataloader and it is optional if users implement *train_func* and set the attribute\nof distillation instance to *train_func*. For criterion, Intel\u00ae Neural Compressor provides a built-in \nknowledge distillation loss class to calculate distillation loss.\n```yaml\ndistillation:\n train:\n start_epoch: 0\n end_epoch: 90\n iteration: 1000\n frequency: 1\n dataloader:\n ...\n optimizer:\n SGD:\n learning_rate: 0.001 \n momentum: 0.1\n nesterov: True\n weight_decay: 0.001\n criterion:\n KnowledgeDistillationLoss:\n temperature: 1.0\n loss_types: ['CE', 'CE']\n loss_weights: [0.5, 0.5]\n```\n* ***evaluation***: The evaluation specifications define the dataloader and metric for accuracy evaluation as well as dataloader \nand configurations for performance benchmarking. \n```yaml\nevaluation: # optional. required if user doesn't provide eval_func in neural_compressor.Quantization.\n accuracy: \n metric:\n ...\n dataloader:\n ...\n```\n* ***tuning***: The tuning specifications define overall tuning targets. Users can\nuse *accuracy_criterion* to specify the target of accuracy loss percentage and use\n*exit_policy* to specify the tuning timeout in seconds. The random\nseed can be specified using *random_seed*. 
\n\n```yaml\ntuning:\n accuracy_criterion:\n relative: 0.01 # the tuning target of accuracy loss percentage: 1%\n higher_is_better: True\n exit_policy:\n timeout: 0 # tuning timeout (seconds), 0 means early stop\n random_seed: 9527 # random seed\n```", "doc_id": 202}] - diff --git a/workflows/chatbot/inference/backend/fastrag/llm_invocation.py b/workflows/chatbot/inference/backend/fastrag/llm_invocation.py index 3889692142b..f616b8aad3d 100644 --- a/workflows/chatbot/inference/backend/fastrag/llm_invocation.py +++ b/workflows/chatbot/inference/backend/fastrag/llm_invocation.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import logging from typing import Dict, List, Optional, Type, Union @@ -176,4 +190,3 @@ def __init__( def invoke(self, prompt: Union[str, List[str]], **kwargs) -> List[str]: output = self.model_invocation_layer.invoke(prompt=prompt, **kwargs, **self.model_kwargs) return output - diff --git a/workflows/chatbot/inference/backend/fastrag/logger.py b/workflows/chatbot/inference/backend/fastrag/logger.py index 61e6300cd27..3e72f1d15f1 100644 --- a/workflows/chatbot/inference/backend/fastrag/logger.py +++ b/workflows/chatbot/inference/backend/fastrag/logger.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import sys import platform import os @@ -92,4 +106,4 @@ def build_logger(logger_name, logger_filename): if isinstance(item, logging.Logger): item.addHandler(handler) - return logger \ No newline at end of file + return logger diff --git a/workflows/chatbot/inference/backend/fastrag/mpt_invocation.py b/workflows/chatbot/inference/backend/fastrag/mpt_invocation.py index 46ceb5d11d9..bea43cbdd7e 100644 --- a/workflows/chatbot/inference/backend/fastrag/mpt_invocation.py +++ b/workflows/chatbot/inference/backend/fastrag/mpt_invocation.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ import time import logging from typing import Dict, List, Optional, Type, Union diff --git a/workflows/chatbot/inference/backend/fastrag/run.sh b/workflows/chatbot/inference/backend/fastrag/run.sh index a53b8f0c01c..50f5f84ca72 100644 --- a/workflows/chatbot/inference/backend/fastrag/run.sh +++ b/workflows/chatbot/inference/backend/fastrag/run.sh @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + # Kill the existing processes and re-run ps -ef |grep 'fastrag_service' |awk '{print $2}' |xargs kill -9 diff --git a/workflows/chatbot/inference/backend/fastrag/utils.py b/workflows/chatbot/inference/backend/fastrag/utils.py index 646291709f6..1351f6224c1 100644 --- a/workflows/chatbot/inference/backend/fastrag/utils.py +++ b/workflows/chatbot/inference/backend/fastrag/utils.py @@ -1,4 +1,18 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + def detect_language(query): is_english = all(ord(c) < 128 for c in query) is_chinese = any('\u4e00' <= c <= '\u9fff' for c in query) diff --git a/workflows/chatbot/inference/backend/llmcache/__init__.py b/workflows/chatbot/inference/backend/llmcache/__init__.py index e69de29bb2d..28f108cb636 100644 --- a/workflows/chatbot/inference/backend/llmcache/__init__.py +++ b/workflows/chatbot/inference/backend/llmcache/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/workflows/chatbot/inference/backend/llmcache/cache.py b/workflows/chatbot/inference/backend/llmcache/cache.py index ad18b509976..458fe2f703d 100644 --- a/workflows/chatbot/inference/backend/llmcache/cache.py +++ b/workflows/chatbot/inference/backend/llmcache/cache.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + # pylint: disable=wrong-import-position from typing import Any, Optional, Callable diff --git a/workflows/chatbot/inference/backend/llmcache/cache_config.yml b/workflows/chatbot/inference/backend/llmcache/cache_config.yml index 24faabb7855..f9195153a25 100644 --- a/workflows/chatbot/inference/backend/llmcache/cache_config.yml +++ b/workflows/chatbot/inference/backend/llmcache/cache_config.yml @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + # For `model_src`, `evaluation`, `post_function`, `pre_function`, # `storage_config` options, Check README for more. @@ -15,7 +29,7 @@ storage_config: sqlite,faiss vector_params: # Set vector storage related params here -evaluation: +evaluation: distance evaluation_config: max_distance: 1.0 diff --git a/workflows/chatbot/inference/backend/sd/README.md b/workflows/chatbot/inference/backend/sd/README.md index 557bd270fe3..31e1323335f 100644 --- a/workflows/chatbot/inference/backend/sd/README.md +++ b/workflows/chatbot/inference/backend/sd/README.md @@ -59,4 +59,4 @@ Start the Stable Diffusion server: ```bash nohup bash run.sh & -``` \ No newline at end of file +``` diff --git a/workflows/chatbot/inference/backend/sd/config.py b/workflows/chatbot/inference/backend/sd/config.py index 0e9749f3ad9..b0961c2714c 100644 --- a/workflows/chatbot/inference/backend/sd/config.py +++ b/workflows/chatbot/inference/backend/sd/config.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ import socket MODEL_PATH="runwayml/stable-diffusion-v1-5" diff --git a/workflows/chatbot/inference/backend/sd/inference_server_ipex.py b/workflows/chatbot/inference/backend/sd/inference_server_ipex.py index 666ec63ddbb..aab8b2aced2 100644 --- a/workflows/chatbot/inference/backend/sd/inference_server_ipex.py +++ b/workflows/chatbot/inference/backend/sd/inference_server_ipex.py @@ -1,5 +1,19 @@ # -*- coding: utf-8 -*- #!/usr/bin/env python3 +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from gevent import monkey monkey.patch_all() import gevent diff --git a/workflows/chatbot/inference/backend/sd/run.sh b/workflows/chatbot/inference/backend/sd/run.sh index 0b4de4bb2be..1c089fc3dda 100644 --- a/workflows/chatbot/inference/backend/sd/run.sh +++ b/workflows/chatbot/inference/backend/sd/run.sh @@ -1,5 +1,19 @@ #!/bin/bash +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + # IOMP export OMP_NUM_THREADS=32 export LD_PRELOAD=${LD_PRELOAD}:${CONDA_PREFIX}/lib/libiomp5.so diff --git a/workflows/chatbot/inference/backend/sd/sql_conn.py b/workflows/chatbot/inference/backend/sd/sql_conn.py index fddf53af9d5..dbb4b804f1f 100644 --- a/workflows/chatbot/inference/backend/sd/sql_conn.py +++ b/workflows/chatbot/inference/backend/sd/sql_conn.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
+ import pymysql from threading import Condition import threading @@ -44,7 +58,7 @@ def run(self): """ LOGGER.info("ping mysql.") self.conn.ping(reconnect=True) - + if not self._ping(): self.re_connection() """ @@ -89,7 +103,7 @@ def update(self, *values): self.conn.commit() finally: self.lock.release() - + return result @@ -111,7 +125,7 @@ def insert(self, *values): self.conn.commit() finally: self.lock.release() - + return result mysql = MysqlWorker() diff --git a/workflows/chatbot/inference/document_indexing/README.md b/workflows/chatbot/inference/document_indexing/README.md index 2312c216d0b..a9946bc62de 100644 --- a/workflows/chatbot/inference/document_indexing/README.md +++ b/workflows/chatbot/inference/document_indexing/README.md @@ -48,4 +48,4 @@ embeddings = OpenAIEmbeddings() The user can start the dense indexing with, ```bash python doc_index.py --file_path "xxx" --output_path "xxx" --embedding_model hkunlp/instructor-large --embedding_method dense --store Chroma - ``` \ No newline at end of file + ``` diff --git a/workflows/chatbot/inference/document_indexing/doc_index.py b/workflows/chatbot/inference/document_indexing/doc_index.py index 629e55dd7f1..e81e47fd0a7 100644 --- a/workflows/chatbot/inference/document_indexing/doc_index.py +++ b/workflows/chatbot/inference/document_indexing/doc_index.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
 import os
 import argparse
 from langchain.embeddings import HuggingFaceEmbeddings, HuggingFaceInstructEmbeddings
@@ -71,7 +85,7 @@ def d_load_jsonl_file(file_path, process, max_length=378):
 # def d_load_xlsx_file(file_path, process, max_length=378):
 #     data = []
 #     data = pd.read_excel(file_path)
-# 
+#
 #     new_sens = []
 #     documents = []
 #     paragraphs = []
@@ -215,7 +229,7 @@ def s_load_file(file_path, process, document_store, max_length=378):
             new_doc = SDocument(content=paragraph, metadata=metadata)
             documents.append(new_doc)
     document_store.write_documents(documents)
-    
+
     return document_store
@@ -226,7 +240,7 @@ def persist_embedding(documents, persist_directory, model_path):
     vectordb.persist()
     vectordb = None
-    
+
 def read_docx(doc_path):
     doc = DDocument(doc_path)
     text = ''
@@ -282,4 +296,3 @@ def load_pdf(pdf_path):
         document_store.save(index_path="my_index.faiss")
     else:
         print("in memory db is done")
-
diff --git a/workflows/chatbot/inference/document_indexing/docker/README.md b/workflows/chatbot/inference/document_indexing/docker/README.md
index d4e9c18d751..2fb9f297deb 100644
--- a/workflows/chatbot/inference/document_indexing/docker/README.md
+++ b/workflows/chatbot/inference/document_indexing/docker/README.md
@@ -38,5 +38,3 @@ Users have the flexibility to choose their preferred pretrained encoder model fo
 ```
 python doc_index.py --file_path "xxx" --output_path "xxx" --embedding_model hkunlp/instructor-large --embedding_method dense --store Chroma
 ```
-
-
diff --git a/workflows/chatbot/inference/document_indexing/requirements.txt b/workflows/chatbot/inference/document_indexing/requirements.txt
index 5c4d9cba3a3..b20b74004a0 100644
--- a/workflows/chatbot/inference/document_indexing/requirements.txt
+++ b/workflows/chatbot/inference/document_indexing/requirements.txt
@@ -1,6 +1,6 @@
-langchain
 chromadb
+InstructorEmbedding
+langchain
+pandas
 PyPDF2
 python-docx
-pandas
-InstructorEmbedding
\ No newline at end of file
diff --git a/workflows/chatbot/inference/memory_controller/Entity_Memory.py b/workflows/chatbot/inference/memory_controller/Entity_Memory.py
index 28f3f87f094..7a50ca4c3a3 100644
--- a/workflows/chatbot/inference/memory_controller/Entity_Memory.py
+++ b/workflows/chatbot/inference/memory_controller/Entity_Memory.py
@@ -1,3 +1,17 @@
+# Copyright (c) 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from langchain.schema import BaseMemory
 from pydantic import BaseModel
 from typing import List, Dict, Any
@@ -44,4 +58,4 @@ def save_context(self, inputs: Dict[str, Any], outputs: Dict[str, str]) -> None
         if ent_str in self.entities:
             self.entities[ent_str] += f"\n{text}"
         else:
-            self.entities[ent_str] = text
\ No newline at end of file
+            self.entities[ent_str] = text
diff --git a/workflows/chatbot/inference/memory_controller/chat_with_memory.py b/workflows/chatbot/inference/memory_controller/chat_with_memory.py
index 4913cffd25c..8cbdfaf1e10 100644
--- a/workflows/chatbot/inference/memory_controller/chat_with_memory.py
+++ b/workflows/chatbot/inference/memory_controller/chat_with_memory.py
@@ -1,3 +1,17 @@
+# Copyright (c) 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import os, re
 from langchain.llms import HuggingFacePipeline
 from langchain.prompts import PromptTemplate
@@ -26,7 +40,7 @@ def inference(args, query, memory):
             template=prompt_template, input_variables=["entities", "question"]
         )
     else:
-        prompt_template = """The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. 
+        prompt_template = """The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context.
 AI should revise the answer according to the human feedback.
 
 {chat_history}
diff --git a/workflows/chatbot/inference/models/__init__.py b/workflows/chatbot/inference/models/__init__.py
index e69de29bb2d..28f108cb636 100644
--- a/workflows/chatbot/inference/models/__init__.py
+++ b/workflows/chatbot/inference/models/__init__.py
@@ -0,0 +1,13 @@
+# Copyright (c) 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/workflows/chatbot/inference/models/mpt/__init__.py b/workflows/chatbot/inference/models/mpt/__init__.py
index e69de29bb2d..28f108cb636 100755
--- a/workflows/chatbot/inference/models/mpt/__init__.py
+++ b/workflows/chatbot/inference/models/mpt/__init__.py
@@ -0,0 +1,13 @@
+# Copyright (c) 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/workflows/chatbot/inference/models/mpt/mpt_trace.py b/workflows/chatbot/inference/models/mpt/mpt_trace.py
index 59b1b82f464..39c0cc83d6a 100644
--- a/workflows/chatbot/inference/models/mpt/mpt_trace.py
+++ b/workflows/chatbot/inference/models/mpt/mpt_trace.py
@@ -1,3 +1,17 @@
+# Copyright (c) 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import torch
 from typing import Optional, Tuple
 from transformers.modeling_outputs import CausalLMOutputWithPast
diff --git a/workflows/chatbot/inference/requirements.txt b/workflows/chatbot/inference/requirements.txt
index 43c62b04c5f..df899c47d4f 100644
--- a/workflows/chatbot/inference/requirements.txt
+++ b/workflows/chatbot/inference/requirements.txt
@@ -1,11 +1,11 @@
 datasets
-torch
-intel_extension_for_pytorch
-git+https://github.com/huggingface/transformers.git
-sentencepiece
-peft
+einops
 evaluate
+git+https://github.com/huggingface/transformers.git
+gptcache
+intel_extension_for_pytorch
 nltk
+peft
 rouge_score
-einops
-gptcache
\ No newline at end of file
+sentencepiece
+torch
diff --git a/workflows/compression_aware_training/chart/Chart.yaml b/workflows/compression_aware_training/chart/Chart.yaml
index 8867b821919..c03e111b21e 100644
--- a/workflows/compression_aware_training/chart/Chart.yaml
+++ b/workflows/compression_aware_training/chart/Chart.yaml
@@ -1,3 +1,17 @@
+# Copyright (c) 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 apiVersion: v2
 name: Compression Aware
 description: A Helm chart for Kubernetes
diff --git a/workflows/compression_aware_training/chart/templates/workflowTemplate.yaml b/workflows/compression_aware_training/chart/templates/workflowTemplate.yaml
index 0a17dbada48..ed910f1f001 100644
--- a/workflows/compression_aware_training/chart/templates/workflowTemplate.yaml
+++ b/workflows/compression_aware_training/chart/templates/workflowTemplate.yaml
@@ -1,3 +1,17 @@
+# Copyright (c) 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 apiVersion: argoproj.io/v1alpha1
 kind: WorkflowTemplate
 metadata:
@@ -10,8 +24,8 @@ spec:
       tasks:
         - name: compression-aware
           template: compression-aware
-          arguments: 
-            parameters: 
+          arguments:
+            parameters:
             - name: config
               value: '{{"{{workflow.parameters.config}}"}}'
       failFast: true
@@ -26,7 +40,7 @@ spec:
 {{ end }}
       parameters:
       - name: config
-      outputs: 
+      outputs:
       artifacts:
       - name: 'compression-aware-output'
         path: /workspace/output
@@ -55,7 +69,7 @@ spec:
 {{ if eq .Values.dataset.type "nfs" }}
       volumes:
       - name: config-dir
-        nfs: 
+        nfs:
           server: {{ .Values.dataset.nfs.server }}
           path: {{ .Values.dataset.nfs.path }}
           readOnly: true
diff --git a/workflows/compression_aware_training/chart/values.yaml b/workflows/compression_aware_training/chart/values.yaml
index 62bf39dc599..a39c18beb22 100644
--- a/workflows/compression_aware_training/chart/values.yaml
+++ b/workflows/compression_aware_training/chart/values.yaml
@@ -1,3 +1,17 @@
+# Copyright (c) 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 serviceAccountName: argo
 metadata:
   name: compression-aware
diff --git a/workflows/compression_aware_training/config/config.yaml b/workflows/compression_aware_training/config/config.yaml
index 383fe741cc3..48e31757b6e 100755
--- a/workflows/compression_aware_training/config/config.yaml
+++ b/workflows/compression_aware_training/config/config.yaml
@@ -1,3 +1,17 @@
+# Copyright (c) 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 model_name_or_path: "prajjwal1/bert-mini"
 teacher_model_name_or_path: "bert-base-uncased"
 task_name: "sst2"
@@ -13,4 +27,4 @@ perf_tol: 0.03
 quantization: true
 quantization_approach: "QuantizationAwareTraining"
 is_relative: true
-int8: false
\ No newline at end of file
+int8: false
diff --git a/workflows/compression_aware_training/config/distillation.yaml b/workflows/compression_aware_training/config/distillation.yaml
index 673bf0d9145..ca267e46166 100755
--- a/workflows/compression_aware_training/config/distillation.yaml
+++ b/workflows/compression_aware_training/config/distillation.yaml
@@ -1,3 +1,17 @@
+# Copyright (c) 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 model_name_or_path: "prajjwal1/bert-mini"
 teacher_model_name_or_path: "bert-base-uncased"
 task_name: "sst2"
diff --git a/workflows/compression_aware_training/config/distillation_multinode.yaml b/workflows/compression_aware_training/config/distillation_multinode.yaml
index d623e0bb634..5dfbb2746f7 100755
--- a/workflows/compression_aware_training/config/distillation_multinode.yaml
+++ b/workflows/compression_aware_training/config/distillation_multinode.yaml
@@ -1,3 +1,17 @@
+# Copyright (c) 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 model_name_or_path: "prajjwal1/bert-mini"
 teacher_model_name_or_path: "bert-base-uncased"
 task_name: "sst2"
diff --git a/workflows/compression_aware_training/config/distillation_with_qat.yaml b/workflows/compression_aware_training/config/distillation_with_qat.yaml
index 0975fc1af07..48e31757b6e 100755
--- a/workflows/compression_aware_training/config/distillation_with_qat.yaml
+++ b/workflows/compression_aware_training/config/distillation_with_qat.yaml
@@ -1,3 +1,17 @@
+# Copyright (c) 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 model_name_or_path: "prajjwal1/bert-mini"
 teacher_model_name_or_path: "bert-base-uncased"
 task_name: "sst2"
diff --git a/workflows/compression_aware_training/config/qat.yaml b/workflows/compression_aware_training/config/qat.yaml
index c4f300a3fb6..faf0416ed2f 100644
--- a/workflows/compression_aware_training/config/qat.yaml
+++ b/workflows/compression_aware_training/config/qat.yaml
@@ -1,3 +1,17 @@
+# Copyright (c) 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 model_name_or_path: "prajjwal1/bert-mini"
 task_name: "sst2"
 distillation: false
diff --git a/workflows/compression_aware_training/config/sat.yaml b/workflows/compression_aware_training/config/sat.yaml
index 608f51bca9f..7731f0dfb69 100755
--- a/workflows/compression_aware_training/config/sat.yaml
+++ b/workflows/compression_aware_training/config/sat.yaml
@@ -1,3 +1,17 @@
+# Copyright (c) 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 model_name_or_path: "Intel/distilbert-base-uncased-sparse-90-unstructured-pruneofa"
 teacher_model_name_or_path: "distilbert-base-uncased-finetuned-sst-2-english"
 task_name: "sst2"
diff --git a/workflows/compression_aware_training/docker/docker-compose.yaml b/workflows/compression_aware_training/docker/docker-compose.yaml
index 64914b11417..46fef68f3be 100644
--- a/workflows/compression_aware_training/docker/docker-compose.yaml
+++ b/workflows/compression_aware_training/docker/docker-compose.yaml
@@ -1,3 +1,17 @@
+# Copyright (c) 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 services:
   dev:
     build:
@@ -8,7 +22,7 @@ services:
       context: ../
       dockerfile: ./docker/Dockerfile
     command: python /workspace/src/run.py --config_file /workspace/config/${CONFIG}.yaml
-    environment: 
+    environment:
      - http_proxy=${http_proxy}
      - https_proxy=${https_proxy}
      - no_proxy=${no_proxy}
diff --git a/workflows/compression_aware_training/requirements.txt b/workflows/compression_aware_training/requirements.txt
index 3e016988a92..40bc25c1adf 100644
--- a/workflows/compression_aware_training/requirements.txt
+++ b/workflows/compression_aware_training/requirements.txt
@@ -1,8 +1,8 @@
-transformers
+--extra-index-url https://download.pytorch.org/whl/cpu
+accelerate==0.21.0
 datasets==2.11.0
+intel-extension-for-transformers==1.1
+intel_extension_for_pytorch==1.13.100
 neural-compressor==2.1
---extra-index-url https://download.pytorch.org/whl/cpu
 torch==1.13.1
-intel_extension_for_pytorch==1.13.100
-intel-extension-for-transformers==1.1
-accelerate==0.21.0
\ No newline at end of file
+transformers
diff --git a/workflows/compression_aware_training/src/itrex_opt.py b/workflows/compression_aware_training/src/itrex_opt.py
index b20d6a4e31b..b727d22c412 100755
--- a/workflows/compression_aware_training/src/itrex_opt.py
+++ b/workflows/compression_aware_training/src/itrex_opt.py
@@ -90,7 +90,7 @@ def __init__(self, config_file, no_cuda):
                 OptimizationArguments,
             )
         )
-        
+
         if config_file.endswith(".yaml"):
             model_args, data_args, training_args, optim_args = parser.parse_yaml_file(
                 yaml_file=os.path.abspath(config_file)
@@ -946,7 +946,7 @@ def get_logits(teacher_model, train_dataset, teacher_train_dataset):
             pruner_config = PrunerConfig(prune_type=prune_type, target_sparsity_ratio=target_sparsity_ratio)
             pruning_conf = PruningConfig(framework="pytorch_fx",pruner_config=[pruner_config], metrics=tune_metric)
             distillation_conf = DistillationConfig(framework="pytorch_fx", metrics=tune_metric)
-            
+
             objective = objectives.performance
             quantization_conf = QuantizationConfig(
                 approach=self.optim_args.quantization_approach,
diff --git a/workflows/compression_aware_training/src/utils.py b/workflows/compression_aware_training/src/utils.py
index c699279acd2..6f158392c67 100755
--- a/workflows/compression_aware_training/src/utils.py
+++ b/workflows/compression_aware_training/src/utils.py
@@ -250,4 +250,4 @@ class OptimizationArguments:
     accuracy_only: bool = field(
         default=False,
         metadata={"help":"Whether to only test accuracy for model tuned by Neural Compressor."}
-    )
\ No newline at end of file
+    )
diff --git a/workflows/dlsa/requirements.txt b/workflows/dlsa/requirements.txt
index a244ac6f138..632f2e0b4cd 100644
--- a/workflows/dlsa/requirements.txt
+++ b/workflows/dlsa/requirements.txt
@@ -1,6 +1,6 @@
-transformers
+--extra-index-url https://download.pytorch.org/whl/cpu
 datasets==2.9.0
+intel_extension_for_pytorch==1.13
 neural-compressor==2.0
---extra-index-url https://download.pytorch.org/whl/cpu
 torch==1.13.1
-intel_extension_for_pytorch==1.13
\ No newline at end of file
+transformers
diff --git a/workflows/hf_finetuning_and_inference_nlp/config/finetune.yaml b/workflows/hf_finetuning_and_inference_nlp/config/finetune.yaml
index 6943de498a4..8180fcacc6e 100644
--- a/workflows/hf_finetuning_and_inference_nlp/config/finetune.yaml
+++ b/workflows/hf_finetuning_and_inference_nlp/config/finetune.yaml
@@ -1,3 +1,17 @@
+# Copyright (c) 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 args:
   model_name_or_path: "bert-base-uncased" # input the fine-tuned model path
   tokenizer_name: "bert-base-uncased" # input the fine-tuned model path
@@ -24,4 +38,4 @@ training_args:
   do_predict: true
   per_device_train_batch_size: 512
   per_device_eval_batch_size: 512
-  output_dir: "./output_dir"
\ No newline at end of file
+  output_dir: "./output_dir"
diff --git a/workflows/hf_finetuning_and_inference_nlp/config/finetune_Model-bertbase_Task-sentiment_Dataset-imdb.yaml b/workflows/hf_finetuning_and_inference_nlp/config/finetune_Model-bertbase_Task-sentiment_Dataset-imdb.yaml
index 44b8ce7fd3d..be84b1441bb 100644
--- a/workflows/hf_finetuning_and_inference_nlp/config/finetune_Model-bertbase_Task-sentiment_Dataset-imdb.yaml
+++ b/workflows/hf_finetuning_and_inference_nlp/config/finetune_Model-bertbase_Task-sentiment_Dataset-imdb.yaml
@@ -1,3 +1,17 @@
+# Copyright (c) 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 args:
   model_name_or_path: "bert-base-uncased" # input the fine-tuned model path
   tokenizer_name: "bert-base-uncased" # input the fine-tuned model path
diff --git a/workflows/hf_finetuning_and_inference_nlp/config/finetune_Model-bioclinicalbert_Task-HLSDiseasePrediction_Dataset-local.yaml b/workflows/hf_finetuning_and_inference_nlp/config/finetune_Model-bioclinicalbert_Task-HLSDiseasePrediction_Dataset-local.yaml
index a60cb71a6a8..85bcd3deeb4 100644
--- a/workflows/hf_finetuning_and_inference_nlp/config/finetune_Model-bioclinicalbert_Task-HLSDiseasePrediction_Dataset-local.yaml
+++ b/workflows/hf_finetuning_and_inference_nlp/config/finetune_Model-bioclinicalbert_Task-HLSDiseasePrediction_Dataset-local.yaml
@@ -1,8 +1,22 @@
+# Copyright (c) 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 args:
   model_name_or_path: "emilyalsentzer/Bio_ClinicalBERT"
   tokenizer_name: "emilyalsentzer/Bio_ClinicalBERT"
   dataset: "local" # local or huggingface datasets name
-  
+
   # Add local dataset configurations below. Skip for HF datasets.
   # Make sure to specify your local dataset . The code will fail otherwise.
   local_dataset:
@@ -14,7 +28,7 @@ args:
   data_column: "symptoms"
   id: "Patient_ID"
   label_list: ["Malignant", "Normal", "Benign"]
-  
+
   # Add the fine tuning configurations below
   pipeline: "finetune"
   finetune_impl: "itrex"
diff --git a/workflows/hf_finetuning_and_inference_nlp/config/inference.yaml b/workflows/hf_finetuning_and_inference_nlp/config/inference.yaml
index 8114d8296a6..9dd4468842a 100644
--- a/workflows/hf_finetuning_and_inference_nlp/config/inference.yaml
+++ b/workflows/hf_finetuning_and_inference_nlp/config/inference.yaml
@@ -1,3 +1,17 @@
+# Copyright (c) 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 args:
   model_name_or_path: "bert-base-uncased" # input the fine-tuned model path
   tokenizer_name: "bert-base-uncased" # input the fine-tuned model path
@@ -12,13 +26,13 @@ args:
   data_column: "symptoms"
   id: "Patient_ID"
   label_list: ["Malignant", "Normal", "Benign"]
-  
+
   # Add the Inference configurations below
-  pipeline: "inference" 
+  pipeline: "inference"
   infer_impl: "itrex"
   dtype_inf: "bf16"
-  use_ipex: true 
-  use_onednn: true 
+  use_ipex: true
+  use_onednn: true
   max_seq_len: 64
   smoke_test: false
   max_train_samples: null
@@ -32,4 +46,4 @@ args:
 training_args:
   do_predict: true
   per_device_eval_batch_size: 512
-  output_dir: "./output_dir"
\ No newline at end of file
+  output_dir: "./output_dir"
diff --git a/workflows/hf_finetuning_and_inference_nlp/config/inference_Model-bertbase_Task-sentiment_Dataset-imdb.yaml b/workflows/hf_finetuning_and_inference_nlp/config/inference_Model-bertbase_Task-sentiment_Dataset-imdb.yaml
index 697c08c22e9..bf9c7347b79 100644
--- a/workflows/hf_finetuning_and_inference_nlp/config/inference_Model-bertbase_Task-sentiment_Dataset-imdb.yaml
+++ b/workflows/hf_finetuning_and_inference_nlp/config/inference_Model-bertbase_Task-sentiment_Dataset-imdb.yaml
@@ -1,3 +1,17 @@
+# Copyright (c) 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 args:
   model_name_or_path: "bert-base-uncased" # input the fine-tuned model path
   tokenizer_name: "bert-base-uncased" # input the fine-tuned model path
@@ -12,9 +26,9 @@ args:
   data_column: "symptoms"
   id: "Patient_ID"
   label_list: ["Malignant", "Normal", "Benign"]
-  
+
   # Add the Inference configurations below
-  pipeline: "inference" 
+  pipeline: "inference"
   infer_impl: "itrex"
   dtype_inf: "fp32"
   max_seq_len: 64
diff --git a/workflows/hf_finetuning_and_inference_nlp/config/inference_Model-bioclinicalbert_Task-HLSDiseasePrediction_Dataset-local.yaml b/workflows/hf_finetuning_and_inference_nlp/config/inference_Model-bioclinicalbert_Task-HLSDiseasePrediction_Dataset-local.yaml
index 2b17a22a04d..0e25cbce5a9 100644
--- a/workflows/hf_finetuning_and_inference_nlp/config/inference_Model-bioclinicalbert_Task-HLSDiseasePrediction_Dataset-local.yaml
+++ b/workflows/hf_finetuning_and_inference_nlp/config/inference_Model-bioclinicalbert_Task-HLSDiseasePrediction_Dataset-local.yaml
@@ -1,8 +1,22 @@
+# Copyright (c) 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 args:
   model_name_or_path: "./models/hls/" # input the fine-tuned model path
   tokenizer_name: "./models/hls/" # input the fine-tuned model path
   dataset: "local" # local or huggingface datasets name
-  
+
   # Add local dataset configurations below. Skip for HF datasets.
   # Make sure to specify your local dataset . The code will fail otherwise.
   local_dataset:
@@ -13,9 +27,9 @@ args:
   data_column: "symptoms"
   id: "Patient_ID"
   label_list: ["Malignant", "Normal", "Benign"]
-  
+
   # Add the Inference configurations below
-  pipeline: "inference" 
+  pipeline: "inference"
   infer_impl: "itrex"
   dtype_inf: "fp32"
   max_seq_len: 64
diff --git a/workflows/hf_finetuning_and_inference_nlp/config/inference_only_Model-bertbase_Task-sentiment_Dataset-imdb.yaml b/workflows/hf_finetuning_and_inference_nlp/config/inference_only_Model-bertbase_Task-sentiment_Dataset-imdb.yaml
index 66f7faf0645..b4117b90909 100644
--- a/workflows/hf_finetuning_and_inference_nlp/config/inference_only_Model-bertbase_Task-sentiment_Dataset-imdb.yaml
+++ b/workflows/hf_finetuning_and_inference_nlp/config/inference_only_Model-bertbase_Task-sentiment_Dataset-imdb.yaml
@@ -1,3 +1,17 @@
+# Copyright (c) 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 args:
   model_name_or_path: "bert-base-uncased" # input the fine-tuned model path
   tokenizer_name: "bert-base-uncased" # input the fine-tuned model path
@@ -12,9 +26,9 @@ args:
   data_column: "symptoms"
   id: "Patient_ID"
   label_list: ["Malignant", "Normal", "Benign"]
-  
+
   # Add the Inference configurations below
-  pipeline: "inference_only" 
+  pipeline: "inference_only"
   infer_impl: "itrex"
   dtype_inf: "fp32"
   max_seq_len: 64
diff --git a/workflows/hf_finetuning_and_inference_nlp/config/inference_trainerStockPT.yaml b/workflows/hf_finetuning_and_inference_nlp/config/inference_trainerStockPT.yaml
index d49ef574696..a5ce50e2584 100644
--- a/workflows/hf_finetuning_and_inference_nlp/config/inference_trainerStockPT.yaml
+++ b/workflows/hf_finetuning_and_inference_nlp/config/inference_trainerStockPT.yaml
@@ -1,3 +1,17 @@
+# Copyright (c) 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 args:
   model_name_or_path: "bert-base-uncased" # input the fine-tuned model path
   tokenizer_name: "bert-base-uncased" # input the fine-tuned model path
@@ -12,12 +26,12 @@ args:
   data_column: "symptoms"
   id: "Patient_ID"
   label_list: ["Malignant", "Normal", "Benign"]
-  
+
   # Add the Inference configurations below
-  pipeline: "inference" 
+  pipeline: "inference"
   infer_impl: "trainer" # Use trainer for default stock PT Huggingface NLP Trainer
-  dtype_inf: "fp32" # bf16 is ineffective for stockPTtrainer: Use infer_impl=itrex & use_ipex=true. 
-  use_ipex: false # No effect / Not valid when using trainer 
+  dtype_inf: "fp32" # bf16 is ineffective for stockPTtrainer: Use infer_impl=itrex & use_ipex=true.
+  use_ipex: false # No effect / Not valid when using trainer
   use_onednn: false # No effect / not valid when using trainer
   max_seq_len: 64
   smoke_test: false
diff --git a/workflows/hf_finetuning_and_inference_nlp/docker/docker-compose.yaml b/workflows/hf_finetuning_and_inference_nlp/docker/docker-compose.yaml
index 727c394447e..a1ca5f6a153 100644
--- a/workflows/hf_finetuning_and_inference_nlp/docker/docker-compose.yaml
+++ b/workflows/hf_finetuning_and_inference_nlp/docker/docker-compose.yaml
@@ -1,3 +1,17 @@
+# Copyright (c) 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 services:
   dev:
     build:
@@ -8,7 +22,7 @@ services:
      context: ../
      dockerfile: ./docker/Dockerfile
    command: python /workspace/src/run.py --config_file /workspace/config/${CONFIG}.yaml
-    environment: 
+    environment:
      - http_proxy=${http_proxy}
      - https_proxy=${https_proxy}
      - no_proxy=${no_proxy}
diff --git a/workflows/hf_finetuning_and_inference_nlp/requirements.txt b/workflows/hf_finetuning_and_inference_nlp/requirements.txt
index f728ef2c852..36050cac8d4 100644
--- a/workflows/hf_finetuning_and_inference_nlp/requirements.txt
+++ b/workflows/hf_finetuning_and_inference_nlp/requirements.txt
@@ -1,8 +1,8 @@
-transformers
+--extra-index-url https://download.pytorch.org/whl/cpu
+accelerate==0.21.0
 datasets==2.11.0
+intel-extension-for-transformers==1.0.0
+intel_extension_for_pytorch==2.1.0
 neural-compressor==2.1
---extra-index-url https://download.pytorch.org/whl/cpu
 torch==2.1.0
-intel_extension_for_pytorch==2.1.0
-intel-extension-for-transformers==1.0.0
-accelerate==0.21.0
\ No newline at end of file
+transformers
diff --git a/workflows/hf_finetuning_and_inference_nlp/src/__init__.py b/workflows/hf_finetuning_and_inference_nlp/src/__init__.py
index e69de29bb2d..28f108cb636 100644
--- a/workflows/hf_finetuning_and_inference_nlp/src/__init__.py
+++ b/workflows/hf_finetuning_and_inference_nlp/src/__init__.py
@@ -0,0 +1,13 @@
+# Copyright (c) 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
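(Editor's note, not part of the patch.) The `hf_finetuning_and_inference_nlp` configs above all feed a single entry point: docker-compose runs `python /workspace/src/run.py --config_file /workspace/config/${CONFIG}.yaml`, and run.py dispatches on the `pipeline` and `infer_impl` keys. The following is a minimal sketch of that dispatch, mirroring the `src/run.py` hunk further below; the constructor signatures and module names (`infer_trainer.TrainerInfer`, `infer_itrex.ItrexInfer`) are taken from this diff, but treat the sketch as illustrative rather than the committed code:

```python
# Illustrative sketch only; mirrors the dispatch visible in src/run.py below.
def build_infer(args, training_args):
    kwargs = {"args": args, "training_args": training_args}
    if args.infer_impl == "trainer":
        # Stock Hugging Face Trainer path; use_ipex/use_onednn have no effect here
        # (see the comments in inference_trainerStockPT.yaml above).
        from infer_trainer import TrainerInfer
        return TrainerInfer(**kwargs)
    elif args.infer_impl == "itrex":
        # ITREX path; dtype_inf may be fp32, bf16, or int8 (see infer_itrex.py below).
        from infer_itrex import ItrexInfer
        return ItrexInfer(**kwargs)
    else:
        raise ValueError("Now only support trainer and itrex implementation for inference pipeline.")
```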
diff --git a/workflows/hf_finetuning_and_inference_nlp/src/finetune_itrex.py b/workflows/hf_finetuning_and_inference_nlp/src/finetune_itrex.py
index 0c5dbee3ea9..9e3ba13c89d 100644
--- a/workflows/hf_finetuning_and_inference_nlp/src/finetune_itrex.py
+++ b/workflows/hf_finetuning_and_inference_nlp/src/finetune_itrex.py
@@ -70,11 +70,11 @@ def _do_finetune(self):
         with torch.backends.mkldnn.flags(enabled = self.training_args.use_ipex or vars(self.args).get("use_onednn", True)):
             train_result = self.trainer.train()
-        
+
         self.trainer.save_model()
-        
+
         save_train_metrics(train_result, self.trainer, len(self.train_data))
-        
+
     def _do_infer(self):
         with torch.backends.mkldnn.flags(enabled = self.training_args.use_ipex or vars(self.args).get("use_onednn", True)):
             if self.training_args.do_predict:
diff --git a/workflows/hf_finetuning_and_inference_nlp/src/finetune_trainer.py b/workflows/hf_finetuning_and_inference_nlp/src/finetune_trainer.py
index d4997fd0a8b..2dc9e2ecd75 100644
--- a/workflows/hf_finetuning_and_inference_nlp/src/finetune_trainer.py
+++ b/workflows/hf_finetuning_and_inference_nlp/src/finetune_trainer.py
@@ -59,5 +59,5 @@ def _do_infer(self):
                 f"\n*********** TEST_METRICS ***********\nAccuracy: {metrics['test_acc']}\n"
             )
         else:
-            save_performance_metrics(self.trainer, self.train_data, 
+            save_performance_metrics(self.trainer, self.train_data,
                 path.join(self.training_args.output_dir, self.args.finetune_output) )
diff --git a/workflows/hf_finetuning_and_inference_nlp/src/infer.py b/workflows/hf_finetuning_and_inference_nlp/src/infer.py
index cd1aa9fd572..6391124fbb2 100644
--- a/workflows/hf_finetuning_and_inference_nlp/src/infer.py
+++ b/workflows/hf_finetuning_and_inference_nlp/src/infer.py
@@ -51,7 +51,7 @@ def e2e_infer_setup_only(self):
             self.tokenize_data = False
             self._preprocess()
             self._load_model()
-    
+
     def e2e_infer_only(self, filename):
         with self.track("Inference Only Run"):
             self.load_tokenizer = False
diff --git a/workflows/hf_finetuning_and_inference_nlp/src/infer_itrex.py b/workflows/hf_finetuning_and_inference_nlp/src/infer_itrex.py
index 8a1ee83d321..b666c6f8bbc 100644
--- a/workflows/hf_finetuning_and_inference_nlp/src/infer_itrex.py
+++ b/workflows/hf_finetuning_and_inference_nlp/src/infer_itrex.py
@@ -87,13 +87,13 @@ def _load_model(self):
             self.model = self.trainer.quantize(
                 quant_config=q_config, calib_dataloader=eval_dataloader
             )
-        
+
         else:
             error_msg = f"Now only support fp32, bf16 and int8.Your input datatype is {self.args.dtype_inf}."
             raise ValueError(error_msg)
 
     def _do_infer(self):
-        
+
         if self.args.dtype_inf == "bf16" and not (self.training_args.use_ipex or vars(self.args).get("use_onednn", True)):
             raise ValueError("BF16 with both IPEX and OneDNN disabled is currently not implemented...")
diff --git a/workflows/hf_finetuning_and_inference_nlp/src/infer_trainer.py b/workflows/hf_finetuning_and_inference_nlp/src/infer_trainer.py
index 7979ce41aee..cd858906364 100644
--- a/workflows/hf_finetuning_and_inference_nlp/src/infer_trainer.py
+++ b/workflows/hf_finetuning_and_inference_nlp/src/infer_trainer.py
@@ -53,5 +53,5 @@ def _do_infer(self):
                 f"\n*********** TEST_METRICS ***********\nAccuracy: {metrics['test_acc']}\n"
             )
         else:
-            save_performance_metrics(self.trainer, self.test_data, 
-                path.join(self.training_args.output_dir, self.args.inference_output) )
\ No newline at end of file
+            save_performance_metrics(self.trainer, self.test_data,
+                path.join(self.training_args.output_dir, self.args.inference_output) )
diff --git a/workflows/hf_finetuning_and_inference_nlp/src/run.py b/workflows/hf_finetuning_and_inference_nlp/src/run.py
index 4fa9b3e1795..aa6c9b72438 100644
--- a/workflows/hf_finetuning_and_inference_nlp/src/run.py
+++ b/workflows/hf_finetuning_and_inference_nlp/src/run.py
@@ -83,7 +83,7 @@ def main():
         if args.dataset != "local":
             error_msg = f"Now only support local datasets for inference_only pipeline."
             raise ValueError(error_msg)
-        
+
         if args.infer_impl == "trainer":
             from infer_trainer import TrainerInfer
             infer = TrainerInfer(**kwargs)
@@ -93,12 +93,12 @@
         else:
             error_msg = f"Now only support trainer and itrex implementation for inference pipeline."
             raise ValueError(error_msg)
-        
+
         infer.e2e_infer_setup_only()
 
         if type(args.local_dataset["inference_input"]) == str:
             args.local_dataset["inference_input"] = args.local_dataset["inference_input"].split(",")
-        
+
         for f in args.local_dataset["inference_input"]:
             infer.e2e_infer_only(f)
diff --git a/workflows/hf_finetuning_and_inference_nlp/src/utils.py b/workflows/hf_finetuning_and_inference_nlp/src/utils.py
index 4cdb1f4a388..bfa11ef24ba 100644
--- a/workflows/hf_finetuning_and_inference_nlp/src/utils.py
+++ b/workflows/hf_finetuning_and_inference_nlp/src/utils.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2022 Intel Corporation 
+# Copyright (C) 2022 Intel Corporation
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -202,11 +202,9 @@ def save_performance_metrics(trainer, data, output_file):
     predictions_report = {}
     predictions_report["label_id"] = [label_map[i] for i in predictions.label_ids.tolist()]
-    predictions_report["predictions_label"] = [label_map[i] for i in np.argmax(predictions.predictions, axis=1).tolist() ]
-    predictions_report["predictions_probabilities"] = softmax(predictions.predictions, axis=1).tolist()
+    predictions_report["predictions_label"] = [label_map[i] for i in np.argmax(predictions.predictions, axis=1).tolist() ]
+    predictions_report["predictions_probabilities"] = softmax(predictions.predictions, axis=1).tolist()
     predictions_report["metrics"] = predictions.metrics
-
-    with open(output_file, 'w') as file:
-        _ = yaml.dump(predictions_report, file)
-
+    with open(output_file, 'w') as file:
+        _ = yaml.dump(predictions_report, file)
diff --git a/workflows/hf_finetuning_and_inference_nlp/test/test_inference_only_programatically.py b/workflows/hf_finetuning_and_inference_nlp/test/test_inference_only_programatically.py
index 14f66d102eb..37a456daf63 100644
--- a/workflows/hf_finetuning_and_inference_nlp/test/test_inference_only_programatically.py
+++ b/workflows/hf_finetuning_and_inference_nlp/test/test_inference_only_programatically.py
@@ -1,3 +1,17 @@
+# Copyright (c) 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from transformers import TrainingArguments
 from transformers import logging as hf_logging
 import argparse
@@ -57,9 +71,9 @@
 for item in data["args"]:
     setattr(args, item, data["args"][item])
 
-kwargs = {"args": args, "training_args": training_args} 
+kwargs = {"args": args, "training_args": training_args}
 
-infer = ItrexInfer(**kwargs) 
+infer = ItrexInfer(**kwargs)
 infer.e2e_infer_setup_only()
 
 inf_list = [
@@ -67,6 +81,6 @@
     '/data/datac/samanway/annotation/d1.csv',
     '/data/datac/samanway/annotation/d2.csv'
 ]
-    
+
 for f in inf_list:
-    infer.e2e_infer_only(f)
\ No newline at end of file
+    infer.e2e_infer_only(f)
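(Editor's note, not part of the patch.) The test file above doubles as a usage example for the inference-only pipeline: the model and tokenizer are loaded once via `e2e_infer_setup_only()`, after which any number of local CSV files can be scored with `e2e_infer_only()`. A condensed sketch of that flow, assuming the import path used by `src/run.py`; the config path and CSV file names are placeholder assumptions:

```python
# Condensed from test_inference_only_programatically.py; paths are placeholders.
import argparse

import yaml
from transformers import TrainingArguments

from infer_itrex import ItrexInfer  # import path assumed, as in src/run.py

training_args = TrainingArguments(output_dir="./output_dir", do_predict=True)

# Populate an args namespace from one of the YAML configs shown above.
args = argparse.Namespace()
with open("config/inference_only_Model-bertbase_Task-sentiment_Dataset-imdb.yaml") as f:
    data = yaml.safe_load(f)
for item in data["args"]:
    setattr(args, item, data["args"][item])

infer = ItrexInfer(args=args, training_args=training_args)
infer.e2e_infer_setup_only()                   # load tokenizer + model once
for csv_file in ["batch1.csv", "batch2.csv"]:  # placeholder input files
    infer.e2e_infer_only(csv_file)             # score each file with the same model
```

The split between setup and per-file scoring is the point of the `inference_only` pipeline: it avoids reloading the model for every input batch.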