From a927f6a3ed42a129534e6ab409d6cc384cb6031a Mon Sep 17 00:00:00 2001
From: VincyZhang
Date: Thu, 24 Nov 2022 06:36:14 -0800
Subject: [PATCH] add perf PDT reviewed data (#372)

---
 .gitignore | 6 +-
 .gitmodules | 104 +-
 LICENSE | 201 +
 README.md | 144 +-
 SECURITY.md | 13 +
 conda_meta/meta.yaml | 41 +
 docs/AutoDistillation design.md | 22 +-
 docs/benchmark.md | 10 +-
 docs/data_augmentation.md | 24 +-
 docs/distillation.md | 10 +-
 docs/examples.md | 206 +-
 docs/export.md | 42 +-
 docs/imgs/arch.png | Bin 0 -> 68114 bytes
 docs/metrics.md | 12 +-
 docs/objectives.md | 41 +-
 docs/pipeline.md | 12 +-
 docs/profiling.md | 4 +-
 docs/pruning.md | 22 +-
 docs/quantization.md | 22 +-
 .../pytorch/language-modeling/benchmark.py | 4 +-
 .../language-modeling/bert-base-uncased.ipynb | 6 +-
 .../pytorch/multiple-choice/benchmark.py | 4 +-
 .../bert-base-uncased_SWAG.ipynb | 6 +-
 .../Dynamic_MiniLM_SQuAD.ipynb | 62 +-
 .../pytorch/question-answering/benchmark.py | 4 +-
 .../bert-base-uncased_distilled-squad.ipynb | 6 +-
 .../question-answering/distillation.ipynb | 4 +-
 .../pytorch/summarization/benchmark.py | 4 +-
 .../summarization/pegasus-samsum.ipynb | 8 +-
 .../pytorch/text-classification/benchmark.py | 4 +-
 .../bert-base-uncased-MRPC.ipynb | 6 +-
 .../text-classification/distillation.ipynb | 6 +-
 .../pytorch/token-classification/benchmark.py | 4 +-
 .../distilbert_base_ner.ipynb | 6 +-
 .../pytorch/translation/benchmark.py | 4 +-
 .../pytorch/translation/t5-small.ipynb | 8 +-
 examples/deployment/README.md | 41 +-
 examples/deployment/ipex/common.py | 6 +-
 .../ipex/squad/bert_large/README.md | 6 +-
 .../ipex/squad/bert_large/run_qa.py | 2 +-
 .../ipex/squad/bert_large/trainer_qa.py | 2 +-
 .../squad/distillbert_base_uncased/README.md | 6 +-
 .../squad/distillbert_base_uncased/run_qa.py | 2 +-
 .../distillbert_base_uncased/trainer_qa.py | 2 +-
 examples/deployment/neural_engine/README.md | 2 +-
 examples/deployment/neural_engine/common.py | 4 +-
 .../emotion/distilbert_base_uncased/README.md | 12 +-
 .../distilbert_base_uncased/run_emotion.py | 4 +-
 .../neural_engine/imagenet/vit/README.md | 2 +-
 .../neural_engine/imagenet/vit/model_eval.py | 2 +-
 .../imagenet/vit/model_quant_convert.py | 4 +-
 examples/deployment/neural_engine/launcher.py | 4 +-
 .../neural_engine/mrpc/bert_base/README.md | 10 +-
 .../neural_engine/mrpc/bert_base/run_glue.py | 4 +-
 .../mrpc/bert_base_cased/README.md | 10 +-
 .../mrpc/bert_base_cased/run_glue.py | 4 +-
 .../neural_engine/mrpc/bert_mini/README.md | 10 +-
 .../neural_engine/mrpc/bert_mini/run_glue.py | 4 +-
 .../mrpc/distilbert_base_uncased/README.md | 10 +-
 .../mrpc/distilbert_base_uncased/run_glue.py | 4 +-
 .../neural_engine/mrpc/roberta_base/README.md | 10 +-
 .../mrpc/roberta_base/run_glue.py | 4 +-
 .../neural_engine/sparse/bert_mini/README.md | 22 +-
 ..._tranpose_ir.py => export_transpose_ir.py} | 4 +-
 .../sparse/bert_mini/run_glue.py | 4 +-
 .../sparse/distilbert_base_uncased/README.md | 22 +-
 ..._tranpose_ir.py => export_transpose_ir.py} | 4 +-
 .../sparse/distilbert_base_uncased/run_qa.py | 2 +-
 .../distilbert_base_uncased/trainer_qa.py | 2 +-
 .../neural_engine/squad/bert_large/README.md | 10 +-
 .../neural_engine/squad/bert_large/run_qa.py | 2 +-
 .../squad/bert_large/trainer_qa.py | 2 +-
 .../neural_engine/sst2/bert_mini/README.md | 10 +-
 .../neural_engine/sst2/bert_mini/run_glue.py | 4 +-
 .../sst2/distilbert_base_uncased/README.md | 10 +-
 .../sst2/distilbert_base_uncased/run_glue.py | 4 +-
 .../sst2/minilm_l6_h384_uncased/README.md | 10 +-
 .../sst2/minilm_l6_h384_uncased/run_glue.py | 4 +-
 .../run_mlm_autodistillation.py | 6 +-
 .../quantization/inc/run_clm.py | 4 +-
 .../quantization/inc/run_mlm.py | 4 +-
 .../quantization/inc/run_plm.py | 4 +-
 .../quantization/inc/run_swag.py | 4 +-
 .../question-answering/distillation/run_qa.py | 4 +-
 .../distillation/trainer_qa.py | 2 +-
 .../question-answering/dynamic/README.md | 201 +-
 .../question-answering/dynamic/run_qa.py | 6 +-
 .../question-answering/dynamic/trainer_qa.py | 2 +-
 .../orchestrate_optimizations/run_qa.py | 4 +-
 .../orchestrate_optimizations/trainer_qa.py | 2 +-
 .../pruning/basic_magnitude/run_qa.py | 4 +-
 .../pruning/basic_magnitude/trainer_qa.py | 2 +-
 .../pruning/group_lasso/README.md | 4 +-
 .../pruning/group_lasso/run_squad_sparse.py | 2 +-
 .../quantization/inc/README.md | 1 +
 .../quantization/inc/run_qa.py | 2 +-
 .../quantization/inc/trainer_qa.py | 2 +-
 .../summarization/quantization/README.md | 2 +-
 .../quantization/run_summarization.py | 6 +-
 .../distillation/run_glue.py | 6 +-
 .../orchestrate_optimizations/run_glue.py | 6 +-
 .../text-classification/pruning/run_glue.py | 6 +-
 .../quantization/inc/run_glue.py | 4 +-
 .../quantization/inc/run_glue_no_trainer.py | 4 +-
 .../nncf/configs/nncf_bert_config_conll.json | 44 -
 .../nncf/configs/nncf_bert_config_mrpc.json | 42 -
 .../nncf/configs/nncf_bert_config_squad.json | 44 -
 ...config_squad_magnitude_sparsity_cubic.json | 31 -
 .../nncf/configs/nncf_bert_config_xnli.json | 36 -
 .../configs/nncf_distilbert_config_sst2.json | 33 -
 .../nncf_gpt2_config_wikitext_hw_config.json | 58 -
 .../nncf_mobilebert_config_squad_int8.json | 46 -
 .../configs/nncf_roberta_config_mnli.json | 36 -
 .../quantization/nncf/run_glue.py | 733 --
 .../quantization/inc/run_ner.py | 4 +-
 .../translation/quantization/README.md | 4 +-
 .../quantization/run_translation.py | 6 +-
 .../auto_distillation/README.md | 2 +-
 .../auto_distillation/run_glue.py | 4 +-
 .../distillation/README.md | 2 +-
 .../distillation/run_glue.py | 6 +-
 .../text-classification/pruning/README.md | 2 +-
 .../text-classification/pruning/run_glue.py | 6 +-
 .../quantization/inc/run_glue.py | 6 +-
 .../__init__.py | 0
 .../backends/__init__.py | 0
 .../backends/neural_engine/.clang-format | 0
 .../backends/neural_engine/.editorconfig | 0
 .../backends/neural_engine/CMakeLists.txt | 6 +-
 .../backends/neural_engine/CMakePresets.json | 2 +-
 .../backends/neural_engine/__init__.py | 0
 .../backends/neural_engine/bin/neural_engine | 4 +-
 .../neural_engine/cmake/PresetOs.cmake | 0
 .../neural_engine/compile/__init__.py | 0
 .../backends/neural_engine/compile/compile.py | 0
 .../compile/extractors/__init__.py | 0
 .../compile/extractors/extractor.py | 0
 .../compile/extractors/onnx_extractor.py | 4 +-
 .../compile/extractors/tf_extractor.py | 0
 .../neural_engine/compile/graph/__init__.py | 0
 .../neural_engine/compile/graph/graph.py | 2 +-
 .../neural_engine/compile/graph_utils.py | 0
 .../neural_engine/compile/loaders/__init__.py | 0
 .../neural_engine/compile/loaders/loader.py | 0
 .../backends/neural_engine/compile/logger.py | 0
 .../neural_engine/compile/onnx_utils.py | 0
 .../neural_engine/compile/ops/__init__.py | 0
 .../backends/neural_engine/compile/ops/all.py | 0
 .../neural_engine/compile/ops/assert.py | 0
 .../neural_engine/compile/ops/batch_matmul.py | 0
 .../compile/ops/batch_matmul_v2.py | 0
 .../neural_engine/compile/ops/bias_add.py | 0
 .../neural_engine/compile/ops/cast.py | 0
 .../neural_engine/compile/ops/concat.py | 0
 .../neural_engine/compile/ops/conv.py | 0
 .../neural_engine/compile/ops/empty_ops.py | 0
 .../neural_engine/compile/ops/expand_dims.py | 0
 .../compile/ops/fused_batch_matmul_v2.py | 0
 .../compile/ops/fused_batch_norm_v3.py | 0
 .../neural_engine/compile/ops/fused_gemm.py | 0
 .../neural_engine/compile/ops/fused_matmul.py | 0
 .../neural_engine/compile/ops/gather.py | 0
 .../compile/ops/gather_elements.py | 0
 .../neural_engine/compile/ops/gelu.py | 0
 .../neural_engine/compile/ops/gemm.py | 0
 .../compile/ops/iterator_get_next.py | 0
 .../neural_engine/compile/ops/iterator_v2.py | 0
 .../compile/ops/layer_normalization.py | 0
 .../compile/ops/map_and_batch_dataset.py | 0
 .../neural_engine/compile/ops/matmul.py | 0
 .../neural_engine/compile/ops/mean.py | 0
 .../compile/ops/mkl_layer_norm.py | 0
 .../compile/ops/model_dataset.py | 0
 .../neural_engine/compile/ops/one_hot.py | 0
 .../neural_engine/compile/ops/onnx_input.py | 0
 .../backends/neural_engine/compile/ops/op.py | 0
 .../compile/ops/optimize_dataset.py | 0
 .../neural_engine/compile/ops/pack.py | 0
 .../neural_engine/compile/ops/placeholder.py | 0
 .../compile/ops/quantize_linear.py | 0
 .../neural_engine/compile/ops/quantize_v2.py | 0
 .../quantized_fused_matmul_and_dequantize.py | 0
 ...antized_matmul_with_bias_and_dequantize.py | 0
 .../neural_engine/compile/ops/reduce_mean.py | 0
 .../neural_engine/compile/ops/reduce_sum.py | 0
 .../neural_engine/compile/ops/reshape.py | 0
 .../compile/ops/scatter_elements.py | 0
 .../neural_engine/compile/ops/softmax.py | 0
 .../neural_engine/compile/ops/split.py | 0
 .../neural_engine/compile/ops/squeeze.py | 0
 .../compile/ops/strided_slice.py | 0
 .../neural_engine/compile/ops/tensor.py | 0
 .../neural_engine/compile/ops/top_k.py | 0
 .../neural_engine/compile/ops/transpose.py | 0
 .../neural_engine/compile/ops/unpack.py | 0
 .../neural_engine/compile/ops/unsqueeze.py | 0
 .../compile/sub_graph/__init__.py | 0
 .../compile/sub_graph/add_cls_token.py | 2 +-
 .../compile/sub_graph/add_embeddings.py | 0
 ...ntion_mask_length_adaptive_keep_indices.py | 0
 ...layer_norm_length_adaptive_keep_indices.py | 0
 .../compile/sub_graph/attention_reshape.py | 0
 .../compile/sub_graph/collect_quant_info.py | 0
 .../compile/sub_graph/conv_reshape.py | 0
 .../compile/sub_graph/embeddingbag.py | 0
 .../neural_engine/compile/sub_graph/gelu.py | 0
 .../compile/sub_graph/generate_sequence.py | 0
 .../compile/sub_graph/input_data.py | 0
 .../compile/sub_graph/input_file.py | 0
 .../compile/sub_graph/insert_bf16_node.py | 0
 .../compile/sub_graph/insert_quant_node.py | 0
 .../compile/sub_graph/interact_features.py | 0
 .../compile/sub_graph/last_layer_shape.py | 0
 .../compile/sub_graph/layer_norm.py | 0
 .../sub_graph/layer_norm_with_reduce_mean.py | 0
 .../compile/sub_graph/matmul_with_bias.py | 0
 .../compile/sub_graph/matmul_with_bias_add.py | 0
 .../sub_graph/matmul_with_bias_gelu.py | 0
 .../sub_graph/matmul_with_bias_relu.py | 0
 .../sub_graph/matmul_with_bias_sigmoid.py | 0
 .../sub_graph/matmul_with_bias_tanh.py | 0
 .../compile/sub_graph/merged_embeddingbag.py | 0
 .../compile/sub_graph/output_data.py | 0
 .../compile/sub_graph/padding_sequence.py | 0
 .../compile/sub_graph/pattern.py | 0
 .../compile/sub_graph/position_embeddings.py | 0
 .../sub_graph/position_embeddings_v1.py | 0
 .../compile/sub_graph/qkv_merge.py | 0
 .../compile/sub_graph/qkv_reshape.py | 0
 .../compile/sub_graph/quantize_fusion.py | 0
 .../reshape_after_restore_hidden_states.py | 0
 ...ttention_out_layer_norm_gather_elements.py | 0
 .../reshape_before_restore_hidden_states.py | 0
 .../compile/sub_graph/reshape_fusion.py | 0
 ...tates_in_length_adaptive_update_indices.py | 0
 .../compile/sub_graph/start_end_logits.py | 0
 .../compile/sub_graph/subgraph_matcher.py | 0
 .../sub_graph/token_type_embeddings.py | 0
 .../sub_graph/token_type_embeddings_v1.py | 0
 .../sub_graph/transpose_batch_matmul.py | 0
 .../compile/sub_graph/word_embeddings.py | 0
 .../neural_engine/compile/tf_utils.py | 0
 .../docs/Deploy and Integration.md | 4 +-
 .../neural_engine/docs/Installation.md | 0
 .../docs/add_customized_pattern.md | 8 +-
 .../neural_engine/docs/engine_inferencer.md | 2 +-
 .../neural_engine/docs/graph_fusion.md | 0
 .../docs/imgs/compile_workflow.png | Bin
 .../docs/imgs/engine_adaptor_example.png | Bin
 .../docs/imgs/engine_adaptor_workflow.png | Bin
 .../docs/imgs/infrastructure.png | Bin
 .../docs/imgs/layernorm_bert_large_tf.png | Bin
 .../imgs/layernorm_distilbert_base_onnx.png | Bin
 .../docs/imgs/layernorm_with_index.png | Bin
 .../neural_engine/docs/onnx_compile.md | 2 +-
 .../neural_engine/docs/onnx_quantize.md | 37 +
 .../neural_engine/docs/operator_register.md | 0
 .../neural_engine/docs/pattern_recognize.md | 8 +-
 .../neural_engine/docs/tensorflow_compile.md | 2 +-
 .../neural_engine/docs/tensorflow_quantize.md | 0
 .../neural_engine/docs/validated_model.md | 1504 +++
 .../neural_engine/executor/CMakeLists.txt | 0
 .../neural_engine/executor/include/common.hpp | 0
 .../neural_engine/executor/include/conf.hpp | 0
 .../executor/include/dataloader.hpp | 0
 .../executor/include/dispatch_table.hpp | 0
 .../executor/include/dispatcher.hpp | 0
 .../executor/include/execution_options.hpp | 0
 .../executor/include/executor.hpp | 0
 .../executor/include/i_malloc.hpp | 0
 .../neural_engine/executor/include/isa.hpp | 0
 .../executor/include/llga_info.hpp | 0
 .../llga_operators/inner_product_graph.hpp | 0
 .../include/llga_operators/llga_kernel.hpp | 0
 .../llga_operators/llga_op_creator.hpp | 0
 .../include/llga_operators/softmax_graph.hpp | 0
 .../executor/include/memory_allocator.hpp | 0
 .../neural_engine/executor/include/model.hpp | 0
 .../executor/include/op_tuning.hpp | 0
 .../executor/include/operator.hpp | 0
 .../executor/include/operator_registry.hpp | 0
 .../executor/include/operators/binary_add.hpp | 0
 .../executor/include/operators/concat.hpp | 0
 .../include/operators/convolution.hpp | 0
 .../include/operators/embeddingbag.hpp | 0
 .../executor/include/operators/erf.hpp | 0
 .../include/operators/expand_indices.hpp | 0
 .../executor/include/operators/gather.hpp | 2 +-
 .../include/operators/gather_elements.hpp | 0
 .../executor/include/operators/gelu.hpp | 3 +-
 .../executor/include/operators/group_norm.hpp | 0
 .../include/operators/inner_product.hpp | 3 +-
 .../executor/include/operators/input.hpp | 0
 .../executor/include/operators/layer_norm.hpp | 4 +-
 .../executor/include/operators/matmul.hpp | 2 +-
 .../include/operators/merged_embeddingbag.hpp | 0
 .../executor/include/operators/one_hot.hpp | 0
 .../executor/include/operators/output.hpp | 0
 .../include/operators/padding_sequence.hpp | 0
 .../include/operators/position_ids.hpp | 0
 .../executor/include/operators/pow.hpp | 0
 .../executor/include/operators/quantize.hpp | 0
 .../executor/include/operators/range.hpp | 0
 .../include/operators/reduce_mean.hpp | 0
 .../executor/include/operators/reduce_sum.hpp | 0
 .../executor/include/operators/reorder.hpp | 0
 .../executor/include/operators/reshape.hpp | 0
 .../include/operators/scatter_elements.hpp | 0
 .../executor/include/operators/shape.hpp | 0
 .../executor/include/operators/slice.hpp | 0
 .../executor/include/operators/softmax.hpp | 2 +-
 .../executor/include/operators/split.hpp | 0
 .../include/operators/strided_slice.hpp | 0
 .../include/operators/token_type_ids.hpp | 0
 .../executor/include/operators/topk.hpp | 0
 .../executor/include/operators/transpose.hpp | 0
 .../executor/include/operators/unsqueeze.hpp | 0
 .../executor/include/profiling.hpp | 0
 .../sparse_operators/sparse_inner_product.hpp | 0
 .../neural_engine/executor/include/tensor.hpp | 0
 .../executor/include/thread_pool.hpp | 0
 .../executor/python/bind_executor.cpp | 0
 .../executor/python/pybind_tensor.hpp | 0
 .../executor/python/test_model.py | 0
 .../neural_engine/executor/src/common.cpp | 0
 .../neural_engine/executor/src/i_malloc.cpp | 0
 .../llga_operators/inner_product_graph.cpp | 0
 .../src/llga_operators/llga_kernel.cpp | 0
 .../src/llga_operators/llga_op_creator.cpp | 0
 .../src/llga_operators/softmax_graph.cpp | 0
 .../neural_engine/executor/src/model.cpp | 0
 .../executor/src/nlp_executor.cc | 0
 .../neural_engine/executor/src/op_tuning.cpp | 0
 .../executor/src/operators/binary_add.cpp | 0
 .../executor/src/operators/concat.cpp | 0
 .../executor/src/operators/convolution.cpp | 0
 .../executor/src/operators/embeddingbag.cpp | 0
 .../executor/src/operators/erf.cpp | 0
 .../executor/src/operators/expand_indices.cpp | 0
 .../executor/src/operators/gather.cpp | 0
 .../src/operators/gather_elements.cpp | 0
 .../executor/src/operators/gelu.cpp | 0
 .../executor/src/operators/group_norm.cpp | 0
 .../executor/src/operators/inner_product.cpp | 0
 .../executor/src/operators/input.cpp | 0
 .../executor/src/operators/layer_norm.cpp | 0
 .../executor/src/operators/matmul.cpp | 0
 .../src/operators/merged_embeddingbag.cpp | 0
 .../executor/src/operators/one_hot.cpp | 0
 .../executor/src/operators/output.cpp | 0
 .../src/operators/padding_sequence.cpp | 0
 .../executor/src/operators/position_ids.cpp | 0
 .../executor/src/operators/pow.cpp | 0
 .../executor/src/operators/quantize.cpp | 0
 .../executor/src/operators/range.cpp | 0
 .../executor/src/operators/reduce_mean.cpp | 0
 .../executor/src/operators/reduce_sum.cpp | 0
 .../executor/src/operators/reorder.cpp | 0
 .../executor/src/operators/reshape.cpp | 0
 .../src/operators/scatter_elements.cpp | 0
 .../executor/src/operators/shape.cpp | 0
 .../executor/src/operators/slice.cpp | 0
 .../executor/src/operators/softmax.cpp | 0
 .../executor/src/operators/split.cpp | 0
 .../executor/src/operators/strided_slice.cpp | 0
 .../executor/src/operators/token_type_ids.cpp | 0
 .../executor/src/operators/topk.cpp | 0
 .../executor/src/operators/transpose.cpp | 0
 .../executor/src/operators/unsqueeze.cpp | 0
 .../sparse_operators/sparse_inner_product.cpp | 0
 .../neural_engine/kernels}/CMakeLists.txt | 0
 .../backends/neural_engine/kernels}/README.md | 10 +-
 .../kernels}/cmake/FindVTune.cmake | 0
 .../docs/imgs/kernel_amx_bf16x16_calc.png | Bin
 .../docs/imgs/kernel_amx_bf16x16_relayout.png | Bin
 .../docs/imgs/kernel_avx512f_pattern_base.png | Bin
 .../imgs/kernel_avx512f_pattern_unroll4.png | Bin
 .../kernels}/docs/imgs/kernel_vnni_calc.png | Bin
 .../docs/imgs/kernel_vnni_pattern.png | Bin
 .../imgs/kernel_vnni_pattern_left_1x4.png | Bin
 .../imgs/kernel_vnni_pattern_left_4x1.png | Bin
 .../imgs/kernel_vnni_pattern_right_1x16.png | Bin
 .../imgs/kernel_vnni_pattern_right_4x1.png | Bin
 .../kernels}/docs/imgs/kernel_vnni_perf.png | Bin
 .../kernels}/docs/imgs/relu_formula.svg | 0
 .../kernels}/docs/kernel_desc/kernel_amx.md | 0
 .../docs/kernel_desc/kernel_avx512f.md | 0
 .../kernels}/docs/kernel_desc/kernel_vnni.md | 0
 .../docs/kernel_desc/postop_injector.md | 6 +-
 .../neural_engine/kernels}/docs/profiling.md | 16 +-
 .../kernels/docs/validated_data.md | 8039 +++++++++++++++++
 .../kernels}/include/amx_utils.hpp | 0
 .../kernels}/include/cpu_engine.hpp | 0
 .../kernels}/include/cpu_isa.hpp | 0
 .../neural_engine/kernels}/include/engine.hpp | 0
 .../kernels}/include/engine_factory.hpp | 0
 .../kernels}/include/impl_list_item.hpp | 0
 .../kernels}/include/interface.hpp | 0
 .../include/jit_domain/jit_amx_configure.hpp | 0
 .../jit_domain/jit_binary_injector.hpp | 0
 .../jit_domain/jit_eltwise_injector.hpp | 0
 .../include/jit_domain/jit_eltwiseop.hpp | 0
 .../include/jit_domain/jit_gather.hpp | 0
 .../include/jit_domain/jit_layernorm_ba.hpp | 0
 .../jit_matmul_avx512f_p2031_p2013.hpp | 0
 .../jit_matmul_vnni_Ba4b_Ab4a_ba.hpp | 0
 .../jit_matmul_vnni_noperm_p2031_p1302.hpp | 0
 .../include/jit_domain/jit_softmax.hpp | 0
 .../jit_domain/jit_spmm_amx_bf16_x16.hpp | 0
 .../include/jit_domain/jit_spmm_avx512f.hpp | 0
 .../include/jit_domain/jit_spmm_vnni.hpp | 0
 .../jit_domain/jit_trans_cpy_nx8_4b.hpp | 0
 .../kernels}/include/jit_generator.hpp | 0
 .../neural_engine/kernels}/include/kernel.hpp | 0
 .../kernels}/include/kernel_cache.hpp | 0
 .../kernels}/include/kernel_desc.hpp | 0
 .../kernels}/include/kernel_hashing.hpp | 0
 .../kernels}/include/kernels/attention.hpp | 0
 .../kernels}/include/kernels/eltwiseop.hpp | 0
 .../include/kernels/eltwiseop_ref.hpp | 0
 .../include/kernels/eltwiseop_types.hpp | 0
 .../kernels}/include/kernels/gather.hpp | 0
 .../kernels}/include/kernels/gather_ref.hpp | 0
 .../kernels}/include/kernels/gather_types.hpp | 0
 .../kernels}/include/kernels/layernorm_ba.hpp | 0
 .../include/kernels/layernorm_ba_ref.hpp | 0
 .../include/kernels/layernorm_ba_types.hpp | 0
 .../kernels/matmul_avx512f_p2031_p2013.hpp | 0
 .../kernels}/include/kernels/matmul_ref.hpp | 0
 .../kernels}/include/kernels/matmul_types.hpp | 0
 .../matmul_vnni_noperm_p2031_p1302.hpp | 0
 .../kernels/include/kernels/postop_types.hpp | 43 +
 .../kernels}/include/kernels/softmax.hpp | 0
 .../kernels}/include/kernels/softmax_ref.hpp | 0
 .../include/kernels/softmax_types.hpp | 0
 .../kernels}/include/kernels/sparse_data.hpp | 0
 .../include/kernels/spmm_amx_bf16_x16.hpp | 0
 .../kernels}/include/kernels/spmm_avx512f.hpp | 0
 .../kernels}/include/kernels/spmm_ref.hpp | 0
 .../kernels}/include/kernels/spmm_types.hpp | 0
 .../kernels}/include/kernels/spmm_vnni.hpp | 0
 .../kernels}/include/operator_desc.hpp | 0
 .../kernels}/include/param_types.hpp | 0
 .../kernels}/include/tensor_desc.hpp | 0
 .../neural_engine/kernels}/include/utils.hpp | 0
 .../kernels}/include/verbose.hpp | 0
 .../kernels}/include/vtune_wrapper.hpp | 0
 .../neural_engine/kernels}/src/amx_utils.cpp | 0
 .../neural_engine/kernels}/src/cpu_engine.cpp | 0
 .../neural_engine/kernels}/src/cpu_isa.cpp | 0
 .../neural_engine/kernels}/src/interface.cpp | 0
 .../src/jit_domain/jit_amx_configure.cpp | 0
 .../src/jit_domain/jit_binary_injector.cpp | 0
 .../src/jit_domain/jit_eltwise_injector.cpp | 0
 .../kernels}/src/jit_domain/jit_eltwiseop.cpp | 0
 .../kernels}/src/jit_domain/jit_gather.cpp | 0
 .../src/jit_domain/jit_layernorm_ba.cpp | 0
 .../jit_matmul_avx512f_p2031_p2013.cpp | 0
 .../jit_matmul_vnni_Ba4b_Ab4a_ba.cpp | 0
 .../jit_matmul_vnni_noperm_p2031_p1302.cpp | 0
 .../kernels}/src/jit_domain/jit_softmax.cpp | 0
 .../src/jit_domain/jit_spmm_amx_bf16_x16.cpp | 0
 .../src/jit_domain/jit_spmm_avx512f.cpp | 0
 .../kernels}/src/jit_domain/jit_spmm_vnni.cpp | 0
 .../src/jit_domain/jit_trans_cpy_nx8_4b.cpp | 0
 .../kernels}/src/jit_generator.cpp | 0
 .../neural_engine/kernels}/src/kernel.cpp | 0
 .../kernels}/src/kernel_cache.cpp | 0
 .../kernels}/src/kernel_desc.cpp | 0
 .../kernels/src/kernels/CMakeLists.txt | 3 +
 .../kernels}/src/kernels/attention.cpp | 0
 .../src/kernels/cpu_attention_list.cpp | 0
 .../src/kernels/cpu_eltwiseop_list.cpp | 0
 .../kernels}/src/kernels/cpu_gather_list.cpp | 0
 .../src/kernels/cpu_layernorm_ba_list.cpp | 0
 .../kernels}/src/kernels/cpu_softmax_list.cpp | 0
 .../src/kernels/cpu_sparse_matmul_list.cpp | 0
 .../src/kernels/cpu_transpose_matmul_list.cpp | 0
 .../kernels}/src/kernels/eltwiseop.cpp | 0
 .../kernels}/src/kernels/eltwiseop_ref.cpp | 0
 .../kernels}/src/kernels/gather.cpp | 0
 .../kernels}/src/kernels/gather_ref.cpp | 0
 .../kernels}/src/kernels/layernorm_ba.cpp | 0
 .../kernels}/src/kernels/layernorm_ba_ref.cpp | 0
 .../kernels/matmul_avx512f_p2031_p2013.cpp | 0
 .../kernels}/src/kernels/matmul_ref.cpp | 0
 .../matmul_vnni_noperm_p2031_p1302.cpp | 0
 .../kernels}/src/kernels/softmax.cpp | 0
 .../kernels}/src/kernels/softmax_ref.cpp | 0
 .../kernels}/src/kernels/sparse_data.cpp | 0
 .../src/kernels/spmm_amx_bf16_x16.cpp | 0
 .../kernels}/src/kernels/spmm_avx512f.cpp | 0
 .../kernels}/src/kernels/spmm_ref.cpp | 0
 .../kernels}/src/kernels/spmm_vnni.cpp | 0
 .../neural_engine/kernels}/src/utils.cpp | 0
 .../neural_engine/kernels}/src/verbose.cpp | 0
 .../kernels}/src/vtune_wrapper.cpp | 0
 .../neural_engine/oneDNN-THIRD-PARTY-PROGRAMS | 583 ++
 .../neural_engine/test/gtest/CMakeLists.txt | 0
 .../test/gtest/kernels}/CMakeLists.txt | 0
 .../test/gtest/kernels}/main.cpp | 0
 .../test/gtest/kernels}/test_attention.cpp | 0
 .../gtest/kernels}/test_eltwiseop_kernel.cpp | 0
 .../gtest/kernels}/test_gather_kernel.cpp | 0
 .../kernels}/test_layernorm_ba_kernel.cpp | 0
 .../test_matmul_avx512f_p2031_p2013.cpp | 0
 .../test_matmul_vnni_noperm_p2031_p1302.cpp | 0
 .../gtest/kernels}/test_softmax_kernel.cpp | 0
 .../test_spmm_amx_bf16_x16_kernel.cpp | 0
 .../kernels}/test_spmm_avx512f_kernel.cpp | 0
 .../gtest/kernels}/test_spmm_vnni_kernel.cpp | 0
 .../test/gtest/kernels}/unit_test_utils.hpp | 0
 .../neural_engine/test/gtest/main.cpp | 0
 .../test/gtest/test_binary_add_op.cpp | 0
 .../neural_engine/test/gtest/test_cast_op.cpp | 0
 .../test/gtest/test_concat_op.cpp | 0
 .../test/gtest/test_convolution_op.cpp | 0
 .../test/gtest/test_dequantize_op.cpp | 0
 .../neural_engine/test/gtest/test_div_op.cpp | 0
 .../test/gtest/test_embeddingbag_op.cpp | 0
 .../neural_engine/test/gtest/test_erf_op.cpp | 0
 .../test/gtest/test_expand_indices_op.cpp | 0
 .../test/gtest/test_gather_element_op.cpp | 0
 .../test/gtest/test_gather_op.cpp | 0
 .../neural_engine/test/gtest/test_gelu_op.cpp | 0
 .../test/gtest/test_group_norm_op.cpp | 0
 .../test/gtest/test_i_malloc.cpp | 0
 .../test/gtest/test_inner_product_op.cpp | 0
 .../test/gtest/test_layer_norm_op.cpp | 0
 .../test/gtest/test_matmul_op.cpp | 0
 .../neural_engine/test/gtest/test_mul_op.cpp | 0
 .../test/gtest/test_one_hot_op.cpp | 0
 .../test/gtest/test_padding_sequence_op.cpp | 0
 .../test/gtest/test_position_ids_op.cpp | 0
 .../neural_engine/test/gtest/test_pow_op.cpp | 0
 .../test/gtest/test_range_op.cpp | 0
 .../test/gtest/test_reduce_mean_op.cpp | 0
 .../test/gtest/test_reorder_op.cpp | 0
 .../test/gtest/test_shape_op.cpp | 0
 .../test/gtest/test_slice_op.cpp | 0
 .../test/gtest/test_softmax_op.cpp | 0
 .../test/gtest/test_sparse_inner_product.cpp | 0
 .../test/gtest/test_split_op.cpp | 0
 .../neural_engine/test/gtest/test_sqrt_op.cpp | 0
 .../test/gtest/test_strided_slice_op.cpp | 0
 .../neural_engine/test/gtest/test_sub_op.cpp | 0
 .../neural_engine/test/gtest/test_tanh_op.cpp | 0
 .../test/gtest/test_token_type_ids_op.cpp | 0
 .../test/gtest/test_unsqueeze_op.cpp | 0
 .../test/kernels}/benchmark/CMakeLists.txt | 0
 .../test/kernels}/benchmark/README.md | 4 +-
 .../test/kernels}/benchmark/benchmark.cpp | 0
 .../kernels}/benchmark/benchmark_utils.cpp | 0
 .../kernels}/benchmark/benchmark_utils.hpp | 0
 .../test/kernels}/benchmark/ci/benchmark.sh | 0
 .../kernels}/benchmark/ci/inputs/README.md | 0
 .../benchmark/ci/inputs/ci_amx_bf16_x16_input | 0
 .../benchmark/ci/inputs/ci_eltwiseop_input | 0
 .../benchmark/ci/inputs/ci_layernorm_ba_input | 0
 .../ci_matmul_avx512f_p2031_p2013_input | 0
 .../ci_matmul_vnni_noperm_p2031_p1302_input | 0
 .../benchmark/ci/inputs/ci_softmax_input | 0
 .../benchmark/ci/inputs/ci_vnni_input | 0
 .../test/kernels}/benchmark/ci/run_ci.sh | 0
 .../test/kernels}/benchmark/ci/to_summary.sh | 0
 .../test/kernels}/benchmark/common_utils.cpp | 0
 .../test/kernels}/benchmark/common_utils.hpp | 0
 .../benchmark/eltwiseop/CMakeLists.txt | 0
 .../benchmark/eltwiseop/eltwiseop.cpp | 0
 .../benchmark/eltwiseop/eltwiseop.hpp | 0
 .../benchmark/layernorm_ba/CMakeLists.txt | 0
 .../benchmark/layernorm_ba/layernorm_ba.cpp | 0
 .../benchmark/layernorm_ba/layernorm_ba.hpp | 0
 .../kernels}/benchmark/softmax/CMakeLists.txt | 0
 .../kernels}/benchmark/softmax/softmax.cpp | 0
 .../kernels}/benchmark/softmax/softmax.hpp | 0
 .../benchmark/sparse_matmul/CMakeLists.txt | 0
 .../benchmark/sparse_matmul/sparse_matmul.cpp | 0
 .../benchmark/sparse_matmul/sparse_matmul.hpp | 0
 .../sparse_matmul/spmm_amx_bf16_x16.cpp | 0
 .../sparse_matmul/spmm_amx_bf16_x16.hpp | 0
 .../benchmark/sparse_matmul/spmm_avx512f.cpp | 0
 .../benchmark/sparse_matmul/spmm_avx512f.hpp | 0
 .../benchmark/sparse_matmul/spmm_vnni.cpp | 0
 .../benchmark/sparse_matmul/spmm_vnni.hpp | 0
 .../benchmark/transpose_matmul/CMakeLists.txt | 0
 .../matmul_avx512f_p2031_p2013.cpp | 0
 .../matmul_avx512f_p2031_p2013.hpp | 0
 .../matmul_vnni_noperm_p2031_p1302.cpp | 0
 .../matmul_vnni_noperm_p2031_p1302.hpp | 0
 .../transpose_matmul/transpose_matmul.cpp | 0
 .../transpose_matmul/transpose_matmul.hpp | 0
 .../test/pytest/requirements.txt | 0
 .../test/pytest/test_add_cls_token.py | 8 +-
 ...layer_norm_length_adaptive_keep_indices.py | 8 +-
 .../test/pytest/test_attention_reshape.py | 8 +-
 .../test/pytest/test_conv_reshape.py | 8 +-
 .../test/pytest/test_embeddingbag.py | 8 +-
 .../test/pytest/test_execution_options.py | 0
 .../neural_engine/test/pytest/test_gelu.py | 8 +-
 .../test/pytest/test_graph_dispatch.py | 2 +-
 .../pytest/test_insert_input_output_data.py | 10 +-
 .../pytest/test_is_supported_onnx_node.py | 6 +-
 .../test/pytest/test_last_layer_shape.py | 8 +-
 .../test_layer_norm_with_reduce_mean.py | 8 +-
 .../neural_engine/test/pytest/test_main.py | 4 +-
 .../test/pytest/test_matmul_with_bias_relu.py | 8 +-
 .../pytest/test_matmul_with_bias_sigmoid.py | 8 +-
 .../test/pytest/test_matmul_with_bias_tanh.py | 8 +-
 .../test/pytest/test_merged_embeddingbag.py | 8 +-
 .../test/pytest/test_onnx_utils.py | 6 +-
 .../neural_engine/test/pytest/test_ops.py | 4 +-
 .../test/pytest/test_padding_sequence.py | 8 +-
 .../test/pytest/test_pattern_dispatch.py | 8 +-
 .../test/pytest/test_position_embeddings.py | 8 +-
 .../test/pytest/test_qkv_merge.py | 4 +-
 .../test/pytest/test_qkv_reshape.py | 8 +-
 .../test/pytest/test_quant_onnx_execute.py | 2 +-
 ...est_reshape_after_restore_hidden_states.py | 8 +-
 ...ttention_out_layer_norm_gather_elements.py | 8 +-
 ...st_reshape_before_restore_hidden_states.py | 8 +-
 .../test/pytest/test_start_end_logits.py | 8 +-
 .../test/pytest/test_tf_utils.py | 2 +-
 .../test/pytest/test_token_type_embeddings.py | 8 +-
 .../test/pytest/test_transpose.py | 4 +-
 .../pytest/test_transpose_batch_matmul.py | 8 +-
 .../third_party/boost/libs/assert | 0
 .../third_party/boost/libs/config | 0
 .../third_party/boost/libs/container | 0
 .../third_party/boost/libs/container_hash | 0
 .../neural_engine/third_party/boost/libs/core | 0
 .../third_party/boost/libs/interprocess | 0
 .../third_party/boost/libs/intrusive | 0
 .../neural_engine/third_party/boost/libs/move | 0
 .../neural_engine/third_party/boost/libs/mp11 | 0
 .../third_party/boost/libs/predef | 0
 .../third_party/boost/libs/preprocessor | 0
 .../third_party/boost/libs/static_assert | 0
 .../third_party/boost/libs/throw_exception | 0
 .../third_party/boost/libs/tuple | 0
 .../third_party/boost/libs/type_traits | 0
 .../third_party/boost/libs/unordered | 0
 .../third_party/boost/libs/winapi | 0
 .../backends/neural_engine/third_party/gflags | 0
 .../backends/neural_engine/third_party/glog | 0
 .../neural_engine/third_party/googletest | 0
 .../neural_engine/third_party/jemalloc | 0
 .../backends/neural_engine/third_party/oneDNN | 0
 .../neural_engine/third_party/oneDNNGraph | 0
 .../neural_engine/third_party/pybind11 | 0
 .../backends/neural_engine/third_party/xbyak | 0
 .../neural_engine/third_party/yaml-cpp | 0
 .../optimization/__init__.py | 0
 .../optimization/benchmark.py | 6 +-
 .../optimization/config.py | 10 +-
 .../optimization/distillation.py | 0
 .../optimization/dynamic/__init__.py | 0
 .../dynamic/drop_and_restore_utils.py | 0
 .../optimization/dynamic/evolution.py | 2 +-
 .../optimization/mixture/__init__.py | 0
 .../optimization/mixture/auto_distillation.py | 4 +-
 .../optimization/model.py | 2 +-
 ...neural-compressor-third-party-programs.txt | 1916 ++++
 .../optimization/optimizer.py | 8 +-
 .../optimization/optimizer_tf.py | 8 +-
 .../optimization/pipeline.py | 4 +-
 .../optimization/pruning.py | 0
 .../optimization/pytorch_pruner/__init__.py | 0
 .../optimization/pytorch_pruner/patterns.py | 0
 .../pytorch_pruner/prune_utils.py | 0
 .../optimization/pytorch_pruner/pruner.py | 0
 .../optimization/pytorch_pruner/pruning.py | 0
 .../optimization/pytorch_pruner/scheduler.py | 0
 .../optimization/quantization.py | 0
 .../optimization/trainer.py | 12 +-
 .../optimization/utils/__init__.py | 0
 .../optimization/utils/metrics.py | 0
 .../optimization/utils/models/__init__.py | 0
 .../utils/models/modeling_bert_dynamic.py | 0
 .../utils/models/modeling_roberta_dynamic.py | 0
 .../optimization/utils/objectives.py | 0
 .../optimization/utils/utility.py | 0
 .../optimization/utils/utility_tf.py | 0
 .../preprocessing/__init__.py | 0
 .../preprocessing/data_augmentation.py | 2 +-
 .../preprocessing/utils.py | 0
 .../version.py | 2 +-
 .../neural_engine/docs/onnx_quantize.md | 107 -
 nlp_toolkit/backends/openvino/nncf_utils.py | 56 -
 setup.py | 12 +-
 tests/test_autodistillation.py | 10 +-
 tests/test_benchmark.py | 4 +-
 tests/test_config.py | 14 +-
 tests/test_data_augmentation.py | 4 +-
 tests/test_distillation.py | 6 +-
 tests/test_dynamic_length.py | 16 +-
 tests/test_orchestrate_optimization.py | 9 +-
 tests/test_pipeline.py | 2 +-
 tests/test_pruning.py | 8 +-
 tests/test_quantization.py | 10 +-
 tests/test_tf_autodistillation.py | 6 +-
 tests/test_tf_distillation.py | 6 +-
 tests/test_tf_pruning.py | 8 +-
 tests/test_tf_quantization.py | 4 +-
 third_party_programs.txt | 235 +
 705 files changed, 13782 insertions(+), 2046 deletions(-)
 create mode 100644 LICENSE
 create mode 100644 SECURITY.md
 create mode 100644 conda_meta/meta.yaml
 create mode 100644 docs/imgs/arch.png
 rename examples/deployment/neural_engine/sparse/bert_mini/{export_tranpose_ir.py => export_transpose_ir.py} (78%)
 rename examples/deployment/neural_engine/sparse/distilbert_base_uncased/{export_tranpose_ir.py => export_transpose_ir.py} (78%)
 delete mode 100644 examples/optimization/pytorch/huggingface/text-classification/quantization/nncf/configs/nncf_bert_config_conll.json
 delete mode 100644 examples/optimization/pytorch/huggingface/text-classification/quantization/nncf/configs/nncf_bert_config_mrpc.json
 delete mode 100644 examples/optimization/pytorch/huggingface/text-classification/quantization/nncf/configs/nncf_bert_config_squad.json
 delete mode 100644 examples/optimization/pytorch/huggingface/text-classification/quantization/nncf/configs/nncf_bert_config_squad_magnitude_sparsity_cubic.json
 delete mode 100644 examples/optimization/pytorch/huggingface/text-classification/quantization/nncf/configs/nncf_bert_config_xnli.json
 delete mode 100644 examples/optimization/pytorch/huggingface/text-classification/quantization/nncf/configs/nncf_distilbert_config_sst2.json
 delete mode 100644 examples/optimization/pytorch/huggingface/text-classification/quantization/nncf/configs/nncf_gpt2_config_wikitext_hw_config.json
 delete mode 100644 examples/optimization/pytorch/huggingface/text-classification/quantization/nncf/configs/nncf_mobilebert_config_squad_int8.json
 delete mode 100644 examples/optimization/pytorch/huggingface/text-classification/quantization/nncf/configs/nncf_roberta_config_mnli.json
 delete mode 100755 examples/optimization/pytorch/huggingface/text-classification/quantization/nncf/run_glue.py
 rename {nlp_toolkit => intel_extension_for_transformers}/__init__.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/__init__.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/.clang-format (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/.editorconfig (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/CMakeLists.txt (97%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/CMakePresets.json (95%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/__init__.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/bin/neural_engine (89%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/cmake/PresetOs.cmake (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/__init__.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/compile.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/extractors/__init__.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/extractors/extractor.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/extractors/onnx_extractor.py (95%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/extractors/tf_extractor.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/graph/__init__.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/graph/graph.py (99%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/graph_utils.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/loaders/__init__.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/loaders/loader.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/logger.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/onnx_utils.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/ops/__init__.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/ops/all.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/ops/assert.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/ops/batch_matmul.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/ops/batch_matmul_v2.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/ops/bias_add.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/ops/cast.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/ops/concat.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/ops/conv.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/ops/empty_ops.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/ops/expand_dims.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/ops/fused_batch_matmul_v2.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/ops/fused_batch_norm_v3.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/ops/fused_gemm.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/ops/fused_matmul.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/ops/gather.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/ops/gather_elements.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/ops/gelu.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/ops/gemm.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/ops/iterator_get_next.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/ops/iterator_v2.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/ops/layer_normalization.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/ops/map_and_batch_dataset.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/ops/matmul.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/ops/mean.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/ops/mkl_layer_norm.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/ops/model_dataset.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/ops/one_hot.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/ops/onnx_input.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/ops/op.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/ops/optimize_dataset.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/ops/pack.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/ops/placeholder.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/ops/quantize_linear.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/ops/quantize_v2.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/ops/quantized_fused_matmul_and_dequantize.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/ops/quantized_matmul_with_bias_and_dequantize.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/ops/reduce_mean.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/ops/reduce_sum.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/ops/reshape.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/ops/scatter_elements.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/ops/softmax.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/ops/split.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/ops/squeeze.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/ops/strided_slice.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/ops/tensor.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/ops/top_k.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/ops/transpose.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/ops/unpack.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/ops/unsqueeze.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/sub_graph/__init__.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/sub_graph/add_cls_token.py (98%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/sub_graph/add_embeddings.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/sub_graph/attention_mask_length_adaptive_keep_indices.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/sub_graph/attention_output_layer_norm_length_adaptive_keep_indices.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/sub_graph/attention_reshape.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/sub_graph/collect_quant_info.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/sub_graph/conv_reshape.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/sub_graph/embeddingbag.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/sub_graph/gelu.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/sub_graph/generate_sequence.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/sub_graph/input_data.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/sub_graph/input_file.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/sub_graph/insert_bf16_node.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/sub_graph/insert_quant_node.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/sub_graph/interact_features.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/sub_graph/last_layer_shape.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/sub_graph/layer_norm.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/sub_graph/layer_norm_with_reduce_mean.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/sub_graph/matmul_with_bias.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/sub_graph/matmul_with_bias_add.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/sub_graph/matmul_with_bias_gelu.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/sub_graph/matmul_with_bias_relu.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/sub_graph/matmul_with_bias_sigmoid.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/sub_graph/matmul_with_bias_tanh.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/sub_graph/merged_embeddingbag.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/sub_graph/output_data.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/sub_graph/padding_sequence.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/sub_graph/pattern.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/sub_graph/position_embeddings.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/sub_graph/position_embeddings_v1.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/sub_graph/qkv_merge.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/sub_graph/qkv_reshape.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/sub_graph/quantize_fusion.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/sub_graph/reshape_after_restore_hidden_states.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/sub_graph/reshape_before_and_after_attention_out_layer_norm_gather_elements.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/sub_graph/reshape_before_restore_hidden_states.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/sub_graph/reshape_fusion.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/sub_graph/restore_hidden_states_in_length_adaptive_update_indices.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/sub_graph/start_end_logits.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/sub_graph/subgraph_matcher.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/sub_graph/token_type_embeddings.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/sub_graph/token_type_embeddings_v1.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/sub_graph/transpose_batch_matmul.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/sub_graph/word_embeddings.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/compile/tf_utils.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/docs/Deploy and Integration.md (94%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/docs/Installation.md (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/docs/add_customized_pattern.md (97%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/docs/engine_inferencer.md (98%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/docs/graph_fusion.md (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/docs/imgs/compile_workflow.png (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/docs/imgs/engine_adaptor_example.png (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/docs/imgs/engine_adaptor_workflow.png (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/docs/imgs/infrastructure.png (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/docs/imgs/layernorm_bert_large_tf.png (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/docs/imgs/layernorm_distilbert_base_onnx.png (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/docs/imgs/layernorm_with_index.png (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/docs/onnx_compile.md (97%)
 create mode 100644 intel_extension_for_transformers/backends/neural_engine/docs/onnx_quantize.md
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/docs/operator_register.md (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/docs/pattern_recognize.md (96%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/docs/tensorflow_compile.md (96%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/docs/tensorflow_quantize.md (100%)
 create mode 100644 intel_extension_for_transformers/backends/neural_engine/docs/validated_model.md
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/CMakeLists.txt (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/include/common.hpp (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/include/conf.hpp (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/include/dataloader.hpp (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/include/dispatch_table.hpp (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/include/dispatcher.hpp (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/include/execution_options.hpp (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/include/executor.hpp (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/include/i_malloc.hpp (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/include/isa.hpp (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/include/llga_info.hpp (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/include/llga_operators/inner_product_graph.hpp (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/include/llga_operators/llga_kernel.hpp (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/include/llga_operators/llga_op_creator.hpp (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/include/llga_operators/softmax_graph.hpp (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/include/memory_allocator.hpp (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/include/model.hpp (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/include/op_tuning.hpp (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/include/operator.hpp (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/include/operator_registry.hpp (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/include/operators/binary_add.hpp (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/include/operators/concat.hpp (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/include/operators/convolution.hpp (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/include/operators/embeddingbag.hpp (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/include/operators/erf.hpp (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/include/operators/expand_indices.hpp (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/include/operators/gather.hpp (97%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/include/operators/gather_elements.hpp (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/include/operators/gelu.hpp (98%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/include/operators/group_norm.hpp (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/include/operators/inner_product.hpp (99%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/include/operators/input.hpp (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/include/operators/layer_norm.hpp (98%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/include/operators/matmul.hpp (99%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/include/operators/merged_embeddingbag.hpp (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/include/operators/one_hot.hpp (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/include/operators/output.hpp (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/include/operators/padding_sequence.hpp (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/include/operators/position_ids.hpp (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/include/operators/pow.hpp (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/include/operators/quantize.hpp (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/include/operators/range.hpp (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/include/operators/reduce_mean.hpp (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/include/operators/reduce_sum.hpp (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/include/operators/reorder.hpp (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/include/operators/reshape.hpp (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/include/operators/scatter_elements.hpp (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/include/operators/shape.hpp (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/include/operators/slice.hpp (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/include/operators/softmax.hpp (98%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/include/operators/split.hpp (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/include/operators/strided_slice.hpp (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/include/operators/token_type_ids.hpp (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/include/operators/topk.hpp (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/include/operators/transpose.hpp (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/include/operators/unsqueeze.hpp (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/include/profiling.hpp (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/include/sparse_operators/sparse_inner_product.hpp (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/include/tensor.hpp (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/include/thread_pool.hpp (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/python/bind_executor.cpp (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/python/pybind_tensor.hpp (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/python/test_model.py (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/src/common.cpp (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/src/i_malloc.cpp (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/src/llga_operators/inner_product_graph.cpp (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/src/llga_operators/llga_kernel.cpp (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/src/llga_operators/llga_op_creator.cpp (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/src/llga_operators/softmax_graph.cpp (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/src/model.cpp (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/src/nlp_executor.cc (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/src/op_tuning.cpp (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/src/operators/binary_add.cpp (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/src/operators/concat.cpp (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/src/operators/convolution.cpp (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/src/operators/embeddingbag.cpp (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/src/operators/erf.cpp (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/src/operators/expand_indices.cpp (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/src/operators/gather.cpp (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/src/operators/gather_elements.cpp (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/src/operators/gelu.cpp (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/src/operators/group_norm.cpp (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/src/operators/inner_product.cpp (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/src/operators/input.cpp (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/src/operators/layer_norm.cpp (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/src/operators/matmul.cpp (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/src/operators/merged_embeddingbag.cpp (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/src/operators/one_hot.cpp (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/src/operators/output.cpp (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/src/operators/padding_sequence.cpp (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/src/operators/position_ids.cpp (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/src/operators/pow.cpp (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/src/operators/quantize.cpp (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/src/operators/range.cpp (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/src/operators/reduce_mean.cpp (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/src/operators/reduce_sum.cpp (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/src/operators/reorder.cpp (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/src/operators/reshape.cpp (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/src/operators/scatter_elements.cpp (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/src/operators/shape.cpp (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/src/operators/slice.cpp (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/src/operators/softmax.cpp (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/src/operators/split.cpp (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/src/operators/strided_slice.cpp (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/src/operators/token_type_ids.cpp (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/src/operators/topk.cpp (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/src/operators/transpose.cpp (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/src/operators/unsqueeze.cpp (100%)
 rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/executor/src/sparse_operators/sparse_inner_product.cpp (100%)
 rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/CMakeLists.txt (100%)
 rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/README.md (78%)
 rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/cmake/FindVTune.cmake (100%)
 rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/docs/imgs/kernel_amx_bf16x16_calc.png (100%)
 rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/docs/imgs/kernel_amx_bf16x16_relayout.png (100%)
 rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/docs/imgs/kernel_avx512f_pattern_base.png (100%)
 rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/docs/imgs/kernel_avx512f_pattern_unroll4.png (100%)
 rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/docs/imgs/kernel_vnni_calc.png (100%)
 rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/docs/imgs/kernel_vnni_pattern.png (100%)
 rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/docs/imgs/kernel_vnni_pattern_left_1x4.png (100%)
 rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/docs/imgs/kernel_vnni_pattern_left_4x1.png (100%)
 rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/docs/imgs/kernel_vnni_pattern_right_1x16.png (100%)
 rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/docs/imgs/kernel_vnni_pattern_right_4x1.png (100%)
 rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/docs/imgs/kernel_vnni_perf.png (100%)
 rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/docs/imgs/relu_formula.svg (100%)
 rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/docs/kernel_desc/kernel_amx.md (100%)
 rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/docs/kernel_desc/kernel_avx512f.md (100%)
 rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/docs/kernel_desc/kernel_vnni.md (100%)
 rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/docs/kernel_desc/postop_injector.md (90%)
 rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/docs/profiling.md (91%)
 create mode 100644 intel_extension_for_transformers/backends/neural_engine/kernels/docs/validated_data.md
 rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/include/amx_utils.hpp (100%)
 rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/include/cpu_engine.hpp (100%)
 rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/include/cpu_isa.hpp (100%)
 rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/include/engine.hpp (100%)
 rename {nlp_toolkit/backends/neural_engine/SparseLib =>
intel_extension_for_transformers/backends/neural_engine/kernels}/include/engine_factory.hpp (100%) rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/include/impl_list_item.hpp (100%) rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/include/interface.hpp (100%) rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/include/jit_domain/jit_amx_configure.hpp (100%) rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/include/jit_domain/jit_binary_injector.hpp (100%) rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/include/jit_domain/jit_eltwise_injector.hpp (100%) rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/include/jit_domain/jit_eltwiseop.hpp (100%) rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/include/jit_domain/jit_gather.hpp (100%) rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/include/jit_domain/jit_layernorm_ba.hpp (100%) rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/include/jit_domain/jit_matmul_avx512f_p2031_p2013.hpp (100%) rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/include/jit_domain/jit_matmul_vnni_Ba4b_Ab4a_ba.hpp (100%) rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/include/jit_domain/jit_matmul_vnni_noperm_p2031_p1302.hpp (100%) rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/include/jit_domain/jit_softmax.hpp (100%) rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/include/jit_domain/jit_spmm_amx_bf16_x16.hpp (100%) rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/include/jit_domain/jit_spmm_avx512f.hpp (100%) rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/include/jit_domain/jit_spmm_vnni.hpp (100%) rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/include/jit_domain/jit_trans_cpy_nx8_4b.hpp (100%) rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/include/jit_generator.hpp (100%) rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/include/kernel.hpp (100%) rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/include/kernel_cache.hpp (100%) rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/include/kernel_desc.hpp (100%) rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/include/kernel_hashing.hpp 
(100%) rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/include/kernels/attention.hpp (100%) rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/include/kernels/eltwiseop.hpp (100%) rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/include/kernels/eltwiseop_ref.hpp (100%) rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/include/kernels/eltwiseop_types.hpp (100%) rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/include/kernels/gather.hpp (100%) rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/include/kernels/gather_ref.hpp (100%) rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/include/kernels/gather_types.hpp (100%) rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/include/kernels/layernorm_ba.hpp (100%) rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/include/kernels/layernorm_ba_ref.hpp (100%) rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/include/kernels/layernorm_ba_types.hpp (100%) rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/include/kernels/matmul_avx512f_p2031_p2013.hpp (100%) rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/include/kernels/matmul_ref.hpp (100%) rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/include/kernels/matmul_types.hpp (100%) rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/include/kernels/matmul_vnni_noperm_p2031_p1302.hpp (100%) create mode 100644 intel_extension_for_transformers/backends/neural_engine/kernels/include/kernels/postop_types.hpp rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/include/kernels/softmax.hpp (100%) rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/include/kernels/softmax_ref.hpp (100%) rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/include/kernels/softmax_types.hpp (100%) rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/include/kernels/sparse_data.hpp (100%) rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/include/kernels/spmm_amx_bf16_x16.hpp (100%) rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/include/kernels/spmm_avx512f.hpp (100%) rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/include/kernels/spmm_ref.hpp (100%) rename 
{nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/include/kernels/spmm_types.hpp (100%) rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/include/kernels/spmm_vnni.hpp (100%) rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/include/operator_desc.hpp (100%) rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/include/param_types.hpp (100%) rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/include/tensor_desc.hpp (100%) rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/include/utils.hpp (100%) rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/include/verbose.hpp (100%) rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/include/vtune_wrapper.hpp (100%) rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/src/amx_utils.cpp (100%) rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/src/cpu_engine.cpp (100%) rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/src/cpu_isa.cpp (100%) rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/src/interface.cpp (100%) rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/src/jit_domain/jit_amx_configure.cpp (100%) rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/src/jit_domain/jit_binary_injector.cpp (100%) rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/src/jit_domain/jit_eltwise_injector.cpp (100%) rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/src/jit_domain/jit_eltwiseop.cpp (100%) rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/src/jit_domain/jit_gather.cpp (100%) rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/src/jit_domain/jit_layernorm_ba.cpp (100%) rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/src/jit_domain/jit_matmul_avx512f_p2031_p2013.cpp (100%) rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/src/jit_domain/jit_matmul_vnni_Ba4b_Ab4a_ba.cpp (100%) rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/src/jit_domain/jit_matmul_vnni_noperm_p2031_p1302.cpp (100%) rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/src/jit_domain/jit_softmax.cpp (100%) rename {nlp_toolkit/backends/neural_engine/SparseLib => 
intel_extension_for_transformers/backends/neural_engine/kernels}/src/jit_domain/jit_spmm_amx_bf16_x16.cpp (100%) rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/src/jit_domain/jit_spmm_avx512f.cpp (100%) rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/src/jit_domain/jit_spmm_vnni.cpp (100%) rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/src/jit_domain/jit_trans_cpy_nx8_4b.cpp (100%) rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/src/jit_generator.cpp (100%) rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/src/kernel.cpp (100%) rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/src/kernel_cache.cpp (100%) rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/src/kernel_desc.cpp (100%) create mode 100644 intel_extension_for_transformers/backends/neural_engine/kernels/src/kernels/CMakeLists.txt rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/src/kernels/attention.cpp (100%) rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/src/kernels/cpu_attention_list.cpp (100%) rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/src/kernels/cpu_eltwiseop_list.cpp (100%) rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/src/kernels/cpu_gather_list.cpp (100%) rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/src/kernels/cpu_layernorm_ba_list.cpp (100%) rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/src/kernels/cpu_softmax_list.cpp (100%) rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/src/kernels/cpu_sparse_matmul_list.cpp (100%) rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/src/kernels/cpu_transpose_matmul_list.cpp (100%) rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/src/kernels/eltwiseop.cpp (100%) rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/src/kernels/eltwiseop_ref.cpp (100%) rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/src/kernels/gather.cpp (100%) rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/src/kernels/gather_ref.cpp (100%) rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/src/kernels/layernorm_ba.cpp (100%) rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/src/kernels/layernorm_ba_ref.cpp (100%) rename 
{nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/src/kernels/matmul_avx512f_p2031_p2013.cpp (100%) rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/src/kernels/matmul_ref.cpp (100%) rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/src/kernels/matmul_vnni_noperm_p2031_p1302.cpp (100%) rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/src/kernels/softmax.cpp (100%) rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/src/kernels/softmax_ref.cpp (100%) rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/src/kernels/sparse_data.cpp (100%) rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/src/kernels/spmm_amx_bf16_x16.cpp (100%) rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/src/kernels/spmm_avx512f.cpp (100%) rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/src/kernels/spmm_ref.cpp (100%) rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/src/kernels/spmm_vnni.cpp (100%) rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/src/utils.cpp (100%) rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/src/verbose.cpp (100%) rename {nlp_toolkit/backends/neural_engine/SparseLib => intel_extension_for_transformers/backends/neural_engine/kernels}/src/vtune_wrapper.cpp (100%) create mode 100644 intel_extension_for_transformers/backends/neural_engine/oneDNN-THIRD-PARTY-PROGRAMS rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/test/gtest/CMakeLists.txt (100%) rename {nlp_toolkit/backends/neural_engine/test/gtest/SparseLib => intel_extension_for_transformers/backends/neural_engine/test/gtest/kernels}/CMakeLists.txt (100%) rename {nlp_toolkit/backends/neural_engine/test/gtest/SparseLib => intel_extension_for_transformers/backends/neural_engine/test/gtest/kernels}/main.cpp (100%) rename {nlp_toolkit/backends/neural_engine/test/gtest/SparseLib => intel_extension_for_transformers/backends/neural_engine/test/gtest/kernels}/test_attention.cpp (100%) rename {nlp_toolkit/backends/neural_engine/test/gtest/SparseLib => intel_extension_for_transformers/backends/neural_engine/test/gtest/kernels}/test_eltwiseop_kernel.cpp (100%) rename {nlp_toolkit/backends/neural_engine/test/gtest/SparseLib => intel_extension_for_transformers/backends/neural_engine/test/gtest/kernels}/test_gather_kernel.cpp (100%) rename {nlp_toolkit/backends/neural_engine/test/gtest/SparseLib => intel_extension_for_transformers/backends/neural_engine/test/gtest/kernels}/test_layernorm_ba_kernel.cpp (100%) rename {nlp_toolkit/backends/neural_engine/test/gtest/SparseLib => intel_extension_for_transformers/backends/neural_engine/test/gtest/kernels}/test_matmul_avx512f_p2031_p2013.cpp (100%) rename {nlp_toolkit/backends/neural_engine/test/gtest/SparseLib => 
intel_extension_for_transformers/backends/neural_engine/test/gtest/kernels}/test_matmul_vnni_noperm_p2031_p1302.cpp (100%) rename {nlp_toolkit/backends/neural_engine/test/gtest/SparseLib => intel_extension_for_transformers/backends/neural_engine/test/gtest/kernels}/test_softmax_kernel.cpp (100%) rename {nlp_toolkit/backends/neural_engine/test/gtest/SparseLib => intel_extension_for_transformers/backends/neural_engine/test/gtest/kernels}/test_spmm_amx_bf16_x16_kernel.cpp (100%) rename {nlp_toolkit/backends/neural_engine/test/gtest/SparseLib => intel_extension_for_transformers/backends/neural_engine/test/gtest/kernels}/test_spmm_avx512f_kernel.cpp (100%) rename {nlp_toolkit/backends/neural_engine/test/gtest/SparseLib => intel_extension_for_transformers/backends/neural_engine/test/gtest/kernels}/test_spmm_vnni_kernel.cpp (100%) rename {nlp_toolkit/backends/neural_engine/test/gtest/SparseLib => intel_extension_for_transformers/backends/neural_engine/test/gtest/kernels}/unit_test_utils.hpp (100%) rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/test/gtest/main.cpp (100%) rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/test/gtest/test_binary_add_op.cpp (100%) rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/test/gtest/test_cast_op.cpp (100%) rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/test/gtest/test_concat_op.cpp (100%) rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/test/gtest/test_convolution_op.cpp (100%) rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/test/gtest/test_dequantize_op.cpp (100%) rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/test/gtest/test_div_op.cpp (100%) rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/test/gtest/test_embeddingbag_op.cpp (100%) rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/test/gtest/test_erf_op.cpp (100%) rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/test/gtest/test_expand_indices_op.cpp (100%) rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/test/gtest/test_gather_element_op.cpp (100%) rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/test/gtest/test_gather_op.cpp (100%) rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/test/gtest/test_gelu_op.cpp (100%) rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/test/gtest/test_group_norm_op.cpp (100%) rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/test/gtest/test_i_malloc.cpp (100%) rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/test/gtest/test_inner_product_op.cpp (100%) rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/test/gtest/test_layer_norm_op.cpp (100%) rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/test/gtest/test_matmul_op.cpp (100%) rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/test/gtest/test_mul_op.cpp (100%) rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/test/gtest/test_one_hot_op.cpp (100%) rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/test/gtest/test_padding_sequence_op.cpp (100%) rename {nlp_toolkit => 
intel_extension_for_transformers}/backends/neural_engine/test/gtest/test_position_ids_op.cpp (100%) rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/test/gtest/test_pow_op.cpp (100%) rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/test/gtest/test_range_op.cpp (100%) rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/test/gtest/test_reduce_mean_op.cpp (100%) rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/test/gtest/test_reorder_op.cpp (100%) rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/test/gtest/test_shape_op.cpp (100%) rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/test/gtest/test_slice_op.cpp (100%) rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/test/gtest/test_softmax_op.cpp (100%) rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/test/gtest/test_sparse_inner_product.cpp (100%) rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/test/gtest/test_split_op.cpp (100%) rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/test/gtest/test_sqrt_op.cpp (100%) rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/test/gtest/test_strided_slice_op.cpp (100%) rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/test/gtest/test_sub_op.cpp (100%) rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/test/gtest/test_tanh_op.cpp (100%) rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/test/gtest/test_token_type_ids_op.cpp (100%) rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/test/gtest/test_unsqueeze_op.cpp (100%) rename {nlp_toolkit/backends/neural_engine/test/SparseLib => intel_extension_for_transformers/backends/neural_engine/test/kernels}/benchmark/CMakeLists.txt (100%) rename {nlp_toolkit/backends/neural_engine/test/SparseLib => intel_extension_for_transformers/backends/neural_engine/test/kernels}/benchmark/README.md (94%) rename {nlp_toolkit/backends/neural_engine/test/SparseLib => intel_extension_for_transformers/backends/neural_engine/test/kernels}/benchmark/benchmark.cpp (100%) rename {nlp_toolkit/backends/neural_engine/test/SparseLib => intel_extension_for_transformers/backends/neural_engine/test/kernels}/benchmark/benchmark_utils.cpp (100%) rename {nlp_toolkit/backends/neural_engine/test/SparseLib => intel_extension_for_transformers/backends/neural_engine/test/kernels}/benchmark/benchmark_utils.hpp (100%) rename {nlp_toolkit/backends/neural_engine/test/SparseLib => intel_extension_for_transformers/backends/neural_engine/test/kernels}/benchmark/ci/benchmark.sh (100%) rename {nlp_toolkit/backends/neural_engine/test/SparseLib => intel_extension_for_transformers/backends/neural_engine/test/kernels}/benchmark/ci/inputs/README.md (100%) rename {nlp_toolkit/backends/neural_engine/test/SparseLib => intel_extension_for_transformers/backends/neural_engine/test/kernels}/benchmark/ci/inputs/ci_amx_bf16_x16_input (100%) rename {nlp_toolkit/backends/neural_engine/test/SparseLib => intel_extension_for_transformers/backends/neural_engine/test/kernels}/benchmark/ci/inputs/ci_eltwiseop_input (100%) rename {nlp_toolkit/backends/neural_engine/test/SparseLib => 
intel_extension_for_transformers/backends/neural_engine/test/kernels}/benchmark/ci/inputs/ci_layernorm_ba_input (100%) rename {nlp_toolkit/backends/neural_engine/test/SparseLib => intel_extension_for_transformers/backends/neural_engine/test/kernels}/benchmark/ci/inputs/ci_matmul_avx512f_p2031_p2013_input (100%) rename {nlp_toolkit/backends/neural_engine/test/SparseLib => intel_extension_for_transformers/backends/neural_engine/test/kernels}/benchmark/ci/inputs/ci_matmul_vnni_noperm_p2031_p1302_input (100%) rename {nlp_toolkit/backends/neural_engine/test/SparseLib => intel_extension_for_transformers/backends/neural_engine/test/kernels}/benchmark/ci/inputs/ci_softmax_input (100%) rename {nlp_toolkit/backends/neural_engine/test/SparseLib => intel_extension_for_transformers/backends/neural_engine/test/kernels}/benchmark/ci/inputs/ci_vnni_input (100%) rename {nlp_toolkit/backends/neural_engine/test/SparseLib => intel_extension_for_transformers/backends/neural_engine/test/kernels}/benchmark/ci/run_ci.sh (100%) rename {nlp_toolkit/backends/neural_engine/test/SparseLib => intel_extension_for_transformers/backends/neural_engine/test/kernels}/benchmark/ci/to_summary.sh (100%) rename {nlp_toolkit/backends/neural_engine/test/SparseLib => intel_extension_for_transformers/backends/neural_engine/test/kernels}/benchmark/common_utils.cpp (100%) rename {nlp_toolkit/backends/neural_engine/test/SparseLib => intel_extension_for_transformers/backends/neural_engine/test/kernels}/benchmark/common_utils.hpp (100%) rename {nlp_toolkit/backends/neural_engine/test/SparseLib => intel_extension_for_transformers/backends/neural_engine/test/kernels}/benchmark/eltwiseop/CMakeLists.txt (100%) rename {nlp_toolkit/backends/neural_engine/test/SparseLib => intel_extension_for_transformers/backends/neural_engine/test/kernels}/benchmark/eltwiseop/eltwiseop.cpp (100%) rename {nlp_toolkit/backends/neural_engine/test/SparseLib => intel_extension_for_transformers/backends/neural_engine/test/kernels}/benchmark/eltwiseop/eltwiseop.hpp (100%) rename {nlp_toolkit/backends/neural_engine/test/SparseLib => intel_extension_for_transformers/backends/neural_engine/test/kernels}/benchmark/layernorm_ba/CMakeLists.txt (100%) rename {nlp_toolkit/backends/neural_engine/test/SparseLib => intel_extension_for_transformers/backends/neural_engine/test/kernels}/benchmark/layernorm_ba/layernorm_ba.cpp (100%) rename {nlp_toolkit/backends/neural_engine/test/SparseLib => intel_extension_for_transformers/backends/neural_engine/test/kernels}/benchmark/layernorm_ba/layernorm_ba.hpp (100%) rename {nlp_toolkit/backends/neural_engine/test/SparseLib => intel_extension_for_transformers/backends/neural_engine/test/kernels}/benchmark/softmax/CMakeLists.txt (100%) rename {nlp_toolkit/backends/neural_engine/test/SparseLib => intel_extension_for_transformers/backends/neural_engine/test/kernels}/benchmark/softmax/softmax.cpp (100%) rename {nlp_toolkit/backends/neural_engine/test/SparseLib => intel_extension_for_transformers/backends/neural_engine/test/kernels}/benchmark/softmax/softmax.hpp (100%) rename {nlp_toolkit/backends/neural_engine/test/SparseLib => intel_extension_for_transformers/backends/neural_engine/test/kernels}/benchmark/sparse_matmul/CMakeLists.txt (100%) rename {nlp_toolkit/backends/neural_engine/test/SparseLib => intel_extension_for_transformers/backends/neural_engine/test/kernels}/benchmark/sparse_matmul/sparse_matmul.cpp (100%) rename {nlp_toolkit/backends/neural_engine/test/SparseLib => 
intel_extension_for_transformers/backends/neural_engine/test/kernels}/benchmark/sparse_matmul/sparse_matmul.hpp (100%) rename {nlp_toolkit/backends/neural_engine/test/SparseLib => intel_extension_for_transformers/backends/neural_engine/test/kernels}/benchmark/sparse_matmul/spmm_amx_bf16_x16.cpp (100%) rename {nlp_toolkit/backends/neural_engine/test/SparseLib => intel_extension_for_transformers/backends/neural_engine/test/kernels}/benchmark/sparse_matmul/spmm_amx_bf16_x16.hpp (100%) rename {nlp_toolkit/backends/neural_engine/test/SparseLib => intel_extension_for_transformers/backends/neural_engine/test/kernels}/benchmark/sparse_matmul/spmm_avx512f.cpp (100%) rename {nlp_toolkit/backends/neural_engine/test/SparseLib => intel_extension_for_transformers/backends/neural_engine/test/kernels}/benchmark/sparse_matmul/spmm_avx512f.hpp (100%) rename {nlp_toolkit/backends/neural_engine/test/SparseLib => intel_extension_for_transformers/backends/neural_engine/test/kernels}/benchmark/sparse_matmul/spmm_vnni.cpp (100%) rename {nlp_toolkit/backends/neural_engine/test/SparseLib => intel_extension_for_transformers/backends/neural_engine/test/kernels}/benchmark/sparse_matmul/spmm_vnni.hpp (100%) rename {nlp_toolkit/backends/neural_engine/test/SparseLib => intel_extension_for_transformers/backends/neural_engine/test/kernels}/benchmark/transpose_matmul/CMakeLists.txt (100%) rename {nlp_toolkit/backends/neural_engine/test/SparseLib => intel_extension_for_transformers/backends/neural_engine/test/kernels}/benchmark/transpose_matmul/matmul_avx512f_p2031_p2013.cpp (100%) rename {nlp_toolkit/backends/neural_engine/test/SparseLib => intel_extension_for_transformers/backends/neural_engine/test/kernels}/benchmark/transpose_matmul/matmul_avx512f_p2031_p2013.hpp (100%) rename {nlp_toolkit/backends/neural_engine/test/SparseLib => intel_extension_for_transformers/backends/neural_engine/test/kernels}/benchmark/transpose_matmul/matmul_vnni_noperm_p2031_p1302.cpp (100%) rename {nlp_toolkit/backends/neural_engine/test/SparseLib => intel_extension_for_transformers/backends/neural_engine/test/kernels}/benchmark/transpose_matmul/matmul_vnni_noperm_p2031_p1302.hpp (100%) rename {nlp_toolkit/backends/neural_engine/test/SparseLib => intel_extension_for_transformers/backends/neural_engine/test/kernels}/benchmark/transpose_matmul/transpose_matmul.cpp (100%) rename {nlp_toolkit/backends/neural_engine/test/SparseLib => intel_extension_for_transformers/backends/neural_engine/test/kernels}/benchmark/transpose_matmul/transpose_matmul.hpp (100%) rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/test/pytest/requirements.txt (100%) rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/test/pytest/test_add_cls_token.py (97%) rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/test/pytest/test_attention_output_layer_norm_length_adaptive_keep_indices.py (91%) rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/test/pytest/test_attention_reshape.py (94%) rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/test/pytest/test_conv_reshape.py (91%) rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/test/pytest/test_embeddingbag.py (85%) rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/test/pytest/test_execution_options.py (100%) rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/test/pytest/test_gelu.py (89%) rename 
{nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/test/pytest/test_graph_dispatch.py (97%) rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/test/pytest/test_insert_input_output_data.py (76%) rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/test/pytest/test_is_supported_onnx_node.py (82%) rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/test/pytest/test_last_layer_shape.py (89%) rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/test/pytest/test_layer_norm_with_reduce_mean.py (86%) rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/test/pytest/test_main.py (95%) rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/test/pytest/test_matmul_with_bias_relu.py (84%) rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/test/pytest/test_matmul_with_bias_sigmoid.py (84%) rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/test/pytest/test_matmul_with_bias_tanh.py (86%) rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/test/pytest/test_merged_embeddingbag.py (94%) rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/test/pytest/test_onnx_utils.py (92%) rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/test/pytest/test_ops.py (99%) rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/test/pytest/test_padding_sequence.py (97%) rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/test/pytest/test_pattern_dispatch.py (84%) rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/test/pytest/test_position_embeddings.py (90%) rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/test/pytest/test_qkv_merge.py (87%) rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/test/pytest/test_qkv_reshape.py (86%) rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/test/pytest/test_quant_onnx_execute.py (96%) rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/test/pytest/test_reshape_after_restore_hidden_states.py (87%) rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/test/pytest/test_reshape_before_and_after_attention_out_layer_norm_gather_elements.py (88%) rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/test/pytest/test_reshape_before_restore_hidden_states.py (86%) rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/test/pytest/test_start_end_logits.py (87%) rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/test/pytest/test_tf_utils.py (93%) rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/test/pytest/test_token_type_embeddings.py (94%) rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/test/pytest/test_transpose.py (93%) rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/test/pytest/test_transpose_batch_matmul.py (90%) rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/third_party/boost/libs/assert (100%) rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/third_party/boost/libs/config (100%) rename {nlp_toolkit => 
intel_extension_for_transformers}/backends/neural_engine/third_party/boost/libs/container (100%) rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/third_party/boost/libs/container_hash (100%) rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/third_party/boost/libs/core (100%) rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/third_party/boost/libs/interprocess (100%) rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/third_party/boost/libs/intrusive (100%) rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/third_party/boost/libs/move (100%) rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/third_party/boost/libs/mp11 (100%) rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/third_party/boost/libs/predef (100%) rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/third_party/boost/libs/preprocessor (100%) rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/third_party/boost/libs/static_assert (100%) rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/third_party/boost/libs/throw_exception (100%) rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/third_party/boost/libs/tuple (100%) rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/third_party/boost/libs/type_traits (100%) rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/third_party/boost/libs/unordered (100%) rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/third_party/boost/libs/winapi (100%) rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/third_party/gflags (100%) rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/third_party/glog (100%) rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/third_party/googletest (100%) rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/third_party/jemalloc (100%) rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/third_party/oneDNN (100%) rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/third_party/oneDNNGraph (100%) rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/third_party/pybind11 (100%) rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/third_party/xbyak (100%) rename {nlp_toolkit => intel_extension_for_transformers}/backends/neural_engine/third_party/yaml-cpp (100%) rename {nlp_toolkit => intel_extension_for_transformers}/optimization/__init__.py (100%) rename {nlp_toolkit => intel_extension_for_transformers}/optimization/benchmark.py (94%) rename {nlp_toolkit => intel_extension_for_transformers}/optimization/config.py (98%) rename {nlp_toolkit => intel_extension_for_transformers}/optimization/distillation.py (100%) rename {nlp_toolkit => intel_extension_for_transformers}/optimization/dynamic/__init__.py (100%) rename {nlp_toolkit => intel_extension_for_transformers}/optimization/dynamic/drop_and_restore_utils.py (100%) rename {nlp_toolkit => intel_extension_for_transformers}/optimization/dynamic/evolution.py (98%) rename {nlp_toolkit => intel_extension_for_transformers}/optimization/mixture/__init__.py (100%) rename {nlp_toolkit => 
intel_extension_for_transformers}/optimization/mixture/auto_distillation.py (99%) rename {nlp_toolkit => intel_extension_for_transformers}/optimization/model.py (97%) create mode 100644 intel_extension_for_transformers/optimization/neural-compressor-third-party-programs.txt rename {nlp_toolkit => intel_extension_for_transformers}/optimization/optimizer.py (97%) rename {nlp_toolkit => intel_extension_for_transformers}/optimization/optimizer_tf.py (98%) rename {nlp_toolkit => intel_extension_for_transformers}/optimization/pipeline.py (94%) rename {nlp_toolkit => intel_extension_for_transformers}/optimization/pruning.py (100%) rename {nlp_toolkit => intel_extension_for_transformers}/optimization/pytorch_pruner/__init__.py (100%) rename {nlp_toolkit => intel_extension_for_transformers}/optimization/pytorch_pruner/patterns.py (100%) rename {nlp_toolkit => intel_extension_for_transformers}/optimization/pytorch_pruner/prune_utils.py (100%) rename {nlp_toolkit => intel_extension_for_transformers}/optimization/pytorch_pruner/pruner.py (100%) rename {nlp_toolkit => intel_extension_for_transformers}/optimization/pytorch_pruner/pruning.py (100%) rename {nlp_toolkit => intel_extension_for_transformers}/optimization/pytorch_pruner/scheduler.py (100%) rename {nlp_toolkit => intel_extension_for_transformers}/optimization/quantization.py (100%) rename {nlp_toolkit => intel_extension_for_transformers}/optimization/trainer.py (99%) rename {nlp_toolkit => intel_extension_for_transformers}/optimization/utils/__init__.py (100%) rename {nlp_toolkit => intel_extension_for_transformers}/optimization/utils/metrics.py (100%) rename {nlp_toolkit => intel_extension_for_transformers}/optimization/utils/models/__init__.py (100%) rename {nlp_toolkit => intel_extension_for_transformers}/optimization/utils/models/modeling_bert_dynamic.py (100%) rename {nlp_toolkit => intel_extension_for_transformers}/optimization/utils/models/modeling_roberta_dynamic.py (100%) rename {nlp_toolkit => intel_extension_for_transformers}/optimization/utils/objectives.py (100%) rename {nlp_toolkit => intel_extension_for_transformers}/optimization/utils/utility.py (100%) rename {nlp_toolkit => intel_extension_for_transformers}/optimization/utils/utility_tf.py (100%) rename {nlp_toolkit => intel_extension_for_transformers}/preprocessing/__init__.py (100%) rename {nlp_toolkit => intel_extension_for_transformers}/preprocessing/data_augmentation.py (99%) rename {nlp_toolkit => intel_extension_for_transformers}/preprocessing/utils.py (100%) rename {nlp_toolkit => intel_extension_for_transformers}/version.py (96%) delete mode 100644 nlp_toolkit/backends/neural_engine/docs/onnx_quantize.md delete mode 100644 nlp_toolkit/backends/openvino/nncf_utils.py create mode 100644 third_party_programs.txt diff --git a/.gitignore b/.gitignore index ea2b10d4378..22b9ac29c29 100644 --- a/.gitignore +++ b/.gitignore @@ -14,12 +14,12 @@ tags build/ _build dist/ -nlp_toolkit.egg-info/ +intel_extension_for_transformers.egg-info/ /.vs -/nlp_toolkit/backends/neural_engine/.vs -/nlp_toolkit/backends/neural_engine/out +/intel_extension_for_transformers/backends/neural_engine/.vs +/intel_extension_for_transformers/backends/neural_engine/out /examples/**/*.npy /examples/**/*.bin /examples/**/*.yaml diff --git a/.gitmodules b/.gitmodules index b142cf98db9..2daf17ca142 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,96 +1,96 @@ -[submodule "nlp_toolkit/backends/neural_engine/third_party/yaml-cpp"] - path = nlp_toolkit/backends/neural_engine/third_party/yaml-cpp +[submodule 
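Everything in the summary above is the same mechanical move: the whole nlp_toolkit tree is relocated under intel_extension_for_transformers, almost always at 100% similarity, and the .gitignore hunk updates the ignored build directories to the new prefix. As a purely advisory sanity check after applying a rename of this size, a minimal sketch along the following lines can flag files that still mention the old package name. It is not part of this patch; the helper name, the scanned extensions, and the default root are assumptions.

    import os

    def find_stale_references(root=".", needle="nlp_toolkit",
                              exts=(".py", ".md", ".txt", ".cmake", ".json")):
        """Walk a checkout and report files still mentioning the old name."""
        hits = []
        for dirpath, dirnames, filenames in os.walk(root):
            dirnames[:] = [d for d in dirnames if d != ".git"]  # skip git metadata
            for name in filenames:
                if not name.endswith(exts):
                    continue
                path = os.path.join(dirpath, name)
                try:
                    with open(path, encoding="utf-8", errors="ignore") as f:
                        if needle in f.read():
                            hits.append(path)
                except OSError:
                    pass  # unreadable files are skipped
        return hits

    if __name__ == "__main__":
        for path in find_stale_references():
            print("stale reference:", path)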
"intel_extension_for_transformers/backends/neural_engine/third_party/yaml-cpp"] + path = intel_extension_for_transformers/backends/neural_engine/third_party/yaml-cpp url = https://github.com/jbeder/yaml-cpp.git -[submodule "nlp_toolkit/backends/neural_engine/third_party/oneDNN"] - path = nlp_toolkit/backends/neural_engine/third_party/oneDNN +[submodule "intel_extension_for_transformers/backends/neural_engine/third_party/oneDNN"] + path = intel_extension_for_transformers/backends/neural_engine/third_party/oneDNN url = https://github.com/oneapi-src/oneDNN.git -[submodule "nlp_toolkit/backends/neural_engine/third_party/pybind11"] - path = nlp_toolkit/backends/neural_engine/third_party/pybind11 +[submodule "intel_extension_for_transformers/backends/neural_engine/third_party/pybind11"] + path = intel_extension_for_transformers/backends/neural_engine/third_party/pybind11 url = https://github.com/pybind/pybind11.git -[submodule "nlp_toolkit/backends/neural_engine/third_party/googletest"] - path = nlp_toolkit/backends/neural_engine/third_party/googletest +[submodule "intel_extension_for_transformers/backends/neural_engine/third_party/googletest"] + path = intel_extension_for_transformers/backends/neural_engine/third_party/googletest url = https://github.com/google/googletest.git -[submodule "nlp_toolkit/backends/neural_engine/third_party/glog"] - path = nlp_toolkit/backends/neural_engine/third_party/glog +[submodule "intel_extension_for_transformers/backends/neural_engine/third_party/glog"] + path = intel_extension_for_transformers/backends/neural_engine/third_party/glog url = https://github.com/google/glog.git -[submodule "nlp_toolkit/backends/neural_engine/third_party/gflags"] - path = nlp_toolkit/backends/neural_engine/third_party/gflags +[submodule "intel_extension_for_transformers/backends/neural_engine/third_party/gflags"] + path = intel_extension_for_transformers/backends/neural_engine/third_party/gflags url = https://github.com/gflags/gflags.git -[submodule "nlp_toolkit/backends/neural_engine/third_party/jemalloc"] - path = nlp_toolkit/backends/neural_engine/third_party/jemalloc +[submodule "intel_extension_for_transformers/backends/neural_engine/third_party/jemalloc"] + path = intel_extension_for_transformers/backends/neural_engine/third_party/jemalloc url = https://github.com/jemalloc/jemalloc.git -[submodule "nlp_toolkit/backends/neural_engine/third_party/xbyak"] - path = nlp_toolkit/backends/neural_engine/third_party/xbyak +[submodule "intel_extension_for_transformers/backends/neural_engine/third_party/xbyak"] + path = intel_extension_for_transformers/backends/neural_engine/third_party/xbyak url = https://github.com/herumi/xbyak.git -[submodule "nlp_toolkit/backends/neural_engine/third_party/boost/libs/assert"] - path = nlp_toolkit/backends/neural_engine/third_party/boost/libs/assert +[submodule "intel_extension_for_transformers/backends/neural_engine/third_party/boost/libs/assert"] + path = intel_extension_for_transformers/backends/neural_engine/third_party/boost/libs/assert url = https://github.com/boostorg/assert.git branch = boost-1.80.0 -[submodule "nlp_toolkit/backends/neural_engine/third_party/boost/libs/core"] - path = nlp_toolkit/backends/neural_engine/third_party/boost/libs/core +[submodule "intel_extension_for_transformers/backends/neural_engine/third_party/boost/libs/core"] + path = intel_extension_for_transformers/backends/neural_engine/third_party/boost/libs/core url = https://github.com/boostorg/core.git branch = boost-1.80.0 -[submodule 
"nlp_toolkit/backends/neural_engine/third_party/boost/libs/move"] - path = nlp_toolkit/backends/neural_engine/third_party/boost/libs/move +[submodule "intel_extension_for_transformers/backends/neural_engine/third_party/boost/libs/move"] + path = intel_extension_for_transformers/backends/neural_engine/third_party/boost/libs/move url = https://github.com/boostorg/move.git branch = boost-1.80.0 -[submodule "nlp_toolkit/backends/neural_engine/third_party/boost/libs/config"] - path = nlp_toolkit/backends/neural_engine/third_party/boost/libs/config +[submodule "intel_extension_for_transformers/backends/neural_engine/third_party/boost/libs/config"] + path = intel_extension_for_transformers/backends/neural_engine/third_party/boost/libs/config url = https://github.com/boostorg/config.git branch = boost-1.80.0 -[submodule "nlp_toolkit/backends/neural_engine/third_party/boost/libs/container"] - path = nlp_toolkit/backends/neural_engine/third_party/boost/libs/container +[submodule "intel_extension_for_transformers/backends/neural_engine/third_party/boost/libs/container"] + path = intel_extension_for_transformers/backends/neural_engine/third_party/boost/libs/container url = https://github.com/boostorg/container.git branch = boost-1.80.0 -[submodule "nlp_toolkit/backends/neural_engine/third_party/boost/libs/intrusive"] - path = nlp_toolkit/backends/neural_engine/third_party/boost/libs/intrusive +[submodule "intel_extension_for_transformers/backends/neural_engine/third_party/boost/libs/intrusive"] + path = intel_extension_for_transformers/backends/neural_engine/third_party/boost/libs/intrusive url = https://github.com/boostorg/intrusive.git branch = boost-1.80.0 -[submodule "nlp_toolkit/backends/neural_engine/third_party/boost/libs/static_assert"] - path = nlp_toolkit/backends/neural_engine/third_party/boost/libs/static_assert +[submodule "intel_extension_for_transformers/backends/neural_engine/third_party/boost/libs/static_assert"] + path = intel_extension_for_transformers/backends/neural_engine/third_party/boost/libs/static_assert url = https://github.com/boostorg/static_assert.git branch = boost-1.80.0 -[submodule "nlp_toolkit/backends/neural_engine/third_party/boost/libs/type_traits"] - path = nlp_toolkit/backends/neural_engine/third_party/boost/libs/type_traits +[submodule "intel_extension_for_transformers/backends/neural_engine/third_party/boost/libs/type_traits"] + path = intel_extension_for_transformers/backends/neural_engine/third_party/boost/libs/type_traits url = https://github.com/boostorg/type_traits.git branch = boost-1.80.0 -[submodule "nlp_toolkit/backends/neural_engine/third_party/boost/libs/interprocess"] - path = nlp_toolkit/backends/neural_engine/third_party/boost/libs/interprocess +[submodule "intel_extension_for_transformers/backends/neural_engine/third_party/boost/libs/interprocess"] + path = intel_extension_for_transformers/backends/neural_engine/third_party/boost/libs/interprocess url = https://github.com/boostorg/interprocess.git branch = boost-1.80.0 -[submodule "nlp_toolkit/backends/neural_engine/third_party/boost/libs/unordered"] - path = nlp_toolkit/backends/neural_engine/third_party/boost/libs/unordered +[submodule "intel_extension_for_transformers/backends/neural_engine/third_party/boost/libs/unordered"] + path = intel_extension_for_transformers/backends/neural_engine/third_party/boost/libs/unordered url = https://github.com/boostorg/unordered.git branch = 1.80.0 -[submodule "nlp_toolkit/backends/neural_engine/third_party/boost/libs/container_hash"] - path = 
nlp_toolkit/backends/neural_engine/third_party/boost/libs/container_hash +[submodule "intel_extension_for_transformers/backends/neural_engine/third_party/boost/libs/container_hash"] + path = intel_extension_for_transformers/backends/neural_engine/third_party/boost/libs/container_hash url = https://github.com/boostorg/container_hash.git branch = boost-1.80.0 -[submodule "nlp_toolkit/backends/neural_engine/third_party/boost/libs/preprocessor"] - path = nlp_toolkit/backends/neural_engine/third_party/boost/libs/preprocessor +[submodule "intel_extension_for_transformers/backends/neural_engine/third_party/boost/libs/preprocessor"] + path = intel_extension_for_transformers/backends/neural_engine/third_party/boost/libs/preprocessor url = https://github.com/boostorg/preprocessor.git branch = boost-1.80.0 -[submodule "nlp_toolkit/backends/neural_engine/third_party/boost/libs/throw_exception"] - path = nlp_toolkit/backends/neural_engine/third_party/boost/libs/throw_exception +[submodule "intel_extension_for_transformers/backends/neural_engine/third_party/boost/libs/throw_exception"] + path = intel_extension_for_transformers/backends/neural_engine/third_party/boost/libs/throw_exception url = https://github.com/boostorg/throw_exception.git branch = boost-1.80.0 -[submodule "nlp_toolkit/backends/neural_engine/third_party/boost/libs/tuple"] - path = nlp_toolkit/backends/neural_engine/third_party/boost/libs/tuple +[submodule "intel_extension_for_transformers/backends/neural_engine/third_party/boost/libs/tuple"] + path = intel_extension_for_transformers/backends/neural_engine/third_party/boost/libs/tuple url = https://github.com/boostorg/tuple.git branch = boost-1.80.0 -[submodule "nlp_toolkit/backends/neural_engine/third_party/boost/libs/predef"] - path = nlp_toolkit/backends/neural_engine/third_party/boost/libs/predef +[submodule "intel_extension_for_transformers/backends/neural_engine/third_party/boost/libs/predef"] + path = intel_extension_for_transformers/backends/neural_engine/third_party/boost/libs/predef url = https://github.com/boostorg/predef.git branch = boost-1.80.0 -[submodule "nlp_toolkit/backends/neural_engine/third_party/boost/libs/mp11"] - path = nlp_toolkit/backends/neural_engine/third_party/boost/libs/mp11 +[submodule "intel_extension_for_transformers/backends/neural_engine/third_party/boost/libs/mp11"] + path = intel_extension_for_transformers/backends/neural_engine/third_party/boost/libs/mp11 url = https://github.com/boostorg/mp11.git branch = boost-1.80.0 -[submodule "nlp_toolkit/backends/neural_engine/third_party/boost/libs/winapi"] +[submodule "intel_extension_for_transformers/backends/neural_engine/third_party/boost/libs/winapi"] branch = boost-1.80.0 - path = nlp_toolkit/backends/neural_engine/third_party/boost/libs/winapi + path = intel_extension_for_transformers/backends/neural_engine/third_party/boost/libs/winapi url = https://github.com/boostorg/winapi.git -[submodule "nlp_toolkit/backends/neural_engine/third_party/oneDNNGraph"] - path = nlp_toolkit/backends/neural_engine/third_party/oneDNNGraph +[submodule "intel_extension_for_transformers/backends/neural_engine/third_party/oneDNNGraph"] + path = intel_extension_for_transformers/backends/neural_engine/third_party/oneDNNGraph url = https://github.com/oneapi-src/oneDNN.git branch = dev-graph diff --git a/LICENSE b/LICENSE new file mode 100644 index 00000000000..2623aa5267d --- /dev/null +++ b/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR 
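The .gitmodules rewrite above has to touch every submodule twice: once in its section name and once in its path value, while the url and branch lines stay as unchanged context. A small sketch that leans on git's own config parser can confirm that no submodule path still points into nlp_toolkit once the patch is applied; the helper name and the assertion are illustrative only, though "git config --file .gitmodules --get-regexp" is a standard way to read this file.

    import subprocess

    def submodule_paths(gitmodules=".gitmodules"):
        """List submodule paths using git's own .gitmodules parser."""
        out = subprocess.run(
            ["git", "config", "--file", gitmodules, "--get-regexp", r"\.path$"],
            capture_output=True, text=True, check=True,
        ).stdout
        # Each output line looks like: submodule.<name>.path <value>
        return [line.split(" ", 1)[1] for line in out.splitlines() if " " in line]

    if __name__ == "__main__":
        for path in submodule_paths():
            assert not path.startswith("nlp_toolkit/"), f"stale submodule path: {path}"
            print("ok:", path)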
USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. 
Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. 
This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + ============================================================================ + + Copyright 2016-2019 Intel Corporation + Copyright 2018 YANDEX LLC + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + This distribution includes third party software ("third party programs"). + This third party software, even if included with the distribution of + the Intel software, may be governed by separate license terms, including + without limitation, third party license terms, other Intel software license + terms, and open source software license terms. These separate license terms + govern your use of the third party programs as set forth in the + "THIRD-PARTY-PROGRAMS" file. 
\ No newline at end of file diff --git a/README.md b/README.md index c9b3b876856..9ff61514b36 100644 --- a/README.md +++ b/README.md @@ -1,38 +1,51 @@ -# NLP Toolkit: Optimization for Natural Language Processing (NLP) Models -NLP Toolkit is a powerful toolkit for automatically applying model optimizations on Natural Language Processing Models. It leverages [Intel® Neural Compressor](https://intel.github.io/neural-compressor) to provide a variety of model compression techniques: quantization, pruning, distillation and so on. +# Intel® Extension for Transformers: Accelerating Transformer-based Models on Intel Platforms +Intel® Extension for Transformers is an innovative toolkit to accelerate Transformer-based models on Intel platforms. The toolkit helps developers to improve the productivity through ease-of-use model compression APIs by extending Hugging Face transformers APIs. The compression infrastructure leverages Intel® Neural Compressor which provides a rich set of model compression techniques: quantization, pruning, distillation and so on. The toolkit provides Transformers-accelerated Libraries and Neural Engine to demonstrate the performance of extremely compressed models, and therefore significantly improve the inference efficiency on Intel platforms. Some of the key features have been published in NeurIPS 2021 and 2022. -## What does NLP Toolkit offer? -This toolkit allows developers to improve the productivity through ease-of-use model compression APIs by extending HuggingFace transformer APIs for deep learning models in NLP (Natural Language Processing) domain and accelerate the inference performance using compressed models. +## What does Intel® Extension for Transformers offer? +This toolkit helps developers to improve the productivity of inference deployment by extending Hugging Face transformers APIs for Transformer-based models in natural language processing (NLP) domain. With extremely compressed models, the toolkit can greatly improve the inference efficiency on Intel platforms. - Model Compression - |Framework |Quantization |Pruning/Sparsity |Distillation |AutoDistillation | - |-------------------|:-----------:|:---------------:|:-----------:|:--------------:| - |PyTorch |✔ |✔ |✔ |✔ | - |TensorFlow |✔ |✔ |Stay tuned :star:|Stay tuned :star:| + |Framework |Quantization |Pruning/Sparsity |Distillation |Neural Architecture Search | + |-------------------|:-----------:|:---------------:|:-----------:|:-------------------------:| + |PyTorch |✔ |✔ |✔ |✔ | + |TensorFlow |✔ |✔ |✔ |Stay tuned :star: | - Data Augmentation for NLP Datasets -- Neural Engine for Reference Deployment - -## Getting Started -### Installation -#### Install Dependency +- Transformers-accelerated Neural Engine +- Transformers-accelerated Libraries +- Domain Algorithms + |Length Adaptive Transformer | + |:--------------------------:| + |PyTorch ✔ | + +- Architecture of Intel® Extension for Transformers +arch +
+ +## Installation +### Release Binary Install ```bash -pip install -r requirements.txt +pip install intel-extension-for-transformers ``` -#### Install NLP Toolkit +### Install From Source +#### Install Intel® Extension for Transformers ```bash -git clone https://github.com/intel-innersource/frameworks.ai.nlp-toolkit.intel-nlp-toolkit.git nlp_toolkit -cd nlp_toolkit +git clone https://github.com/intel/intel-extension-for-transformers.git intel_extension_for_transformers +cd intel_extension_for_transformers +# Install Dependency +pip install -r requirements.txt git submodule update --init --recursive +# Install intel_extension_for_transformers python setup.py install ``` +## Getting Started ### Quantization ```python -from nlp_toolkit import QuantizationConfig, metric, objectives -from nlp_toolkit.optimization.trainer import NLPTrainer +from intel_extension_for_transformers import QuantizationConfig, metric, objectives +from intel_extension_for_transformers.optimization.trainer import NLPTrainer # Replace transformers.Trainer with NLPTrainer # trainer = transformers.Trainer(...) @@ -50,8 +63,8 @@ Please refer to [quantization document](docs/quantization.md) for more details. ### Pruning ```python -from nlp_toolkit import PrunerConfig, PruningConfig -from nlp_toolkit.optimization.trainer import NLPTrainer +from intel_extension_for_transformers import PrunerConfig, PruningConfig +from intel_extension_for_transformers.optimization.trainer import NLPTrainer # Replace transformers.Trainer with NLPTrainer # trainer = transformers.Trainer(...) @@ -66,8 +79,8 @@ Please refer to [pruning document](docs/pruning.md) for more details. ### Distillation ```python -from nlp_toolkit import DistillationConfig, Criterion -from nlp_toolkit.optimization.trainer import NLPTrainer +from intel_extension_for_transformers import DistillationConfig, Criterion +from intel_extension_for_transformers.optimization.trainer import NLPTrainer # Replace transformers.Trainer with NLPTrainer # trainer = transformers.Trainer(...) @@ -84,7 +97,7 @@ Please refer to [distillation document](docs/distillation.md) for more details. Data augmentation provides the facilities to generate synthesized NLP dataset for further model optimization. The data augmentation supports text generation on popular fine-tuned models like GPT, GPT2, and other text synthesis approaches from [nlpaug](https://github.com/makcedward/nlpaug). ```python -from nlp_toolkit.preprocessing.data_augmentation import DataAugmentation +from intel_extension_for_transformers.preprocessing.data_augmentation import DataAugmentation aug = DataAugmentation(augmenter_type="TextGenerationAug") aug.input_dataset = "original_dataset.csv" # example: https://huggingface.co/datasets/glue/viewer/sst2/train aug.column_names = "sentence" @@ -97,10 +110,10 @@ raw_datasets = load_dataset("csv", data_files=aug.output_path, delimiter="\t", s Please refer to [data augmentation document](docs/data_augmentation.md) for more details. ### Neural Engine -Neural Engine is one of reference deployments that NLP toolkit provides. Neural Engine aims to demonstrate the optimal performance of extremely compressed NLP models by exploring the optimization opportunities from both HW and SW. +Neural Engine is one of reference deployments that Intel Extension for Transformers provides. Neural Engine aims to demonstrate the optimal performance of extremely compressed NLP models by exploring the optimization opportunities from both HW and SW. 
```python
-from nlp_toolkit.backends.neural_engine.compile import compile
+from intel_extension_for_transformers.backends.neural_engine.compile import compile
 # /path/to/your/model is a TensorFlow pb model or ONNX model
 model = compile('/path/to/your/model')
 inputs = ... # [input_ids, segment_ids, input_mask]
@@ -112,8 +125,8 @@ Please refer to [Neural Engine](examples/deployment/) for more details.
 ### Quantized Length Adaptive Transformer
 Quantized Length Adaptive Transformer leverages sequence-length reduction and low-bit representation techniques to further enhance model inference performance, enabling adaptive sequence-length sizes to accommodate different computational budget requirements with an optimal accuracy-efficiency tradeoff.
 ```python
-from nlp_toolkit import QuantizationConfig, DynamicLengthConfig, metric, objectives
-from nlp_toolkit.optimization.trainer import NLPTrainer
+from intel_extension_for_transformers import QuantizationConfig, DynamicLengthConfig, metric, objectives
+from intel_extension_for_transformers.optimization.trainer import NLPTrainer

 # Replace transformers.Trainer with NLPTrainer
 # trainer = transformers.Trainer(...)
@@ -131,4 +144,77 @@ trainer.set_dynamic_config(dynamic_config=dynamic_length_config)
 model = trainer.quantize(quant_config=q_config)
 ```
-Please refer to paper [QuaLA-MiniLM](https://arxiv.org/pdf/2210.17114.pdf) and [code](examples/optimization/pytorch/huggingface/question-answering/dynamic) for details
\ No newline at end of file
+Please refer to the paper [QuaLA-MiniLM](https://arxiv.org/pdf/2210.17114.pdf) and the [code](examples/optimization/pytorch/huggingface/question-answering/dynamic) for details.
+
+
+### Transformers-accelerated Neural Engine
+Transformers-accelerated Neural Engine is one of the reference deployments that Intel® Extension for Transformers provides. Neural Engine aims to demonstrate the optimal performance of extremely compressed NLP models by exploring the optimization opportunities from both HW and SW.
+
+```python
+from intel_extension_for_transformers.backends.neural_engine.compile import compile
+# /path/to/your/model is a TensorFlow pb model or ONNX model
+model = compile('/path/to/your/model')
+inputs = ... # [input_ids, segment_ids, input_mask]
+model.inference(inputs)
+```
+
+Please refer to the [example](examples/deployment/neural_engine/sparse/distilbert_base_uncased/) in [Transformers-accelerated Neural Engine](examples/deployment/) and the paper [Fast Distilbert on CPUs](https://arxiv.org/abs/2211.07715) for more details.
+
+### Transformers-accelerated Libraries
+Transformers-accelerated Libraries is a high-performance operator computing library implemented in assembly. It contains a JIT domain, a kernel domain, and a scheduling proxy framework.
+
+```C++
+#include "interface.hpp"
+  ...
+  operator_desc op_desc(ker_kind, ker_prop, eng_kind, ts_descs, op_attrs);
+  sparse_matmul_desc spmm_desc(op_desc);
+  sparse_matmul spmm_kern(spmm_desc);
+  std::vector<const void*> rt_data = {data0, data1, data2, data3, data4};
+  spmm_kern.execute(rt_data);
+```
+Please refer to [Transformers-accelerated Libraries](intel_extension_for_transformers/backends/neural_engine/kernels/README.md) for more details.
+
+
+## System Requirements
+### Validated Hardware Environment
+Intel® Extension for Transformers supports systems based on [Intel 64 architecture or compatible processors](https://en.wikipedia.org/wiki/X86-64) that are specifically optimized for the following CPUs:
+
+* Intel Xeon Scalable processor (formerly Cascade Lake and Ice Lake)
+* Future Intel Xeon Scalable processor (code name Sapphire Rapids)
+
+### Validated Software Environment
+
+* OS version: CentOS 8.4, Ubuntu 20.04
+* Python version: 3.7, 3.8, 3.9, 3.10
+
+| Framework | TensorFlow | Intel TensorFlow | PyTorch | IPEX |
+| :-------: | :--------: | :--------------: | :-----: | :--: |
+| Version | 2.10.0<br>2.9.1 | 2.10.0<br>2.9.1 | 1.13.0+cpu<br>1.12.0+cpu | 1.13.0<br>1.12.0 |
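A quick way to check a machine against the validated matrix above — purely an illustrative aside, not part of the patch:

```python
# Print the versions that the validated-environment table above cares about.
import sys

print("Python:", sys.version.split()[0])      # validated: 3.7 - 3.10
try:
    import torch
    print("PyTorch:", torch.__version__)      # validated: 1.12.0+cpu / 1.13.0+cpu
except ImportError:
    print("PyTorch: not installed")
try:
    import tensorflow as tf
    print("TensorFlow:", tf.__version__)      # validated: 2.9.1 / 2.10.0
except ImportError:
    print("TensorFlow: not installed")
```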
+
+
+## Selected Publications/Events
+* NeurIPS'2022: [Fast Distilbert on CPUs](https://arxiv.org/abs/2211.07715) (Nov 2022)
+* NeurIPS'2022: [QuaLA-MiniLM: a Quantized Length Adaptive MiniLM](https://arxiv.org/abs/2210.17114) (Nov 2022)
+* Blog published by Alibaba: [Deep learning inference optimization for Address Purification](https://zhuanlan.zhihu.com/p/552484413) (Aug 2022)
+* NeurIPS'2021: [Prune Once for All: Sparse Pre-Trained Language Models](https://arxiv.org/abs/2111.05754) (Nov 2021)
diff --git a/SECURITY.md b/SECURITY.md
new file mode 100644
index 00000000000..71a71eff1b6
--- /dev/null
+++ b/SECURITY.md
@@ -0,0 +1,13 @@
+Security Policy
+===============
+
+## Report a Vulnerability
+
+Please report security issues or vulnerabilities to the [Intel® Security Center].
+
+For more information on how Intel® works to resolve security issues, see
+[Vulnerability Handling Guidelines].
+
+[Intel® Security Center]:https://www.intel.com/security
+
+[Vulnerability Handling Guidelines]:https://www.intel.com/content/www/us/en/security-center/vulnerability-handling-guidelines.html
diff --git a/conda_meta/meta.yaml b/conda_meta/meta.yaml
new file mode 100644
index 00000000000..f6f62d8e75c
--- /dev/null
+++ b/conda_meta/meta.yaml
@@ -0,0 +1,38 @@
+{% set version = "1.0b" %}
+{% set buildnumber = 0 %}
+package:
+  name: intel_extension_for_transformers
+  version: {{version}}
+build:
+  script_env:
+    - IMEX_WHL
+  number: {{buildnumber}}
+  script: pip install --no-deps {{IMEX_WHL}}
+requirements:
+  build:
+    - python
+    - pip
+  run:
+    - python
+    - pip
+    - numpy
+    - transformers
+    - packaging
+    - neural-compressor
+    - protobuf
+test:
+  imports:
+    - intel_extension_for_transformers
+about:
+  home: https://github.com/intel/intel_extension_for_transformers
+  license: Apache 2.0
+  license_family: Apache
+  license_file: ../LICENSE
+  description: '
+  LEGAL NOTICE: Use of this software package is subject to the software license agreement (as set forth above, in the license section of the installed Conda package and/or the README file) and all notices, disclaimers or license terms for third party or open source software included in or with the software.
+

+ EULA: Apache 2.0
+ Third Party Programs: https://github.com/intel/intel_extension_for_transformers/blob/1.0a/third_party_programs.txt +

+ Intel® Extension for Transformers. + ' diff --git a/docs/AutoDistillation design.md b/docs/AutoDistillation design.md index 664cd1c9541..ec529056d0b 100644 --- a/docs/AutoDistillation design.md +++ b/docs/AutoDistillation design.md @@ -11,7 +11,7 @@ In Model Exploration, a search engine will search for a better compressed model Flash Distillation is the stage for training the searched model to discover its potential.
-In Evaluation stage, trained model will be evaluated to measure its performances (e.g. the prediction accuracy, the hardware performance etc.) inorder to select the best model architecture.
+In the Evaluation stage, the trained model is evaluated to measure its performance (e.g. prediction accuracy, hardware performance, etc.) in order to select the best model architecture.
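Putting the three stages together, a single AutoDistillation iteration conceptually chains proposal, flash distillation, and evaluation. The sketch below is illustrative only — the helper names (`advisor.propose`, `advisor.update`) are assumptions, not the toolkit's actual API:

```python
def autodistillation_search(advisor, model_builder, train_func, eval_func, max_trials):
    """Illustrative three-stage loop: propose -> flash-distill -> evaluate."""
    best_arch, best_score = None, None
    for _ in range(max_trials):
        sample_point = advisor.propose()       # Model Exploration
        model = model_builder(sample_point)    # user-provided builder
        train_func(model)                      # Flash Distillation
        score = eval_func(model)               # Evaluation
        advisor.update(sample_point, score)    # feed the result back to the search algorithm
        if best_score is None or score > best_score:
            best_arch, best_score = sample_point, score
    return best_arch
```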
For implementing AutoDistillation, a framework class ***'AutoDistillation'*** is designed for executing the whole pipeline, and a criterion class ***'IntermediateLayersKnowledgeDistillationLoss'*** is designed for handling Flash Distillation with the existing Distillation class.

@@ -34,19 +34,19 @@ Within each iteration, ***model_arch_proposition*** method will propose a promis
**1. search_space** (e.g. {'hidden_size':[64, 128], 'layer_num':[4, 8]})
-**2. model_builder** (function for building model instance based on the specific sample point in the search space, ***need provided by user***)
+**2. model_builder** (the function for building a model instance from a specific sample point in the search space; ***needs to be provided by the user***)

-**3. advisor** (search algorithm instance e.g. Bayesian Optimization, Random Search)
+**3. advisor** (the search algorithm instance, e.g. Bayesian Optimization or Random Search)

-**4. train_func** (train function to train the model)
+**4. train_func** (the function used to train the model)

-**5. eval_func** (evaluation function to evaluate the model)
+**5. eval_func** (the function used to evaluate the model)

-**6. config** (configuration, ***need provided by user***)
+**6. config** (the configuration; ***needs to be provided by the user***)
**7. search_result** (store results of the search process) @@ -62,7 +62,7 @@ Within each iteration, ***model_arch_proposition*** method will propose a promis **2. search_loop** (begin search iterations)
-**3. train_evaluate** (process of one search iteration to train and evaluate the model proposed by search algorithm)
+**3. train_evaluate** (the process of one search iteration to train and evaluate the model proposed by the search algorithm)

## **3. Criterion Class Design**
***IntermediateLayersKnowledgeDistillationLoss*** is designed for calculating the knowledge distillation loss of the intermediate layer features.

To deal with the issue of dimension mismatch between the intermediate layer features of the teacher model and the student model, feature_matchers is provided for matching the feature dimensions.
-For example, shape of a feature from the teacher model is (8, 512), shape of a corresponding feature from the student model is (8, 128), then feature_matcher will be a linear transformation layer whose weight has a shape of (128, 512). +For example, the shape of a feature from the teacher model is (8, 512), and the shape of a corresponding feature from the student model is (8, 128), then the feature_matcher will be a linear transformation layer whose weight has a shape of (128, 512). ### **Class IntermediateLayersKnowledgeDistillationLoss** @@ -111,7 +111,7 @@ For example, shape of a feature from the teacher model is (8, 512), shape of a c **2. init_feature_matcher** (initialize the feature_matcher instance)
-**3. teacher_model_forward** (run forward for teacher_model) +**3. teacher_model_forward** (run forward for the teacher_model)
**4. loss_cal** (calculate loss)

@@ -252,7 +252,7 @@ trainer.autodistillation_config = {
 best_model_arch = trainer.autodistillation(teacher_model, model_builder, train_func=train_func, eval_func=eval_func)
 ```
-### **flash distillation config example**
+### **Flash distillation config example**

 ```yaml
 model:
@@ -275,7 +275,7 @@ distillation:
     add_origin_loss: False
 ```
-### **regular distillation config example**
+### **Regular distillation config example**

 ```yaml
 model:
diff --git a/docs/benchmark.md b/docs/benchmark.md
index 8bb12e9bb0b..158fe2a7308 100644
--- a/docs/benchmark.md
+++ b/docs/benchmark.md
@@ -1,6 +1,6 @@
 # Benchmark
 The Benchmark is inherited from transformers [Benchmark]
-(https://github.com/huggingface/transformers/blob/main/docs/source/en/benchmarks.mdx). Right now, The classes `PyTorchBenchmark` and `ExecutorBenchmark` can help users to check performance benefit and model size benefit of FP32 and INT8 model on PyTorch and Executor backends.
+(https://github.com/huggingface/transformers/blob/main/docs/source/en/benchmarks.mdx). Right now, the classes `PyTorchBenchmark` and `ExecutorBenchmark` can help users to check the performance benefit and the model size benefit of FP32 and INT8 models on the PyTorch and Executor backends.

 The benchmark classes [PyTorchBenchmark] and [ExecutorBenchmark] expect an object of type [PyTorchBenchmarkArguments] and [ExecutorBenchmarkArguments].
@@ -8,10 +8,10 @@ The benchmark classes [PyTorchBenchmark] and [ExecutorBenchmark] expect an objec
 ## PyTorchBenchmark
-The PyTorchBenchmark is only for inference when the input model is an INT8 model.
+The PyTorchBenchmark is only used for inference when the input model is an INT8 model.

 ```py
-from nlp_toolkit.optimization.benchmark import PyTorchBenchmark, PyTorchBenchmarkArguments
+from intel_extension_for_transformers.optimization.benchmark import PyTorchBenchmark, PyTorchBenchmarkArguments
 MODEL_ID_FP32 = "distilbert-base-uncased-finetuned-sst-2-english"
 MODEL_ID_INT8 = "Intel/distilbert-base-uncased-finetuned-sst-2-english-int8-static"
@@ -55,10 +55,10 @@ Intel/distilbert-base-uncased- 1 128 0.017
 ## ExecutorBenchmark
-The ExecutorBenchmark is only for inference. The ONNX model is generated by our `export_to_onnx` API. For more details, please [go to](export.md). The MODEL_NAME is the pytorch model name you used for exporting the ONNX model.
+The ExecutorBenchmark is only used for inference. The ONNX model is generated by our `export_to_onnx` API. For more details, please refer to the [export document](export.md). The MODEL_NAME is the PyTorch model name you used for exporting the ONNX model.

 ```py
-from nlp_toolkit.optimization.benchmark import ExecutorBenchmark, ExecutorBenchmarkArguments
+from intel_extension_for_transformers.optimization.benchmark import ExecutorBenchmark, ExecutorBenchmarkArguments
 from transformers import AutoConfig
 MODEL_ID_FP32 = 'fp32.onnx'
diff --git a/docs/data_augmentation.md b/docs/data_augmentation.md
index 99d25df14e9..2504ee9627a 100644
--- a/docs/data_augmentation.md
+++ b/docs/data_augmentation.md
@@ -1,5 +1,5 @@
 # Data Augmentation: The Tool for Augmenting NLP Datasets
-Data Augmentation is a tool to helps you with augmenting nlp datasets for your machine learning projects. The tool integrated [nlpaug](https://github.com/makcedward/nlpaug) and other methods from Intel Lab.
+Data Augmentation is a tool that helps you augment NLP datasets for your machine learning projects. This tool integrates [nlpaug](https://github.com/makcedward/nlpaug) and other methods from Intel Lab.
## Getting Started!
### Installation
 pip install nlpaug
 pip install transformers>=4.12.0

-#### Install Nlp-toolkit
-git clone https://github.com/intel-innersource/frameworks.ai.nlp-toolkit.intel-nlp-toolkit.git nlp_toolkit
-cd nlp_toolkit
+#### Install Intel_Extension_for_Transformers
+git clone https://github.com/intel/intel-extension-for-transformers.git intel_extension_for_transformers
+cd intel_extension_for_transformers
 git submodule update --init --recursive
 python setup.py install

### Data Augmentation
#### Script (Please refer to [example](tests/test_data_augmentation.py))
 ```python
-    from nlp_toolkit.preprocessing.data_augmentation import DataAugmentation
+    from intel_extension_for_transformers.preprocessing.data_augmentation import DataAugmentation
     aug = DataAugmentation(augmenter_type="TextGenerationAug")
     aug.input_dataset = "dev.csv"
     aug.output_path = os.path.join(self.result_path, "test1.cvs")
@@ -49,7 +49,7 @@ python setup.py install
 |"ContextualWordEmbsForSentenceAug"|refer to ["ContextualWordEmbsForSentenceAug"](https://github.com/makcedward/nlpaug/blob/40794970124c26ce2e587e567738247bf20ebcad/nlpaug/augmenter/sentence/context_word_embs_sentence.py#L77) | |

 #### Text Generation Augmenter
-The text generation augment contains the recipe to run data augmentation algorithm based on conditional text generation using auto-regressive transformer model (like GPT, GPT-2, Transformer-XL, XLNet, CTRL) in order to automatically generate labeled data.
+The text generation augmenter contains the recipe to run a data augmentation algorithm based on conditional text generation, using an auto-regressive transformer model (like GPT, GPT-2, Transformer-XL, XLNet, CTRL) to automatically generate labeled data.

 Our approach follows algorithms described by [Not Enough Data? Deep Learning to the Rescue!](https://arxiv.org/abs/1911.03118) and [Natural Language Generation for Effective Knowledge Distillation](https://www.aclweb.org/anthology/D19-6122.pdf).
 - First, we fine-tune an auto-regressive model on the training set. Each sample contains both the label and the sentence.

   example:
   ```python
   from datasets import load_dataset
-  from nlp_toolkit.preprocessing.utils import EOS
+  from intel_extension_for_transformers.preprocessing.utils import EOS
   for split in {'train', 'validation'}:
       dataset = load_dataset('glue', 'sst2', split=split)
       with open('SST-2/' + split + '.txt', 'w') as fw:
           fw.write(str(d['label']) + '\t' + d['sentence'] + EOS + '\n')
   ```
-  - Fine-tuning Causal Language Model
+  - Fine-tune Causal Language Model

-  You can use the script [run_clm.py](https://github.com/huggingface/transformers/tree/v4.6.1/examples/pytorch/language-modeling/run_clm.py) from transformers examples for fine-tuning GPT2 (gpt2-medium) on SST-2. The loss is that of causal language modeling.
+  You can use the script [run_clm.py](https://github.com/huggingface/transformers/tree/v4.6.1/examples/pytorch/language-modeling/run_clm.py) from transformers examples for fine-tuning GPT2 (gpt2-medium) on the SST-2 task. The loss is that of causal language modeling.

  ```shell
  DATASET=SST-2
@@ -90,7 +90,7 @@ Our approach follows algorithms described by [Not Enough Data? Deep Learning to
 - Second, we generate labeled data.
Given class labels sampled from the training set, we use the fine-tuned language model to predict sentences with the script below:
   ```python
-  from nlp_toolkit.preprocessing.data_augmentation import DataAugmentation
+  from intel_extension_for_transformers.preprocessing.data_augmentation import DataAugmentation
   aug = DataAugmentation(augmenter_type="TextGenerationAug")
   aug.input_dataset = "/your/original/training_set.csv"
   aug.output_path = os.path.join(self.result_path, "/your/augmented/dataset.cvs")
@@ -104,8 +104,8 @@ This data augmentation algorithm can be used in several scenarios, like model di
 augmenter_arguments:
 |parameter |Type|Description |default value |
 |:---------|:---|:---------------------------------------------------|:-------------|
-|"model_name_or_path"|String|Language modeling model to generate data, refer to [line](nlp_toolkit/preprocessing/data_augmentation.py#L181)|NA|
+|"model_name_or_path"|String|Language modeling model to generate data, refer to [line](intel_extension_for_transformers/preprocessing/data_augmentation.py#L181)|NA|
-|"stop_token"|String|Stop token used in input data file |[EOS](nlp_toolkit/preprocessing/utils.py#L7)|
+|"stop_token"|String|Stop token used in the input data file |[EOS](intel_extension_for_transformers/preprocessing/utils.py#L7)|
 |"num_return_sentences"|Integer|Total samples to generate; -1 means the number of the input samples |-1|
 |"temperature"|float|parameter for the CLM model |1.0|
 |"k"|float|top K |0.0|
diff --git a/docs/distillation.md b/docs/distillation.md
index 203104fcfd1..6f5c52b25b9 100644
--- a/docs/distillation.md
+++ b/docs/distillation.md
@@ -1,6 +1,6 @@
 # Distillation
 ## Introduction
-Knowledge distillation is one of popular approaches of network compression, which transfers knowledge from a large model to a smaller one without loss of validity. As smaller models are less expensive to evaluate, they can be deployed on less powerful hardware (such as a mobile device). Graph shown below is the workflow of the distillation, the teacher model will take the same input that feed into the student model to produce the output that contains knowledge of the teacher model to instruct the student model.
+Knowledge distillation is one of the popular approaches to network compression; it transfers knowledge from a large model to a smaller one without loss of validity. As smaller models are less expensive to evaluate, they can be deployed on less powerful hardware (such as mobile devices). The graph shown below is the workflow of distillation: the teacher model takes the same input that is fed into the student model and produces an output that contains the knowledge of the teacher model, which instructs the student model.
![Distillation Workflow](./imgs/Distillation_workflow.png)
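To make the workflow in the figure concrete, a minimal knowledge-distillation step in plain PyTorch might look like the following sketch (this is not the toolkit's API; `teacher_model`, `student_model`, and `batch` are assumed to exist):

```python
import torch
import torch.nn.functional as F

def soft_distillation_loss(student_logits, teacher_logits, temperature=2.0):
    # Soften both output distributions with a temperature, then match them with KL divergence.
    soft_teacher = F.softmax(teacher_logits / temperature, dim=-1)
    log_soft_student = F.log_softmax(student_logits / temperature, dim=-1)
    return F.kl_div(log_soft_student, soft_teacher, reduction="batchmean") * temperature ** 2

# The teacher sees the same input batch as the student and is not updated.
with torch.no_grad():
    teacher_logits = teacher_model(**batch).logits
student_logits = student_model(**batch).logits
loss = soft_distillation_loss(student_logits, teacher_logits)
```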
@@ -8,8 +8,8 @@ Knowledge distillation is one of popular approaches of network compression, whic
 ## usage
 ### script:
 ```python
-from nlp_toolkit import metric, objectives, DistillationConfig, Criterion
-from nlp_toolkit.optimization.trainer import NLPTrainer
+from intel_extension_for_transformers import metric, objectives, DistillationConfig, Criterion
+from intel_extension_for_transformers.optimization.trainer import NLPTrainer
 # Replace transformers.Trainer with NLPTrainer
 # trainer = transformers.Trainer(......)
 trainer = NLPTrainer(......)
@@ -23,7 +23,7 @@ model = trainer.distill(
 Please refer to [example](../examples/optimize/pytorch/huggingface/text-classification/distillation/run_glue.py) for the details.

 ### Create an instance of Metric
-The Metric define which metric will used to measure the performance of tuned models.
+The Metric defines which metric will be used to measure the performance of tuned models.
 - example:
    ```python
    metric = metrics.Metric(name="eval_accuracy")
    ```
@@ -66,7 +66,7 @@ The DistillationConfig contains all the information related to the model distill
 ### Distill with Trainer
 - Distill with Trainer
-  NLPTrainer inherits from transformers.Trainer, so you can create trainer like you do in transformers examples. Then you can distill model with trainer.distill function.
+  NLPTrainer inherits from transformers.Trainer, so you can create a trainer just as in the Transformers examples. Then you can distill the model with the trainer.distill function.
    ```python
    model = trainer.distill(
        distillation_config=d_conf, teacher_model=teacher_model
diff --git a/docs/examples.md b/docs/examples.md
index d3e123d42d7..7095149e1d2 100644
--- a/docs/examples.md
+++ b/docs/examples.md
@@ -1,6 +1,6 @@
 Examples
 ===
-NLP Toolkit is a powerful toolkit with multiple model optimization techniques for Natural Language Processing Models, including quantization, pruning, distillation, auto distillation and orchestrate. Meanwhile NLP Toolkit provides Neural Engine, an optimized backend for NLP models to demonstrate the deployment.
+Intel Extension for Transformers is a powerful toolkit with multiple model optimization techniques for Natural Language Processing models, including quantization, pruning, distillation, auto distillation and orchestration. Meanwhile, Intel Extension for Transformers provides Neural Engine, an optimized backend for NLP models, to demonstrate the deployment.
## Quantization ### Stock PyTorch Examples @@ -12,8 +12,6 @@ NLP Toolkit is a powerful toolkit with multiple model optimization techniques fo Dataset PostTrainingDynamic PostTrainingStatic - QuantizationAwareTraining - No Trainer quantization @@ -23,8 +21,6 @@ NLP Toolkit is a powerful toolkit with multiple model optimization techniques fo wikitext ✔ ✔ - WIP :star: - WIP :star: xlnet-base-cased @@ -32,8 +28,6 @@ NLP Toolkit is a powerful toolkit with multiple model optimization techniques fo wikitext ✔ ✔ - WIP :star: - WIP :star: EleutherAI/gpt-neo-125M @@ -41,8 +35,6 @@ NLP Toolkit is a powerful toolkit with multiple model optimization techniques fo wikitext ✔ ✔ - WIP :star: - WIP :star: sshleifer/tiny-ctrl @@ -50,8 +42,6 @@ NLP Toolkit is a powerful toolkit with multiple model optimization techniques fo wikitext WIP :star: ✔ - WIP :star: - WIP :star: ehdwns1516/bert-base-uncased_SWAG @@ -59,8 +49,6 @@ NLP Toolkit is a powerful toolkit with multiple model optimization techniques fo swag ✔ ✔ - WIP :star: - WIP :star: distilbert-base-uncased-distilled-squad @@ -68,8 +56,6 @@ NLP Toolkit is a powerful toolkit with multiple model optimization techniques fo SQuAD ✔ ✔ - WIP :star: - WIP :star: lvwerra/pegasus-samsum @@ -77,8 +63,6 @@ NLP Toolkit is a powerful toolkit with multiple model optimization techniques fo samsum ✔ WIP :star: - WIP :star: - WIP :star: textattack/bert-base-uncased-MRPC @@ -86,8 +70,6 @@ NLP Toolkit is a powerful toolkit with multiple model optimization techniques fo MRPC ✔ ✔ - ✔ - WIP :star: echarlaix/bert-base-uncased-sst2-acc91.1-d37-hybrid @@ -95,8 +77,6 @@ NLP Toolkit is a powerful toolkit with multiple model optimization techniques fo SST-2 ✔ ✔ - WIP :star: - ✔ distilbert-base-uncased-finetuned-sst-2-english @@ -104,8 +84,6 @@ NLP Toolkit is a powerful toolkit with multiple model optimization techniques fo SST-2 ✔ ✔ - WIP :star: - WIP :star: elastic/distilbert-base-uncased-finetuned-conll03-english @@ -113,8 +91,6 @@ NLP Toolkit is a powerful toolkit with multiple model optimization techniques fo conll2003 ✔ ✔ - WIP :star: - WIP :star: t5-small @@ -122,8 +98,6 @@ NLP Toolkit is a powerful toolkit with multiple model optimization techniques fo wmt16 ✔ WIP :star: - WIP :star: - WIP :star: Helsinki-NLP/opus-mt-en-ro @@ -131,12 +105,37 @@ NLP Toolkit is a powerful toolkit with multiple model optimization techniques fo wmt16 ✔ WIP :star: - WIP :star: - WIP :star: + + + + + + + + + + + + + + + + + + + + + + + + + + +
+| Model | Task | Dataset | QuantizationAwareTraining | No Trainer quantization |
+| --- | --- | --- | :---: | :---: |
+| textattack/bert-base-uncased-MRPC | text-classification | MRPC | ✔ | WIP :star: |
+| echarlaix/bert-base-uncased-sst2-acc91.1-d37-hybrid | text-classification | SST-2 | WIP :star: | ✔ |
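The QuantizationAwareTraining column above corresponds to runs that set `approach="QuantizationAwareTraining"`. A minimal sketch following the QuantizationConfig pattern used elsewhere in these docs — the metric name and trainer arguments are illustrative, and the docs expose the metrics helper inconsistently as `metric`/`metrics`, so treat the import as an assumption:

```python
from intel_extension_for_transformers import QuantizationConfig, metrics, objectives
from intel_extension_for_transformers.optimization.trainer import NLPTrainer

# Replace transformers.Trainer with NLPTrainer (model/args/datasets as usual).
trainer = NLPTrainer(model=model, args=training_args,
                     train_dataset=train_dataset, eval_dataset=eval_dataset)

tune_metric = metrics.Metric(name="eval_accuracy")  # illustrative metric
q_config = QuantizationConfig(
    approach="QuantizationAwareTraining",           # instead of PostTrainingStatic/Dynamic
    metrics=[tune_metric],
    objectives=[objectives.performance],
)
model = trainer.quantize(quant_config=q_config)
```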
### IPEX examples @@ -147,8 +146,6 @@ NLP Toolkit is a powerful toolkit with multiple model optimization techniques fo - - @@ -156,19 +153,15 @@ NLP Toolkit is a powerful toolkit with multiple model optimization techniques fo - + - - - + - -
| Model | Task | Dataset | PostTrainingDynamic | PostTrainingStatic |
| --- | --- | --- | :---: | :---: |
| distilbert-base-uncased-distilled-squad | question-answering | SQuAD | WIP :star: | ✔ |
| bert-large-uncased-whole-word-masking-finetuned-squad | question-answering | SQuAD | WIP :star: | ✔ |
@@ -182,8 +175,6 @@ NLP Toolkit is a powerful toolkit with multiple model optimization techniques fo Dataset PostTrainingDynamic PostTrainingStatic - QuantizationAwareTraining - No Trainer quantization @@ -193,13 +184,91 @@ NLP Toolkit is a powerful toolkit with multiple model optimization techniques fo MRPC WIP :star: ✔ - WIP :star: - WIP :star: + +## Dynamic-Length Transformer + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+| Model Name | Datatype | Optimization Method | Model size (MB) | Accuracy (F1) | Latency (ms) | GFLOPS** | Speedup (compared with BERT Base) |
+| --- | :---: | --- | :---: | :---: | :---: | :---: | :---: |
+| BERT Base | fp32 | None | 415.47 | 88.58 | 56.56 | 35.3 | 1x |
+| LA-MiniLM | fp32 | Drop and restore base MiniLMv2 | 115.04 | 89.28 | 16.99 | 4.76 | 3.33x |
+| LA-MiniLM (269, 253, 252, 202, 104, 34)* | fp32 | Evolution search (best config) | 115.04 | 87.76 | 11.44 | 2.49 | 4.94x |
+| QuaLA-MiniLM | int8 | Quantization base LA-MiniLM | 84.85 | 88.85 | 7.84 | 4.76 | 7.21x |
+| QuaLA-MiniLM (315, 251, 242, 159, 142, 33)* | int8 | Evolution search (best config) | 84.86 | 87.68 | 6.41 | 2.55 | 8.82x |
+NOTES: * the length config applies to the LA model
+
+
+NOTES: ** the amount of multiplication and addition operations during model inference (GFLOPS is obtained with the torchprofile tool)
+
+
+Data is tested on an Intel Xeon Platinum 8280 Scalable processor. For configuration details, please refer to the [examples](../examples/optimization/pytorch/huggingface/question-answering/dynamic/README.md)
+
+
## Pruning
@@ -237,7 +306,7 @@ NLP Toolkit is a powerful toolkit with multiple model optimization techniques fo
 SST-2
 BasicMagnitude
 Unstructured
- Stock PyTorch/
      Intel TensorFlow + Stock PyTorch/   Intel TensorFlow @@ -345,14 +414,11 @@ NLP Toolkit is a powerful toolkit with multiple model optimization techniques fo Model Task Dataset - Dense - Sparse + Datatype INT8 BF16 - INT8 - BF16 @@ -362,8 +428,6 @@ NLP Toolkit is a powerful toolkit with multiple model optimization techniques fo SQuAD ✔ ✔ - WIP :star: - WIP :star: bhadresh-savani/distilbert-base-uncased-emotion @@ -371,8 +435,6 @@ NLP Toolkit is a powerful toolkit with multiple model optimization techniques fo emotion ✔ ✔ - WIP :star: - WIP :star: textattack/bert-base-uncased-MRPC @@ -380,8 +442,6 @@ NLP Toolkit is a powerful toolkit with multiple model optimization techniques fo MRPC ✔ ✔ - WIP :star: - WIP :star: textattack/distilbert-base-uncased-MRPC @@ -389,8 +449,6 @@ NLP Toolkit is a powerful toolkit with multiple model optimization techniques fo MRPC ✔ ✔ - WIP :star: - WIP :star: Intel/roberta-base-mrpc @@ -398,8 +456,6 @@ NLP Toolkit is a powerful toolkit with multiple model optimization techniques fo MRPC ✔ ✔ - WIP :star: - WIP :star: M-FAC/bert-mini-finetuned-mrpc @@ -407,8 +463,6 @@ NLP Toolkit is a powerful toolkit with multiple model optimization techniques fo MRPC ✔ ✔ - WIP :star: - WIP :star: gchhablani/bert-base-cased-finetuned-mrpc @@ -416,8 +470,6 @@ NLP Toolkit is a powerful toolkit with multiple model optimization techniques fo MRPC ✔ ✔ - WIP :star: - WIP :star: distilbert-base-uncased-finetuned-sst-2-english @@ -425,8 +477,6 @@ NLP Toolkit is a powerful toolkit with multiple model optimization techniques fo SST-2 ✔ ✔ - WIP :star: - WIP :star: philschmid/MiniLM-L6-H384-uncased-sst2 @@ -434,8 +484,6 @@ NLP Toolkit is a powerful toolkit with multiple model optimization techniques fo SST-2 ✔ ✔ - WIP :star: - WIP :star: moshew/bert-mini-sst2-distilled @@ -443,17 +491,41 @@ NLP Toolkit is a powerful toolkit with multiple model optimization techniques fo SST-2 ✔ ✔ - WIP :star: + + + + +## Sparse Reference Deployment on Neural Engine + + + + + + + + + + + + + + + + + + + + + - -
+| Model | Task | Dataset | Datatype: INT8 | Datatype: BF16 |
+| --- | --- | --- | :---: | :---: |
+| Intel/distilbert-base-uncased-squadv1.1-sparse-80-1x4-block-pruneofa | question-answering | SQuAD | ✔ | WIP :star: |
+| Intel/bert-mini-sst2-distilled-sparse-90-1X4-block | text-classification | SST-2 | N/A | WIP :star: |
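Deploying one of these sparse reference models goes through the same `compile` API shown in the README's Neural Engine section; a sketch with illustrative paths and inputs:

```python
from intel_extension_for_transformers.backends.neural_engine.compile import compile

# Compile a TensorFlow pb or ONNX model exported from a sparse checkpoint such as
# Intel/distilbert-base-uncased-squadv1.1-sparse-80-1x4-block-pruneofa (path is illustrative).
model = compile('/path/to/sparse_model.onnx')
inputs = [input_ids, segment_ids, input_mask]  # prepared by your tokenizer beforehand
output = model.inference(inputs)
```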
+ diff --git a/docs/export.md b/docs/export.md index aaaa2d580e5..1e4c2a42cc5 100644 --- a/docs/export.md +++ b/docs/export.md @@ -1,14 +1,37 @@ # Export to ONNX +1. [Introduction](#introduction) + +2. [Supported Model Export Matrix](#supported-model-export-matrix) + +3. [Examples](#examples) + + 3.1. [Export to FP32 ONNX Model](#export-to-fp32-onnx-model) + + 3.2. [Export to BF16 ONNX Model](#export-to-bf16-onnx-model) + + 3.3. [Export to INT8 ONNX Model](#export-to-int8-onnx-model) + + +## Introduction We support exporting PyTorch models into ONNX models with our well-desighed API `trainer.export_to_onnx`. Users can get FP32 (Float precision 32 bit), BF16 (Bfloat 16 bit) and INT8 (Integer 8 bit) ONNX model with the same interface. ----- -## Export FP32 model +## Supported Model Export Matrix -If `export_to_onnx` is called before quantization, we will fetch the FP32 model and export it into a ONNX model. +| Input Model | Export FP32 | Export BF16 | Export INT8 | +| --- | --- | --- | --- | +| FP32 PyTorch Model | ✔ | ✔ | / | +| INT8 PyTorch Model
(PostTrainingDynamic) | / | / | ✔ | +| INT8 PyTorch Model
(PostTrainingStatic) | / | / | ✔ | +| INT8 PyTorch Model
(QuantizationAwareTraining) | / | / | ✔ | -### API usage + +## Examples + +### Export to FP32 ONNX Model + +If `export_to_onnx` is called before quantization, we will fetch the FP32 model and export it into a ONNX model. ```py trainer.export_to_onnx( @@ -18,11 +41,10 @@ trainer.export_to_onnx( [verbose=True,] ) ``` ----- -## Export BF16 model +### Export to BF16 ONNX Model -If the flag: `enable_bf16` is True, you will get an ONNX model with BFloat16 weights for ['MatMul', 'Gemm'] node type. This FP32 + BF16 ONNX model can be accelerated by our [executor](../nlp_toolkit/backends/neural_engine/) backend. +If the flag: `enable_bf16` is True, you will get an ONNX model with BFloat16 weights for ['MatMul', 'Gemm'] node type. This FP32 + BF16 ONNX model can be accelerated by our [executor](../intel_extension_for_transformers/backends/neural_engine/) backend. ### API usage @@ -35,14 +57,11 @@ trainer.export_to_onnx( [verbose=True,] ) ``` ----- -## Export INT8 model +### Export to INT8 ONNX Model If `export_to_onnx` is called after quantization, we will fetch the FP32 PyTorch model, convert it into ONNX model and do onnxruntime quantization based on pytorch quantization configuration. -### API usage - ```py trainer.export_to_onnx( save_path=None, @@ -58,4 +77,3 @@ Our executor backend provides highly optimized performance for INT8 `MatMul` nod ```py trainer.enable_executor = True ``` - diff --git a/docs/imgs/arch.png b/docs/imgs/arch.png new file mode 100644 index 0000000000000000000000000000000000000000..20b45a044d50d42524b1f8b9289701054fa67aae GIT binary patch literal 68114 zcmeEucTkgC_pTm~q97n3pdg^qLRX|nm)@j=9;yhDmVi=~5{@EWKp=pCfP^LzT7XbO zQ4o;O1f+yeq_+UlLWldJp7WjG{qN4)x&PcRGYpeV^6s_QTKm~+?e(l3uCJ?3P02!e z>eMM}O$`-;Q>Q5Lr%s*QIDZEC7Ub@W^;3M>r-Dgw2u=Ago)W7o25ZJ?S;q3$EprdPk?8g^EA*k zJZ1FPtE6TRolAMZCl?-TKs-*J68(7ccN%1GPCRw$SV&Vv(del)X*4P>`z|(jDb3JO zoWW!iA~lLFfRj27ZRQkRgzjn6e`l|HeQ(unz-pf2#$zS_iHWOgHut!XDdxjRlPZS5 zf6bB-(cRG$M)s1dUtf-f+~XkCqno4u5Ob)s5Ac?KcN0hWi|45Oc(>u6h;SIjo9kV^ zb{3kA;r&ZcB-U)^D;-MkoFcM0=n}Mtsoh9G!fQ4mJ$K(Q-3p7{IipJ!4_nKvMF|7o6y!Y=09>A`306a-k+;`v+*h z5>av;TG*YZI7Vr`$*qb!yoC~U=ES24O~4Y}&jA>e#V^th<;k?l8~>G7~OsO zX>a!r6e*7m3T6dBvLXVryYUJX>7WkL#;lw<8+c6E3g9w4dSH&E(!Zu>?imkAtOEI_ z2B2*x3PwJ~i=QNbB}T+{;eFjgHa`|X0ycMZ^sco!!*d}G5RJrTp;g}3WBhyRH zOVDESG)O>lr*lm8V3Rqe{zhT5z*R{PUHc9yxVn)Rpl^-?;yexr}zlz+))ROv+=(wrQ4mSoC`fS4G_qZhA7lp(AGiOGR?*o1oGA zzO$&4MR;K18uV+&C{zPuU!R@1pn_y0dSs!`2?~(DzJVSHv8ObiXIc8=5PF`e^Ci)4 zOwq2~y+J|Idy^tocgcinu~O#2-AUQ3gC&T0@6vaHKFIEP3yxi6Su#0BU_E#9;ST#C zFqM-Hs{o7?I#9VCgW1UVa4g^#I-`Q@S4=uBIldaJMeN#9cl$*C=74jf?p%FV_e0od z_WRd%oC&UT{O*dntRz09z}LS$H&FQoZF*oU_`a*X4c&M`jq>FHRk3W(AMtU*OD6Wq zNf)nBzh=MP(CUfRtQ+$%(QugQb`#BbD6!96WG%W~S3H6L+YF4*tzCO#rS#iNWvw@J z!`aPD%D(1~hOTqL^n$u+WddF{yE$;v$`foi6nXdIm99EgJ{R|!V;mHgG|40ydv|q* zY9Ggm6dh0YCm!`1XW3bykr#vwpUz}iP50~J?DtiQQ&(Qs%`UyhZO6k@6A^^Jmd-EG zQSl0HwRLiPVbi+!8+bOTQ@f553X-}4HX^H}f1B1^6%xSSLQ9|jhC@!K+eScS6N25> zPk*_rFc_M4&5Ru0+^5+dO{(sxVP+#eOL_U+{v9mvM$omlGb@?A!48M^QvSW9tV3}? 
zqAlyPMkc@0(`kDRbE{CxgoUt7-otwkp2RzuH}E<3~n z!XoEFc-qJlQ*qB6Zb2DX1QC*z9CRSwWVwD>vhl-mO%F>7{eWmJ+x9q%7HgDca!?r( z-fgZT)h4Xoomx1d6cdG6|KK{NrzRt0!?5=PEz>DU8nJ!$DbXq}OVDRWG>b!$=#3in}`!&4K|r-u($w>YKRNH(>;+B(A%4}Zr+6_J>utB!i8 zUB&FE^*1Tw50Wz=24Ri?>tXwBo=HS~lnw%Hqb?B>QJk<#G{Tdn~y=aChAO z0XejD(d%CFq5Yl5Y@WJ3VWJl#zkdp!rO@$y!bWNMc9%!4obknazjh46>Jz@-CLab& zey)g?Z(HN7uWF1)00?s9OGZgBul0L~cU&Xa%6gLD1}Iq&YwsAkEU+gTaf3Y!&9Q&*-<)*jt zB5Plh_q+7;w(9%GXIdn!6n$+Tl|6>?RZR&UATqzpBo?Konwvb~{twVh$IfQN9!(_3 zlh%1*!*=w}i4!@mX}O z{ukK-j%Bwl^*DXe9QOYHi2H1yP4_xt)k+R75@W^j4@#otH52o?$2_ zXK;pc((Nja>!DpS*W7TiGaoIA)rC0KAqx+QkyWA%OWERxr~_)0Pb%Z1HGzPq3JF>0 z27yV|oTs0Wq!MqutDL03vSlr;mBPX!;0Ff2`^6B+kNtnIMHc92P`7%G%G(+q0t0r_ zh*G>=-rxr<2Fc-B88}f|)uuC=R=R%2C-`IM;=CZkZYJEKxzqN}Ri1A{_gK07!v5D?0ZFDN_!E{TxJk9+y+pxhLDl5H{Z=N*lpiMH} z9h{3hvGl*)B}EMqaG!8S1G}U~ILsS>f(KUT9nvcTCGa&%tdgOf(=U}aBIkB)1B>0g z&}X$_U5;@zsbMd(v2Lecd%}CImCy6M_|oecki`!ZWCD0DXd1V}vMj43#VWDFOLViY zGwApFd%gg^-OhRV+IAF6d#X;GZ7yA~jE}J81@(C`Bj@BQaTx zj99y-FiQY?Z#ySi|6yZ+IJ<#^E*M-haNSr=lKX~Meq`g=YY7b74~8X8Yx~7rL`AB+ z&L&jZ?0%$>!^>C!nbS#XRfq+faPaI|Ca9-GVgeUzM9s~%rai#kN5PmfXqf)cEB2o9 zc5E(UM>b(}k|wjZ66ZpTr6~;mNU13M2F6SZDW*Ju=07Yl-~Sla#W1&Roo761_OZ%F zB7BCFkx{r{oxs$bpwEE@T)JgX8rGlF`XYU5fE4d%4j7dMyF6+y;JbqNXekY8%R|AA zlB|<%C{5tN-hPY0PJ8^+ZeMGFMTj z-%Co#_p3K^*t1}s;*@@uKcL*Y2P{NaOyz$l#;ofy-}t>{dm*hjoaOPeH|`)FIp_`b z8uEP;+^=f9@4cRQL&^5em$v8=I8s2QFLsf6aj+M?mo9tzuz44!SUc};JR(2Zlucv< zDI*Ur)$KT;Uolz}H7!%O&oizRk*y+>@%MFA32CXQr=MSZ)hP}c2{ot$(PEBvKJ!~P zLL|+CSNw=$XbXq44inD z((O!rkHTRWZ|sq1`1(1Ow}Cbz;Bb4=yGdOFwOEE~Rd&+B$76*@YS^!=>SL6jmGxW$ZRpE=Rv+(#*=y=?3Us#HiZt?&!|&4 z&s2j=iXOXDNUzs8Y$I#!$&J&+i1&}}_Jo6p&8D+#n=55Xd*Zh>-SSn|!p^qaYRQl; z-qguiT1zXa^m(@jEP&U-StPd`HxteLBpcs`VdlzXFzd3(>`DYX-e{9H2C?5*L}+D1 z8ma0jC^+`ccX|j9W%$~rXeM~Z)ILev55?}3?A=*}T&W75Q-jp)^>FJsQ3wc5;ty0K zEP?^w*p#65T3PCddehMusB?chakw6AT_0or)`@ANVd3KTxp3m-@~UOspo7JkpUx@A z*%1!%)7=NjD6q|i5)Bv?$T^KzUI2@J7_HMvnqKNO9mclhHOf8B9P zU~%JmbSXih{ZM&nS64aoSj}yeGH_)X2>BM-=G_$ahP6d@{Ih0G6r{W-yk}tX>9gd0 z+t2n%r25z7&f>_%zgRb2zunQ;U!@jZWv)Wl?spb!@4;Mt9g{e`l%j)nFI>kDH;L0c zwC$4uLm+EVFpvkp|fcr7>Skij8#S%QPGa&-YJVczOCP-?=9^G9Ai9mkMPE3rzd8Zh|ERLI(<9MgE zY7#BV67@^3NoC{~$Gw0vPvf+smg%rqS~l!qtX;2{jIzzlC$NdzMxD#sN|%EQrHOu@ z&gZKr$_<{zNSMsmGezQvKb|8>&2Pl(J&7b=%z_3jQ_}~|>?1ONjP?83VHr={ z`cRD^qoRT$J(E*`6w;GHl&NbBy6I9qx@%{2G$hKO^~Q`!FB(HO&#CO(yFq{Kt0NXERIGKxqKSi3jjK|4 zs(+J(2TupI@BX#++Q9QP*ts|UO&x-@GnC(RJLq6}cnaIwW`5&tt#8hUCEzTKOmhH2 zk*=x`Hfq8!dzUFuAl7Y7_K##F=7p9E3G9^92<|iHdbSR1NOv%9yHCQkTtL^l2i^CG5PpNxi4W53lQCUQ zBOu1!fh8>08(oSjjXhHX(rbqc6ppoyxs}f7tbUBSiBDHV$`wa~Tw+?og@oH`9(9^f zW|K^AE|dvVZWt0Gg0r+@wlp1N1UWUFTnghELB1*Ks;d)bW>T^yY{$mL zqWN_mCa+7QJKK*ULP;|*RY>p7`1XcZBiD!3MZ(iQjv%j*mJ7DKUb>G5b zle4}XPszbU_xogiWQAcIEoyyWRxj*IgR?Icgb`dZ8kVR*cRNTt@BKW$ln^cW_-xVEWAzvhB@b4-4*P; z{o?+}@^v|w#g4E!-j&aVae;1YwHadf&d0LkS&S|^=TGG+MAd z08?;15p>braAv!aqOBl}a5Q^F`7cGsfKq3L13(M>S;t~3`~!K9H@~GYzj$x-G>{L* ztKNURk8M_}gUaT1NusIh0#0G8u{Xg8vTY`NwvaDl=9962?81~o>xgW_X(sQ)5CJ(^ zJk#piof#b)&h2-dZtM$oKu}Z~*N`*Wq;o)bJkT}3lX!J?Bx`}r26cwF@50FS-SLOk z!kXSClEq^dWrtOzSuirDz5QB)HAmjIMNvQXC9%3Q3X9Ldje{VN%QCoD+FI9j$$^0$E{~9+wQ$E&l4idvkvW zBy;@Qe`0cg%T*V#>xS6S5Fs+vw_8TI8w^N z&Dmd)I}lv7x%4vt7)=q$R) zZgy@zw!D>13oNm~HrJosMk|iCMxoCZdI^;o^Sn;rnMTiNXsLG#3h0SThrBuGzAB=b z0~d*7^Q3n{v=7!p#H!zHjV9NK?S`TYdd03bbmkbodQ|EIL}`hmlLgR#LA&ZvtMT@W zbWjn)D&p73o-9~-_F0TuUnPsdRc0MS3>1leL1t%Us560SYsuPqxz!;%k)YFmEXc;xgHeJxp)LGl^ zovC~l<-!sb0`!4)u1nH%85IkhSF=<4j!gz0i1R9zf=uA7W6FfMLxYsy 
z)U`1)_!cYl&cGGH4E`U4JDg{P2%?uQ9prScy#%7ULPAWDmx;#Yf@p#9`O1MC=#wvo_2^wZohTv0{Knxq1HZf22Z?3k93_-T!nekmFnI<7$!LeCrH)xh$Tp6?jZjYP9fZXkCi=G+>mReiKu30so9waf1FBQExy^ zn2c!-zyzki{w0*`g21e@e zO1%~lq)M0eeqX9rU1a0s(zot#HLfM+ec^L9ORC|!J;^P0jJkQTsld`GX<%&3G7Q5! z5lt8!mC5OC#===l-Yh&Xyo8MhEyWk#zgJ+dK9<;d35r9xBrv`db_h<`)!DsUL%Cgc z-hC~^t9|8NO)ST-~hBaq1SU6K%?5YSMp+`ryVZ5Wa$KnFAnK7SmZI7=U#5#$^1_9jkr27yk)H%+Rvs8o%MT(i)J3Mv%qAQwSEAcmxU^*!@OO;Ay#!<57_ChQlKT zhutvJSVpN+*w32IC@~YWj6&P&aoL$0551a7BV9ASY^rm06NImszuUfH@Bf#-C2-<& zRSHbhrAdmJmiatbAXyoONwg1;EU+9~L#|hYmQr{rz~l!5@f@PC*EDiG?90DP&%+k) zSg5R!3BE*$CrIi5p`PHy2Cg5fjBaX#yEj5QVLp|4n?N|)oBO5ab2JLg+Ge`2wfG~{ zDW0fCm|5dht0(c4ZlFN~YKfq^*f`U1y5z(=brG!OnqAo#lH>YH>COIEi?F*uKvSRpT3h#<+4ug}Ok6^$Xu zWsy?OG_}O3K^yL}*9^F+K?c)F^pria%D2PoQ^PPJ>N3Ac2TFKDu67iR&$El`dnYT# z@co-Rg|tCbg#23{zI7W$RGL14*9zGO`rIE>ji1}2vde}MPo|UefvlqsFi=zl-=bgXO?}W@huFo zS%r2Qe7-KmcngN@z{`p?!{o337^Hw}_;4l*9qGrN*@To=Zu$0>IWEl@whWpmz64d? zienlTdnCH}XsEOA(Fix|*R<4eKxszM&7gC^8%|Tv;DKY9{N>p$j9jHzoE1e}96S=h z1u+I4j0;!)wKU{Z^6^dp-+g#brl+33#i)O@h#y<`jMbxTKIOnQu@DI3C^RdB>yq2D zGOoEKgV(+Y>xIC5{&+V03indSM_=j%oOgegGvNbMW?N@=4k8QIHe8eYaYxQPDlYEI zmQST~a0NaWmdu<*`o??l*0ID8=&vrh$y$DB+6$8Hl*`~a;)@ZY57B!|`kyt*(&oMA zXF3NGIPJPk-TKw`D3Fdk*pTqLK2KVJ_JgKxzOZ+k@J5M22|PBhvr2o&bE-M&mmPy~ z_JyM&cxTT91V19kaM+}Qpuvkm=X53b35XYH`{_xrj>LvG|Gi)d$wuMaNk9S~$yMF5 z`YEJiJCEeRcvUmlNvofWb(vE&gLXN6S@ui2^)IB{ST39-}^ zPfl@AO}1!QQzG z!e;Kt+2wVqyrd0>9fc%>^8Cn%HcfoFd0f=`gUqD+kHX_Ke>?(CQ`;YC2miEd#p_HM zu9~Py09AHP#$1`t=rJ&0z>}R!@{vLt=18xFZ_xRB(#;#;4!4%;S8fH~Q&i0;VPlQ) z28cAo*Rm32KF^NQkLPDI+>W%o2}^ZWYD!3}`9pEYDihxd6tZxu6h%XSR2jQZA`8F# zmTllXsaPJ*NXh#e&~YTGE8*tln)N{&CV_ZAqgKdL`JnWN%W#V? zG{S$iSRH`pl)IXC12$s)`^$a4@!RcIOK#SJ_N8`Hu{*Nt%MOy_4_2t3T&jtqe7ptUDBz~Qz3Q6&|sF-7*VyWLG)`pP1n0`ehA=4k=ES$;P{f6jDw#ziX?#| zF|ri7?2nD#dDprRkGA*2>bfUsI67f zW{G!^bT;4|ZN`j6U&sXOC=f0RShRhdOuj5@g@z$9DhXBFtQ&d9skJe7~F2z)6OQ(fl zT=#ly_aTo@qhNR3?+kfASZI_Z9UpnrS7#SKozsqVT^WnY;>5Rzt?pK|9vfXhj7<}( zkp?P5XWQ1d=eiW^Jk@n;G}GWzR|f<5xU5iT+L@Ji@EESbtf`**)s>YoomFL*eSxSOO-? 
zk%@O${K*~B2gzraC@i+7DtQm#9C2yF;r*M>f9Y`mEP)3H#|&)?`yBwWObupGCtG-w3~f!m7#E($wtF3eQhR*bB!n>{siy3-;4; zoDuYT^y#|_GA)~Vgrf{l3+Dpsy7^d5JdgX_bY(Aq(dIDL%^@aG9`?b4zJnJGrEmQKZSrSd1O#&jQQO-wELp%aBJNy?2`k z3eyK38#cjRfk`Dl1WXeH-#X8HIoO^p3#{53Q@B>qH4}KaH{5_vI6Xk)YZX`N%$o;P z2*2;1E>cCpIRi#>ssi(tgxiGBHfAR0v*Y2MYQIS;Nnr1yi^RS%_amV|7n^OrW32}3 zZcO$Qw#me4TOkS?m8g}cJ*NX`E(U+S?A0|lt0tO5;-g*~^X4MRq;EN(Nak0yoaC!7 zfw9zqoJn@hT6G7L4>K=pd zXeWEvVfv&QGES9qFOWE<2pTu*&ND1K28Xk07G&Mdc{tkeRL7x}SynEqoeoOWs8@}{ zeV_0Arj&O;(_X**;_q_QF{ofc=uJV{m*iKNmD?5=n=yOpcz-jZ(72O)IxV6%&MUeM zCLgCE()c;1HnA^~hqGvAQap{3b^Pi(c@&Iw!Z>EdXxHFwEmd3^4<-X%(6VjV`n#UE zD1@)kBGAOBZ$CGUd}~nQ|I#@1<&5g6`Vq~?US}O_0gM;EN3b|tI_(k9u9WwV;;jRx zaurCx{&mB0-2^D6rfIJ8w$rQOM9$!2wcUv{E_2v@ey<3VD!}8YpY;#ov{vd1iH;Mi zN7u*U+acFjepmEH)d?TvgvDWn#wq6qTYtQjEfj-D=0K&gL)xBoZ>}8L$azP1zt%g_-WnpS3i`UqB!np0qG_;hYBtN0!zzxK9P z{WX;Vijd!R%P|z%FDzs^oYn*=&4`HKGQZIs6U(KwckK!`vU{ZQjPGo!k}W-(Iew|g zpnUm(gZ+Ye1V*#tE*tdl@$U0tz{PG-zcmmXe{*|*X;i>ngn+$)cm3NkWuT=De*+En z{NOr&7OLQNySw253|BkSn;}zzE6CS8f%ZO*<10X32a=yF`?F-FKxe~0&LhsXx$F&+ zrR+>j57}IWz|&j&O5cBiN5gGi|GrzjzB*FqH`%qI3?!iFoKF?JDFrf?H>%2|+4aS=u+W%vZ_Yvg z9f{wS>Y4})StK)&A_M79#*12CQc(6mnK^2DM@ciACv=p_qTDEhv$Ko{STYvQ9bW;Q zJ-t8Du=N;|2l!i5@@sjX;wTuyLKdxs)XHc7PPU!rrx)Q zy%7RUk8ZO$8`5ty-YC?HP8Xxn>2A&|&CtorIC;MJaw=G&PGUu4?PZq=;oyayxB7bk zGF0|Z?NPCa1jhL^$KV|%D@|GyEc&-80c2&C1LweqY9D%e?KZh|QC8^9_faUce4=)s z^nC`Kg|?MYq$?I~X)OO-AN&^PsfJ$`VBk-8!`j?u@u=TlIp0qQP1Ug3oIgio-s`l9 z#>Bg-O|pmm<_q^*A)`3n8QA`Q{_Iy>dZLEAwwn&x@PQz$f>%c``r@uTGAcL!V5O8K zMyVd>lnxvhtd7!s;KWPB*3bLqnc2p8mJ>x`vy!^+x$#VYKN!e#+#<9R;HgUe_JvUy zh2gzxFJ{(c90*RQj!9`{q;G??%P~B1FyhXK>CPg-;UfF*cbRuM3;oigFa+FJ9H)^w z;D-S{J}dV{7Ux&L276w||JpAEhZjNkqvX0JzJ4s6(m1}td+Iq$c55oP;@2uMmt$G2 z&t;qH@sEfS^;#SU66FB~UJgZ1-reugCQ~;9k!QpA`2R?sRZo9!Fxp*PE3M4?FXi7L zx&*?prcB><5U>|&M<;AqMusIniw=Pl(v%m7&quJ@8 zS#vWj4B|shS{+Uj-F1nN<{JpJp@TfB{vbUD*fq z8*qd#f8?253mkVCf2+gR6iBsgA|ZP5w=4TZVm9LY!q-Q-vJ6wKFFNfh2K7$c4{TiY zUx{ZTN)&13?IxX_>7_7cCOFw&=<}U_W8(8z$A{IyeNu(+=_T{%XpI5*NP2yi_0~b& zV`8mmH+A@LFSbx4Y{p)lsTDj^~CQ;9;W8yq^Z{{?{W_g|?Y; zU!|;MZ)Um#bIPd@=6?iNFD0K_QbpDjy3`QwQU&0XRw-2o5|ou8Bj*XrafgSO_7|zN zwy|Qhm%>_qJI%l{3T7*^-4!DbiKOjL%YA#%nHc@*p?Z9qpxzJtyAc?5P4~q5)R~xU zr35pI(Yjm!$m`X9?io>yg|>A4hkc)a(5;h})c1AJ*IfK%wgC3(PZkPM6;k+0IW4=s zB9$rfxp8z=GW3vDx7S$m*qhGaTxrTv++8bE?mNVz!n3-*PYz;{!BJH7f@B_xry8vNO3#a=H1zrsxm(#j|N&N?`52kEqc}r$LHz?8OXYAxQ%)oh^N(lF< zYvS3JSv;8%rI(*>G(oi5R=;{tQrOk=7qg<747@Tewph8?!5Bg3Pt~0=1e{bZxJWi05a25F2P= z)DX>4=_lg6KBg%%7 z4 z1&2MFLDnU2I!vrK73nw;*J+Ff9)=R-$8KM}POwaXr`ytJKaaK^&a}C^i4Fn@A=fGj zv4WV=Az1y>7-PjFOXySIN0?JbF{^1O-zSoQw8m*w{ZBUm#8SxF-#~|UPm1-)w>0`@ z6N6)8mp_mo#2&U;0<-^56OGT+XQ|FWs!H!LH*SmDmY-K2=aISDSGsKSKe}`VlgrS9 zWZu2B|Iwaf_5L-K_4rnIxBcLT*69ce$I}@;4*%oH|LyQ6L-0{CXUH{*;%(*Kf&+a|=S6^Wm<^&U(N-Qw)GVJ=Xqp*7nDiF_?XX#Fzs%@&z z`eX0GHutGhAE^ZIoeC(~rw3Xg{~y1$*A$MJT~`8!wJ)4Hb&CZ>TBd`ZKK1c^7^YGk z3ABV}-NcWvgPBg9dKCfk^HYKRUl#A9;lY`I^=bZj>aAS?|5pl}h)ZeKsZN3rB!i9i z{TEk>cj18tQw`6pEiXZrHSQS#dq>2HE`L`mo&e=xyFU{CzjL?$_M-l`>-L{8>ip`>OfZ^(2m^?N)_o-A+Vl&BUOz6%a>w z)a$sC`Bq{bE$2xKIDs3VIA(esKSl+_N28*6Jj=XkY8luh$ylKF6?FBNG~Q7d#075r z*ri|JaenNjkSNUxg@3mr$9%3wgq)6O9~+r++25dCyD~YKlx|>)AJ}Z|Hn4ks$cbh4 z^8wBgf!Ko^@0{)x$p|Kgob+p^gG~}ynPs^f3aL=#Zo$A;k3ZuMlJ%Ub8;07P5XyHhw_q_{`j6wbrju6Upl(z!LGUW#rPGWm$Whn zzs51WAsJtZhUH4$+41C!^^D~5Bk`_7fM@e_NCu1AF^%TE1a_^|Pvt{zF6!qB;3rM% zNrehxhOUK_j5dhZC1o7qDM69VH=R4ONT%1~g0lJ+Xn&f2n35;bvR0R=(Z05%a2%z5 z!qIB|B>MD#{L7Ei{~d=heT)49@fh#0tS!-4x(7N zZB|^Q;NB5YZKqUwQc(8Bc=`&4N-aX6eY*jBX%|C57^M9(`=30Q>*Kimp2{Eo$^|GX(GXA?t)n)`a0)m8gwkM`?=}Of5`AOOPve$ 
z$?xF~6cwqfQ5QShIYZg&XO*2(02fg%-d`N*SphgJs6wI|#KO+r<7UoQq%2aRVOBc| zIFWxXsQFx2sQucc>cEbz@+ay{fUM+xN#U~=OVTHRJ}Q5D903Gr09fIQ^byIMkq@vd zYT5yq7R}p5KXU%KDq!ul<>-PW=Hw6^3bUT^Z+XGhlUMNIB|WhdF6t*GT?{X zQ2>J}Fim~Q;`^{ca0{%i16nXvJ7~b0(_3Zom1gZyjed(uywQ+1Px-E@H$nXJ0crXx zqW*H00QL|rVee7`?MDK#RUi?pN|ZKd3}oyBfWCscdn zN$-A_hy~~y?0g7muKX(A6R(^C6-en<>Zw84oL#EHz1$GBY}YX%$P}`}WlUSzUOS%1 zT@Y67)h*(Z`MN{*18g6-`~tYkLc;u$eVHE7XW695e{Eg_+m) z%^~Nua6C$(<)Pm?)wfHHorWruZ_DLO(+V47N10F1*#p@xuDU@a zf+Z^Zt=cGO4CX!y)!53geqII+tSzb%%(qOXq`rzuZs9_7Zp_=w5@IE%*RW zy^9YSMZ3H|;tNt^Or?GP{jrXS_gB8oDXmY~V4I{Mw6;n#mKh2sUj86`0&A7gn3|(D zrNRJ8z8`EeT9^z99)$I2p)AW0njZ=&_^x0{f(*g?15S8XP@Y&qZmRb{(*xpE?BM zo&A&tk63U+Tbpeu4Pg9(DH9(NyONTtB*vK3+pYH`8P2$9{d;y>0z);44Ghq%+h_^F zWS3xSkx8 zVM}{YNBgG=XN-5e&c)03@6;c+E+Uj z&Kx3a;&pSEk4@UK76oy=mEnHQBE41r5F9Ew|Evs!`NbSI1m7@KNy@PZmul96kn z9WKg(bT?hlLKvUBlx{K9h-IC8N{BR(Ly;%|uLov<-6OEDhH5s-t(x8&*dG91S0#@m@)t{>W zIB_we$Gkp`(J;=kX;GGz@!W3QA^M@A?YX1n3k%l)@!t5revmEwo;{S5WTk|S$G z1_O6CPY!|0t{)E2C}F288fI}OskfKz6IMkEjd^%esY*r;s7Q7fiv;p&`pNblob@}%xhztui0~8=VS`Z! z=m6YHz;UV~Vcl7fIIGb3xc@9-pb%IgVO@j1nZt(a-0q+nRSJn1>_>;XBv2BW_S?rW zT-~H{PYpncLqC<+1WG+_^Q24J+={j4bBeIpH~kV@?_rmoG#)-qcP{y|?hT^`KdCXU zQ0y|Qi6-eL&=^%MI3^VNp-%a_8(mc$TCQfPd@^Bhm^=!E?z0SDZ?Z!%@YoFF5mkz> zcY4H?_+cspWZ}nueC`#DHBKKe?)3}P?`ax~3%LFX7$O-{!R20#9@>X9531k!P+iyoz z);i;aKQOq^VM7!C7@f!=O0mk+8PA=)P9GSq6%3C&KwC2Np%KH=UK;~Vf+ z+oUJD2*aCsbIXUw?b8FhV^Pmc5WI*c_azG{dgnuCd@HwlV%rc_OIl0~Ia)QhDeNw( zU7JhUU}}=nKI>wmi6Au+8JrTmToe(8GY&H~J3YBm^J`xB|1G0eC5ZU;&`cw6*!a=$ z(4hmpeS>;nZiP=3?}9KsByu$a_UWK;k+YBTcAX~rhv>6>g(rPt^tmGI9_K6EY+?+j z9}Z(F84ssVp$u;BTLecS+)thG#m;>ibQrtwIC?yNN4z2H2YJx*lZaozRK3l@Szwb^MXIVxM`Cg$ z!>$GJKx}hj)C6jE)o`n4boHbU0ul7X-rUu1u!TxMx#s;}z)l5RoEF=KLf13e1=Dxd zn3%!2MmV>lF>&MC3J<82xWN+t6p5}$_nIR6-_@{c=BTg9eHm1WKcafHPk6+K?@|~Z zIb3k)wcCC2K7n(CL&$HdwVX`F>K$Ew{*0be#oHBSD<&u{;?Y5GJ`wDJDTWv5XNpVz zAjdqNTdFMY<_PTeDbcJyE_3)@5Ihm89)&)~B7k&?C~riYK~Pd)9Ziksx!$<_IpL!W z-S)1D>n_a84;Bn}oS-sLyj6b8HH-i9s$Q>Ybsp=|1JrfI!-$!hB>skw0SxaqI zbhCmbWFC9gDX$3-b{!-bmt@=Y9v;(6{p;MxN)77YQYL>2 zF8ChXs4gO{!zHiic-uuvAGCORaDg>HO{B`|WutA*HI2~`_9MORuTKWq&_Nyd9FpH| zu>)$J<^hv!;>Y2*xW{3E1n(ZV%qBOJk9j4)wR?bgn*5=$VxH@CYUHwIcw!K`JiF2h z6y@g>x~1XqAk`&t`+QOijEDCU?W4NFTcOGVg32QM&Z~7UbZMTV<>$qFR@yo|W46AP z`};4hH02xE5#t?|)THkxruEw9T%qdnd zACZk^$Fl7-xooWE??(z6W94G4U>2GblDGMIL!;FC2SF{lVEpIl0~7l{c4t8PRyz8IvBzPN&Ub2d-PsqL0LdiTQrV(-0!qTIH4L5?0pK|!*h zfaE5tB*{u{kest1O^_t1k!)%aklX}`k_5>#&?EuL(g+A5&@@3ZNRnWa;q8WV?mhR- zys4S0saI1|_dkm6{&rYluW#+&`YngFFkgGc>)&mN)4A!~@5mb6t{{DGjNtMchTl#E z+#Y(HWEstCLH<8GJ!5#VE$BGU$HD4zEGxzX;dZQa&H`QI*9^Boj*Yxu9~oC_^ri6H z_jJO^!4>MtRt=Ku$!dj}+$T`-|Wl;)vUUh-L!vKIx&=Urbi zn$LY4E1d;{y(O=gMWRRW3IZWZ*Lh+fOELCRZ>KWA+hpc+NHzc!?;Y^!qwBp6H`JVP zd(HcQSc3cI7h9mHbMsuzSCir*z|=qiYEL%yLiGF%_=ZdmU)kZ zd+HQG0ndc1H-w%y!at)tIRL_?Etj`ATjG))`8-%%gJr-GmhZ`XK zg#Am)EL2e3&wmg zv1z-3Ir;j7iTW=$7+|qtsMw?aow*O1Q)IEEnT0(L5@nt|B?L5}L@0`qCg}6A-G@pe zV~~erC1^#EHoV&s$KGp%$50}Ll`^x!J13uzZ-9l` zfnu91wYO#_6Js>AZq9DE-GVzoRY50e0$NVcckmJ)W4)t^?=6vI)W$~>ygE>dKi{(N zQ@4e1Io0l5uCOp`oeg|gZPQ~zSr<`f0l5O1AI5`7s=?Uiby<{v3Kb^;!%(~(}U zt1=5^A4_rb?|Vw!~1Yy-m@uBt7`ie^)Flt-Z}_?W%7O1v(z?- zxzAml9iLK3x07OH0Hv`43c(p=aXs{KIocyq@{KA zTHdzDvwT}i*<&A2{Px){&hvtQoS^!~w2e8fufSqO6@h4KOu;zhX|3@EFNe6fmWiO3 zS01ICg-?90+<~O4o6Cgft3P|Cvnxw~@8y&ml18+cHe21^hxL%3M@?{7uV|(8mFLn8 zp|8dQvGf;!Yazr!ckrqbZ@(Gttf{Si=_G5rX&cN7f86&IkwW8rG1=vjz|!uC_{zV= zqm!z(h2fc0Z_!+B3ms&3=9sTb)8b}%vSZs+GIWQPWWc&vER8ffF1rcr9@7qE{ zKe+q~S4L87+bj34tBwWh=Cq@^)?W^w65>{FE`eiBf5D1t^Jd`4BJ^N*ciVxy9UNp^ zG%%5BCw4l!z}}k&^Etw7HC#{+0r3Qh1^>AxJ90gtplC|WBG1To|73=?S#5^F&Ay&P 
z$Y|*asox=6r7NAY+Jp73{)9LM%t}?G!YJ3(<|i8ZC|mjZft*iwhE8a*h@Oen#hhIh zixAYJa7>HRN|OGx_UOm**sD~89Wq9Kf4r+I6&0D7-~r{@_vjd zpCXhpQGu6eKIU3I*v1#gfeio}YU3qyW!$7+qbO*71_srTUS$H*nR?I2YZ6U!{FLY`e+E zR@aa;cpw=WcoS;}H6ZA*+DKlmEyAYMe?cei;^mxJvfna^I6PiJDdP9|2TeJ1&*y=W zHx{q#@K`3zbad7h&iv-J$Y3RyRxE^<-ZjKa=E?aC))16HLG2HEbnKfv*C>-Fxet<0 zS75lYO%_*;#t1scu1%^@I!G6Fz$LAm54>9eWBPLr`-jlyszHTZX)zJSk0kGHNTvi(^>5z4C~7%Xa8)IgvvTiel77dUP{&_lmPg5F#fMFEWuj%! zj4I+sD;EF)y8YBmjlv|t78<^KxaXFILG}YCRm;=7b7CRM*%BAi(EM_{-@RdB6n*-0 z6do}H&I-of12b9~KhC%}1bdb_hmW>2^6zKs_b^2vW;yiAx5n;zYs1yeY0-fru2d}g4KEMz&W7JG2VjSh>x;u6be_KsfxE5Ao#!a|$h zwLRKF?OHZ$6<_P94YSMC5RvD)U+&mloAekjt9rGDOJ9@Kyyc zu|AVd?yygzhW&QXF_{+4mkr5TqXLT1HXsu6B4Idx0(tpB*CpEiDdf%D9nKzC2uR2` zdqvB&+7`;6o&$1oMtSKAe-z!oEnP$wKk|29OWRZM#iFA~DB`?4<_i2BVwkyp>PVb( z_PMBye{KGwxtk_`dOzcN(SCs2uoYi`f^nxOn!i6(>kS}fwHnb%+wX!iz<%><@p@9i zyfnUraAT=u*QTZsd_DY1Cpc>y)lS4~FNAP@Ihm-K^IF){?g zAK4K>$%dZcZNljtqE#RL!_!}qS<<*p#z*J!)Fi=~c$Pa;$Xv**xbEC@>XTc|2iNUN zf|%uz(wv5kI~XQGnX)6D<0j~Z&ba$_ybWA)6 zUF7UW-aD_ z+3!@v3+krmnp*P+lOALW&QUBjGo{kTo9?|q#aeDMV~_WC`-ZfF)sMc%K6G22*GL%Z z4lcK5yB#v)xy4@eK*slH-0<~@barh#k*oSBlumoF*{nT@D}!3uxC>P_D{aD!#q$)e zaAGt9Eqhy2f81oEhQS=Tr8z=SRejFLHey7{%dIZ$7hyC?cahScOsTy4<&ow=J!wX| z4j5^`VY_p{Ur2cwt8xd7qvv^LW}rC=89E;VrOU+^5Z4CYo`~+F9Y$ zMu+;1Z%qdNC;~Ze7L4i?uQ7NPnsvpr`)Knx`DOGCfs!w5NDh`_A|aqBYs?`2SNd|u z)t^RtE>9tnPu-DmU#)R$H!!QyMIokuM{F+RR+Q7cjx}<#Vm&-;v z1wo9-gwx2`CttDx0%d;YE)}}d{zT>U${bbiE_Uorym1P;prH&Y{@|yUe#n;4t;yHJ zY;P7J8Dv{}GukysuS8P;`-hV<)^QqYs(=kZ<*yI6ntOs9_f{Ca#-oI-pCk-IV!G+% zP3<`tDQgWm^A?C{+5i#LrhP&jY_FtHeDStnUTolph24C&Z#y)+!ic^(0hD0{3JOzj zoF`jrlqj}J8Zx4XAiN)X(H{K_^$1?Mv6#TH58A%N_?ZpJ(zqDY=Z8!y8^4)FZMdWI z_477(kpfpgz;;!FVA)+!DEC8?;3%{g3>S$C{P9G`1iq4iUHd^>Z4(1sVx)j6Yth0qw?M)1yeiiDFmp2ACOIRVU z7wH4Ugm1NL#0&|??_h@k2eC1n1p}Yqj)WWE>=&`8$RF)bK-?86;X`=cGVz?bpgpWk z&KpNz+GHan{aBcnd#}xDL}aWA2&s_n2;RWe9|$h?|5z zkRW2UMN=6xh+i|ik-wo|jVn`f;lZ;WouI;;^7}n|0Cd5Y*r;0}n7-?cA7AVY zVA6(@yQ=>ww)fng87B74eDnrR?aBAV8peH;?cuzY!4rNAXK;$Y6z1r$dK>ie4;HXD z1y1#`G)*M|8Js3Io-os)ivX4}w9#KYHiY-_!8BG{Mlxus@cS8yUo!~RZbPBETiCy8 z*c}Q5tWAOO<-babdu0)CgUg!j&yPMNTk)FAJ+DwrP#sUQ&+|c!1q$5`&e5{^Ek zR>0o+vJGzy5|?JyhAV&cS3U2k=1`lqG*)5zZ7bQIUvOn4z_}{!Z53yaJrj(>mX<2T zw)2T!c&=tL?G3a~2*L72y<3;zz*wmuCy)Cc`1TxJE|@fc~m8oMCtgO z6}_J0e<3iNy>4rwjJ3`ej#1`+Zt`n02PqmV+yyAP$$7>=Dyiyj9W8!CKjW7#^00*2 zc%13i^f^tRiY}BjZjpq75Z=XeRB6pZ?-=iUQg}E{cd4b*ASaYZrp-5`%@Dj&Aju4_ z&|G4_dl*0Pahy62#;U)NtKCmx(hn(siXL4pc;CE}YDh;N+emD1Atwj2Z;798U4Bu0 zx-k^61czi#xff5`v!n~p^ii^?g*u061^O(K;*1sE@OnW}&C8WJy@L9I=~|D~9-ahh z(p~$-0g9*440Qt;9J*52lsGF?%hR#QG+>k6iSfX<$CN6!A_6tR0u_xYqEfFixY>9p zpuJwj{u$mjH@wYW;MGrbmkH1zggr@?(WBEY0>tR;FLKUyM@6dr#ftW8+deXw1bUl#YOP~A@-pSS>E5$ zf|Ht}Gh6G}3sO{h1Ye^hay!S4+9%0QpTQnxSCQ8#cU7QAhg5_F>nG*pN zECHkfjTgqzIw(SvmV9bQ>yA*Nw%VfOIi5=FlGKeZN&;AHnbvsQ{i8@6iV`S|{nCJ{!^u*^M_I3U8(q32_XQj=>aUF56hted}90<;STSeiTm_ zWqnY=5ESstJ}eVFIDdAu=`7OQYx%JOOgd!Q>SRYiX5e-%7S|{U(l?+r z%e5zta`{|=TMa-?KV9v-j1BtvT`}W*hs-OxtW>RiLZ*8aZby#toto}8QdJt~`@c)j z(w4=mrGH(UD|eWpF>1~%skNX5H ze`AL}4hR&a@-6@aR#kCCH?+p>#t1`sN+M8zlpb`A0rP?FV(7uB=Dj|SnP&k5i%0x# z6;zeyvGRVq`;YtDwG})LCCLYC?E>N9|lO5Rb7ts=gd7{7A)2 z2+s>aO`Jl7Vb>f}3dnGRCG#0^;H0c`AlN=MaaIg7{(Xgc1tmk>FqIQ%QP_;HwQ*Xx z9Xnn;7G#$$2#w9|U!$NVxQoZ|#qt##>xqW2jwjtzz!n_f>yE$hxh&!%@!j8VxJ0eM zjK3x>8fGN&iLWPik|7lNN-il@QPP}vo5B8hS?W2QOXvwkCCsLem;KI%`Jxb)u{)S4 ziq#|g-bT~3wuvWQ_`RW`oU6P2mc{j7M{?=}9(L9dE#CU3%H47NXGUp42?@eGzG0Cs zQ%PEpca-)-XtXsP0<>bE;V7B?9Fg~GWi-!if}1fZhOaWaT28%AdD_H&(Ft5*Kg*ml z#q^9V>hZA2JDlTyTKO`&#M3vrEp8l?E3%VrS@g%1IvE8Xq zSi9a^a)cK?9%RJp|9;fcPWhAH4S=W~U}E{ddJU0WbU@8&p|fa8qF}%)>k!~Io62?m 
zZ5_9SG94pnDzeqm1)SD#m zynjX4^XOtKl*%sR;?OCqjeN2wgo%KidK!r@HL{VdtzPI=MO$pKf_Xth+#Tq7Z>sT-7TGK;nN9 zF&*-O$PJha`d`FJ|7{`l-)33=zwSkL^v71~YM03}9d#TxcOPo{asNR?q$H252Ut-eqO0KQ4F4KMMD##U z0gGe9oFRIuO#yR_gpmF(X1KZ%1iJiHJPi4Bht}l=5z$vN=%2Zd&;CL56t2R02XXBJ z_)wm*iF@=e5z#xwm{E84=N4Np^~q=F5K)5sk2f z{Gk5tqyBI26|(-?0yB`#gk6X=sAb5#0t)Hwa>PUMS72_KK;)q-5z*5%fRF~L5F(;H za72Pm0YDbuX6@&Re0g4@@=oNN4^F86e(Aqk@Lw%BHH4?GJHIz0e{_ZRZY;!}EmlA( zf*Xe$EVt$oXjY-m6P*p$)grDKwZ%&9%{+Akt>i!pqHQ+rEQ`TW&{YI({;YC41OH~Q zbo*JBlZB4gu4xvtwD^O^ppP5~3J+Bj^I)by*v48f*^I z8)Y^D(FIJ_&;8Sn6Dtkdk1o829Mkqz+H08Yuz1>#xg4xXb`=l|f!;%IvaYW#U*rPi zanGkA``w-#Yui}3`Q+W-5$r=h!b$hYe98olpEzaDb^bAZ>JL`AUS7_0lB}a!$Ob%E zIk9KkK?w<1fiANv;Z&r{*WaUCN?nwpnqp{=cdswSlDkVaXu!9B zt|($Xx9NJue5v18RF)3}&d(Hvxek%;Ra$6CF1ORj)KbLAe|-GM)Aql&qGNkQirk8& z_1eXIwmv9R9}4M>=_5-wqUf!%I3wXCBNmzwfBUq$uAqm5O!n8c*L&)?-UqhrE7`Fq z;zV7}2PV>y33v58T-blUutOusl@}54iERD3|8sZc5xy==Rj~7tIAAcU9qP9O@aw$<8$lb;e{8Qwj;A% zTsGni-(2ACe%@DXa?4qPLdjvDy(cmj61|+3sDfA5z@ZV5(yKR1uG>ea2Afw^wNKQd zo0O2<@Lo>JwXE5hpRLOLmesDQX1=9f*_KC;ZyF7(HO-wVQ!iA+#s72s(XLpFhw?F^ zbMv)vv})Gs`^v_HnYwRLm9%cQz&}LxU>+vqb#9XlUha!cgVFtmsArk<2@%$pW3^Pt zqqhYc9^8HSmp*@f;RmI1B#Q36)g#;TU1`jsd-2|jzJvoWo&8T4%s0t+Z#j&TUC!_d zhd9oA#VCy=+@W8K{2F@k-Q2(!(=j9psa2GX?^` z%!PcNaK@yyj4FGj&TP_yZ`C4qzbbRhMTqQH9+|Q_l&yqZsyk|xpr8MBms_ms$M^N= z0MXA4N1xlqf@(AGKL`lP*|%9_Sy6MFZ>|q=?`oQobV@_XPT9~aD_1rvm=;BiUdWMO zPCNUR{4eu4&>gjwrzWjvC&3SRlkW)=?;Nn!DThZ0@8!+5QgArgDgzMud4{*cW-=1D zxZ}>gSxQ(I?c213OG{{B<_p)}Gc(=a=ov=XX6}=(FLSO1XIWAk%+!W7U z;&uCHYbcfFXg!5$@US%TJLbQnwKHGacAk)!F8d_?`&)IU-);x*E_a|ma{qiU0kUX*Mf8dp%x=!mL`j90`+fh87`Yu zZ)+AmF7?6mISh*{Ojl1YPyA1dooQ;2Y8T48?Vi`}yr8kLZgUY6p9Y#lzAypYHuTP}Tzdy*Yuc+HHe$OsheQ&h1 z!DDJHwmik*T$XMGISY-O;NoraDdkI3*D^d`k9iR7fg&b?LyMFePUxs^KhF!HqY|cR z6>kzF{SaDJrSqJL9~Lg>dw-IvQ-SJ3nKK0@a83uN`%Qik9i8tpSNsDJ59^iU!iv20 zyUqxSOTmUsH^$eQ&FPgauUbCDHub823brh+TsAH%vBpdtipXhBmLjw>YOYxkEAj9B z{u~;n%aDC=_kW9G)GiPm^TWpID}pv)IvlPvr&3LTkurEj-k}-P{x6kjbN= zXX1pTvvr&8d!_>&AmX{bpAKw!71V>Jl5QV$IKkl~K9 zr~l`r2ZhqD6?-p3|G}v(o*4a8*2Vi4yp6DzHK4#H0nGq zJ|O1pjw3L>mzsVP3_-DGQnDkwb^AkisAK4c^_?vVg7z$JMwVN+CG^uPiOH>B@tY^w z0u4j|vatP%N#@0d{Od5%shP@4=~3-`xF`KwD(9oOtZ+^Jf>tg22KNL~&4CVAR3EhiG`gsaY9A*?Wt%plQykav|8cW(1p&I4R|{XYfU z95C{$*Td4w2l+iOHj?E#n(;b&Vhfe&)O}j}>>e~}%l)a5>9anFitW{%WQ#~t-^B54 zHD+!sz$hlutschmG8UGao<7}A8@_h^Y2cN=&hp;1`ijYd;z*S5%fg`!KpE}f_AZ(P zCvZt9#L@Cj)Hg+BoQdlq8wXAl`h5^lr;QvF|A)w1>gFMiu8BDhRRI3z$Bm}zX^CHD zgLwuWpnYi4eK4sZvI<|Zl@+r1Cn6Z7TP|f=(J{`+AM>9Othc|3qz+aOfGOrTd(}wL zD^4l@_8K#N^$DW>`nNEvQoFDh1nlB|J*lRN`Zo(x`S2 zi@XfWP9MO*wGw@nf3>pG+RvQ@wSryyDnSJS^LL*u`$gf&+9$TGk(c$|s@4>-P&go2E{>QG8uTp6nq#dBmZ}_FZM)_|23hIO`x|N6c9Pue+Jenhe z;A)LYf@ANL)qB4aUI}nKP)GEU)Y;V^QXpXc0Lj#XT z1=txa0yBNgQ_OY0jaLR?)Ajl;`*7EId^t$c4fER^|@y;728GYVr_O-==XHY{o5B2 z?0Yo%wntHGX=ZLS{upDyAw`qeEQM?=x{D;?^1_+FLb?qbOQr)KCzX5@#P7;wo3$b{ zmTe!$F1@*D<5F5l_y(b)_keg`%VJ-p6I7i(LSKWC zYZl*emzgb0Skk%EW@JB>_z^@cAklgfik>(p#MK`KGq=`ap4vor1JM^pwWof$0wa-j zp|1A-Gs#9erG;wK=B6WQ@)EsTCu5ssj2JoceLXmVp!lt!ZN%c@dOQiRDQ>;)DT2w; zDEH*1{B}3@RPjb8R*b;k%KpPLVgr0unYmK6HpK5eLZA7td|1@Z9-AE^K4JI>&3fOR z_F2i^EWMUw|@)@PW$bS1yvW;O-0BIdp{nislf(^UrVorUpZb2Z7X#ym6udt&{pSp< zbFVOp)&3ujTR6)iyM8>?^BSG6j!kTb9HS^8QVXY+>|NjI1Co_+tK`z_^A zEl@sB_2pOdED13%SH5iB0GOfa9H_|T}av~c$W-%7pg5hwjm(0lGywvOf)qn;?t zk;1SOzDo!?a$eB9Px-A{yF7LwdA{6>?5^zJ!7}kSCFjO)l>TCtj1doi45+_YVVaF= zlh9)oe#8_Vs2UN|@Pez*I^<@wPXsZS!LSGYcN$JaLtiK3bx3SZ{dUI$I~$`ic0n~X zz28A5-w89wnmz4$J$vNkD#I995WjL|s2s#N(8b@=dg(#=>{sxBXeN%m$*uKgI__pcZsUwWv6r^w zkiUsj;9tWsg5%V&CawCC|K4)LK@MqE&#T-cXzV`&94>E`#TorQD$`?eeY*2o%9TRS 
zya>W}Kl&zbFPpW?&ag0VIM{WWzWdis>m0LpL0$pU8P}>DBG@vsbJt>h>Z91vd&8_k zzg-)F45vj+9+=}~fvDiTUAx3qOvwJaCE^TE>h=Y;>tc=h8Z5w7jI@LzM{M3+y`8X_XS(?ErYNDGuQ zftNXu!=X8`Pe6vG|KBhDcMJZj1^*8WVIq(~BDN!{M(y_+bq7JnUh|YGlm$b={&yss zd+tDIIvP0}O(&fj5%odkXAB4gW?wZb*(ozEo5&p-0QWaYzY?(mod``fMsDP1CnW@Uqbe8 zpFBMm+_|r8Pso={1nmdT9I_lUIWjb3dLkz>*Eb=K!N)_c*mcV{D8Vm5-hrBT3A?u< z{07Caai8)x2%_WmY0hL^(fOdD5sPSJX3%R}lLc@aZg1KZClOL-_h3WG%^-)s-oJhq zhI0FQ>3u1EG!M?e7@2y0Tb`YnNu~XGGSkU-%Rb-00u=s^?Ka+S48B3QV1Sz_Vs{Qz zsl4M!a0UoSa=Uf{O?e^D{ulG?_VRep;^gSSGI2upbg6*Nn&aN?g+=mxDTh&047$iH zzJif+@0Jm-V^%4Hfmj*k8N^2&nSae)hjE9~XUWK^S-Ti5va*A5fcamp%3XCA)x_R8PD-9@ez=nV0#5@`( zmwTt}8Xl@s;jABu(iXn2G>`}QSjexy)b>ls&>*|v1$w#J z+TdbeK>Y6f`_bRnF(6{6EAFRx8TMS~SJ^;i@X8huBESGd>!tOQ|M(&+CSE>gD>3Md z;5nRu012(Tm6^WO)7Ed13fQS%E=R*vNaS6bk|A^W&B_b&*2 z%?2vi^e*$4HRy>drUFEenpd%?Q~Dbe_76E2_1&DP&s7gmHvoOL1(RmLU*F1he z+99YX4WfL4YO!Nz8Dg17?3y%VrnHZB%~8zi!qzujR|-RwDtoZG%} z>PV6yyfp$5p-F!ZWhdA|yOY#{Tk@`wtOn>_`;?zuq0z%oM6Dv8R?mE7{~x;eeX!Cv z_Q7@O00gX?gM>**@!DhYPf}0AA*tnkSy zQ){mmQF}xi2VVrMpU$(FS0avLP)l~}H@}uJ2Mc@_@%B>C&Ex5RSXsNV+nl7SRLY@% zP)c)qHz( z`1GtFyxM($1OCZwqzGq`tdoQz^85N3Mxkqg-bbLjAR0mP`WX}fHa#Q#TIpM7#y<9cx+!&5H4{E%DQ3>b$>5oe39<8cB=;r{ zf=`{;e_T-FUEUp&6Uks`>4=QLts03o?I$lMw?9D7A==z!W$ZCA5kUW+ivlTf>P)_; z1;9@0%Nl3=l@2X(=oDC9Pnb0yN}t+#zItf++1YDM?fe{&oH|@qY46 z0J{TL2KM?CuFEgM&?WL-(6aWBIhvXaD6E%$>gnF=^v@v|~6h z@TU;J_TIIE53J}OZ(PThMhyw7*yd;S`Pb?-f&I2zm*3R!?+Cy#B= ztPRyfXNA9a;;iaM@{d1`XrsfHIbC0ira{|C!vR@l+PW&TTh4J_8=nwZdIMJUgy`>y zxaOqCE^uPT4>47{rJe+q2q9H z^nlAC>{x5DvdQLYJ=yVY)g`L-GKRGYH9yhN?gtM$kXvJKLn6?gcanx|<~N21-q+Ny zxpZ6A#3yflTVxMfHsJcz>vcwSaf1~8q<*~d4B$Qv2Z5zb75q93_NaUARoIF9htDpN z9`Be-45Pm;OcpKL)qRg-RLzh%@;Wk3#L5n+GQiIyRU`+^9rCHu4>47JjH%2H3jDL+ zF!FNlJm@=4ClHClF$9d+jCF|YX3V|lHn-vQnDss9`D(pzlNaSvFQ*7U7oW-VQ8@B0 zGCV+rK;>yJ5rNux&j-aXp2~6KH>Ixs-DonjbV&JkOiZUs0kf&7eUOys$bS75)xH3}$aA&x=j zOY>qu)g@GB%BjajL%rU%g&qO0^P&gi>1Vo%hy7nAE2nRlx+@XC_MUiR{Je;?bO=RO zEgiC~IsvQ*9E^qy78_j_$nNG~)CCmoXUAYRa<1$SW|xTv~gxk>KyBR@? 
zZS{@OnNF|WSf1Nk>pI$$g(_0pPBD%)t^~1QeoCgg-R1#|C5D(B2)Jm1wG<<6BK0&p z1(G^!T|SP{JH9Dzj@Y$tzFb0;)*dPF7XEq;Y2Li0xE>V6GVqeZ`KebRu5O3EmfNMO zF!&0BVv{!jB%eV7*L>HvQ&1qyo*0+b66`;+Axj%LxVm2yxGdu7;LYp!uv9jYE4&NJ zo~~5_h%fVlI-2R^qb)v8a4C_2V`0CZ_;}e@RyzYi1p>i<~~LUME0o57W0%3 zekiJ=uea`kzFP4q^orNXIIcP>!@(c{uKHnk@nNlUAMUOpHPudH9U6)oEM6?5Cls=1pp6Ai?kj=a?Kpef-Y!Rd|mU#;vWEn}6vbdw%eq z?X^@n=PQ;YV?K=LOp6T`sUI4h?{A3PujTO64}eSC)HzIzhIbNQeZRPU0ZsoBM|kW4 zNw(l(kmwN$b|oA%db%m3%*HDaextdEbYIv85?G|1S~A+x6(hpm;#`Fs znWFpPnX?99x8N>~*K%$8eH0mbg_LR{sIgMZ9jfA3_*giRD}a)6kibBAsP zPi9umrH+W@jcx^ev$?WXv0R34`p-<<+zna;;#&eFEVuzZPI6p zEJf+0R1Yai?goIS+nYNzDMvs#9i?CO_|@xdO2tNg9j62;diffxiqA)`AE9a1--O=$ zML}G8IB4U=ni{!*GF8_ZxUQ5P1M*Jx6Hs(?Lo!2pGc@Wg#>PMGTyVxqO*V(bkfHgz(M9Z^hVg-p zx6w#{*P+P=Hw7!Q4~z@hA9RQN9GSd~pLvW+rMvxrsaI~rc1zF2A~=D|)gs3OlA3+p zndvTh{1=JzXL_yE*8$xD-tg1PkEa>Zt2SQ*U*Jahx9o5YL>{r?y%hf8*hiVR7BiAp@A>s!Nl-#4WeMjJ| z71ZOH(cUbHPIhVyxs|y$dZ6~rj7hww-K<~yr8PXpE$2-N{feDKdRM1MXIfb2r%&+6V-*?ecUe-l$XCX~_P zUR_Tg{QgDRXvo!|^BF&UXLLaqakQ;$_Mq{-x_!seW7JEuO@wVDM~%SIja`RPx)X0l zrIzw|QUOYg>q(YxOEJKk#?CcIn|*J3U}3MiTW08HgIw82MC_)9qtNvkS73*e=5g(J zw|R+aJFnX`I*nwwuj)w*$-(j-b5VG6hes-o}H?PWXtIZ;v@{i9qP{?MW$p~L!zUNke`Tlrn~99@6!xoFlM9(4$GR0}AxoQKTV?rjn>d3^-!ZFfBhPaIoc4gp}khaiKeD5E%(^W^;M9V*Izbkkm9jz)5Bo|+iw10 zlavErN(xUl-?hA(A;wl*>rIe4>5{20{rOM46KvP0VCIQIE}#srU7ylxiYdd)`XOc& z@tOT#IOCl;Xu%FhfRB(9t+lA1WAt52IZd=EZ(7(`pcPL2PI~5&j?`K2l+x50gAY8V zRLPpsN75CK3KV(1jWhTcX{F4o|I5>7C;?A@ATz?(1Fr*7N;Bv>dnhm7Z*oVRA}0sI zm~%7atuT$h0A}^eQe2l%$cqP_Uu?)`W8Z+Py$y`k`XofMuMxpZR=fhp2B-5w0mYLd z9bmuJbNm_~POsL}_54wx{~ZGB@5{{qvxKRrRZ4_&(8UOgc#bj+Hqg4Pk5z;~6VR0# z_;>uYCznTvY@ufpBboh9rqmCn)RtZI*!`KGx3@sYZkHwy^fmca73&$k0nU3at`>sk zOzX@aE)T~4V{T>4Fz;eRdjTbE=iW1`f+trqoy-7CZBlez^n3RC|X8vhYcj zhDSj+H6@%X)J&1jr#~AdwmRiHfSzIo`J!{bn`e!?9pK_W)wB&HjGfDrE$DghSx#`I zzWq2fKi;;g(X>X0FeM=jW;nxR1yVq)YJ^*TZh0Qh;88X}i;QljbCq@mxB85eTRw_@?p!i8vhiVp>aWmChMYYKGTkhD>^#YR_;?ze25^?W>iic?Qj5BG zYC<9&BJT1y-N6bF7gpo0Swx`fHYFOQA_BiYpa(_`XQ%Myi2(I+q1_Na&w9B{4P<4L zs6}fsXj`mLnE^CUN;&_zHCb{_aO{S#5fiLRDeLpBud_sYE)5pE2Q#S7U!C>0?vSW zV9wwmw(1C{W`EJH?l$MnHNPUf0tGT=cJc;ph>IdAemzzk*~?zcN0={(n8xVrf-j@6 zCcbA46sfxb;m%RmvD7s(z~-?t+Hfuw#EAi0d4CAy`t6SV{$l06?L)Nt(;88yx{LGs z)dFoS=CEN6ix+bG*c9VUAv`~d+ms?4Zvg4peE+x5eNv*5#9!DH^_*;Lb#?wnALRuz ziJFj03}P^4?66y|L&rAg@Oy*6n@#FH>`9i9*aJPE+^7%v_pTCfBdj%!TL%}%w4(<;n_{8N`E|!e^ktoRz zeBhMfNzuu3>r{uAt{l=Y*`Q6hX0f%Zh8H%tYX$mKIMo+#^~#f=oyja{OG+D!O+(Mt zzNIVP0$u9Va>W3m1tEC7fEM_K4K-oL!GdBP~Yp)88vchZPMOJ z@Q&T(_sizq(*juvU`={H-^TB&_}bo5jjbD^yOHkjTgYbtQO#(!S-9Iqif?4m5oV2ZFv{Ek}M!R|^ud zmYcu}O0Q>1o_<8_a&j+=o`Sw_g@`g8nL+*~8UXz}_Jt-pCp!c$_|c4wsqb?GBa71% ze3qTs?NpWQsv4f5$|wJmMXUuT`ue{82kz_gBhHdv0f0|-x@L3ENi|xq#bt$KCS0RWZ{!ccCq%YCD@$V^I zoqspwUP%w_{EQ`$@C(0vy!J00TLBT*QncGu1kahI!F z<6bC$D_^Hx z6JkkgG0dvDAb^5Ue)6(nz^v+A; zxbq8^vMqn)-1`in<5WtLsx2B_3SpuaGNd2Y4foKe`iNPYetAno zr7tO)W3S5;|Ii8-)F^ThP*0tlQ#k&6?s5|FlvyYH*>Cx-8=L&m^0PwGW6pkMV&Hwa z+>Csd(>zUnR95`2Map9KvZ`BD5YB>gy%Kqz=OgbSf9uCvajF#|-X~TTj%D#xL4uia zY$Q-eUo*uzj^N{%aSy-6t4rJvwGw!1J$+cS)AO953~44c9;Xv&RfyNOug?O^Q-o6k zAL6{^0bDJrs=XXyzE}r*S_639mOMJt47s|;-LAsT=FkgsEdQ;}%;-cCUh5jaX1mwa zBYR|gp}_ImeAeLPM>zJ&`_`S2FLOd49 zoPbRUs5;TR)N!Z~d&c(s9Z~H|TV+6Hi}Gc4$xQc)T=y0p+*MP+R;UIwyUIEk?0IL{ zu5&reqjheMUXYbBVn`^Pk|)-FILuEy&oujxUGg)_P-Kit?-S8u|A4uc{B&mt;|~?i z(VBf;ZmwOoKJXDc2(Q8R0JkUaNBfZtFP(l z`!q(F?EHK}Yf4Evw0SpC{GO(AX_7vRIr9_0U?d|-D&1Zww`GihVxDE9=2Nm@*X?7c zS2+G>myK3ntEcGD#+($v+4+`lL4Q9Il(GMWuEEDoA!Lm(Ny}G}XO^oP7PDUb0Rr$o zYHhslNi=jw+nKP~I2(=Oz__6QPd-j7z43l64nuy(;kkPHbN>JMIn!ZIhTEqB%kcdF 
z!QOjEHNA9yqk4SAbCe<~ARr(h5D3KvNLPB1s(=uR0+AAmbm=P6r3eWq2q?WIlmH1G z6=|VL4nq>R~6p9+eZvSO9$Z)8^KG%E69l&=cmp=72g1cBny&Oa!uUf1exLxJ1Ag>NoV_zY2o|IJm4ta@ptvFP z3FMSC#jE3d?C8`{UB<^3fmDxC{m~Wp&)AD!S?AvOqh*eCT9|+J`ENe@|HBFQyOh*( z55brJ2c-l4U;E#0PaKQ|e4X6Su0aAC(mDU$YWA|g|L8~ie=Dsv3moU)kpJXI9A78y z#TGyGjWs2`5FLw~8+wRlPPH0f+~lo#d0DWEUa$Y;T*(fPW&7r(DjYMSk&jr`e(#!W zOM%c2&4bOR0L%MQy8YfqNbBsI{l`;3Hq@SG+K~q_M-2%RgCGtz@Wg&l`5@Cb3v*EO zs$>&;fmN5$`p5e^%2y_Hr$7cVqj|g~h+?8<-~9PigdgZX+3~H_#U2du;n)G?E9YVn zMP!4KuTfMaB;wlRFXo}(E8+DZFn)wcYcUvvif)(k)h94L7=H;aAqEEd@-%?*6_}oj z@J+B*un9EcV3z*z^Q-?7`G1m=#P=K@M4+H1kzW)ep5bq7-g1yraM`pC^4yJWdk_SY zq|2J;o#93HF0iHl++qW5^Z4Z-(Y|}TTXyG)WN>%Y&W(b~btA>kK=f+4fYkP5CQ|Y1 zOt-8A6(aB7CRw`x4=fz`_+)Ja)ClnfY5pL4oAX;0k#nwJ^YSc&Ajwn{{C%!A)=-5=Zn;~3XbONp9-^h+xIi>#bF9NnTD?uh7g;s zE0X)jb-9y;OCpGEfqDDLrTjPHi;-QPuCXrKavYh3CKzGC<#C?wp;D%pR_ATAlUwy{7RDc?21`(xh1k}K&2eYEbviQH)LGf zzO4{5gt$E<-*o!u6QNupwLBEHBG- zeP?zq#Z5Bg8pTatMYnR?y1UnNJl|KCdAf{Et-zxW!W!`Xa+tY1e>U5uxzhal)-G1}1ezzE6|@B|F5TR^FdkTG=f!T|xCGfd&jYRVh8lp}7) z1q4ZMUCtC8I&yYemM$j}meJ#JsbZL%hOP--VlZDk&rh{}W>y1l<^XLi+Abbk`*TQ) z4xKq=hP23uT#VE)O3a99X^5=Cr=GKyig9t)&e$G%tz}cm9IWTQyuQ ztSS}KtU;LbYYo?MiXu2fs$iZ@b>qHeR|q(@D?IJ6Yq?>E;9MDvU4AiegtOB#fyau@ z@a(1H`&plRPTpcM&v-RAeJsRMSIn+8D(3T7bXQ=Kl2K%!!K>yq6T-X8VIFbIyC)MC z87CyETr$>Bv3io}@_p)2jG_yp{w>3$u6Ms`kE3VvBLB{6E zmyYoBJmoxf5gNg2aK=bUKuvI-3?*!jvos%1-T~eoFY=m)pN#FYXKdSjX6QMrq1FuH z*rZCUl=a5CXxsxKIyWkO_ktT-MX?IDPf4 z6O=^GCwcM_fRd>3X|ks=kd{qDhzF2&=)gP>DvAhucmI&atB@YP)p= zq_`%xS{pDQQq*f~@Oh4fHac6ck1A@7GW&*l$ufgu!A;D70a#`zYtM@b$*#zca1xB+_n@e)oN1z zcfAvZT@k4?&d;zRogFQIEJ4MC();oBfz;l3Px72QUDQyaXB{M|`pdg96NbdN3sh$v~&Oloyn*KU9fXgR^x85>*%I}>0vFp9gSVs5)s8=T0Y?{Nmn4PXS&_BI383X zpNq*qdoksz`;PEednPm9wGINiT!+>xau~~7bs**jY@bnc@V>rbx%1eE69uE!qcU1Eoi)oDmNZ`}k2W*l1}7CCKK?-BdwN&SeAp%a0E zmo7Mn<1gO@b=>U}A7&CL{Zy*KQru#%y`Jm7S9|T-kd2hRrcW@U$iPZU#CXKu&{e0; z6}d+kk#W^_m@Y^H9TW``@oCzumP*8}QTp(yg;*@b-S(5%!t8+JR?lG2pVNicB~JT* z2IFn_&SIx_Nu`qU5W(PJ)Nm?MmS>7M$B8=Uq^BNk}z?C{SA9UBTy-6GAYh7;4-^Pj^8)gv@Tg=1#O_Iq%go4z|_fmFW zgG>mwWJJBkRf~6ay6201V~G9;){O|P6w~GFb(?<}Cze%d;@n*)xGX4K1u5=i|EngD z=LhOaHYrdXZO`W4jx?az{I)w;Cpx-${8lwCw`^AxJdg^5cGuFMJxOmNr~kY$S+)BU zCT(up@3xVRmf3rG)XM<^t+Q{_^LhIVxx?PK;JcYiK_qSUI|*I-+E=L`Fig0fe&_DW zSawM3_s^%Hr3dN*rFQWb5V$}!i5G(&`~}TAtY33uV5ju+m#JB*%#kuD-6TjdEU4u94)wD7m> z*ikYHJ72e+vIu5)Jh@Yv#%AxsYl-@Pw^=_ZPsyc7YZywH5gl%G=(Zze>4?UCm}?Li z_Lj8{mKTcxNaC$wTJ9jS(R*y2Jixo1Vw1e?93W)@or%%Lli+qY}@7u%I??VFXAr5x4t9 z1i^cz@&U5%_GO~?6Q@rXPo30Ih}X-sC!O+9ET|Xq$~n!o?xJLqSHJ4Et_rG+v~4>!!MbR-A#3Nb|qh3RSlW!T#L=C^$U68@+uoaLP|eP`rPjO&e#Tf zD8cUWrow3X>i|vvUTp%d2J63VtrZYNwtx3>dRr-5r|eJ7q&tq24|P5KY37xX36?cE z-4-O?f>;Ni(FMcpwyp#(e^ln_(FowN2)fJ2o15W&)$>N>9O+*gdZ(CfQmncCo~hn7+{+Ho13R$>-5b^vu$Qvf;!0b)N^@`# z6go81=RW_Wqt7P^ymmoNXQ4Cp{tga}Z@oH|R#zs&?h-u&eT-+!u9~KD0b$*4{eTCz z!iR+qR=GffEKL(s6T9b=(!QD~(0A`t?y$e6!~3%O{dp!;ci;OJ4jxgTw{doF3MQd5 zqlnaUjjID$18VDLOmCQ~)KaZpcYF2j$3KJkyvx#EpJJ~z*i!=8I20lx=;E3^sErfC zZw4e*$EuS&e5n#QV&Cna4$3pK;iny;2xkBdIRe6fbMWokwe&;P>MX$f?Mq`MDZJ=7 zQ+TYAZQZf%qMXCD9qIUq_P#Pr!+5K)%=`ZI5r$+!NhHLlOOtNK+(TjtsxiQ?NIX>r zdJk-TDYMd(njQ864|(@iQJvaSLfFzP|5}%mXPjv6A1H2<4&*DR^-9wRAXQHOG^4)s zfm@@M2ZFV+{l<>Fp^IdHSmLOGLl3G3X>c`>>RSYMJTSxBWzB5vp-SZ9X-u&^Xen|^ zG>S4)rWL6F>o#8tdfhq6XYIaRT{hZyfxX=Bl%F*%(2101-&xW(TE5lo2_-Cemf%08 zpOm@ZXT5hYI#M%Fh=jse7G~@gM;1=i-^+=3_H=K*kZ7T(YW3r-v<_Eh+rhDZfs{*E zP{`|VIwXWbvBxx{u+jZrRIldVNf6g%ta0?BqmC{zN?1$!*naIc_3*sMY&!86<#izvBGokq*||>>1sVVFKw|KRD1;;gy*psFzK^=ky# zGAbsL;SOe>Yj4Eg<>+<)sGGb>$c9Yv5%5ILd}aAd@RUte%Jc= zJHD#lVCBa<{l(3B z5$^cnSgV1e$Cy{$AD%kiM}frO&$AwYS~Qi6B&{o1;_NSC%__TDpX#6Nu1HCozqMeT 
zq`Us%a;EM#e3Yre#})vL_DBGXw>(B$dii^-p2dcF1_mTYOQroqe3jxVhr^b`PDF(9 zjI%HwsP;F2S$Im~xU};9~cJYLN4e0*T zJ5GU%`l3PSzR|7eQtXgNl|JLbXnPbnV(tqTn4%LrS|L(1pO=n@VyCW%n9hf&PP=zH z=9W)NQ(#jz5{d17#FaL8GY+ISvmiUH)V3FNye`1^!q?UMA_rI~12jQkvid)S(*(V5 zM`1}lV&7BNmOh|4#4*|v50PXxT=%VrI~>cvWp=Y9eQtzB&@e+I>D}%Pf-y#&w#d}P z@q6~~Re2wGGCaQxw#L$|^5n*|&C>Rk9#9RwRz~9=+O(wRw|1Q%S@&3b6xIh(v}^fh zweB38JQ;M^Lh6k@s2H8;Ot%Ztk5|iKNcW^NFjJ8#70r(KQC*LmX~&g`M-XNt4!SXRm!I$#2eboys?J;VeOU`hX2yM1)uE~Qei3Z99051*Ha{mu zW+1C;?=rzS^C=`E=_~st8Su-w3X;lW2(poZCY-+QwiqfHM#k;nm0LUhqwEmS7j zuYhWY;`K5=s(vjhHpF4&>6?rvyF@mD?falf;`{@s#Pjng(6H~ADJWOi|CTy$DiT}g z-zdscEL#2=2(<{chRIN1)B7Mqe z=Q;nqtTqk9WnYKj<}RYQhZE;xaqxS7P3F2aIp_2{fY?fTqyzdE^WD#9;c)@Y9+sw+ z6ihV!rq~6!MmP7|O)P~RFCXz@d;Bs_a_jx%#@x;=8X&g79lg!cD{43cDkHspu{KSm zXk@~&@qQ7M=8pE7w(}tjOx2GFG?c(*T3zK@bF^vCE;GlMPG=i+KNbUENC?HQi9#Z! zK3{dKI}dkfXc{F7yF=xq%|MEi_wT)Y^{U|Gif8~=p+z@!xV2}FNay4UUECGH7+$|h z#Anac=RhHfrrEpG^kZ!V-0Hv^fL}8olu)DoO-9(XI!B#Bi|W97sc;IJMQ#NwqUffi zeNL3*nwk>;lj$WwfB)ON-M3zM&5Ka0Wz8}+TtUm3empdW9iKd*L|nG5)fz85WGegE zdr*D|&zKw83^eLf;WKaqLdURHUx4694$~)CHZx9~#AE++^Ip9!xGrzp@G`3AMc#v( zL+0!ckHsC+=;L|ndK&D2Iue7qWou`xF^$8<#v7*D+W=H5|3+ooZNv2#yfIg2Im#M; zJ*)--V|&=7$v!{y$UmNkKFo=6HrJFuk`m4I|^SI=_y$n0R^2Hfi0uurYpF#(w(?o;ziKfr5zJNDTVJ zH@Z7!#b;}D`r`>#GDZm$-8PdE1@uj!cGZdChz=R}!_P?{zJY=9PElBwkXm`~>K5bN z*GUDdXt>k6l=f9#9iMXumV2L`YSXSndu2LU7iKDGuP=_Sf6q=`GQ8~fYJy=%V|Q&E z=9&y48RxaHWwG9azvFvAa+o-Om206xtSHg((aaqbB*HB4Z0j^r!@|CTb^Bjk^|H)E zoPFt1Z}#4|_i@R4V_MV9(&lUD3al))g*P8~E5#>-&S6|;N}!v4h*?Qp9QLlmSHA2Y zhQ=>D99Ap0DoKC~xr6t~>Fo1;XE#AT3~zckQ@P5Tw1yNs-4VYK?nqaGVymBy?5|np~S%Mp$c(^6=mLeHKV$D z&5J{O?^^w;O_>rmTSy`8_qTC1lW)xHCfw~)t)gMi6YKL5Qf@bn{6o&pFMXw507OQ6 zkwVrCoXqAK48ySvdZy=$`uz%OYqP&)(}MNO1e5MdXjks6h4$}!ob~}7NNNgztxa_E zIe9OnLtLb3V3P9$cDc+?59#NQX|}MRKbSxz#<13*I8ItF78}@2st_;bgc9hk=%hhIjS<<&xMt6oHygTr1xqs?BZudn+4A}NaF^vLDpFU5q(6Q!G@FoCG|C@0 zGs_JFfrK*s>g~tV=D4;V?DnMKm>xB^;O>rh#_`lV;L~=E)7M+T#@!;laZ6J22x~p% zVi;5V%Knw({w~TR5>?aLzlWV`C5;Si?9wbv&_sdcd$0wrW{E0Zt~G)b3C&`#U#{;PO}I%$5X$)GXGH~3R|yYE3}A4i#dq(v>_s~C+*5Ff z0S8To#tOu-U_c33rXB^^Imf&t^l zFl!>P1HYV^BJn%O#N3_qen44mC|3LHQ@O6Ae6VGKFne`{<~f39OeFJJR#^sXub^QrKIk%H~2 zaqe$AMA-5lSB{`#?=6Q}t> zizA}qgaS{-8dBK!cX~8DZa5{DX4e#Y{ra&`YNz2AfsMzuM!~HCXXhUk7hQQ-R;Y5B zC@bFsOvK;qlM25)+6cpbT@6}E>*QwWOcS+_T}g8M;ylK% zzo##D3AD^49 z+t#rFi&2_WdDYHYDZBj>=%X|_~(yFZhW(14LJN6aIjcS)Y>K^}0! 
zQIA%A|4@^2bp)DB&JICRNcsRupZz|Z5)M&#N|89{&*XH+fQ;VAwg?=bJZIM)QuU&J zAth;-kvUs{Ev(T`kN8S8sa2vX?76oKS0do3X9Sj5?WP9VH=Q46NavNRgx6iYmG*+00gh4&XM)!<=;{ZE*SgwBqN}`L!Rq5S+KC& zQX?-;{eWZl#G1_mFYxp$VBGa*pQD$LKz%?|hIe~3R=$>glk`m8TU^tqihOaR)o^xz zx6Jx0f|;3A-iqP>BrtuSp9i~BXl==5TP8tj?d8tA_Lu&a`p}g53$GFgwA<6)udehA zX7Ob~PU`X^3lYD*%MQziNhU$nqVYn{Z`|nURt*TG2`eR@c~*1V=lBqVs(a&40O%aF z&?2(E4qPE3P7&U|G}1*}FA{!S*{{N#nSn+18+rWY*Vnw&Z&POYc!$rCUjew^bvwm=-Z+ z&b5XTl^bn)Y4*O~{mQo4rq#wT1&)>;(Q`j{4-MtG9gy?e!rH%MnYNF|I zYVXilj))WcH_3ce;f7yI%`g<}vmsugoq23G5*nk~4YOHMW|ZuyLI_tRG{p)`7H7Y*%-K1{7jiOV zeRZGCtqOt=eJP4ueCbme-c?k4&GG9v7G)f+-Qe49mw@8n?^mCAzkl?Cz@2_|8oJ%5 zwBc9-ew#wm(|YT(U2AgXIC-K-_csB4-a9X1j*>Vn@TsA_xUg*m4`0pxH@gS_5}aj^ z*QKg5HP=vh0cPyTfP95l{>kK^nU07Zw=2;yTM;=KadYm^2|d%_Re{7z`^461H27Si z*+&O!_It{&PkFrRs-Hk#8}vx2BZ-MX<@D^?W{BwB*%(4el^iP|tWp~%Y?qJB7>G|n zmbgPSJ)X*x<1z+J&rQI&EMvLPB}8&uj2lSTa3&{=I_H!+^*Yx3{od54kl974ZmWr) z;p}dDqktINza}Oz0&a}MUo$5>CvdPfOYPc<;q;P7c-y*s#`3Vx`aM9Sfo@B^5M(4B zEf*ufl3mz3SzcLi@pW~TqqvN2f8nVy#u(~X;P`(0h3>jE;bFcY*@OU;&AAds zTP(KOlRj@D^c`o_(ZNU4#aOV7DY0{76^M{uCyVfDxZkT7UZ_a;<1;hF8i@4NK$g1a zImuHCIO|*T2XgvtmeF+;jD>0y%DK{2JKSVJ!Zu#ahmmpdKuH*XD2fDDf-wP!;2qR|=fD#qbH;e=Oa^!q1IqUOY2K4R*Y{dH(m!jTx& z)RlG^>P4HSymF%;HhFwy5Y-U3z1QRSCq01}c7ALn-9Bjkj+m2B$ad0X%d3l|man=7 zR2ZMztvyTN9N4j{a=+VU2J_MDx40B3c{7}0zP*)KJeja2L?)GeIq^w0;CKbej#KS@ zbP@Ih2%Kfo6-&ntSa57fdjLH&&s*~d^X^S@=#@~cv9PMOD-W~R)%cyMD6BzLcYmog z8$e~2ijmQv5unvCI>C?D57y6w#Z{q)d)$hke2tvd1b1RtU}5Vyz5k3H1})d z_ZTt;tj6-9QNR_Jre5KzdrejiFZ<&#bSUzM~!2^S?NzCSzsvG~r-X0&!z}<3biVIeFBKMwG z+;!fDM0L+h{)%S`oOQ$Uqz;<*l_gMB<2^qMZl=k=2=Cu&7AOEO8K%mljl3>mo3J{r zaCnKGZJ@=mz^JeJmIucLZeKT~-)Vlz^vwo#k?gX&=D36u$ZUKAf#&1{KO!?20RO}r zdn4(!omT}``Mril-JAC#`M3>HyrzTibPIVLhE=V1sP3~`8^#1Mr?~Z*`#t>y6h~$_ zt9ahURk35C_9TMv$7j!Xo5;({UULM;f;EXMWkcp^0Q?=zxOP}8CoGp?UD$Ahd?MP5 zkFxSACKkd!Rrw7fwtA-3xW5m#oDpw&VOn(Q%h1UhA{XX5(J{XM`2l0kNk1R)BgqOL z$EVls)u7>Yoo151btWE?L>@W$4)33BCOs&oRani&xH1?=Hpy>VvzAxB=amy3g(cdx zs(B(Lqz66dsh+pDx?L19@V{gZ?HdcmIb`V-FS}8W;V)LR$rumrxF9Bx6S?rQ>yma` zAJ*u-Hx}pYO!vRaHK66kMrhuf-QKLT;=S5BbDWE9Uc%=$5mgY_NY|S#QngVKz=6@H zdz>n2)&@Xb4@VQ6aJRPa<)nmOk(DIK-oRkJ5E8v3*HlJs5+%;katJB7 zN;y2DZV)kVvk`&?7P$|{Oycs-3Dlsnf=c(}^V^NN=1bI9s4m7rAptx|9*vmaXBteA z=oTtg`iJV6zimcIg>9l%>AhhN%yKC8NHiaD-`C-7bOd%maB?xtkZ6g-^vW-@&pOB9 z4QWY3&RO@MsAD^OK?G!(9&kUs<<`Op_q#mMaK(m}Eie|Fzup-zm}+CR&9r^T^kC4) zQVZ-t(gYXz-or^J{8RhU6z8bl&kvx;NksWY)rLqy_2{%yhmv&5tWQr)Q{1Zl^um~* zQJw+&4Cu=~v4@K9@6iwo$iD{T*t%0QDAvm=1?>-~)iTeKO~hF28Kf+5b8IF^)`ux* z;RHK;ugth;6|y;~!tyrI+i3+UAJuz4Mpge9@VMCSBmSh^-Rt-k>j!U!vPsx>HA;x@ zJ%I_l>whQFmf%ZO0ltRiz_)D`>8@CM0U^v6N2EIj8TP<<=Nj19Tv~HheInkh2z& zYwT57!m+t*k)GP3CKG*uKrnP^Yw2Gb@2?S-7|0Ov*+7#K8rx`l0nMk^PZ(&cu8h#! z_qnovvOBl3>Ae%HO&?zyXxZA-eI>%o13nP}>c0(4q@&64e^k94V2l*3n-Iy);4VtS z)|FwR+eEvk1~YD5-zQ$yGx6-KoGoaQfiuqImknxT1GUyG zB?EYvl)f0eD+}sj>+qaNDf4kq1IFG={v38wwH*&d=JO(EWoFC%vlvl7<$s6>vH|NO zcr;b|A~mlzc;Dg+gEJJmBXWk%a}4v3J0=WYTYendu8_Y5v*by+JNd>DN&_&Eo@Nin zWaPTQWt<<+o4%+bJlFCQhrfwUzqDAs8gUOJ+%jaG+a|$wk;Wj!5m;4|TgIAip?!H& zlIOOE)(xMEwH)y6zDh>4>fg;&Q}tQnTAgp`6P_!x(y-V&-S1;d2DU(OKn|>>+gC3+ zGPya2Xv*dk*Up$~RH;8`BDOJBW|v#b(P8u#>LQG#TfWc9!D4x#bZbVsVm{G|OF9~2 zMYTg!<@5YZl9^v{u`&&KX<7A5fy|j$bOFkuV?h-7R@M~^_NNp^)OkuMSo0_2eX znjPdE>|!UpNws+@9KIusr}Dm=FE}jiMzvLAsy`N%nqNYA_}QRFQ61-j>Q(m1@ej>PL9SJ`WMKQ+LNBUJU%&FV*Y7V zY9gT7?_Il^Grd47V@v1q>{mgpe@xU3eeqvD$T@ct$u{*&*JB=20FM){j--uELoz;? 
z4iR~iJ=S{Jhj6t3IBA%|CMceaF-F~W>-{I@90Y9@Vydlg7xfpG<=wARQ0A{f6pJ_u zaDW)WPjyb6=U2Do)Hv`IlRbDqaE*5cxnBR|%{365a^DKZzAwXZmr)7$l=!z_1hTHR0;qr683-CDKi)*{- zfZFs(DT+<*)ac2#eb4`o*gcv zYCKPbB+)kGAA?}FEl!%FSF#Lkxqk_{N8$Sfvgo6$!j|G9OR_DIvZ z&k52NEB7m7=neYAeU9vW*|I`vZoG|CjpnB@=tny+Fho7VLw44(l!zuKHhN^E*0qHi z$ZT5PsX{*CpS$qeJ9GXF{8Y=?&0I)T&%w_j_}>d~Ipo|{O+*g|0-o!q&jpq(RB|XA ze!EbYX2UOl|;3PNQx%!FFJxd1(`zT%$ z^lOj~W7>%XS=3r;aizxeC%6enloMHa`cuTl((TtpY%0GO#y&7uXF!R{Fu*Z_1cfYx zZY*$U0}9ZnNQ4XO{de5~(+WqEJCM}UpWJNt?cs6*9_(RBOcy6a|O!Rzub9iglP zm!J}+uL=eRQo^x-W|OnO-ASCf1`*3|h1oL|iBIy^9*MueE*INk{TLlG2MCHrzUO}v zbu1k%`+9}r9WQ~(1QSyA09htC-%@vcy|->1ILe#8{N}(_f*nsQa3N8K@_d1fhu>)# z$#8_H3CjM{ef17BA1Nc&uf~Fk98;SYfoy@B>l&0Q%+0#{{zP`#oLXx+Xp(3)e5{V5 zQi4JFN>_`(OssDI;sR(p{S=r>SLi^Uj|l6(f7+;QgsG&9AgEydufkD7+!6@M8-(~O zDQW;gR%Ih2?>hg3ZpEa%{!06~bcoNlUl{t$h7W)&+`?bJ?U31PA9aDSl0^uz+ckyL zgWuEUBo2{8rDEjwF{^WS`K6!`Nvt3Zn-!RjUQJmMQUFlhh`?f((I#Qproi zjOyPVx#Ygb7mK`puL2vgEV-d@-JXy0%SNZ=8Q{8p3&PW2k3TP0m$Gv@{F<`ommUQ% zszG(T#`cd+>Gq$7M`sl0-9-8&fzgw8p;UT(vswqmT3%Ed16;@r_JfE^M{N}JOg4wo zsyj+JaSA&p6ybUy!;E0>G$XJKIPVN}1Imd|GRLKss)u(0ji1Gn%YHK0y$-TtE|(g1 zObizUL4;yeVi|O}2(_9WsVKwkP_l68eBiwm*fypF7nr8TqNq3U?c)IBgnJ5IuHwnr zX^M`F(#7F^d=pZ!qmUn-75AI`^YB!{F){4l2Nmx=ubtV6ty}xc-mi@9oF?$5NsH9@2DBc<+cg3_0L+ z<&&l@vu_(o>(_3JL|9>nV=gs185R>m^?BWHzmCEI5TGe9jDKis6EnM1+WP2D1%nBw zW2%Y;hnL0FA6SU@NXa@4LVDkQ5KWPg1uKo%c1PJnN6d1{Si;Yf5X6xU$ax#jODGa~ z)8<}=V(F)69=(}+>gb8h-(cE1o1$M30fZYLf%SH-T`vY@Q^O!Wm(uJND;-$Rk0WgI-2KGCiLr989nKQa#T&8SMaWY+?8~c{VZJ<;}L9~{b@s`BS zm1h4}f;)cQQU2x-a{Zz?OKuOw0@*j}H0?jXt>mYQG##y<+^Cr2yC|?X+j+*|U@MYo zpb-?j*bz2fX)tA;w!5gcev{)IyfDC;a{V_=2Vukq>Q&=^KnpW4&N5Z0pWFQ54mKxG zU^$m$AL#a>=Rq;-u-yC*Vst2W`Y%tX2R47Mjmvb5SR8Oy`XqOvBSEdIw+$K@?7Q==P_j-$9VVZW$r(Wo-PB+f-WO!N6)1!Y;Mwz>|d7 z1Xs3dwxxNxvT>U0<>N+m1EGVs_ebk#3cSI_&isuwhCsQc1~ZXx|!el*zaJBzI3syO!psMNVmq~Rx^cq zF>#pC?2+7i5;5M`SqmrEm;$PAR&;K4aSon>Rh$l<$o{=4b#O6p{|EmA~aG#rkXeRLx+uUpokn7~7pARMjfO6lwkVv2ytDW z#~D)QW8rkvN=L#39q(zLqcL>dfWX8;g4Z-@;0IKd&mpMonte=}E=hn8&6u}8)sou1 zJA03gDVk4V_6|q5o9JmEZ?KfP-S4U;J3Vm9G#hM}<{OXh6g&I~k#s32O~&FdrI=RTQzZd>7kS#|2= z2Whd-XIPn&lk}0-O}C;O#iOyPm89V6U*_m}&+nMB;({kH)@-ePk2^Z?%Ak`>a#XYX zCVuq2rc?fjOt7w>?>Z2>a&YxS|0PkVh`dlVQ$)6*d3*QkMzHxuK1HKZ zfw0a*`75*WyikOTphsm+ty94jebe z1A1A{H+bi<4$Z{~Z|akgO8IjZk?B`I60&Nc`VzhfZR!dN}U5Ns*p6y5D~FhUU-0=>+@O>qu+MKMu_xrA%C<7Y4UiD~pL|I*_rre&+NFUe(y#lojJ^m*GmX_RFbOPr8jw ztp5Vn7}dGpM|EiI9VMc=fyv4($iTBtTGUtgD)Ufm13?uQ=K>Le1-qVR4Ue?`@>Gt(XP`4FMh8RCjskKAtGYcmkfwrk*j|Rr z`uk|cJ_Oc~-$O))V_p6vz7|}bsC#F)!OjvMcjXwcumME{So%{Frp@hh!-8$DWW?G< zT_lM4`5;Zvb=41xG3>cU66&y?W#y5D`m{M*iUPt0Inq`1khBFHD4$x`fij}JPaQen zaZgUG_YJB70-iuf;ZYG?du^-*9+P+7w3SbhxV}YG1D>&~?cNB7!BEifqxje;1U405FXa$Q^pHFZ z1r;OaQt83^V|6dNX0GHUO39PN?g|))^+mo|-3ryUt&C4nA=cn6>Ag$Di@Hl=-EyUT z5DwC>qTCt{Pup#sBH-!z)((_zoZ5EcIhSogduVXFQ0(%qdFrzM+WEVrssnVJ{)^JS zXzsI|gcQp~nNpYBZkVkxkFG&TO{G{kWJc9`818_sZnmmuPSlbMq+#7dTEgn>fk7BZ z9c8O2mWtDG9*f-(pIUJX9iH2pg7gAgvT#KiO!Gd#u1$4_lv5Lz+e4@OnYKKqT1L?L zer1Pzi212^Gv+VpS|f^DI)4O^iY{NIM%0dfI0PWsiv&WB`EoBl*o2#EdZFhn|1rz> zAEKGg>E0RA`GNwP11=ko{6*c_$OJ9$oLVK-h%U6pI*Sz0kn0_hnv$6*3-Sr^NwKAZ z=)96Y52^@>hw!!i9FqU~{%7x6yhV)T`JSLG6gZa*MyB4TI`_thSb6%>qOuSbe#kO> zyO)yTrMt54?` zhIkE50YjOQ-r3->c5WQu}G+hMPs0N)AV1JEgy!;Bj=d+|BV$x`L)tb9=5$?aeO zLj8^7Kx%@bc3sGH62u+KI$FA4R!bPoNf-c)HW5;{Ju_PFPA4!|FT-1Ppq)*JSj(IM zH@DbbyYokVp&)v$Byuh(9`Bw6*?GV2QN8Ow7$1p0z?3bW*}|2z4YN1~9ik-=*a4Tq zwZpKlKnZ~Vpz)e;>yL{5j?vk)Fko6c#En#EqhLC?)7C~w4_M9NpK%LoY+>J_mOwKg zZxFdcQGmbBJikxymJqBHDAjY~kjCIMiQnxS*Z~OnH(VT9;0Gm}58$`gk0xa9rg-^J 
z&4TtQtN&K)z&Tf^(DuTAcW=YI-}NG~1a*bDC!(>(v$C;2U$6pJ)v7MpVOGM8@D~k-r9&B}v_6LL(frqq` zuOgDCrIuoH{1;!QG9XnAZFT!YY4a~XuT1UHtZ}!E5K_O4BxLS?VyX+71+6CjV*%DG z#QT~hCmwbNdWwUxY#^}qKgP0A1l8T`ZKRf^s2+eKgAre_KLf4*y?o0wbOZk2!g`5$ zpXD$x^8M}ScaeRaaJrp)IPVu#AhtE`5Iew9hD>|~cQ)u++ylg~El6G@T`om6ZO%F| z?~XP3yfmuoNwTk5D6FmGn`GZ^HP3^#8|?qXB>t!Q=U>aUxraopzV;K$*VV2_={o(# zmxU+0=L0g+dqypx^Il(&(P5caZr_-LIu@`tjn?Qm?hOr(S;d+PAnHVgTPYFCbpO)D z*M$jo;U((#$7_!j`lN;uPLRRv8+XRsIpAN7%!iIN%|__w(?!gcUIKSz@2># zv2rX89bENl3tNOQic-qigHT(EWUL!*8Ld-)T~!%M{nVo4+EjE2QD26miOD;NdUC-m3HLZR6*FYwmegh8 zbINaWY53Sy_`Lag*$Js_4>U?_lr>?(^ANUKnBbyg4o~&$NSA4R&+r_ZHnH0Czv)d) zlW+wid)~36cVE=3AYCX&7={)CVLm?dW$K6pn1UD+qG*}#f9yCgaa}4Gb(oyiSspW> zLzUM4py?ZVWi_yCgHnHdyHbeIh|VQtv2M=Xan(-C?~Q9gl~joT4^B%Gj6<{oZY^J# zW;tP6=Wmcmt6B+dLZ)N3bhmplpgtwZw#fF4$&Wd6_82+Qjn(^GpTn@5mkzNzE)9Ju zk&7mQ@kE?pMNF1^oQ_r$r=NehvU&dan0u@((LspDcF^s?>g$TCfmCaA@Dc_+vSj{s zoOQj2BzZiyh@X_#NW8n^XY;Kzd~u_=ul&>NOdu2Uu2PKMF~RywaA4xgn$i*%#{CZj zYl4m*s*{Pqq?h6Zj;WQ@0S5hTb`)iyKrjM{2HE zLK^4PDSgGB41qclO5lEVDUz^p3P%Sw)zW|WVP1r7xox_GmHOHs)&gl7FA=HX(pu;GgwjSx8SrgDT_~T7k!u01;WTin3R)r-{9#C!_ zMb)gMO1rOx^mAA+0_A>>9Ql%FtSffCkiP5S_ za~z~)h)<@7Qkt$E5zeEB^VDIEYJK~nl^C}mm{hr!1t)o?2uD0;Q4Vs*P)Jg|@*@ng zW3vFFc79J_GCpi`WH>RifB$2yn>Dd_qE9EPDTn=PX@vrs9yhe_ixl#lQ^TV)`xPmC zD9^Dg=)J_c0%PW2W3Em(p&%gQ1NoX}(=SNvkA4vG(DZ79J zhg|>X`E}3m+4gJ0ILpiY4QgUA<%o zvME(4&Ci!;KC*z%A001)z^I5yFHej?awm@s%c4cX^1b5SzxEDV;|;EXW2wp2Gd!yf z*J2DU!Cz=prSfSkTSz zNC!`G4?*Q#x}X|B`w>s>(YU>&OF&5eFZu6D)&ZdYRWsLLKWJe}V8}c7E*%|N8a>!M z0Ds{HcB@kqA!`GH3QUyVimyPqLt%;_U#8P#G^KR7yU(5`SV?Jq`5uJ7yZ!9`=mj+0 z&rXAsThPz$rj!32=)XMr{{@boeAwSxk^3NLaQp|w6AZ+lwx!OG`KtY16JfFYLe=H~ z%Mo9Yd{4J);3c~ zCbzLb?ttqLh~F1IVJ7&&ckQbtGY0;xa+Uw9y(@ia>dN8)6lK+l%Y+PAB9;IW1S%ki zjI0UDE|DcHg*=52Et9g8#UU+>1u|?VED{mIP6H^9wTenbz&sXP5Nx240a7r9tX%K{D`)!&KJ&C|u;bHi;nf9m~v^ zqq5ULdtS8*pFw^zrb|3YU+0nwCO`GPG3%D;-aorl5DFHIAF*)^RT&(5iKrV%?~sD!^mRvtNj~_4}VF#D=2s_YcQ)Y*NQp| zh)jS#Yb+#;&N(jGEB1*q11>($st!+0w5Mm&Sj`OttFfnd50&d29e%(0&rABQb>+4l zHmUMjOG8d29bp*kZSHJqVaPr9KyoCyBVIf+d3Uu`Kr>qucsS5X;2X6h=C9Yv|E=Mg zZ|=D}c{${k2tZ3WLg_}2NBJE|t8iFhDGd!^E0Coh3GlQt$_J!}>?lesiO7t8;hiAz z<92Rd^hBfrgfF;JY6~Zos;3?Dxh~SW`ZIt5LdVW-OoO%BDo=?H@#bWuVbKAsnv5I$ z&FX}h%>h%7z!GVb>TK3*^ka;WX$x^%32KVo5{!x6gSS+;=i*E4_q{(_o4E+yrB9uz z_4M!ZFh~)$mg{k=#fx6jsEQ6v(SRhQmyLXziBaV6G#q*|^4k#-e%Hl>s9hz@+ecz1LnmnFv)J<0p z9(_t@EPl)V;0Ku{-M_MU4~LoDjLW)Qo;TmIoOhE?>Pz|X_N3a>z96l=^)O-l9Vp<@ zD<(cdS}-KU@%Z$cG9#h5_K2J~Sin2>=YD^7h;}jE32_7f&Vee)vw+xlTSow&uRR?A zz&2hBLTvSK7r}*2@krfjy^RW|O~J9ZnUp^$(1=*4z3586RhVCfCCBhk^`Ka6`z%W> z-E!U+@R39ur?BH}OO{sxJf(d!&X!B3u;qHNl%hn4;7RcKh=p0TDJrDi`08RgA5dz& z_c6d_*khK0i);7i!Tl&<*t|Sj$~y^$_Q{zIN3{2~soUF}N92Be9X=n8l5MoywM3ir zKUHJh1S>^UcKLW5%%64W$QyeeO_9a~dNp7m$XKQbh>g{+8>X{Q!krHP>cKO%GX1ul z&>107ybAuyw%A&LRe3z_rrc?C+#cjFmptyBjQeFYl$?azDRT=&TW0OA0F5NJjZ`(w z^E9%_kIDJcJv?=t31g<*$v9;4pg9=x@4YeK*e1e}m$VmJ`4eFddcAe^PVB`>&KVgp zh}U+kQDX`ux0KWlC>a5mT~S^{L+f(dYVtn#t|iKj=!orJOoG(!)<)#hj7y2jmmEnk zU+J#r;xzl+0z)kPzcI#JA2V;yy1wkC%Bb8^^gNO^JMEF-7YM@b{#DyrP)6cztu*LvurWR$?)23R11i+w||*;I3A!_h@i6Nrz$kcK6%(+uvev_fNq)HGlfVV?lQGZef%sEF#37SL{s^;}cvQ*IdbB>;Y5 zGMI0B%alR<8<;T4A{<7VONT=?aj_1RRf5W0Ok^Wfpb~C_x*5nqdvq4iJ!S z$L+Ssds$IN{PC|a(3R+{ZGou8CjF>h*MzUP)zIWg%wREPP{>IJiocs=Ffhx z5I|I7@iR#t>VHlSkf;R4l3kgQ+u8wl(W=3JQj-4hMDe>Cp&unw=3Z{I9a{T9Wht%d z>nk121Ws9}%Mu*tY=}iB3q%`gCb7Y&`hB|^9)2F7db&2HLkb?99!>t|9@R#YbbYh` zc>FMJ@p{#XUI!~dABBZxtAwb;l$}8wCsI@$AI_PGX9DzlTk!x{$VX$5L}1CwrbS~h z=1|OQizhdr$Xcn7DJ#i~ErJuyJPUy@%qJ(u!+qJvb(D|Bo@f&U-@l&vDpT^yjCa4k zbFan#mCIX=`a zqy for example: different weight proportion on performance and 
modelsize.| None |
+
+## Examples:
+
+There are two built-in objective instances: performance, modelsize. Users can also build their own objective as below:
-- arguments:
- |Argument |Type |Description |Default value |
- |:----------|:----------|:-----------------------------------------------|:----------------|
- |name |string |the Objective name in [INC](https://github.com/intel-innersource/frameworks.ai.lpot.intel-lpot/blob/master/docs/objective.md#built-in-objective-support-list). Like "performance", "modelsize",......and so on| |
- |greater_is_better|bool |Used to describe the usage of the objective, like: greater is better for performance, but lower is better for modelsize|True|
- |weight_ratio|float |Used when there are multiple objective, for example: you want to focus on both performance and modelsize, then you will create performance objective instance and modelsize objective instance, and indicate their weight proportion|None |
+```python
+from intel_extension_for_transformers.objectives import performance, modelsize
+```
-- example:
- ```python
- from nlp_toolkit import objectives
- objectives.Objective(name="performance", greater_is_better=True, weight_ratio=None)
- ```
+or
-- Built-in Objective instance: performance, modelsize.
\ No newline at end of file
+```python
+from intel_extension_for_transformers import objectives
+performance = objectives.Objective(name="performance", greater_is_better=True, weight_ratio=None)
+```
diff --git a/docs/pipeline.md b/docs/pipeline.md
index 0b8e62573dc..7d7c6af5612 100644
--- a/docs/pipeline.md
+++ b/docs/pipeline.md
@@ -3,18 +3,18 @@
 The pipeline is inherited from transformers [pipeline](https://github.com/huggingface/transformers/blob/main/docs/source/en/pipeline_tutorial.mdx), and two more features are appended.
 * Use a [`pipeline`] for int8 model inference.
-* Use a [`pipeline`] for inference on our [executor](../nlp_toolkit/backends/neural_engine/) backend.
+* Use a [`pipeline`] for inference on our [executor](../intel_extension_for_transformers/backends/neural_engine/) backend.
-Executor is a inference tool for accelerated deployment in NLP-toolkit.
+Executor is an inference tool for accelerated deployment in Intel Extension for Transformers.
 ## Pipeline usage
 ---
 ### **INT8 model**
-1. Initializer a pipeline instance with model name and specific task.
+1. Initialize a pipeline instance with model name and specific task.
   ```py
-  from nlp_toolkit.optimization.pipeline import pipeline
+  from intel_extension_for_transformers.optimization.pipeline import pipeline
   text_classifier = pipeline(
       task="text-classification",
       model="Intel/distilbert-base-uncased-finetuned-sst-2-english-int8-static",
   )
@@ -34,9 +34,9 @@ Executor is a inference tool for accelerated deployment in NLP-toolkit.
 For executor, we only accept ONNX model now for pipeline. Users can get onnx model from PyTorch model with our existing [API](export.md).
 Right now, pipeline for executor only supports text-classcification task.
-1. Initializer a pipeline instance with an ONNX model, model config, model tokenizer and specific backend. The MODEL_NAME is the pytorch model name you used for exporting the ONNX model.
+1. Initialize a pipeline instance with an ONNX model, model config, model tokenizer and specific backend. The MODEL_NAME is the pytorch model name you used for exporting the ONNX model.
  ```py
- from nlp_toolkit.optimization.pipeline import pipeline
+ from intel_extension_for_transformers.optimization.pipeline import pipeline
  from transformers import AutoConfig, AutoTokenizer
  config = AutoConfig.from_pretrained(MODEL_NAME)
diff --git a/docs/profiling.md b/docs/profiling.md
index faabe4a4e71..09ea0e3fad6 100644
--- a/docs/profiling.md
+++ b/docs/profiling.md
@@ -1,7 +1,7 @@
 # Profiling
 ## Introduction
 In terms of improving the performance of the model ,we should evaluate the performance of each operator(op) during inference.
-NLP Toolkit supports tracing the profiling of operator latency.
+Intel Extension for Transformers supports profiling of operator latency.
 ## Usage
 ### Example
 run python
 ```shell
 ENGINE_PROFILING=1 python run_executor.py --input_model=./model_and_tokenizer/i
 ```
 or run C++
 ```shell
 export ENGINE_PROFILING=1
-/nlp_toolkit/backends/neural_engine/bin/neural_engine --batch_size= --iterations= --w= --seq_len=128 --config=./ir/conf.yaml --weight=./ir/model.bin
+/intel_extension_for_transformers/backends/neural_engine/bin/neural_engine --batch_size= --iterations= --w= --seq_len=128 --config=./ir/conf.yaml --weight=./ir/model.bin
 ```
 ## Result
diff --git a/docs/pruning.md b/docs/pruning.md
index a1fd92f4802..8f1480649e8 100644
--- a/docs/pruning.md
+++ b/docs/pruning.md
@@ -1,8 +1,8 @@
 # Pruning
-## introduction
+## Introduction
 Pruning is the process of removing redundant parameters of a network. The idea is from Yan Lecun in 1990: [paper](http://yann.lecun.com/exdb/publis/pdf/lecun-90b.pdf) . There are two types of pruning: Unstructured and Structured. Unstructured pruning means finding and removing the less salient connection in the model, the place could be anywhere in the matrix. Structured pruning means deleting entire blocks, filters, or channels.
-## Pruning types in NLPToolkit
+## Pruning types in Intel® Extension for Transformers
 - Magnitude (Unstructured)
   - The algorithm prunes the weight by the lowest absolute value at each layer with given sparsity target.
@@ -12,11 +12,11 @@ Pruning is the process of removing redundant parameters of a network. The idea i
 - Pattern Lock (Unstructured & Structured)
   - The algorithm locks the sparsity pattern in fine tune phase by freezing those zero values of weight tensor during weight update of training.
-## usage
-### script:
+## Usage
+### Script:
 ```python
-from nlp_toolkit import metric, objectives, PrunerConfig, PruningConfig,
-from nlp_toolkit.optimization.trainer import NLPTrainer
+from intel_extension_for_transformers import metric, objectives, PrunerConfig, PruningConfig,
+from intel_extension_for_transformers.optimization.trainer import NLPTrainer
 # Replace transformers.Trainer with NLPTrainer
 # trainer = transformers.Trainer(......)
 trainer = NLPTrainer(......)
@@ -28,7 +28,7 @@ model = trainer.prune(pruning_config=p_conf)
 Please refer to [example](../examples/optimize/pytorch/huggingface/text-classification/pruning/run_glue.py) for the details.
 ### Create an instance of Metric
-The Metric define which metric will used to measure the performance of tuned models.
+The Metric defines which metric will be used to measure the performance of tuned models.
 - example:
   ```python
   metric = metrics.Metric(name="eval_accuracy")
   ```
 Please refer to [metrics document](metrics.md) for the details.
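A minimal sketch of a pruning-oriented Metric, assuming the same `metrics` module as in the example above; the keyword arguments mirror the quantization Metric example later in these docs, and the 1% relative tolerance is an illustrative value, not a recommendation:

```python
from intel_extension_for_transformers import metrics

# Accept a pruned model only if eval_accuracy stays within a 1% relative
# drop of the baseline (criterion is interpreted relatively because
# is_relative=True).
tune_metric = metrics.Metric(
    name="eval_accuracy",
    greater_is_better=True,
    is_relative=True,
    criterion=0.01,
)
```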
### Create list of an instance of PrunerConfig(Optional)
-PrunerConfig defines which pruning algorithm is used and how to apply it during training process. NLP Toolkit supports pruning type is "BasicMagnitude", "PatternLock", and "GroupLasso". You can create different pruner for different layers.
+PrunerConfig defines which pruning algorithm is used and how to apply it during the training process. Intel Extension for Transformers supports the pruning types "BasicMagnitude", "PatternLock", and "GroupLasso". You can create different pruners for different layers.
 - arguments:
   |Argument |Type |Description |Default value |
   |:----------|:----------|:-----------------------------------------------|:----------------|
   ```
 ### Create an instance of PruningConfig
-The PruningConfig contains all the information related to the model pruning behavior. If you created Metric and PrunerConfig instance, then you can create an instance of PruningConfig. Metric and pruner is optional.
+The PruningConfig contains all the information related to the model pruning behavior. If you have created Metric and PrunerConfig instances, then you can create an instance of PruningConfig. Metric and pruner are optional.
 - arguments:
   |Argument |Type |Description |Default value |
   |:----------|:----------|:-----------------------------------------------|:----------------|
-  |framework |string |which framework you used |"pytorch" |
+  |framework |string |Which framework you used |"pytorch" |
   |initial_sparsity_ratio|float |Initial sparsity goal, if pruner_config argument is defined, it didn't need |0.0|
-  |target_sparsity_ratio|float |target sparsity goal, if pruner argument is defined, it didn't need |0.97|
+  |target_sparsity_ratio|float |Target sparsity goal; not needed if the pruner argument is defined |0.97|
   |metrics |Metric |Used to evaluate accuracy of tuning model, no need for NoTrainerOptimizer|None |
   |pruner_config |PrunerConfig |Defined pruning behavior, if it is None, then NLP will create a default a pruner with 'BasicMagnitude' pruning type |None |
diff --git a/docs/quantization.md b/docs/quantization.md
index 73ca730e82e..aeb09055e2f 100644
--- a/docs/quantization.md
+++ b/docs/quantization.md
@@ -1,7 +1,7 @@
 # Quantization
 Quantization is a widely-used model compression technique that can reduce model size while also improving inference and training latency. The full precision data converts to low-precision, there is little degradation in model accuracy, but the inference performance of quantized model can gain higher performance by saving the memory bandwidth and accelerating computations with low precision instructions. Intel provided several lower precision instructions (ex: 8-bit or 16-bit multipliers), both training and inference can get benefits from them. Refer to the Intel article on lower numerical precision inference and training in deep learning.
-## quantization approach
+## Quantization Approach
 ### Post-Training Static Quantization
 performs quantization on already trained models, it requires an additional pass over the dataset to work, only activations do calibration.
 PTQ
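To make the PTQ flow concrete, here is a minimal sketch built from the configuration API described in the usage section below. The metric choice and tolerance are illustrative, the `trainer` is assumed to be an `NLPTrainer` constructed as shown there, and the keyword names follow the arguments table later in this document:

```python
from intel_extension_for_transformers import metrics, objectives, QuantizationConfig

# Illustrative accuracy guard: allow at most 1% relative eval_f1 loss.
tune_metric = metrics.Metric(name="eval_f1", is_relative=True, criterion=0.01)

q_config = QuantizationConfig(
    approach="PostTrainingStatic",        # calibrate activations with a pass over the dataset
    metrics=[tune_metric],
    objectives=[objectives.performance],  # built-in performance objective
)
# model = trainer.quantize(quant_config=q_config)  # trainer: an NLPTrainer (see below)
```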
@@ -14,11 +14,11 @@ Quantization is a widely-used model
 QAT
-## quantization usage
+## Quantization Usage
 ### Script:
 ```python
-from nlp_toolkit import metric, objectives, QuantizationConfig
-from nlp_toolkit.optimization.trainer import NLPTrainer
+from intel_extension_for_transformers import metric, objectives, QuantizationConfig
+from intel_extension_for_transformers.optimization.trainer import NLPTrainer
 # Replace transformers.Trainer with NLPTrainer
 # trainer = transformers.Trainer(......)
 trainer = NLPTrainer(......)
@@ -33,10 +33,10 @@ q_config = QuantizationConfig(
 )
 model = trainer.quantize(quant_config=q_config)
 ```
-Please refer to [quantization example](../examples/optimize/pytorch/huggingface/text-classification/quantization/inc/run_glue.py) for the details
+Please refer to [quantization example](../examples/optimize/pytorch/huggingface/text-classification/quantization/inc/run_glue.py) for the details.
 ### Create an instance of Metric
-The Metric define which metric will used to measure the performance of tuned models.
+The Metric defines which metric will be used to measure the performance of tuned models.
 - example:
   ```python
   metric = metrics.Metric(name="eval_f1", greater_is_better=True, is_relative=True, criterion=0.01, weight_ratio=None)
   ```
@@ -55,17 +55,17 @@ In terms of evaluating the status of a specific model during tuning, we should h
 Please refer to [objective document](objectives.md) for the details.
 ### Create an instance of QuantizationConfig
-The QuantizationConfig contains all the information related to the model quantization behavior. If you created Metric and Objective instance(default Objective is "performance"), then you can create an instance of QuantizationConfig.
+The QuantizationConfig contains all the information related to the model quantization behavior. If you have created Metric and Objective instances (default Objective is "performance"), then you can create an instance of QuantizationConfig.
 - arguments:
   |Argument |Type |Description |Default value |
   |:----------|:----------|:-----------------------------------------------|:----------------|
-  |framework |string |which framework you used |"pytorch" |
+  |framework |string |Which framework you used |"pytorch" |
   |approach |string |Which quantization approach you used |"PostTrainingStatic"|
-  |timeout |integer |Tuning timeout(seconds), 0 means early stop. combine with max_trials field to decide when to exit|0 |
+  |timeout |integer |Tuning timeout(seconds), 0 means early stop; combine with max_trials field to decide when to exit|0 |
   |max_trials |integer |Max tune times |100 |
   |metrics |list of Metric|Used to evaluate accuracy of tuning model, no need for NoTrainerOptimizer|None |
-  |objectives |list of Objective|objective with accuracy constraint guaranteed|performance|
+  |objectives |list of Objective|Objective with accuracy constraint guaranteed|performance|
 - example:
   ```python
diff --git a/docs/tutorials/pytorch/language-modeling/benchmark.py b/docs/tutorials/pytorch/language-modeling/benchmark.py
index dd52218029a..f4d237f14fe 100644
--- a/docs/tutorials/pytorch/language-modeling/benchmark.py
+++ b/docs/tutorials/pytorch/language-modeling/benchmark.py
@@ -2,8 +2,8 @@
 import os
 from datasets import load_dataset, load_metric
 from itertools import chain
-from nlp_toolkit import metrics, OptimizedModel
-from nlp_toolkit.optimization.trainer import NLPTrainer
+from intel_extension_for_transformers import metrics, OptimizedModel
+from intel_extension_for_transformers.optimization.trainer import NLPTrainer
 from argparse import ArgumentParser
 from transformers import (
     MODEL_FOR_MASKED_LM_MAPPING,
diff --git a/docs/tutorials/pytorch/language-modeling/bert-base-uncased.ipynb b/docs/tutorials/pytorch/language-modeling/bert-base-uncased.ipynb
index 13942895065..87d57b9059b 100644
--- a/docs/tutorials/pytorch/language-modeling/bert-base-uncased.ipynb
+++ b/docs/tutorials/pytorch/language-modeling/bert-base-uncased.ipynb
@@ -43,7 +43,7 @@
    "id": "c1816be1",
    "metadata": {},
    "source": [
-    "* Follow [installation](https://github.com/intel-innersource/frameworks.ai.nlp-toolkit.intel-nlp-toolkit#installation) to install **nlp-toolkit**. "
+    "* Follow [installation](https://github.com/intel/intel-extension-for-transformers#installation) to install **intel-extension-for-transformers**.
" ] }, { @@ -80,8 +80,8 @@ "from dataclasses import dataclass, field\n", "from datasets import load_dataset, load_metric\n", "from itertools import chain\n", - "from nlp_toolkit import metrics, OptimizedModel, QuantizationConfig\n", - "from nlp_toolkit.optimization.trainer import NLPTrainer\n", + "from intel_extension_for_transformers import metrics, OptimizedModel, QuantizationConfig\n", + "from intel_extension_for_transformers.optimization.trainer import NLPTrainer\n", "from transformers import (\n", " CONFIG_MAPPING,\n", " MODEL_FOR_MASKED_LM_MAPPING,\n", diff --git a/docs/tutorials/pytorch/multiple-choice/benchmark.py b/docs/tutorials/pytorch/multiple-choice/benchmark.py index cb2df70bde5..6fde6834d0f 100644 --- a/docs/tutorials/pytorch/multiple-choice/benchmark.py +++ b/docs/tutorials/pytorch/multiple-choice/benchmark.py @@ -3,8 +3,8 @@ import numpy as np from datasets import load_dataset, load_metric from itertools import chain -from nlp_toolkit import metrics, OptimizedModel -from nlp_toolkit.optimization.trainer import NLPTrainer +from intel_extension_for_transformers import metrics, OptimizedModel +from intel_extension_for_transformers.optimization.trainer import NLPTrainer from argparse import ArgumentParser from transformers import ( MODEL_FOR_MASKED_LM_MAPPING, diff --git a/docs/tutorials/pytorch/multiple-choice/bert-base-uncased_SWAG.ipynb b/docs/tutorials/pytorch/multiple-choice/bert-base-uncased_SWAG.ipynb index 629c33dbfff..b36a053fec3 100644 --- a/docs/tutorials/pytorch/multiple-choice/bert-base-uncased_SWAG.ipynb +++ b/docs/tutorials/pytorch/multiple-choice/bert-base-uncased_SWAG.ipynb @@ -57,7 +57,7 @@ } }, "source": [ - "* Follow [installation](https://github.com/intel-innersource/frameworks.ai.nlp-toolkit.intel-nlp-toolkit#installation) to install **nlp-toolkit**. " + "* Follow [installation](https://github.com/intel/intel-extension-for-transformers#installation) to install **intel-extension-for-transformers**. " ] }, { @@ -108,8 +108,8 @@ "from dataclasses import dataclass, field\n", "from datasets import load_dataset\n", "from itertools import chain\n", - "from nlp_toolkit import metrics, OptimizedModel, QuantizationConfig\n", - "from nlp_toolkit.optimization.trainer import NLPTrainer\n", + "from intel_extension_for_transformers import metrics, OptimizedModel, QuantizationConfig\n", + "from intel_extension_for_transformers.optimization.trainer import NLPTrainer\n", "from transformers import (\n", " AutoConfig,\n", " AutoModelForMultipleChoice,\n", diff --git a/docs/tutorials/pytorch/question-answering/Dynamic_MiniLM_SQuAD.ipynb b/docs/tutorials/pytorch/question-answering/Dynamic_MiniLM_SQuAD.ipynb index f6abd2c2e74..b0830c5243e 100644 --- a/docs/tutorials/pytorch/question-answering/Dynamic_MiniLM_SQuAD.ipynb +++ b/docs/tutorials/pytorch/question-answering/Dynamic_MiniLM_SQuAD.ipynb @@ -37,7 +37,7 @@ "id": "c1816be1", "metadata": {}, "source": [ - "* Follow [installation](https://github.com/intel-innersource/frameworks.ai.nlp-toolkit.intel-nlp-toolkit#installation) to install **nlp-toolkit**. " + "* Follow [installation](https://github.com/intel/intel-extension-for-transformers#installation) to install **intel-extension-for-transformers**. 
" ] }, { @@ -86,12 +86,12 @@ "from dataclasses import dataclass, field\n", "from datasets import load_dataset, load_metric\n", "from itertools import chain\n", - "from nlp_toolkit import metrics, OptimizedModel, QuantizationConfig, DynamicLengthConfig\n", - "from nlp_toolkit.optimization.trainer import NLPTrainer\n", + "from intel_extension_for_transformers import metrics, OptimizedModel, QuantizationConfig, DynamicLengthConfig\n", + "from intel_extension_for_transformers.optimization.trainer import NLPTrainer\n", "from transformers.trainer_utils import get_last_checkpoint\n", "from transformers.utils.versions import require_version\n", "from typing import Optional\n", - "from nlp_toolkit.optimization.utils.models.modeling_roberta_dynamic import RobertaForQuestionAnswering\n", + "from intel_extension_for_transformers.optimization.utils.models.modeling_roberta_dynamic import RobertaForQuestionAnswering\n", "# to use modeling roberta with LAT:\n", "transformers.models.roberta.modeling_roberta.RobertaForQuestionAnswering = RobertaForQuestionAnswering\n", "import collections\n", @@ -948,21 +948,21 @@ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 44\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 45\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 46\u001b[0;31m \u001b[0mtrainer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtrain\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[0;32m/ec/pdx/disks/mlp_lab_home_pool_02/sguskin/dynamic/frameworks.ai.nlp-toolkit.intel-nlp-toolkit/nlp_toolkit/optimization/trainer.py\u001b[0m in \u001b[0;36mtrain\u001b[0;34m(self, component, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)\u001b[0m\n\u001b[1;32m 804\u001b[0m \u001b[0mtr_loss_step\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtraining_step\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmodel\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minputs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 805\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 806\u001b[0;31m \u001b[0mtr_loss_step\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtraining_step\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmodel\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minputs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 807\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 808\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlogging_nan_inf_filter\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0misnan\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtr_loss_step\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0misinf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtr_loss_step\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/ec/pdx/disks/mlp_lab_home_pool_02/sguskin/dynamic/frameworks.ai.nlp-toolkit.intel-nlp-toolkit/nlp_toolkit/optimization/trainer.py\u001b[0m in \u001b[0;36mtraining_step\u001b[0;34m(self, model, inputs)\u001b[0m\n\u001b[1;32m 1023\u001b[0m 
\u001b[0;31m# pylint: disable=E0401\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1024\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcompute_loss_context_manager\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1025\u001b[0;31m \u001b[0mloss\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcompute_loss\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmodel\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minputs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1026\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1027\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/ec/pdx/disks/mlp_lab_home_pool_02/sguskin/dynamic/frameworks.ai.nlp-toolkit.intel-nlp-toolkit/nlp_toolkit/optimization/trainer.py\u001b[0m in \u001b[0;36mcompute_loss\u001b[0;34m(self, model, inputs, return_outputs)\u001b[0m\n\u001b[1;32m 1255\u001b[0m \u001b[0mlabels\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1256\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1257\u001b[0;31m \u001b[0moutputs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m**\u001b[0m\u001b[0minputs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1258\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1259\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0min_training\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mhasattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"component\"\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mand\u001b[0m\u001b[0;31m \u001b[0m\u001b[0;31m\\\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/ec/pdx/disks/mlp_lab_home_pool_02/sguskin/dynamic/frameworks.ai.intel-extension-for-transformers.intel-intel-extension-for-transformers/intel_extension_for_transformers/optimization/trainer.py\u001b[0m in \u001b[0;36mtrain\u001b[0;34m(self, component, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)\u001b[0m\n\u001b[1;32m 804\u001b[0m \u001b[0mtr_loss_step\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtraining_step\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmodel\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minputs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 805\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 806\u001b[0;31m \u001b[0mtr_loss_step\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtraining_step\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmodel\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minputs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 807\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 808\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlogging_nan_inf_filter\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0misnan\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtr_loss_step\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mor\u001b[0m 
\u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0misinf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtr_loss_step\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/ec/pdx/disks/mlp_lab_home_pool_02/sguskin/dynamic/frameworks.ai.intel-extension-for-transformers.intel-intel-extension-for-transformers/intel_extension_for_transformers/optimization/trainer.py\u001b[0m in \u001b[0;36mtraining_step\u001b[0;34m(self, model, inputs)\u001b[0m\n\u001b[1;32m 1023\u001b[0m \u001b[0;31m# pylint: disable=E0401\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1024\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcompute_loss_context_manager\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1025\u001b[0;31m \u001b[0mloss\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcompute_loss\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmodel\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minputs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1026\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1027\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/ec/pdx/disks/mlp_lab_home_pool_02/sguskin/dynamic/frameworks.ai.intel-extension-for-transformers.intel-intel-extension-for-transformers/intel_extension_for_transformers/optimization/trainer.py\u001b[0m in \u001b[0;36mcompute_loss\u001b[0;34m(self, model, inputs, return_outputs)\u001b[0m\n\u001b[1;32m 1255\u001b[0m \u001b[0mlabels\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1256\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1257\u001b[0;31m \u001b[0moutputs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m**\u001b[0m\u001b[0minputs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1258\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1259\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0min_training\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mhasattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"component\"\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mand\u001b[0m\u001b[0;31m \u001b[0m\u001b[0;31m\\\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/anaconda3/lib/python3.8/site-packages/torch/nn/modules/module.py\u001b[0m in \u001b[0;36m_call_impl\u001b[0;34m(self, *input, **kwargs)\u001b[0m\n\u001b[1;32m 1128\u001b[0m if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks\n\u001b[1;32m 1129\u001b[0m or _global_forward_hooks or _global_forward_pre_hooks):\n\u001b[0;32m-> 1130\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mforward_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1131\u001b[0m \u001b[0;31m# Do not call functions when jit is used\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1132\u001b[0m 
\u001b[0mfull_backward_hooks\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnon_full_backward_hooks\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/ec/pdx/disks/mlp_lab_home_pool_02/sguskin/dynamic/frameworks.ai.nlp-toolkit.intel-nlp-toolkit/nlp_toolkit/optimization/utils/models/modeling_roberta_dynamic.py\u001b[0m in \u001b[0;36mforward\u001b[0;34m(self, input_ids, attention_mask, token_type_ids, position_ids, head_mask, inputs_embeds, start_positions, end_positions, output_attentions, output_hidden_states, return_dict, layer_config, length_config, always_keep_cls_token)\u001b[0m\n\u001b[1;32m 1582\u001b[0m \u001b[0mreturn_dict\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mreturn_dict\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mreturn_dict\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m \u001b[0;32melse\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconfig\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0muse_return_dict\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1583\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1584\u001b[0;31m outputs = self.roberta(\n\u001b[0m\u001b[1;32m 1585\u001b[0m \u001b[0minput_ids\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1586\u001b[0m \u001b[0mattention_mask\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mattention_mask\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/ec/pdx/disks/mlp_lab_home_pool_02/sguskin/dynamic/frameworks.ai.intel-extension-for-transformers.intel-intel-extension-for-transformers/intel_extension_for_transformers/optimization/utils/models/modeling_roberta_dynamic.py\u001b[0m in \u001b[0;36mforward\u001b[0;34m(self, input_ids, attention_mask, token_type_ids, position_ids, head_mask, inputs_embeds, start_positions, end_positions, output_attentions, output_hidden_states, return_dict, layer_config, length_config, always_keep_cls_token)\u001b[0m\n\u001b[1;32m 1582\u001b[0m \u001b[0mreturn_dict\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mreturn_dict\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mreturn_dict\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m \u001b[0;32melse\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconfig\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0muse_return_dict\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1583\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1584\u001b[0;31m outputs = self.roberta(\n\u001b[0m\u001b[1;32m 1585\u001b[0m \u001b[0minput_ids\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1586\u001b[0m \u001b[0mattention_mask\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mattention_mask\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/anaconda3/lib/python3.8/site-packages/torch/nn/modules/module.py\u001b[0m in \u001b[0;36m_call_impl\u001b[0;34m(self, *input, **kwargs)\u001b[0m\n\u001b[1;32m 1128\u001b[0m if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks\n\u001b[1;32m 1129\u001b[0m or _global_forward_hooks or _global_forward_pre_hooks):\n\u001b[0;32m-> 1130\u001b[0;31m \u001b[0;32mreturn\u001b[0m 
\u001b[0mforward_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1131\u001b[0m \u001b[0;31m# Do not call functions when jit is used\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1132\u001b[0m \u001b[0mfull_backward_hooks\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnon_full_backward_hooks\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/ec/pdx/disks/mlp_lab_home_pool_02/sguskin/dynamic/frameworks.ai.nlp-toolkit.intel-nlp-toolkit/nlp_toolkit/optimization/utils/models/modeling_roberta_dynamic.py\u001b[0m in \u001b[0;36mforward\u001b[0;34m(self, input_ids, attention_mask, token_type_ids, position_ids, head_mask, inputs_embeds, encoder_hidden_states, encoder_attention_mask, past_key_values, use_cache, output_attentions, output_hidden_states, return_dict, layer_config, length_config, always_keep_cls_token)\u001b[0m\n\u001b[1;32m 905\u001b[0m )\n\u001b[1;32m 906\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 907\u001b[0;31m encoder_outputs = self.encoder(\n\u001b[0m\u001b[1;32m 908\u001b[0m \u001b[0membedding_output\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 909\u001b[0m \u001b[0mattention_mask\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mextended_attention_mask\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/ec/pdx/disks/mlp_lab_home_pool_02/sguskin/dynamic/frameworks.ai.intel-extension-for-transformers.intel-intel-extension-for-transformers/intel_extension_for_transformers/optimization/utils/models/modeling_roberta_dynamic.py\u001b[0m in \u001b[0;36mforward\u001b[0;34m(self, input_ids, attention_mask, token_type_ids, position_ids, head_mask, inputs_embeds, encoder_hidden_states, encoder_attention_mask, past_key_values, use_cache, output_attentions, output_hidden_states, return_dict, layer_config, length_config, always_keep_cls_token)\u001b[0m\n\u001b[1;32m 905\u001b[0m )\n\u001b[1;32m 906\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 907\u001b[0;31m encoder_outputs = self.encoder(\n\u001b[0m\u001b[1;32m 908\u001b[0m \u001b[0membedding_output\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 909\u001b[0m \u001b[0mattention_mask\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mextended_attention_mask\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/anaconda3/lib/python3.8/site-packages/torch/nn/modules/module.py\u001b[0m in \u001b[0;36m_call_impl\u001b[0;34m(self, *input, **kwargs)\u001b[0m\n\u001b[1;32m 1128\u001b[0m if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks\n\u001b[1;32m 1129\u001b[0m or _global_forward_hooks or _global_forward_pre_hooks):\n\u001b[0;32m-> 1130\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mforward_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1131\u001b[0m \u001b[0;31m# Do not call functions when jit is 
used\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1132\u001b[0m \u001b[0mfull_backward_hooks\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnon_full_backward_hooks\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/ec/pdx/disks/mlp_lab_home_pool_02/sguskin/dynamic/frameworks.ai.nlp-toolkit.intel-nlp-toolkit/nlp_toolkit/optimization/utils/models/modeling_roberta_dynamic.py\u001b[0m in \u001b[0;36mforward\u001b[0;34m(self, hidden_states, attention_mask, head_mask, encoder_hidden_states, encoder_attention_mask, past_key_values, use_cache, output_attentions, output_hidden_states, return_dict, layer_config, length_config, always_keep_cls_token)\u001b[0m\n\u001b[1;32m 553\u001b[0m )\n\u001b[1;32m 554\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 555\u001b[0;31m layer_outputs, keep_indices = layer_module(\n\u001b[0m\u001b[1;32m 556\u001b[0m \u001b[0mhidden_states\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 557\u001b[0m \u001b[0mattention_mask\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/ec/pdx/disks/mlp_lab_home_pool_02/sguskin/dynamic/frameworks.ai.intel-extension-for-transformers.intel-intel-extension-for-transformers/intel_extension_for_transformers/optimization/utils/models/modeling_roberta_dynamic.py\u001b[0m in \u001b[0;36mforward\u001b[0;34m(self, hidden_states, attention_mask, head_mask, encoder_hidden_states, encoder_attention_mask, past_key_values, use_cache, output_attentions, output_hidden_states, return_dict, layer_config, length_config, always_keep_cls_token)\u001b[0m\n\u001b[1;32m 553\u001b[0m )\n\u001b[1;32m 554\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 555\u001b[0;31m layer_outputs, keep_indices = layer_module(\n\u001b[0m\u001b[1;32m 556\u001b[0m \u001b[0mhidden_states\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 557\u001b[0m \u001b[0mattention_mask\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/anaconda3/lib/python3.8/site-packages/torch/nn/modules/module.py\u001b[0m in \u001b[0;36m_call_impl\u001b[0;34m(self, *input, **kwargs)\u001b[0m\n\u001b[1;32m 1128\u001b[0m if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks\n\u001b[1;32m 1129\u001b[0m or _global_forward_hooks or _global_forward_pre_hooks):\n\u001b[0;32m-> 1130\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mforward_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1131\u001b[0m \u001b[0;31m# Do not call functions when jit is used\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1132\u001b[0m \u001b[0mfull_backward_hooks\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnon_full_backward_hooks\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/ec/pdx/disks/mlp_lab_home_pool_02/sguskin/dynamic/frameworks.ai.nlp-toolkit.intel-nlp-toolkit/nlp_toolkit/optimization/utils/models/modeling_roberta_dynamic.py\u001b[0m in \u001b[0;36mforward\u001b[0;34m(self, hidden_states, attention_mask, head_mask, encoder_hidden_states, encoder_attention_mask, past_key_value, output_attentions, output_length, always_keep_cls_token)\u001b[0m\n\u001b[1;32m 409\u001b[0m \u001b[0;31m# decoder uni-directional self-attention cached key/values tuple is at positions 1,2\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 410\u001b[0m \u001b[0mself_attn_past_key_value\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpast_key_value\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mpast_key_value\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m \u001b[0;32melse\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 411\u001b[0;31m self_attention_outputs = self.attention(\n\u001b[0m\u001b[1;32m 412\u001b[0m \u001b[0mhidden_states\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 413\u001b[0m \u001b[0mattention_mask\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/ec/pdx/disks/mlp_lab_home_pool_02/sguskin/dynamic/frameworks.ai.intel-extension-for-transformers.intel-intel-extension-for-transformers/intel_extension_for_transformers/optimization/utils/models/modeling_roberta_dynamic.py\u001b[0m in \u001b[0;36mforward\u001b[0;34m(self, hidden_states, attention_mask, head_mask, encoder_hidden_states, encoder_attention_mask, past_key_value, output_attentions, output_length, always_keep_cls_token)\u001b[0m\n\u001b[1;32m 409\u001b[0m \u001b[0;31m# decoder uni-directional self-attention cached key/values tuple is at positions 1,2\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 410\u001b[0m \u001b[0mself_attn_past_key_value\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpast_key_value\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mpast_key_value\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m \u001b[0;32melse\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 411\u001b[0;31m self_attention_outputs = self.attention(\n\u001b[0m\u001b[1;32m 412\u001b[0m \u001b[0mhidden_states\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 413\u001b[0m \u001b[0mattention_mask\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/anaconda3/lib/python3.8/site-packages/torch/nn/modules/module.py\u001b[0m in \u001b[0;36m_call_impl\u001b[0;34m(self, *input, **kwargs)\u001b[0m\n\u001b[1;32m 1128\u001b[0m if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks\n\u001b[1;32m 1129\u001b[0m or _global_forward_hooks or _global_forward_pre_hooks):\n\u001b[0;32m-> 1130\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mforward_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1131\u001b[0m \u001b[0;31m# Do not call functions when jit is used\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1132\u001b[0m \u001b[0mfull_backward_hooks\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnon_full_backward_hooks\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/ec/pdx/disks/mlp_lab_home_pool_02/sguskin/dynamic/frameworks.ai.nlp-toolkit.intel-nlp-toolkit/nlp_toolkit/optimization/utils/models/modeling_roberta_dynamic.py\u001b[0m in \u001b[0;36mforward\u001b[0;34m(self, hidden_states, attention_mask, head_mask, encoder_hidden_states, encoder_attention_mask, past_key_value, output_attentions)\u001b[0m\n\u001b[1;32m 334\u001b[0m \u001b[0moutput_attentions\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mOptional\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mbool\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 335\u001b[0m ) -> Tuple[torch.Tensor]:\n\u001b[0;32m--> 336\u001b[0;31m self_outputs = self.self(\n\u001b[0m\u001b[1;32m 337\u001b[0m \u001b[0mhidden_states\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 338\u001b[0m \u001b[0mattention_mask\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/ec/pdx/disks/mlp_lab_home_pool_02/sguskin/dynamic/frameworks.ai.intel-extension-for-transformers.intel-intel-extension-for-transformers/intel_extension_for_transformers/optimization/utils/models/modeling_roberta_dynamic.py\u001b[0m in \u001b[0;36mforward\u001b[0;34m(self, hidden_states, attention_mask, head_mask, encoder_hidden_states, encoder_attention_mask, past_key_value, output_attentions)\u001b[0m\n\u001b[1;32m 334\u001b[0m \u001b[0moutput_attentions\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mOptional\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mbool\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 335\u001b[0m ) -> Tuple[torch.Tensor]:\n\u001b[0;32m--> 336\u001b[0;31m self_outputs = self.self(\n\u001b[0m\u001b[1;32m 337\u001b[0m \u001b[0mhidden_states\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 338\u001b[0m \u001b[0mattention_mask\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/anaconda3/lib/python3.8/site-packages/torch/nn/modules/module.py\u001b[0m in \u001b[0;36m_call_impl\u001b[0;34m(self, *input, **kwargs)\u001b[0m\n\u001b[1;32m 1128\u001b[0m if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks\n\u001b[1;32m 1129\u001b[0m or _global_forward_hooks or _global_forward_pre_hooks):\n\u001b[0;32m-> 1130\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mforward_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1131\u001b[0m \u001b[0;31m# Do not call functions when jit is 
used\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1132\u001b[0m \u001b[0mfull_backward_hooks\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnon_full_backward_hooks\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/ec/pdx/disks/mlp_lab_home_pool_02/sguskin/dynamic/frameworks.ai.nlp-toolkit.intel-nlp-toolkit/nlp_toolkit/optimization/utils/models/modeling_roberta_dynamic.py\u001b[0m in \u001b[0;36mforward\u001b[0;34m(self, hidden_states, attention_mask, head_mask, encoder_hidden_states, encoder_attention_mask, past_key_value, output_attentions)\u001b[0m\n\u001b[1;32m 198\u001b[0m \u001b[0moutput_attentions\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mOptional\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mbool\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 199\u001b[0m ) -> Tuple[torch.Tensor]:\n\u001b[0;32m--> 200\u001b[0;31m \u001b[0mmixed_query_layer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mquery\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mhidden_states\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 201\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 202\u001b[0m \u001b[0;31m# If this is instantiated as a cross-attention module, the keys\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/ec/pdx/disks/mlp_lab_home_pool_02/sguskin/dynamic/frameworks.ai.intel-extension-for-transformers.intel-intel-extension-for-transformers/intel_extension_for_transformers/optimization/utils/models/modeling_roberta_dynamic.py\u001b[0m in \u001b[0;36mforward\u001b[0;34m(self, hidden_states, attention_mask, head_mask, encoder_hidden_states, encoder_attention_mask, past_key_value, output_attentions)\u001b[0m\n\u001b[1;32m 198\u001b[0m \u001b[0moutput_attentions\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mOptional\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mbool\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 199\u001b[0m ) -> Tuple[torch.Tensor]:\n\u001b[0;32m--> 200\u001b[0;31m \u001b[0mmixed_query_layer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mquery\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mhidden_states\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 201\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 202\u001b[0m \u001b[0;31m# If this is instantiated as a cross-attention module, the keys\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/anaconda3/lib/python3.8/site-packages/torch/nn/modules/module.py\u001b[0m in \u001b[0;36m_call_impl\u001b[0;34m(self, *input, **kwargs)\u001b[0m\n\u001b[1;32m 1128\u001b[0m if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks\n\u001b[1;32m 1129\u001b[0m or _global_forward_hooks or _global_forward_pre_hooks):\n\u001b[0;32m-> 1130\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mforward_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1131\u001b[0m \u001b[0;31m# Do not call functions when jit is used\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1132\u001b[0m \u001b[0mfull_backward_hooks\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnon_full_backward_hooks\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/anaconda3/lib/python3.8/site-packages/torch/nn/modules/linear.py\u001b[0m in \u001b[0;36mforward\u001b[0;34m(self, input)\u001b[0m\n\u001b[1;32m 112\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 113\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mforward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minput\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mTensor\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m->\u001b[0m \u001b[0mTensor\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 114\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mF\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlinear\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mweight\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbias\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 115\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 116\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mextra_repr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m->\u001b[0m \u001b[0mstr\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mKeyboardInterrupt\u001b[0m: " @@ -1120,8 +1120,8 @@ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 58\u001b[0m \u001b[0;31m# train a length-adaptive model\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 59\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 60\u001b[0;31m \u001b[0mtrainer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtrain\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 61\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/ec/pdx/disks/mlp_lab_home_pool_02/sguskin/dynamic/frameworks.ai.nlp-toolkit.intel-nlp-toolkit/nlp_toolkit/optimization/trainer.py\u001b[0m in \u001b[0;36mtrain\u001b[0;34m(self, component, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)\u001b[0m\n\u001b[1;32m 804\u001b[0m \u001b[0mtr_loss_step\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtraining_step\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmodel\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minputs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 805\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 806\u001b[0;31m \u001b[0mtr_loss_step\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0mtraining_step\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmodel\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minputs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 807\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 808\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlogging_nan_inf_filter\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0misnan\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtr_loss_step\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0misinf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtr_loss_step\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/ec/pdx/disks/mlp_lab_home_pool_02/sguskin/dynamic/frameworks.ai.nlp-toolkit.intel-nlp-toolkit/nlp_toolkit/optimization/trainer.py\u001b[0m in \u001b[0;36mtraining_step_length_adaptive\u001b[0;34m(self, model, inputs)\u001b[0m\n\u001b[1;32m 1236\u001b[0m \u001b[0mloss\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdeepspeed\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbackward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mloss\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1237\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1238\u001b[0;31m \u001b[0mloss\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbackward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1239\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1240\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/ec/pdx/disks/mlp_lab_home_pool_02/sguskin/dynamic/frameworks.ai.intel-extension-for-transformers.intel-intel-extension-for-transformers/intel_extension_for_transformers/optimization/trainer.py\u001b[0m in \u001b[0;36mtrain\u001b[0;34m(self, component, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)\u001b[0m\n\u001b[1;32m 804\u001b[0m \u001b[0mtr_loss_step\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtraining_step\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmodel\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minputs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 805\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 806\u001b[0;31m \u001b[0mtr_loss_step\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtraining_step\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmodel\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minputs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 807\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 808\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlogging_nan_inf_filter\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0misnan\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtr_loss_step\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0misinf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtr_loss_step\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + 
"\u001b[0;32m/ec/pdx/disks/mlp_lab_home_pool_02/sguskin/dynamic/frameworks.ai.intel-extension-for-transformers.intel-intel-extension-for-transformers/intel_extension_for_transformers/optimization/trainer.py\u001b[0m in \u001b[0;36mtraining_step_length_adaptive\u001b[0;34m(self, model, inputs)\u001b[0m\n\u001b[1;32m 1236\u001b[0m \u001b[0mloss\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdeepspeed\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbackward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mloss\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1237\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1238\u001b[0;31m \u001b[0mloss\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbackward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1239\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1240\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/anaconda3/lib/python3.8/site-packages/torch/_tensor.py\u001b[0m in \u001b[0;36mbackward\u001b[0;34m(self, gradient, retain_graph, create_graph, inputs)\u001b[0m\n\u001b[1;32m 394\u001b[0m \u001b[0mcreate_graph\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcreate_graph\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 395\u001b[0m inputs=inputs)\n\u001b[0;32m--> 396\u001b[0;31m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mautograd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbackward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgradient\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mretain_graph\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcreate_graph\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minputs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0minputs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 397\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 398\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mregister_hook\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mhook\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/anaconda3/lib/python3.8/site-packages/torch/autograd/__init__.py\u001b[0m in \u001b[0;36mbackward\u001b[0;34m(tensors, grad_tensors, retain_graph, create_graph, grad_variables, inputs)\u001b[0m\n\u001b[1;32m 171\u001b[0m \u001b[0;31m# some Python versions print out the first line of a multi-line function\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 172\u001b[0m \u001b[0;31m# calls in the traceback and some print out the last line\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 173\u001b[0;31m Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass\n\u001b[0m\u001b[1;32m 174\u001b[0m \u001b[0mtensors\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgrad_tensors_\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mretain_graph\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcreate_graph\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minputs\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 175\u001b[0m allow_unreachable=True, accumulate_grad=True) # Calls into the C++ engine to run the backward pass\n", "\u001b[0;31mKeyboardInterrupt\u001b[0m: " @@ 
-1306,23 +1306,23 @@ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 51\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 52\u001b[0m \u001b[0;31m# run search\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 53\u001b[0;31m \u001b[0msearch_trainer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrun_evolutionary_search\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[0;32m/ec/pdx/disks/mlp_lab_home_pool_02/sguskin/dynamic/frameworks.ai.nlp-toolkit.intel-nlp-toolkit/nlp_toolkit/optimization/trainer.py\u001b[0m in \u001b[0;36mrun_evolutionary_search\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 2042\u001b[0m )\n\u001b[1;32m 2043\u001b[0m \u001b[0mupper_gene\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdynamic_config\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmax_length\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m*\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconfig\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnum_hidden_layers\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2044\u001b[0;31m \u001b[0mevolution\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0madd_gene\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlower_gene\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmethod\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2045\u001b[0m \u001b[0mevolution\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0madd_gene\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mupper_gene\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmethod\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2046\u001b[0m \u001b[0mevolution\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlower_constraint\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mevolution\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstore\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mlower_gene\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/ec/pdx/disks/mlp_lab_home_pool_02/sguskin/dynamic/frameworks.ai.nlp-toolkit.intel-nlp-toolkit/nlp_toolkit/optimization/dynamic/evolution.py\u001b[0m in \u001b[0;36madd_gene\u001b[0;34m(self, gene, macs, score, method, parents)\u001b[0m\n\u001b[1;32m 154\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0;32mFalse\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 155\u001b[0m \u001b[0mstart_time\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtimeit\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdefault_timer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 156\u001b[0;31m \u001b[0meval_result\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mevaluate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 157\u001b[0m \u001b[0mevalTime\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0mtimeit\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdefault_timer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0mstart_time\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 158\u001b[0m \u001b[0mscore\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mscore\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0meval_result\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'eval_f1'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/ec/pdx/disks/mlp_lab_home_pool_02/sguskin/dynamic/frameworks.ai.intel-extension-for-transformers.intel-intel-extension-for-transformers/intel_extension_for_transformers/optimization/trainer.py\u001b[0m in \u001b[0;36mrun_evolutionary_search\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 2042\u001b[0m )\n\u001b[1;32m 2043\u001b[0m \u001b[0mupper_gene\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdynamic_config\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmax_length\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m*\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconfig\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnum_hidden_layers\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2044\u001b[0;31m \u001b[0mevolution\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0madd_gene\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlower_gene\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmethod\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2045\u001b[0m \u001b[0mevolution\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0madd_gene\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mupper_gene\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmethod\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2046\u001b[0m \u001b[0mevolution\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlower_constraint\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mevolution\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstore\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mlower_gene\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/ec/pdx/disks/mlp_lab_home_pool_02/sguskin/dynamic/frameworks.ai.intel-extension-for-transformers.intel-intel-extension-for-transformers/intel_extension_for_transformers/optimization/dynamic/evolution.py\u001b[0m in \u001b[0;36madd_gene\u001b[0;34m(self, gene, macs, score, method, parents)\u001b[0m\n\u001b[1;32m 154\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0;32mFalse\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 155\u001b[0m \u001b[0mstart_time\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtimeit\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdefault_timer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 156\u001b[0;31m \u001b[0meval_result\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mevaluate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 157\u001b[0m \u001b[0mevalTime\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtimeit\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdefault_timer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m 
\u001b[0;34m-\u001b[0m \u001b[0mstart_time\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 158\u001b[0m \u001b[0mscore\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mscore\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0meval_result\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'eval_f1'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m\u001b[0m in \u001b[0;36mevaluate\u001b[0;34m(self, eval_dataset, eval_examples, ignore_keys, metric_key_prefix)\u001b[0m\n\u001b[1;32m 13\u001b[0m \u001b[0meval_loop\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mprediction_loop\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0muse_legacy_prediction_loop\u001b[0m \u001b[0;32melse\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mevaluation_loop\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 14\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 15\u001b[0;31m output = eval_loop(\n\u001b[0m\u001b[1;32m 16\u001b[0m \u001b[0meval_dataloader\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 17\u001b[0m \u001b[0mdescription\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"Evaluation\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/anaconda3/lib/python3.8/site-packages/transformers/trainer.py\u001b[0m in \u001b[0;36mevaluation_loop\u001b[0;34m(self, dataloader, description, prediction_loss_only, ignore_keys, metric_key_prefix)\u001b[0m\n\u001b[1;32m 2796\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2797\u001b[0m \u001b[0;31m# Prediction step\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2798\u001b[0;31m \u001b[0mloss\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlogits\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlabels\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mprediction_step\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmodel\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minputs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mprediction_loss_only\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mignore_keys\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mignore_keys\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2799\u001b[0m \u001b[0minputs_decode\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0minputs\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"input_ids\"\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0minclude_inputs_for_metrics\u001b[0m \u001b[0;32melse\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2800\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/anaconda3/lib/python3.8/site-packages/transformers/trainer.py\u001b[0m in \u001b[0;36mprediction_step\u001b[0;34m(self, model, inputs, prediction_loss_only, ignore_keys)\u001b[0m\n\u001b[1;32m 3047\u001b[0m \u001b[0mloss\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3048\u001b[0m \u001b[0;32mwith\u001b[0m 
\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcompute_loss_context_manager\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3049\u001b[0;31m \u001b[0moutputs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m**\u001b[0m\u001b[0minputs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3050\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0moutputs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdict\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3051\u001b[0m \u001b[0mlogits\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtuple\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mv\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mk\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mv\u001b[0m \u001b[0;32min\u001b[0m \u001b[0moutputs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mitems\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mk\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mignore_keys\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/anaconda3/lib/python3.8/site-packages/torch/nn/modules/module.py\u001b[0m in \u001b[0;36m_call_impl\u001b[0;34m(self, *input, **kwargs)\u001b[0m\n\u001b[1;32m 1128\u001b[0m if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks\n\u001b[1;32m 1129\u001b[0m or _global_forward_hooks or _global_forward_pre_hooks):\n\u001b[0;32m-> 1130\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mforward_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1131\u001b[0m \u001b[0;31m# Do not call functions when jit is used\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1132\u001b[0m \u001b[0mfull_backward_hooks\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnon_full_backward_hooks\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/ec/pdx/disks/mlp_lab_home_pool_02/sguskin/dynamic/frameworks.ai.nlp-toolkit.intel-nlp-toolkit/nlp_toolkit/optimization/utils/models/modeling_roberta_dynamic.py\u001b[0m in \u001b[0;36mforward\u001b[0;34m(self, input_ids, attention_mask, token_type_ids, position_ids, head_mask, inputs_embeds, start_positions, end_positions, output_attentions, output_hidden_states, return_dict, layer_config, length_config, always_keep_cls_token)\u001b[0m\n\u001b[1;32m 1582\u001b[0m \u001b[0mreturn_dict\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mreturn_dict\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mreturn_dict\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m \u001b[0;32melse\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconfig\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0muse_return_dict\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1583\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1584\u001b[0;31m outputs = self.roberta(\n\u001b[0m\u001b[1;32m 1585\u001b[0m 
\u001b[0minput_ids\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1586\u001b[0m \u001b[0mattention_mask\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mattention_mask\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/ec/pdx/disks/mlp_lab_home_pool_02/sguskin/dynamic/frameworks.ai.intel-extension-for-transformers.intel-intel-extension-for-transformers/intel_extension_for_transformers/optimization/utils/models/modeling_roberta_dynamic.py\u001b[0m in \u001b[0;36mforward\u001b[0;34m(self, input_ids, attention_mask, token_type_ids, position_ids, head_mask, inputs_embeds, start_positions, end_positions, output_attentions, output_hidden_states, return_dict, layer_config, length_config, always_keep_cls_token)\u001b[0m\n\u001b[1;32m 1582\u001b[0m \u001b[0mreturn_dict\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mreturn_dict\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mreturn_dict\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m \u001b[0;32melse\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconfig\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0muse_return_dict\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1583\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1584\u001b[0;31m outputs = self.roberta(\n\u001b[0m\u001b[1;32m 1585\u001b[0m \u001b[0minput_ids\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1586\u001b[0m \u001b[0mattention_mask\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mattention_mask\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/anaconda3/lib/python3.8/site-packages/torch/nn/modules/module.py\u001b[0m in \u001b[0;36m_call_impl\u001b[0;34m(self, *input, **kwargs)\u001b[0m\n\u001b[1;32m 1128\u001b[0m if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks\n\u001b[1;32m 1129\u001b[0m or _global_forward_hooks or _global_forward_pre_hooks):\n\u001b[0;32m-> 1130\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mforward_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1131\u001b[0m \u001b[0;31m# Do not call functions when jit is used\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1132\u001b[0m \u001b[0mfull_backward_hooks\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnon_full_backward_hooks\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/ec/pdx/disks/mlp_lab_home_pool_02/sguskin/dynamic/frameworks.ai.nlp-toolkit.intel-nlp-toolkit/nlp_toolkit/optimization/utils/models/modeling_roberta_dynamic.py\u001b[0m in \u001b[0;36mforward\u001b[0;34m(self, input_ids, attention_mask, token_type_ids, position_ids, head_mask, inputs_embeds, encoder_hidden_states, encoder_attention_mask, past_key_values, use_cache, output_attentions, output_hidden_states, return_dict, layer_config, length_config, always_keep_cls_token)\u001b[0m\n\u001b[1;32m 905\u001b[0m )\n\u001b[1;32m 906\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 907\u001b[0;31m encoder_outputs = self.encoder(\n\u001b[0m\u001b[1;32m 
908\u001b[0m \u001b[0membedding_output\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 909\u001b[0m \u001b[0mattention_mask\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mextended_attention_mask\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/ec/pdx/disks/mlp_lab_home_pool_02/sguskin/dynamic/frameworks.ai.intel-extension-for-transformers.intel-intel-extension-for-transformers/intel_extension_for_transformers/optimization/utils/models/modeling_roberta_dynamic.py\u001b[0m in \u001b[0;36mforward\u001b[0;34m(self, input_ids, attention_mask, token_type_ids, position_ids, head_mask, inputs_embeds, encoder_hidden_states, encoder_attention_mask, past_key_values, use_cache, output_attentions, output_hidden_states, return_dict, layer_config, length_config, always_keep_cls_token)\u001b[0m\n\u001b[1;32m 905\u001b[0m )\n\u001b[1;32m 906\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 907\u001b[0;31m encoder_outputs = self.encoder(\n\u001b[0m\u001b[1;32m 908\u001b[0m \u001b[0membedding_output\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 909\u001b[0m \u001b[0mattention_mask\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mextended_attention_mask\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/anaconda3/lib/python3.8/site-packages/torch/nn/modules/module.py\u001b[0m in \u001b[0;36m_call_impl\u001b[0;34m(self, *input, **kwargs)\u001b[0m\n\u001b[1;32m 1128\u001b[0m if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks\n\u001b[1;32m 1129\u001b[0m or _global_forward_hooks or _global_forward_pre_hooks):\n\u001b[0;32m-> 1130\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mforward_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1131\u001b[0m \u001b[0;31m# Do not call functions when jit is used\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1132\u001b[0m \u001b[0mfull_backward_hooks\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnon_full_backward_hooks\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/ec/pdx/disks/mlp_lab_home_pool_02/sguskin/dynamic/frameworks.ai.nlp-toolkit.intel-nlp-toolkit/nlp_toolkit/optimization/utils/models/modeling_roberta_dynamic.py\u001b[0m in \u001b[0;36mforward\u001b[0;34m(self, hidden_states, attention_mask, head_mask, encoder_hidden_states, encoder_attention_mask, past_key_values, use_cache, output_attentions, output_hidden_states, return_dict, layer_config, length_config, always_keep_cls_token)\u001b[0m\n\u001b[1;32m 553\u001b[0m )\n\u001b[1;32m 554\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 555\u001b[0;31m layer_outputs, keep_indices = layer_module(\n\u001b[0m\u001b[1;32m 556\u001b[0m \u001b[0mhidden_states\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 557\u001b[0m \u001b[0mattention_mask\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + 
"\u001b[0;32m/ec/pdx/disks/mlp_lab_home_pool_02/sguskin/dynamic/frameworks.ai.intel-extension-for-transformers.intel-intel-extension-for-transformers/intel_extension_for_transformers/optimization/utils/models/modeling_roberta_dynamic.py\u001b[0m in \u001b[0;36mforward\u001b[0;34m(self, hidden_states, attention_mask, head_mask, encoder_hidden_states, encoder_attention_mask, past_key_values, use_cache, output_attentions, output_hidden_states, return_dict, layer_config, length_config, always_keep_cls_token)\u001b[0m\n\u001b[1;32m 553\u001b[0m )\n\u001b[1;32m 554\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 555\u001b[0;31m layer_outputs, keep_indices = layer_module(\n\u001b[0m\u001b[1;32m 556\u001b[0m \u001b[0mhidden_states\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 557\u001b[0m \u001b[0mattention_mask\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/anaconda3/lib/python3.8/site-packages/torch/nn/modules/module.py\u001b[0m in \u001b[0;36m_call_impl\u001b[0;34m(self, *input, **kwargs)\u001b[0m\n\u001b[1;32m 1128\u001b[0m if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks\n\u001b[1;32m 1129\u001b[0m or _global_forward_hooks or _global_forward_pre_hooks):\n\u001b[0;32m-> 1130\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mforward_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1131\u001b[0m \u001b[0;31m# Do not call functions when jit is used\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1132\u001b[0m \u001b[0mfull_backward_hooks\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnon_full_backward_hooks\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/ec/pdx/disks/mlp_lab_home_pool_02/sguskin/dynamic/frameworks.ai.nlp-toolkit.intel-nlp-toolkit/nlp_toolkit/optimization/utils/models/modeling_roberta_dynamic.py\u001b[0m in \u001b[0;36mforward\u001b[0;34m(self, hidden_states, attention_mask, head_mask, encoder_hidden_states, encoder_attention_mask, past_key_value, output_attentions, output_length, always_keep_cls_token)\u001b[0m\n\u001b[1;32m 409\u001b[0m \u001b[0;31m# decoder uni-directional self-attention cached key/values tuple is at positions 1,2\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 410\u001b[0m \u001b[0mself_attn_past_key_value\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpast_key_value\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mpast_key_value\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m \u001b[0;32melse\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 411\u001b[0;31m self_attention_outputs = self.attention(\n\u001b[0m\u001b[1;32m 412\u001b[0m \u001b[0mhidden_states\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 413\u001b[0m 
\u001b[0mattention_mask\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/ec/pdx/disks/mlp_lab_home_pool_02/sguskin/dynamic/frameworks.ai.intel-extension-for-transformers.intel-intel-extension-for-transformers/intel_extension_for_transformers/optimization/utils/models/modeling_roberta_dynamic.py\u001b[0m in \u001b[0;36mforward\u001b[0;34m(self, hidden_states, attention_mask, head_mask, encoder_hidden_states, encoder_attention_mask, past_key_value, output_attentions, output_length, always_keep_cls_token)\u001b[0m\n\u001b[1;32m 409\u001b[0m \u001b[0;31m# decoder uni-directional self-attention cached key/values tuple is at positions 1,2\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 410\u001b[0m \u001b[0mself_attn_past_key_value\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpast_key_value\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mpast_key_value\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m \u001b[0;32melse\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 411\u001b[0;31m self_attention_outputs = self.attention(\n\u001b[0m\u001b[1;32m 412\u001b[0m \u001b[0mhidden_states\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 413\u001b[0m \u001b[0mattention_mask\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/anaconda3/lib/python3.8/site-packages/torch/nn/modules/module.py\u001b[0m in \u001b[0;36m_call_impl\u001b[0;34m(self, *input, **kwargs)\u001b[0m\n\u001b[1;32m 1128\u001b[0m if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks\n\u001b[1;32m 1129\u001b[0m or _global_forward_hooks or _global_forward_pre_hooks):\n\u001b[0;32m-> 1130\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mforward_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1131\u001b[0m \u001b[0;31m# Do not call functions when jit is used\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1132\u001b[0m \u001b[0mfull_backward_hooks\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnon_full_backward_hooks\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/ec/pdx/disks/mlp_lab_home_pool_02/sguskin/dynamic/frameworks.ai.nlp-toolkit.intel-nlp-toolkit/nlp_toolkit/optimization/utils/models/modeling_roberta_dynamic.py\u001b[0m in \u001b[0;36mforward\u001b[0;34m(self, hidden_states, attention_mask, head_mask, encoder_hidden_states, encoder_attention_mask, past_key_value, output_attentions)\u001b[0m\n\u001b[1;32m 334\u001b[0m \u001b[0moutput_attentions\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mOptional\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mbool\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 335\u001b[0m ) -> Tuple[torch.Tensor]:\n\u001b[0;32m--> 336\u001b[0;31m self_outputs = self.self(\n\u001b[0m\u001b[1;32m 337\u001b[0m 
\u001b[0mhidden_states\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 338\u001b[0m \u001b[0mattention_mask\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/ec/pdx/disks/mlp_lab_home_pool_02/sguskin/dynamic/frameworks.ai.intel-extension-for-transformers.intel-intel-extension-for-transformers/intel_extension_for_transformers/optimization/utils/models/modeling_roberta_dynamic.py\u001b[0m in \u001b[0;36mforward\u001b[0;34m(self, hidden_states, attention_mask, head_mask, encoder_hidden_states, encoder_attention_mask, past_key_value, output_attentions)\u001b[0m\n\u001b[1;32m 334\u001b[0m \u001b[0moutput_attentions\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mOptional\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mbool\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 335\u001b[0m ) -> Tuple[torch.Tensor]:\n\u001b[0;32m--> 336\u001b[0;31m self_outputs = self.self(\n\u001b[0m\u001b[1;32m 337\u001b[0m \u001b[0mhidden_states\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 338\u001b[0m \u001b[0mattention_mask\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/anaconda3/lib/python3.8/site-packages/torch/nn/modules/module.py\u001b[0m in \u001b[0;36m_call_impl\u001b[0;34m(self, *input, **kwargs)\u001b[0m\n\u001b[1;32m 1128\u001b[0m if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks\n\u001b[1;32m 1129\u001b[0m or _global_forward_hooks or _global_forward_pre_hooks):\n\u001b[0;32m-> 1130\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mforward_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1131\u001b[0m \u001b[0;31m# Do not call functions when jit is used\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1132\u001b[0m \u001b[0mfull_backward_hooks\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnon_full_backward_hooks\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/ec/pdx/disks/mlp_lab_home_pool_02/sguskin/dynamic/frameworks.ai.nlp-toolkit.intel-nlp-toolkit/nlp_toolkit/optimization/utils/models/modeling_roberta_dynamic.py\u001b[0m in \u001b[0;36mforward\u001b[0;34m(self, hidden_states, attention_mask, head_mask, encoder_hidden_states, encoder_attention_mask, past_key_value, output_attentions)\u001b[0m\n\u001b[1;32m 257\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mattention_mask\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 258\u001b[0m \u001b[0;31m# Apply the attention mask is (precomputed for all layers in RobertaModel forward() function)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 259\u001b[0;31m \u001b[0mattention_scores\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mattention_scores\u001b[0m \u001b[0;34m+\u001b[0m 
\u001b[0mattention_mask\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 260\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 261\u001b[0m \u001b[0;31m# Normalize the attention scores to probabilities.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/ec/pdx/disks/mlp_lab_home_pool_02/sguskin/dynamic/frameworks.ai.intel-extension-for-transformers.intel-intel-extension-for-transformers/intel_extension_for_transformers/optimization/utils/models/modeling_roberta_dynamic.py\u001b[0m in \u001b[0;36mforward\u001b[0;34m(self, hidden_states, attention_mask, head_mask, encoder_hidden_states, encoder_attention_mask, past_key_value, output_attentions)\u001b[0m\n\u001b[1;32m 257\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mattention_mask\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 258\u001b[0m \u001b[0;31m# Apply the attention mask is (precomputed for all layers in RobertaModel forward() function)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 259\u001b[0;31m \u001b[0mattention_scores\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mattention_scores\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mattention_mask\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 260\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 261\u001b[0m \u001b[0;31m# Normalize the attention scores to probabilities.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mKeyboardInterrupt\u001b[0m: " ] } @@ -2039,7 +2039,7 @@ "2022-07-31 04:27:30 [INFO] }\n", "2022-07-31 04:27:30 [INFO] metric: 89.28118310884184\n", "2022-07-31 04:27:30 [INFO] Throughput: None samples/sec\n", - "2022-07-31 04:27:30 [INFO] Save tuning history to /ec/pdx/disks/mlp_lab_home_pool_02/sguskin/dynamic/frameworks.ai.nlp-toolkit.intel-nlp-toolkit/docs/tutorials/pytorch/question-answering/nc_workspace/2022-07-31_03-23-24/./history.snapshot.\n", + "2022-07-31 04:27:30 [INFO] Save tuning history to /ec/pdx/disks/mlp_lab_home_pool_02/sguskin/dynamic/frameworks.ai.intel-extension-for-transformers.intel-intel-extension-for-transformers/docs/tutorials/pytorch/question-answering/nc_workspace/2022-07-31_03-23-24/./history.snapshot.\n", "2022-07-31 04:27:30 [INFO] FP32 baseline is: [Accuracy: 89.2812, Duration (seconds): 205.1094]\n", "2022-07-31 04:27:30 [INFO] Fx trace of the entire model failed, We will conduct auto quantization\n", "2022-07-31 04:27:31 [WARNING] Please note that calibration sampling size 100 isn't divisible exactly by batch size 8. 
So the real sampling size is 104.\n", @@ -2070,7 +2070,7 @@ " val = self.objectives.evaluate(\n", " File \"/nfs/site/home/sguskin/anaconda3/lib/python3.8/site-packages/neural_compressor/objective.py\", line 266, in evaluate\n", " acc = eval_func(model)\n", - " File \"/ec/pdx/disks/mlp_lab_home_pool_02/sguskin/dynamic/frameworks.ai.nlp-toolkit.intel-nlp-toolkit/nlp_toolkit/optimization/trainer.py\", line 152, in builtin_eval_func\n", + " File \"/ec/pdx/disks/mlp_lab_home_pool_02/sguskin/dynamic/frameworks.ai.intel-extension-for-transformers.intel-intel-extension-for-transformers/intel_extension_for_transformers/optimization/trainer.py\", line 152, in builtin_eval_func\n", " results = self.evaluate()\n", " File \"\", line 15, in evaluate\n", " output = eval_loop(\n", @@ -2080,15 +2080,15 @@ " outputs = model(**inputs)\n", " File \"/nfs/site/home/sguskin/anaconda3/lib/python3.8/site-packages/torch/nn/modules/module.py\", line 1130, in _call_impl\n", " return forward_call(*input, **kwargs)\n", - " File \"/ec/pdx/disks/mlp_lab_home_pool_02/sguskin/dynamic/frameworks.ai.nlp-toolkit.intel-nlp-toolkit/nlp_toolkit/optimization/utils/models/modeling_roberta_dynamic.py\", line 1584, in forward\n", + " File \"/ec/pdx/disks/mlp_lab_home_pool_02/sguskin/dynamic/frameworks.ai.intel-extension-for-transformers.intel-intel-extension-for-transformers/intel_extension_for_transformers/optimization/utils/models/modeling_roberta_dynamic.py\", line 1584, in forward\n", " outputs = self.roberta(\n", " File \"/nfs/site/home/sguskin/anaconda3/lib/python3.8/site-packages/torch/nn/modules/module.py\", line 1130, in _call_impl\n", " return forward_call(*input, **kwargs)\n", - " File \"/ec/pdx/disks/mlp_lab_home_pool_02/sguskin/dynamic/frameworks.ai.nlp-toolkit.intel-nlp-toolkit/nlp_toolkit/optimization/utils/models/modeling_roberta_dynamic.py\", line 899, in forward\n", + " File \"/ec/pdx/disks/mlp_lab_home_pool_02/sguskin/dynamic/frameworks.ai.intel-extension-for-transformers.intel-intel-extension-for-transformers/intel_extension_for_transformers/optimization/utils/models/modeling_roberta_dynamic.py\", line 899, in forward\n", " embedding_output = self.embeddings(\n", " File \"/nfs/site/home/sguskin/anaconda3/lib/python3.8/site-packages/torch/nn/modules/module.py\", line 1130, in _call_impl\n", " return forward_call(*input, **kwargs)\n", - " File \"/ec/pdx/disks/mlp_lab_home_pool_02/sguskin/dynamic/frameworks.ai.nlp-toolkit.intel-nlp-toolkit/nlp_toolkit/optimization/utils/models/modeling_roberta_dynamic.py\", line 129, in forward\n", + " File \"/ec/pdx/disks/mlp_lab_home_pool_02/sguskin/dynamic/frameworks.ai.intel-extension-for-transformers.intel-intel-extension-for-transformers/intel_extension_for_transformers/optimization/utils/models/modeling_roberta_dynamic.py\", line 129, in forward\n", " token_type_embeddings = self.token_type_embeddings(token_type_ids)\n", " File \"/nfs/site/home/sguskin/anaconda3/lib/python3.8/site-packages/torch/fx/graph_module.py\", line 652, in call_wrapped\n", " return self._wrapped_call(self, *args, **kwargs)\n", @@ -2119,9 +2119,9 @@ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 49\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 50\u001b[0m \u001b[0;31m# run quantization\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 51\u001b[0;31m 
\u001b[0mquant_dynamic_trainer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mquantize\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mquant_config\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mquantization_config\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[0;32m/ec/pdx/disks/mlp_lab_home_pool_02/sguskin/dynamic/frameworks.ai.nlp-toolkit.intel-nlp-toolkit/nlp_toolkit/optimization/trainer.py\u001b[0m in \u001b[0;36mquantize\u001b[0;34m(self, quant_config, provider, eval_func, train_func, calib_dataloader)\u001b[0m\n\u001b[1;32m 298\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_nncf_quantize\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 299\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_provider\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0mProvider\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mINC\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalue\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 300\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_inc_quantize\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mquant_config\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mquant_config\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mprovider\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mprovider\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 301\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 302\u001b[0m \u001b[0;32massert\u001b[0m \u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"Unsupport provider:{}\"\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_provider\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/ec/pdx/disks/mlp_lab_home_pool_02/sguskin/dynamic/frameworks.ai.nlp-toolkit.intel-nlp-toolkit/nlp_toolkit/optimization/trainer.py\u001b[0m in \u001b[0;36m_inc_quantize\u001b[0;34m(self, quant_config, provider)\u001b[0m\n\u001b[1;32m 272\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0menable_inc_quant\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 273\u001b[0m \u001b[0;31m# pylint: disable=E1101\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 274\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_save_inc_int8\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mopt_model\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0moutput_dir\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 275\u001b[0m \u001b[0;31m# pylint: disable=E1101\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 276\u001b[0m logger.info(\n", - "\u001b[0;32m/ec/pdx/disks/mlp_lab_home_pool_02/sguskin/dynamic/frameworks.ai.nlp-toolkit.intel-nlp-toolkit/nlp_toolkit/optimization/trainer.py\u001b[0m in \u001b[0;36m_save_inc_int8\u001b[0;34m(self, opt_model, output_dir)\u001b[0m\n\u001b[1;32m 308\u001b[0m weights_file = os.path.join(os.path.abspath(\n\u001b[1;32m 
309\u001b[0m os.path.expanduser(output_dir)), WEIGHTS_NAME)\n\u001b[0;32m--> 310\u001b[0;31m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msave\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mopt_model\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mquantized_state_dict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mweights_file\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 311\u001b[0m logger.info(\n\u001b[1;32m 312\u001b[0m \u001b[0;34m\"quantized model and configure file have saved to {}\"\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mweights_file\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/ec/pdx/disks/mlp_lab_home_pool_02/sguskin/dynamic/frameworks.ai.intel-extension-for-transformers.intel-intel-extension-for-transformers/intel_extension_for_transformers/optimization/trainer.py\u001b[0m in \u001b[0;36mquantize\u001b[0;34m(self, quant_config, provider, eval_func, train_func, calib_dataloader)\u001b[0m\n\u001b[1;32m 298\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_nncf_quantize\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 299\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_provider\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0mProvider\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mINC\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalue\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 300\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_inc_quantize\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mquant_config\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mquant_config\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mprovider\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mprovider\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 301\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 302\u001b[0m \u001b[0;32massert\u001b[0m \u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"Unsupport provider:{}\"\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_provider\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/ec/pdx/disks/mlp_lab_home_pool_02/sguskin/dynamic/frameworks.ai.intel-extension-for-transformers.intel-intel-extension-for-transformers/intel_extension_for_transformers/optimization/trainer.py\u001b[0m in \u001b[0;36m_inc_quantize\u001b[0;34m(self, quant_config, provider)\u001b[0m\n\u001b[1;32m 272\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0menable_inc_quant\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 273\u001b[0m \u001b[0;31m# pylint: disable=E1101\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 274\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_save_inc_int8\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mopt_model\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0moutput_dir\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 275\u001b[0m \u001b[0;31m# pylint: disable=E1101\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 276\u001b[0m logger.info(\n", + "\u001b[0;32m/ec/pdx/disks/mlp_lab_home_pool_02/sguskin/dynamic/frameworks.ai.intel-extension-for-transformers.intel-intel-extension-for-transformers/intel_extension_for_transformers/optimization/trainer.py\u001b[0m in \u001b[0;36m_save_inc_int8\u001b[0;34m(self, opt_model, output_dir)\u001b[0m\n\u001b[1;32m 308\u001b[0m weights_file = os.path.join(os.path.abspath(\n\u001b[1;32m 309\u001b[0m os.path.expanduser(output_dir)), WEIGHTS_NAME)\n\u001b[0;32m--> 310\u001b[0;31m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msave\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mopt_model\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mquantized_state_dict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mweights_file\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 311\u001b[0m logger.info(\n\u001b[1;32m 312\u001b[0m \u001b[0;34m\"quantized model and configure file have saved to {}\"\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mweights_file\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mAttributeError\u001b[0m: 'NoneType' object has no attribute 'quantized_state_dict'" ] } diff --git a/docs/tutorials/pytorch/question-answering/benchmark.py b/docs/tutorials/pytorch/question-answering/benchmark.py index 31ab5fd5bb1..be1a498e02c 100644 --- a/docs/tutorials/pytorch/question-answering/benchmark.py +++ b/docs/tutorials/pytorch/question-answering/benchmark.py @@ -3,8 +3,8 @@ import numpy as np import random from datasets import load_dataset, load_metric -from nlp_toolkit import OptimizedModel -from nlp_toolkit.optimization.trainer import NLPTrainer +from intel_extension_for_transformers import OptimizedModel +from intel_extension_for_transformers.optimization.trainer import NLPTrainer from argparse import ArgumentParser import timeit import collections diff --git a/docs/tutorials/pytorch/question-answering/bert-base-uncased_distilled-squad.ipynb b/docs/tutorials/pytorch/question-answering/bert-base-uncased_distilled-squad.ipynb index 653f0201343..ba204a0e76a 100644 --- a/docs/tutorials/pytorch/question-answering/bert-base-uncased_distilled-squad.ipynb +++ b/docs/tutorials/pytorch/question-answering/bert-base-uncased_distilled-squad.ipynb @@ -69,7 +69,7 @@ } }, "source": [ - "* Follow [installation](https://github.com/intel-innersource/frameworks.ai.nlp-toolkit.intel-nlp-toolkit#installation) to install **nlp-toolkit**. " + "* Follow [installation](https://github.com/intel/intel-extension-for-transformers#installation) to install **intel-extension-for-transformers**. 
" ] }, { @@ -116,7 +116,7 @@ "import transformers\n", "from dataclasses import dataclass, field\n", "from datasets import load_dataset, load_metric\n", - "from nlp_toolkit import metrics , QuantizationConfig\n", + "from intel_extension_for_transformers import metrics , QuantizationConfig\n", "from transformers import (\n", " AutoConfig,\n", " AutoModelForQuestionAnswering,\n", @@ -161,7 +161,7 @@ "outputs": [], "source": [ "from transformers import is_torch_tpu_available\n", - "from nlp_toolkit.optimization.trainer import NLPTrainer\n", + "from intel_extension_for_transformers.optimization.trainer import NLPTrainer\n", "from transformers.trainer_utils import PredictionOutput\n", "\n", "if is_torch_tpu_available():\n", diff --git a/docs/tutorials/pytorch/question-answering/distillation.ipynb b/docs/tutorials/pytorch/question-answering/distillation.ipynb index 3d69219d04e..77f2b5431c8 100644 --- a/docs/tutorials/pytorch/question-answering/distillation.ipynb +++ b/docs/tutorials/pytorch/question-answering/distillation.ipynb @@ -32,7 +32,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "* Follow [installation](https://github.com/intel-innersource/frameworks.ai.nlp-toolkit.intel-nlp-toolkit#installation) to install **nlp-toolkit**. " + "* Follow [installation](https://github.com/intel/intel-extension-for-transformers#installation) to install **intel-extension-for-transformers**. " ] }, { @@ -73,7 +73,7 @@ "import time\n", "import torch\n", "import transformers\n", - "from nlp_toolkit import metrics, OptimizedModel, DistillationConfig\n", + "from intel_extension_for_transformers import metrics, OptimizedModel, DistillationConfig\n", "from torch.utils.data import DataLoader\n", "from tqdm import tqdm\n", "from trainer_qa import QuestionAnsweringTrainer\n", diff --git a/docs/tutorials/pytorch/summarization/benchmark.py b/docs/tutorials/pytorch/summarization/benchmark.py index a6e1fb449fa..e070db9c719 100644 --- a/docs/tutorials/pytorch/summarization/benchmark.py +++ b/docs/tutorials/pytorch/summarization/benchmark.py @@ -3,8 +3,8 @@ import numpy as np import nltk from datasets import load_dataset, load_metric -from nlp_toolkit import metrics, OptimizedModel -from nlp_toolkit.optimization.trainer import NLPSeq2SeqTrainer +from intel_extension_for_transformers import metrics, OptimizedModel +from intel_extension_for_transformers.optimization.trainer import NLPSeq2SeqTrainer from argparse import ArgumentParser from transformers import ( AutoConfig, diff --git a/docs/tutorials/pytorch/summarization/pegasus-samsum.ipynb b/docs/tutorials/pytorch/summarization/pegasus-samsum.ipynb index ee2bc2f35c4..ca2d9da4c6b 100644 --- a/docs/tutorials/pytorch/summarization/pegasus-samsum.ipynb +++ b/docs/tutorials/pytorch/summarization/pegasus-samsum.ipynb @@ -57,7 +57,7 @@ } }, "source": [ - "* Follow [installation](https://github.com/intel-innersource/frameworks.ai.nlp-toolkit.intel-nlp-toolkit#installation) to install **nlp-toolkit**. " + "* Follow [installation](https://github.com/intel/intel-extension-for-transformers#installation) to install **intel-extension-for-transformers**. 
" ] }, { @@ -110,9 +110,9 @@ "from datasets import load_dataset, load_metric\n", "\n", "from filelock import FileLock\n", - "from nlp_toolkit import OptimizedModel, QuantizationConfig\n", - "from nlp_toolkit import metrics as nlp_metrics\n", - "from nlp_toolkit.optimization.trainer import NLPSeq2SeqTrainer\n", + "from intel_extension_for_transformers import OptimizedModel, QuantizationConfig\n", + "from intel_extension_for_transformers import metrics as nlp_metrics\n", + "from intel_extension_for_transformers.optimization.trainer import NLPSeq2SeqTrainer\n", "from transformers import (\n", " AutoConfig,\n", " AutoModelForSeq2SeqLM,\n", diff --git a/docs/tutorials/pytorch/text-classification/benchmark.py b/docs/tutorials/pytorch/text-classification/benchmark.py index a81c3685550..eac4d67ec7d 100644 --- a/docs/tutorials/pytorch/text-classification/benchmark.py +++ b/docs/tutorials/pytorch/text-classification/benchmark.py @@ -3,8 +3,8 @@ import numpy as np import random from datasets import load_dataset, load_metric -from nlp_toolkit import OptimizedModel -from nlp_toolkit.optimization.trainer import NLPTrainer +from intel_extension_for_transformers import OptimizedModel +from intel_extension_for_transformers.optimization.trainer import NLPTrainer from argparse import ArgumentParser from transformers import ( AutoConfig, diff --git a/docs/tutorials/pytorch/text-classification/bert-base-uncased-MRPC.ipynb b/docs/tutorials/pytorch/text-classification/bert-base-uncased-MRPC.ipynb index 415a32ee8ab..9f37babceea 100644 --- a/docs/tutorials/pytorch/text-classification/bert-base-uncased-MRPC.ipynb +++ b/docs/tutorials/pytorch/text-classification/bert-base-uncased-MRPC.ipynb @@ -65,7 +65,7 @@ } }, "source": [ - "* Follow [installation](https://github.com/intel-innersource/frameworks.ai.nlp-toolkit.intel-nlp-toolkit#installation) to install **nlp-toolkit**. " + "* Follow [installation](https://github.com/intel/intel-extension-for-transformers#installation) to install **intel-extension-for-transformers**. " ] }, { @@ -113,8 +113,8 @@ "import transformers\n", "from dataclasses import dataclass, field\n", "from datasets import load_dataset, load_metric\n", - "from nlp_toolkit import metrics, objectives, OptimizedModel, QuantizationConfig\n", - "from nlp_toolkit.optimization.trainer import NLPTrainer\n", + "from intel_extension_for_transformers import metrics, objectives, OptimizedModel, QuantizationConfig\n", + "from intel_extension_for_transformers.optimization.trainer import NLPTrainer\n", "from transformers import (\n", " AutoConfig,\n", " AutoModelForSequenceClassification,\n", diff --git a/docs/tutorials/pytorch/text-classification/distillation.ipynb b/docs/tutorials/pytorch/text-classification/distillation.ipynb index 069a9397665..37aed3889f8 100644 --- a/docs/tutorials/pytorch/text-classification/distillation.ipynb +++ b/docs/tutorials/pytorch/text-classification/distillation.ipynb @@ -32,7 +32,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "* Follow [installation](https://github.com/intel-innersource/frameworks.ai.nlp-toolkit.intel-nlp-toolkit#installation) to install **nlp-toolkit**. " + "* Follow [installation](https://github.com/intel/intel-extension-for-transformers#installation) to install **intel-extension-for-transformers**. 
" ] }, { @@ -70,12 +70,12 @@ "import transformers\n", "from dataclasses import dataclass, field\n", "from datasets import load_dataset, load_metric\n", - "from nlp_toolkit import (\n", + "from intel_extension_for_transformers import (\n", " metrics,\n", " DistillationConfig,\n", " OptimizedModel,\n", ")\n", - "from nlp_toolkit.optimization.trainer import NLPTrainer\n", + "from intel_extension_for_transformers.optimization.trainer import NLPTrainer\n", "from torch.utils.data import DataLoader\n", "from tqdm.auto import tqdm\n", "from transformers import (\n", diff --git a/docs/tutorials/pytorch/token-classification/benchmark.py b/docs/tutorials/pytorch/token-classification/benchmark.py index fe07594f53b..9c2c083c672 100644 --- a/docs/tutorials/pytorch/token-classification/benchmark.py +++ b/docs/tutorials/pytorch/token-classification/benchmark.py @@ -2,8 +2,8 @@ import os import numpy as np from datasets import ClassLabel, load_dataset, load_metric -from nlp_toolkit import OptimizedModel -from nlp_toolkit.optimization.trainer import NLPTrainer +from intel_extension_for_transformers import OptimizedModel +from intel_extension_for_transformers.optimization.trainer import NLPTrainer from argparse import ArgumentParser from transformers import ( AutoConfig, diff --git a/docs/tutorials/pytorch/token-classification/distilbert_base_ner.ipynb b/docs/tutorials/pytorch/token-classification/distilbert_base_ner.ipynb index 911ac76624b..b9ec35d3000 100644 --- a/docs/tutorials/pytorch/token-classification/distilbert_base_ner.ipynb +++ b/docs/tutorials/pytorch/token-classification/distilbert_base_ner.ipynb @@ -57,7 +57,7 @@ } }, "source": [ - "* Follow [installation](https://github.com/intel-innersource/frameworks.ai.nlp-toolkit.intel-nlp-toolkit#installation) to install **nlp-toolkit**. " + "* Follow [installation](https://github.com/intel/intel-extension-for-transformers#installation) to install **intel-extension-for-transformers**. 
" ] }, { @@ -105,12 +105,12 @@ "import transformers\n", "from dataclasses import dataclass, field\n", "from datasets import ClassLabel, load_dataset, load_metric\n", - "from nlp_toolkit import(\n", + "from intel_extension_for_transformers import(\n", " metrics,\n", " OptimizedModel,\n", " QuantizationConfig,\n", ")\n", - "from nlp_toolkit.optimization.trainer import NLPTrainer\n", + "from intel_extension_for_transformers.optimization.trainer import NLPTrainer\n", "from transformers import (\n", " AutoConfig,\n", " AutoModelForTokenClassification,\n", diff --git a/docs/tutorials/pytorch/translation/benchmark.py b/docs/tutorials/pytorch/translation/benchmark.py index 408cc419083..30fbc5b2e59 100644 --- a/docs/tutorials/pytorch/translation/benchmark.py +++ b/docs/tutorials/pytorch/translation/benchmark.py @@ -2,8 +2,8 @@ import os import numpy as np from datasets import load_dataset, load_metric -from nlp_toolkit import OptimizedModel -from nlp_toolkit.optimization.trainer import NLPSeq2SeqTrainer +from intel_extension_for_transformers import OptimizedModel +from intel_extension_for_transformers.optimization.trainer import NLPSeq2SeqTrainer from argparse import ArgumentParser from transformers import ( AutoConfig, diff --git a/docs/tutorials/pytorch/translation/t5-small.ipynb b/docs/tutorials/pytorch/translation/t5-small.ipynb index 9fef071f66b..8af965a2c23 100644 --- a/docs/tutorials/pytorch/translation/t5-small.ipynb +++ b/docs/tutorials/pytorch/translation/t5-small.ipynb @@ -57,7 +57,7 @@ } }, "source": [ - "* Follow [installation](https://github.com/intel-innersource/frameworks.ai.nlp-toolkit.intel-nlp-toolkit#installation) to install **nlp-toolkit**. " + "* Follow [installation](https://github.com/intel/intel-extension-for-transformers#installation) to install **intel-extension-for-transformers**. " ] }, { @@ -108,9 +108,9 @@ "import numpy as np\n", "from datasets import load_dataset, load_metric\n", "\n", - "from nlp_toolkit import OptimizedModel, QuantizationConfig\n", - "from nlp_toolkit import metrics as nlp_metrics\n", - "from nlp_toolkit.optimization.trainer import NLPSeq2SeqTrainer\n", + "from intel_extension_for_transformers import OptimizedModel, QuantizationConfig\n", + "from intel_extension_for_transformers import metrics as nlp_metrics\n", + "from intel_extension_for_transformers.optimization.trainer import NLPSeq2SeqTrainer\n", "import transformers\n", "from transformers import (\n", " AutoConfig,\n", diff --git a/examples/deployment/README.md b/examples/deployment/README.md index 85f32c9b636..4518b6f6120 100644 --- a/examples/deployment/README.md +++ b/examples/deployment/README.md @@ -1,19 +1,13 @@ # Deployment -NLP Toolkit provides multiple reference deployments: 1) [**Neural Engine**](neural_engine); 2) [**IPEX**](ipex/). +Intel Extension for Transformers provides multiple reference deployments: 1) [**Neural Engine**](neural_engine); 2) [IPEX](ipex/). ## Neural Engine -Neural Engine can provide the optimal performance of extremely compressed NLP models, the optimization is both from HW and SW.It's a reference deployment for NLPToolkit, we will enable other backends. +Neural Engine can provide the optimal performance of extremely compressed transformer based models, the optimization is both from HW and SW. It's a reference deployment for Intel Extension for Transformers, we will enable other backends. 
Supported Examples | Question-Answering | Text-Classification | |:---:|:---:| -|[Bert-large (SQUAD)](https://github.com/intel-innersource/frameworks.ai.nlp-toolkit.intel-nlp-toolkit/tree/develop/examples/deployment/neural_engine/squad/bert_large)|[Bert-mini (SST2)](https://github.com/intel-innersource/frameworks.ai.nlp-toolkit.intel-nlp-toolkit/tree/develop/examples/deployment/neural_engine/sst2/bert_mini)
[MiniLM (SST2)](https://github.com/intel-innersource/frameworks.ai.nlp-toolkit.intel-nlp-toolkit/tree/develop/examples/deployment/neural_engine/sst2/minilm_l6_h384_uncased)
[Distilbert (SST2)](https://github.com/intel-innersource/frameworks.ai.nlp-toolkit.intel-nlp-toolkit/tree/develop/examples/deployment/neural_engine/sst2/distilbert_base_uncased)
[Distilbert (Emotion)](https://github.com/intel-innersource/frameworks.ai.nlp-toolkit.intel-nlp-toolkit/tree/develop/examples/deployment/neural_engine/emotion/distilbert_base_uncased)
[Bert-base (MRPC)](https://github.com/intel-innersource/frameworks.ai.nlp-toolkit.intel-nlp-toolkit/tree/develop/examples/deployment/neural_engine/mrpc/bert_base)
[Bert-mini (MRPC)](https://github.com/intel-innersource/frameworks.ai.nlp-toolkit.intel-nlp-toolkit/tree/develop/examples/deployment/neural_engine/mrpc/bert_mini)
[Distilbert (MRPC)](https://github.com/intel-innersource/frameworks.ai.nlp-toolkit.intel-nlp-toolkit/tree/develop/examples/deployment/neural_engine/mrpc/distilbert_base_uncased)
[Roberta-base (MRPC)](https://github.com/intel-innersource/frameworks.ai.nlp-toolkit.intel-nlp-toolkit/tree/develop/examples/deployment/neural_engine/mrpc/roberta_base)
| - -### Architecture -Here is the architecture of reference deployment: - - Infrastructure - +|[Bert-large (SQUAD)](https://github.com/intel/intel-extension-for-transformers/tree/develop/examples/deployment/neural_engine/squad/bert_large)|[Bert-mini (SST2)](https://github.com/intel/intel-extension-for-transformers/tree/develop/examples/deployment/neural_engine/sst2/bert_mini)
[MiniLM (SST2)](https://github.com/intel/intel-extension-for-transformers/tree/develop/examples/deployment/neural_engine/sst2/minilm_l6_h384_uncased)
[Distilbert (SST2)](https://github.com/intel/intel-extension-for-transformers/tree/develop/examples/deployment/neural_engine/sst2/distilbert_base_uncased)
[Distilbert (Emotion)](https://github.com/intel/intel-extension-for-transformers/tree/develop/examples/deployment/neural_engine/emotion/distilbert_base_uncased)
[Bert-base (MRPC)](https://github.com/intel/intel-extension-for-transformers/tree/develop/examples/deployment/neural_engine/mrpc/bert_base)
[Bert-mini (MRPC)](https://github.com/intel/intel-extension-for-transformers/tree/develop/examples/deployment/neural_engine/mrpc/bert_mini)
[Distilbert (MRPC)](https://github.com/intel/intel-extension-for-transformers/tree/develop/examples/deployment/neural_engine/mrpc/distilbert_base_uncased)
[Roberta-base (MRPC)](https://github.com/intel/intel-extension-for-transformers/tree/develop/examples/deployment/neural_engine/mrpc/roberta_base)
| #### Installation Linux is supported only. @@ -37,15 +31,15 @@ pip install neural-compressor ##### 2. Build neural engine ``` -cd  +cd  python setup.py install/develop ``` -After succesful build, you will see `neural_engine` in the nlp_toolkit/build folder. +After successful build, you will see `neural_engine` in the intel_extension_for_transformers/build folder. ##### 3. Generate optimal BERT model ``` -from nlp_toolkit.backends.neural_engine.compile import compile +from intel_extension_for_transformers.backends.neural_engine.compile import compile model = compile('/path/to/your/model') model.save('/ir/path') ``` @@ -68,14 +62,16 @@ Open/Close Log:(GLOG_minloglevel=1/GLOG_minloglevel=2) ###### 4.2. Python API -If you use pip install -e . to install the neural engine in your current folder, please make sure to export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/path/to/your/libneural_engine.so. +If you use python setup.py install to install the neural engine in your current folder, you can use the Python API as follows. ```python -from neural_engine_py import Model -# load the model, config_path:path of generated yaml, weight_path: path of generated bin -model = Model(config_path, weight_path) -# use model.forward to do inference -out = model.forward([input_ids, segment_ids, input_mask]) +from intel_extension_for_transformers.backends.neural_engine.compile import compile +# load the model +graph = compile('./model_and_tokenizer/int8-model.onnx') +# use graph.inference to do inference +out = graph.inference([input_ids, segment_ids, input_mask]) +# dump the neural engine IR to file +graph.save('./ir') ``` The `input_ids`, `segment_ids` and `input_mask` are the input numpy array data of BERT model, and the input dimension is (batch_size x seq_len). @@ -89,12 +85,3 @@ It will dump latency of each operator to /engine_profiling/profiling_ ## IPEX Intel® Extension for PyTorch* extends PyTorch with optimizations for extra performance boost on Intel hardware. 
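To make the Python API hunk above concrete, here is a minimal end-to-end sketch. Only `compile`, `graph.inference`, and `graph.save` come from the README change itself; the IR path, the 1 x 128 input shape, the int32 dtype, and the zero/one placeholder arrays are illustrative assumptions (real token ids would come from a tokenizer).

```python
import numpy as np
from intel_extension_for_transformers.backends.neural_engine.compile import compile

# compile the quantized ONNX model into a Neural Engine graph
graph = compile('./model_and_tokenizer/int8-model.onnx')

# placeholder inputs with the (batch_size x seq_len) layout described above;
# seq_len=128 and int32 are assumptions for illustration only
batch_size, seq_len = 1, 128
input_ids = np.zeros((batch_size, seq_len), dtype=np.int32)    # token ids from a tokenizer
segment_ids = np.zeros((batch_size, seq_len), dtype=np.int32)  # token type ids
input_mask = np.ones((batch_size, seq_len), dtype=np.int32)    # attention mask

# run inference on the compiled graph
out = graph.inference([input_ids, segment_ids, input_mask])

# dump the IR (conf.yaml and model.bin under ./ir) for the neural_engine binary
graph.save('./ir')
```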
-### Validated Question-Answering model list - -|Dataset|Pretrained model|PostTrainingDynamic | PostTrainingStatic | QuantizationAwareTraining -|---|------------------------------------|---|---|--- -|squad|distilbert-base-uncased-distilled-squad| N/A| ✅| N/A -|squad|bert-large-uncased-whole-word-masking-finetuned-squad| N/A| ✅| N/A - - - diff --git a/examples/deployment/ipex/common.py b/examples/deployment/ipex/common.py index 79fcf5ac888..292a17d7875 100644 --- a/examples/deployment/ipex/common.py +++ b/examples/deployment/ipex/common.py @@ -19,8 +19,8 @@ import logging import sys import numpy as np -from nlp_toolkit.backends.neural_engine.compile import compile -from nlp_toolkit.backends.neural_engine.compile.graph import Graph +from intel_extension_for_transformers.backends.neural_engine.compile import compile +from intel_extension_for_transformers.backends.neural_engine.compile.graph import Graph from tqdm import tqdm import time @@ -41,7 +41,7 @@ def set_log_file(log, log_file): def load_graph(model_path): if os.path.exists(model_path): if os.path.isdir(model_path): - from nlp_toolkit import OptimizedModel + from intel_extension_for_transformers import OptimizedModel graph = OptimizedModel.from_pretrained(model_path) else: graph = compile(model_path) diff --git a/examples/deployment/ipex/squad/bert_large/README.md b/examples/deployment/ipex/squad/bert_large/README.md index 363bcf63fe5..513e0007fd9 100644 --- a/examples/deployment/ipex/squad/bert_large/README.md +++ b/examples/deployment/ipex/squad/bert_large/README.md @@ -22,18 +22,18 @@ sudo apt install autoconf ``` Install NLPTookit from source code ```shell -cd +cd git submodule update --init --recursive python setup.py install ``` Install package for examples ```shell -cd /examples/deployment/neural_engine/squad/bert_large +cd /examples/deployment/neural_engine/squad/bert_large pip install -r requirements.txt ``` 1.2 Environment variables Preload libjemalloc.so can improve the performance when multi instance. ``` -export LD_PRELOAD=/nlp_toolkit/backends/neural_engine/executor/third_party/jemalloc/lib/libjemalloc.so +export LD_PRELOAD=/intel_extension_for_transformers/backends/neural_engine/executor/third_party/jemalloc/lib/libjemalloc.so ``` Using weight sharing can save memory and improve the performance when multi instance. 
``` diff --git a/examples/deployment/ipex/squad/bert_large/run_qa.py b/examples/deployment/ipex/squad/bert_large/run_qa.py index 50cec38f5fd..da39c77af02 100644 --- a/examples/deployment/ipex/squad/bert_large/run_qa.py +++ b/examples/deployment/ipex/squad/bert_large/run_qa.py @@ -26,7 +26,7 @@ import transformers from dataclasses import dataclass, field from datasets import load_dataset, load_metric -from nlp_toolkit import metrics , OptimizedModel, QuantizationConfig +from intel_extension_for_transformers import metrics , OptimizedModel, QuantizationConfig from trainer_qa import QuestionAnsweringTrainer from transformers import ( AutoConfig, diff --git a/examples/deployment/ipex/squad/bert_large/trainer_qa.py b/examples/deployment/ipex/squad/bert_large/trainer_qa.py index 467777ed906..21d1f7ec3bf 100644 --- a/examples/deployment/ipex/squad/bert_large/trainer_qa.py +++ b/examples/deployment/ipex/squad/bert_large/trainer_qa.py @@ -17,7 +17,7 @@ """ from transformers import is_torch_tpu_available -from nlp_toolkit.optimization.trainer import NLPTrainer +from intel_extension_for_transformers.optimization.trainer import NLPTrainer from transformers.trainer_utils import PredictionOutput diff --git a/examples/deployment/ipex/squad/distillbert_base_uncased/README.md b/examples/deployment/ipex/squad/distillbert_base_uncased/README.md index 26017e51e3c..99ca732d59a 100644 --- a/examples/deployment/ipex/squad/distillbert_base_uncased/README.md +++ b/examples/deployment/ipex/squad/distillbert_base_uncased/README.md @@ -22,18 +22,18 @@ sudo apt install autoconf ``` Install NLPTookit from source code ```shell -cd +cd git submodule update --init --recursive python setup.py install ``` Install package for examples ```shell -cd /examples/deployment/neural_engine/squad/bert_large +cd /examples/deployment/neural_engine/squad/bert_large pip install -r requirements.txt ``` 1.2 Environment variables Preload libjemalloc.so can improve the performance when multi instance. ``` -export LD_PRELOAD=/nlp_toolkit/backends/neural_engine/executor/third_party/jemalloc/lib/libjemalloc.so +export LD_PRELOAD=/intel_extension_for_transformers/backends/neural_engine/executor/third_party/jemalloc/lib/libjemalloc.so ``` Using weight sharing can save memory and improve the performance when multi instance. 
``` diff --git a/examples/deployment/ipex/squad/distillbert_base_uncased/run_qa.py b/examples/deployment/ipex/squad/distillbert_base_uncased/run_qa.py index 50cec38f5fd..da39c77af02 100644 --- a/examples/deployment/ipex/squad/distillbert_base_uncased/run_qa.py +++ b/examples/deployment/ipex/squad/distillbert_base_uncased/run_qa.py @@ -26,7 +26,7 @@ import transformers from dataclasses import dataclass, field from datasets import load_dataset, load_metric -from nlp_toolkit import metrics , OptimizedModel, QuantizationConfig +from intel_extension_for_transformers import metrics , OptimizedModel, QuantizationConfig from trainer_qa import QuestionAnsweringTrainer from transformers import ( AutoConfig, diff --git a/examples/deployment/ipex/squad/distillbert_base_uncased/trainer_qa.py b/examples/deployment/ipex/squad/distillbert_base_uncased/trainer_qa.py index 467777ed906..21d1f7ec3bf 100644 --- a/examples/deployment/ipex/squad/distillbert_base_uncased/trainer_qa.py +++ b/examples/deployment/ipex/squad/distillbert_base_uncased/trainer_qa.py @@ -17,7 +17,7 @@ """ from transformers import is_torch_tpu_available -from nlp_toolkit.optimization.trainer import NLPTrainer +from intel_extension_for_transformers.optimization.trainer import NLPTrainer from transformers.trainer_utils import PredictionOutput diff --git a/examples/deployment/neural_engine/README.md b/examples/deployment/neural_engine/README.md index f944700fd55..309fc1ddcb9 100644 --- a/examples/deployment/neural_engine/README.md +++ b/examples/deployment/neural_engine/README.md @@ -106,7 +106,7 @@ P99 latency: the 99th latency percentile (namely 99% of the requests faster than Please note that the output file records all the commands during automatic tuning, and therefore users can reproduce easily. ``` -UNIFIED_BUFFER=1 OMP_NUM_THREADS=2 numactl --localalloc --physcpubind=18,19 /home/xxx/anaconda3/envs/bin/python -u mrpc/bert_mini/run_executor.py --input_model=mrpc/bert_mini/ref_model --mode=performance --batch_size=8 --seq_len=128 2>&1|tee /home/xxx/test/frameworks.ai.nlp-toolkit.intel-nlp-toolkit/examples/deploy/ref_executor/12_2_9_disabled_default_unified_buffer.log & +UNIFIED_BUFFER=1 OMP_NUM_THREADS=2 numactl --localalloc --physcpubind=18,19 /home/xxx/anaconda3/envs/bin/python -u mrpc/bert_mini/run_executor.py --input_model=mrpc/bert_mini/ref_model --mode=performance --batch_size=8 --seq_len=128 2>&1|tee /home/xxx/test/intel/intel-extension-for-transformers/examples/deploy/ref_executor/12_2_9_disabled_default_unified_buffer.log & ``` For more sample outputs, please refer to the [example](details.csv).
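One detail worth pinning down from the benchmark README above: P99 latency is simply the 99th percentile of the recorded per-request latencies. A tiny self-contained sketch, where the latency values are made-up sample data:

```python
import numpy as np

# made-up per-request latencies in milliseconds
latencies_ms = np.array([8.1, 7.9, 8.4, 9.0, 25.3, 8.2, 8.0, 8.3])

# P99: 99% of requests complete faster than this value
p99 = np.percentile(latencies_ms, 99)
print(f"P99 latency: {p99:.2f} ms")
```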
diff --git a/examples/deployment/neural_engine/common.py b/examples/deployment/neural_engine/common.py index 364663c994e..38614651fea 100644 --- a/examples/deployment/neural_engine/common.py +++ b/examples/deployment/neural_engine/common.py @@ -19,8 +19,8 @@ import logging import sys import numpy as np -from nlp_toolkit.backends.neural_engine.compile import compile -from nlp_toolkit.backends.neural_engine.compile.graph import Graph +from intel_extension_for_transformers.backends.neural_engine.compile import compile +from intel_extension_for_transformers.backends.neural_engine.compile.graph import Graph from tqdm import tqdm import time diff --git a/examples/deployment/neural_engine/emotion/distilbert_base_uncased/README.md b/examples/deployment/neural_engine/emotion/distilbert_base_uncased/README.md index 0539b8f006e..7ea2114d2e5 100644 --- a/examples/deployment/neural_engine/emotion/distilbert_base_uncased/README.md +++ b/examples/deployment/neural_engine/emotion/distilbert_base_uncased/README.md @@ -20,20 +20,20 @@ cmake --version conda install cmake sudo apt install autoconf ``` -Install NLP Toolkit from source code +Install Intel Extension for Transformers from source code ```shell -cd +cd git submodule update --init --recursive python setup.py install ``` Install package for examples ```shell -cd /examples/deployment/neural_engine/emotion/distilbert_base_uncased +cd /examples/deployment/neural_engine/emotion/distilbert_base_uncased pip install -r requirements.txt ``` 1.2 Set environment variables Preload libjemalloc.so can improve the performance when multi instance. ``` -export LD_PRELOAD=/nlp_toolkit/backends/neural_engine/executor/third_party/jemalloc/lib/libjemalloc.so +export LD_PRELOAD=/intel_extension_for_transformers/backends/neural_engine/executor/third_party/jemalloc/lib/libjemalloc.so ``` Using weight sharing can save memory and improve the performance when multi instance. 
``` @@ -82,7 +82,7 @@ bash prepare_model.sh --input_model=bhadresh-savani/distilbert-base-uncased-emot ``` or compile framwork model to IR using python API ``` - from nlp_toolkit.backends.neural_engine.compile import compile + from intel_extension_for_transformers.backends.neural_engine.compile import compile graph = compile('./model_and_tokenizer/int8-model.onnx') graph.save('./ir') ``` @@ -93,7 +93,7 @@ bash prepare_model.sh --input_model=bhadresh-savani/distilbert-base-uncased-emot export OMP_NUM_THREADS= export DNNL_MAX_CPU_ISA=AVX512_CORE_AMX export UNIFIED_BUFFER=1 - numactl -C 0- /nlp_toolkit/backends/neural_engine/bin/neural_engine + numactl -C 0- /intel_extension_for_transformers/backends/neural_engine/bin/neural_engine --batch_size= --iterations= --w= --seq_len=128 --config=./ir/conf.yaml --weight=./ir/model.bin ``` diff --git a/examples/deployment/neural_engine/emotion/distilbert_base_uncased/run_emotion.py b/examples/deployment/neural_engine/emotion/distilbert_base_uncased/run_emotion.py index 35bee612e09..f43eebc2888 100644 --- a/examples/deployment/neural_engine/emotion/distilbert_base_uncased/run_emotion.py +++ b/examples/deployment/neural_engine/emotion/distilbert_base_uncased/run_emotion.py @@ -26,8 +26,8 @@ import transformers from dataclasses import dataclass, field from datasets import load_dataset, load_metric -from nlp_toolkit import metrics, objectives, OptimizedModel, QuantizationConfig -from nlp_toolkit.optimization.trainer import NLPTrainer +from intel_extension_for_transformers import metrics, objectives, OptimizedModel, QuantizationConfig +from intel_extension_for_transformers.optimization.trainer import NLPTrainer from transformers import ( AutoConfig, AutoModelForSequenceClassification, diff --git a/examples/deployment/neural_engine/imagenet/vit/README.md b/examples/deployment/neural_engine/imagenet/vit/README.md index c68acc85348..2b489592c88 100644 --- a/examples/deployment/neural_engine/imagenet/vit/README.md +++ b/examples/deployment/neural_engine/imagenet/vit/README.md @@ -22,7 +22,7 @@ sudo apt install autoconf ``` Install NLPTookit from source code ```shell -cd +cd git submodule update --init --recursive python setup.py install ``` diff --git a/examples/deployment/neural_engine/imagenet/vit/model_eval.py b/examples/deployment/neural_engine/imagenet/vit/model_eval.py index 4ecf94e2bb8..8019aaae098 100644 --- a/examples/deployment/neural_engine/imagenet/vit/model_eval.py +++ b/examples/deployment/neural_engine/imagenet/vit/model_eval.py @@ -35,7 +35,7 @@ from transformers.utils import check_min_version, send_example_telemetry from transformers.utils.versions import require_version -from nlp_toolkit.optimization.trainer import NLPTrainer +from intel_extension_for_transformers.optimization.trainer import NLPTrainer import time os.environ["WANDB_DISABLED"] = "true" diff --git a/examples/deployment/neural_engine/imagenet/vit/model_quant_convert.py b/examples/deployment/neural_engine/imagenet/vit/model_quant_convert.py index 9c9bae374ac..d60ce0e0df8 100644 --- a/examples/deployment/neural_engine/imagenet/vit/model_quant_convert.py +++ b/examples/deployment/neural_engine/imagenet/vit/model_quant_convert.py @@ -47,8 +47,8 @@ from transformers.utils import check_min_version, send_example_telemetry from transformers.utils.versions import require_version -from nlp_toolkit import metrics, objectives, OptimizedModel, QuantizationConfig -from nlp_toolkit.optimization.trainer import NLPTrainer +from intel_extension_for_transformers import metrics, objectives, 
OptimizedModel, QuantizationConfig +from intel_extension_for_transformers.optimization.trainer import NLPTrainer os.environ["WANDB_DISABLED"] = "true" diff --git a/examples/deployment/neural_engine/launcher.py b/examples/deployment/neural_engine/launcher.py index f521b06bea1..459a978de1e 100644 --- a/examples/deployment/neural_engine/launcher.py +++ b/examples/deployment/neural_engine/launcher.py @@ -318,7 +318,7 @@ def add_instance_num_flag(prefix_list): def get_memory_settings(path, args): """append memory setting.""" memory_prefix_list = [] - jemalloc_prefix = "LD_PRELOAD={}/nlp_toolkit/backends/neural_engine/executor/"\ + jemalloc_prefix = "LD_PRELOAD={}/intel_extension_for_transformers/backends/neural_engine/executor/"\ "third_party/jemalloc/lib/libjemalloc.so:$LD_PRELOAD ".format(path) if args.memory_allocator == "jemalloc": memory_prefix_list.append(jemalloc_prefix) @@ -351,7 +351,7 @@ def set_numactl_env(env_cmd, core_list): def set_jemalloc_env(env_cmd, memory_allocator, path): """set jemalloc env""" if memory_allocator == "jemalloc": - env_cmd["LD_PRELOAD"] = "{}/nlp_toolkit/backends/neural_engine/executor/" \ + env_cmd["LD_PRELOAD"] = "{}/intel_extension_for_transformers/backends/neural_engine/executor/" \ "third_party/jemalloc/lib/libjemalloc.so:$".format(path) \ + env_cmd["LD_PRELOAD"] diff --git a/examples/deployment/neural_engine/mrpc/bert_base/README.md b/examples/deployment/neural_engine/mrpc/bert_base/README.md index f06d6a0e1eb..f406d8f73d1 100644 --- a/examples/deployment/neural_engine/mrpc/bert_base/README.md +++ b/examples/deployment/neural_engine/mrpc/bert_base/README.md @@ -22,18 +22,18 @@ sudo apt install autoconf ``` Install NLPTookit from source code ```shell -cd +cd git submodule update --init --recursive python setup.py install ``` Install package for examples ```shell -cd /examples/deployment/neural_engine/mrpc/bert_base +cd /examples/deployment/neural_engine/mrpc/bert_base pip install -r requirements.txt ``` 1.2 Environment variables Preload libjemalloc.so can improve the performance when multi instance. ``` -export LD_PRELOAD=/nlp_toolkit/backends/neural_engine/executor/third_party/jemalloc/lib/libjemalloc.so +export LD_PRELOAD=/intel_extension_for_transformers/backends/neural_engine/executor/third_party/jemalloc/lib/libjemalloc.so ``` Using weight sharing can save memory and improve the performance when multi instance. 
``` @@ -82,7 +82,7 @@ bash prepare_model.sh --input_model=textattack/bert-base-uncased-MRPC --task_na ``` or compile framwork model to IR using python API ``` - from nlp_toolkit.backends.neural_engine.compile import compile + from intel_extension_for_transformers.backends.neural_engine.compile import compile graph = compile('./model_and_tokenizer/int8-model.onnx') graph.save('./ir') ``` @@ -93,7 +93,7 @@ bash prepare_model.sh --input_model=textattack/bert-base-uncased-MRPC --task_na export OMP_NUM_THREADS= export DNNL_MAX_CPU_ISA=AVX512_CORE_AMX export UNIFIED_BUFFER=1 - numactl -C 0- /nlp_toolkit/backends/neural_engine/bin/neural_engine + numactl -C 0- /intel_extension_for_transformers/backends/neural_engine/bin/neural_engine --batch_size= --iterations= --w= --seq_len=128 --config=./ir/conf.yaml --weight=./ir/model.bin ``` diff --git a/examples/deployment/neural_engine/mrpc/bert_base/run_glue.py b/examples/deployment/neural_engine/mrpc/bert_base/run_glue.py index 1253d2f983c..d58c1930018 100644 --- a/examples/deployment/neural_engine/mrpc/bert_base/run_glue.py +++ b/examples/deployment/neural_engine/mrpc/bert_base/run_glue.py @@ -26,8 +26,8 @@ import transformers from dataclasses import dataclass, field from datasets import load_dataset, load_metric -from nlp_toolkit import metrics, objectives, OptimizedModel, QuantizationConfig -from nlp_toolkit.optimization.trainer import NLPTrainer +from intel_extension_for_transformers import metrics, objectives, OptimizedModel, QuantizationConfig +from intel_extension_for_transformers.optimization.trainer import NLPTrainer from transformers import ( AutoConfig, AutoModelForSequenceClassification, diff --git a/examples/deployment/neural_engine/mrpc/bert_base_cased/README.md b/examples/deployment/neural_engine/mrpc/bert_base_cased/README.md index 6dec9d2e303..2ca90271594 100644 --- a/examples/deployment/neural_engine/mrpc/bert_base_cased/README.md +++ b/examples/deployment/neural_engine/mrpc/bert_base_cased/README.md @@ -22,18 +22,18 @@ sudo apt install autoconf ``` Install NLPTookit from source code ```shell -cd +cd git submodule update --init --recursive python setup.py install ``` Install package for examples ```shell -cd /examples/deployment/neural_engine/mrpc/bert_base_cased +cd /examples/deployment/neural_engine/mrpc/bert_base_cased pip install -r requirements.txt ``` 1.2 Environment variables Preload libjemalloc.so can improve the performance when multi instance. ``` -export LD_PRELOAD=/nlp_toolkit/backends/neural_engine/executor/third_party/jemalloc/lib/libjemalloc.so +export LD_PRELOAD=/intel_extension_for_transformers/backends/neural_engine/executor/third_party/jemalloc/lib/libjemalloc.so ``` Using weight sharing can save memory and improve the performance when multi instance. 
``` @@ -82,7 +82,7 @@ bash prepare_model.sh --input_model=gchhablani/bert-base-cased-finetuned-mrpc ``` or compile framwork model to IR using python API ``` - from nlp_toolkit.backends.neural_engine.compile import compile + from intel_extension_for_transformers.backends.neural_engine.compile import compile graph = compile('./model_and_tokenizer/int8-model.onnx') graph.save('./ir') ``` @@ -93,7 +93,7 @@ bash prepare_model.sh --input_model=gchhablani/bert-base-cased-finetuned-mrpc export OMP_NUM_THREADS= export DNNL_MAX_CPU_ISA=AVX512_CORE_AMX export UNIFIED_BUFFER=1 - numactl -C 0- /nlp_toolkit/backends/neural_engine/bin/neural_engine + numactl -C 0- /intel_extension_for_transformers/backends/neural_engine/bin/neural_engine --batch_size= --iterations= --w= --seq_len=128 --config=./ir/conf.yaml --weight=./ir/model.bin ``` diff --git a/examples/deployment/neural_engine/mrpc/bert_base_cased/run_glue.py b/examples/deployment/neural_engine/mrpc/bert_base_cased/run_glue.py index 3007358bdb3..b6f46816373 100644 --- a/examples/deployment/neural_engine/mrpc/bert_base_cased/run_glue.py +++ b/examples/deployment/neural_engine/mrpc/bert_base_cased/run_glue.py @@ -26,8 +26,8 @@ import transformers from dataclasses import dataclass, field from datasets import load_dataset, load_metric -from nlp_toolkit import metrics, objectives, OptimizedModel, QuantizationConfig -from nlp_toolkit.optimization.trainer import NLPTrainer +from intel_extension_for_transformers import metrics, objectives, OptimizedModel, QuantizationConfig +from intel_extension_for_transformers.optimization.trainer import NLPTrainer from transformers import ( AutoConfig, AutoModelForSequenceClassification, diff --git a/examples/deployment/neural_engine/mrpc/bert_mini/README.md b/examples/deployment/neural_engine/mrpc/bert_mini/README.md index b005f7894a2..9d9d08114ce 100644 --- a/examples/deployment/neural_engine/mrpc/bert_mini/README.md +++ b/examples/deployment/neural_engine/mrpc/bert_mini/README.md @@ -22,18 +22,18 @@ sudo apt install autoconf ``` Install NLPTookit from source code ```shell -cd +cd git submodule update --init --recursive python setup.py install ``` Install package for examples ```shell -cd /examples/deployment/neural_engine/mrpc/bert_mini +cd /examples/deployment/neural_engine/mrpc/bert_mini pip install -r requirements.txt ``` 1.2 Environment variables Preload libjemalloc.so can improve the performance when multi instance. ``` -export LD_PRELOAD=/nlp_toolkit/backends/neural_engine/executor/third_party/jemalloc/lib/libjemalloc.so +export LD_PRELOAD=/intel_extension_for_transformers/backends/neural_engine/executor/third_party/jemalloc/lib/libjemalloc.so ``` Using weight sharing can save memory and improve the performance when multi instance. 
``` @@ -82,7 +82,7 @@ bash prepare_model.sh --input_model=M-FAC/bert-mini-finetuned-mrpc --task_name=m ``` or compile framwork model to IR using python API ``` - from nlp_toolkit.backends.neural_engine.compile import compile + from intel_extension_for_transformers.backends.neural_engine.compile import compile graph = compile('./model_and_tokenizer/int8-model.onnx') graph.save('./ir') ``` @@ -93,7 +93,7 @@ bash prepare_model.sh --input_model=M-FAC/bert-mini-finetuned-mrpc --task_name=m export OMP_NUM_THREADS= export DNNL_MAX_CPU_ISA=AVX512_CORE_AMX export UNIFIED_BUFFER=1 - numactl -C 0- /nlp_toolkit/backends/neural_engine/bin/neural_engine + numactl -C 0- /intel_extension_for_transformers/backends/neural_engine/bin/neural_engine --batch_size= --iterations= --w= --seq_len=128 --config=./ir/conf.yaml --weight=./ir/model.bin ``` diff --git a/examples/deployment/neural_engine/mrpc/bert_mini/run_glue.py b/examples/deployment/neural_engine/mrpc/bert_mini/run_glue.py index 1253d2f983c..d58c1930018 100644 --- a/examples/deployment/neural_engine/mrpc/bert_mini/run_glue.py +++ b/examples/deployment/neural_engine/mrpc/bert_mini/run_glue.py @@ -26,8 +26,8 @@ import transformers from dataclasses import dataclass, field from datasets import load_dataset, load_metric -from nlp_toolkit import metrics, objectives, OptimizedModel, QuantizationConfig -from nlp_toolkit.optimization.trainer import NLPTrainer +from intel_extension_for_transformers import metrics, objectives, OptimizedModel, QuantizationConfig +from intel_extension_for_transformers.optimization.trainer import NLPTrainer from transformers import ( AutoConfig, AutoModelForSequenceClassification, diff --git a/examples/deployment/neural_engine/mrpc/distilbert_base_uncased/README.md b/examples/deployment/neural_engine/mrpc/distilbert_base_uncased/README.md index 4d0a658df9b..979fd89dc50 100644 --- a/examples/deployment/neural_engine/mrpc/distilbert_base_uncased/README.md +++ b/examples/deployment/neural_engine/mrpc/distilbert_base_uncased/README.md @@ -22,18 +22,18 @@ sudo apt install autoconf ``` Install NLPTookit from source code ```shell -cd +cd git submodule update --init --recursive python setup.py install ``` Install package for examples ```shell -cd /examples/deployment/neural_engine/mrpc/distilbert_base_uncased +cd /examples/deployment/neural_engine/mrpc/distilbert_base_uncased pip install -r requirements.txt ``` 1.2 Environment variables Preload libjemalloc.so can improve the performance when multi instance. ``` -export LD_PRELOAD=/nlp_toolkit/backends/neural_engine/executor/third_party/jemalloc/lib/libjemalloc.so +export LD_PRELOAD=/intel_extension_for_transformers/backends/neural_engine/executor/third_party/jemalloc/lib/libjemalloc.so ``` Using weight sharing can save memory and improve the performance when multi instance. 
``` @@ -82,7 +82,7 @@ bash prepare_model.sh --input_model=textattack/distilbert-base-uncased-MRPC --t ``` or compile framwork model to IR using python API ``` - from nlp_toolkit.backends.neural_engine.compile import compile + from intel_extension_for_transformers.backends.neural_engine.compile import compile graph = compile('./model_and_tokenizer/int8-model.onnx') graph.save('./ir') ``` @@ -93,7 +93,7 @@ bash prepare_model.sh --input_model=textattack/distilbert-base-uncased-MRPC --t export OMP_NUM_THREADS= export DNNL_MAX_CPU_ISA=AVX512_CORE_AMX export UNIFIED_BUFFER=1 - numactl -C 0- /nlp_toolkit/backends/neural_engine/bin/neural_engine + numactl -C 0- /intel_extension_for_transformers/backends/neural_engine/bin/neural_engine --batch_size= --iterations= --w= --seq_len=128 --config=./ir/conf.yaml --weight=./ir/model.bin ``` diff --git a/examples/deployment/neural_engine/mrpc/distilbert_base_uncased/run_glue.py b/examples/deployment/neural_engine/mrpc/distilbert_base_uncased/run_glue.py index 1253d2f983c..d58c1930018 100644 --- a/examples/deployment/neural_engine/mrpc/distilbert_base_uncased/run_glue.py +++ b/examples/deployment/neural_engine/mrpc/distilbert_base_uncased/run_glue.py @@ -26,8 +26,8 @@ import transformers from dataclasses import dataclass, field from datasets import load_dataset, load_metric -from nlp_toolkit import metrics, objectives, OptimizedModel, QuantizationConfig -from nlp_toolkit.optimization.trainer import NLPTrainer +from intel_extension_for_transformers import metrics, objectives, OptimizedModel, QuantizationConfig +from intel_extension_for_transformers.optimization.trainer import NLPTrainer from transformers import ( AutoConfig, AutoModelForSequenceClassification, diff --git a/examples/deployment/neural_engine/mrpc/roberta_base/README.md b/examples/deployment/neural_engine/mrpc/roberta_base/README.md index be5a4b95e7b..b9a93dbea49 100644 --- a/examples/deployment/neural_engine/mrpc/roberta_base/README.md +++ b/examples/deployment/neural_engine/mrpc/roberta_base/README.md @@ -22,18 +22,18 @@ sudo apt install autoconf ``` Install NLPTookit from source code ```shell -cd +cd git submodule update --init --recursive python setup.py install ``` Install package for examples ```shell -cd /examples/deployment/neural_engine/mrpc/roberta_base +cd /examples/deployment/neural_engine/mrpc/roberta_base pip install -r requirements.txt ``` 1.2 Environment variables Preload libjemalloc.so can improve the performance when multi instance. ``` -export LD_PRELOAD=/nlp_toolkit/backends/neural_engine/executor/third_party/jemalloc/lib/libjemalloc.so +export LD_PRELOAD=/intel_extension_for_transformers/backends/neural_engine/executor/third_party/jemalloc/lib/libjemalloc.so ``` Using weight sharing can save memory and improve the performance when multi instance. 
``` @@ -82,7 +82,7 @@ bash prepare_model.sh --input_model=Intel/roberta-base-mrpc --task_name=mrpc -- ``` or compile framwork model to IR using python API ``` - from nlp_toolkit.backends.neural_engine.compile import compile + from intel_extension_for_transformers.backends.neural_engine.compile import compile graph = compile('./model_and_tokenizer/int8-model.onnx') graph.save('./ir') ``` @@ -93,7 +93,7 @@ bash prepare_model.sh --input_model=Intel/roberta-base-mrpc --task_name=mrpc -- export OMP_NUM_THREADS= export DNNL_MAX_CPU_ISA=AVX512_CORE_AMX export UNIFIED_BUFFER=1 - numactl -C 0- /nlp_toolkit/backends/neural_engine/bin/neural_engine + numactl -C 0- /intel_extension_for_transformers/backends/neural_engine/bin/neural_engine --batch_size= --iterations= --w= --seq_len=128 --config=./ir/conf.yaml --weight=./ir/model.bin ``` diff --git a/examples/deployment/neural_engine/mrpc/roberta_base/run_glue.py b/examples/deployment/neural_engine/mrpc/roberta_base/run_glue.py index f9b988a5934..490e4a70f96 100644 --- a/examples/deployment/neural_engine/mrpc/roberta_base/run_glue.py +++ b/examples/deployment/neural_engine/mrpc/roberta_base/run_glue.py @@ -26,8 +26,8 @@ import transformers from dataclasses import dataclass, field from datasets import load_dataset, load_metric -from nlp_toolkit import metrics, objectives, OptimizedModel, QuantizationConfig -from nlp_toolkit.optimization.trainer import NLPTrainer +from intel_extension_for_transformers import metrics, objectives, OptimizedModel, QuantizationConfig +from intel_extension_for_transformers.optimization.trainer import NLPTrainer from transformers import ( AutoConfig, AutoModelForSequenceClassification, diff --git a/examples/deployment/neural_engine/sparse/bert_mini/README.md b/examples/deployment/neural_engine/sparse/bert_mini/README.md index 84323e9f3a3..17d7d08ce97 100644 --- a/examples/deployment/neural_engine/sparse/bert_mini/README.md +++ b/examples/deployment/neural_engine/sparse/bert_mini/README.md @@ -1,6 +1,6 @@ # Sparse model Step-by-Step Here is an example of blocked sparsity and quantization of Bert Mini, sparse ratio is 90%. -NLPToolkit provided a high-performance sparse matrix multiplication library – SparseLib and demonstrated the performance improvement of sparse outweigh the accuracy loss. +Intel® Extension for Transformers provided a high-performance sparse matrix multiplication library – Transformers-accelerated Libraries and demonstrated the performance improvement of sparse outweigh the accuracy loss. This sparse solution is a software-based solution and utilizes the Intel instructions. More sparse examples will be released in the future. # Prerequisite @@ -30,20 +30,20 @@ sudo apt install autoconf Install NLPTookit from source code ```shell -cd +cd git submodule update --init --recursive python setup.py install ``` Install package for example ```shell -cd /examples/deployment/neural_engine/sst2/bert_mini +cd /examples/deployment/neural_engine/sst2/bert_mini pip install -r requirements.txt ``` 1.2 Environment variables Preload libjemalloc.so can improve the performance when multi instances. ``` -export LD_PRELOAD=/nlp_toolkit/backends/neural_engine/executor/third_party/jemalloc/lib/libjemalloc.so +export LD_PRELOAD=/intel_extension_for_transformers/backends/neural_engine/executor/third_party/jemalloc/lib/libjemalloc.so ``` Using weight sharing can save memory and improve the performance when multi instances. 
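(Editor's aside.) Several READMEs in this patch set `LD_PRELOAD` and core pinning by hand, and the `launcher.py` hunk near the top of this patch builds the same jemalloc preload string programmatically. A condensed, hypothetical sketch of that wiring follows; the install prefix, core range, and target script are placeholders, not values from the patch:

```python
# Hypothetical mini-launcher mirroring the LD_PRELOAD / numactl wiring in
# examples/deployment/neural_engine/launcher.py; the install prefix, core
# range, and target script below are placeholders, not values from the patch.
import os
import subprocess

PKG_ROOT = "/path/to/site-packages"
JEMALLOC = (PKG_ROOT + "/intel_extension_for_transformers/backends/"
            "neural_engine/executor/third_party/jemalloc/lib/libjemalloc.so")

env = os.environ.copy()
# Prepend jemalloc so the engine's allocations are served by it.
env["LD_PRELOAD"] = JEMALLOC + ":" + env.get("LD_PRELOAD", "")

# Pin the instance to cores 0-3 (requires numactl on the host).
subprocess.run(["numactl", "-C", "0-3", "python", "run_glue.py"],
               env=env, check=True)
```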
@@ -64,18 +64,18 @@ python prepare_dataset.py --dataset_name=glue --task_name=sst2 --output_dir=./da ### 2.2 Get sparse model Neural Engine can parse Sparse ONNX model and Neural Engine IR. -You can train a Bert mini sst2 sparse model with distillation through Neural Compressor [example](https://github.com/intel-innersource/frameworks.ai.lpot.intel-lpot/blob/28e9b1e66c23f4443a2be8f2926fee1e919f5a14/examples/pytorch/nlp/huggingface_models/text-classification/pruning_while_distillation/group_lasso/eager/README.md). Or use the [sparse model](https://huggingface.co/Intel/bert-mini-sst2-distilled-sparse-90-1X4-block) we publiced on huggingface which is bert mini on sst2 with sparse ratio 90% 1X4 block. -You can get INT8 ONNX sparse model from optimization module by setting precision=int8, command as follows: +You can use the [sparse model](https://huggingface.co/Intel/bert-mini-sst2-distilled-sparse-90-1X4-block) we publiced on huggingface which is bert mini on sst2 with sparse ratio 90% 1X4 block(include int8 onnx model and int8 Neural Engine IR). +You can also get INT8 ONNX sparse model from optimization module by setting precision=int8, command as follows: ```shell bash prepare_model.sh --input_model=Intel/bert-mini-sst2-distilled-sparse-90-1X4-block --task_name=sst2 --output_dir=./model_and_tokenizer --precision=int8 ``` -Then you can generate tranposed sparse model to get better performance, command as follows: +Then you can generate transposed sparse model to get better performance, command as follows: ```shell -python export_tranpose_ir.py --input_model=./model_and_tokenizer/int8-model.onnx +python export_transpose_ir.py --input_model=./model_and_tokenizer/int8-model.onnx ``` ### Benchmark -Neural Engine will automatically detect weight structured sparse ratio, as long as it beyond 70% (since normaly get performance gain when sparse ratio beyond 70%), Neural Engine will call [SparseLib](https://github.com/intel-innersource/frameworks.ai.nlp-toolkit.intel-nlp-toolkit/tree/develop/nlp_toolkit/backends/neural_engine/SparseLib) kernels and high performance layernorm op with transpose mode to improve inference performance. +Neural Engine will automatically detect weight structured sparse ratio, as long as it beyond 70% (since normaly get performance gain when sparse ratio beyond 70%), Neural Engine will call [Transformers-accelerated Libraries](https://github.com/intel/intel-extension-for-transformers/tree/develop/intel_extension_for_transformers/backends/neural_engine/kernels) and high performance layernorm op with transpose mode to improve inference performance. 2.1 accuracy run python @@ -100,7 +100,7 @@ Neural Engine will automatically detect weight structured sparse ratio, as long bash run_benchmark.sh --input_model=./sparse_int8_ir --mode=performance --batch_size=8 --seq_len=128 ``` - Or run C++ + or run C++ The warmup below is recommended to be 1/10 of iterations and no less than 3. 
``` @@ -108,7 +108,7 @@ Neural Engine will automatically detect weight structured sparse ratio, as long export OMP_NUM_THREADS= export DNNL_MAX_CPU_ISA=AVX512_CORE_AMX export UNIFIED_BUFFER=1 - numactl -C 0- /nlp_toolkit/backends/neural_engine/bin/neural_engine + numactl -C 0- /intel_extension_for_transformers/backends/neural_engine/bin/neural_engine --batch_size= --iterations= --w= --seq_len=128 --config=./sparse_int8_ir/conf.yaml --weight=./sparse_int8_ir/model.bin ``` diff --git a/examples/deployment/neural_engine/sparse/bert_mini/export_tranpose_ir.py b/examples/deployment/neural_engine/sparse/bert_mini/export_transpose_ir.py similarity index 78% rename from examples/deployment/neural_engine/sparse/bert_mini/export_tranpose_ir.py rename to examples/deployment/neural_engine/sparse/bert_mini/export_transpose_ir.py index 175eda9e1c3..3dc4edbe61c 100644 --- a/examples/deployment/neural_engine/sparse/bert_mini/export_tranpose_ir.py +++ b/examples/deployment/neural_engine/sparse/bert_mini/export_transpose_ir.py @@ -1,5 +1,5 @@ -from nlp_toolkit.backends.neural_engine.compile import compile -from nlp_toolkit.backends.neural_engine.compile.graph import Graph +from intel_extension_for_transformers.backends.neural_engine.compile import compile +from intel_extension_for_transformers.backends.neural_engine.compile.graph import Graph import os import argparse diff --git a/examples/deployment/neural_engine/sparse/bert_mini/run_glue.py b/examples/deployment/neural_engine/sparse/bert_mini/run_glue.py index 00f2f844cdb..98149e86e4f 100644 --- a/examples/deployment/neural_engine/sparse/bert_mini/run_glue.py +++ b/examples/deployment/neural_engine/sparse/bert_mini/run_glue.py @@ -26,8 +26,8 @@ import transformers from dataclasses import dataclass, field from datasets import load_dataset, load_metric -from nlp_toolkit import metrics, objectives, OptimizedModel, QuantizationConfig -from nlp_toolkit.optimization.trainer import NLPTrainer +from intel_extension_for_transformers import metrics, objectives, OptimizedModel, QuantizationConfig +from intel_extension_for_transformers.optimization.trainer import NLPTrainer from transformers import ( AutoConfig, AutoModelForSequenceClassification, diff --git a/examples/deployment/neural_engine/sparse/distilbert_base_uncased/README.md b/examples/deployment/neural_engine/sparse/distilbert_base_uncased/README.md index 1f3d5b00eae..dfe0b0021ba 100644 --- a/examples/deployment/neural_engine/sparse/distilbert_base_uncased/README.md +++ b/examples/deployment/neural_engine/sparse/distilbert_base_uncased/README.md @@ -1,6 +1,6 @@ # Sparse model Step-by-Step Here is a example from pruning a distilbert base model using group lasso during a distillation process to get sparse model, and then -inference with SparseLib which is a high-performance operator computing library. Overall, get performance improvement. +inference with Transformers-accelerated Library which is a high-performance operator computing library. Overall, get performance improvement. # Prerequisite ### 1\. Installation @@ -29,20 +29,20 @@ sudo apt install autoconf Install NLPTookit from source code ```shell -cd +cd git submodule update --init --recursive python setup.py install ``` Install package for examples ```shell -cd /examples/deployment/neural_engine/sparse/distilbert_base_uncased +cd /examples/deployment/neural_engine/sparse/distilbert_base_uncased pip install -r requirements.txt ``` 1.2 Environment variables Preload libjemalloc.so can improve the performance when multi instance. 
``` -export LD_PRELOAD=/nlp_toolkit/backends/neural_engine/executor/third_party/jemalloc/lib/libjemalloc.so +export LD_PRELOAD=/intel_extension_for_transformers/backends/neural_engine/executor/third_party/jemalloc/lib/libjemalloc.so ``` Using weight sharing can save memory and improve the performance when multi instance. @@ -62,18 +62,18 @@ python prepare_dataset.py --dataset_name=squad --output_dir=./data ### 2.2 Get sparse model -Use the [sparse model](https://huggingface.co/Intel/distilbert-base-uncased-squadv1.1-sparse-80-1x4-block-pruneofa) we publiced on huggingface which is distilbert base on Squad1.1 with sparse ration 80 on 1X4 block. -You can get INT8 ONNX sparse model from optimization module by setting precision=int8, command as follows: +Use the [sparse model](https://huggingface.co/Intel/distilbert-base-uncased-squadv1.1-sparse-80-1X4-block) we publiced on huggingface which is distilbert base on SQuADv1.1 with sparse ration 80 on 1X4 block(include int8 onnx model and int8 Neural Engine IR). +You can also get INT8 ONNX sparse model from optimization module by setting precision=int8, command as follows: ```shell bash prepare_model.sh --input_model=Intel/distilbert-base-uncased-squadv1.1-sparse-80-1x4-block-pruneofa --dataset_name=squad --task_name=squad --output_dir=./model_and_tokenizer --precision=int8 ``` -Then you can generate tranposed sparse model to get better performance, command as follows: +Then you can generate transposed sparse model to get better performance, command as follows: ```shell -python export_tranpose_ir.py --input_model=./model_and_tokenizer/int8-model.onnx +python export_transpose_ir.py --input_model=./model_and_tokenizer/int8-model.onnx ``` ### Benchmark -Neural Engine will automatically detect weight structured sparse ratio, as long as it beyond 70% (since normaly get performance gain when sparse ratio beyond 70%), Neural Engine will call [SparseLib](https://github.com/intel-innersource/frameworks.ai.nlp-toolkit.intel-nlp-toolkit/tree/develop/nlp_toolkit/backends/neural_engine/SparseLib) kernels and high performance layernorm op with transpose mode to improve inference performance. +Neural Engine will automatically detect weight structured sparse ratio, as long as it beyond 70% (since normaly get performance gain when sparse ratio beyond 70%), Neural Engine will call [Transformers-accelerated Libraries](https://github.com/intel/intel-extension-for-transformers/tree/develop/intel_extension_for_transformers/backends/neural_engine/kernels) and high performance layernorm op with transpose mode to improve inference performance. 2.1 accuracy run python @@ -98,7 +98,7 @@ Neural Engine will automatically detect weight structured sparse ratio, as long bash run_benchmark.sh --input_model=./sparse_int8_ir --mode=performance --batch_size=8 --seq_len=128 ``` - Or run C++ + or run C++ The warmup below is recommended to be 1/10 of iterations and no less than 3. 
``` @@ -106,7 +106,7 @@ Neural Engine will automatically detect weight structured sparse ratio, as long export OMP_NUM_THREADS= export DNNL_MAX_CPU_ISA=AVX512_CORE_AMX export UNIFIED_BUFFER=1 - numactl -C 0- /nlp_toolkit/backends/neural_engine/bin/neural_engine + numactl -C 0- /intel_extension_for_transformers/backends/neural_engine/bin/neural_engine --batch_size= --iterations= --w= --seq_len=128 --config=./sparse_int8_ir/conf.yaml --weight=./sparse_int8_ir/model.bin ``` diff --git a/examples/deployment/neural_engine/sparse/distilbert_base_uncased/export_tranpose_ir.py b/examples/deployment/neural_engine/sparse/distilbert_base_uncased/export_transpose_ir.py similarity index 78% rename from examples/deployment/neural_engine/sparse/distilbert_base_uncased/export_tranpose_ir.py rename to examples/deployment/neural_engine/sparse/distilbert_base_uncased/export_transpose_ir.py index 175eda9e1c3..3dc4edbe61c 100644 --- a/examples/deployment/neural_engine/sparse/distilbert_base_uncased/export_tranpose_ir.py +++ b/examples/deployment/neural_engine/sparse/distilbert_base_uncased/export_transpose_ir.py @@ -1,5 +1,5 @@ -from nlp_toolkit.backends.neural_engine.compile import compile -from nlp_toolkit.backends.neural_engine.compile.graph import Graph +from intel_extension_for_transformers.backends.neural_engine.compile import compile +from intel_extension_for_transformers.backends.neural_engine.compile.graph import Graph import os import argparse diff --git a/examples/deployment/neural_engine/sparse/distilbert_base_uncased/run_qa.py b/examples/deployment/neural_engine/sparse/distilbert_base_uncased/run_qa.py index e9718a8d7bd..25d157a5852 100644 --- a/examples/deployment/neural_engine/sparse/distilbert_base_uncased/run_qa.py +++ b/examples/deployment/neural_engine/sparse/distilbert_base_uncased/run_qa.py @@ -26,7 +26,7 @@ import transformers from dataclasses import dataclass, field from datasets import load_dataset, load_metric -from nlp_toolkit import metrics , OptimizedModel, QuantizationConfig +from intel_extension_for_transformers import metrics , OptimizedModel, QuantizationConfig from trainer_qa import QuestionAnsweringTrainer from transformers import ( AutoConfig, diff --git a/examples/deployment/neural_engine/sparse/distilbert_base_uncased/trainer_qa.py b/examples/deployment/neural_engine/sparse/distilbert_base_uncased/trainer_qa.py index 467777ed906..21d1f7ec3bf 100644 --- a/examples/deployment/neural_engine/sparse/distilbert_base_uncased/trainer_qa.py +++ b/examples/deployment/neural_engine/sparse/distilbert_base_uncased/trainer_qa.py @@ -17,7 +17,7 @@ """ from transformers import is_torch_tpu_available -from nlp_toolkit.optimization.trainer import NLPTrainer +from intel_extension_for_transformers.optimization.trainer import NLPTrainer from transformers.trainer_utils import PredictionOutput diff --git a/examples/deployment/neural_engine/squad/bert_large/README.md b/examples/deployment/neural_engine/squad/bert_large/README.md index 60ac714ea5a..5f16e67766e 100644 --- a/examples/deployment/neural_engine/squad/bert_large/README.md +++ b/examples/deployment/neural_engine/squad/bert_large/README.md @@ -22,18 +22,18 @@ sudo apt install autoconf ``` Install NLPTookit from source code ```shell -cd +cd git submodule update --init --recursive python setup.py install ``` Install package for examples ```shell -cd /examples/deployment/neural_engine/squad/bert_large +cd /examples/deployment/neural_engine/squad/bert_large pip install -r requirements.txt ``` 1.2 Environment variables Preload libjemalloc.so 
can improve the performance when multi instance. ``` -export LD_PRELOAD=/nlp_toolkit/backends/neural_engine/executor/third_party/jemalloc/lib/libjemalloc.so +export LD_PRELOAD=/intel_extension_for_transformers/backends/neural_engine/executor/third_party/jemalloc/lib/libjemalloc.so ``` Using weight sharing can save memory and improve the performance when multi instance. ``` @@ -91,7 +91,7 @@ bash prepare_model.sh --input_model=bert-large-uncased-whole-word-masking-finetu ``` or compile framwork model to IR using python API ``` - from nlp_toolkit.backends.neural_engine.compile import compile + from intel_extension_for_transformers.backends.neural_engine.compile import compile graph = compile('./model_and_tokenizer/int8-model.onnx') graph.save('./ir') ``` @@ -102,7 +102,7 @@ bash prepare_model.sh --input_model=bert-large-uncased-whole-word-masking-finetu export OMP_NUM_THREADS= export DNNL_MAX_CPU_ISA=AVX512_CORE_AMX export UNIFIED_BUFFER=1 - numactl -C 0- /nlp_toolkit/backends/neural_engine/bin/neural_engine + numactl -C 0- /intel_extension_for_transformers/backends/neural_engine/bin/neural_engine --batch_size= --iterations= --w= --seq_len=384 --config=./ir/conf.yaml --weight=./ir/model.bin ``` diff --git a/examples/deployment/neural_engine/squad/bert_large/run_qa.py b/examples/deployment/neural_engine/squad/bert_large/run_qa.py index e45657a4172..c276e038124 100644 --- a/examples/deployment/neural_engine/squad/bert_large/run_qa.py +++ b/examples/deployment/neural_engine/squad/bert_large/run_qa.py @@ -26,7 +26,7 @@ import transformers from dataclasses import dataclass, field from datasets import load_dataset, load_metric -from nlp_toolkit import metrics , OptimizedModel, QuantizationConfig +from intel_extension_for_transformers import metrics , OptimizedModel, QuantizationConfig from trainer_qa import QuestionAnsweringTrainer from transformers import ( AutoConfig, diff --git a/examples/deployment/neural_engine/squad/bert_large/trainer_qa.py b/examples/deployment/neural_engine/squad/bert_large/trainer_qa.py index 467777ed906..21d1f7ec3bf 100644 --- a/examples/deployment/neural_engine/squad/bert_large/trainer_qa.py +++ b/examples/deployment/neural_engine/squad/bert_large/trainer_qa.py @@ -17,7 +17,7 @@ """ from transformers import is_torch_tpu_available -from nlp_toolkit.optimization.trainer import NLPTrainer +from intel_extension_for_transformers.optimization.trainer import NLPTrainer from transformers.trainer_utils import PredictionOutput diff --git a/examples/deployment/neural_engine/sst2/bert_mini/README.md b/examples/deployment/neural_engine/sst2/bert_mini/README.md index 8f0cad38b74..6fee5360216 100644 --- a/examples/deployment/neural_engine/sst2/bert_mini/README.md +++ b/examples/deployment/neural_engine/sst2/bert_mini/README.md @@ -22,18 +22,18 @@ sudo apt install autoconf ``` Install NLPTookit from source code ```shell -cd +cd git submodule update --init --recursive python setup.py install ``` Install package for examples ```shell -cd /examples/deployment/neural_engine/sst2/bert_mini +cd /examples/deployment/neural_engine/sst2/bert_mini pip install -r requirements.txt ``` 1.2 Environment variables Preload libjemalloc.so can improve the performance when multi instance. ``` -export LD_PRELOAD=/nlp_toolkit/backends/neural_engine/executor/third_party/jemalloc/lib/libjemalloc.so +export LD_PRELOAD=/intel_extension_for_transformers/backends/neural_engine/executor/third_party/jemalloc/lib/libjemalloc.so ``` Using weight sharing can save memory and improve the performance when multi instance. 
``` @@ -82,7 +82,7 @@ bash prepare_model.sh --input_model=moshew/bert-mini-sst2-distilled --task_name ``` or compile framwork model to IR using python API ``` - from nlp_toolkit.backends.neural_engine.compile import compile + from intel_extension_for_transformers.backends.neural_engine.compile import compile graph = compile('./model_and_tokenizer/int8-model.onnx') graph.save('./ir') ``` @@ -93,7 +93,7 @@ bash prepare_model.sh --input_model=moshew/bert-mini-sst2-distilled --task_name export OMP_NUM_THREADS= export DNNL_MAX_CPU_ISA=AVX512_CORE_AMX export UNIFIED_BUFFER=1 - numactl -C 0- /nlp_toolkit/backends/neural_engine/bin/neural_engine + numactl -C 0- /intel_extension_for_transformers/backends/neural_engine/bin/neural_engine --batch_size= --iterations= --w= --seq_len=128 --config=./ir/conf.yaml --weight=./ir/model.bin ``` diff --git a/examples/deployment/neural_engine/sst2/bert_mini/run_glue.py b/examples/deployment/neural_engine/sst2/bert_mini/run_glue.py index f9b988a5934..490e4a70f96 100644 --- a/examples/deployment/neural_engine/sst2/bert_mini/run_glue.py +++ b/examples/deployment/neural_engine/sst2/bert_mini/run_glue.py @@ -26,8 +26,8 @@ import transformers from dataclasses import dataclass, field from datasets import load_dataset, load_metric -from nlp_toolkit import metrics, objectives, OptimizedModel, QuantizationConfig -from nlp_toolkit.optimization.trainer import NLPTrainer +from intel_extension_for_transformers import metrics, objectives, OptimizedModel, QuantizationConfig +from intel_extension_for_transformers.optimization.trainer import NLPTrainer from transformers import ( AutoConfig, AutoModelForSequenceClassification, diff --git a/examples/deployment/neural_engine/sst2/distilbert_base_uncased/README.md b/examples/deployment/neural_engine/sst2/distilbert_base_uncased/README.md index af10caa94ca..54017401fbb 100644 --- a/examples/deployment/neural_engine/sst2/distilbert_base_uncased/README.md +++ b/examples/deployment/neural_engine/sst2/distilbert_base_uncased/README.md @@ -22,18 +22,18 @@ sudo apt install autoconf ``` Install NLPTookit from source code ```shell -cd +cd git submodule update --init --recursive python setup.py install ``` Install package for examples ```shell -cd /examples/deployment/neural_engine/sst2/distilbert_base_uncased +cd /examples/deployment/neural_engine/sst2/distilbert_base_uncased pip install -r requirements.txt ``` 1.2 Environment Variables Preload libjemalloc.so can improve the performance when multi instance. ``` -export LD_PRELOAD=/nlp_toolkit/backends/neural_engine/executor/third_party/jemalloc/lib/libjemalloc.so +export LD_PRELOAD=/intel_extension_for_transformers/backends/neural_engine/executor/third_party/jemalloc/lib/libjemalloc.so ``` Using weight sharing can save memory and improve the performance when multi instance. 
``` @@ -82,7 +82,7 @@ bash prepare_model.sh --input_model=distilbert-base-uncased-finetuned-sst-2-engl ``` or compile framwork model to IR using python API ``` - from nlp_toolkit.backends.neural_engine.compile import compile + from intel_extension_for_transformers.backends.neural_engine.compile import compile graph = compile('./model_and_tokenizer/int8-model.onnx') graph.save('./ir') ``` @@ -93,7 +93,7 @@ bash prepare_model.sh --input_model=distilbert-base-uncased-finetuned-sst-2-engl export OMP_NUM_THREADS= export DNNL_MAX_CPU_ISA=AVX512_CORE_AMX export UNIFIED_BUFFER=1 - numactl -C 0- /nlp_toolkit/backends/neural_engine/bin/neural_engine + numactl -C 0- /intel_extension_for_transformers/backends/neural_engine/bin/neural_engine --batch_size= --iterations= --w= --seq_len=128 --config=./ir/conf.yaml --weight=./ir/model.bin ``` diff --git a/examples/deployment/neural_engine/sst2/distilbert_base_uncased/run_glue.py b/examples/deployment/neural_engine/sst2/distilbert_base_uncased/run_glue.py index 1253d2f983c..d58c1930018 100644 --- a/examples/deployment/neural_engine/sst2/distilbert_base_uncased/run_glue.py +++ b/examples/deployment/neural_engine/sst2/distilbert_base_uncased/run_glue.py @@ -26,8 +26,8 @@ import transformers from dataclasses import dataclass, field from datasets import load_dataset, load_metric -from nlp_toolkit import metrics, objectives, OptimizedModel, QuantizationConfig -from nlp_toolkit.optimization.trainer import NLPTrainer +from intel_extension_for_transformers import metrics, objectives, OptimizedModel, QuantizationConfig +from intel_extension_for_transformers.optimization.trainer import NLPTrainer from transformers import ( AutoConfig, AutoModelForSequenceClassification, diff --git a/examples/deployment/neural_engine/sst2/minilm_l6_h384_uncased/README.md b/examples/deployment/neural_engine/sst2/minilm_l6_h384_uncased/README.md index 73d29ad04d3..a7c564095d3 100644 --- a/examples/deployment/neural_engine/sst2/minilm_l6_h384_uncased/README.md +++ b/examples/deployment/neural_engine/sst2/minilm_l6_h384_uncased/README.md @@ -22,18 +22,18 @@ sudo apt install autoconf ``` Install NLPTookit from source code ```shell -cd +cd git submodule update --init --recursive python setup.py install ``` Install package for examples ```shell -cd /examples/deployment/neural_engine/sst2/minilm_l6_h384_uncased +cd /examples/deployment/neural_engine/sst2/minilm_l6_h384_uncased pip install -r requirements.txt ``` 1.2 Environment variables Preload libjemalloc.so can improve the performance when multi instance. ``` -export LD_PRELOAD=/nlp_toolkit/backends/neural_engine/executor/third_party/jemalloc/lib/libjemalloc.so +export LD_PRELOAD=/intel_extension_for_transformers/backends/neural_engine/executor/third_party/jemalloc/lib/libjemalloc.so ``` Using weight sharing can save memory and improve the performance when multi instance. 
``` @@ -82,7 +82,7 @@ bash prepare_model.sh --input_model=philschmid/MiniLM-L6-H384-uncased-sst2 --t ``` or compile framwork model to IR using python API ``` - from nlp_toolkit.backends.neural_engine.compile import compile + from intel_extension_for_transformers.backends.neural_engine.compile import compile graph = compile('./model_and_tokenizer/int8-model.onnx') graph.save('./ir') ``` @@ -93,7 +93,7 @@ bash prepare_model.sh --input_model=philschmid/MiniLM-L6-H384-uncased-sst2 --t export OMP_NUM_THREADS= export DNNL_MAX_CPU_ISA=AVX512_CORE_AMX export UNIFIED_BUFFER=1 - numactl -C 0- /nlp_toolkit/backends/neural_engine/bin/neural_engine + numactl -C 0- /intel_extension_for_transformers/backends/neural_engine/bin/neural_engine --batch_size= --iterations= --w= --seq_len=128 --config=./ir/conf.yaml --weight=./ir/model.bin ``` diff --git a/examples/deployment/neural_engine/sst2/minilm_l6_h384_uncased/run_glue.py b/examples/deployment/neural_engine/sst2/minilm_l6_h384_uncased/run_glue.py index 00f2f844cdb..98149e86e4f 100644 --- a/examples/deployment/neural_engine/sst2/minilm_l6_h384_uncased/run_glue.py +++ b/examples/deployment/neural_engine/sst2/minilm_l6_h384_uncased/run_glue.py @@ -26,8 +26,8 @@ import transformers from dataclasses import dataclass, field from datasets import load_dataset, load_metric -from nlp_toolkit import metrics, objectives, OptimizedModel, QuantizationConfig -from nlp_toolkit.optimization.trainer import NLPTrainer +from intel_extension_for_transformers import metrics, objectives, OptimizedModel, QuantizationConfig +from intel_extension_for_transformers.optimization.trainer import NLPTrainer from transformers import ( AutoConfig, AutoModelForSequenceClassification, diff --git a/examples/optimization/pytorch/huggingface/language-modeling/auto_distillation/run_mlm_autodistillation.py b/examples/optimization/pytorch/huggingface/language-modeling/auto_distillation/run_mlm_autodistillation.py index 9c24ebb8ce8..1ad52652386 100644 --- a/examples/optimization/pytorch/huggingface/language-modeling/auto_distillation/run_mlm_autodistillation.py +++ b/examples/optimization/pytorch/huggingface/language-modeling/auto_distillation/run_mlm_autodistillation.py @@ -46,12 +46,12 @@ from collections import defaultdict from dataclasses import dataclass, field -from nlp_toolkit import ( +from intel_extension_for_transformers import ( AutoDistillationConfig, FlashDistillationConfig, metrics ) -from nlp_toolkit.optimization.trainer import NLPTrainer as Trainer +from intel_extension_for_transformers.optimization.trainer import NLPTrainer as Trainer from torch.utils.data import DataLoader from tqdm.auto import tqdm from transformers import ( @@ -264,7 +264,7 @@ class OptimizationArguments: def main(): if int(os.environ.get("LOCAL_RANK", -1)) != -1 and '--no_cuda' in sys.argv: - from nlp_toolkit.optimization.utils.utility import distributed_init + from intel_extension_for_transformers.optimization.utils.utility import distributed_init distributed_init() # See all possible arguments in src/transformers/training_args.py diff --git a/examples/optimization/pytorch/huggingface/language-modeling/quantization/inc/run_clm.py b/examples/optimization/pytorch/huggingface/language-modeling/quantization/inc/run_clm.py index 56ae9ef8f90..357b85096fb 100644 --- a/examples/optimization/pytorch/huggingface/language-modeling/quantization/inc/run_clm.py +++ b/examples/optimization/pytorch/huggingface/language-modeling/quantization/inc/run_clm.py @@ -28,8 +28,8 @@ from dataclasses import dataclass, field from 
datasets import load_dataset, load_metric from itertools import chain -from nlp_toolkit import metrics, OptimizedModel, QuantizationConfig -from nlp_toolkit.optimization.trainer import NLPTrainer +from intel_extension_for_transformers import metrics, OptimizedModel, QuantizationConfig +from intel_extension_for_transformers.optimization.trainer import NLPTrainer from transformers import ( CONFIG_MAPPING, MODEL_FOR_CAUSAL_LM_MAPPING, diff --git a/examples/optimization/pytorch/huggingface/language-modeling/quantization/inc/run_mlm.py b/examples/optimization/pytorch/huggingface/language-modeling/quantization/inc/run_mlm.py index 675a5a2e9a7..cbcac93d23b 100644 --- a/examples/optimization/pytorch/huggingface/language-modeling/quantization/inc/run_mlm.py +++ b/examples/optimization/pytorch/huggingface/language-modeling/quantization/inc/run_mlm.py @@ -28,8 +28,8 @@ from dataclasses import dataclass, field from datasets import load_dataset, load_metric from itertools import chain -from nlp_toolkit import metrics, OptimizedModel, QuantizationConfig -from nlp_toolkit.optimization.trainer import NLPTrainer +from intel_extension_for_transformers import metrics, OptimizedModel, QuantizationConfig +from intel_extension_for_transformers.optimization.trainer import NLPTrainer from transformers import ( CONFIG_MAPPING, MODEL_FOR_MASKED_LM_MAPPING, diff --git a/examples/optimization/pytorch/huggingface/language-modeling/quantization/inc/run_plm.py b/examples/optimization/pytorch/huggingface/language-modeling/quantization/inc/run_plm.py index d61a4abe92c..a9db4636724 100644 --- a/examples/optimization/pytorch/huggingface/language-modeling/quantization/inc/run_plm.py +++ b/examples/optimization/pytorch/huggingface/language-modeling/quantization/inc/run_plm.py @@ -27,8 +27,8 @@ from datasets import load_dataset from itertools import chain -from nlp_toolkit import metrics, OptimizedModel, QuantizationConfig -from nlp_toolkit.optimization.trainer import NLPTrainer +from intel_extension_for_transformers import metrics, OptimizedModel, QuantizationConfig +from intel_extension_for_transformers.optimization.trainer import NLPTrainer from transformers import ( AutoConfig, AutoTokenizer, diff --git a/examples/optimization/pytorch/huggingface/multiple-choice/quantization/inc/run_swag.py b/examples/optimization/pytorch/huggingface/multiple-choice/quantization/inc/run_swag.py index bbf1b9abcac..9218cc22554 100644 --- a/examples/optimization/pytorch/huggingface/multiple-choice/quantization/inc/run_swag.py +++ b/examples/optimization/pytorch/huggingface/multiple-choice/quantization/inc/run_swag.py @@ -28,8 +28,8 @@ from dataclasses import dataclass, field from datasets import load_dataset from itertools import chain -from nlp_toolkit import metrics, OptimizedModel, QuantizationConfig -from nlp_toolkit.optimization.trainer import NLPTrainer +from intel_extension_for_transformers import metrics, OptimizedModel, QuantizationConfig +from intel_extension_for_transformers.optimization.trainer import NLPTrainer from transformers import ( AutoConfig, AutoModelForMultipleChoice, diff --git a/examples/optimization/pytorch/huggingface/question-answering/distillation/run_qa.py b/examples/optimization/pytorch/huggingface/question-answering/distillation/run_qa.py index 5705a3fbacc..b7791a0560f 100644 --- a/examples/optimization/pytorch/huggingface/question-answering/distillation/run_qa.py +++ b/examples/optimization/pytorch/huggingface/question-answering/distillation/run_qa.py @@ -32,7 +32,7 @@ import time import torch import 
transformers -from nlp_toolkit import metrics, OptimizedModel, DistillationConfig +from intel_extension_for_transformers import metrics, OptimizedModel, DistillationConfig from torch.utils.data import DataLoader from tqdm import tqdm from trainer_qa import QuestionAnsweringTrainer @@ -249,7 +249,7 @@ class OptimizationArguments: def main(): if int(os.environ.get("LOCAL_RANK", -1)) != -1 and '--no_cuda' in sys.argv: - from nlp_toolkit.optimization.utils.utility import distributed_init + from intel_extension_for_transformers.optimization.utils.utility import distributed_init distributed_init() # See all possible arguments in src/transformers/training_args.py diff --git a/examples/optimization/pytorch/huggingface/question-answering/distillation/trainer_qa.py b/examples/optimization/pytorch/huggingface/question-answering/distillation/trainer_qa.py index 4cfa6ad6d8e..d4a049348df 100644 --- a/examples/optimization/pytorch/huggingface/question-answering/distillation/trainer_qa.py +++ b/examples/optimization/pytorch/huggingface/question-answering/distillation/trainer_qa.py @@ -17,7 +17,7 @@ """ from transformers import is_torch_tpu_available -from nlp_toolkit.optimization.trainer import NLPTrainer +from intel_extension_for_transformers.optimization.trainer import NLPTrainer from transformers.trainer_utils import PredictionOutput diff --git a/examples/optimization/pytorch/huggingface/question-answering/dynamic/README.md b/examples/optimization/pytorch/huggingface/question-answering/dynamic/README.md index e2fc7ac44f9..4ad60d53f03 100644 --- a/examples/optimization/pytorch/huggingface/question-answering/dynamic/README.md +++ b/examples/optimization/pytorch/huggingface/question-answering/dynamic/README.md @@ -62,7 +62,6 @@ python run_qa.py \ ``` - ### Step 4: Quantization ``` @@ -92,3 +91,203 @@ python run_qa.py \ --per_device_eval_batch_size 32 \ --length_config "(315, 251, 242, 159, 142, 33)" ``` + + +## Performance Data +Performance results test on ​​07/10/2022 with Intel Xeon Platinum 8280 Scalable processor, batchsize = 32 +Performance varies by use, configuration and other factors. See platform configuration for configuration details. For more complete information about performance and benchmark results, visit www.intel.com/benchmarks + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
| Model Name | Datatype | Optimization Method | Model size (MB) | Accuracy (F1) | Latency (ms) | GFLOPS** | Speedup (compared with BERT Base) |
|---|---|---|---|---|---|---|---|
| BERT Base | fp32 | None | 415.47 | 88.58 | 56.56 | 35.3 | 1x |
| TinyBERT | fp32 | Distillation | 253.20 | 88.39 | 32.40 | 17.7 | 1.75x |
| QuaTinyBERT | int8 | Distillation + quantization | 132.06 | 87.67 | 15.58 | 17.7 | 3.63x |
| MiniLMv2 | fp32 | Distillation | 115.04 | 88.70 | 18.23 | 4.76 | 3.10x |
| QuaMiniLMv2 | int8 | Distillation + quantization | 84.85 | 88.54 | 9.14 | 4.76 | 6.18x |
| LA-MiniLM | fp32 | Drop and restore based on MiniLMv2 | 115.04 | 89.28 | 16.99 | 4.76 | 3.33x |
| LA-MiniLM(269, 253, 252, 202, 104, 34)* | fp32 | Evolution search (best config) | 115.04 | 87.76 | 11.44 | 2.49 | 4.94x |
| QuaLA-MiniLM | int8 | Quantization based on LA-MiniLM | 84.85 | 88.85 | 7.84 | 4.76 | 7.21x |
| QuaLA-MiniLM(315,251,242,159,142,33)* | int8 | Evolution search (best config) | 84.86 | 87.68 | 6.41 | 2.55 | 8.82x |
NOTES: * the length config applies to the LA models

NOTES: ** GFLOPS counts the multiply and add operations performed during model inference (obtained with the torchprofile tool)
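The footnote above attributes the GFLOPS figures to the torchprofile tool. A minimal sketch of reproducing such a count follows; the checkpoint name, sequence length, and the MACs-to-FLOPs doubling are the editor's assumptions, not the exact benchmark recipe:

```python
# Sketch: count MACs for a BERT-style QA model with torchprofile, then
# convert to FLOPs by counting each MAC as one multiply plus one add.
# The checkpoint name and seq_len are illustrative assumptions.
import torch
from torchprofile import profile_macs
from transformers import AutoModelForQuestionAnswering

# torchscript=True makes the model return tuples so torch.jit tracing works.
model = AutoModelForQuestionAnswering.from_pretrained(
    "bert-base-uncased", torchscript=True)
model.eval()

input_ids = torch.zeros(1, 384, dtype=torch.long)  # dummy SQuAD-style batch

with torch.no_grad():
    macs = profile_macs(model, input_ids)

print(f"GMACs: {macs / 1e9:.1f}  GFLOPs (mul+add): {2 * macs / 1e9:.1f}")
```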
### platform configuration

| Manufacturer | Intel Corporation |
|---|---|
| Product Name | S2600WFD |
| BIOS Version | 1SE5C620.86B.02.01.0008.031920191559 |
| OS | CentOS Linux release 8.4.2105 |
| Kernel | 4.18.0-305.3.1.el8.x86_64 |
| Microcode | 0x5003006 |
| IRQ Balance | Enabled |
| CPU Model | Intel(R) Xeon Platinum 8280 CPU @ 2.70GHz |
| Base Frequency | 2.7GHz |
| Maximum Frequency | 4.0GHz |
| All-core Maximum Frequency | 3.3GHz |
| CPU(s) | 112 |
| Thread(s) per Core | 2 |
| Core(s) per Socket | 28 |
| Socket(s) | 2 |
| NUMA Node(s) | 2 |
| Turbo | Enabled |
| Frequency Governor | Performance |
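As a quick cross-check of the performance table above (an editor's aside, not part of the original patch), the Speedup column is simply the BERT Base latency divided by each variant's latency:

```python
# Recompute the Speedup column from the Latency (ms) column above.
latency_ms = {
    "BERT Base": 56.56,
    "TinyBERT": 32.40,
    "QuaTinyBERT": 15.58,
    "MiniLMv2": 18.23,
    "QuaMiniLMv2": 9.14,
    "LA-MiniLM": 16.99,
    "LA-MiniLM (best config)": 11.44,
    "QuaLA-MiniLM": 7.84,
    "QuaLA-MiniLM (best config)": 6.41,
}
base = latency_ms["BERT Base"]
for name, ms in latency_ms.items():
    print(f"{name}: {base / ms:.2f}x")  # e.g. QuaLA-MiniLM (best config) -> 8.82x
```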
\ No newline at end of file diff --git a/examples/optimization/pytorch/huggingface/question-answering/dynamic/run_qa.py b/examples/optimization/pytorch/huggingface/question-answering/dynamic/run_qa.py index 121da0b0240..fa0832364e7 100644 --- a/examples/optimization/pytorch/huggingface/question-answering/dynamic/run_qa.py +++ b/examples/optimization/pytorch/huggingface/question-answering/dynamic/run_qa.py @@ -29,11 +29,11 @@ import transformers from dataclasses import dataclass, field from datasets import load_dataset, load_metric -from nlp_toolkit import metrics , OptimizedModel, QuantizationConfig, DynamicLengthConfig +from intel_extension_for_transformers import metrics , OptimizedModel, QuantizationConfig, DynamicLengthConfig from trainer_qa import QuestionAnsweringTrainer -from nlp_toolkit.optimization.utils.models.modeling_roberta_dynamic import RobertaForQuestionAnswering -from nlp_toolkit.optimization.utils.models.modeling_bert_dynamic import BertForQuestionAnswering +from intel_extension_for_transformers.optimization.utils.models.modeling_roberta_dynamic import RobertaForQuestionAnswering +from intel_extension_for_transformers.optimization.utils.models.modeling_bert_dynamic import BertForQuestionAnswering # to use modeling with LAT: transformers.models.roberta.modeling_roberta.RobertaForQuestionAnswering = RobertaForQuestionAnswering transformers.models.bert.modeling_bert.BertForQuestionAnswering = BertForQuestionAnswering diff --git a/examples/optimization/pytorch/huggingface/question-answering/dynamic/trainer_qa.py b/examples/optimization/pytorch/huggingface/question-answering/dynamic/trainer_qa.py index 3e38ea7a654..ef770971d8a 100644 --- a/examples/optimization/pytorch/huggingface/question-answering/dynamic/trainer_qa.py +++ b/examples/optimization/pytorch/huggingface/question-answering/dynamic/trainer_qa.py @@ -20,7 +20,7 @@ from typing import Any, Callable, Dict, List, Optional, Union from transformers import is_torch_tpu_available, __version__ -from nlp_toolkit.optimization.trainer import NLPTrainer +from intel_extension_for_transformers.optimization.trainer import NLPTrainer from transformers.trainer_utils import ( PREFIX_CHECKPOINT_DIR, BestRun, diff --git a/examples/optimization/pytorch/huggingface/question-answering/orchestrate_optimizations/run_qa.py b/examples/optimization/pytorch/huggingface/question-answering/orchestrate_optimizations/run_qa.py index 22d6c897bc0..afafaf7677f 100644 --- a/examples/optimization/pytorch/huggingface/question-answering/orchestrate_optimizations/run_qa.py +++ b/examples/optimization/pytorch/huggingface/question-answering/orchestrate_optimizations/run_qa.py @@ -31,7 +31,7 @@ import numpy as np import torch import transformers -from nlp_toolkit import ( +from intel_extension_for_transformers import ( metrics, PrunerConfig, PruningConfig, @@ -293,7 +293,7 @@ class OptimizationArguments: def main(): if int(os.environ.get("LOCAL_RANK", -1)) != -1 and '--no_cuda' in sys.argv: - from nlp_toolkit.optimization.utils.utility import distributed_init + from intel_extension_for_transformers.optimization.utils.utility import distributed_init distributed_init() # See all possible arguments in src/transformers/training_args.py diff --git a/examples/optimization/pytorch/huggingface/question-answering/orchestrate_optimizations/trainer_qa.py b/examples/optimization/pytorch/huggingface/question-answering/orchestrate_optimizations/trainer_qa.py index 4cfa6ad6d8e..d4a049348df 100644 --- 
a/examples/optimization/pytorch/huggingface/question-answering/orchestrate_optimizations/trainer_qa.py +++ b/examples/optimization/pytorch/huggingface/question-answering/orchestrate_optimizations/trainer_qa.py @@ -17,7 +17,7 @@ """ from transformers import is_torch_tpu_available -from nlp_toolkit.optimization.trainer import NLPTrainer +from intel_extension_for_transformers.optimization.trainer import NLPTrainer from transformers.trainer_utils import PredictionOutput diff --git a/examples/optimization/pytorch/huggingface/question-answering/pruning/basic_magnitude/run_qa.py b/examples/optimization/pytorch/huggingface/question-answering/pruning/basic_magnitude/run_qa.py index b951d76191b..bf1fda5087a 100644 --- a/examples/optimization/pytorch/huggingface/question-answering/pruning/basic_magnitude/run_qa.py +++ b/examples/optimization/pytorch/huggingface/question-answering/pruning/basic_magnitude/run_qa.py @@ -26,7 +26,7 @@ import transformers from dataclasses import dataclass, field from datasets import load_dataset, load_metric -from nlp_toolkit import metrics, OptimizedModel, PrunerConfig, PruningConfig, PruningMode +from intel_extension_for_transformers import metrics, OptimizedModel, PrunerConfig, PruningConfig, PruningMode from trainer_qa import QuestionAnsweringTrainer from transformers import ( AutoConfig, @@ -241,7 +241,7 @@ class OptimizationArguments: def main(): if int(os.environ.get("LOCAL_RANK", -1)) != -1 and '--no_cuda' in sys.argv: - from nlp_toolkit.optimization.utils.utility import distributed_init + from intel_extension_for_transformers.optimization.utils.utility import distributed_init distributed_init() # See all possible arguments in src/transformers/training_args.py diff --git a/examples/optimization/pytorch/huggingface/question-answering/pruning/basic_magnitude/trainer_qa.py b/examples/optimization/pytorch/huggingface/question-answering/pruning/basic_magnitude/trainer_qa.py index 4cfa6ad6d8e..d4a049348df 100644 --- a/examples/optimization/pytorch/huggingface/question-answering/pruning/basic_magnitude/trainer_qa.py +++ b/examples/optimization/pytorch/huggingface/question-answering/pruning/basic_magnitude/trainer_qa.py @@ -17,7 +17,7 @@ """ from transformers import is_torch_tpu_available -from nlp_toolkit.optimization.trainer import NLPTrainer +from intel_extension_for_transformers.optimization.trainer import NLPTrainer from transformers.trainer_utils import PredictionOutput diff --git a/examples/optimization/pytorch/huggingface/question-answering/pruning/group_lasso/README.md b/examples/optimization/pytorch/huggingface/question-answering/pruning/group_lasso/README.md index 3e5b4f7f445..f8268403523 100644 --- a/examples/optimization/pytorch/huggingface/question-answering/pruning/group_lasso/README.md +++ b/examples/optimization/pytorch/huggingface/question-answering/pruning/group_lasso/README.md @@ -11,9 +11,9 @@ This document is used to list steps of reproducing PyTorch BERT pruning result. Recommend python 3.7 or higher version. 
-#### Install [nlp-toolkit]() +#### Install [intel-extension-for-transformers]() ``` -pip install nlp-toolkit +pip install intel-extension-for-transformers ``` #### Install PyTorch diff --git a/examples/optimization/pytorch/huggingface/question-answering/pruning/group_lasso/run_squad_sparse.py b/examples/optimization/pytorch/huggingface/question-answering/pruning/group_lasso/run_squad_sparse.py index 3ebfa8bbfa2..a6b6671b7a7 100644 --- a/examples/optimization/pytorch/huggingface/question-answering/pruning/group_lasso/run_squad_sparse.py +++ b/examples/optimization/pytorch/huggingface/question-answering/pruning/group_lasso/run_squad_sparse.py @@ -1198,7 +1198,7 @@ def main(): if args.do_prune: # Pruning! - from nlp_toolkit import NoTrainerOptimizer, PrunerConfig, PruningConfig + from intel_extension_for_transformers import NoTrainerOptimizer, PrunerConfig, PruningConfig pruner_config = PrunerConfig( prune_type="GroupLasso", target_sparsity_ratio=0.7, diff --git a/examples/optimization/pytorch/huggingface/question-answering/quantization/inc/README.md b/examples/optimization/pytorch/huggingface/question-answering/quantization/inc/README.md index 2168b0cd503..b225171b628 100644 --- a/examples/optimization/pytorch/huggingface/question-answering/quantization/inc/README.md +++ b/examples/optimization/pytorch/huggingface/question-answering/quantization/inc/README.md @@ -29,6 +29,7 @@ python run_qa.py \ --overwrite_output_dir --framework ipex ``` +**Note**: support IPEX version >= 1.12 ### Validated model list |Dataset|Pretrained model|PostTrainingDynamic | PostTrainingStatic | QuantizationAwareTraining diff --git a/examples/optimization/pytorch/huggingface/question-answering/quantization/inc/run_qa.py b/examples/optimization/pytorch/huggingface/question-answering/quantization/inc/run_qa.py index 9bc5ddb645c..ad4f179ccb9 100644 --- a/examples/optimization/pytorch/huggingface/question-answering/quantization/inc/run_qa.py +++ b/examples/optimization/pytorch/huggingface/question-answering/quantization/inc/run_qa.py @@ -26,7 +26,7 @@ import transformers from dataclasses import dataclass, field from datasets import load_dataset, load_metric -from nlp_toolkit import metrics , OptimizedModel, QuantizationConfig +from intel_extension_for_transformers import metrics , OptimizedModel, QuantizationConfig from trainer_qa import QuestionAnsweringTrainer from transformers import ( AutoConfig, diff --git a/examples/optimization/pytorch/huggingface/question-answering/quantization/inc/trainer_qa.py b/examples/optimization/pytorch/huggingface/question-answering/quantization/inc/trainer_qa.py index 467777ed906..21d1f7ec3bf 100644 --- a/examples/optimization/pytorch/huggingface/question-answering/quantization/inc/trainer_qa.py +++ b/examples/optimization/pytorch/huggingface/question-answering/quantization/inc/trainer_qa.py @@ -17,7 +17,7 @@ """ from transformers import is_torch_tpu_available -from nlp_toolkit.optimization.trainer import NLPTrainer +from intel_extension_for_transformers.optimization.trainer import NLPTrainer from transformers.trainer_utils import PredictionOutput diff --git a/examples/optimization/pytorch/huggingface/summarization/quantization/README.md b/examples/optimization/pytorch/huggingface/summarization/quantization/README.md index 0cb72f3a711..d884cae6feb 100644 --- a/examples/optimization/pytorch/huggingface/summarization/quantization/README.md +++ b/examples/optimization/pytorch/huggingface/summarization/quantization/README.md @@ -36,7 +36,7 @@ For the old `finetune_trainer.py` and related 
utils, see [`examples/legacy/seq2s For custom datasets in `jsonlines` format please see: https://huggingface.co/docs/datasets/loading_datasets.html#json-files and you also will find examples of these below. -## tune a quantized model with NLP_toolkit +## tune a quantized model with intel_extension_for_transformers Here is an example on a summarization task: ```bash diff --git a/examples/optimization/pytorch/huggingface/summarization/quantization/run_summarization.py b/examples/optimization/pytorch/huggingface/summarization/quantization/run_summarization.py index 486e8d75e52..ee443f660de 100755 --- a/examples/optimization/pytorch/huggingface/summarization/quantization/run_summarization.py +++ b/examples/optimization/pytorch/huggingface/summarization/quantization/run_summarization.py @@ -30,9 +30,9 @@ from datasets import load_dataset, load_metric from filelock import FileLock -from nlp_toolkit import OptimizedModel, QuantizationConfig -from nlp_toolkit import metrics as nlp_metrics -from nlp_toolkit.optimization.trainer import NLPSeq2SeqTrainer +from intel_extension_for_transformers import OptimizedModel, QuantizationConfig +from intel_extension_for_transformers import metrics as nlp_metrics +from intel_extension_for_transformers.optimization.trainer import NLPSeq2SeqTrainer from transformers import ( AutoConfig, AutoModelForSeq2SeqLM, diff --git a/examples/optimization/pytorch/huggingface/text-classification/distillation/run_glue.py b/examples/optimization/pytorch/huggingface/text-classification/distillation/run_glue.py index d8a10313c36..157fc559c02 100644 --- a/examples/optimization/pytorch/huggingface/text-classification/distillation/run_glue.py +++ b/examples/optimization/pytorch/huggingface/text-classification/distillation/run_glue.py @@ -28,12 +28,12 @@ import transformers from dataclasses import dataclass, field from datasets import load_dataset, load_metric -from nlp_toolkit import ( +from intel_extension_for_transformers import ( metrics, DistillationConfig, OptimizedModel, ) -from nlp_toolkit.optimization.trainer import NLPTrainer +from intel_extension_for_transformers.optimization.trainer import NLPTrainer from torch.utils.data import DataLoader from tqdm.auto import tqdm from transformers import ( @@ -232,7 +232,7 @@ class OptimizationArguments: def main(): if int(os.environ.get("LOCAL_RANK", -1)) != -1 and '--no_cuda' in sys.argv: - from nlp_toolkit.optimization.utils.utility import distributed_init + from intel_extension_for_transformers.optimization.utils.utility import distributed_init distributed_init() # See all possible arguments in src/transformers/training_args.py diff --git a/examples/optimization/pytorch/huggingface/text-classification/orchestrate_optimizations/run_glue.py b/examples/optimization/pytorch/huggingface/text-classification/orchestrate_optimizations/run_glue.py index 1b62be26e23..da7ca531328 100644 --- a/examples/optimization/pytorch/huggingface/text-classification/orchestrate_optimizations/run_glue.py +++ b/examples/optimization/pytorch/huggingface/text-classification/orchestrate_optimizations/run_glue.py @@ -28,7 +28,7 @@ import transformers from dataclasses import dataclass, field from datasets import load_dataset, load_metric -from nlp_toolkit import ( +from intel_extension_for_transformers import ( metrics, PrunerConfig, PruningConfig, @@ -37,7 +37,7 @@ OptimizedModel, objectives ) -from nlp_toolkit.optimization.trainer import NLPTrainer +from intel_extension_for_transformers.optimization.trainer import NLPTrainer from torch.utils.data import 
DataLoader from tqdm.auto import tqdm from transformers import ( @@ -275,7 +275,7 @@ class OptimizationArguments: def main(): if int(os.environ.get("LOCAL_RANK", -1)) != -1 and '--no_cuda' in sys.argv: - from nlp_toolkit.optimization.utils.utility import distributed_init + from intel_extension_for_transformers.optimization.utils.utility import distributed_init distributed_init() # See all possible arguments in src/transformers/training_args.py diff --git a/examples/optimization/pytorch/huggingface/text-classification/pruning/run_glue.py b/examples/optimization/pytorch/huggingface/text-classification/pruning/run_glue.py index 4d6bd28b6bb..3f8037c1e79 100644 --- a/examples/optimization/pytorch/huggingface/text-classification/pruning/run_glue.py +++ b/examples/optimization/pytorch/huggingface/text-classification/pruning/run_glue.py @@ -26,13 +26,13 @@ import transformers from dataclasses import dataclass, field from datasets import load_dataset, load_metric -from nlp_toolkit import ( +from intel_extension_for_transformers import ( metrics, OptimizedModel, PrunerConfig, PruningConfig, ) -from nlp_toolkit.optimization.trainer import NLPTrainer +from intel_extension_for_transformers.optimization.trainer import NLPTrainer from transformers import ( AutoConfig, AutoModelForSequenceClassification, @@ -233,7 +233,7 @@ class OptimizationArguments: def main(): if int(os.environ.get("LOCAL_RANK", -1)) != -1 and '--no_cuda' in sys.argv: - from nlp_toolkit.optimization.utils.utility import distributed_init + from intel_extension_for_transformers.optimization.utils.utility import distributed_init distributed_init() # See all possible arguments in src/transformers/training_args.py diff --git a/examples/optimization/pytorch/huggingface/text-classification/quantization/inc/run_glue.py b/examples/optimization/pytorch/huggingface/text-classification/quantization/inc/run_glue.py index 19461003a3d..c9def13a202 100644 --- a/examples/optimization/pytorch/huggingface/text-classification/quantization/inc/run_glue.py +++ b/examples/optimization/pytorch/huggingface/text-classification/quantization/inc/run_glue.py @@ -26,8 +26,8 @@ import transformers from dataclasses import dataclass, field from datasets import load_dataset, load_metric -from nlp_toolkit import metrics, objectives, OptimizedModel, QuantizationConfig -from nlp_toolkit.optimization.trainer import NLPTrainer +from intel_extension_for_transformers import metrics, objectives, OptimizedModel, QuantizationConfig +from intel_extension_for_transformers.optimization.trainer import NLPTrainer from transformers import ( AutoConfig, AutoModelForSequenceClassification, diff --git a/examples/optimization/pytorch/huggingface/text-classification/quantization/inc/run_glue_no_trainer.py b/examples/optimization/pytorch/huggingface/text-classification/quantization/inc/run_glue_no_trainer.py index b305cf8bc77..f50d2f51228 100644 --- a/examples/optimization/pytorch/huggingface/text-classification/quantization/inc/run_glue_no_trainer.py +++ b/examples/optimization/pytorch/huggingface/text-classification/quantization/inc/run_glue_no_trainer.py @@ -30,7 +30,7 @@ import transformers from accelerate import Accelerator from huggingface_hub import Repository -from nlp_toolkit import (metrics, NoTrainerOptimizer, objectives, OptimizedModel, +from intel_extension_for_transformers import (metrics, NoTrainerOptimizer, objectives, OptimizedModel, QuantizationConfig) from transformers import ( AdamW, @@ -169,7 +169,7 @@ def parse_args(): type=str, help="The name of the repository to 
keep in sync with the local `output_dir`.") parser.add_argument("--hub_token", type=str, help="The token to use to push to the Model Hub.") - parser.add_argument("--tune", action="store_true", help="tune a best model with nlp toolkit.") + parser.add_argument("--tune", action="store_true", help="tune a best model with Intel Extension for Transformers.") parser.add_argument("--quantization_approach", type=str, default="PostTrainingStatic", diff --git a/examples/optimization/pytorch/huggingface/text-classification/quantization/nncf/configs/nncf_bert_config_conll.json b/examples/optimization/pytorch/huggingface/text-classification/quantization/nncf/configs/nncf_bert_config_conll.json deleted file mode 100644 index 0f73fe36833..00000000000 --- a/examples/optimization/pytorch/huggingface/text-classification/quantization/nncf/configs/nncf_bert_config_conll.json +++ /dev/null @@ -1,44 +0,0 @@ -{ - "input_info": [ - { - "sample_size": [1, 128], - "type": "long" - }, - { - "sample_size": [1, 128], - "type": "long" - }, - { - "sample_size": [1, 128], - "type": "long" - } - ], - "compression": { - "algorithm": "quantization", - "initializer": { - "range": { - "num_init_samples": 32, - "type": "percentile", - "params": - { - "min_percentile": 0.01, - "max_percentile": 99.99 - } - }, - - "batchnorm_adaptation": { - "num_bn_adaptation_samples": 200 - } - }, - "activations": - { - "mode": "symmetric" - }, - "weights": - { - "mode": "symmetric", - "signed": true, - "per_channel": false - } - } -} diff --git a/examples/optimization/pytorch/huggingface/text-classification/quantization/nncf/configs/nncf_bert_config_mrpc.json b/examples/optimization/pytorch/huggingface/text-classification/quantization/nncf/configs/nncf_bert_config_mrpc.json deleted file mode 100644 index f4ecbeeedbf..00000000000 --- a/examples/optimization/pytorch/huggingface/text-classification/quantization/nncf/configs/nncf_bert_config_mrpc.json +++ /dev/null @@ -1,42 +0,0 @@ -{ - "input_info": [ - { - "sample_size": [1, 128], - "type": "long" - }, - { - "sample_size": [1, 128], - "type": "long" - }, - { - "sample_size": [1, 128], - "type": "long" - } - ], - "compression": { - "algorithm": "quantization", - "initializer": { - "range": { - "num_init_samples": 64, - "type": "percentile", - "params": - { - "min_percentile": 0.01, - "max_percentile": 99.99 - } - }, - "batchnorm_adaptation": { - "num_bn_adaptation_samples": 200 - } - }, - "activations": - { - "mode": "symmetric" - }, - "weights": - { - "mode": "symmetric", - "per_channel": false - } - } -} diff --git a/examples/optimization/pytorch/huggingface/text-classification/quantization/nncf/configs/nncf_bert_config_squad.json b/examples/optimization/pytorch/huggingface/text-classification/quantization/nncf/configs/nncf_bert_config_squad.json deleted file mode 100644 index 12e0440f712..00000000000 --- a/examples/optimization/pytorch/huggingface/text-classification/quantization/nncf/configs/nncf_bert_config_squad.json +++ /dev/null @@ -1,44 +0,0 @@ -{ - "input_info": [ - { - "sample_size": [1, 384], - "type": "long" - }, - { - "sample_size": [1, 384], - "type": "long" - }, - { - "sample_size": [1, 384], - "type": "long" - } - ], - "compression": { - "algorithm": "quantization", - "initializer": { - "range": { - "num_init_samples": 32, - "type": "percentile", - "params": - { - "min_percentile": 0.01, - "max_percentile": 99.99 - } - }, - - "batchnorm_adaptation": { - "num_bn_adaptation_samples": 200 - } - }, - "activations": - { - "mode": "symmetric" - }, - "weights": - { - "mode": 
"symmetric", - "signed": true, - "per_channel": false - } - } -} diff --git a/examples/optimization/pytorch/huggingface/text-classification/quantization/nncf/configs/nncf_bert_config_squad_magnitude_sparsity_cubic.json b/examples/optimization/pytorch/huggingface/text-classification/quantization/nncf/configs/nncf_bert_config_squad_magnitude_sparsity_cubic.json deleted file mode 100644 index b4452e8d4de..00000000000 --- a/examples/optimization/pytorch/huggingface/text-classification/quantization/nncf/configs/nncf_bert_config_squad_magnitude_sparsity_cubic.json +++ /dev/null @@ -1,31 +0,0 @@ -{ - "input_info": [ - { - "sample_size": [1, 384], - "type": "long" - }, - { - "sample_size": [1, 384], - "type": "long" - }, - { - "sample_size": [1, 384], - "type": "long" - } - ], - "compression": { - "algorithm": "magnitude_sparsity", - "params": { - "schedule": "polynomial", - "power": 3, - "sparsity_init": 0.0, - "sparsity_target": 0.8, - "sparsity_target_epoch": 40, - "sparsity_freeze_epoch": 60, - "update_per_optimizer_step": true, - "steps_per_epoch": 1109, - "weight_importance": "abs" - }, - "ignored_scopes": ["{re}.*NNCFEmbedding"] - } -} diff --git a/examples/optimization/pytorch/huggingface/text-classification/quantization/nncf/configs/nncf_bert_config_xnli.json b/examples/optimization/pytorch/huggingface/text-classification/quantization/nncf/configs/nncf_bert_config_xnli.json deleted file mode 100644 index a21a522fc40..00000000000 --- a/examples/optimization/pytorch/huggingface/text-classification/quantization/nncf/configs/nncf_bert_config_xnli.json +++ /dev/null @@ -1,36 +0,0 @@ -{ - "input_info": [ - { - "sample_size": [1, 128], - "type": "long" - }, - { - "sample_size": [1, 128], - "type": "long" - }, - { - "sample_size": [1, 128], - "type": "long" - } - ], - "compression": { - "algorithm": "quantization", - "initializer": { - "range": { - "num_init_samples": 96 - } - }, - "ignored_scopes": ["{re}BertSelfAttention\\[self\\]/__add___0", - "{re}BertIntermediate\\[intermediate\\]/__mul___0", - "{re}BertIntermediate\\[intermediate\\]/NNCFLinear\\[dense\\]/linear_0" - ], - "activations": - { - "mode": "asymmetric" - }, - "weights": - { - "mode": "symmetric" - } - } -} diff --git a/examples/optimization/pytorch/huggingface/text-classification/quantization/nncf/configs/nncf_distilbert_config_sst2.json b/examples/optimization/pytorch/huggingface/text-classification/quantization/nncf/configs/nncf_distilbert_config_sst2.json deleted file mode 100644 index 6b648ca5e97..00000000000 --- a/examples/optimization/pytorch/huggingface/text-classification/quantization/nncf/configs/nncf_distilbert_config_sst2.json +++ /dev/null @@ -1,33 +0,0 @@ -{ - "input_info": [ - { - "sample_size": [1, 128], - "type": "long" - }, - { - "sample_size": [1, 128], - "type": "long" - } - ], - "compression": { - "algorithm": "quantization", - "initializer": { - "range": { - "num_init_samples": 32 - } - }, - "ignored_scopes": [ - "{re}TransformerBlock\\[[0-9]*\\]/FFN\\[ffn\\]/__mul___0", - "{re}TransformerBlock\\[[0-9]*\\]/FFN\\[ffn\\]/NNCFLinear\\[lin1\\]/linear_0" - ], - "activations": - { - "mode": "symmetric" - }, - "weights": - { - "mode": "symmetric", - "signed": true - } - } -} diff --git a/examples/optimization/pytorch/huggingface/text-classification/quantization/nncf/configs/nncf_gpt2_config_wikitext_hw_config.json b/examples/optimization/pytorch/huggingface/text-classification/quantization/nncf/configs/nncf_gpt2_config_wikitext_hw_config.json deleted file mode 100644 index 4b2376133c0..00000000000 --- 
a/examples/optimization/pytorch/huggingface/text-classification/quantization/nncf/configs/nncf_gpt2_config_wikitext_hw_config.json +++ /dev/null @@ -1,58 +0,0 @@ -{ - "input_info": [ - { - "sample_size": [1, 1024], - "type": "long" - } - ], - "hw_config_type": "cpu", - "compression": { - "algorithm": "quantization", - "initializer": { - "range": { - "num_init_samples": 16, - "type": "percentile", - "params": - { - "min_percentile": 0.01, - "max_percentile": 99.99 - } - } - }, - "ignored_scopes": [ - //gelu_new with fusing into previous GEMM - "{re}.*MLP\\[mlp\\]/__rmul___0", - "{re}.*MLP\\[mlp\\]/__add___0", - "{re}.*MLP\\[mlp\\]/__rmul___1", - "{re}.*MLP\\[mlp\\]/tanh_0", - "{re}.*MLP\\[mlp\\]/__radd___0", - "{re}.*MLP\\[mlp\\]/__mul___0", - - // Intermediate embedding sum results - "GPT2LMHeadModel/GPT2Model[transformer]/__add___0", - "GPT2LMHeadModel/GPT2Model[transformer]/__add___1", - - // Scaling in attention - "{re}.*Attention\\[attn\\]/__truediv___0", - - // Pre-LayerNorm additions - "{re}.*Block\\[[0-9]*\\]/__add___0", - "{re}.*Block\\[[0-9]*\\]/__add___1", - - // Final LayerNorm inputs - "GPT2LMHeadModel/GPT2Model[transformer]/LayerNorm[ln_f]", - - // LM head - "GPT2LMHeadModel/NNCFLinear[lm_head]" - ], - "activations": - { - "mode": "symmetric" - }, - "weights": - { - "mode": "symmetric", - "signed": true - } - } -} diff --git a/examples/optimization/pytorch/huggingface/text-classification/quantization/nncf/configs/nncf_mobilebert_config_squad_int8.json b/examples/optimization/pytorch/huggingface/text-classification/quantization/nncf/configs/nncf_mobilebert_config_squad_int8.json deleted file mode 100644 index 89504d2c719..00000000000 --- a/examples/optimization/pytorch/huggingface/text-classification/quantization/nncf/configs/nncf_mobilebert_config_squad_int8.json +++ /dev/null @@ -1,46 +0,0 @@ -{ - "input_info": [ - { - "sample_size": [1, 384], - "type": "long" - }, - { - "sample_size": [1, 384], - "type": "long" - }, - { - "sample_size": [1, 384], - "type": "long" - } - ], - "compression": { - "algorithm": "quantization", - "initializer": { - "range": { - "num_init_samples": 64, - "type": "percentile", - "params": - { - "min_percentile": 0.01, - "max_percentile": 99.99 - } - } - }, - "ignored_scopes": ["{re}MobileBertSelfAttention\\[self\\]/__add___0", - "{re}MobileBertIntermediate\\[intermediate\\]/NNCFLinear\\[dense\\]/linear_0"], - "activations": - { - "mode": "symmetric", - "ignored_scopes": [ - "{re}MobileBertForQuestionAnswering/MobileBertModel\\[mobilebert\\]/MobileBertEmbeddings\\[embeddings\\]/__add___0", - "{re}MobileBertForQuestionAnswering/MobileBertModel\\[mobilebert\\]/MobileBertEmbeddings\\[embeddings\\]/__add___1", - "{re}MobileBertOutput\\[output\\]/__add___0", - "{re}NoNorm\\[LayerNorm\\]/__mul___0"] - }, - "weights": - { - "mode": "symmetric", - "signed": true - } - } -} diff --git a/examples/optimization/pytorch/huggingface/text-classification/quantization/nncf/configs/nncf_roberta_config_mnli.json b/examples/optimization/pytorch/huggingface/text-classification/quantization/nncf/configs/nncf_roberta_config_mnli.json deleted file mode 100644 index edbe0f84d82..00000000000 --- a/examples/optimization/pytorch/huggingface/text-classification/quantization/nncf/configs/nncf_roberta_config_mnli.json +++ /dev/null @@ -1,36 +0,0 @@ -{ - "input_info": [ - { - "keyword": "input_ids", - "sample_size": [1, 128], - "type": "long", - "filler": "ones" - }, - { - "keyword": "attention_mask", - "sample_size": [1, 128], - "type": "long", - "filler": "ones" - } - ], - 
"compression": { - "algorithm": "quantization", - "initializer": { - "range": { - "num_init_samples": 24 - } - }, - "ignored_scopes": ["{re}BertSelfAttention\\[self\\]/__add___0", - "RobertaForSequenceClassification/RobertaClassificationHead[classifier]/Linear[out_proj]", - "RobertaForSequenceClassification/RobertaClassificationHead[classifier]/Linear[dense]" - ], - "activations": - { - "mode": "asymmetric" - }, - "weights": - { - "mode": "asymmetric" - } - } -} diff --git a/examples/optimization/pytorch/huggingface/text-classification/quantization/nncf/run_glue.py b/examples/optimization/pytorch/huggingface/text-classification/quantization/nncf/run_glue.py deleted file mode 100755 index 1364dfa0c8a..00000000000 --- a/examples/optimization/pytorch/huggingface/text-classification/quantization/nncf/run_glue.py +++ /dev/null @@ -1,733 +0,0 @@ -#!/usr/bin/env python -# coding=utf-8 -# Copyright 2020 The HuggingFace Inc. team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" Finetuning the library models for sequence classification on GLUE.""" -# You can also adapt this script on your own text classification task. Pointers for this are left as comments. - -import logging -import os -import random -import sys -from dataclasses import dataclass, field -from typing import Optional - -import datasets -import numpy as np -from datasets import load_dataset, load_metric - -import transformers -from nlp_toolkit import OptimizedModel -from nlp_toolkit.backends.openvino.nncf_utils import get_nncf_train_dataloader_for_init -from nlp_toolkit.optimization.trainer import NLPTrainer -from nncf import NNCFConfig -from nncf.config.structures import BNAdaptationInitArgs -from nncf.config.structures import QuantizationRangeInitArgs -from nncf.torch.initialization import PTInitializingDataLoader -from transformers import ( - AutoConfig, - AutoModelForSequenceClassification, - AutoTokenizer, - DataCollatorWithPadding, - EvalPrediction, - HfArgumentParser, - PretrainedConfig, - Trainer, - TrainingArguments, - default_data_collator, - set_seed, -) -from transformers.trainer_utils import get_last_checkpoint -from transformers.utils import check_min_version -from transformers.utils.versions import require_version - - -# Will error if the minimal version of Transformers is not installed. Remove at your own risks. -check_min_version("4.9.0") - -require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/text-classification/requirements.txt") - -task_to_keys = { - "cola": ("sentence", None), - "mnli": ("premise", "hypothesis"), - "mrpc": ("sentence1", "sentence2"), - "qnli": ("question", "sentence"), - "qqp": ("question1", "question2"), - "rte": ("sentence1", "sentence2"), - "sst2": ("sentence", None), - "stsb": ("sentence1", "sentence2"), - "wnli": ("sentence1", "sentence2"), -} - -logger = logging.getLogger(__name__) - - -@dataclass -class DataTrainingArguments: - """ - Arguments pertaining to what data we are going to input our model for training and eval. 
- - Using `HfArgumentParser` we can turn this class - into argparse arguments to be able to specify them on - the command line. - """ - - task_name: Optional[str] = field( - default=None, - metadata={"help": "The name of the task to train on: " + ", ".join(task_to_keys.keys())}, - ) - dataset_name: Optional[str] = field( - default=None, metadata={"help": "The name of the dataset to use (via the datasets library)."} - ) - dataset_config_name: Optional[str] = field( - default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."} - ) - max_seq_length: int = field( - default=128, - metadata={ - "help": "The maximum total input sequence length after tokenization. Sequences longer " - "than this will be truncated, sequences shorter will be padded." - }, - ) - overwrite_cache: bool = field( - default=False, metadata={"help": "Overwrite the cached preprocessed datasets or not."} - ) - pad_to_max_length: bool = field( - default=True, - metadata={ - "help": "Whether to pad all samples to `max_seq_length`. " - "If False, will pad the samples dynamically when batching to the maximum length in the batch." - }, - ) - max_train_samples: Optional[int] = field( - default=None, - metadata={ - "help": "For debugging purposes or quicker training, truncate the number of training examples to this " - "value if set." - }, - ) - max_eval_samples: Optional[int] = field( - default=None, - metadata={ - "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this " - "value if set." - }, - ) - max_predict_samples: Optional[int] = field( - default=None, - metadata={ - "help": "For debugging purposes or quicker training, truncate the number of prediction examples to this " - "value if set." - }, - ) - train_file: Optional[str] = field( - default=None, metadata={"help": "A csv or a json file containing the training data."} - ) - validation_file: Optional[str] = field( - default=None, metadata={"help": "A csv or a json file containing the validation data."} - ) - test_file: Optional[str] = field(default=None, metadata={"help": "A csv or a json file containing the test data."}) - - def __post_init__(self): - if self.task_name is not None: - self.task_name = self.task_name.lower() - if self.task_name not in task_to_keys.keys(): - raise ValueError("Unknown task, you should pick one in " + ",".join(task_to_keys.keys())) - elif self.dataset_name is not None: - pass - elif self.train_file is None or self.validation_file is None: - raise ValueError("Need either a GLUE task, a training/validation file or a dataset name.") - else: - train_extension = self.train_file.split(".")[-1] - assert train_extension in ["csv", "json"], "`train_file` should be a csv or a json file." - validation_extension = self.validation_file.split(".")[-1] - assert ( - validation_extension == train_extension - ), "`validation_file` should have the same extension (csv or json) as `train_file`." - - -@dataclass -class ModelArguments: - """ - Arguments pertaining to which model/config/tokenizer we are going to fine-tune from. 
- """ - - model_name_or_path: str = field( - metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"} - ) - config_name: Optional[str] = field( - default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"} - ) - tokenizer_name: Optional[str] = field( - default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"} - ) - cache_dir: Optional[str] = field( - default=None, - metadata={"help": "Where do you want to store the pretrained models downloaded from huggingface.co"}, - ) - use_fast_tokenizer: bool = field( - default=True, - metadata={"help": "Whether to use one of the fast tokenizer (backed by the tokenizers library) or not."}, - ) - model_revision: str = field( - default="main", - metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, - ) - use_auth_token: bool = field( - default=False, - metadata={ - "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " - "with private models)." - }, - ) - - -@dataclass -class OptimizationArguments: - """ - Arguments pertaining to what type of optimization we are going to apply on the model. - """ - - tune: bool = field( - default=False, - metadata={"help": "Whether or not to apply quantization."}, - ) - quantization_approach: Optional[str] = field( - default="dynamic_quantization", - metadata={"help": "Quantization approach. Supported approach are static_quantization, dynamic_quantization and qat."}, - ) - metric_name: Optional[str] = field( - default=None, - metadata={"help": "Metric used for the tuning strategy."}, - ) - tolerance_mode: Optional[str] = field( - default="absolute", - metadata={"help": "Metric tolerance model, expected to be relative or absolute."}, - ) - perf_tol: Optional[float] = field( - default=0.02, - metadata={"help": "Performance tolerance when optimizing the model."}, - ) - benchmark: bool = field( - default=False, - metadata={"help": "run benchmark."}) - int8: bool = field( - default=False, - metadata={"help":"run benchmark."}) - accuracy_only: bool = field( - default=False, - metadata={"help":"Whether to only test accuracy for model tuned by Neural Compressor."}) - - -def main(): - # See all possible arguments in src/transformers/training_args.py - # or by passing the --help flag to this script. - # We now keep distinct sets of args, for a cleaner separation of concerns. - - parser = HfArgumentParser((ModelArguments, DataTrainingArguments, TrainingArguments, OptimizationArguments)) - if len(sys.argv) == 2 and sys.argv[1].endswith(".json"): - # If we pass only one argument to the script and it's the path to a json file, - # let's parse it to get our arguments. 
- model_args, data_args, training_args, optim_args = parser.parse_json_file( - json_file=os.path.abspath(sys.argv[1]) - ) - else: - model_args, data_args, training_args, optim_args = parser.parse_args_into_dataclasses() - - # Setup logging - logging.basicConfig( - format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", - datefmt="%m/%d/%Y %H:%M:%S", - handlers=[logging.StreamHandler(sys.stdout)], - ) - - log_level = training_args.get_process_log_level() - logger.setLevel(log_level) - datasets.utils.logging.set_verbosity(log_level) - transformers.utils.logging.set_verbosity(log_level) - transformers.utils.logging.enable_default_handler() - transformers.utils.logging.enable_explicit_format() - - # Log on each process the small summary: - logger.warning( - f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}" - + f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}" - ) - logger.info(f"Training/evaluation parameters {training_args}") - - # Detecting last checkpoint. - last_checkpoint = None - if os.path.isdir(training_args.output_dir) and training_args.do_train and not training_args.overwrite_output_dir: - last_checkpoint = get_last_checkpoint(training_args.output_dir) - if last_checkpoint is None and len(os.listdir(training_args.output_dir)) > 0: - raise ValueError( - f"Output directory ({training_args.output_dir}) already exists and is not empty. " - "Use --overwrite_output_dir to overcome." - ) - elif last_checkpoint is not None and training_args.resume_from_checkpoint is None: - logger.info( - f"Checkpoint detected, resuming training at {last_checkpoint}. To avoid this behavior, change " - "the `--output_dir` or add `--overwrite_output_dir` to train from scratch." - ) - - # Set seed before initializing model. - set_seed(training_args.seed) - - # Get the datasets: you can either provide your own CSV/JSON training and evaluation files (see below) - # or specify a GLUE benchmark task (the dataset will be downloaded automatically from the datasets Hub). - # - # For CSV/JSON files, this script will use as labels the column called 'label' and as pair of sentences the - # sentences in columns called 'sentence1' and 'sentence2' if such column exists or the first two columns not named - # label if at least two columns are provided. - # - # If the CSVs/JSONs contain only one non-label column, the script does single sentence classification on this - # single column. You can easily tweak this behavior (see below) - # - # In distributed training, the load_dataset function guarantee that only one local process can concurrently - # download the dataset. - if data_args.task_name is not None: - # Downloading and loading a dataset from the hub. - raw_datasets = load_dataset("glue", data_args.task_name, cache_dir=model_args.cache_dir) - elif data_args.dataset_name is not None: - # Downloading and loading a dataset from the hub. - raw_datasets = load_dataset( - data_args.dataset_name, data_args.dataset_config_name, cache_dir=model_args.cache_dir - ) - else: - # Loading a dataset from your local files. - # CSV/JSON training and evaluation files are needed. - data_files = {"train": data_args.train_file, "validation": data_args.validation_file} - - # Get the test dataset: you can provide your own CSV/JSON test file (see below) - # when you use `do_predict` without specifying a GLUE benchmark task. 
- if training_args.do_predict: - if data_args.test_file is not None: - train_extension = data_args.train_file.split(".")[-1] - test_extension = data_args.test_file.split(".")[-1] - assert ( - test_extension == train_extension - ), "`test_file` should have the same extension (csv or json) as `train_file`." - data_files["test"] = data_args.test_file - else: - raise ValueError("Need either a GLUE task or a test file for `do_predict`.") - - for key in data_files.keys(): - logger.info(f"load a local file for {key}: {data_files[key]}") - - if data_args.train_file.endswith(".csv"): - # Loading a dataset from local csv files - raw_datasets = load_dataset("csv", data_files=data_files, cache_dir=model_args.cache_dir) - else: - # Loading a dataset from local json files - raw_datasets = load_dataset("json", data_files=data_files, cache_dir=model_args.cache_dir) - # See more about loading any type of standard or custom dataset at - # https://huggingface.co/docs/datasets/loading_datasets.html. - - # Labels - if data_args.task_name is not None: - is_regression = data_args.task_name == "stsb" - if not is_regression: - label_list = raw_datasets["train"].features["label"].names - num_labels = len(label_list) - else: - num_labels = 1 - else: - # Trying to have good defaults here, don't hesitate to tweak to your needs. - is_regression = raw_datasets["train"].features["label"].dtype in ["float32", "float64"] - if is_regression: - num_labels = 1 - else: - # A useful fast method: - # https://huggingface.co/docs/datasets/package_reference/main_classes.html#datasets.Dataset.unique - label_list = raw_datasets["train"].unique("label") - label_list.sort() # Let's sort it for determinism - num_labels = len(label_list) - - # Load pretrained model and tokenizer - # - # In distributed training, the .from_pretrained methods guarantee that only one local process can concurrently - # download model & vocab. - config = AutoConfig.from_pretrained( - model_args.config_name if model_args.config_name else model_args.model_name_or_path, - num_labels=num_labels, - finetuning_task=data_args.task_name, - cache_dir=model_args.cache_dir, - revision=model_args.model_revision, - use_auth_token=True if model_args.use_auth_token else None, - ) - tokenizer = AutoTokenizer.from_pretrained( - model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, - cache_dir=model_args.cache_dir, - use_fast=model_args.use_fast_tokenizer, - revision=model_args.model_revision, - use_auth_token=True if model_args.use_auth_token else None, - ) - model = AutoModelForSequenceClassification.from_pretrained( - model_args.model_name_or_path, - from_tf=bool(".ckpt" in model_args.model_name_or_path), - config=config, - cache_dir=model_args.cache_dir, - revision=model_args.model_revision, - use_auth_token=True if model_args.use_auth_token else None, - ) - - # Preprocessing the raw_datasets - if data_args.task_name is not None: - sentence1_key, sentence2_key = task_to_keys[data_args.task_name] - else: - # Again, we try to have some nice defaults but don't hesitate to tweak to your use case. 
- non_label_column_names = [name for name in raw_datasets["train"].column_names if name != "label"] - if "sentence1" in non_label_column_names and "sentence2" in non_label_column_names: - sentence1_key, sentence2_key = "sentence1", "sentence2" - else: - if len(non_label_column_names) >= 2: - sentence1_key, sentence2_key = non_label_column_names[:2] - else: - sentence1_key, sentence2_key = non_label_column_names[0], None - - # Padding strategy - if data_args.pad_to_max_length: - padding = "max_length" - else: - # We will pad later, dynamically at batch creation, to the max sequence length in each batch - padding = False - - # Some models have set the order of the labels to use, so let's make sure we do use it. - label_to_id = None - if ( - model.config.label2id != PretrainedConfig(num_labels=num_labels).label2id - and data_args.task_name is not None - and not is_regression - ): - # Some have all caps in their config, some don't. - label_name_to_id = {k.lower(): v for k, v in model.config.label2id.items()} - if list(sorted(label_name_to_id.keys())) == list(sorted(label_list)): - label_to_id = {i: int(label_name_to_id[label_list[i]]) for i in range(num_labels)} - else: - logger.warning( - "Your model seems to have been trained with labels, but they don't match the dataset: ", - f"model labels: {list(sorted(label_name_to_id.keys()))}, dataset labels: {list(sorted(label_list))}." - "\nIgnoring the model labels as a result.", - ) - elif data_args.task_name is None and not is_regression: - label_to_id = {v: i for i, v in enumerate(label_list)} - - if label_to_id is not None: - model.config.label2id = label_to_id - model.config.id2label = {id: label for label, id in config.label2id.items()} - - if data_args.max_seq_length > tokenizer.model_max_length: - logger.warning( - f"The max_seq_length passed ({data_args.max_seq_length}) is larger than the maximum length for the" - f"model ({tokenizer.model_max_length}). Using max_seq_length={tokenizer.model_max_length}." 
- ) - max_seq_length = min(data_args.max_seq_length, tokenizer.model_max_length) - - def preprocess_function(examples): - # Tokenize the texts - args = ( - (examples[sentence1_key],) if sentence2_key is None else (examples[sentence1_key], examples[sentence2_key]) - ) - result = tokenizer(*args, padding=padding, max_length=max_seq_length, truncation=True) - - # Map labels to IDs (not necessary for GLUE tasks) - if label_to_id is not None and "label" in examples: - result["label"] = [(label_to_id[l] if l != -1 else -1) for l in examples["label"]] - return result - - with training_args.main_process_first(desc="dataset map pre-processing"): - raw_datasets = raw_datasets.map( - preprocess_function, - batched=True, - load_from_cache_file=not data_args.overwrite_cache, - desc="Running tokenizer on dataset", - ) - if training_args.do_train: - if "train" not in raw_datasets: - raise ValueError("--do_train requires a train dataset") - train_dataset = raw_datasets["train"] - if data_args.max_train_samples is not None: - train_dataset = train_dataset.select(range(data_args.max_train_samples)) - - nncf_config = None - if training_args.nncf_config is not None: - nncf_config = NNCFConfig.from_json(training_args.nncf_config) - - if nncf_config.get("log_dir") is None: - nncf_config["log_dir"] = training_args.output_dir - - if not os.path.exists(training_args.output_dir) and training_args.local_rank in [-1, 0]: - os.makedirs(nncf_config["log_dir"]) - - if training_args.do_train: - train_dataloader = get_nncf_train_dataloader_for_init( - training_args, - train_dataset, - data_collator=default_data_collator - ) - - class SST2InitializingDataLoader(PTInitializingDataLoader): - def get_inputs(self, dataloader_output): - return (), { - "labels": dataloader_output["labels"], - "attention_mask": dataloader_output["attention_mask"], - "input_ids": dataloader_output["input_ids"] - } - - class MRPCInitializingDataLoader(PTInitializingDataLoader): - def get_inputs(self, dataloader_output): - return (), { - "labels": dataloader_output["labels"], - "attention_mask": dataloader_output["attention_mask"], - "input_ids": dataloader_output["input_ids"], - "token_type_ids": dataloader_output["token_type_ids"] - } - - class MNLIInitializingDataLoader(PTInitializingDataLoader): - def get_inputs(self, dataloader_output): - return (), { - "labels": dataloader_output["labels"], - "attention_mask": dataloader_output["attention_mask"], - "input_ids": dataloader_output["input_ids"] - } - - if data_args.task_name == "sst2": - initializing_data_loader_cls = SST2InitializingDataLoader - elif data_args.task_name == "mrpc": - initializing_data_loader_cls = MRPCInitializingDataLoader - elif data_args.task_name == "mnli": - initializing_data_loader_cls = MNLIInitializingDataLoader - initializing_data_loader = initializing_data_loader_cls(train_dataloader) - nncf_config.register_extra_structs([QuantizationRangeInitArgs(initializing_data_loader), - BNAdaptationInitArgs(initializing_data_loader)]) - - - # model = AutoModelForSequenceClassification.from_pretrained( - # model_args.model_name_or_path, - # from_tf=bool(".ckpt" in model_args.model_name_or_path), - # config=config, - # cache_dir=model_args.cache_dir, - # revision=model_args.model_revision, - # use_auth_token=True if model_args.use_auth_token else None, - # ) - - # if nncf_config is None: - # model = retval - # compression_ctrl = None - # else: - # compression_ctrl, model = retval - - # if training_args.to_onnx: - # # Expecting the following forward signature: - # # (input_ids, 
attention_mask, token_type_ids, ...) - # if nncf_config is not None: - # compression_ctrl.export_model(training_args.to_onnx) - # else: - # model.to('cpu') - # import torch - # from torch import onnx - # dummy_tensor = torch.ones([1, 128], dtype=torch.long) - # onnx.export(model, (dummy_tensor, dummy_tensor, dummy_tensor), - # training_args.to_onnx, opset_version=10) - - if training_args.do_eval: - if "validation" not in raw_datasets and "validation_matched" not in raw_datasets: - raise ValueError("--do_eval requires a validation dataset") - eval_dataset = raw_datasets["validation_matched" if data_args.task_name == "mnli" else "validation"] - if data_args.max_eval_samples is not None: - eval_dataset = eval_dataset.select(range(data_args.max_eval_samples)) - - if training_args.do_predict or data_args.task_name is not None or data_args.test_file is not None: - if "test" not in raw_datasets and "test_matched" not in raw_datasets: - raise ValueError("--do_predict requires a test dataset") - predict_dataset = raw_datasets["test_matched" if data_args.task_name == "mnli" else "test"] - if data_args.max_predict_samples is not None: - predict_dataset = predict_dataset.select(range(data_args.max_predict_samples)) - - # Log a few random samples from the training set: - if training_args.do_train: - for index in random.sample(range(len(train_dataset)), 3): - logger.info(f"Sample {index} of the training set: {train_dataset[index]}.") - - # Get the metric function - if data_args.task_name is not None: - metric = load_metric("glue", data_args.task_name) - else: - metric = load_metric("accuracy") - - # You can define your custom compute_metrics function. It takes an `EvalPrediction` object (a namedtuple with a - # predictions and label_ids field) and has to return a dictionary string to float. - def compute_metrics(p: EvalPrediction): - preds = p.predictions[0] if isinstance(p.predictions, tuple) else p.predictions - preds = np.squeeze(preds) if is_regression else np.argmax(preds, axis=1) - if data_args.task_name is not None: - result = metric.compute(predictions=preds, references=p.label_ids) - if len(result) > 1: - result["combined_score"] = np.mean(list(result.values())).item() - return result - elif is_regression: - return {"mse": ((preds - p.label_ids) ** 2).mean().item()} - else: - return {"accuracy": (preds == p.label_ids).astype(np.float32).mean().item()} - - # Data collator will default to DataCollatorWithPadding, so we change it if we already did the padding. 
- if data_args.pad_to_max_length: - data_collator = default_data_collator - elif training_args.fp16: - data_collator = DataCollatorWithPadding(tokenizer, pad_to_multiple_of=8) - else: - data_collator = None - - # Initialize our Trainer - trainer = NLPTrainer( - model=model, - args=training_args, - train_dataset=train_dataset if training_args.do_train else None, - eval_dataset=eval_dataset if training_args.do_eval else None, - compute_metrics=compute_metrics, - tokenizer=tokenizer, - data_collator=data_collator, - ) - - if optim_args.tune: - if not training_args.do_eval: - raise ValueError("do_eval must be set to True for quantization.") - - model.config.save_pretrained(training_args.output_dir) - trainer.save_model(training_args.output_dir) - trainer.provider = "nncf" - trainer.provider_arguments = { - "nncf_config": nncf_config, - "to_onnx": training_args.to_onnx, - "distributed": True if not (training_args.local_rank == -1 or training_args.no_cuda) else False, - } - model = trainer.quantize() - - if optim_args.benchmark or optim_args.accuracy_only: - - if optim_args.tune or optim_args.int8: - # Load the model obtained after Intel Neural Compressor (INC) quantization - model = OptimizedModel.from_pretrained( - training_args.output_dir, - ) - model.eval() - trainer.model = model - metrics = trainer.evaluate() - logger.info("metrics keys: {}".format(metrics.keys())) - bert_task_acc_keys = ['eval_f1', 'eval_accuracy', 'eval_matthews_correlation', - 'eval_pearson', 'eval_mcc', 'eval_spearmanr'] - ret = False - for key in bert_task_acc_keys: - if key in metrics.keys(): - ret = True - throughput = metrics.get("eval_samples_per_second") - print('Batch size = ', training_args.per_device_eval_batch_size) - print("Finally Eval {} Accuracy: {}".format(key, metrics[key])) - print("Latency: {:.5f} ms".format(1000 / throughput)) - print("Throughput: {:.5f} samples/sec".format(throughput)) - assert ret, "No metric returned, Please check inference metric!" 
- - return - if nncf_config is not None: - if not (training_args.local_rank == -1 or training_args.no_cuda): - compression_ctrl.distributed() - - # Training - if training_args.do_train: - checkpoint = None - if training_args.resume_from_checkpoint is not None: - checkpoint = training_args.resume_from_checkpoint - elif last_checkpoint is not None: - checkpoint = last_checkpoint - train_result = trainer.train(resume_from_checkpoint=checkpoint) - metrics = train_result.metrics - max_train_samples = ( - data_args.max_train_samples if data_args.max_train_samples is not None else len(train_dataset) - ) - metrics["train_samples"] = min(max_train_samples, len(train_dataset)) - - trainer.save_model() # Saves the tokenizer too for easy upload - - trainer.log_metrics("train", metrics) - trainer.save_metrics("train", metrics) - trainer.save_state() - - # Evaluation - if training_args.do_eval: - logger.info("*** Evaluate ***") - - # Loop to handle MNLI double evaluation (matched, mis-matched) - tasks = [data_args.task_name] - eval_datasets = [eval_dataset] - if data_args.task_name == "mnli": - tasks.append("mnli-mm") - eval_datasets.append(raw_datasets["validation_mismatched"]) - - for eval_dataset, task in zip(eval_datasets, tasks): - metrics = trainer.evaluate(eval_dataset=eval_dataset) - - max_eval_samples = ( - data_args.max_eval_samples if data_args.max_eval_samples is not None else len(eval_dataset) - ) - metrics["eval_samples"] = min(max_eval_samples, len(eval_dataset)) - - trainer.log_metrics("eval", metrics) - trainer.save_metrics("eval", metrics) - - if training_args.do_predict: - logger.info("*** Predict ***") - - # Loop to handle MNLI double evaluation (matched, mis-matched) - tasks = [data_args.task_name] - predict_datasets = [predict_dataset] - if data_args.task_name == "mnli": - tasks.append("mnli-mm") - predict_datasets.append(raw_datasets["test_mismatched"]) - - for predict_dataset, task in zip(predict_datasets, tasks): - # Removing the `label` columns because it contains -1 and Trainer won't like that. 
- predict_dataset = predict_dataset.remove_columns("label") - predictions = trainer.predict(predict_dataset, metric_key_prefix="predict").predictions - predictions = np.squeeze(predictions) if is_regression else np.argmax(predictions, axis=1) - - output_predict_file = os.path.join(training_args.output_dir, f"predict_results_{task}.txt") - if trainer.is_world_process_zero(): - with open(output_predict_file, "w") as writer: - logger.info(f"***** Predict results {task} *****") - writer.write("index\tprediction\n") - for index, item in enumerate(predictions): - if is_regression: - writer.write(f"{index}\t{item:3.3f}\n") - else: - item = label_list[item] - writer.write(f"{index}\t{item}\n") - - if training_args.push_to_hub: - kwargs = {"finetuned_from": model_args.model_name_or_path, "tasks": "text-classification"} - if data_args.task_name is not None: - kwargs["language"] = "en" - kwargs["dataset_tags"] = "glue" - kwargs["dataset_args"] = data_args.task_name - kwargs["dataset"] = f"GLUE {data_args.task_name.upper()}" - - trainer.push_to_hub(**kwargs) - - -def _mp_fn(index): - # For xla_spawn (TPUs) - main() - - -if __name__ == "__main__": - main() diff --git a/examples/optimization/pytorch/huggingface/token-classification/quantization/inc/run_ner.py b/examples/optimization/pytorch/huggingface/token-classification/quantization/inc/run_ner.py index 7db5cbe9f9a..c9b0443f687 100644 --- a/examples/optimization/pytorch/huggingface/token-classification/quantization/inc/run_ner.py +++ b/examples/optimization/pytorch/huggingface/token-classification/quantization/inc/run_ner.py @@ -27,12 +27,12 @@ import transformers from dataclasses import dataclass, field from datasets import ClassLabel, load_dataset, load_metric -from nlp_toolkit import( +from intel_extension_for_transformers import( metrics, OptimizedModel, QuantizationConfig, ) -from nlp_toolkit.optimization.trainer import NLPTrainer +from intel_extension_for_transformers.optimization.trainer import NLPTrainer from transformers import ( AutoConfig, AutoModelForTokenClassification, diff --git a/examples/optimization/pytorch/huggingface/translation/quantization/README.md b/examples/optimization/pytorch/huggingface/translation/quantization/README.md index 13b8a6ee360..302c14425c9 100644 --- a/examples/optimization/pytorch/huggingface/translation/quantization/README.md +++ b/examples/optimization/pytorch/huggingface/translation/quantization/README.md @@ -37,9 +37,9 @@ For custom datasets in `jsonlines` format please see: https://huggingface.co/doc and you also will find examples of these below. 
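For reference, each record in such a `jsonlines` file is a single JSON object with a `translation` field keyed by language codes. A minimal sketch for generating one (the en/ro pair and the `train.json` file name are illustrative assumptions; the layout follows the Hugging Face datasets JSON loader):

```python
# Sketch: write a tiny jsonlines translation dataset.
# The language pair (en -> ro) and the file name are placeholders.
import json

samples = [
    {"translation": {"en": "Hello, world!", "ro": "Salut, lume!"}},
    {"translation": {"en": "How are you?", "ro": "Ce mai faci?"}},
]
with open("train.json", "w", encoding="utf-8") as f:
    for sample in samples:
        f.write(json.dumps(sample, ensure_ascii=False) + "\n")
```

A file of this shape can then be passed to the script via `--train_file train.json`.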
-## tune a quantized model with NLP_toolkit +## tune a quantized model with intel_extension_for_transformers -Here is an example of a translation MarianMT model to tune a quantized model with NLP toolkit: +Here is an example of tuning a quantized MarianMT translation model with Intel Extension for Transformers: ```bash python examples/pytorch/translation/run_translation.py \ diff --git a/examples/optimization/pytorch/huggingface/translation/quantization/run_translation.py b/examples/optimization/pytorch/huggingface/translation/quantization/run_translation.py index 3def9c99b95..7b462bd6532 100755 --- a/examples/optimization/pytorch/huggingface/translation/quantization/run_translation.py +++ b/examples/optimization/pytorch/huggingface/translation/quantization/run_translation.py @@ -28,9 +28,9 @@ import numpy as np from datasets import load_dataset, load_metric -from nlp_toolkit import OptimizedModel, QuantizationConfig -from nlp_toolkit import metrics as nlp_metrics -from nlp_toolkit.optimization.trainer import NLPSeq2SeqTrainer +from intel_extension_for_transformers import OptimizedModel, QuantizationConfig +from intel_extension_for_transformers import metrics as nlp_metrics +from intel_extension_for_transformers.optimization.trainer import NLPSeq2SeqTrainer import transformers from transformers import ( AutoConfig, diff --git a/examples/optimization/tensorflow/huggingface/text-classification/auto_distillation/README.md b/examples/optimization/tensorflow/huggingface/text-classification/auto_distillation/README.md index 365c6dbeda4..0fa36f85f53 100644 --- a/examples/optimization/tensorflow/huggingface/text-classification/auto_distillation/README.md +++ b/examples/optimization/tensorflow/huggingface/text-classification/auto_distillation/README.md @@ -2,7 +2,7 @@ ## GLUE task -The script `run_glue.py` provides the pruning approach (Magnitude) based on [NLP toolkit]. +The script `run_glue.py` provides the auto-distillation approach based on [Intel Extension for Transformers]. Here is how to run the script: diff --git a/examples/optimization/tensorflow/huggingface/text-classification/auto_distillation/run_glue.py b/examples/optimization/tensorflow/huggingface/text-classification/auto_distillation/run_glue.py index e15c47a398d..03d0869fdc2 100644 --- a/examples/optimization/tensorflow/huggingface/text-classification/auto_distillation/run_glue.py +++ b/examples/optimization/tensorflow/huggingface/text-classification/auto_distillation/run_glue.py @@ -531,7 +531,7 @@ def compute_metrics(preds, label_ids): # endregion if distributed_args.multinode: logger.info('*** using multinode mode... ***') - from nlp_toolkit.optimization.utils.utility_tf import distributed_init, get_filepath + from intel_extension_for_transformers.optimization.utils.utility_tf import distributed_init, get_filepath assert distributed_args.worker is not None, "worker address list should not be empty" distributed_args.worker = distributed_args.worker.strip().split(',') distributed_init(distributed_args.worker, @@ -627,7 +627,7 @@ def compute_metrics(preds, label_ids): # endregion if optim_args.autodistill: logger.info('*** start distillation... 
***') - from nlp_toolkit import metrics, TFOptimization, AutoDistillationConfig, TFDistillationConfig + from intel_extension_for_transformers import metrics, TFOptimization, AutoDistillationConfig, TFDistillationConfig optimization = TFOptimization( model=model, args=training_args, diff --git a/examples/optimization/tensorflow/huggingface/text-classification/distillation/README.md b/examples/optimization/tensorflow/huggingface/text-classification/distillation/README.md index 5d4b016ca7f..49583e7192e 100644 --- a/examples/optimization/tensorflow/huggingface/text-classification/distillation/README.md +++ b/examples/optimization/tensorflow/huggingface/text-classification/distillation/README.md @@ -2,7 +2,7 @@ ## GLUE task -The script `run_glue.py` provides the pruning approach (Magnitude) based on [NLP toolkit]. +The script `run_glue.py` provides the distillation approach based on [Intel Extension for Transformers]. Here is how to run the script: diff --git a/examples/optimization/tensorflow/huggingface/text-classification/distillation/run_glue.py b/examples/optimization/tensorflow/huggingface/text-classification/distillation/run_glue.py index a8f9c3827a6..e9c6b14ae4a 100644 --- a/examples/optimization/tensorflow/huggingface/text-classification/distillation/run_glue.py +++ b/examples/optimization/tensorflow/huggingface/text-classification/distillation/run_glue.py @@ -532,7 +532,7 @@ def compute_metrics(preds, label_ids): # endregion if distributed_args.multinode: logger.info('*** using multinode mode... ***') - from nlp_toolkit.optimization.utils.utility_tf import distributed_init, get_filepath + from intel_extension_for_transformers.optimization.utils.utility_tf import distributed_init, get_filepath assert distributed_args.worker is not None, "worker address list should not be empty" distributed_args.worker = distributed_args.worker.strip().split(',') distributed_init(distributed_args.worker, @@ -628,8 +628,8 @@ def compute_metrics(preds, label_ids): # endregion if optim_args.distill: logger.info('*** start distillation... ***') - from nlp_toolkit import metrics, DistillationConfig, TFOptimization - from nlp_toolkit.optimization.distillation import Criterion + from intel_extension_for_transformers import metrics, DistillationConfig, TFOptimization + from intel_extension_for_transformers.optimization.distillation import Criterion optimization = TFOptimization( model=model, args=training_args, diff --git a/examples/optimization/tensorflow/huggingface/text-classification/pruning/README.md b/examples/optimization/tensorflow/huggingface/text-classification/pruning/README.md index 3084cc58b91..e89469da4e2 100644 --- a/examples/optimization/tensorflow/huggingface/text-classification/pruning/README.md +++ b/examples/optimization/tensorflow/huggingface/text-classification/pruning/README.md @@ -2,7 +2,7 @@ ## GLUE task -The script `run_glue.py` provides the pruning approach (Magnitude) based on [NLP toolkit]. +The script `run_glue.py` provides the pruning approach (Magnitude) based on [Intel Extension for Transformers].
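In outline, the script builds a `PrunerConfig`, wraps it in a `PruningConfig`, and hands both to `TFOptimization`. A minimal sketch of that flow, assuming `model`, `training_args`, `train_dataset`, and `eval_dataset` are prepared as in `run_glue.py` and that the renamed package keeps the former `nlp_toolkit` signatures:

```python
# Sketch only: mirrors the pruning flow in run_glue.py after the rename.
# `model`, `training_args`, `train_dataset`, and `eval_dataset` are assumed
# to be built beforehand, exactly as the script does.
from intel_extension_for_transformers import (
    metrics,
    PrunerConfig,
    PruningConfig,
    TFOptimization,
)

optimization = TFOptimization(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
)

# Keep eval accuracy within 1% (relative) while pruning toward the target sparsity.
tune_metric = metrics.Metric(name="accuracy", is_relative=True, criterion=0.01)
pruner_config = PrunerConfig(prune_type="BasicMagnitude", target_sparsity_ratio=0.1)
pruning_config = PruningConfig(pruner_config=[pruner_config], metrics=tune_metric)
pruned_model = optimization.prune(pruning_config=pruning_config)
```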
Here is how to run the script: diff --git a/examples/optimization/tensorflow/huggingface/text-classification/pruning/run_glue.py b/examples/optimization/tensorflow/huggingface/text-classification/pruning/run_glue.py index 57ff0a5cc5a..a2244ea3a85 100644 --- a/examples/optimization/tensorflow/huggingface/text-classification/pruning/run_glue.py +++ b/examples/optimization/tensorflow/huggingface/text-classification/pruning/run_glue.py @@ -260,11 +260,11 @@ def main(): worker_list = distributed_args.worker.split(",") - from nlp_toolkit.optimization.utils.utility_tf import distributed_init + from intel_extension_for_transformers.optimization.utils.utility_tf import distributed_init distributed_init(worker_list, "worker", distributed_args.task_index) strategy = tf.distribute.MultiWorkerMirroredStrategy() - from nlp_toolkit.optimization.utils.utility_tf import get_filepath + from intel_extension_for_transformers.optimization.utils.utility_tf import get_filepath training_args.output_dir = get_filepath(training_args.output_dir, strategy.cluster_resolver.task_type, strategy.cluster_resolver.task_id) else: strategy = training_args.strategy @@ -513,7 +513,7 @@ def compute_metrics(preds, label_ids): # region Pruning if optim_args.prune: - from nlp_toolkit import metrics, PrunerConfig, PruningConfig, TFOptimization + from intel_extension_for_transformers import metrics, PrunerConfig, PruningConfig, TFOptimization optimization = TFOptimization( model=model, args=training_args, diff --git a/examples/optimization/tensorflow/huggingface/text-classification/quantization/inc/run_glue.py b/examples/optimization/tensorflow/huggingface/text-classification/quantization/inc/run_glue.py index bb24179740d..66393678330 100644 --- a/examples/optimization/tensorflow/huggingface/text-classification/quantization/inc/run_glue.py +++ b/examples/optimization/tensorflow/huggingface/text-classification/quantization/inc/run_glue.py @@ -258,11 +258,11 @@ def main(): worker_list = distributed_args.worker.split(",") - from nlp_toolkit.optimization.utils.utility_tf import distributed_init + from intel_extension_for_transformers.optimization.utils.utility_tf import distributed_init distributed_init(worker_list, "worker", distributed_args.task_index) strategy = tf.distribute.MultiWorkerMirroredStrategy() - from nlp_toolkit.optimization.utils.utility_tf import get_filepath + from intel_extension_for_transformers.optimization.utils.utility_tf import get_filepath training_args.output_dir = get_filepath(training_args.output_dir, strategy.cluster_resolver.task_type, strategy.cluster_resolver.task_id) else: strategy = training_args.strategy @@ -510,7 +510,7 @@ def compute_metrics(preds, label_ids): # endregion if optim_args.tune: - from nlp_toolkit import metrics, objectives, QuantizationConfig, TFOptimization + from intel_extension_for_transformers import metrics, objectives, QuantizationConfig, TFOptimization optimization = TFOptimization( model=model, args=training_args, diff --git a/nlp_toolkit/__init__.py b/intel_extension_for_transformers/__init__.py similarity index 100% rename from nlp_toolkit/__init__.py rename to intel_extension_for_transformers/__init__.py diff --git a/nlp_toolkit/backends/__init__.py b/intel_extension_for_transformers/backends/__init__.py similarity index 100% rename from nlp_toolkit/backends/__init__.py rename to intel_extension_for_transformers/backends/__init__.py diff --git a/nlp_toolkit/backends/neural_engine/.clang-format b/intel_extension_for_transformers/backends/neural_engine/.clang-format similarity 
index 100% rename from nlp_toolkit/backends/neural_engine/.clang-format rename to intel_extension_for_transformers/backends/neural_engine/.clang-format diff --git a/nlp_toolkit/backends/neural_engine/.editorconfig b/intel_extension_for_transformers/backends/neural_engine/.editorconfig similarity index 100% rename from nlp_toolkit/backends/neural_engine/.editorconfig rename to intel_extension_for_transformers/backends/neural_engine/.editorconfig diff --git a/nlp_toolkit/backends/neural_engine/CMakeLists.txt b/intel_extension_for_transformers/backends/neural_engine/CMakeLists.txt similarity index 97% rename from nlp_toolkit/backends/neural_engine/CMakeLists.txt rename to intel_extension_for_transformers/backends/neural_engine/CMakeLists.txt index 0d9086b1720..c7df9f35d22 100644 --- a/nlp_toolkit/backends/neural_engine/CMakeLists.txt +++ b/intel_extension_for_transformers/backends/neural_engine/CMakeLists.txt @@ -112,7 +112,7 @@ if(WIN32) endif() if(NE_WITH_SPARSELIB) - add_subdirectory(SparseLib) + add_subdirectory(kernels) add_compile_definitions(WITH_SPARSELIB) endif() @@ -127,10 +127,10 @@ if(NE_WITH_TESTS) if(NOT NE_WITH_SPARSELIB_ONLY) add_subdirectory(test/gtest) else() - add_subdirectory(test/gtest/SparseLib) + add_subdirectory(test/gtest/kernels) endif() endif() if(NE_WITH_SPARSELIB_BENCHMARK) - add_subdirectory(test/SparseLib/benchmark) + add_subdirectory(test/kernels/benchmark) endif() diff --git a/nlp_toolkit/backends/neural_engine/CMakePresets.json b/intel_extension_for_transformers/backends/neural_engine/CMakePresets.json similarity index 95% rename from nlp_toolkit/backends/neural_engine/CMakePresets.json rename to intel_extension_for_transformers/backends/neural_engine/CMakePresets.json index 84488f350df..93e792ffe36 100644 --- a/nlp_toolkit/backends/neural_engine/CMakePresets.json +++ b/intel_extension_for_transformers/backends/neural_engine/CMakePresets.json @@ -18,7 +18,7 @@ "lhs": "${hostSystemName}", "rhs": "Linux" }, - "vendor": { "microsoft.com/VisualStudioRemoteSettings/CMake/1.0": { "sourceDir": "$env{HOME}/.vs/$ms{projectDirName}/nlp_toolkit/backends/neural_engine" } } + "vendor": { "microsoft.com/VisualStudioRemoteSettings/CMake/1.0": { "sourceDir": "$env{HOME}/.vs/$ms{projectDirName}/intel_extension_for_transformers/backends/neural_engine" } } }, { "name": "linux-release", diff --git a/nlp_toolkit/backends/neural_engine/__init__.py b/intel_extension_for_transformers/backends/neural_engine/__init__.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/__init__.py rename to intel_extension_for_transformers/backends/neural_engine/__init__.py diff --git a/nlp_toolkit/backends/neural_engine/bin/neural_engine b/intel_extension_for_transformers/backends/neural_engine/bin/neural_engine similarity index 89% rename from nlp_toolkit/backends/neural_engine/bin/neural_engine rename to intel_extension_for_transformers/backends/neural_engine/bin/neural_engine index 59019af37a0..c18246e4af6 100644 --- a/nlp_toolkit/backends/neural_engine/bin/neural_engine +++ b/intel_extension_for_transformers/backends/neural_engine/bin/neural_engine @@ -16,10 +16,10 @@ import subprocess import sys -import nlp_toolkit +import intel_extension_for_transformers if __name__ == '__main__': - path = nlp_toolkit.__path__[0] + path = intel_extension_for_transformers.__path__[0] command = [path + '/../neural_engine_bin'] command.extend(sys.argv) subprocess.call(command) diff --git a/nlp_toolkit/backends/neural_engine/cmake/PresetOs.cmake 
b/intel_extension_for_transformers/backends/neural_engine/cmake/PresetOs.cmake similarity index 100% rename from nlp_toolkit/backends/neural_engine/cmake/PresetOs.cmake rename to intel_extension_for_transformers/backends/neural_engine/cmake/PresetOs.cmake diff --git a/nlp_toolkit/backends/neural_engine/compile/__init__.py b/intel_extension_for_transformers/backends/neural_engine/compile/__init__.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/__init__.py rename to intel_extension_for_transformers/backends/neural_engine/compile/__init__.py diff --git a/nlp_toolkit/backends/neural_engine/compile/compile.py b/intel_extension_for_transformers/backends/neural_engine/compile/compile.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/compile.py rename to intel_extension_for_transformers/backends/neural_engine/compile/compile.py diff --git a/nlp_toolkit/backends/neural_engine/compile/extractors/__init__.py b/intel_extension_for_transformers/backends/neural_engine/compile/extractors/__init__.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/extractors/__init__.py rename to intel_extension_for_transformers/backends/neural_engine/compile/extractors/__init__.py diff --git a/nlp_toolkit/backends/neural_engine/compile/extractors/extractor.py b/intel_extension_for_transformers/backends/neural_engine/compile/extractors/extractor.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/extractors/extractor.py rename to intel_extension_for_transformers/backends/neural_engine/compile/extractors/extractor.py diff --git a/nlp_toolkit/backends/neural_engine/compile/extractors/onnx_extractor.py b/intel_extension_for_transformers/backends/neural_engine/compile/extractors/onnx_extractor.py similarity index 95% rename from nlp_toolkit/backends/neural_engine/compile/extractors/onnx_extractor.py rename to intel_extension_for_transformers/backends/neural_engine/compile/extractors/onnx_extractor.py index 098a7043230..6aae06221ff 100644 --- a/nlp_toolkit/backends/neural_engine/compile/extractors/onnx_extractor.py +++ b/intel_extension_for_transformers/backends/neural_engine/compile/extractors/onnx_extractor.py @@ -21,7 +21,7 @@ from ..ops.op import OPERATORS from ..onnx_utils import graph_node_names_details from ..graph_utils import names_from_input -from nlp_toolkit.backends.neural_engine.compile.ops.tensor import Tensor +from intel_extension_for_transformers.backends.neural_engine.compile.ops.tensor import Tensor class ONNXExtractor(object): @@ -62,7 +62,7 @@ def __call__(self, model): if op_type == 'Constant': continue else: - import nlp_toolkit.backends.neural_engine.compile.graph_utils\ + import intel_extension_for_transformers.backends.neural_engine.compile.graph_utils\ as util input_tensor_names = inner_node.input for input_tensor_name in input_tensor_names: diff --git a/nlp_toolkit/backends/neural_engine/compile/extractors/tf_extractor.py b/intel_extension_for_transformers/backends/neural_engine/compile/extractors/tf_extractor.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/extractors/tf_extractor.py rename to intel_extension_for_transformers/backends/neural_engine/compile/extractors/tf_extractor.py diff --git a/nlp_toolkit/backends/neural_engine/compile/graph/__init__.py b/intel_extension_for_transformers/backends/neural_engine/compile/graph/__init__.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/graph/__init__.py rename to 
intel_extension_for_transformers/backends/neural_engine/compile/graph/__init__.py diff --git a/nlp_toolkit/backends/neural_engine/compile/graph/graph.py b/intel_extension_for_transformers/backends/neural_engine/compile/graph/graph.py similarity index 99% rename from nlp_toolkit/backends/neural_engine/compile/graph/graph.py rename to intel_extension_for_transformers/backends/neural_engine/compile/graph/graph.py index 5f91d6f3f2c..85230d4ae82 100644 --- a/nlp_toolkit/backends/neural_engine/compile/graph/graph.py +++ b/intel_extension_for_transformers/backends/neural_engine/compile/graph/graph.py @@ -387,7 +387,7 @@ def inference(self, input_data): def graph_init(self, config, weight_data=None): ''' example: - from nlp_toolkit.backends.neural_engine.compile.graph import Graph + from intel_extension_for_transformers.backends.neural_engine.compile.graph import Graph newgraph = Graph() newgraph.graph_init('./ir/conf.yaml', './ir/model.bin') out = newgraph.inference([input_0, input_1, input_2]) diff --git a/nlp_toolkit/backends/neural_engine/compile/graph_utils.py b/intel_extension_for_transformers/backends/neural_engine/compile/graph_utils.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/graph_utils.py rename to intel_extension_for_transformers/backends/neural_engine/compile/graph_utils.py diff --git a/nlp_toolkit/backends/neural_engine/compile/loaders/__init__.py b/intel_extension_for_transformers/backends/neural_engine/compile/loaders/__init__.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/loaders/__init__.py rename to intel_extension_for_transformers/backends/neural_engine/compile/loaders/__init__.py diff --git a/nlp_toolkit/backends/neural_engine/compile/loaders/loader.py b/intel_extension_for_transformers/backends/neural_engine/compile/loaders/loader.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/loaders/loader.py rename to intel_extension_for_transformers/backends/neural_engine/compile/loaders/loader.py diff --git a/nlp_toolkit/backends/neural_engine/compile/logger.py b/intel_extension_for_transformers/backends/neural_engine/compile/logger.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/logger.py rename to intel_extension_for_transformers/backends/neural_engine/compile/logger.py diff --git a/nlp_toolkit/backends/neural_engine/compile/onnx_utils.py b/intel_extension_for_transformers/backends/neural_engine/compile/onnx_utils.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/onnx_utils.py rename to intel_extension_for_transformers/backends/neural_engine/compile/onnx_utils.py diff --git a/nlp_toolkit/backends/neural_engine/compile/ops/__init__.py b/intel_extension_for_transformers/backends/neural_engine/compile/ops/__init__.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/ops/__init__.py rename to intel_extension_for_transformers/backends/neural_engine/compile/ops/__init__.py diff --git a/nlp_toolkit/backends/neural_engine/compile/ops/all.py b/intel_extension_for_transformers/backends/neural_engine/compile/ops/all.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/ops/all.py rename to intel_extension_for_transformers/backends/neural_engine/compile/ops/all.py diff --git a/nlp_toolkit/backends/neural_engine/compile/ops/assert.py b/intel_extension_for_transformers/backends/neural_engine/compile/ops/assert.py similarity index 100% rename from 
nlp_toolkit/backends/neural_engine/compile/ops/assert.py rename to intel_extension_for_transformers/backends/neural_engine/compile/ops/assert.py diff --git a/nlp_toolkit/backends/neural_engine/compile/ops/batch_matmul.py b/intel_extension_for_transformers/backends/neural_engine/compile/ops/batch_matmul.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/ops/batch_matmul.py rename to intel_extension_for_transformers/backends/neural_engine/compile/ops/batch_matmul.py diff --git a/nlp_toolkit/backends/neural_engine/compile/ops/batch_matmul_v2.py b/intel_extension_for_transformers/backends/neural_engine/compile/ops/batch_matmul_v2.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/ops/batch_matmul_v2.py rename to intel_extension_for_transformers/backends/neural_engine/compile/ops/batch_matmul_v2.py diff --git a/nlp_toolkit/backends/neural_engine/compile/ops/bias_add.py b/intel_extension_for_transformers/backends/neural_engine/compile/ops/bias_add.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/ops/bias_add.py rename to intel_extension_for_transformers/backends/neural_engine/compile/ops/bias_add.py diff --git a/nlp_toolkit/backends/neural_engine/compile/ops/cast.py b/intel_extension_for_transformers/backends/neural_engine/compile/ops/cast.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/ops/cast.py rename to intel_extension_for_transformers/backends/neural_engine/compile/ops/cast.py diff --git a/nlp_toolkit/backends/neural_engine/compile/ops/concat.py b/intel_extension_for_transformers/backends/neural_engine/compile/ops/concat.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/ops/concat.py rename to intel_extension_for_transformers/backends/neural_engine/compile/ops/concat.py diff --git a/nlp_toolkit/backends/neural_engine/compile/ops/conv.py b/intel_extension_for_transformers/backends/neural_engine/compile/ops/conv.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/ops/conv.py rename to intel_extension_for_transformers/backends/neural_engine/compile/ops/conv.py diff --git a/nlp_toolkit/backends/neural_engine/compile/ops/empty_ops.py b/intel_extension_for_transformers/backends/neural_engine/compile/ops/empty_ops.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/ops/empty_ops.py rename to intel_extension_for_transformers/backends/neural_engine/compile/ops/empty_ops.py diff --git a/nlp_toolkit/backends/neural_engine/compile/ops/expand_dims.py b/intel_extension_for_transformers/backends/neural_engine/compile/ops/expand_dims.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/ops/expand_dims.py rename to intel_extension_for_transformers/backends/neural_engine/compile/ops/expand_dims.py diff --git a/nlp_toolkit/backends/neural_engine/compile/ops/fused_batch_matmul_v2.py b/intel_extension_for_transformers/backends/neural_engine/compile/ops/fused_batch_matmul_v2.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/ops/fused_batch_matmul_v2.py rename to intel_extension_for_transformers/backends/neural_engine/compile/ops/fused_batch_matmul_v2.py diff --git a/nlp_toolkit/backends/neural_engine/compile/ops/fused_batch_norm_v3.py b/intel_extension_for_transformers/backends/neural_engine/compile/ops/fused_batch_norm_v3.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/ops/fused_batch_norm_v3.py rename to 
intel_extension_for_transformers/backends/neural_engine/compile/ops/fused_batch_norm_v3.py diff --git a/nlp_toolkit/backends/neural_engine/compile/ops/fused_gemm.py b/intel_extension_for_transformers/backends/neural_engine/compile/ops/fused_gemm.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/ops/fused_gemm.py rename to intel_extension_for_transformers/backends/neural_engine/compile/ops/fused_gemm.py diff --git a/nlp_toolkit/backends/neural_engine/compile/ops/fused_matmul.py b/intel_extension_for_transformers/backends/neural_engine/compile/ops/fused_matmul.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/ops/fused_matmul.py rename to intel_extension_for_transformers/backends/neural_engine/compile/ops/fused_matmul.py diff --git a/nlp_toolkit/backends/neural_engine/compile/ops/gather.py b/intel_extension_for_transformers/backends/neural_engine/compile/ops/gather.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/ops/gather.py rename to intel_extension_for_transformers/backends/neural_engine/compile/ops/gather.py diff --git a/nlp_toolkit/backends/neural_engine/compile/ops/gather_elements.py b/intel_extension_for_transformers/backends/neural_engine/compile/ops/gather_elements.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/ops/gather_elements.py rename to intel_extension_for_transformers/backends/neural_engine/compile/ops/gather_elements.py diff --git a/nlp_toolkit/backends/neural_engine/compile/ops/gelu.py b/intel_extension_for_transformers/backends/neural_engine/compile/ops/gelu.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/ops/gelu.py rename to intel_extension_for_transformers/backends/neural_engine/compile/ops/gelu.py diff --git a/nlp_toolkit/backends/neural_engine/compile/ops/gemm.py b/intel_extension_for_transformers/backends/neural_engine/compile/ops/gemm.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/ops/gemm.py rename to intel_extension_for_transformers/backends/neural_engine/compile/ops/gemm.py diff --git a/nlp_toolkit/backends/neural_engine/compile/ops/iterator_get_next.py b/intel_extension_for_transformers/backends/neural_engine/compile/ops/iterator_get_next.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/ops/iterator_get_next.py rename to intel_extension_for_transformers/backends/neural_engine/compile/ops/iterator_get_next.py diff --git a/nlp_toolkit/backends/neural_engine/compile/ops/iterator_v2.py b/intel_extension_for_transformers/backends/neural_engine/compile/ops/iterator_v2.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/ops/iterator_v2.py rename to intel_extension_for_transformers/backends/neural_engine/compile/ops/iterator_v2.py diff --git a/nlp_toolkit/backends/neural_engine/compile/ops/layer_normalization.py b/intel_extension_for_transformers/backends/neural_engine/compile/ops/layer_normalization.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/ops/layer_normalization.py rename to intel_extension_for_transformers/backends/neural_engine/compile/ops/layer_normalization.py diff --git a/nlp_toolkit/backends/neural_engine/compile/ops/map_and_batch_dataset.py b/intel_extension_for_transformers/backends/neural_engine/compile/ops/map_and_batch_dataset.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/ops/map_and_batch_dataset.py rename to 
intel_extension_for_transformers/backends/neural_engine/compile/ops/map_and_batch_dataset.py diff --git a/nlp_toolkit/backends/neural_engine/compile/ops/matmul.py b/intel_extension_for_transformers/backends/neural_engine/compile/ops/matmul.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/ops/matmul.py rename to intel_extension_for_transformers/backends/neural_engine/compile/ops/matmul.py diff --git a/nlp_toolkit/backends/neural_engine/compile/ops/mean.py b/intel_extension_for_transformers/backends/neural_engine/compile/ops/mean.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/ops/mean.py rename to intel_extension_for_transformers/backends/neural_engine/compile/ops/mean.py diff --git a/nlp_toolkit/backends/neural_engine/compile/ops/mkl_layer_norm.py b/intel_extension_for_transformers/backends/neural_engine/compile/ops/mkl_layer_norm.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/ops/mkl_layer_norm.py rename to intel_extension_for_transformers/backends/neural_engine/compile/ops/mkl_layer_norm.py diff --git a/nlp_toolkit/backends/neural_engine/compile/ops/model_dataset.py b/intel_extension_for_transformers/backends/neural_engine/compile/ops/model_dataset.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/ops/model_dataset.py rename to intel_extension_for_transformers/backends/neural_engine/compile/ops/model_dataset.py diff --git a/nlp_toolkit/backends/neural_engine/compile/ops/one_hot.py b/intel_extension_for_transformers/backends/neural_engine/compile/ops/one_hot.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/ops/one_hot.py rename to intel_extension_for_transformers/backends/neural_engine/compile/ops/one_hot.py diff --git a/nlp_toolkit/backends/neural_engine/compile/ops/onnx_input.py b/intel_extension_for_transformers/backends/neural_engine/compile/ops/onnx_input.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/ops/onnx_input.py rename to intel_extension_for_transformers/backends/neural_engine/compile/ops/onnx_input.py diff --git a/nlp_toolkit/backends/neural_engine/compile/ops/op.py b/intel_extension_for_transformers/backends/neural_engine/compile/ops/op.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/ops/op.py rename to intel_extension_for_transformers/backends/neural_engine/compile/ops/op.py diff --git a/nlp_toolkit/backends/neural_engine/compile/ops/optimize_dataset.py b/intel_extension_for_transformers/backends/neural_engine/compile/ops/optimize_dataset.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/ops/optimize_dataset.py rename to intel_extension_for_transformers/backends/neural_engine/compile/ops/optimize_dataset.py diff --git a/nlp_toolkit/backends/neural_engine/compile/ops/pack.py b/intel_extension_for_transformers/backends/neural_engine/compile/ops/pack.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/ops/pack.py rename to intel_extension_for_transformers/backends/neural_engine/compile/ops/pack.py diff --git a/nlp_toolkit/backends/neural_engine/compile/ops/placeholder.py b/intel_extension_for_transformers/backends/neural_engine/compile/ops/placeholder.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/ops/placeholder.py rename to intel_extension_for_transformers/backends/neural_engine/compile/ops/placeholder.py diff --git 
a/nlp_toolkit/backends/neural_engine/compile/ops/quantize_linear.py b/intel_extension_for_transformers/backends/neural_engine/compile/ops/quantize_linear.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/ops/quantize_linear.py rename to intel_extension_for_transformers/backends/neural_engine/compile/ops/quantize_linear.py diff --git a/nlp_toolkit/backends/neural_engine/compile/ops/quantize_v2.py b/intel_extension_for_transformers/backends/neural_engine/compile/ops/quantize_v2.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/ops/quantize_v2.py rename to intel_extension_for_transformers/backends/neural_engine/compile/ops/quantize_v2.py diff --git a/nlp_toolkit/backends/neural_engine/compile/ops/quantized_fused_matmul_and_dequantize.py b/intel_extension_for_transformers/backends/neural_engine/compile/ops/quantized_fused_matmul_and_dequantize.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/ops/quantized_fused_matmul_and_dequantize.py rename to intel_extension_for_transformers/backends/neural_engine/compile/ops/quantized_fused_matmul_and_dequantize.py diff --git a/nlp_toolkit/backends/neural_engine/compile/ops/quantized_matmul_with_bias_and_dequantize.py b/intel_extension_for_transformers/backends/neural_engine/compile/ops/quantized_matmul_with_bias_and_dequantize.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/ops/quantized_matmul_with_bias_and_dequantize.py rename to intel_extension_for_transformers/backends/neural_engine/compile/ops/quantized_matmul_with_bias_and_dequantize.py diff --git a/nlp_toolkit/backends/neural_engine/compile/ops/reduce_mean.py b/intel_extension_for_transformers/backends/neural_engine/compile/ops/reduce_mean.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/ops/reduce_mean.py rename to intel_extension_for_transformers/backends/neural_engine/compile/ops/reduce_mean.py diff --git a/nlp_toolkit/backends/neural_engine/compile/ops/reduce_sum.py b/intel_extension_for_transformers/backends/neural_engine/compile/ops/reduce_sum.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/ops/reduce_sum.py rename to intel_extension_for_transformers/backends/neural_engine/compile/ops/reduce_sum.py diff --git a/nlp_toolkit/backends/neural_engine/compile/ops/reshape.py b/intel_extension_for_transformers/backends/neural_engine/compile/ops/reshape.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/ops/reshape.py rename to intel_extension_for_transformers/backends/neural_engine/compile/ops/reshape.py diff --git a/nlp_toolkit/backends/neural_engine/compile/ops/scatter_elements.py b/intel_extension_for_transformers/backends/neural_engine/compile/ops/scatter_elements.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/ops/scatter_elements.py rename to intel_extension_for_transformers/backends/neural_engine/compile/ops/scatter_elements.py diff --git a/nlp_toolkit/backends/neural_engine/compile/ops/softmax.py b/intel_extension_for_transformers/backends/neural_engine/compile/ops/softmax.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/ops/softmax.py rename to intel_extension_for_transformers/backends/neural_engine/compile/ops/softmax.py diff --git a/nlp_toolkit/backends/neural_engine/compile/ops/split.py b/intel_extension_for_transformers/backends/neural_engine/compile/ops/split.py similarity index 100% rename from 
nlp_toolkit/backends/neural_engine/compile/ops/split.py rename to intel_extension_for_transformers/backends/neural_engine/compile/ops/split.py diff --git a/nlp_toolkit/backends/neural_engine/compile/ops/squeeze.py b/intel_extension_for_transformers/backends/neural_engine/compile/ops/squeeze.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/ops/squeeze.py rename to intel_extension_for_transformers/backends/neural_engine/compile/ops/squeeze.py diff --git a/nlp_toolkit/backends/neural_engine/compile/ops/strided_slice.py b/intel_extension_for_transformers/backends/neural_engine/compile/ops/strided_slice.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/ops/strided_slice.py rename to intel_extension_for_transformers/backends/neural_engine/compile/ops/strided_slice.py diff --git a/nlp_toolkit/backends/neural_engine/compile/ops/tensor.py b/intel_extension_for_transformers/backends/neural_engine/compile/ops/tensor.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/ops/tensor.py rename to intel_extension_for_transformers/backends/neural_engine/compile/ops/tensor.py diff --git a/nlp_toolkit/backends/neural_engine/compile/ops/top_k.py b/intel_extension_for_transformers/backends/neural_engine/compile/ops/top_k.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/ops/top_k.py rename to intel_extension_for_transformers/backends/neural_engine/compile/ops/top_k.py diff --git a/nlp_toolkit/backends/neural_engine/compile/ops/transpose.py b/intel_extension_for_transformers/backends/neural_engine/compile/ops/transpose.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/ops/transpose.py rename to intel_extension_for_transformers/backends/neural_engine/compile/ops/transpose.py diff --git a/nlp_toolkit/backends/neural_engine/compile/ops/unpack.py b/intel_extension_for_transformers/backends/neural_engine/compile/ops/unpack.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/ops/unpack.py rename to intel_extension_for_transformers/backends/neural_engine/compile/ops/unpack.py diff --git a/nlp_toolkit/backends/neural_engine/compile/ops/unsqueeze.py b/intel_extension_for_transformers/backends/neural_engine/compile/ops/unsqueeze.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/ops/unsqueeze.py rename to intel_extension_for_transformers/backends/neural_engine/compile/ops/unsqueeze.py diff --git a/nlp_toolkit/backends/neural_engine/compile/sub_graph/__init__.py b/intel_extension_for_transformers/backends/neural_engine/compile/sub_graph/__init__.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/sub_graph/__init__.py rename to intel_extension_for_transformers/backends/neural_engine/compile/sub_graph/__init__.py diff --git a/nlp_toolkit/backends/neural_engine/compile/sub_graph/add_cls_token.py b/intel_extension_for_transformers/backends/neural_engine/compile/sub_graph/add_cls_token.py similarity index 98% rename from nlp_toolkit/backends/neural_engine/compile/sub_graph/add_cls_token.py rename to intel_extension_for_transformers/backends/neural_engine/compile/sub_graph/add_cls_token.py index 5ab3719280f..29d1380037b 100644 --- a/nlp_toolkit/backends/neural_engine/compile/sub_graph/add_cls_token.py +++ b/intel_extension_for_transformers/backends/neural_engine/compile/sub_graph/add_cls_token.py @@ -59,7 +59,7 @@ def __call__(self, model): 'returns': [0, 12] }, - # vit model generated by 
nlp-toolkit API + # vit model generated by intel-extension-for-transformers API { 'patterns': { 'in': [[(1, 'Shape'), (2, 'Gather'), (3, 'Unsqueeze'), (4, 'Concat'), diff --git a/nlp_toolkit/backends/neural_engine/compile/sub_graph/add_embeddings.py b/intel_extension_for_transformers/backends/neural_engine/compile/sub_graph/add_embeddings.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/sub_graph/add_embeddings.py rename to intel_extension_for_transformers/backends/neural_engine/compile/sub_graph/add_embeddings.py diff --git a/nlp_toolkit/backends/neural_engine/compile/sub_graph/attention_mask_length_adaptive_keep_indices.py b/intel_extension_for_transformers/backends/neural_engine/compile/sub_graph/attention_mask_length_adaptive_keep_indices.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/sub_graph/attention_mask_length_adaptive_keep_indices.py rename to intel_extension_for_transformers/backends/neural_engine/compile/sub_graph/attention_mask_length_adaptive_keep_indices.py diff --git a/nlp_toolkit/backends/neural_engine/compile/sub_graph/attention_output_layer_norm_length_adaptive_keep_indices.py b/intel_extension_for_transformers/backends/neural_engine/compile/sub_graph/attention_output_layer_norm_length_adaptive_keep_indices.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/sub_graph/attention_output_layer_norm_length_adaptive_keep_indices.py rename to intel_extension_for_transformers/backends/neural_engine/compile/sub_graph/attention_output_layer_norm_length_adaptive_keep_indices.py diff --git a/nlp_toolkit/backends/neural_engine/compile/sub_graph/attention_reshape.py b/intel_extension_for_transformers/backends/neural_engine/compile/sub_graph/attention_reshape.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/sub_graph/attention_reshape.py rename to intel_extension_for_transformers/backends/neural_engine/compile/sub_graph/attention_reshape.py diff --git a/nlp_toolkit/backends/neural_engine/compile/sub_graph/collect_quant_info.py b/intel_extension_for_transformers/backends/neural_engine/compile/sub_graph/collect_quant_info.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/sub_graph/collect_quant_info.py rename to intel_extension_for_transformers/backends/neural_engine/compile/sub_graph/collect_quant_info.py diff --git a/nlp_toolkit/backends/neural_engine/compile/sub_graph/conv_reshape.py b/intel_extension_for_transformers/backends/neural_engine/compile/sub_graph/conv_reshape.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/sub_graph/conv_reshape.py rename to intel_extension_for_transformers/backends/neural_engine/compile/sub_graph/conv_reshape.py diff --git a/nlp_toolkit/backends/neural_engine/compile/sub_graph/embeddingbag.py b/intel_extension_for_transformers/backends/neural_engine/compile/sub_graph/embeddingbag.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/sub_graph/embeddingbag.py rename to intel_extension_for_transformers/backends/neural_engine/compile/sub_graph/embeddingbag.py diff --git a/nlp_toolkit/backends/neural_engine/compile/sub_graph/gelu.py b/intel_extension_for_transformers/backends/neural_engine/compile/sub_graph/gelu.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/sub_graph/gelu.py rename to intel_extension_for_transformers/backends/neural_engine/compile/sub_graph/gelu.py diff --git 
a/nlp_toolkit/backends/neural_engine/compile/sub_graph/generate_sequence.py b/intel_extension_for_transformers/backends/neural_engine/compile/sub_graph/generate_sequence.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/sub_graph/generate_sequence.py rename to intel_extension_for_transformers/backends/neural_engine/compile/sub_graph/generate_sequence.py diff --git a/nlp_toolkit/backends/neural_engine/compile/sub_graph/input_data.py b/intel_extension_for_transformers/backends/neural_engine/compile/sub_graph/input_data.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/sub_graph/input_data.py rename to intel_extension_for_transformers/backends/neural_engine/compile/sub_graph/input_data.py diff --git a/nlp_toolkit/backends/neural_engine/compile/sub_graph/input_file.py b/intel_extension_for_transformers/backends/neural_engine/compile/sub_graph/input_file.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/sub_graph/input_file.py rename to intel_extension_for_transformers/backends/neural_engine/compile/sub_graph/input_file.py diff --git a/nlp_toolkit/backends/neural_engine/compile/sub_graph/insert_bf16_node.py b/intel_extension_for_transformers/backends/neural_engine/compile/sub_graph/insert_bf16_node.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/sub_graph/insert_bf16_node.py rename to intel_extension_for_transformers/backends/neural_engine/compile/sub_graph/insert_bf16_node.py diff --git a/nlp_toolkit/backends/neural_engine/compile/sub_graph/insert_quant_node.py b/intel_extension_for_transformers/backends/neural_engine/compile/sub_graph/insert_quant_node.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/sub_graph/insert_quant_node.py rename to intel_extension_for_transformers/backends/neural_engine/compile/sub_graph/insert_quant_node.py diff --git a/nlp_toolkit/backends/neural_engine/compile/sub_graph/interact_features.py b/intel_extension_for_transformers/backends/neural_engine/compile/sub_graph/interact_features.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/sub_graph/interact_features.py rename to intel_extension_for_transformers/backends/neural_engine/compile/sub_graph/interact_features.py diff --git a/nlp_toolkit/backends/neural_engine/compile/sub_graph/last_layer_shape.py b/intel_extension_for_transformers/backends/neural_engine/compile/sub_graph/last_layer_shape.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/sub_graph/last_layer_shape.py rename to intel_extension_for_transformers/backends/neural_engine/compile/sub_graph/last_layer_shape.py diff --git a/nlp_toolkit/backends/neural_engine/compile/sub_graph/layer_norm.py b/intel_extension_for_transformers/backends/neural_engine/compile/sub_graph/layer_norm.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/sub_graph/layer_norm.py rename to intel_extension_for_transformers/backends/neural_engine/compile/sub_graph/layer_norm.py diff --git a/nlp_toolkit/backends/neural_engine/compile/sub_graph/layer_norm_with_reduce_mean.py b/intel_extension_for_transformers/backends/neural_engine/compile/sub_graph/layer_norm_with_reduce_mean.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/sub_graph/layer_norm_with_reduce_mean.py rename to intel_extension_for_transformers/backends/neural_engine/compile/sub_graph/layer_norm_with_reduce_mean.py diff --git 
a/nlp_toolkit/backends/neural_engine/compile/sub_graph/matmul_with_bias.py b/intel_extension_for_transformers/backends/neural_engine/compile/sub_graph/matmul_with_bias.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/sub_graph/matmul_with_bias.py rename to intel_extension_for_transformers/backends/neural_engine/compile/sub_graph/matmul_with_bias.py diff --git a/nlp_toolkit/backends/neural_engine/compile/sub_graph/matmul_with_bias_add.py b/intel_extension_for_transformers/backends/neural_engine/compile/sub_graph/matmul_with_bias_add.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/sub_graph/matmul_with_bias_add.py rename to intel_extension_for_transformers/backends/neural_engine/compile/sub_graph/matmul_with_bias_add.py diff --git a/nlp_toolkit/backends/neural_engine/compile/sub_graph/matmul_with_bias_gelu.py b/intel_extension_for_transformers/backends/neural_engine/compile/sub_graph/matmul_with_bias_gelu.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/sub_graph/matmul_with_bias_gelu.py rename to intel_extension_for_transformers/backends/neural_engine/compile/sub_graph/matmul_with_bias_gelu.py diff --git a/nlp_toolkit/backends/neural_engine/compile/sub_graph/matmul_with_bias_relu.py b/intel_extension_for_transformers/backends/neural_engine/compile/sub_graph/matmul_with_bias_relu.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/sub_graph/matmul_with_bias_relu.py rename to intel_extension_for_transformers/backends/neural_engine/compile/sub_graph/matmul_with_bias_relu.py diff --git a/nlp_toolkit/backends/neural_engine/compile/sub_graph/matmul_with_bias_sigmoid.py b/intel_extension_for_transformers/backends/neural_engine/compile/sub_graph/matmul_with_bias_sigmoid.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/sub_graph/matmul_with_bias_sigmoid.py rename to intel_extension_for_transformers/backends/neural_engine/compile/sub_graph/matmul_with_bias_sigmoid.py diff --git a/nlp_toolkit/backends/neural_engine/compile/sub_graph/matmul_with_bias_tanh.py b/intel_extension_for_transformers/backends/neural_engine/compile/sub_graph/matmul_with_bias_tanh.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/sub_graph/matmul_with_bias_tanh.py rename to intel_extension_for_transformers/backends/neural_engine/compile/sub_graph/matmul_with_bias_tanh.py diff --git a/nlp_toolkit/backends/neural_engine/compile/sub_graph/merged_embeddingbag.py b/intel_extension_for_transformers/backends/neural_engine/compile/sub_graph/merged_embeddingbag.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/sub_graph/merged_embeddingbag.py rename to intel_extension_for_transformers/backends/neural_engine/compile/sub_graph/merged_embeddingbag.py diff --git a/nlp_toolkit/backends/neural_engine/compile/sub_graph/output_data.py b/intel_extension_for_transformers/backends/neural_engine/compile/sub_graph/output_data.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/sub_graph/output_data.py rename to intel_extension_for_transformers/backends/neural_engine/compile/sub_graph/output_data.py diff --git a/nlp_toolkit/backends/neural_engine/compile/sub_graph/padding_sequence.py b/intel_extension_for_transformers/backends/neural_engine/compile/sub_graph/padding_sequence.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/sub_graph/padding_sequence.py rename to 
intel_extension_for_transformers/backends/neural_engine/compile/sub_graph/padding_sequence.py diff --git a/nlp_toolkit/backends/neural_engine/compile/sub_graph/pattern.py b/intel_extension_for_transformers/backends/neural_engine/compile/sub_graph/pattern.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/sub_graph/pattern.py rename to intel_extension_for_transformers/backends/neural_engine/compile/sub_graph/pattern.py diff --git a/nlp_toolkit/backends/neural_engine/compile/sub_graph/position_embeddings.py b/intel_extension_for_transformers/backends/neural_engine/compile/sub_graph/position_embeddings.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/sub_graph/position_embeddings.py rename to intel_extension_for_transformers/backends/neural_engine/compile/sub_graph/position_embeddings.py diff --git a/nlp_toolkit/backends/neural_engine/compile/sub_graph/position_embeddings_v1.py b/intel_extension_for_transformers/backends/neural_engine/compile/sub_graph/position_embeddings_v1.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/sub_graph/position_embeddings_v1.py rename to intel_extension_for_transformers/backends/neural_engine/compile/sub_graph/position_embeddings_v1.py diff --git a/nlp_toolkit/backends/neural_engine/compile/sub_graph/qkv_merge.py b/intel_extension_for_transformers/backends/neural_engine/compile/sub_graph/qkv_merge.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/sub_graph/qkv_merge.py rename to intel_extension_for_transformers/backends/neural_engine/compile/sub_graph/qkv_merge.py diff --git a/nlp_toolkit/backends/neural_engine/compile/sub_graph/qkv_reshape.py b/intel_extension_for_transformers/backends/neural_engine/compile/sub_graph/qkv_reshape.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/sub_graph/qkv_reshape.py rename to intel_extension_for_transformers/backends/neural_engine/compile/sub_graph/qkv_reshape.py diff --git a/nlp_toolkit/backends/neural_engine/compile/sub_graph/quantize_fusion.py b/intel_extension_for_transformers/backends/neural_engine/compile/sub_graph/quantize_fusion.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/sub_graph/quantize_fusion.py rename to intel_extension_for_transformers/backends/neural_engine/compile/sub_graph/quantize_fusion.py diff --git a/nlp_toolkit/backends/neural_engine/compile/sub_graph/reshape_after_restore_hidden_states.py b/intel_extension_for_transformers/backends/neural_engine/compile/sub_graph/reshape_after_restore_hidden_states.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/sub_graph/reshape_after_restore_hidden_states.py rename to intel_extension_for_transformers/backends/neural_engine/compile/sub_graph/reshape_after_restore_hidden_states.py diff --git a/nlp_toolkit/backends/neural_engine/compile/sub_graph/reshape_before_and_after_attention_out_layer_norm_gather_elements.py b/intel_extension_for_transformers/backends/neural_engine/compile/sub_graph/reshape_before_and_after_attention_out_layer_norm_gather_elements.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/sub_graph/reshape_before_and_after_attention_out_layer_norm_gather_elements.py rename to intel_extension_for_transformers/backends/neural_engine/compile/sub_graph/reshape_before_and_after_attention_out_layer_norm_gather_elements.py diff --git 
a/nlp_toolkit/backends/neural_engine/compile/sub_graph/reshape_before_restore_hidden_states.py b/intel_extension_for_transformers/backends/neural_engine/compile/sub_graph/reshape_before_restore_hidden_states.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/sub_graph/reshape_before_restore_hidden_states.py rename to intel_extension_for_transformers/backends/neural_engine/compile/sub_graph/reshape_before_restore_hidden_states.py diff --git a/nlp_toolkit/backends/neural_engine/compile/sub_graph/reshape_fusion.py b/intel_extension_for_transformers/backends/neural_engine/compile/sub_graph/reshape_fusion.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/sub_graph/reshape_fusion.py rename to intel_extension_for_transformers/backends/neural_engine/compile/sub_graph/reshape_fusion.py diff --git a/nlp_toolkit/backends/neural_engine/compile/sub_graph/restore_hidden_states_in_length_adaptive_update_indices.py b/intel_extension_for_transformers/backends/neural_engine/compile/sub_graph/restore_hidden_states_in_length_adaptive_update_indices.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/sub_graph/restore_hidden_states_in_length_adaptive_update_indices.py rename to intel_extension_for_transformers/backends/neural_engine/compile/sub_graph/restore_hidden_states_in_length_adaptive_update_indices.py diff --git a/nlp_toolkit/backends/neural_engine/compile/sub_graph/start_end_logits.py b/intel_extension_for_transformers/backends/neural_engine/compile/sub_graph/start_end_logits.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/sub_graph/start_end_logits.py rename to intel_extension_for_transformers/backends/neural_engine/compile/sub_graph/start_end_logits.py diff --git a/nlp_toolkit/backends/neural_engine/compile/sub_graph/subgraph_matcher.py b/intel_extension_for_transformers/backends/neural_engine/compile/sub_graph/subgraph_matcher.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/sub_graph/subgraph_matcher.py rename to intel_extension_for_transformers/backends/neural_engine/compile/sub_graph/subgraph_matcher.py diff --git a/nlp_toolkit/backends/neural_engine/compile/sub_graph/token_type_embeddings.py b/intel_extension_for_transformers/backends/neural_engine/compile/sub_graph/token_type_embeddings.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/sub_graph/token_type_embeddings.py rename to intel_extension_for_transformers/backends/neural_engine/compile/sub_graph/token_type_embeddings.py diff --git a/nlp_toolkit/backends/neural_engine/compile/sub_graph/token_type_embeddings_v1.py b/intel_extension_for_transformers/backends/neural_engine/compile/sub_graph/token_type_embeddings_v1.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/sub_graph/token_type_embeddings_v1.py rename to intel_extension_for_transformers/backends/neural_engine/compile/sub_graph/token_type_embeddings_v1.py diff --git a/nlp_toolkit/backends/neural_engine/compile/sub_graph/transpose_batch_matmul.py b/intel_extension_for_transformers/backends/neural_engine/compile/sub_graph/transpose_batch_matmul.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/sub_graph/transpose_batch_matmul.py rename to intel_extension_for_transformers/backends/neural_engine/compile/sub_graph/transpose_batch_matmul.py diff --git a/nlp_toolkit/backends/neural_engine/compile/sub_graph/word_embeddings.py 
b/intel_extension_for_transformers/backends/neural_engine/compile/sub_graph/word_embeddings.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/sub_graph/word_embeddings.py rename to intel_extension_for_transformers/backends/neural_engine/compile/sub_graph/word_embeddings.py diff --git a/nlp_toolkit/backends/neural_engine/compile/tf_utils.py b/intel_extension_for_transformers/backends/neural_engine/compile/tf_utils.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/compile/tf_utils.py rename to intel_extension_for_transformers/backends/neural_engine/compile/tf_utils.py diff --git a/nlp_toolkit/backends/neural_engine/docs/Deploy and Integration.md b/intel_extension_for_transformers/backends/neural_engine/docs/Deploy and Integration.md similarity index 94% rename from nlp_toolkit/backends/neural_engine/docs/Deploy and Integration.md rename to intel_extension_for_transformers/backends/neural_engine/docs/Deploy and Integration.md index 259ab156104..fdb562759e1 100644 --- a/nlp_toolkit/backends/neural_engine/docs/Deploy and Integration.md +++ b/intel_extension_for_transformers/backends/neural_engine/docs/Deploy and Integration.md @@ -9,7 +9,7 @@ In this tutorial, we will deploy a TF/ONNX model using Engine inference OR throu Only support TensorFlow and ONNX model for now. ``` -from nlp_toolkit.backends.neural_engine.compile import compile +from intel_extension_for_transformers.backends.neural_engine.compile import compile model = compile('/path/to/your/model') model.save('/ir/path') # Engine graph could be saved to path ``` @@ -82,7 +82,7 @@ All input tensors are in an operator typed Input. But slightly difference is som Parse the yaml and weight bin to Engine Graph ``` -from nlp_toolkit.backends.neural_engine.compile.graph import Graph +from intel_extension_for_transformers.backends.neural_engine.compile.graph import Graph model = Graph() model.graph_init('./ir/conf.yaml', './ir/model.bin') input_data = [input_0, input_1, input_2] diff --git a/nlp_toolkit/backends/neural_engine/docs/Installation.md b/intel_extension_for_transformers/backends/neural_engine/docs/Installation.md similarity index 100% rename from nlp_toolkit/backends/neural_engine/docs/Installation.md rename to intel_extension_for_transformers/backends/neural_engine/docs/Installation.md diff --git a/nlp_toolkit/backends/neural_engine/docs/add_customized_pattern.md b/intel_extension_for_transformers/backends/neural_engine/docs/add_customized_pattern.md similarity index 97% rename from nlp_toolkit/backends/neural_engine/docs/add_customized_pattern.md rename to intel_extension_for_transformers/backends/neural_engine/docs/add_customized_pattern.md index e48f19f573c..099ff562cba 100644 --- a/nlp_toolkit/backends/neural_engine/docs/add_customized_pattern.md +++ b/intel_extension_for_transformers/backends/neural_engine/docs/add_customized_pattern.md @@ -14,7 +14,7 @@ First you should check whether the nodes' op_types in the pattern are registered ```python # make sure you have cloned neural_compressor repo and installed neural_compressor -from nlp_toolkit.backends.neural_engine.compile.ops.op import OPERATORS +from intel_extension_for_transformers.backends.neural_engine.compile.ops.op import OPERATORS # All the op_type names and objects are stored in `OPERATORS` print(OPERATORS) ``` @@ -87,7 +87,7 @@ python setup.py install ```python # check your code changes -from nlp_toolkit.backends.neural_engine.compile.ops.op import OPERATORS +from 
intel_extension_for_transformers.backends.neural_engine.compile.ops.op import OPERATORS 'Sqrt' and 'ReduceMean' in OPERATORS ``` @@ -142,7 +142,7 @@ In `Engine`, we treat the pattern fusion as the process of pattern mapping: from Like the node op_type, the new pattern also need to be registered. You can check the existing pattern classes by the commands below. ```python - from nlp_toolkit.backends.neural_engine.compile.sub_graph.pattern import PATTERNS + from intel_extension_for_transformers.backends.neural_engine.compile.sub_graph.pattern import PATTERNS print(PATTERNS) ``` @@ -199,7 +199,7 @@ In `Engine`, we treat the pattern fusion as the process of pattern mapping: from After save this python file, you can check it by retrieving the `PATTERNS` ```python - from nlp_toolkit.backends.neural_engine.compile.sub_graph.pattern import PATTERNS + from intel_extension_for_transformers.backends.neural_engine.compile.sub_graph.pattern import PATTERNS 'LayerNorm' in PATTERNS ``` diff --git a/nlp_toolkit/backends/neural_engine/docs/engine_inferencer.md b/intel_extension_for_transformers/backends/neural_engine/docs/engine_inferencer.md similarity index 98% rename from nlp_toolkit/backends/neural_engine/docs/engine_inferencer.md rename to intel_extension_for_transformers/backends/neural_engine/docs/engine_inferencer.md index 05c2470d5fc..11e119c040a 100644 --- a/nlp_toolkit/backends/neural_engine/docs/engine_inferencer.md +++ b/intel_extension_for_transformers/backends/neural_engine/docs/engine_inferencer.md @@ -40,7 +40,7 @@ pip install neural-compressor ```python # import compile api form engine -from nlp_toolkit.backends.neural_engine.compile import compile +from intel_extension_for_transformers.backends.neural_engine.compile import compile # get the engine intermediate graph (input onnx or tf model) graph = compile() # save the graph and get the final ir diff --git a/nlp_toolkit/backends/neural_engine/docs/graph_fusion.md b/intel_extension_for_transformers/backends/neural_engine/docs/graph_fusion.md similarity index 100% rename from nlp_toolkit/backends/neural_engine/docs/graph_fusion.md rename to intel_extension_for_transformers/backends/neural_engine/docs/graph_fusion.md diff --git a/nlp_toolkit/backends/neural_engine/docs/imgs/compile_workflow.png b/intel_extension_for_transformers/backends/neural_engine/docs/imgs/compile_workflow.png similarity index 100% rename from nlp_toolkit/backends/neural_engine/docs/imgs/compile_workflow.png rename to intel_extension_for_transformers/backends/neural_engine/docs/imgs/compile_workflow.png diff --git a/nlp_toolkit/backends/neural_engine/docs/imgs/engine_adaptor_example.png b/intel_extension_for_transformers/backends/neural_engine/docs/imgs/engine_adaptor_example.png similarity index 100% rename from nlp_toolkit/backends/neural_engine/docs/imgs/engine_adaptor_example.png rename to intel_extension_for_transformers/backends/neural_engine/docs/imgs/engine_adaptor_example.png diff --git a/nlp_toolkit/backends/neural_engine/docs/imgs/engine_adaptor_workflow.png b/intel_extension_for_transformers/backends/neural_engine/docs/imgs/engine_adaptor_workflow.png similarity index 100% rename from nlp_toolkit/backends/neural_engine/docs/imgs/engine_adaptor_workflow.png rename to intel_extension_for_transformers/backends/neural_engine/docs/imgs/engine_adaptor_workflow.png diff --git a/nlp_toolkit/backends/neural_engine/docs/imgs/infrastructure.png b/intel_extension_for_transformers/backends/neural_engine/docs/imgs/infrastructure.png similarity index 100% rename from 
nlp_toolkit/backends/neural_engine/docs/imgs/infrastructure.png rename to intel_extension_for_transformers/backends/neural_engine/docs/imgs/infrastructure.png diff --git a/nlp_toolkit/backends/neural_engine/docs/imgs/layernorm_bert_large_tf.png b/intel_extension_for_transformers/backends/neural_engine/docs/imgs/layernorm_bert_large_tf.png similarity index 100% rename from nlp_toolkit/backends/neural_engine/docs/imgs/layernorm_bert_large_tf.png rename to intel_extension_for_transformers/backends/neural_engine/docs/imgs/layernorm_bert_large_tf.png diff --git a/nlp_toolkit/backends/neural_engine/docs/imgs/layernorm_distilbert_base_onnx.png b/intel_extension_for_transformers/backends/neural_engine/docs/imgs/layernorm_distilbert_base_onnx.png similarity index 100% rename from nlp_toolkit/backends/neural_engine/docs/imgs/layernorm_distilbert_base_onnx.png rename to intel_extension_for_transformers/backends/neural_engine/docs/imgs/layernorm_distilbert_base_onnx.png diff --git a/nlp_toolkit/backends/neural_engine/docs/imgs/layernorm_with_index.png b/intel_extension_for_transformers/backends/neural_engine/docs/imgs/layernorm_with_index.png similarity index 100% rename from nlp_toolkit/backends/neural_engine/docs/imgs/layernorm_with_index.png rename to intel_extension_for_transformers/backends/neural_engine/docs/imgs/layernorm_with_index.png diff --git a/nlp_toolkit/backends/neural_engine/docs/onnx_compile.md b/intel_extension_for_transformers/backends/neural_engine/docs/onnx_compile.md similarity index 97% rename from nlp_toolkit/backends/neural_engine/docs/onnx_compile.md rename to intel_extension_for_transformers/backends/neural_engine/docs/onnx_compile.md index ef35eb4aafd..9046bdabd52 100644 --- a/nlp_toolkit/backends/neural_engine/docs/onnx_compile.md +++ b/intel_extension_for_transformers/backends/neural_engine/docs/onnx_compile.md @@ -49,7 +49,7 @@ Then you will get the `distilbert_base` model `model.onnx` without task layer ```python # import compile api form engine -from nlp_toolkit.backends.neural_engine.compile import compile +from intel_extension_for_transformers.backends.neural_engine.compile import compile # get the engine intermediate graph (if trained on MRPC task) graph = compile("distilbert_base_uncased_mrpc.onnx") # get the engine intermediate graph (if not trained on MRPC task) diff --git a/intel_extension_for_transformers/backends/neural_engine/docs/onnx_quantize.md b/intel_extension_for_transformers/backends/neural_engine/docs/onnx_quantize.md new file mode 100644 index 00000000000..804fbda8a3b --- /dev/null +++ b/intel_extension_for_transformers/backends/neural_engine/docs/onnx_quantize.md @@ -0,0 +1,37 @@ +# Quantize an ONNX model to engine low precision/int8 IR + +## Design +Quantizing an ONNX model to the engine low precision/int8 IR takes two steps: 1. convert the ONNX model to the engine float IR; 2. quantize the float IR to the low precision/int8 IR. The first step is done during engine compile (a brief sketch follows); here we focus on the second step, quantizing a float engine IR to a low precision IR with INC. The complete example is in examples/engine/nlp/bert_base_mrpc.
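As a minimal sketch of step 1 (and of loading the float IR back for a sanity check), the snippet below uses only the compile and Graph APIs shown elsewhere in these docs. The model path and the BERT-style input shapes are placeholders, not part of the example; the actual step-2 quantization is driven by the example scripts that follow.

```python
import numpy as np

from intel_extension_for_transformers.backends.neural_engine.compile import compile
from intel_extension_for_transformers.backends.neural_engine.compile.graph import Graph

# Step 1: compile the ONNX model to the engine float IR and save it.
# 'model.onnx' is a placeholder for your own exported model.
graph = compile('model.onnx')
graph.save('./ir')  # writes ./ir/conf.yaml and ./ir/model.bin

# Reload the saved float IR and run it once before quantizing.
# The (1, 128) int32 tensors below are assumed BERT-style inputs;
# match them to your model's real input signature.
model = Graph()
model.graph_init('./ir/conf.yaml', './ir/model.bin')
input_ids = np.zeros((1, 128), dtype=np.int32)
token_type_ids = np.zeros((1, 128), dtype=np.int32)
attention_mask = np.ones((1, 128), dtype=np.int32)
out = model.inference([input_ids, token_type_ids, attention_mask])
```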
+ +## Prerequisite +### Install environment +```shell +cd /examples/deepengine/nlp/distilbert_base_uncased_mrpc +conda create -n <env_name> python=3.7 +conda activate <env_name> +pip install -r requirements.txt +``` +### Prepare Dataset +```shell +python prepare_dataset.py --tasks='MRPC' --output_dir=./data +``` +### Prepare ONNX model +```shell +bash prepare_model.sh +``` + +## Run tuning and benchmark +Users can run the shell commands below to tune the model with the optimization module and get its accuracy and the output ONNX model. +### 1. To get the tuned model and its accuracy: +```shell +bash prepare_model.sh --input_model=moshew/bert-mini-sst2-distilled --task_name=sst2 --output_dir=./model_and_tokenizer --precision=int8 +``` + +### 2. To get the benchmark of the tuned model: +```shell +GLOG_minloglevel=2 python run_executor.py --input_model=./model_and_tokenizer/int8-model.onnx --tokenizer_dir=./model_and_tokenizer --mode=accuracy --data_dir=./data --batch_size=8 +``` + +```shell +GLOG_minloglevel=2 python run_executor.py --input_model=./model_and_tokenizer/int8-model.onnx --mode=performance --batch_size=8 --seq_len=128 +``` diff --git a/nlp_toolkit/backends/neural_engine/docs/operator_register.md b/intel_extension_for_transformers/backends/neural_engine/docs/operator_register.md similarity index 100% rename from nlp_toolkit/backends/neural_engine/docs/operator_register.md rename to intel_extension_for_transformers/backends/neural_engine/docs/operator_register.md diff --git a/nlp_toolkit/backends/neural_engine/docs/pattern_recognize.md b/intel_extension_for_transformers/backends/neural_engine/docs/pattern_recognize.md similarity index 96% rename from nlp_toolkit/backends/neural_engine/docs/pattern_recognize.md rename to intel_extension_for_transformers/backends/neural_engine/docs/pattern_recognize.md index 0b743fba1cd..b59504afe15 100644 --- a/nlp_toolkit/backends/neural_engine/docs/pattern_recognize.md +++ b/intel_extension_for_transformers/backends/neural_engine/docs/pattern_recognize.md @@ -58,8 +58,8 @@ ret = [[A_node_name_1, B_node_name_1, C_node_name_1, [A, B, C]], [A_node_name_2, Assume you want to find the match results of pattern `['MatMul', 'BiasAdd', ['Add', 'AddV2']]` in bert_large TensorFlow model (you can get this model from this [link](https://github.com/intel/neural-compressor/tree/master/examples/engine/nlp/squad/bert_large#2-prepare-dataset-and-model) and make sure the tf version is `intel-tensorflow-1.15-up2`). ```python -from nlp_toolkit.backends.neural_engine.compile import COMPILES -from nlp_toolkit.backends.neural_engine.compile.graph_utils import search_straight_pattern +from intel_extension_for_transformers.backends.neural_engine.compile import COMPILES +from intel_extension_for_transformers.backends.neural_engine.compile.graph_utils import search_straight_pattern graph = COMPILES['loader']()(bert_large_model_path) graph = COMPILES['extractor']()(graph) input_pattern = ['MatMul', 'BiasAdd', ['Add', 'AddV2']] @@ -82,8 +82,8 @@ Each sub-chain pattern matched results would find their attached main chain by c In the end, here is the example shows how to get the `LayerNorm` pattern matched results in bert_large TensorFlow model.
```python -from nlp_toolkit.backends.neural_engine.compile import COMPILES -from nlp_toolkit.backends.neural_engine.compile.graph_utils import search_pattern +from intel_extension_for_transformers.backends.neural_engine.compile import COMPILES +from intel_extension_for_transformers.backends.neural_engine.compile.graph_utils import search_pattern graph = COMPILES['loader']()(bert_large_model_path) graph = COMPILES['extractor']()(graph) ln_pattern = [[(0, 'Mean'), (1, 'SquaredDifference'), (2, 'Mean'), (3, ['Add', 'AddV2']), (4, 'Rsqrt'), (5, 'Mul'), (7, 'Mul'), (8, 'Sub'), (9, ['Add', 'AddV2'])], [(5, 'Mul'), (6, 'Mul'), (9, ['Add', 'AddV2'])]] diff --git a/nlp_toolkit/backends/neural_engine/docs/tensorflow_compile.md b/intel_extension_for_transformers/backends/neural_engine/docs/tensorflow_compile.md similarity index 96% rename from nlp_toolkit/backends/neural_engine/docs/tensorflow_compile.md rename to intel_extension_for_transformers/backends/neural_engine/docs/tensorflow_compile.md index 5aa2d432a02..5f5c49205c6 100644 --- a/nlp_toolkit/backends/neural_engine/docs/tensorflow_compile.md +++ b/intel_extension_for_transformers/backends/neural_engine/docs/tensorflow_compile.md @@ -42,7 +42,7 @@ Here is one example show that how to use `Engine` to compile `TensorFlow` model ```python # import compile api form engine -from nlp_toolkit.backends.neural_engine.compile import compile +from intel_extension_for_transformers.backends.neural_engine.compile import compile # get the engine intermediate graph graph = compile("./model/bert_base_mrpc.pb") # save the graph and get the final ir diff --git a/nlp_toolkit/backends/neural_engine/docs/tensorflow_quantize.md b/intel_extension_for_transformers/backends/neural_engine/docs/tensorflow_quantize.md similarity index 100% rename from nlp_toolkit/backends/neural_engine/docs/tensorflow_quantize.md rename to intel_extension_for_transformers/backends/neural_engine/docs/tensorflow_quantize.md diff --git a/intel_extension_for_transformers/backends/neural_engine/docs/validated_model.md b/intel_extension_for_transformers/backends/neural_engine/docs/validated_model.md new file mode 100644 index 00000000000..1be33651f4f --- /dev/null +++ b/intel_extension_for_transformers/backends/neural_engine/docs/validated_model.md @@ -0,0 +1,1504 @@ +Performance results were tested on 07/10/2022 with an Intel(R) Platinum 8375C processor on an AWS c6i.12xlarge instance. + +Performance varies by use, configuration and other factors. See platform configuration for configuration details.
+For more complete information about performance and benchmark results, visit www.intel.com/benchmarks
+
+| Model | Sparsity | Sequence Length | Max Throughput (samples/sec) | Latency (ms) | Batch Size | Instance | Cores/Instance | Requirement |
+| --- | --- | --- | --- | --- | --- | --- | --- | --- |
+| BERT Large | 90% | 16 | 2652 | 36.513 | 8 | 12 | 2 | BatchSize <= 8 && Latency <= 50ms |
+| BERT Large | 85% | 16 | 2181 | 43.628 | 8 | 12 | 2 | |
+| BERT Large | 80% | 16 | 1719 | 41.91 | 6 | 12 | 2 | |
+| BERT Large | 75% | 16 | 1487 | 48.255 | 6 | 12 | 2 | |
+| BERT Large | 70% | 16 | 1220 | 39.143 | 4 | 12 | 2 | |
+| BERT Large | 90% | 32 | 1304 | 46.066 | 5 | 12 | 2 | |
+| BERT Large | 85% | 32 | 1102 | 43.9 | 4 | 12 | 2 | |
+| BERT Large | 80% | 32 | 870 | 41.487 | 3 | 12 | 2 | |
+| BERT Large | 75% | 32 | 750 | 47.82 | 3 | 12 | 2 | |
+| BERT Large | 70% | 32 | 690 | 46.188 | 4 | 8 | 3 | |
+| BERT Large | 90% | 48 | 910 | 39.921 | 3 | 12 | 2 | |
+| BERT Large | 85% | 48 | 729 | 49.71 | 3 | 12 | 2 | |
+| BERT Large | 80% | 48 | 619 | 38.821 | 4 | 6 | 4 | |
+| BERT Large | 75% | 48 | 499 | 47.933 | 2 | 12 | 2 | |
+| BERT Large | 70% | 48 | 429 | 41.936 | 3 | 6 | 4 | |
+| BERT Large | 90% | 64 | 680 | 35.318 | 2 | 12 | 2 | |
+| BERT Large | 85% | 64 | 557 | 43.262 | 2 | 12 | 2 | |
+| BERT Large | 80% | 64 | 467 | 38.589 | 3 | 6 | 4 | |
+| BERT Large | 75% | 64 | 405 | 44.415 | 3 | 6 | 4 | |
+| BERT Large | 70% | 64 | 339 | 35.473 | 2 | 4 | 6 | |
+| BERT Large | 90% | 80 | 513 | 47.119 | 2 | 12 | 2 | |
+| BERT Large | 85% | 80 | 389 | 40.945 | 2 | 8 | 3 | |
+| BERT Large | 80% | 80 | 360 | 49.936 | 3 | 6 | 4 | |
+| BERT Large | 75% | 80 | 274 | 43.843 | 2 | 6 | 4 | |
+| BERT Large | 70% | 80 | 259 | 46.242 | 3 | 6 | 4 | |
+| BERT Large | 90% | 96 | 442 | 36.241 | 2 | 8 | 3 | |
+| BERT Large | 85% | 96 | 364 | 44.016 | 2 | 8 | 3 | |
+| BERT Large | 80% | 96 | 303 | 40.13 | 2 | 6 | 4 | |
+| BERT Large | 75% | 96 | 266 | 45.05 | 2 | 6 | 4 | |
+| BERT Large | 70% | 96 | 221 | 35.973 | 2 | 6 | 4 | |
+| BERT Large | 90% | 112 | 349 | 45.938 | 2 | 8 | 3 | |
+| BERT Large | 85% | 112 | 275 | 43.614 | 2 | 6 | 4 | |
+| BERT Large | 80% | 112 | 217 | 37.039 | 2 | 4 | 6 | |
+| BERT Large | 75% | 112 | 188 | 42.301 | 2 | 4 | 6 | |
+| BERT Large | 70% | 112 | 166 | 48.033 | 2 | 6 | 4 | |
+| BERT Large | 90% | 128 | 320 | 49.769 | 2 | 8 | 3 | |
+| BERT Large | 85% | 128 | 263 | 45.724 | 2 | 6 | 4 | |
+| BERT Large | 80% | 128 | 207 | 38.819 | 2 | 4 | 6 | |
+| BERT Large | 75% | 128 | 181 | 44.021 | 2 | 4 | 6 | |
+| BERT Large | 70% | 128 | 160 | 49.979 | 2 | 6 | 4 | |
+| BERT Large | 90% | 384 | 74 | 41.235 | 1 | 3 | 8 | |
+| BERT Large | 85% | 384 | 63 | 48.091 | 2 | 2 | 12 | |
+| BERT Large | 80% | 384 | 51 | 38.914 | 1 | 2 | 12 | |
+| BERT Large | 75% | 384 | 46 | 43.698 | 1 | 2 | 12 | |
+| BERT Large | 70% | 384 | 42 | 47.905 | 1 | 2 | 12 | |
+| BERT Base | 90% | 16 | 8972 | 16.076 | 6 | 24 | 1 | BatchSize <= 8 && Latency <= 20ms |
+| BERT Base | 85% | 16 | 7192 | 19.906 | 6 | 24 | 1 | |
+| BERT Base | 80% | 16 | 5482 | 17.509 | 4 | 24 | 1 | |
+| BERT Base | 75% | 16 | 4808 | 19.952 | 4 | 24 | 1 | |
+| BERT Base | 70% | 16 | 3678 | 19.409 | 3 | 24 | 1 | |
+| BERT Base | 90% | 32 | 4750 | 19.975 | 3 | 24 | 1 | |
+| BERT Base | 85% | 32 | 3647 | 19.867 | 3 | 24 | 1 | |
+| BERT Base | 80% | 32 | 2921 | 16.587 | 4 | 12 | 2 | |
+| BERT Base | 75% | 32 | 2576 | 18.768 | 4 | 12 | 2 | |
+| BERT Base | 70% | 32 | 2131 | 16.913 | 3 | 12 | 2 | |
+| BERT Base | 90% | 48 | 2804 | 17.096 | 4 | 12 | 2 | |
+| BERT Base | 85% | 48 | 2263 | 16.037 | 3 | 12 | 2 | |
+| BERT Base | 80% | 48 | 1938 | 18.756 | 3 | 12 | 2 | |
+| BERT Base | 75% | 48 | 1581 | 15.119 | 2 | 12 | 2 | |
+| BERT Base | 70% | 48 | 1409 | 17.015 | 2 | 12 | 2 | |
+| BERT Base | 90% | 64 | 2116 | 17.113 | 3 | 12 | 2 | |
+| BERT Base | 85% | 64 | 1777 | 19.972 | 3 | 12 | 2 | |
+| BERT Base | 80% | 64 | 1474 | 16.392 | 2 | 12 | 2 | |
+| BERT Base | 75% | 64 | 1278 | 18.74 | 4 | 6 | 4 | |
+| BERT Base | 70% | 64 | 1137 | 15.752 | 3 | 6 | 4 | |
+| BERT Base | 90% | 80 | 1594 | 15.124 | 4 | 6 | 4 | |
+| BERT Base | 85% | 80 | 1347 | 17.9 | 4 | 6 | 4 | |
+| BERT Base | 80% | 80 | 1126 | 16 | 3 | 6 | 4 | |
+| BERT Base | 75% | 80 | 993 | 18.258 | 3 | 6 | 4 | |
+| BERT Base | 70% | 80 | 890 | 19.877 | 3 | 6 | 4 | |
+| BERT Base | 90% | 96 | 1319 | 18.28 | 4 | 6 | 4 | |
+| BERT Base | 85% | 96 | 1086 | 16.63 | 3 | 6 | 4 | |
+| BERT Base | 80% | 96 | 931 | 19.545 | 3 | 6 | 4 | |
+| BERT Base | 75% | 96 | 833 | 14.332 | 2 | 6 | 4 | |
+| BERT Base | 70% | 96 | 747 | 16.141 | 2 | 6 | 4 | |
+| BERT Base | 90% | 112 | 1106 | 16.31 | 3 | 6 | 4 | |
+| BERT Base | 85% | 112 | 924 | 19.446 | 4 | 6 | 4 | |
+| BERT Base | 80% | 112 | 719 | 16.698 | 4 | 4 | 6 | |
+| BERT Base | 75% | 112 | 633 | 19.124 | 2 | 6 | 4 | |
+| BERT Base | 70% | 112 | 501 | 15.816 | 2 | 4 | 6 | |
+| BERT Base | 90% | 128 | 961 | 18.549 | 3 | 6 | 4 | |
+| BERT Base | 85% | 128 | 807 | 14.868 | 2 | 6 | 4 | |
+| BERT Base | 80% | 128 | 701 | 17.117 | 4 | 4 | 6 | |
+| BERT Base | 75% | 128 | 613 | 19.618 | 2 | 6 | 4 | |
+| BERT Base | 70% | 128 | 515 | 15.484 | 2 | 4 | 6 | |
+| BERT Mini | 90% | 16 | 75384 | 0.989 | 3 | 24 | 1 | BatchSize <= 8 && Latency <= 1ms |
+| BERT Mini | 85% | 16 | 55628 | 0.917 | 2 | 24 | 1 | |
+| BERT Mini | 80% | 16 | 49120 | 0.957 | 4 | 12 | 2 | |
+| BERT Mini | 75% | 16 | 41598 | 0.85 | 3 | 12 | 2 | |
+| BERT Mini | 70% | 16 | 39218 | 0.913 | 3 | 12 | 2 | |
+| BERT Mini | 90% | 32 | 31211 | 0.788 | 1 | 24 | 1 | |
+| BERT Mini | 85% | 32 | 28399 | 0.848 | 1 | 24 | 1 | |
+| BERT Mini | 80% | 32 | 24910 | 0.966 | 4 | 6 | 4 | |
+| BERT Mini | 75% | 32 | 20505 | 0.88 | 3 | 6 | 4 | |
+| BERT Mini | 70% | 32 | 18575 | 0.865 | 2 | 8 | 3 | |
+| BERT Mini | 90% | 48 | 25485 | 0.987 | 1 | 24 | 1 | |
+| BERT Mini | 85% | 48 | 17468 | 0.924 | 2 | 8 | 3 | |
+| BERT Mini | 80% | 48 | 16771 | 0.971 | 2 | 8 | 3 | |
+| BERT Mini | 75% | 48 | 16004 | 0.996 | 2 | 8 | 3 | |
+| BERT Mini | 70% | 48 | 13381 | 0.894 | 2 | 6 | 4 | |
+| BERT Mini | 90% | 64 | 13990 | 0.863 | 1 | 12 | 2 | |
+| BERT Mini | 85% | 64 | 13154 | 0.917 | 2 | 6 | 4 | |
+| BERT Mini | 80% | 64 | 12535 | 0.973 | 2 | 6 | 4 | |
+| BERT Mini | 75% | 64 | 12102 | 0.998 | 2 | 6 | 4 | |
+| BERT Mini | 70% | 64 | 8426 | 0.961 | 2 | 4 | 6 | |
+| BERT Mini | 90% | 80 | 8736 | 0.917 | 1 | 8 | 3 | |
+| BERT Mini | 85% | 80 | 8282 | 0.967 | 2 | 6 | 4 | |
+| BERT Mini | 80% | 80 | 6659 | 0.898 | 2 | 3 | 8 | |
+| BERT Mini | 75% | 80 | 6477 | 0.933 | 2 | 3 | 8 | |
+| BERT Mini | 70% | 80 | 6290 | 0.974 | 2 | 3 | 8 | |
+| BERT Mini | 90% | 96 | 8598 | 0.931 | 1 | 8 | 3 | |
+| BERT Mini | 85% | 96 | 6449 | 0.935 | 2 | 3 | 8 | |
+| BERT Mini | 80% | 96 | 6263 | 0.965 | 2 | 3 | 8 | |
+| BERT Mini | 75% | 96 | 6080 | 0.988 | 2 | 3 | 8 | |
+| BERT Mini | 70% | 96 | 3684 | 0.817 | 1 | 3 | 8 | |
+| BERT Mini | 90% | 112 | 6246 | 0.978 | 1 | 6 | 4 | |
+| BERT Mini | 85% | 112 | 6374 | 0.947 | 1 | 6 | 4 | |
+| BERT Mini | 80% | 112 | 6026 | 0.998 | 1 | 6 | 4 | |
+| BERT Mini | 75% | 112 | 3300 | 0.926 | 1 | 3 | 8 | |
+| BERT Mini | 70% | 112 | 3226 | 0.935 | 1 | 3 | 8 | |
+| BERT Mini | 90% | 128 | 6221 | 0.958 | 1 | 6 | 4 | |
+| BERT Mini | 85% | 128 | 6322 | 0.96 | 1 | 6 | 4 | |
+| BERT Mini | 80% | 128 | 6081 | 0.985 | 1 | 6 | 4 | |
+| BERT Mini | 75% | 128 | 3368 | 0.894 | 1 | 3 | 8 | |
+| BERT Mini | 70% | 128 | 3264 | 0.924 | 1 | 3 | 8 | |
+| DistillBERT | 90% | 16 | 15460 | 6.296 | 8 | 12 | 2 | BatchSize <= 8 && Latency <= 10ms |
+| DistillBERT | 85% | 16 | 13129 | 7.363 | 8 | 12 | 2 | |
+| DistillBERT | 80% | 16 | 11323 | 8.578 | 8 | 12 | 2 | |
+| DistillBERT | 75% | 16 | 10072 | 9.599 | 8 | 12 | 2 | |
+| DistillBERT | 70% | 16 | 8689 | 7.44 | 8 | 8 | 3 | |
+| DistillBERT | 90% | 32 | 7901 | 8.205 | 8 | 8 | 3 | |
+| DistillBERT | 85% | 32 | 6737 | 9.562 | 8 | 8 | 3 | |
+| DistillBERT | 80% | 32 | 5440 | 8.904 | 8 | 6 | 4 | |
+| DistillBERT | 75% | 32 | 4920 | 9.786 | 8 | 6 | 4 | |
+| DistillBERT | 70% | 32 | 4460 | 8.053 | 6 | 6 | 4 | |
+| DistillBERT | 90% | 48 | 5280 | 9.179 | 6 | 8 | 3 | |
+| DistillBERT | 85% | 48 | 4339 | 9.244 | 5 | 8 | 3 | |
+| DistillBERT | 80% | 48 | 3634 | 9.928 | 5 | 6 | 4 | |
+| DistillBERT | 75% | 48 | 3255 | 9.211 | 5 | 6 | 4 | |
+| DistillBERT | 70% | 48 | 2928 | 8.197 | 4 | 6 | 4 | |
+| DistillBERT | 90% | 64 | 3681 | 9.527 | 6 | 6 | 4 | |
+| DistillBERT | 85% | 64 | 2965 | 8.106 | 6 | 4 | 6 | |
+| DistillBERT | 80% | 64 | 2741 | 8.862 | 3 | 8 | 3 | |
+| DistillBERT | 75% | 64 | 2351 | 8.527 | 5 | 4 | 6 | |
+| DistillBERT | 70% | 64 | 2150 | 9.282 | 5 | 4 | 6 | |
+| DistillBERT | 90% | 80 | 2918 | 8.316 | 4 | 6 | 4 | |
+| DistillBERT | 85% | 80 | 2505 | 9.602 | 4 | 6 | 4 | |
+| DistillBERT | 80% | 80 | 2010 | 7.995 | 4 | 4 | 6 | |
+| DistillBERT | 75% | 80 | 1872 | 8.609 | 4 | 4 | 6 | |
+| DistillBERT | 70% | 80 | 1706 | 9.442 | 4 | 4 | 6 | |
+| DistillBERT | 90% | 96 | 2409 | 9.983 | 4 | 6 | 4 | |
+| DistillBERT | 85% | 96 | 1993 | 8.013 | 4 | 4 | 6 | |
+| DistillBERT | 80% | 96 | 1671 | 9.627 | 4 | 4 | 6 | |
+| DistillBERT | 75% | 96 | 1484 | 8.131 | 4 | 3 | 8 | |
+| DistillBERT | 70% | 96 | 1342 | 8.967 | 4 | 3 | 8 | |
+| DistillBERT | 90% | 112 | 1910 | 8.456 | 4 | 4 | 6 | |
+| DistillBERT | 85% | 112 | 1659 | 9.687 | 4 | 4 | 6 | |
+| DistillBERT | 80% | 112 | 1293 | 9.245 | 4 | 3 | 8 | |
+| DistillBERT | 75% | 112 | 1230 | 9.798 | 4 | 3 | 8 | |
+| DistillBERT | 70% | 112 | 1091 | 8.261 | 3 | 3 | 8 | |
+| DistillBERT | 90% | 128 | 1627 | 9.908 | 4 | 4 | 6 | |
+| DistillBERT | 85% | 128 | 1397 | 8.684 | 3 | 4 | 6 | |
+| DistillBERT | 80% | 128 | 1130 | 7.957 | 3 | 3 | 8 | |
+| DistillBERT | 75% | 128 | 1081 | 8.353 | 3 | 3 | 8 | |
+| DistillBERT | 70% | 128 | 982 | 9.187 | 3 | 3 | 8 | |
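+
+As a reference, numbers of this shape can be collected with the `run_executor.py` script shipped with the deployment examples in this repository. A minimal sketch is below; the model path is a placeholder for the tuned int8 ONNX model of whichever example is being measured, and `--batch_size`/`--seq_len` map to the Batch Size and Sequence Length columns above:
+
+```shell
+# Performance mode reports throughput and latency; accuracy mode additionally
+# needs --tokenizer_dir and --data_dir (see the example READMEs).
+GLOG_minloglevel=2 python run_executor.py --input_model=./model_and_tokenizer/int8-model.onnx --mode=performance --batch_size=8 --seq_len=128
+```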
+
+## Platform Configuration
+
+| Item | Value |
+| --- | --- |
+| Manufacturer | Amazon EC2 |
+| Product Name | c6i.12xlarge |
+| BIOS Version | 1 |
+| OS | Ubuntu 20.04.3 LTS |
+| Kernel | 5.15.0-1021-aws |
+| Microcode | 0xd000331 |
+| IRQ Balance | Disabled |
+| CPU Model | Intel(R) Xeon(R) Platinum 8375C CPU @ 2.90GHz |
+| Base Frequency | 2.9GHz |
+| Maximum Frequency | 3.9GHz |
+| All-core Maximum Frequency | 3.5GHz |
+| CPU(s) | 48 |
+| Thread(s) per Core | N/A |
+| Core(s) per Socket | 24 |
+| Socket(s) | 1 |
+| NUMA Node(s) | 1 |
+| Turbo | Enabled |
+| Frequency Governor | Default |
+| Max C-State | 9 |
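+
+For readers checking their own environment against this table, most of these fields can be read with standard Linux tools (a sketch; exact output formats vary by distribution):
+
+```shell
+lscpu                    # CPU model, CPU(s), cores per socket, sockets, NUMA nodes
+uname -r                 # kernel version
+grep -m1 microcode /proc/cpuinfo                           # microcode revision
+cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor  # frequency governor
+```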
\ No newline at end of file diff --git a/nlp_toolkit/backends/neural_engine/executor/CMakeLists.txt b/intel_extension_for_transformers/backends/neural_engine/executor/CMakeLists.txt similarity index 100% rename from nlp_toolkit/backends/neural_engine/executor/CMakeLists.txt rename to intel_extension_for_transformers/backends/neural_engine/executor/CMakeLists.txt diff --git a/nlp_toolkit/backends/neural_engine/executor/include/common.hpp b/intel_extension_for_transformers/backends/neural_engine/executor/include/common.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/executor/include/common.hpp rename to intel_extension_for_transformers/backends/neural_engine/executor/include/common.hpp diff --git a/nlp_toolkit/backends/neural_engine/executor/include/conf.hpp b/intel_extension_for_transformers/backends/neural_engine/executor/include/conf.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/executor/include/conf.hpp rename to intel_extension_for_transformers/backends/neural_engine/executor/include/conf.hpp diff --git a/nlp_toolkit/backends/neural_engine/executor/include/dataloader.hpp b/intel_extension_for_transformers/backends/neural_engine/executor/include/dataloader.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/executor/include/dataloader.hpp rename to intel_extension_for_transformers/backends/neural_engine/executor/include/dataloader.hpp diff --git a/nlp_toolkit/backends/neural_engine/executor/include/dispatch_table.hpp b/intel_extension_for_transformers/backends/neural_engine/executor/include/dispatch_table.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/executor/include/dispatch_table.hpp rename to intel_extension_for_transformers/backends/neural_engine/executor/include/dispatch_table.hpp diff --git a/nlp_toolkit/backends/neural_engine/executor/include/dispatcher.hpp b/intel_extension_for_transformers/backends/neural_engine/executor/include/dispatcher.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/executor/include/dispatcher.hpp rename to intel_extension_for_transformers/backends/neural_engine/executor/include/dispatcher.hpp diff --git a/nlp_toolkit/backends/neural_engine/executor/include/execution_options.hpp b/intel_extension_for_transformers/backends/neural_engine/executor/include/execution_options.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/executor/include/execution_options.hpp rename to intel_extension_for_transformers/backends/neural_engine/executor/include/execution_options.hpp diff --git a/nlp_toolkit/backends/neural_engine/executor/include/executor.hpp b/intel_extension_for_transformers/backends/neural_engine/executor/include/executor.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/executor/include/executor.hpp rename to intel_extension_for_transformers/backends/neural_engine/executor/include/executor.hpp diff --git a/nlp_toolkit/backends/neural_engine/executor/include/i_malloc.hpp b/intel_extension_for_transformers/backends/neural_engine/executor/include/i_malloc.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/executor/include/i_malloc.hpp rename to intel_extension_for_transformers/backends/neural_engine/executor/include/i_malloc.hpp diff --git a/nlp_toolkit/backends/neural_engine/executor/include/isa.hpp b/intel_extension_for_transformers/backends/neural_engine/executor/include/isa.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/executor/include/isa.hpp 
rename to intel_extension_for_transformers/backends/neural_engine/executor/include/isa.hpp diff --git a/nlp_toolkit/backends/neural_engine/executor/include/llga_info.hpp b/intel_extension_for_transformers/backends/neural_engine/executor/include/llga_info.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/executor/include/llga_info.hpp rename to intel_extension_for_transformers/backends/neural_engine/executor/include/llga_info.hpp diff --git a/nlp_toolkit/backends/neural_engine/executor/include/llga_operators/inner_product_graph.hpp b/intel_extension_for_transformers/backends/neural_engine/executor/include/llga_operators/inner_product_graph.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/executor/include/llga_operators/inner_product_graph.hpp rename to intel_extension_for_transformers/backends/neural_engine/executor/include/llga_operators/inner_product_graph.hpp diff --git a/nlp_toolkit/backends/neural_engine/executor/include/llga_operators/llga_kernel.hpp b/intel_extension_for_transformers/backends/neural_engine/executor/include/llga_operators/llga_kernel.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/executor/include/llga_operators/llga_kernel.hpp rename to intel_extension_for_transformers/backends/neural_engine/executor/include/llga_operators/llga_kernel.hpp diff --git a/nlp_toolkit/backends/neural_engine/executor/include/llga_operators/llga_op_creator.hpp b/intel_extension_for_transformers/backends/neural_engine/executor/include/llga_operators/llga_op_creator.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/executor/include/llga_operators/llga_op_creator.hpp rename to intel_extension_for_transformers/backends/neural_engine/executor/include/llga_operators/llga_op_creator.hpp diff --git a/nlp_toolkit/backends/neural_engine/executor/include/llga_operators/softmax_graph.hpp b/intel_extension_for_transformers/backends/neural_engine/executor/include/llga_operators/softmax_graph.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/executor/include/llga_operators/softmax_graph.hpp rename to intel_extension_for_transformers/backends/neural_engine/executor/include/llga_operators/softmax_graph.hpp diff --git a/nlp_toolkit/backends/neural_engine/executor/include/memory_allocator.hpp b/intel_extension_for_transformers/backends/neural_engine/executor/include/memory_allocator.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/executor/include/memory_allocator.hpp rename to intel_extension_for_transformers/backends/neural_engine/executor/include/memory_allocator.hpp diff --git a/nlp_toolkit/backends/neural_engine/executor/include/model.hpp b/intel_extension_for_transformers/backends/neural_engine/executor/include/model.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/executor/include/model.hpp rename to intel_extension_for_transformers/backends/neural_engine/executor/include/model.hpp diff --git a/nlp_toolkit/backends/neural_engine/executor/include/op_tuning.hpp b/intel_extension_for_transformers/backends/neural_engine/executor/include/op_tuning.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/executor/include/op_tuning.hpp rename to intel_extension_for_transformers/backends/neural_engine/executor/include/op_tuning.hpp diff --git a/nlp_toolkit/backends/neural_engine/executor/include/operator.hpp b/intel_extension_for_transformers/backends/neural_engine/executor/include/operator.hpp similarity index 100% rename from 
nlp_toolkit/backends/neural_engine/executor/include/operator.hpp rename to intel_extension_for_transformers/backends/neural_engine/executor/include/operator.hpp diff --git a/nlp_toolkit/backends/neural_engine/executor/include/operator_registry.hpp b/intel_extension_for_transformers/backends/neural_engine/executor/include/operator_registry.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/executor/include/operator_registry.hpp rename to intel_extension_for_transformers/backends/neural_engine/executor/include/operator_registry.hpp diff --git a/nlp_toolkit/backends/neural_engine/executor/include/operators/binary_add.hpp b/intel_extension_for_transformers/backends/neural_engine/executor/include/operators/binary_add.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/executor/include/operators/binary_add.hpp rename to intel_extension_for_transformers/backends/neural_engine/executor/include/operators/binary_add.hpp diff --git a/nlp_toolkit/backends/neural_engine/executor/include/operators/concat.hpp b/intel_extension_for_transformers/backends/neural_engine/executor/include/operators/concat.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/executor/include/operators/concat.hpp rename to intel_extension_for_transformers/backends/neural_engine/executor/include/operators/concat.hpp diff --git a/nlp_toolkit/backends/neural_engine/executor/include/operators/convolution.hpp b/intel_extension_for_transformers/backends/neural_engine/executor/include/operators/convolution.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/executor/include/operators/convolution.hpp rename to intel_extension_for_transformers/backends/neural_engine/executor/include/operators/convolution.hpp diff --git a/nlp_toolkit/backends/neural_engine/executor/include/operators/embeddingbag.hpp b/intel_extension_for_transformers/backends/neural_engine/executor/include/operators/embeddingbag.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/executor/include/operators/embeddingbag.hpp rename to intel_extension_for_transformers/backends/neural_engine/executor/include/operators/embeddingbag.hpp diff --git a/nlp_toolkit/backends/neural_engine/executor/include/operators/erf.hpp b/intel_extension_for_transformers/backends/neural_engine/executor/include/operators/erf.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/executor/include/operators/erf.hpp rename to intel_extension_for_transformers/backends/neural_engine/executor/include/operators/erf.hpp diff --git a/nlp_toolkit/backends/neural_engine/executor/include/operators/expand_indices.hpp b/intel_extension_for_transformers/backends/neural_engine/executor/include/operators/expand_indices.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/executor/include/operators/expand_indices.hpp rename to intel_extension_for_transformers/backends/neural_engine/executor/include/operators/expand_indices.hpp diff --git a/nlp_toolkit/backends/neural_engine/executor/include/operators/gather.hpp b/intel_extension_for_transformers/backends/neural_engine/executor/include/operators/gather.hpp similarity index 97% rename from nlp_toolkit/backends/neural_engine/executor/include/operators/gather.hpp rename to intel_extension_for_transformers/backends/neural_engine/executor/include/operators/gather.hpp index f3ace578558..b6cb57d3629 100644 --- a/nlp_toolkit/backends/neural_engine/executor/include/operators/gather.hpp +++ 
b/intel_extension_for_transformers/backends/neural_engine/executor/include/operators/gather.hpp @@ -20,7 +20,7 @@ #include "../operator.hpp" #ifdef WITH_SPARSELIB -#include "SparseLib/include/interface.hpp" +#include "kernels/include/interface.hpp" #endif namespace executor { diff --git a/nlp_toolkit/backends/neural_engine/executor/include/operators/gather_elements.hpp b/intel_extension_for_transformers/backends/neural_engine/executor/include/operators/gather_elements.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/executor/include/operators/gather_elements.hpp rename to intel_extension_for_transformers/backends/neural_engine/executor/include/operators/gather_elements.hpp diff --git a/nlp_toolkit/backends/neural_engine/executor/include/operators/gelu.hpp b/intel_extension_for_transformers/backends/neural_engine/executor/include/operators/gelu.hpp similarity index 98% rename from nlp_toolkit/backends/neural_engine/executor/include/operators/gelu.hpp rename to intel_extension_for_transformers/backends/neural_engine/executor/include/operators/gelu.hpp index d7f4a9474d0..3c9af9bfcb5 100644 --- a/nlp_toolkit/backends/neural_engine/executor/include/operators/gelu.hpp +++ b/intel_extension_for_transformers/backends/neural_engine/executor/include/operators/gelu.hpp @@ -22,8 +22,9 @@ #include "../operator.hpp" #include "oneapi/dnnl/dnnl.hpp" #ifdef WITH_SPARSELIB -#include "SparseLib/include/interface.hpp" +#include "kernels/include/interface.hpp" #endif + namespace executor { using dnnl::algorithm; using dnnl::engine; diff --git a/nlp_toolkit/backends/neural_engine/executor/include/operators/group_norm.hpp b/intel_extension_for_transformers/backends/neural_engine/executor/include/operators/group_norm.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/executor/include/operators/group_norm.hpp rename to intel_extension_for_transformers/backends/neural_engine/executor/include/operators/group_norm.hpp diff --git a/nlp_toolkit/backends/neural_engine/executor/include/operators/inner_product.hpp b/intel_extension_for_transformers/backends/neural_engine/executor/include/operators/inner_product.hpp similarity index 99% rename from nlp_toolkit/backends/neural_engine/executor/include/operators/inner_product.hpp rename to intel_extension_for_transformers/backends/neural_engine/executor/include/operators/inner_product.hpp index 51062220667..9524ee474d2 100644 --- a/nlp_toolkit/backends/neural_engine/executor/include/operators/inner_product.hpp +++ b/intel_extension_for_transformers/backends/neural_engine/executor/include/operators/inner_product.hpp @@ -25,9 +25,8 @@ #include "../sparse_operators/sparse_inner_product.hpp" #include "oneapi/dnnl/dnnl.hpp" #ifdef WITH_SPARSELIB -#include "SparseLib/include/interface.hpp" +#include "kernels/include/interface.hpp" #endif - namespace executor { using dnnl::algorithm; diff --git a/nlp_toolkit/backends/neural_engine/executor/include/operators/input.hpp b/intel_extension_for_transformers/backends/neural_engine/executor/include/operators/input.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/executor/include/operators/input.hpp rename to intel_extension_for_transformers/backends/neural_engine/executor/include/operators/input.hpp diff --git a/nlp_toolkit/backends/neural_engine/executor/include/operators/layer_norm.hpp b/intel_extension_for_transformers/backends/neural_engine/executor/include/operators/layer_norm.hpp similarity index 98% rename from 
nlp_toolkit/backends/neural_engine/executor/include/operators/layer_norm.hpp rename to intel_extension_for_transformers/backends/neural_engine/executor/include/operators/layer_norm.hpp index e86b482bc0e..a8104f15338 100644 --- a/nlp_toolkit/backends/neural_engine/executor/include/operators/layer_norm.hpp +++ b/intel_extension_for_transformers/backends/neural_engine/executor/include/operators/layer_norm.hpp @@ -22,11 +22,9 @@ #include "../operator.hpp" #include "oneapi/dnnl/dnnl.hpp" - #ifdef WITH_SPARSELIB -#include "SparseLib/include/interface.hpp" +#include "kernels/include/interface.hpp" #endif - namespace executor { using dnnl::algorithm; using dnnl::engine; diff --git a/nlp_toolkit/backends/neural_engine/executor/include/operators/matmul.hpp b/intel_extension_for_transformers/backends/neural_engine/executor/include/operators/matmul.hpp similarity index 99% rename from nlp_toolkit/backends/neural_engine/executor/include/operators/matmul.hpp rename to intel_extension_for_transformers/backends/neural_engine/executor/include/operators/matmul.hpp index 0a2ebb3e0bb..23f619e45f5 100644 --- a/nlp_toolkit/backends/neural_engine/executor/include/operators/matmul.hpp +++ b/intel_extension_for_transformers/backends/neural_engine/executor/include/operators/matmul.hpp @@ -22,7 +22,7 @@ #include "../common.hpp" #include "../operator.hpp" #include "oneapi/dnnl/dnnl.hpp" -#include "SparseLib/include/interface.hpp" +#include "kernels/include/interface.hpp" namespace executor { diff --git a/nlp_toolkit/backends/neural_engine/executor/include/operators/merged_embeddingbag.hpp b/intel_extension_for_transformers/backends/neural_engine/executor/include/operators/merged_embeddingbag.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/executor/include/operators/merged_embeddingbag.hpp rename to intel_extension_for_transformers/backends/neural_engine/executor/include/operators/merged_embeddingbag.hpp diff --git a/nlp_toolkit/backends/neural_engine/executor/include/operators/one_hot.hpp b/intel_extension_for_transformers/backends/neural_engine/executor/include/operators/one_hot.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/executor/include/operators/one_hot.hpp rename to intel_extension_for_transformers/backends/neural_engine/executor/include/operators/one_hot.hpp diff --git a/nlp_toolkit/backends/neural_engine/executor/include/operators/output.hpp b/intel_extension_for_transformers/backends/neural_engine/executor/include/operators/output.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/executor/include/operators/output.hpp rename to intel_extension_for_transformers/backends/neural_engine/executor/include/operators/output.hpp diff --git a/nlp_toolkit/backends/neural_engine/executor/include/operators/padding_sequence.hpp b/intel_extension_for_transformers/backends/neural_engine/executor/include/operators/padding_sequence.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/executor/include/operators/padding_sequence.hpp rename to intel_extension_for_transformers/backends/neural_engine/executor/include/operators/padding_sequence.hpp diff --git a/nlp_toolkit/backends/neural_engine/executor/include/operators/position_ids.hpp b/intel_extension_for_transformers/backends/neural_engine/executor/include/operators/position_ids.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/executor/include/operators/position_ids.hpp rename to 
intel_extension_for_transformers/backends/neural_engine/executor/include/operators/position_ids.hpp diff --git a/nlp_toolkit/backends/neural_engine/executor/include/operators/pow.hpp b/intel_extension_for_transformers/backends/neural_engine/executor/include/operators/pow.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/executor/include/operators/pow.hpp rename to intel_extension_for_transformers/backends/neural_engine/executor/include/operators/pow.hpp diff --git a/nlp_toolkit/backends/neural_engine/executor/include/operators/quantize.hpp b/intel_extension_for_transformers/backends/neural_engine/executor/include/operators/quantize.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/executor/include/operators/quantize.hpp rename to intel_extension_for_transformers/backends/neural_engine/executor/include/operators/quantize.hpp diff --git a/nlp_toolkit/backends/neural_engine/executor/include/operators/range.hpp b/intel_extension_for_transformers/backends/neural_engine/executor/include/operators/range.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/executor/include/operators/range.hpp rename to intel_extension_for_transformers/backends/neural_engine/executor/include/operators/range.hpp diff --git a/nlp_toolkit/backends/neural_engine/executor/include/operators/reduce_mean.hpp b/intel_extension_for_transformers/backends/neural_engine/executor/include/operators/reduce_mean.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/executor/include/operators/reduce_mean.hpp rename to intel_extension_for_transformers/backends/neural_engine/executor/include/operators/reduce_mean.hpp diff --git a/nlp_toolkit/backends/neural_engine/executor/include/operators/reduce_sum.hpp b/intel_extension_for_transformers/backends/neural_engine/executor/include/operators/reduce_sum.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/executor/include/operators/reduce_sum.hpp rename to intel_extension_for_transformers/backends/neural_engine/executor/include/operators/reduce_sum.hpp diff --git a/nlp_toolkit/backends/neural_engine/executor/include/operators/reorder.hpp b/intel_extension_for_transformers/backends/neural_engine/executor/include/operators/reorder.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/executor/include/operators/reorder.hpp rename to intel_extension_for_transformers/backends/neural_engine/executor/include/operators/reorder.hpp diff --git a/nlp_toolkit/backends/neural_engine/executor/include/operators/reshape.hpp b/intel_extension_for_transformers/backends/neural_engine/executor/include/operators/reshape.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/executor/include/operators/reshape.hpp rename to intel_extension_for_transformers/backends/neural_engine/executor/include/operators/reshape.hpp diff --git a/nlp_toolkit/backends/neural_engine/executor/include/operators/scatter_elements.hpp b/intel_extension_for_transformers/backends/neural_engine/executor/include/operators/scatter_elements.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/executor/include/operators/scatter_elements.hpp rename to intel_extension_for_transformers/backends/neural_engine/executor/include/operators/scatter_elements.hpp diff --git a/nlp_toolkit/backends/neural_engine/executor/include/operators/shape.hpp b/intel_extension_for_transformers/backends/neural_engine/executor/include/operators/shape.hpp similarity index 100% rename from 
nlp_toolkit/backends/neural_engine/executor/include/operators/shape.hpp rename to intel_extension_for_transformers/backends/neural_engine/executor/include/operators/shape.hpp diff --git a/nlp_toolkit/backends/neural_engine/executor/include/operators/slice.hpp b/intel_extension_for_transformers/backends/neural_engine/executor/include/operators/slice.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/executor/include/operators/slice.hpp rename to intel_extension_for_transformers/backends/neural_engine/executor/include/operators/slice.hpp diff --git a/nlp_toolkit/backends/neural_engine/executor/include/operators/softmax.hpp b/intel_extension_for_transformers/backends/neural_engine/executor/include/operators/softmax.hpp similarity index 98% rename from nlp_toolkit/backends/neural_engine/executor/include/operators/softmax.hpp rename to intel_extension_for_transformers/backends/neural_engine/executor/include/operators/softmax.hpp index 986becbebd5..cc87f7e6252 100644 --- a/nlp_toolkit/backends/neural_engine/executor/include/operators/softmax.hpp +++ b/intel_extension_for_transformers/backends/neural_engine/executor/include/operators/softmax.hpp @@ -21,7 +21,7 @@ #include "../operator.hpp" #include "oneapi/dnnl/dnnl.hpp" -#include "SparseLib/include/interface.hpp" +#include "kernels/include/interface.hpp" namespace executor { using dnnl::algorithm; diff --git a/nlp_toolkit/backends/neural_engine/executor/include/operators/split.hpp b/intel_extension_for_transformers/backends/neural_engine/executor/include/operators/split.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/executor/include/operators/split.hpp rename to intel_extension_for_transformers/backends/neural_engine/executor/include/operators/split.hpp diff --git a/nlp_toolkit/backends/neural_engine/executor/include/operators/strided_slice.hpp b/intel_extension_for_transformers/backends/neural_engine/executor/include/operators/strided_slice.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/executor/include/operators/strided_slice.hpp rename to intel_extension_for_transformers/backends/neural_engine/executor/include/operators/strided_slice.hpp diff --git a/nlp_toolkit/backends/neural_engine/executor/include/operators/token_type_ids.hpp b/intel_extension_for_transformers/backends/neural_engine/executor/include/operators/token_type_ids.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/executor/include/operators/token_type_ids.hpp rename to intel_extension_for_transformers/backends/neural_engine/executor/include/operators/token_type_ids.hpp diff --git a/nlp_toolkit/backends/neural_engine/executor/include/operators/topk.hpp b/intel_extension_for_transformers/backends/neural_engine/executor/include/operators/topk.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/executor/include/operators/topk.hpp rename to intel_extension_for_transformers/backends/neural_engine/executor/include/operators/topk.hpp diff --git a/nlp_toolkit/backends/neural_engine/executor/include/operators/transpose.hpp b/intel_extension_for_transformers/backends/neural_engine/executor/include/operators/transpose.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/executor/include/operators/transpose.hpp rename to intel_extension_for_transformers/backends/neural_engine/executor/include/operators/transpose.hpp diff --git a/nlp_toolkit/backends/neural_engine/executor/include/operators/unsqueeze.hpp 
b/intel_extension_for_transformers/backends/neural_engine/executor/include/operators/unsqueeze.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/executor/include/operators/unsqueeze.hpp rename to intel_extension_for_transformers/backends/neural_engine/executor/include/operators/unsqueeze.hpp diff --git a/nlp_toolkit/backends/neural_engine/executor/include/profiling.hpp b/intel_extension_for_transformers/backends/neural_engine/executor/include/profiling.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/executor/include/profiling.hpp rename to intel_extension_for_transformers/backends/neural_engine/executor/include/profiling.hpp diff --git a/nlp_toolkit/backends/neural_engine/executor/include/sparse_operators/sparse_inner_product.hpp b/intel_extension_for_transformers/backends/neural_engine/executor/include/sparse_operators/sparse_inner_product.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/executor/include/sparse_operators/sparse_inner_product.hpp rename to intel_extension_for_transformers/backends/neural_engine/executor/include/sparse_operators/sparse_inner_product.hpp diff --git a/nlp_toolkit/backends/neural_engine/executor/include/tensor.hpp b/intel_extension_for_transformers/backends/neural_engine/executor/include/tensor.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/executor/include/tensor.hpp rename to intel_extension_for_transformers/backends/neural_engine/executor/include/tensor.hpp diff --git a/nlp_toolkit/backends/neural_engine/executor/include/thread_pool.hpp b/intel_extension_for_transformers/backends/neural_engine/executor/include/thread_pool.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/executor/include/thread_pool.hpp rename to intel_extension_for_transformers/backends/neural_engine/executor/include/thread_pool.hpp diff --git a/nlp_toolkit/backends/neural_engine/executor/python/bind_executor.cpp b/intel_extension_for_transformers/backends/neural_engine/executor/python/bind_executor.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/executor/python/bind_executor.cpp rename to intel_extension_for_transformers/backends/neural_engine/executor/python/bind_executor.cpp diff --git a/nlp_toolkit/backends/neural_engine/executor/python/pybind_tensor.hpp b/intel_extension_for_transformers/backends/neural_engine/executor/python/pybind_tensor.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/executor/python/pybind_tensor.hpp rename to intel_extension_for_transformers/backends/neural_engine/executor/python/pybind_tensor.hpp diff --git a/nlp_toolkit/backends/neural_engine/executor/python/test_model.py b/intel_extension_for_transformers/backends/neural_engine/executor/python/test_model.py similarity index 100% rename from nlp_toolkit/backends/neural_engine/executor/python/test_model.py rename to intel_extension_for_transformers/backends/neural_engine/executor/python/test_model.py diff --git a/nlp_toolkit/backends/neural_engine/executor/src/common.cpp b/intel_extension_for_transformers/backends/neural_engine/executor/src/common.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/executor/src/common.cpp rename to intel_extension_for_transformers/backends/neural_engine/executor/src/common.cpp diff --git a/nlp_toolkit/backends/neural_engine/executor/src/i_malloc.cpp b/intel_extension_for_transformers/backends/neural_engine/executor/src/i_malloc.cpp similarity index 100% rename from 
nlp_toolkit/backends/neural_engine/executor/src/i_malloc.cpp rename to intel_extension_for_transformers/backends/neural_engine/executor/src/i_malloc.cpp diff --git a/nlp_toolkit/backends/neural_engine/executor/src/llga_operators/inner_product_graph.cpp b/intel_extension_for_transformers/backends/neural_engine/executor/src/llga_operators/inner_product_graph.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/executor/src/llga_operators/inner_product_graph.cpp rename to intel_extension_for_transformers/backends/neural_engine/executor/src/llga_operators/inner_product_graph.cpp diff --git a/nlp_toolkit/backends/neural_engine/executor/src/llga_operators/llga_kernel.cpp b/intel_extension_for_transformers/backends/neural_engine/executor/src/llga_operators/llga_kernel.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/executor/src/llga_operators/llga_kernel.cpp rename to intel_extension_for_transformers/backends/neural_engine/executor/src/llga_operators/llga_kernel.cpp diff --git a/nlp_toolkit/backends/neural_engine/executor/src/llga_operators/llga_op_creator.cpp b/intel_extension_for_transformers/backends/neural_engine/executor/src/llga_operators/llga_op_creator.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/executor/src/llga_operators/llga_op_creator.cpp rename to intel_extension_for_transformers/backends/neural_engine/executor/src/llga_operators/llga_op_creator.cpp diff --git a/nlp_toolkit/backends/neural_engine/executor/src/llga_operators/softmax_graph.cpp b/intel_extension_for_transformers/backends/neural_engine/executor/src/llga_operators/softmax_graph.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/executor/src/llga_operators/softmax_graph.cpp rename to intel_extension_for_transformers/backends/neural_engine/executor/src/llga_operators/softmax_graph.cpp diff --git a/nlp_toolkit/backends/neural_engine/executor/src/model.cpp b/intel_extension_for_transformers/backends/neural_engine/executor/src/model.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/executor/src/model.cpp rename to intel_extension_for_transformers/backends/neural_engine/executor/src/model.cpp diff --git a/nlp_toolkit/backends/neural_engine/executor/src/nlp_executor.cc b/intel_extension_for_transformers/backends/neural_engine/executor/src/nlp_executor.cc similarity index 100% rename from nlp_toolkit/backends/neural_engine/executor/src/nlp_executor.cc rename to intel_extension_for_transformers/backends/neural_engine/executor/src/nlp_executor.cc diff --git a/nlp_toolkit/backends/neural_engine/executor/src/op_tuning.cpp b/intel_extension_for_transformers/backends/neural_engine/executor/src/op_tuning.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/executor/src/op_tuning.cpp rename to intel_extension_for_transformers/backends/neural_engine/executor/src/op_tuning.cpp diff --git a/nlp_toolkit/backends/neural_engine/executor/src/operators/binary_add.cpp b/intel_extension_for_transformers/backends/neural_engine/executor/src/operators/binary_add.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/executor/src/operators/binary_add.cpp rename to intel_extension_for_transformers/backends/neural_engine/executor/src/operators/binary_add.cpp diff --git a/nlp_toolkit/backends/neural_engine/executor/src/operators/concat.cpp b/intel_extension_for_transformers/backends/neural_engine/executor/src/operators/concat.cpp similarity index 100% rename from 
nlp_toolkit/backends/neural_engine/executor/src/operators/concat.cpp rename to intel_extension_for_transformers/backends/neural_engine/executor/src/operators/concat.cpp diff --git a/nlp_toolkit/backends/neural_engine/executor/src/operators/convolution.cpp b/intel_extension_for_transformers/backends/neural_engine/executor/src/operators/convolution.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/executor/src/operators/convolution.cpp rename to intel_extension_for_transformers/backends/neural_engine/executor/src/operators/convolution.cpp diff --git a/nlp_toolkit/backends/neural_engine/executor/src/operators/embeddingbag.cpp b/intel_extension_for_transformers/backends/neural_engine/executor/src/operators/embeddingbag.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/executor/src/operators/embeddingbag.cpp rename to intel_extension_for_transformers/backends/neural_engine/executor/src/operators/embeddingbag.cpp diff --git a/nlp_toolkit/backends/neural_engine/executor/src/operators/erf.cpp b/intel_extension_for_transformers/backends/neural_engine/executor/src/operators/erf.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/executor/src/operators/erf.cpp rename to intel_extension_for_transformers/backends/neural_engine/executor/src/operators/erf.cpp diff --git a/nlp_toolkit/backends/neural_engine/executor/src/operators/expand_indices.cpp b/intel_extension_for_transformers/backends/neural_engine/executor/src/operators/expand_indices.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/executor/src/operators/expand_indices.cpp rename to intel_extension_for_transformers/backends/neural_engine/executor/src/operators/expand_indices.cpp diff --git a/nlp_toolkit/backends/neural_engine/executor/src/operators/gather.cpp b/intel_extension_for_transformers/backends/neural_engine/executor/src/operators/gather.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/executor/src/operators/gather.cpp rename to intel_extension_for_transformers/backends/neural_engine/executor/src/operators/gather.cpp diff --git a/nlp_toolkit/backends/neural_engine/executor/src/operators/gather_elements.cpp b/intel_extension_for_transformers/backends/neural_engine/executor/src/operators/gather_elements.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/executor/src/operators/gather_elements.cpp rename to intel_extension_for_transformers/backends/neural_engine/executor/src/operators/gather_elements.cpp diff --git a/nlp_toolkit/backends/neural_engine/executor/src/operators/gelu.cpp b/intel_extension_for_transformers/backends/neural_engine/executor/src/operators/gelu.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/executor/src/operators/gelu.cpp rename to intel_extension_for_transformers/backends/neural_engine/executor/src/operators/gelu.cpp diff --git a/nlp_toolkit/backends/neural_engine/executor/src/operators/group_norm.cpp b/intel_extension_for_transformers/backends/neural_engine/executor/src/operators/group_norm.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/executor/src/operators/group_norm.cpp rename to intel_extension_for_transformers/backends/neural_engine/executor/src/operators/group_norm.cpp diff --git a/nlp_toolkit/backends/neural_engine/executor/src/operators/inner_product.cpp b/intel_extension_for_transformers/backends/neural_engine/executor/src/operators/inner_product.cpp similarity index 100% rename from 
nlp_toolkit/backends/neural_engine/executor/src/operators/inner_product.cpp rename to intel_extension_for_transformers/backends/neural_engine/executor/src/operators/inner_product.cpp diff --git a/nlp_toolkit/backends/neural_engine/executor/src/operators/input.cpp b/intel_extension_for_transformers/backends/neural_engine/executor/src/operators/input.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/executor/src/operators/input.cpp rename to intel_extension_for_transformers/backends/neural_engine/executor/src/operators/input.cpp diff --git a/nlp_toolkit/backends/neural_engine/executor/src/operators/layer_norm.cpp b/intel_extension_for_transformers/backends/neural_engine/executor/src/operators/layer_norm.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/executor/src/operators/layer_norm.cpp rename to intel_extension_for_transformers/backends/neural_engine/executor/src/operators/layer_norm.cpp diff --git a/nlp_toolkit/backends/neural_engine/executor/src/operators/matmul.cpp b/intel_extension_for_transformers/backends/neural_engine/executor/src/operators/matmul.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/executor/src/operators/matmul.cpp rename to intel_extension_for_transformers/backends/neural_engine/executor/src/operators/matmul.cpp diff --git a/nlp_toolkit/backends/neural_engine/executor/src/operators/merged_embeddingbag.cpp b/intel_extension_for_transformers/backends/neural_engine/executor/src/operators/merged_embeddingbag.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/executor/src/operators/merged_embeddingbag.cpp rename to intel_extension_for_transformers/backends/neural_engine/executor/src/operators/merged_embeddingbag.cpp diff --git a/nlp_toolkit/backends/neural_engine/executor/src/operators/one_hot.cpp b/intel_extension_for_transformers/backends/neural_engine/executor/src/operators/one_hot.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/executor/src/operators/one_hot.cpp rename to intel_extension_for_transformers/backends/neural_engine/executor/src/operators/one_hot.cpp diff --git a/nlp_toolkit/backends/neural_engine/executor/src/operators/output.cpp b/intel_extension_for_transformers/backends/neural_engine/executor/src/operators/output.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/executor/src/operators/output.cpp rename to intel_extension_for_transformers/backends/neural_engine/executor/src/operators/output.cpp diff --git a/nlp_toolkit/backends/neural_engine/executor/src/operators/padding_sequence.cpp b/intel_extension_for_transformers/backends/neural_engine/executor/src/operators/padding_sequence.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/executor/src/operators/padding_sequence.cpp rename to intel_extension_for_transformers/backends/neural_engine/executor/src/operators/padding_sequence.cpp diff --git a/nlp_toolkit/backends/neural_engine/executor/src/operators/position_ids.cpp b/intel_extension_for_transformers/backends/neural_engine/executor/src/operators/position_ids.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/executor/src/operators/position_ids.cpp rename to intel_extension_for_transformers/backends/neural_engine/executor/src/operators/position_ids.cpp diff --git a/nlp_toolkit/backends/neural_engine/executor/src/operators/pow.cpp b/intel_extension_for_transformers/backends/neural_engine/executor/src/operators/pow.cpp similarity index 100% rename from 
nlp_toolkit/backends/neural_engine/executor/src/operators/pow.cpp rename to intel_extension_for_transformers/backends/neural_engine/executor/src/operators/pow.cpp diff --git a/nlp_toolkit/backends/neural_engine/executor/src/operators/quantize.cpp b/intel_extension_for_transformers/backends/neural_engine/executor/src/operators/quantize.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/executor/src/operators/quantize.cpp rename to intel_extension_for_transformers/backends/neural_engine/executor/src/operators/quantize.cpp diff --git a/nlp_toolkit/backends/neural_engine/executor/src/operators/range.cpp b/intel_extension_for_transformers/backends/neural_engine/executor/src/operators/range.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/executor/src/operators/range.cpp rename to intel_extension_for_transformers/backends/neural_engine/executor/src/operators/range.cpp diff --git a/nlp_toolkit/backends/neural_engine/executor/src/operators/reduce_mean.cpp b/intel_extension_for_transformers/backends/neural_engine/executor/src/operators/reduce_mean.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/executor/src/operators/reduce_mean.cpp rename to intel_extension_for_transformers/backends/neural_engine/executor/src/operators/reduce_mean.cpp diff --git a/nlp_toolkit/backends/neural_engine/executor/src/operators/reduce_sum.cpp b/intel_extension_for_transformers/backends/neural_engine/executor/src/operators/reduce_sum.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/executor/src/operators/reduce_sum.cpp rename to intel_extension_for_transformers/backends/neural_engine/executor/src/operators/reduce_sum.cpp diff --git a/nlp_toolkit/backends/neural_engine/executor/src/operators/reorder.cpp b/intel_extension_for_transformers/backends/neural_engine/executor/src/operators/reorder.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/executor/src/operators/reorder.cpp rename to intel_extension_for_transformers/backends/neural_engine/executor/src/operators/reorder.cpp diff --git a/nlp_toolkit/backends/neural_engine/executor/src/operators/reshape.cpp b/intel_extension_for_transformers/backends/neural_engine/executor/src/operators/reshape.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/executor/src/operators/reshape.cpp rename to intel_extension_for_transformers/backends/neural_engine/executor/src/operators/reshape.cpp diff --git a/nlp_toolkit/backends/neural_engine/executor/src/operators/scatter_elements.cpp b/intel_extension_for_transformers/backends/neural_engine/executor/src/operators/scatter_elements.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/executor/src/operators/scatter_elements.cpp rename to intel_extension_for_transformers/backends/neural_engine/executor/src/operators/scatter_elements.cpp diff --git a/nlp_toolkit/backends/neural_engine/executor/src/operators/shape.cpp b/intel_extension_for_transformers/backends/neural_engine/executor/src/operators/shape.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/executor/src/operators/shape.cpp rename to intel_extension_for_transformers/backends/neural_engine/executor/src/operators/shape.cpp diff --git a/nlp_toolkit/backends/neural_engine/executor/src/operators/slice.cpp b/intel_extension_for_transformers/backends/neural_engine/executor/src/operators/slice.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/executor/src/operators/slice.cpp rename to 
intel_extension_for_transformers/backends/neural_engine/executor/src/operators/slice.cpp diff --git a/nlp_toolkit/backends/neural_engine/executor/src/operators/softmax.cpp b/intel_extension_for_transformers/backends/neural_engine/executor/src/operators/softmax.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/executor/src/operators/softmax.cpp rename to intel_extension_for_transformers/backends/neural_engine/executor/src/operators/softmax.cpp diff --git a/nlp_toolkit/backends/neural_engine/executor/src/operators/split.cpp b/intel_extension_for_transformers/backends/neural_engine/executor/src/operators/split.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/executor/src/operators/split.cpp rename to intel_extension_for_transformers/backends/neural_engine/executor/src/operators/split.cpp diff --git a/nlp_toolkit/backends/neural_engine/executor/src/operators/strided_slice.cpp b/intel_extension_for_transformers/backends/neural_engine/executor/src/operators/strided_slice.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/executor/src/operators/strided_slice.cpp rename to intel_extension_for_transformers/backends/neural_engine/executor/src/operators/strided_slice.cpp diff --git a/nlp_toolkit/backends/neural_engine/executor/src/operators/token_type_ids.cpp b/intel_extension_for_transformers/backends/neural_engine/executor/src/operators/token_type_ids.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/executor/src/operators/token_type_ids.cpp rename to intel_extension_for_transformers/backends/neural_engine/executor/src/operators/token_type_ids.cpp diff --git a/nlp_toolkit/backends/neural_engine/executor/src/operators/topk.cpp b/intel_extension_for_transformers/backends/neural_engine/executor/src/operators/topk.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/executor/src/operators/topk.cpp rename to intel_extension_for_transformers/backends/neural_engine/executor/src/operators/topk.cpp diff --git a/nlp_toolkit/backends/neural_engine/executor/src/operators/transpose.cpp b/intel_extension_for_transformers/backends/neural_engine/executor/src/operators/transpose.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/executor/src/operators/transpose.cpp rename to intel_extension_for_transformers/backends/neural_engine/executor/src/operators/transpose.cpp diff --git a/nlp_toolkit/backends/neural_engine/executor/src/operators/unsqueeze.cpp b/intel_extension_for_transformers/backends/neural_engine/executor/src/operators/unsqueeze.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/executor/src/operators/unsqueeze.cpp rename to intel_extension_for_transformers/backends/neural_engine/executor/src/operators/unsqueeze.cpp diff --git a/nlp_toolkit/backends/neural_engine/executor/src/sparse_operators/sparse_inner_product.cpp b/intel_extension_for_transformers/backends/neural_engine/executor/src/sparse_operators/sparse_inner_product.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/executor/src/sparse_operators/sparse_inner_product.cpp rename to intel_extension_for_transformers/backends/neural_engine/executor/src/sparse_operators/sparse_inner_product.cpp diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/CMakeLists.txt b/intel_extension_for_transformers/backends/neural_engine/kernels/CMakeLists.txt similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/CMakeLists.txt rename to 
intel_extension_for_transformers/backends/neural_engine/kernels/CMakeLists.txt diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/README.md b/intel_extension_for_transformers/backends/neural_engine/kernels/README.md similarity index 78% rename from nlp_toolkit/backends/neural_engine/SparseLib/README.md rename to intel_extension_for_transformers/backends/neural_engine/kernels/README.md index 4f65b60ff1f..5aa9391aef6 100644 --- a/nlp_toolkit/backends/neural_engine/SparseLib/README.md +++ b/intel_extension_for_transformers/backends/neural_engine/kernels/README.md @@ -1,14 +1,14 @@ -Just-in-time Deep Neural Network Library (SparseLib) +Transformers-accelerated Libraries =========================================== ## Abstract -SparseLib is a high-performance operator computing library implemented by assembly. SparseLib contains a JIT domain, a kernel domain, and a scheduling proxy framework. +Transformers-accelerated Libraries (formerly known as **SparseLib**) is a high-performance operator computing library implemented in assembly. It contains a JIT domain, a kernel domain, and a scheduling proxy framework. ## Installation ### Build ```shell -cd SparseLib/ +cd kernels/ mkdir build cd build cmake .. @@ -17,7 +17,7 @@ make -j ### Test ```shell -cd test/gtest/SparseLib/ +cd test/gtest/kernels/ mkdir build cd build cmake .. @@ -26,7 +26,7 @@ make -j ``` ### Performance -We provide a benchmark tool to measure the performance out of box, please refer to [benchmark](../test/SparseLib/benchmark/README.md) for more details. +We provide a benchmark tool to measure the performance out of the box; please refer to [benchmark](../test/kernels/benchmark/README.md) for more details. For advanced users, please refer to [profling section](docs/profiling.md). 
## API reference for users diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/cmake/FindVTune.cmake b/intel_extension_for_transformers/backends/neural_engine/kernels/cmake/FindVTune.cmake similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/cmake/FindVTune.cmake rename to intel_extension_for_transformers/backends/neural_engine/kernels/cmake/FindVTune.cmake diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/docs/imgs/kernel_amx_bf16x16_calc.png b/intel_extension_for_transformers/backends/neural_engine/kernels/docs/imgs/kernel_amx_bf16x16_calc.png similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/docs/imgs/kernel_amx_bf16x16_calc.png rename to intel_extension_for_transformers/backends/neural_engine/kernels/docs/imgs/kernel_amx_bf16x16_calc.png diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/docs/imgs/kernel_amx_bf16x16_relayout.png b/intel_extension_for_transformers/backends/neural_engine/kernels/docs/imgs/kernel_amx_bf16x16_relayout.png similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/docs/imgs/kernel_amx_bf16x16_relayout.png rename to intel_extension_for_transformers/backends/neural_engine/kernels/docs/imgs/kernel_amx_bf16x16_relayout.png diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/docs/imgs/kernel_avx512f_pattern_base.png b/intel_extension_for_transformers/backends/neural_engine/kernels/docs/imgs/kernel_avx512f_pattern_base.png similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/docs/imgs/kernel_avx512f_pattern_base.png rename to intel_extension_for_transformers/backends/neural_engine/kernels/docs/imgs/kernel_avx512f_pattern_base.png diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/docs/imgs/kernel_avx512f_pattern_unroll4.png b/intel_extension_for_transformers/backends/neural_engine/kernels/docs/imgs/kernel_avx512f_pattern_unroll4.png similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/docs/imgs/kernel_avx512f_pattern_unroll4.png rename to intel_extension_for_transformers/backends/neural_engine/kernels/docs/imgs/kernel_avx512f_pattern_unroll4.png diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/docs/imgs/kernel_vnni_calc.png b/intel_extension_for_transformers/backends/neural_engine/kernels/docs/imgs/kernel_vnni_calc.png similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/docs/imgs/kernel_vnni_calc.png rename to intel_extension_for_transformers/backends/neural_engine/kernels/docs/imgs/kernel_vnni_calc.png diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/docs/imgs/kernel_vnni_pattern.png b/intel_extension_for_transformers/backends/neural_engine/kernels/docs/imgs/kernel_vnni_pattern.png similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/docs/imgs/kernel_vnni_pattern.png rename to intel_extension_for_transformers/backends/neural_engine/kernels/docs/imgs/kernel_vnni_pattern.png diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/docs/imgs/kernel_vnni_pattern_left_1x4.png b/intel_extension_for_transformers/backends/neural_engine/kernels/docs/imgs/kernel_vnni_pattern_left_1x4.png similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/docs/imgs/kernel_vnni_pattern_left_1x4.png rename to intel_extension_for_transformers/backends/neural_engine/kernels/docs/imgs/kernel_vnni_pattern_left_1x4.png diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/docs/imgs/kernel_vnni_pattern_left_4x1.png 
b/intel_extension_for_transformers/backends/neural_engine/kernels/docs/imgs/kernel_vnni_pattern_left_4x1.png similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/docs/imgs/kernel_vnni_pattern_left_4x1.png rename to intel_extension_for_transformers/backends/neural_engine/kernels/docs/imgs/kernel_vnni_pattern_left_4x1.png diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/docs/imgs/kernel_vnni_pattern_right_1x16.png b/intel_extension_for_transformers/backends/neural_engine/kernels/docs/imgs/kernel_vnni_pattern_right_1x16.png similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/docs/imgs/kernel_vnni_pattern_right_1x16.png rename to intel_extension_for_transformers/backends/neural_engine/kernels/docs/imgs/kernel_vnni_pattern_right_1x16.png diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/docs/imgs/kernel_vnni_pattern_right_4x1.png b/intel_extension_for_transformers/backends/neural_engine/kernels/docs/imgs/kernel_vnni_pattern_right_4x1.png similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/docs/imgs/kernel_vnni_pattern_right_4x1.png rename to intel_extension_for_transformers/backends/neural_engine/kernels/docs/imgs/kernel_vnni_pattern_right_4x1.png diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/docs/imgs/kernel_vnni_perf.png b/intel_extension_for_transformers/backends/neural_engine/kernels/docs/imgs/kernel_vnni_perf.png similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/docs/imgs/kernel_vnni_perf.png rename to intel_extension_for_transformers/backends/neural_engine/kernels/docs/imgs/kernel_vnni_perf.png diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/docs/imgs/relu_formula.svg b/intel_extension_for_transformers/backends/neural_engine/kernels/docs/imgs/relu_formula.svg similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/docs/imgs/relu_formula.svg rename to intel_extension_for_transformers/backends/neural_engine/kernels/docs/imgs/relu_formula.svg diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/docs/kernel_desc/kernel_amx.md b/intel_extension_for_transformers/backends/neural_engine/kernels/docs/kernel_desc/kernel_amx.md similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/docs/kernel_desc/kernel_amx.md rename to intel_extension_for_transformers/backends/neural_engine/kernels/docs/kernel_desc/kernel_amx.md diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/docs/kernel_desc/kernel_avx512f.md b/intel_extension_for_transformers/backends/neural_engine/kernels/docs/kernel_desc/kernel_avx512f.md similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/docs/kernel_desc/kernel_avx512f.md rename to intel_extension_for_transformers/backends/neural_engine/kernels/docs/kernel_desc/kernel_avx512f.md diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/docs/kernel_desc/kernel_vnni.md b/intel_extension_for_transformers/backends/neural_engine/kernels/docs/kernel_desc/kernel_vnni.md similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/docs/kernel_desc/kernel_vnni.md rename to intel_extension_for_transformers/backends/neural_engine/kernels/docs/kernel_desc/kernel_vnni.md diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/docs/kernel_desc/postop_injector.md b/intel_extension_for_transformers/backends/neural_engine/kernels/docs/kernel_desc/postop_injector.md similarity index 90% rename from 
nlp_toolkit/backends/neural_engine/SparseLib/docs/kernel_desc/postop_injector.md rename to intel_extension_for_transformers/backends/neural_engine/kernels/docs/kernel_desc/postop_injector.md index 7c58eb68915..f22b40e5464 100644 --- a/nlp_toolkit/backends/neural_engine/SparseLib/docs/kernel_desc/postop_injector.md +++ b/intel_extension_for_transformers/backends/neural_engine/kernels/docs/kernel_desc/postop_injector.md @@ -1,10 +1,10 @@ # Introduction -op-fusion is a very widely used optimization approach in Deep-Learning.Consider we have two ops,Conv and Relu,in traditional way,we apply Conv op firstly,then store the value to the memory,after that we load the value and apply Relu.Obviously there have a useless load&store operations,we can fuse the Conv&Relu to remove the useless I/O,this is the key idea about op-fusion.
In SparseLib,we will provide a new class named injector for the op-fusion.In the perspective of the developers who want to apply the op-fusion optimization,they can make injector as a member of their jit_kernel class and initalize it in the kernel class's construct function,when they want to apply postop,just need to call **injector->vector_compute** and tell injector what registers has been used by **injector->escape_regs**.Besides,upper level user also should call **injector->prepare_table** to prepare the LUT which postop need in the end of thier xbyak kernel.
injector supports 8 operators currently,there are exp,tanh,gelu,relu,linear,quantize(fp32->u8/s8),dequantize(u8/s8->fp32) and look-up result from LUT(as experimental API now).Injector also supports a postop-chain for apply multiple postops sequentially. +op-fusion is a widely used optimization approach in deep learning. Consider two ops, Conv and Relu: in the traditional way, we apply the Conv op first, store the result to memory, then load it back and apply Relu. Obviously this incurs a useless load & store pair, so we can fuse Conv and Relu to remove the useless I/O. This is the key idea of op-fusion.
In Transformers-accelerated Libraries, we provide a new class named injector for op-fusion. Developers who want to apply the op-fusion optimization can make an injector a member of their jit_kernel class and initialize it in the kernel class's constructor. To apply a postop, they just need to call **injector->vector_compute** and tell the injector which registers are already in use via **injector->escape_regs**. Besides, the upper-level user should also call **injector->prepare_table** at the end of their xbyak kernel to prepare the LUT that the postops need.
The injector currently supports 8 operators: exp, tanh, gelu, relu, linear, quantize (fp32->u8/s8), dequantize (u8/s8->fp32), and look-up result from LUT (an experimental API for now). The injector also supports a postop-chain for applying multiple postops sequentially.
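To make the call order above concrete, here is a minimal compilable C++ sketch. All types and signatures here are stand-ins for illustration only (the real injector lives inside the library's JIT kernels and operates on Xbyak registers, not ints):
```cpp
// Sketch of the injector usage pattern described above; every type is a stub.
#include <vector>

enum class postop_alg { exp, tanh, gelu, relu, linear, quantize, dequantize };

struct injector_stub {
  // Tell the injector which registers the surrounding kernel already uses (stub).
  void escape_regs(const std::vector<int>& /*used_regs*/) {}
  // Apply the postop chain in place on a vector register (stub).
  void vector_compute(int /*vreg*/, const std::vector<postop_alg>& /*chain*/) {}
  // Emit the look-up table the postops need; goes at the end of the kernel (stub).
  void prepare_table() {}
};

class my_jit_kernel {  // hypothetical kernel owning an injector as a member
 public:
  my_jit_kernel() : injector_() {}  // initialize the injector in the constructor
  void generate() {
    // ... emit the main kernel body here ...
    injector_.escape_regs({0, 1, 2});  // registers 0-2 are taken by the kernel
    injector_.vector_compute(31, {postop_alg::gelu, postop_alg::quantize});  // fused postop chain
    injector_.prepare_table();  // LUT emission comes last
  }
 private:
  injector_stub injector_;
};

int main() { my_jit_kernel().generate(); }
```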
# Proposal -## SparseLib developer's perspective +## Transformers-accelerated Libraries developer's perspective ### Framework changes @@ -163,7 +163,7 @@ void jit_eltwiseop_t::generate() { ``` **NOTE:The postops will be apply **`in-place`** and storing work is upper op's task.** -## SparseLib user's perspective +## Transformers-accelerated Libraries user's perspective This is the guide about how to set op-fusion in UT from the user's perspective. #### step0. Prepare the postop_attr diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/docs/profiling.md b/intel_extension_for_transformers/backends/neural_engine/kernels/docs/profiling.md similarity index 91% rename from nlp_toolkit/backends/neural_engine/SparseLib/docs/profiling.md rename to intel_extension_for_transformers/backends/neural_engine/kernels/docs/profiling.md index b0c5de9687a..b2bb529a5eb 100644 --- a/nlp_toolkit/backends/neural_engine/SparseLib/docs/profiling.md +++ b/intel_extension_for_transformers/backends/neural_engine/kernels/docs/profiling.md @@ -1,10 +1,16 @@ ### Performance and Profiling -* We support a brief verbose logging for kernel execution + +## Verbose +We support a brief verbose logger for kernel execution: ```shell SPARSE_LIB_VERBOSE=1 ./{executable} +sparselib_verbose,info,cpu,runtime:CPU,nthr:224 # general info +sparselib_verbose,exec,cpu,sparse_matmul,shape_256_256_128,14.4658 # first kernel +sparselib_verbose,exec,cpu,sparse_matmul,shape_256_256_128,2.56982 # second kernel ```
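As a hypothetical post-processing step (not part of the library), the comma-separated verbose lines above can be aggregated with standard shell tools, e.g.:
```shell
# Assumption: verbose output is comma-separated and the execution time in ms
# is the last field of each "exec" line, as in the sample log above.
SPARSE_LIB_VERBOSE=1 ./{executable} 2>&1 \
  | awk -F, '/^sparselib_verbose,exec/ { sum += $NF; n++ } END { if (n) printf "kernels: %d, avg ms: %.4f\n", n, sum / n }'
```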
-* For advanced users we also support vtune profling for kernels execution through [ITT Tasks](https://www.intel.com/content/www/us/en/develop/documentation/vtune-help/top/api-support/instrumentation-and-tracing-technology-apis/basic-usage-and-configuration/viewing-itt-api-task-data.html), to enable it you can follow the instructions: +## VTune +For advanced users, we also support VTune profiling of kernel execution through [ITT Tasks](https://www.intel.com/content/www/us/en/develop/documentation/vtune-help/top/api-support/instrumentation-and-tracing-technology-apis/basic-usage-and-configuration/viewing-itt-api-task-data.html); to enable it, follow these instructions: ```shell mkdir build cd build cmake .. @@ -17,7 +23,11 @@ make -j SPARSE_LIB_VTUNE=1 ./{executable} ``` -* There is another way to verify the code generation itself via [SDE](https://www.intel.com/content/www/us/en/developer/articles/tool/software-development-emulator.html). SDE is also simulators for future intel CPU hardware so you can verify the implementation using feature ISAs without real hardware support: +We recommend using the SSH connection method in VTune to analyze the details in the GUI. +## SDE +There is another way to verify the code generation itself via [SDE](https://www.intel.com/content/www/us/en/developer/articles/tool/software-development-emulator.html). SDE is also a simulator for future Intel CPU hardware, so you can verify an implementation that uses future ISAs **without** real hardware support: ```shell SPARSE_LIB_DUMP=1 ./{executable} path/to/sde/xed64 -64 -ir *.bin >> *.txt ```
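A hypothetical sanity check on the dumped disassembly (assuming the xed64 step above produced *.txt files): grep for the ISA you expect the JIT to have emitted, e.g. AMX tile or VNNI instructions.
```shell
# Assumes *.txt disassembly from the xed64 command above; the mnemonics are examples
grep -icE 'tdpbf16ps|vpdpbusd' *.txt
```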
diff --git a/intel_extension_for_transformers/backends/neural_engine/kernels/docs/validated_data.md b/intel_extension_for_transformers/backends/neural_engine/kernels/docs/validated_data.md new file mode 100644 index 00000000000..3acc8d2a26c --- /dev/null +++ b/intel_extension_for_transformers/backends/neural_engine/kernels/docs/validated_data.md @@ -0,0 +1,8039 @@ +Performance results tested on 07/10/2022 with an Intel(R) Xeon Platinum 8375C processor on an AWS c6i.12xlarge instance. +Performance varies by use, configuration and other factors. See the platform configuration below for configuration details. For more complete information about performance and benchmark results, visit www.intel.com/benchmarks
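How to read the tables below (an inference from the data, not a statement in the patch): the Gflops column is consistent with counting 2·M·K·N floating-point operations per matmul, i.e. Gflops = 2·M·K·N / (latency × 10^6) with latency in ms. For example, the first row (M=256, K=256, N=16, latency 0.003486 ms) gives 2·256·256·16 / (0.003486 × 10^6) ≈ 601.6, matching its Gflops value.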
## Single Thread
M | K | N | Sparsity | Latency (ms) | Gflops
256256160.70.003486601.578
256256160.750.003107675.028
256256160.80.002614802.259
256256160.850.002166968.068
256256160.90.0016471273.64
2561024160.70.011226747.284
2561024160.750.009475885.377
2561024160.80.0076411097.78
2561024160.850.0058451435.13
2561024160.90.0040712060.33
1024256160.70.012745658.198
1024256160.750.011113754.835
1024256160.80.009402892.256
1024256160.850.0076021103.45
1024256160.90.0054941527.01
10241024160.70.044202759.116
10241024160.750.036423921.251
10241024160.80.0292451147.34
10241024160.850.0223471501.5
10241024160.90.0149062251.11
10244096160.70.216338620.408
10244096160.750.179593747.345
10244096160.80.135054993.806
10244096160.850.0946981417.32
10244096160.90.060972201.37
40961024160.70.190508704.526
40961024160.750.159566841.14
40961024160.80.1308631025.64
40961024160.850.1039271291.46
40961024160.90.0739961813.85
768768160.70.02504753.784
768768160.750.021073895.682
768768160.80.0170991103.84
768768160.850.0130781443.24
768768160.90.0092332044.13
7683072160.70.112882668.818
7683072160.750.087939858.525
7683072160.80.0688351096.79
7683072160.850.0487341549.17
7683072160.90.0317562377.43
3072756160.70.109308679.894
3072756160.750.092247805.638
3072756160.80.075911979.016
3072756160.850.0558951329.61
3072756160.90.0358752071.57
256256320.70.00565742.297
256256320.750.005036832.858
256256320.80.004475937.182
256256320.850.0036851138.27
256256320.90.002711547.98
2561024320.70.018348914.386
2561024320.750.0154121088.57
2561024320.80.012571334.74
2561024320.850.0095991747.72
2561024320.90.0067762475.86
1024256320.70.020632813.162
1024256320.750.018427910.483
1024256320.80.016471018.66
1024256320.850.013581235.43
1024256320.90.0105521589.96
10241024320.70.072121930.506
10241024320.750.0596131125.74
10241024320.80.0483781387.18
10241024320.850.0366751829.83
10241024320.90.0248572699.78
10244096320.70.376539712.902
10244096320.750.31929840.727
10244096320.80.2612651027.45
10244096320.850.1935791386.7
10244096320.90.1184292266.64
40961024320.70.309195868.177
40961024320.750.2566191046.05
40961024320.80.2134541257.58
40961024320.850.1651921624.99
40961024320.90.1184022267.16
768768320.70.040293936.855
768768320.750.0341171106.45
768768320.80.0275281371.27
768768320.850.021131786.51
768768320.90.0150692505.01
7683072320.70.218765690.215
7683072320.750.180921834.592
7683072320.80.1427781057.55
7683072320.850.0976921545.62
7683072320.90.0620032435.27
3072756320.70.174101853.73
3072756320.750.1483691001.79
3072756320.80.1235541203
3072756320.850.0923661609.21
3072756320.90.0591282513.78
256256480.70.007447844.786
256256480.750.006572957.332
256256480.80.0057021103.43
256256480.850.0048311302.3
256256480.90.0036741712.4
2561024480.70.0247061018.6
2561024480.750.0207461213.03
2561024480.80.0169211487.22
2561024480.850.0129941936.79
2561024480.90.0090382784.57
1024256480.70.027588912.197
1024256480.750.0240421046.77
1024256480.80.0207881210.62
1024256480.850.0177871414.82
1024256480.90.0140131795.91
10241024480.70.0994571012.13
10241024480.750.0819731228.01
10241024480.80.0660571523.88
10241024480.850.0497082025.1
10241024480.90.0341912944.13
10244096480.70.500698804.184
10244096480.750.420619957.287
10244096480.80.3375291192.94
10244096480.850.2512211602.79
10244096480.90.1586682537.72
40961024480.70.412912975.155
40961024480.750.3463691162.5
40961024480.80.28311422.3
40961024480.850.2205851825.39
40961024480.90.1561682578.33
768768480.70.053841051.7
768768480.750.0455071244.28
768768480.80.0369781531.26
768768480.850.0284831987.96
768768480.90.0200462824.7
7683072480.70.283445799.069
7683072480.750.236101959.302
7683072480.80.1887451199.99
7683072480.850.1355321671.14
7683072480.90.0845542678.66
3072756480.70.227976977.971
3072756480.750.194631145.53
3072756480.80.1612511382.65
3072756480.850.1235851804.05
3072756480.90.0840322653.19
256256640.70.009405891.898
256256640.750.008221020.53
256256640.80.0070131196.08
256256640.850.0057991446.56
256256640.90.0046471805.15
2561024640.70.033634997.629
2561024640.750.0276091215.36
2561024640.80.0224431495.09
2561024640.850.0170811964.47
2561024640.90.0117542854.84
1024256640.70.03586935.698
1024256640.750.0310081082.11
1024256640.80.0260491288.15
1024256640.850.0215161559.49
1024256640.90.0170111972.57
10241024640.70.1332951006.92
10241024640.750.1111861207.14
10241024640.80.0881111523.27
10241024640.850.0665522016.74
10241024640.90.0454422953.63
10244096640.70.611718877.645
10244096640.750.5333291006.64
10244096640.80.4302881247.7
10244096640.850.3071411747.96
10244096640.90.1989872698.02
40961024640.70.556249965.163
40961024640.750.4627371160.21
40961024640.80.375371430.24
40961024640.850.283211895.66
40961024640.90.2039012633
768768640.70.0716361053.91
768768640.750.0603521250.96
768768640.80.049011540.46
768768640.850.0375682009.61
768768640.90.0267482822.51
7683072640.70.3572845.437
7683072640.750.2995611008.11
7683072640.80.2287521320.16
7683072640.850.1656481823.08
7683072640.90.1056562858.23
3072756640.70.298907994.529
3072756640.750.2555981163.04
3072756640.80.208241427.55
3072756640.850.1599111858.98
3072756640.90.1112112673.04
256256800.70.0153685.358
256256800.750.013145797.724
256256800.80.010967956.132
256256800.850.0092111138.39
256256800.90.0075541388.1
2561024800.70.055308758.351
2561024800.750.045848914.826
2561024800.80.0367261142.06
2561024800.850.0275351523.26
2561024800.90.0184642271.61
1024256800.70.059788701.527
1024256800.750.051022822.062
1024256800.80.042164994.75
1024256800.850.0333441257.91
1024256800.90.0296391415.15
10241024800.70.22241754.337
10241024800.750.18324915.587
10241024800.80.1453221154.49
10241024800.850.10831549.15
10241024800.90.0728842301.9
10244096800.71.11793600.298
10244096800.750.929398722.068
10244096800.80.720963930.823
10244096800.850.5100391315.76
10244096800.90.3365191994.21
40961024800.70.909941737.508
40961024800.750.753487890.644
40961024800.80.6064771106.54
40961024800.850.4599151459.16
40961024800.90.3034552211.49
768768800.70.123429764.586
768768800.750.103035915.923
768768800.80.0825031143.86
768768800.850.0625581508.55
768768800.90.0426942210.42
7683072800.70.60813620.734
7683072800.750.483053781.462
7683072800.80.3573481056.36
7683072800.850.2567151470.45
7683072800.90.1691192232.09
3072756800.70.503341738.245
3072756800.750.42134881.923
3072756800.80.3428431083.85
3072756800.850.2551991456.08
3072756800.90.1740912134.45
256256960.70.014267881.979
256256960.750.0123971015.03
256256960.80.0105871188.58
256256960.850.0086081461.71
256256960.90.0065541919.99
2561024960.70.053254945.119
2561024960.750.0447551124.61
2561024960.80.0348391444.7
2561024960.850.0266891885.89
2561024960.90.0177432836.69
1024256960.70.054911916.608
1024256960.750.0475551058.39
1024256960.80.0401661253.1
1024256960.850.0326951539.42
1024256960.90.0250322010.7
10241024960.70.220947911.2
10241024960.750.1783241128.99
10241024960.80.1373611465.68
10241024960.850.1031261952.25
10241024960.90.0701432870.23
10244096960.71.06648755.11
10244096960.750.893959900.831
10244096960.80.7188931120.2
10244096960.850.5374051498.51
10244096960.90.2806862869.06
40961024960.70.902588892.219
40961024960.750.7306781102.14
40961024960.80.5828711381.62
40961024960.850.4441331813.21
40961024960.90.2984892697.94
768768960.70.1162974.581
768768960.750.0955951184.64
768768960.80.0781091449.85
768768960.850.0596191899.51
768768960.90.0414052735.1
7683072960.70.596568759.318
7683072960.750.497127911.205
7683072960.80.3966341142.07
7683072960.850.2869731578.49
7683072960.90.1674732704.83
3072756960.70.472466943.787
3072756960.750.3910551140.27
3072756960.80.3198651394.05
3072756960.850.2474911801.71
3072756960.90.1698742624.93
2562561120.70.021379686.67
2562561120.750.018228805.358
2562561120.80.015207965.36
2562561120.850.0120531218.01
2562561120.90.0103961412.07
25610241120.70.077159761.028
25610241120.750.06424914.074
25610241120.80.0514671140.93
25610241120.850.0387891513.85
25610241120.90.0256872285.98
10242561120.70.083622702.211
10242561120.750.071292823.664
10242561120.80.058915996.697
10242561120.850.0484071213.06
10242561120.90.0411331427.57
102410241120.70.311691753.569
102410241120.750.256635915.232
102410241120.80.2049731145.91
102410241120.850.1530861534.31
102410241120.90.1012062320.83
102440961120.71.59625588.583
102440961120.751.33246705.106
102440961120.81.03849904.703
102440961120.850.7389391271.45
102440961120.90.4803081956.09
409610241120.71.26056745.324
409610241120.751.04874895.862
409610241120.80.8429361114.59
409610241120.850.6355541478.28
409610241120.90.421452229.27
7687681120.70.174215758.375
7687681120.750.145836905.956
7687681120.80.1167011132.13
7687681120.850.0876731506.98
7687681120.90.05952220.51
76830721120.70.883258598.333
76830721120.750.711146743.142
76830721120.80.543634972.129
76830721120.850.4024061313.3
76830721120.90.2669451979.74
30727561120.70.70404738.913
30727561120.750.592043878.695
30727561120.80.4754441094.19
30727561120.850.3589321449.37
30727561120.90.2407392160.95
2562561280.70.018438909.907
2562561280.750.0158711057.07
2562561280.80.0135041242.39
2562561280.850.0109931526.22
2562561280.90.008262031.1
25610241280.70.0612721095.26
25610241280.750.0609621100.83
25610241280.80.0467291436.13
25610241280.850.0340361971.69
25610241280.90.0231282901.6
10242561280.70.071725935.645
10242561280.750.0616921087.8
10242561280.80.052161286.61
10242561280.850.0423271585.5
10242561280.90.0321112089.91
102410241280.70.286461937.076
102410241280.750.2425121106.89
102410241280.80.1817831476.68
102410241280.850.1390211930.9
102410241280.90.0870123085.04
102440961280.71.36322787.652
102440961280.751.08991985.163
102440961280.80.8801851219.91
102440961280.850.6945331545.99
102440961280.90.3663282931.09
409610241280.71.15748927.652
409610241280.750.9378241144.93
409610241280.80.7453571440.57
409610241280.850.5580611924.06
409610241280.90.3913872743.43
7687681280.70.157349959.616
7687681280.750.1298061163.23
7687681280.80.1030831464.78
7687681280.850.0784421924.93
7687681280.90.0536392815.02
76830721280.70.767569786.874
76830721280.750.641585941.387
76830721280.80.4885581236.25
76830721280.850.3556511698.24
76830721280.90.2309452615.25
30727561280.70.611059972.97
30727561280.750.5141081156.45
30727561280.80.4165711427.23
30727561280.850.3180941869.08
30727561280.90.2250232642.14
2562562560.70.037509894.564
2562562560.750.031761056.49
2562562560.80.0261891281.25
2562562560.850.0211151589.12
2562562560.90.0158952110.99
25610242560.70.141333949.658
25610242560.750.1336471004.27
25610242560.80.11342.18
25610242560.850.0710331889.52
25610242560.90.0429923121.95
10242562560.70.147474910.11
10242562560.750.1253441070.79
10242562560.80.1042941286.92
10242562560.850.0849771579.47
10242562560.90.0624732148.41
102410242560.70.701639765.167
102410242560.750.5346151004.22
102410242560.80.3597911492.17
102410242560.850.26122055.4
102410242560.90.1831932930.62
102440962560.72.92067735.271
102440962560.752.4402880.046
102440962560.81.691861269.3
102440962560.851.174361828.64
102440962560.90.8801792439.83
409610242560.72.62123819.264
409610242560.752.14905999.271
409610242560.81.642531307.43
409610242560.851.14131881.61
409610242560.90.7714392783.74
7687682560.70.369429817.451
7687682560.750.2882361047.72
7687682560.80.2095261441.3
7687682560.850.1481152038.89
7687682560.90.1060892846.58
76830722560.71.53204788.465
76830722560.751.28593939.368
76830722560.81.08521113.12
76830722560.850.6853821762.46
76830722560.90.4083572958.09
30727562560.71.4521818.87
30727562560.751.134661047.97
30727562560.80.8539161392.51
30727562560.850.6426721850.22
30727562560.90.4464422663.47
2562563840.70.057419876.569
2562563840.750.0460811092.24
2562563840.80.0389361292.69
2562563840.850.0312791609.12
2562563840.90.023452146.36
25610243840.70.204966982.242
25610243840.750.173641159.45
25610243840.80.1410971426.86
25610243840.850.1051921913.89
25610243840.90.0677772970.44
10242563840.70.21269946.573
10242563840.750.1915641050.96
10242563840.80.1563681287.52
10242563840.850.12861565.52
10242563840.90.0933572156.53
102410243840.70.84983947.609
102410243840.750.7897771019.66
102410243840.80.5981681346.29
102410243840.850.4334921857.72
102410243840.90.2695042988.11
102440963840.73.74845859.348
102440963840.753.96945811.505
102440963840.82.770821162.55
102440963840.851.764071826.02
102440963840.91.097192935.88
409610243840.73.73162863.224
409610243840.753.002411072.88
409610243840.82.390241347.66
409610243840.851.773171816.65
409610243840.91.195362694.79
7687683840.70.517278875.708
7687683840.750.3822081185.18
7687683840.80.3178371425.21
7687683840.850.2332311942.22
7687683840.90.1529252962.14
76830723840.72.44309741.658
76830723840.751.624091115.66
76830723840.81.649631098.39
76830723840.851.039231743.55
76830723840.90.6250892898.69
30727563840.72.07006861.632
30727563840.751.642471085.94
30727563840.81.333861337.2
30727563840.850.9949351792.71
30727563840.90.683832608.29
2562565120.70.071358940.456
2562565120.750.0612461095.73
2562565120.80.0516791298.58
2562565120.850.0414831617.73
2562565120.90.0309892165.6
25610245120.70.282241951.085
25610245120.750.2388551123.84
25610245120.80.1937131385.74
25610245120.850.1467811828.81
25610245120.90.0938962858.86
10242565120.70.286141938.122
10242565120.750.2571381043.94
10242565120.80.2142081253.16
10242565120.850.1675751601.88
10242565120.90.1240722163.54
102410245120.71.21431884.237
102410245120.750.9552511124.04
102410245120.80.7752941384.95
102410245120.850.632821696.76
102410245120.90.373332876.12
102440965120.74.87335881.317
102440965120.753.650591176.51
102440965120.82.947331457.24
102440965120.852.622411637.79
102440965120.91.477522906.88
409610245120.75.75786745.931
409610245120.754.83024889.184
409610245120.83.874291108.58
409610245120.852.876851492.94
409610245120.91.753282449.68
7687685120.70.718776840.289
7687685120.750.5613171076.01
7687685120.80.4291661407.33
7687685120.850.3172041904.07
7687685120.90.2085552896.02
76830725120.72.94057821.581
76830725120.752.304581048.31
76830725120.81.67021446.49
76830725120.851.257991920.47
76830725120.90.841212871.96
30727565120.73.34423711.127
30727565120.752.77444857.17
30727565120.82.100491132.2
30727565120.851.525091559.37
30727565120.90.9260382568.11
## Multi Thread (Thread = 4)
M | K | N | Sparsity | Latency (ms) | Gflops
256256160.70.002031032.94
256256160.750.0019811058.84
256256160.80.0017941169.1
256256160.850.0016371280.99
256256160.90.001711226.24
2561024160.70.0039582119.5
2561024160.750.0034052463.96
2561024160.80.0030862718.43
2561024160.850.0029272866.1
2561024160.90.0021793849.95
1024256160.70.0043711918.96
1024256160.750.0041822005.83
1024256160.80.0034482433
1024256160.850.002912882.63
1024256160.90.0029372856.46
10241024160.70.0120952774.32
10241024160.750.0103383245.84
10241024160.80.0085083943.97
10241024160.850.0065955087.77
10241024160.90.0048476923.16
10244096160.70.0524512558.92
10244096160.750.0428743130.54
10244096160.80.0323644147.11
10244096160.850.023125805.3
10244096160.90.015228818.59
40961024160.70.0451232974.5
40961024160.750.0373823590.4
40961024160.80.0302754433.35
40961024160.850.0231825789.74
40961024160.90.016178300.6
768768160.70.0073142580.48
768768160.750.0063322980.89
768768160.80.0053593522.29
768768160.850.0044934200.52
768768160.90.0034185521.87
7683072160.70.0270032795.91
7683072160.750.021973436.39
7683072160.80.0177484253.93
7683072160.850.0131345748.48
7683072160.90.0089748412.5
3072756160.70.0257192889.63
3072756160.750.0218973393.99
3072756160.80.0180934107.48
3072756160.850.0137425408.06
3072756160.90.0099247488.55
256256320.70.0028911450.74
256256320.750.0024581706.24
256256320.80.0025231662.22
256256320.850.0022661851.32
256256320.90.0018032326.09
2561024320.70.0057892898.1
2561024320.750.0050043353.02
2561024320.80.004243956.8
2561024320.850.0035054786.85
2561024320.90.0029725644.46
1024256320.70.0069332420.03
1024256320.750.0059312828.91
1024256320.80.0052673185.37
1024256320.850.0048013494.54
1024256320.90.0038514356.24
10241024320.70.0193523467.87
10241024320.750.0166254036.54
10241024320.80.0132875050.74
10241024320.850.0106296313.59
10241024320.90.0075918841.1
10244096320.70.0977262746.83
10244096320.750.0813273300.69
10244096320.80.0656324090
10244096320.850.0476465633.92
10244096320.90.0295369088.36
40961024320.70.0736193646.28
40961024320.750.0603794445.85
40961024320.80.0489945478.97
40961024320.850.0369927256.58
40961024320.90.0258810372.2
768768320.70.0116593237.75
768768320.750.0097913855.4
768768320.80.0081184650.12
768768320.850.0066795652.1
768768320.90.0049397643.6
7683072320.70.0537772807.81
7683072320.750.0459143288.68
7683072320.80.035884208.33
7683072320.850.025295970.53
7683072320.90.0167429018.97
3072756320.70.0407053651.53
3072756320.750.0342914334.57
3072756320.80.0285685202.87
3072756320.850.021876796.25
3072756320.90.0161869183
256256480.70.0031551994.06
256256480.750.0029522131.62
256256480.80.0026552369.77
256256480.850.0024362582.9
256256480.90.0022362813.73
2561024480.70.007453377.93
2561024480.750.0065653833.08
2561024480.80.0054264638.05
2561024480.850.0042565912.38
2561024480.90.0034667261.4
1024256480.70.0083593010.51
1024256480.750.007423391.82
1024256480.80.0065353850.81
1024256480.850.0055594526.9
1024256480.90.0048815156.31
10241024480.70.0257263912.87
10241024480.750.0217474628.88
10241024480.80.0176515703.16
10241024480.850.0137127341.14
10241024480.90.00980110270.4
10244096480.70.1247493227.71
10244096480.750.1033853894.71
10244096480.80.0832194838.46
10244096480.850.0626016432.03
10244096480.90.03979910117.1
40961024480.70.1010713983.87
40961024480.750.0825674876.71
40961024480.80.0668526023.1
40961024480.850.0487738255.7
40961024480.90.03541911368.2
768768480.70.0147723833.19
768768480.750.012784430.65
768768480.80.0105325376.08
768768480.850.0086986509.93
768768480.90.0064368798
7683072480.70.0698873240.85
7683072480.750.0588583848.1
7683072480.80.0473414784.26
7683072480.850.0346146543.34
7683072480.90.02229510159
3072756480.70.0538154142.99
3072756480.750.0454584904.58
3072756480.80.0373585968.05
3072756480.850.029147651.21
3072756480.90.0208910672.7
256256640.70.0039362131.31
256256640.750.0033222525.07
256256640.80.0031732643.78
256256640.850.0026913117.13
256256640.90.0023583557.32
2561024640.70.0093823576.47
2561024640.750.0082894047.86
2561024640.80.0067244990.61
2561024640.850.0051486518.36
2561024640.90.0040618262.2
1024256640.70.0107123132.55
1024256640.750.0092033646.22
1024256640.80.0079434224.63
1024256640.850.0069814806.38
1024256640.90.005615980.91
10241024640.70.0345673882.83
10241024640.750.0289724632.68
10241024640.80.0232585770.71
10241024640.850.0179777466.26
10241024640.90.01281410474
10244096640.70.1581623394.43
10244096640.750.1325524050.27
10244096640.80.1018445271.51
10244096640.850.0809366633.28
10244096640.90.0530110127.8
40961024640.70.1358393952.25
40961024640.750.111164829.72
40961024640.80.0899515968.46
40961024640.850.0633028481.15
40961024640.90.04674611484.8
768768640.70.0192633919.34
768768640.750.0164594586.97
768768640.80.0135355578.04
768768640.850.0107717009.2
768768640.90.0076119919.29
7683072640.70.0854883532.56
7683072640.750.0747274041.24
7683072640.80.0604764993.56
7683072640.850.0417967225.26
7683072640.90.02754710962.8
3072756640.70.0713824164.51
3072756640.750.0600774948.16
3072756640.80.0496585986.36
3072756640.850.0381397794.53
3072756640.90.02702510999.9
256256800.70.0050252086.83
256256800.750.0045192320.6
256256800.80.0039682642.46
256256800.850.003393093.49
256256800.90.0031533325.19
2561024800.70.0147952834.92
2561024800.750.0126233322.87
2561024800.80.0102694084.43
2561024800.850.0078595337.09
2561024800.90.0057037354.65
1024256800.70.0161692594.02
1024256800.750.0144482902.97
1024256800.80.0117053583.33
1024256800.850.0101494132.84
1024256800.90.0087934769.87
10241024800.70.056032994.34
10241024800.750.04683584.89
10241024800.80.037444481.13
10241024800.850.0281665956.55
10241024800.90.0192598711.21
10244096800.70.2778152415.59
10244096800.750.2298372919.85
10244096800.80.17873755.4
10244096800.850.1265455303.18
10244096800.90.0835328033.9
40961024800.70.2249252983.61
40961024800.750.1842563642.16
40961024800.80.1464524582.3
40961024800.850.1088996162.5
40961024800.90.0740669060.75
768768800.70.0319462954.15
768768800.750.0268143519.55
768768800.80.0217064347.69
768768800.850.0166675662.34
768768800.90.0116478102.93
7683072800.70.1505492507.4
7683072800.750.1199513147.02
7683072800.80.0893094226.74
7683072800.850.0637235923.92
7683072800.90.0427848823.11
3072756800.70.1224213035.33
3072756800.750.1022443634.35
3072756800.80.0822014520.51
3072756800.850.0622485969.46
3072756800.90.0429448652.96
256256960.70.0048112615.59
256256960.750.0044382835.29
256256960.80.0038993227.52
256256960.850.0034383659.72
256256960.90.0029784225.52
2561024960.70.0145763453.11
2561024960.750.0120784167.21
2561024960.80.0089635615.31
2561024960.850.0072766917.06
2561024960.90.0054859176.61
1024256960.70.0150623341.54
1024256960.750.0132433800.69
1024256960.80.0114434398.48
1024256960.850.0092885418.82
1024256960.90.0074466759.98
10241024960.70.0543983701.01
10241024960.750.0445694517.16
10241024960.80.0363335541.17
10241024960.850.0270497442.97
10241024960.90.01874210742.3
10244096960.70.2673673011.99
10244096960.750.2236153601.31
10244096960.80.17974481.38
10244096960.850.1347165977.82
10244096960.90.06927511624.7
40961024960.70.2226613616.73
40961024960.750.1795254485.75
40961024960.80.1392985781.18
40961024960.850.0958798399.22
40961024960.90.1762924568.02
768768960.70.0700281617.15
768768960.750.0258344383.59
768768960.80.020755457.53
768768960.850.0161287021.85
768768960.90.0116019761.93
7683072960.70.1485123050.15
7683072960.750.1242383646.1
7683072960.80.099534551.22
7683072960.850.0714326341.45
7683072960.90.04415410259.2
3072756960.70.1131743940.02
3072756960.750.0961664636.84
3072756960.80.0765845822.45
3072756960.850.0592917520.62
3072756960.90.04166110703.1
2562561120.70.0065572239
2562561120.750.0058482510.44
2562561120.80.0050212923.66
2562561120.850.0046633148.41
2562561120.90.0039333732.77
25610241120.70.0203922879.53
25610241120.750.0171023433.58
25610241120.80.0137034285.32
25610241120.850.0109535361.24
25610241120.90.0074967833.17
10242561120.70.0221322653.19
10242561120.750.0193763030.55
10242561120.80.0158773698.38
10242561120.850.0134374370.15
10242561120.90.0116375046.22
102410241120.70.0780753008.4
102410241120.750.0651553604.95
102410241120.80.0521954500.11
102410241120.850.039415959.92
102410241120.90.0262718940.83
102440961120.70.3985092357.6
102440961120.750.3321532828.58
102440961120.80.2590513626.8
102440961120.850.1844125094.69
102440961120.90.1196787850.42
409610241120.70.3136562995.4
409610241120.750.2579893641.72
409610241120.80.2060494559.71
409610241120.850.1528486146.79
409610241120.90.1031259110.51
7687681120.70.0444862969.93
7687681120.750.0373343538.88
7687681120.80.0302294370.61
7687681120.850.0231985695.36
7687681120.90.015878325.1
76830721120.70.2197772404.63
76830721120.750.177642975.03
76830721120.80.1358643889.79
76830721120.850.1004245262.52
76830721120.90.0677557799.93
30727561120.70.1723563018.32
30727561120.750.1442313606.89
30727561120.80.1160194483.96
30727561120.850.0873885953.04
30727561120.90.05968728.54
2562561280.70.0060972751.74
2562561280.750.0051763241.12
2562561280.80.004493736.94
2562561280.850.0039494248.73
2562561280.90.0031755284.78
25610241280.70.0204833276.3
25610241280.750.0137694873.87
25610241280.80.0114195876.74
25610241280.850.0089337512.43
25610241280.90.00653310272.1
10242561280.70.0192413487.82
10242561280.750.0168353986.32
10242561280.80.0142344714.59
10242561280.850.0119465617.9
10242561280.90.0091067369.36
102410241280.70.078113436.63
102410241280.750.0614814366.18
102410241280.80.0462015810.17
102410241280.850.0357027518.72
102410241280.90.0222112086.1
102440961280.70.3269173284.45
102440961280.750.2733633927.89
102440961280.80.2209944858.7
102440961280.850.1661536462.38
102440961280.90.0854412567.2
409610241280.70.3113483448.68
409610241280.750.2430124418.47
409610241280.80.1879335713.44
409610241280.850.1268198466.75
409610241280.90.08556912548.3
7687681280.70.0404783730.31
7687681280.750.032974579.78
7687681280.80.0265275692.09
7687681280.850.0207827265.85
7687681280.90.01390410860
76830721280.70.1830033300.39
76830721280.750.1614433741.12
76830721280.80.1297674654.33
76830721280.850.0893356760.88
76830721280.90.0610689890.22
30727561280.70.155253829.58
30727561280.750.1257724727.15
30727561280.80.1014345861.35
30727561280.850.0765087770.96
30727561280.90.05373211064.9
2562562560.70.0104213219.91
2562562560.750.0090823694.63
2562562560.80.0078134294.82
2562562560.850.0065115153.44
2562562560.90.0051726487.84
25610242560.70.0321384176.33
25610242560.750.026994972.79
25610242560.80.0220016100.42
25610242560.850.0169017941.41
25610242560.90.01200411180.9
10242562560.70.037383590.61
10242562560.750.031494262.3
10242562560.80.0266735031.96
10242562560.850.0217826161.81
10242562560.90.0168227978.77
102410242560.70.1760313049.86
102410242560.750.1344423993.33
102410242560.80.0843786362.72
102410242560.850.0640588381.05
102410242560.90.04378712261
102440962560.70.6978763077.17
102440962560.750.6153383489.92
102440962560.80.3501476133.09
102440962560.850.2672898034.33
102440962560.90.17969511950.7
409610242560.70.7032073053.84
409610242560.750.5618053822.47
409610242560.80.4230525076.17
409610242560.850.2548358426.97
409610242560.90.17143412526.6
7687682560.70.0933483235.09
7687682560.750.0725634161.75
7687682560.80.053925600.73
7687682560.850.0376128029.1
7687682560.90.02631411476.5
76830722560.70.4073882965.13
76830722560.750.3409393543.04
76830722560.80.2610284627.69
76830722560.850.1516497965.49
76830722560.90.10337911684.7
30727562560.70.3587693314.35
30727562560.750.2830464201.04
30727562560.80.2121525604.88
30727562560.850.1585027502
30727562560.90.10751411059.8
2562563840.70.0148333393.25
2562563840.750.0129123898.17
2562563840.80.0110544553.38
2562563840.850.0092535439.63
2562563840.90.007177019.88
25610243840.70.0526613823.07
25610243840.750.0447284501.11
25610243840.80.0365835503.22
25610243840.850.0274877324.31
25610243840.90.0183910947.9
10242563840.70.0556653616.74
10242563840.750.0465884321.41
10242563840.80.0394115108.38
10242563840.850.0318756316.23
10242563840.90.0247068148.88
102410243840.70.2633163058.33
102410243840.750.1983034061
102410243840.80.1419665672.54
102410243840.850.1058497608.09
102410243840.90.06878711707.3
102440963840.70.7938224057.87
102440963840.750.9969793230.99
102440963840.80.542885933.58
102440963840.850.4121747815.21
102440963840.90.27444111737.4
409610243840.71.034743113.09
409610243840.750.8327013868.41
409610243840.80.5647875703.43
409610243840.850.4217817637.2
409610243840.90.27158711860.8
7687683840.70.1308343462.28
7687683840.750.0943314802.11
7687683840.80.0760125959.36
7687683840.850.0580477803.72
7687683840.90.03953411458
76830723840.70.6357292850.17
76830723840.750.3798994769.52
76830723840.80.3079085884.68
76830723840.850.2330577774.65
76830723840.90.15751811503.1
30727563840.70.5182763441.47
30727563840.750.4225174221.44
30727563840.80.3234765513.94
30727563840.850.2472517213.84
30727563840.90.16336910917.8
2562565120.70.0192313489.57
2562565120.750.0168153991.09
2562565120.80.0143274683.96
2562565120.850.0119325624.27
2562565120.90.009157334.5
25610245120.70.0723143712.06
25610245120.750.061384373.31
25610245120.80.0500215366.46
25610245120.850.0376957121.28
25610245120.90.0249410763.4
10242565120.70.0796633369.65
10242565120.750.061674352.8
10242565120.80.0521335149.09
10242565120.850.0421886362.85
10242565120.90.0325748240.75
102410245120.70.3805822821.32
102410245120.750.240384466.86
102410245120.80.1952715498.73
102410245120.850.1469357307.58
102410245120.90.09483211322.6
102440965120.71.087943947.8
102440965120.750.9136264701.01
102440965120.80.7379435820.19
102440965120.850.5584737690.55
102440965120.90.36946611624.8
409610245120.71.510092844.18
409610245120.751.269743382.55
409610245120.80.7782445518.79
409610245120.850.5898037282.03
409610245120.90.37528411444.6
7687685120.70.1607053758.31
7687685120.750.1354954457.57
7687685120.80.1087185555.46
7687685120.850.081127445.53
7687685120.90.05335111320.8
76830725120.70.6141093934.03
76830725120.750.5186144658.42
76830725120.80.4184915772.93
76830725120.850.315927647.25
76830725120.90.21189311401.6
30727565120.70.8366162842.61
30727565120.750.6995793399.43
30727565120.80.5441944370.08
30727565120.850.3796996263.3
30727565120.90.22804310428.6
## Platform Configuration
| Item | Value |
|------|-------|
| Manufacturer | Amazon EC2 |
| Product Name | c6i.12xlarge |
| BIOS Version | 1 |
| OS | Ubuntu 20.04.3 LTS |
| Kernel | 5.15.0-1021-aws |
| Microcode | 0xd000331 |
| IRQ Balance | Disabled |
| CPU Model | Intel(R) Xeon Platinum 8375C CPU @ 2.90GHz |
| Base Frequency | 2.9GHz |
| Maximum Frequency | 3.9GHz |
| All-core Maximum Frequency | 3.5GHz |
| CPU(s) | 48 |
| Thread(s) per Core | N/A |
| Core(s) per Socket | 24 |
| Socket(s) | 1 |
| NUMA Node(s) | 1 |
| Turbo | Enabled |
| Frequency Governor | Default |
| Max C-State | 9 |
\ No newline at end of file diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/include/amx_utils.hpp b/intel_extension_for_transformers/backends/neural_engine/kernels/include/amx_utils.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/include/amx_utils.hpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/include/amx_utils.hpp diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/include/cpu_engine.hpp b/intel_extension_for_transformers/backends/neural_engine/kernels/include/cpu_engine.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/include/cpu_engine.hpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/include/cpu_engine.hpp diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/include/cpu_isa.hpp b/intel_extension_for_transformers/backends/neural_engine/kernels/include/cpu_isa.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/include/cpu_isa.hpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/include/cpu_isa.hpp diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/include/engine.hpp b/intel_extension_for_transformers/backends/neural_engine/kernels/include/engine.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/include/engine.hpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/include/engine.hpp diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/include/engine_factory.hpp b/intel_extension_for_transformers/backends/neural_engine/kernels/include/engine_factory.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/include/engine_factory.hpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/include/engine_factory.hpp diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/include/impl_list_item.hpp b/intel_extension_for_transformers/backends/neural_engine/kernels/include/impl_list_item.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/include/impl_list_item.hpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/include/impl_list_item.hpp diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/include/interface.hpp b/intel_extension_for_transformers/backends/neural_engine/kernels/include/interface.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/include/interface.hpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/include/interface.hpp diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/include/jit_domain/jit_amx_configure.hpp b/intel_extension_for_transformers/backends/neural_engine/kernels/include/jit_domain/jit_amx_configure.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/include/jit_domain/jit_amx_configure.hpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/include/jit_domain/jit_amx_configure.hpp diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/include/jit_domain/jit_binary_injector.hpp b/intel_extension_for_transformers/backends/neural_engine/kernels/include/jit_domain/jit_binary_injector.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/include/jit_domain/jit_binary_injector.hpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/include/jit_domain/jit_binary_injector.hpp diff --git 
a/nlp_toolkit/backends/neural_engine/SparseLib/include/jit_domain/jit_eltwise_injector.hpp b/intel_extension_for_transformers/backends/neural_engine/kernels/include/jit_domain/jit_eltwise_injector.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/include/jit_domain/jit_eltwise_injector.hpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/include/jit_domain/jit_eltwise_injector.hpp diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/include/jit_domain/jit_eltwiseop.hpp b/intel_extension_for_transformers/backends/neural_engine/kernels/include/jit_domain/jit_eltwiseop.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/include/jit_domain/jit_eltwiseop.hpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/include/jit_domain/jit_eltwiseop.hpp diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/include/jit_domain/jit_gather.hpp b/intel_extension_for_transformers/backends/neural_engine/kernels/include/jit_domain/jit_gather.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/include/jit_domain/jit_gather.hpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/include/jit_domain/jit_gather.hpp diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/include/jit_domain/jit_layernorm_ba.hpp b/intel_extension_for_transformers/backends/neural_engine/kernels/include/jit_domain/jit_layernorm_ba.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/include/jit_domain/jit_layernorm_ba.hpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/include/jit_domain/jit_layernorm_ba.hpp diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/include/jit_domain/jit_matmul_avx512f_p2031_p2013.hpp b/intel_extension_for_transformers/backends/neural_engine/kernels/include/jit_domain/jit_matmul_avx512f_p2031_p2013.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/include/jit_domain/jit_matmul_avx512f_p2031_p2013.hpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/include/jit_domain/jit_matmul_avx512f_p2031_p2013.hpp diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/include/jit_domain/jit_matmul_vnni_Ba4b_Ab4a_ba.hpp b/intel_extension_for_transformers/backends/neural_engine/kernels/include/jit_domain/jit_matmul_vnni_Ba4b_Ab4a_ba.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/include/jit_domain/jit_matmul_vnni_Ba4b_Ab4a_ba.hpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/include/jit_domain/jit_matmul_vnni_Ba4b_Ab4a_ba.hpp diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/include/jit_domain/jit_matmul_vnni_noperm_p2031_p1302.hpp b/intel_extension_for_transformers/backends/neural_engine/kernels/include/jit_domain/jit_matmul_vnni_noperm_p2031_p1302.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/include/jit_domain/jit_matmul_vnni_noperm_p2031_p1302.hpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/include/jit_domain/jit_matmul_vnni_noperm_p2031_p1302.hpp diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/include/jit_domain/jit_softmax.hpp b/intel_extension_for_transformers/backends/neural_engine/kernels/include/jit_domain/jit_softmax.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/include/jit_domain/jit_softmax.hpp rename to 
intel_extension_for_transformers/backends/neural_engine/kernels/include/jit_domain/jit_softmax.hpp diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/include/jit_domain/jit_spmm_amx_bf16_x16.hpp b/intel_extension_for_transformers/backends/neural_engine/kernels/include/jit_domain/jit_spmm_amx_bf16_x16.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/include/jit_domain/jit_spmm_amx_bf16_x16.hpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/include/jit_domain/jit_spmm_amx_bf16_x16.hpp diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/include/jit_domain/jit_spmm_avx512f.hpp b/intel_extension_for_transformers/backends/neural_engine/kernels/include/jit_domain/jit_spmm_avx512f.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/include/jit_domain/jit_spmm_avx512f.hpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/include/jit_domain/jit_spmm_avx512f.hpp diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/include/jit_domain/jit_spmm_vnni.hpp b/intel_extension_for_transformers/backends/neural_engine/kernels/include/jit_domain/jit_spmm_vnni.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/include/jit_domain/jit_spmm_vnni.hpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/include/jit_domain/jit_spmm_vnni.hpp diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/include/jit_domain/jit_trans_cpy_nx8_4b.hpp b/intel_extension_for_transformers/backends/neural_engine/kernels/include/jit_domain/jit_trans_cpy_nx8_4b.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/include/jit_domain/jit_trans_cpy_nx8_4b.hpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/include/jit_domain/jit_trans_cpy_nx8_4b.hpp diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/include/jit_generator.hpp b/intel_extension_for_transformers/backends/neural_engine/kernels/include/jit_generator.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/include/jit_generator.hpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/include/jit_generator.hpp diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/include/kernel.hpp b/intel_extension_for_transformers/backends/neural_engine/kernels/include/kernel.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/include/kernel.hpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/include/kernel.hpp diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/include/kernel_cache.hpp b/intel_extension_for_transformers/backends/neural_engine/kernels/include/kernel_cache.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/include/kernel_cache.hpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/include/kernel_cache.hpp diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/include/kernel_desc.hpp b/intel_extension_for_transformers/backends/neural_engine/kernels/include/kernel_desc.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/include/kernel_desc.hpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/include/kernel_desc.hpp diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/include/kernel_hashing.hpp b/intel_extension_for_transformers/backends/neural_engine/kernels/include/kernel_hashing.hpp similarity index 100% 
rename from nlp_toolkit/backends/neural_engine/SparseLib/include/kernel_hashing.hpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/include/kernel_hashing.hpp diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/include/kernels/attention.hpp b/intel_extension_for_transformers/backends/neural_engine/kernels/include/kernels/attention.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/include/kernels/attention.hpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/include/kernels/attention.hpp diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/include/kernels/eltwiseop.hpp b/intel_extension_for_transformers/backends/neural_engine/kernels/include/kernels/eltwiseop.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/include/kernels/eltwiseop.hpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/include/kernels/eltwiseop.hpp diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/include/kernels/eltwiseop_ref.hpp b/intel_extension_for_transformers/backends/neural_engine/kernels/include/kernels/eltwiseop_ref.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/include/kernels/eltwiseop_ref.hpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/include/kernels/eltwiseop_ref.hpp diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/include/kernels/eltwiseop_types.hpp b/intel_extension_for_transformers/backends/neural_engine/kernels/include/kernels/eltwiseop_types.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/include/kernels/eltwiseop_types.hpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/include/kernels/eltwiseop_types.hpp diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/include/kernels/gather.hpp b/intel_extension_for_transformers/backends/neural_engine/kernels/include/kernels/gather.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/include/kernels/gather.hpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/include/kernels/gather.hpp diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/include/kernels/gather_ref.hpp b/intel_extension_for_transformers/backends/neural_engine/kernels/include/kernels/gather_ref.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/include/kernels/gather_ref.hpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/include/kernels/gather_ref.hpp diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/include/kernels/gather_types.hpp b/intel_extension_for_transformers/backends/neural_engine/kernels/include/kernels/gather_types.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/include/kernels/gather_types.hpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/include/kernels/gather_types.hpp diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/include/kernels/layernorm_ba.hpp b/intel_extension_for_transformers/backends/neural_engine/kernels/include/kernels/layernorm_ba.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/include/kernels/layernorm_ba.hpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/include/kernels/layernorm_ba.hpp diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/include/kernels/layernorm_ba_ref.hpp 
b/intel_extension_for_transformers/backends/neural_engine/kernels/include/kernels/layernorm_ba_ref.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/include/kernels/layernorm_ba_ref.hpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/include/kernels/layernorm_ba_ref.hpp diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/include/kernels/layernorm_ba_types.hpp b/intel_extension_for_transformers/backends/neural_engine/kernels/include/kernels/layernorm_ba_types.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/include/kernels/layernorm_ba_types.hpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/include/kernels/layernorm_ba_types.hpp diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/include/kernels/matmul_avx512f_p2031_p2013.hpp b/intel_extension_for_transformers/backends/neural_engine/kernels/include/kernels/matmul_avx512f_p2031_p2013.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/include/kernels/matmul_avx512f_p2031_p2013.hpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/include/kernels/matmul_avx512f_p2031_p2013.hpp diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/include/kernels/matmul_ref.hpp b/intel_extension_for_transformers/backends/neural_engine/kernels/include/kernels/matmul_ref.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/include/kernels/matmul_ref.hpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/include/kernels/matmul_ref.hpp diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/include/kernels/matmul_types.hpp b/intel_extension_for_transformers/backends/neural_engine/kernels/include/kernels/matmul_types.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/include/kernels/matmul_types.hpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/include/kernels/matmul_types.hpp diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/include/kernels/matmul_vnni_noperm_p2031_p1302.hpp b/intel_extension_for_transformers/backends/neural_engine/kernels/include/kernels/matmul_vnni_noperm_p2031_p1302.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/include/kernels/matmul_vnni_noperm_p2031_p1302.hpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/include/kernels/matmul_vnni_noperm_p2031_p1302.hpp
diff --git a/intel_extension_for_transformers/backends/neural_engine/kernels/include/kernels/postop_types.hpp b/intel_extension_for_transformers/backends/neural_engine/kernels/include/kernels/postop_types.hpp
new file mode 100644
index 00000000000..32587a5371d
--- /dev/null
+++ b/intel_extension_for_transformers/backends/neural_engine/kernels/include/kernels/postop_types.hpp
@@ -0,0 +1,43 @@
+// Copyright (c) 2022 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ENGINE_SPARSELIB_INCLUDE_KERNELS_GATHER_TYPES_HPP_
+#define ENGINE_SPARSELIB_INCLUDE_KERNELS_GATHER_TYPES_HPP_
+
+#include <vector>
+#include "param_types.hpp"
+
+namespace jd {
+namespace ssd {
+struct gather_param_t {
+  data_type dt;
+  int dt_size;
+  int loops, remain;
+  int64_t mask, extend_mask;
+  int src_axis_size, dst_axis_size;
+  int outer_size, inner_size;
+  std::vector<binaryop_attr> binaryop_attrs;
+  std::vector<int> binary_ts_sizes;
+};
+
+struct gather_data_t {
+  void* src;
+  void* idx;
+  void* dst;
+  void* binaryop_addrs[16];
+};
+
+}  // namespace ssd
+}  // namespace jd
+#endif  // ENGINE_SPARSELIB_INCLUDE_KERNELS_GATHER_TYPES_HPP_
diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/include/kernels/softmax.hpp b/intel_extension_for_transformers/backends/neural_engine/kernels/include/kernels/softmax.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/include/kernels/softmax.hpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/include/kernels/softmax.hpp diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/include/kernels/softmax_ref.hpp b/intel_extension_for_transformers/backends/neural_engine/kernels/include/kernels/softmax_ref.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/include/kernels/softmax_ref.hpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/include/kernels/softmax_ref.hpp diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/include/kernels/softmax_types.hpp b/intel_extension_for_transformers/backends/neural_engine/kernels/include/kernels/softmax_types.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/include/kernels/softmax_types.hpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/include/kernels/softmax_types.hpp diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/include/kernels/sparse_data.hpp b/intel_extension_for_transformers/backends/neural_engine/kernels/include/kernels/sparse_data.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/include/kernels/sparse_data.hpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/include/kernels/sparse_data.hpp diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/include/kernels/spmm_amx_bf16_x16.hpp b/intel_extension_for_transformers/backends/neural_engine/kernels/include/kernels/spmm_amx_bf16_x16.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/include/kernels/spmm_amx_bf16_x16.hpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/include/kernels/spmm_amx_bf16_x16.hpp diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/include/kernels/spmm_avx512f.hpp b/intel_extension_for_transformers/backends/neural_engine/kernels/include/kernels/spmm_avx512f.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/include/kernels/spmm_avx512f.hpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/include/kernels/spmm_avx512f.hpp diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/include/kernels/spmm_ref.hpp b/intel_extension_for_transformers/backends/neural_engine/kernels/include/kernels/spmm_ref.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/include/kernels/spmm_ref.hpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/include/kernels/spmm_ref.hpp diff --git
a/nlp_toolkit/backends/neural_engine/SparseLib/include/kernels/spmm_types.hpp b/intel_extension_for_transformers/backends/neural_engine/kernels/include/kernels/spmm_types.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/include/kernels/spmm_types.hpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/include/kernels/spmm_types.hpp diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/include/kernels/spmm_vnni.hpp b/intel_extension_for_transformers/backends/neural_engine/kernels/include/kernels/spmm_vnni.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/include/kernels/spmm_vnni.hpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/include/kernels/spmm_vnni.hpp diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/include/operator_desc.hpp b/intel_extension_for_transformers/backends/neural_engine/kernels/include/operator_desc.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/include/operator_desc.hpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/include/operator_desc.hpp diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/include/param_types.hpp b/intel_extension_for_transformers/backends/neural_engine/kernels/include/param_types.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/include/param_types.hpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/include/param_types.hpp diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/include/tensor_desc.hpp b/intel_extension_for_transformers/backends/neural_engine/kernels/include/tensor_desc.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/include/tensor_desc.hpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/include/tensor_desc.hpp diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/include/utils.hpp b/intel_extension_for_transformers/backends/neural_engine/kernels/include/utils.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/include/utils.hpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/include/utils.hpp diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/include/verbose.hpp b/intel_extension_for_transformers/backends/neural_engine/kernels/include/verbose.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/include/verbose.hpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/include/verbose.hpp diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/include/vtune_wrapper.hpp b/intel_extension_for_transformers/backends/neural_engine/kernels/include/vtune_wrapper.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/include/vtune_wrapper.hpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/include/vtune_wrapper.hpp diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/src/amx_utils.cpp b/intel_extension_for_transformers/backends/neural_engine/kernels/src/amx_utils.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/src/amx_utils.cpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/src/amx_utils.cpp diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/src/cpu_engine.cpp b/intel_extension_for_transformers/backends/neural_engine/kernels/src/cpu_engine.cpp similarity index 100% rename from 
nlp_toolkit/backends/neural_engine/SparseLib/src/cpu_engine.cpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/src/cpu_engine.cpp diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/src/cpu_isa.cpp b/intel_extension_for_transformers/backends/neural_engine/kernels/src/cpu_isa.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/src/cpu_isa.cpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/src/cpu_isa.cpp diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/src/interface.cpp b/intel_extension_for_transformers/backends/neural_engine/kernels/src/interface.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/src/interface.cpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/src/interface.cpp diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/src/jit_domain/jit_amx_configure.cpp b/intel_extension_for_transformers/backends/neural_engine/kernels/src/jit_domain/jit_amx_configure.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/src/jit_domain/jit_amx_configure.cpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/src/jit_domain/jit_amx_configure.cpp diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/src/jit_domain/jit_binary_injector.cpp b/intel_extension_for_transformers/backends/neural_engine/kernels/src/jit_domain/jit_binary_injector.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/src/jit_domain/jit_binary_injector.cpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/src/jit_domain/jit_binary_injector.cpp diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/src/jit_domain/jit_eltwise_injector.cpp b/intel_extension_for_transformers/backends/neural_engine/kernels/src/jit_domain/jit_eltwise_injector.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/src/jit_domain/jit_eltwise_injector.cpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/src/jit_domain/jit_eltwise_injector.cpp diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/src/jit_domain/jit_eltwiseop.cpp b/intel_extension_for_transformers/backends/neural_engine/kernels/src/jit_domain/jit_eltwiseop.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/src/jit_domain/jit_eltwiseop.cpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/src/jit_domain/jit_eltwiseop.cpp diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/src/jit_domain/jit_gather.cpp b/intel_extension_for_transformers/backends/neural_engine/kernels/src/jit_domain/jit_gather.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/src/jit_domain/jit_gather.cpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/src/jit_domain/jit_gather.cpp diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/src/jit_domain/jit_layernorm_ba.cpp b/intel_extension_for_transformers/backends/neural_engine/kernels/src/jit_domain/jit_layernorm_ba.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/src/jit_domain/jit_layernorm_ba.cpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/src/jit_domain/jit_layernorm_ba.cpp diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/src/jit_domain/jit_matmul_avx512f_p2031_p2013.cpp 
b/intel_extension_for_transformers/backends/neural_engine/kernels/src/jit_domain/jit_matmul_avx512f_p2031_p2013.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/src/jit_domain/jit_matmul_avx512f_p2031_p2013.cpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/src/jit_domain/jit_matmul_avx512f_p2031_p2013.cpp diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/src/jit_domain/jit_matmul_vnni_Ba4b_Ab4a_ba.cpp b/intel_extension_for_transformers/backends/neural_engine/kernels/src/jit_domain/jit_matmul_vnni_Ba4b_Ab4a_ba.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/src/jit_domain/jit_matmul_vnni_Ba4b_Ab4a_ba.cpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/src/jit_domain/jit_matmul_vnni_Ba4b_Ab4a_ba.cpp diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/src/jit_domain/jit_matmul_vnni_noperm_p2031_p1302.cpp b/intel_extension_for_transformers/backends/neural_engine/kernels/src/jit_domain/jit_matmul_vnni_noperm_p2031_p1302.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/src/jit_domain/jit_matmul_vnni_noperm_p2031_p1302.cpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/src/jit_domain/jit_matmul_vnni_noperm_p2031_p1302.cpp diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/src/jit_domain/jit_softmax.cpp b/intel_extension_for_transformers/backends/neural_engine/kernels/src/jit_domain/jit_softmax.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/src/jit_domain/jit_softmax.cpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/src/jit_domain/jit_softmax.cpp diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/src/jit_domain/jit_spmm_amx_bf16_x16.cpp b/intel_extension_for_transformers/backends/neural_engine/kernels/src/jit_domain/jit_spmm_amx_bf16_x16.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/src/jit_domain/jit_spmm_amx_bf16_x16.cpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/src/jit_domain/jit_spmm_amx_bf16_x16.cpp diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/src/jit_domain/jit_spmm_avx512f.cpp b/intel_extension_for_transformers/backends/neural_engine/kernels/src/jit_domain/jit_spmm_avx512f.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/src/jit_domain/jit_spmm_avx512f.cpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/src/jit_domain/jit_spmm_avx512f.cpp diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/src/jit_domain/jit_spmm_vnni.cpp b/intel_extension_for_transformers/backends/neural_engine/kernels/src/jit_domain/jit_spmm_vnni.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/src/jit_domain/jit_spmm_vnni.cpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/src/jit_domain/jit_spmm_vnni.cpp diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/src/jit_domain/jit_trans_cpy_nx8_4b.cpp b/intel_extension_for_transformers/backends/neural_engine/kernels/src/jit_domain/jit_trans_cpy_nx8_4b.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/src/jit_domain/jit_trans_cpy_nx8_4b.cpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/src/jit_domain/jit_trans_cpy_nx8_4b.cpp diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/src/jit_generator.cpp 
b/intel_extension_for_transformers/backends/neural_engine/kernels/src/jit_generator.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/src/jit_generator.cpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/src/jit_generator.cpp diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/src/kernel.cpp b/intel_extension_for_transformers/backends/neural_engine/kernels/src/kernel.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/src/kernel.cpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/src/kernel.cpp diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/src/kernel_cache.cpp b/intel_extension_for_transformers/backends/neural_engine/kernels/src/kernel_cache.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/src/kernel_cache.cpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/src/kernel_cache.cpp diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/src/kernel_desc.cpp b/intel_extension_for_transformers/backends/neural_engine/kernels/src/kernel_desc.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/src/kernel_desc.cpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/src/kernel_desc.cpp diff --git a/intel_extension_for_transformers/backends/neural_engine/kernels/src/kernels/CMakeLists.txt b/intel_extension_for_transformers/backends/neural_engine/kernels/src/kernels/CMakeLists.txt new file mode 100644 index 00000000000..affc78ad4a0 --- /dev/null +++ b/intel_extension_for_transformers/backends/neural_engine/kernels/src/kernels/CMakeLists.txt @@ -0,0 +1,3 @@ +file(GLOB OP_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp) +set(SOURCES ${SOURCES} ${OP_SOURCES} PARENT_SCOPE) + diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/src/kernels/attention.cpp b/intel_extension_for_transformers/backends/neural_engine/kernels/src/kernels/attention.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/src/kernels/attention.cpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/src/kernels/attention.cpp diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/src/kernels/cpu_attention_list.cpp b/intel_extension_for_transformers/backends/neural_engine/kernels/src/kernels/cpu_attention_list.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/src/kernels/cpu_attention_list.cpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/src/kernels/cpu_attention_list.cpp diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/src/kernels/cpu_eltwiseop_list.cpp b/intel_extension_for_transformers/backends/neural_engine/kernels/src/kernels/cpu_eltwiseop_list.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/src/kernels/cpu_eltwiseop_list.cpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/src/kernels/cpu_eltwiseop_list.cpp diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/src/kernels/cpu_gather_list.cpp b/intel_extension_for_transformers/backends/neural_engine/kernels/src/kernels/cpu_gather_list.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/src/kernels/cpu_gather_list.cpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/src/kernels/cpu_gather_list.cpp diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/src/kernels/cpu_layernorm_ba_list.cpp 
b/intel_extension_for_transformers/backends/neural_engine/kernels/src/kernels/cpu_layernorm_ba_list.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/src/kernels/cpu_layernorm_ba_list.cpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/src/kernels/cpu_layernorm_ba_list.cpp diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/src/kernels/cpu_softmax_list.cpp b/intel_extension_for_transformers/backends/neural_engine/kernels/src/kernels/cpu_softmax_list.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/src/kernels/cpu_softmax_list.cpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/src/kernels/cpu_softmax_list.cpp diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/src/kernels/cpu_sparse_matmul_list.cpp b/intel_extension_for_transformers/backends/neural_engine/kernels/src/kernels/cpu_sparse_matmul_list.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/src/kernels/cpu_sparse_matmul_list.cpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/src/kernels/cpu_sparse_matmul_list.cpp diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/src/kernels/cpu_transpose_matmul_list.cpp b/intel_extension_for_transformers/backends/neural_engine/kernels/src/kernels/cpu_transpose_matmul_list.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/src/kernels/cpu_transpose_matmul_list.cpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/src/kernels/cpu_transpose_matmul_list.cpp diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/src/kernels/eltwiseop.cpp b/intel_extension_for_transformers/backends/neural_engine/kernels/src/kernels/eltwiseop.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/src/kernels/eltwiseop.cpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/src/kernels/eltwiseop.cpp diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/src/kernels/eltwiseop_ref.cpp b/intel_extension_for_transformers/backends/neural_engine/kernels/src/kernels/eltwiseop_ref.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/src/kernels/eltwiseop_ref.cpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/src/kernels/eltwiseop_ref.cpp diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/src/kernels/gather.cpp b/intel_extension_for_transformers/backends/neural_engine/kernels/src/kernels/gather.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/src/kernels/gather.cpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/src/kernels/gather.cpp diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/src/kernels/gather_ref.cpp b/intel_extension_for_transformers/backends/neural_engine/kernels/src/kernels/gather_ref.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/src/kernels/gather_ref.cpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/src/kernels/gather_ref.cpp diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/src/kernels/layernorm_ba.cpp b/intel_extension_for_transformers/backends/neural_engine/kernels/src/kernels/layernorm_ba.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/src/kernels/layernorm_ba.cpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/src/kernels/layernorm_ba.cpp diff 
--git a/nlp_toolkit/backends/neural_engine/SparseLib/src/kernels/layernorm_ba_ref.cpp b/intel_extension_for_transformers/backends/neural_engine/kernels/src/kernels/layernorm_ba_ref.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/src/kernels/layernorm_ba_ref.cpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/src/kernels/layernorm_ba_ref.cpp diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/src/kernels/matmul_avx512f_p2031_p2013.cpp b/intel_extension_for_transformers/backends/neural_engine/kernels/src/kernels/matmul_avx512f_p2031_p2013.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/src/kernels/matmul_avx512f_p2031_p2013.cpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/src/kernels/matmul_avx512f_p2031_p2013.cpp diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/src/kernels/matmul_ref.cpp b/intel_extension_for_transformers/backends/neural_engine/kernels/src/kernels/matmul_ref.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/src/kernels/matmul_ref.cpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/src/kernels/matmul_ref.cpp diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/src/kernels/matmul_vnni_noperm_p2031_p1302.cpp b/intel_extension_for_transformers/backends/neural_engine/kernels/src/kernels/matmul_vnni_noperm_p2031_p1302.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/src/kernels/matmul_vnni_noperm_p2031_p1302.cpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/src/kernels/matmul_vnni_noperm_p2031_p1302.cpp diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/src/kernels/softmax.cpp b/intel_extension_for_transformers/backends/neural_engine/kernels/src/kernels/softmax.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/src/kernels/softmax.cpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/src/kernels/softmax.cpp diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/src/kernels/softmax_ref.cpp b/intel_extension_for_transformers/backends/neural_engine/kernels/src/kernels/softmax_ref.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/src/kernels/softmax_ref.cpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/src/kernels/softmax_ref.cpp diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/src/kernels/sparse_data.cpp b/intel_extension_for_transformers/backends/neural_engine/kernels/src/kernels/sparse_data.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/src/kernels/sparse_data.cpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/src/kernels/sparse_data.cpp diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/src/kernels/spmm_amx_bf16_x16.cpp b/intel_extension_for_transformers/backends/neural_engine/kernels/src/kernels/spmm_amx_bf16_x16.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/src/kernels/spmm_amx_bf16_x16.cpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/src/kernels/spmm_amx_bf16_x16.cpp diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/src/kernels/spmm_avx512f.cpp b/intel_extension_for_transformers/backends/neural_engine/kernels/src/kernels/spmm_avx512f.cpp similarity index 100% rename from 
nlp_toolkit/backends/neural_engine/SparseLib/src/kernels/spmm_avx512f.cpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/src/kernels/spmm_avx512f.cpp diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/src/kernels/spmm_ref.cpp b/intel_extension_for_transformers/backends/neural_engine/kernels/src/kernels/spmm_ref.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/src/kernels/spmm_ref.cpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/src/kernels/spmm_ref.cpp diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/src/kernels/spmm_vnni.cpp b/intel_extension_for_transformers/backends/neural_engine/kernels/src/kernels/spmm_vnni.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/src/kernels/spmm_vnni.cpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/src/kernels/spmm_vnni.cpp diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/src/utils.cpp b/intel_extension_for_transformers/backends/neural_engine/kernels/src/utils.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/src/utils.cpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/src/utils.cpp diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/src/verbose.cpp b/intel_extension_for_transformers/backends/neural_engine/kernels/src/verbose.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/src/verbose.cpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/src/verbose.cpp diff --git a/nlp_toolkit/backends/neural_engine/SparseLib/src/vtune_wrapper.cpp b/intel_extension_for_transformers/backends/neural_engine/kernels/src/vtune_wrapper.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/SparseLib/src/vtune_wrapper.cpp rename to intel_extension_for_transformers/backends/neural_engine/kernels/src/vtune_wrapper.cpp diff --git a/intel_extension_for_transformers/backends/neural_engine/oneDNN-THIRD-PARTY-PROGRAMS b/intel_extension_for_transformers/backends/neural_engine/oneDNN-THIRD-PARTY-PROGRAMS new file mode 100644 index 00000000000..36e4c59c685 --- /dev/null +++ b/intel_extension_for_transformers/backends/neural_engine/oneDNN-THIRD-PARTY-PROGRAMS @@ -0,0 +1,583 @@ +oneAPI Deep Neural Network Library (oneDNN) Third Party Programs File + +This file contains the list of third party software ("third party programs") +contained in the Intel software and their required notices and/or license +terms. This third party software, even if included with the distribution of +the Intel software, may be governed by separate license terms, including +without limitation, third party license terms, other Intel software license +terms, and open source software license terms. These separate license terms +govern your use of the third party programs as set forth in in the +"THIRD-PARTY-PROGRAMS" file. + +Third party programs and their corresponding required notices and/or license +terms are listed below. + +-------------------------------------------------------------------------------- +1. XByak (src/cpu/xbyak/) +Copyright (c) 2007 MITSUNARI Shigeo +All rights reserved. + +3-Clause BSD License + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +Redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. 
+Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation +and/or other materials provided with the distribution. +Neither the name of the copyright owner nor the names of its contributors may +be used to endorse or promote products derived from this software without +specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +THE POSSIBILITY OF SUCH DAMAGE. + + +ソースコード形式かバイナリ形式か、変更するかしないかを問わず、以下の条件を満た +す場合に限り、再頒布および使用が許可されます。 + +ソースコードを再頒布する場合、上記の著作権表示、本条件一覧、および下記免責条項 +を含めること。 +バイナリ形式で再頒布する場合、頒布物に付属のドキュメント等の資料に、上記の著作 +権表示、本条件一覧、および下記免責条項を含めること。 +書面による特別の許可なしに、本ソフトウェアから派生した製品の宣伝または販売促進 +に、著作権者の名前またはコントリビューターの名前を使用してはならない。 +本ソフトウェアは、著作権者およびコントリビューターによって「現状のまま」提供さ +れており、明示黙示を問わず、商業的な使用可能性、および特定の目的に対する適合性 +に関する暗黙の保証も含め、またそれに限定されない、いかなる保証もありません。 +著作権者もコントリビューターも、事由のいかんを問わず、 損害発生の原因いかんを +問わず、かつ責任の根拠が契約であるか厳格責任であるか(過失その他の)不法行為で +あるかを問わず、仮にそのような損害が発生する可能性を知らされていたとしても、 +本ソフトウェアの使用によって発生した(代替品または代用サービスの調達、使用の +喪失、データの喪失、利益の喪失、業務の中断も含め、またそれに限定されない)直接 +損害、間接損害、偶発的な損害、特別損害、懲罰的損害、または結果損害について、 +一切責任を負わないものとします。 + +-------------------------------------------------------------------------------- +2. Googletest (tests/gtests/gtest/) +Copyright 2005, Google Inc. +Copyright 2006, Google Inc. +Copyright 2007, Google Inc. +Copyright 2008, Google Inc. +Copyright 2015, Google Inc. +All rights reserved. + +3-Clause BSD License + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +-------------------------------------------------------------------------------- +3. Instrumentation and Tracing Technology API (src/common/ittnotify/) +Copyright (c) 2011, Intel Corporation. All rights reserved. +Copyright (c) 2005-2014 Intel Corporation. All rights reserved. + +3-Clause BSD License + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +3. Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +-------------------------------------------------------------------------------- +4. CMake (cmake/FindOpenCL.cmake, cmake/FindBLAS.cmake, cmake/FindACL.cmake) +CMake - Cross Platform Makefile Generator +Copyright 2000-2020 Kitware, Inc. and Contributors +All rights reserved. + +3-Clause BSD License + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +* Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +* Neither the name of Kitware, Inc. nor the names of Contributors + may be used to endorse or promote products derived from this + software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +------------------------------------------------------------------------------ + +The following individuals and institutions are among the Contributors: + +* Aaron C. Meadows +* Adriaan de Groot +* Aleksey Avdeev +* Alexander Neundorf +* Alexander Smorkalov +* Alexey Sokolov +* Alex Merry +* Alex Turbov +* Andreas Pakulat +* Andreas Schneider +* André Rigland Brodtkorb +* Axel Huebl, Helmholtz-Zentrum Dresden - Rossendorf +* Benjamin Eikel +* Bjoern Ricks +* Brad Hards +* Christopher Harvey +* Christoph Grüninger +* Clement Creusot +* Daniel Blezek +* Daniel Pfeifer +* Enrico Scholz +* Eran Ifrah +* Esben Mose Hansen, Ange Optimization ApS +* Geoffrey Viola +* Google Inc +* Gregor Jasny +* Helio Chissini de Castro +* Ilya Lavrenov +* Insight Software Consortium +* Jan Woetzel +* Julien Schueller +* Kelly Thompson +* Konstantin Podsvirov +* Laurent Montel +* Mario Bensi +* Martin Gräßlin +* Mathieu Malaterre +* Matthaeus G. Chajdas +* Matthias Kretz +* Matthias Maennich +* Michael Hirsch, Ph.D. +* Michael Stürmer +* Miguel A. Figueroa-Villanueva +* Mike Jackson +* Mike McQuaid +* Nicolas Bock +* Nicolas Despres +* Nikita Krupen'ko +* NVIDIA Corporation +* OpenGamma Ltd. +* Patrick Stotko +* Per Øyvind Karlsen +* Peter Collingbourne +* Petr Gotthard +* Philip Lowman +* Philippe Proulx +* Raffi Enficiaud, Max Planck Society +* Raumfeld +* Roger Leigh +* Rolf Eike Beer +* Roman Donchenko +* Roman Kharitonov +* Ruslan Baratov +* Sebastian Holtermann +* Stephen Kelly +* Sylvain Joubert +* The Qt Company Ltd. +* Thomas Sondergaard +* Tobias Hunger +* Todd Gamblin +* Tristan Carel +* University of Dundee +* Vadim Zhukov +* Will Dicharry + +See version control history for details of individual contributions. + +The above copyright and license notice applies to distributions of +CMake in source and binary form. Third-party software packages supplied +with CMake under compatible licenses provide their own copyright notices +documented in corresponding subdirectories or source files. + +------------------------------------------------------------------------------ + +CMake was initially developed by Kitware with the following sponsorship: + + * National Library of Medicine at the National Institutes of Health + as part of the Insight Segmentation and Registration Toolkit (ITK). + + * US National Labs (Los Alamos, Livermore, Sandia) ASC Parallel + Visualization Initiative. + + * National Alliance for Medical Image Computing (NAMIC) is funded by the + National Institutes of Health through the NIH Roadmap for Medical Research, + Grant U54 EB005149. + + * Kitware, Inc. + +-------------------------------------------------------------------------------- +5. Xbyak_aarch64 (src/cpu/aarch64/xbyak_aarch64/) +Copyright 2019-2020 FUJITSU LIMITED + +Apache License, Version 2.0 + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." 
+ + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + +-------------------------------------------------------------------------------- +6. Boost C++ Libraries (src/common/primitive_hashing.hpp) +Copyright 2005-2014 Daniel James. 
+
+Boost Software License - Version 1.0 - August 17th, 2003
+
+Permission is hereby granted, free of charge, to any person or organization
+obtaining a copy of the software and accompanying documentation covered by
+this license (the "Software") to use, reproduce, display, distribute,
+execute, and transmit the Software, and to prepare derivative works of the
+Software, and to permit third-parties to whom the Software is furnished to
+do so, all subject to the following:
+
+The copyright notices in the Software and this entire statement, including
+the above license grant, this restriction and the following disclaimer,
+must be included in all copies of the Software, in whole or in part, and
+all derivative works of the Software, unless such copies or derivative
+works are solely in the form of machine-executable object code generated by
+a source language processor.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
+SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
+FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
+
+--------------------------------------------------------------------------------
+7. Intel(R) Graphics Compute Runtime for oneAPI Level Zero and OpenCL(TM)
+Driver (src/gpu/jit/ngen/npack/{elf_structs,hash}.hpp)
+Copyright (c) 2018 Intel Corporation
+
+Intel(R) Graphics Compiler (src/gpu/jit/ngen/npack/neo_structs.hpp)
+Copyright (c) 2019 Intel Corporation
+
+oneAPI Level Zero (src/sycl/level_zero)
+Copyright (C) 2019-2021 Intel Corporation
+
+Doxyrest toolkit (doc/doxyrest/*)
+Copyright (c) 2016, Tibbo Technology Inc
+Copyright (c) 2016, Vladimir Gladkov
+Copyright (c) 2016, Doxyrest maintainers
+
+MIT License
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+
+--------------------------------------------------------------------------------
+8. Sphinx (doc/sphinx/conf.py)
+Copyright (c) 2007-2021 by the Sphinx team (see AUTHORS file).
+All rights reserved.
+
+2-Clause BSD License
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+* Redistributions of source code must retain the above copyright
+  notice, this list of conditions and the following disclaimer.
+ +* Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +-------------------------------------------------------------------------------- +9. Intel(R) Metrics Discovery Application Programming Interface (src/gpu/ocl/mdapi/metrics_discovery_api.h) +MIT License + +Copyright (c) 2019, Intel Corporation + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the "Software"), +to deal in the Software without restriction, including without limitation +the rights to use, copy, modify, merge, publish, distribute, sublicense, +and/or sell copies of the Software, and to permit persons to whom the +Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included +in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +IN THE SOFTWARE. 
\ No newline at end of file diff --git a/nlp_toolkit/backends/neural_engine/test/gtest/CMakeLists.txt b/intel_extension_for_transformers/backends/neural_engine/test/gtest/CMakeLists.txt similarity index 100% rename from nlp_toolkit/backends/neural_engine/test/gtest/CMakeLists.txt rename to intel_extension_for_transformers/backends/neural_engine/test/gtest/CMakeLists.txt diff --git a/nlp_toolkit/backends/neural_engine/test/gtest/SparseLib/CMakeLists.txt b/intel_extension_for_transformers/backends/neural_engine/test/gtest/kernels/CMakeLists.txt similarity index 100% rename from nlp_toolkit/backends/neural_engine/test/gtest/SparseLib/CMakeLists.txt rename to intel_extension_for_transformers/backends/neural_engine/test/gtest/kernels/CMakeLists.txt diff --git a/nlp_toolkit/backends/neural_engine/test/gtest/SparseLib/main.cpp b/intel_extension_for_transformers/backends/neural_engine/test/gtest/kernels/main.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/test/gtest/SparseLib/main.cpp rename to intel_extension_for_transformers/backends/neural_engine/test/gtest/kernels/main.cpp diff --git a/nlp_toolkit/backends/neural_engine/test/gtest/SparseLib/test_attention.cpp b/intel_extension_for_transformers/backends/neural_engine/test/gtest/kernels/test_attention.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/test/gtest/SparseLib/test_attention.cpp rename to intel_extension_for_transformers/backends/neural_engine/test/gtest/kernels/test_attention.cpp diff --git a/nlp_toolkit/backends/neural_engine/test/gtest/SparseLib/test_eltwiseop_kernel.cpp b/intel_extension_for_transformers/backends/neural_engine/test/gtest/kernels/test_eltwiseop_kernel.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/test/gtest/SparseLib/test_eltwiseop_kernel.cpp rename to intel_extension_for_transformers/backends/neural_engine/test/gtest/kernels/test_eltwiseop_kernel.cpp diff --git a/nlp_toolkit/backends/neural_engine/test/gtest/SparseLib/test_gather_kernel.cpp b/intel_extension_for_transformers/backends/neural_engine/test/gtest/kernels/test_gather_kernel.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/test/gtest/SparseLib/test_gather_kernel.cpp rename to intel_extension_for_transformers/backends/neural_engine/test/gtest/kernels/test_gather_kernel.cpp diff --git a/nlp_toolkit/backends/neural_engine/test/gtest/SparseLib/test_layernorm_ba_kernel.cpp b/intel_extension_for_transformers/backends/neural_engine/test/gtest/kernels/test_layernorm_ba_kernel.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/test/gtest/SparseLib/test_layernorm_ba_kernel.cpp rename to intel_extension_for_transformers/backends/neural_engine/test/gtest/kernels/test_layernorm_ba_kernel.cpp diff --git a/nlp_toolkit/backends/neural_engine/test/gtest/SparseLib/test_matmul_avx512f_p2031_p2013.cpp b/intel_extension_for_transformers/backends/neural_engine/test/gtest/kernels/test_matmul_avx512f_p2031_p2013.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/test/gtest/SparseLib/test_matmul_avx512f_p2031_p2013.cpp rename to intel_extension_for_transformers/backends/neural_engine/test/gtest/kernels/test_matmul_avx512f_p2031_p2013.cpp diff --git a/nlp_toolkit/backends/neural_engine/test/gtest/SparseLib/test_matmul_vnni_noperm_p2031_p1302.cpp b/intel_extension_for_transformers/backends/neural_engine/test/gtest/kernels/test_matmul_vnni_noperm_p2031_p1302.cpp similarity index 100% rename from 
nlp_toolkit/backends/neural_engine/test/gtest/SparseLib/test_matmul_vnni_noperm_p2031_p1302.cpp rename to intel_extension_for_transformers/backends/neural_engine/test/gtest/kernels/test_matmul_vnni_noperm_p2031_p1302.cpp diff --git a/nlp_toolkit/backends/neural_engine/test/gtest/SparseLib/test_softmax_kernel.cpp b/intel_extension_for_transformers/backends/neural_engine/test/gtest/kernels/test_softmax_kernel.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/test/gtest/SparseLib/test_softmax_kernel.cpp rename to intel_extension_for_transformers/backends/neural_engine/test/gtest/kernels/test_softmax_kernel.cpp diff --git a/nlp_toolkit/backends/neural_engine/test/gtest/SparseLib/test_spmm_amx_bf16_x16_kernel.cpp b/intel_extension_for_transformers/backends/neural_engine/test/gtest/kernels/test_spmm_amx_bf16_x16_kernel.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/test/gtest/SparseLib/test_spmm_amx_bf16_x16_kernel.cpp rename to intel_extension_for_transformers/backends/neural_engine/test/gtest/kernels/test_spmm_amx_bf16_x16_kernel.cpp diff --git a/nlp_toolkit/backends/neural_engine/test/gtest/SparseLib/test_spmm_avx512f_kernel.cpp b/intel_extension_for_transformers/backends/neural_engine/test/gtest/kernels/test_spmm_avx512f_kernel.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/test/gtest/SparseLib/test_spmm_avx512f_kernel.cpp rename to intel_extension_for_transformers/backends/neural_engine/test/gtest/kernels/test_spmm_avx512f_kernel.cpp diff --git a/nlp_toolkit/backends/neural_engine/test/gtest/SparseLib/test_spmm_vnni_kernel.cpp b/intel_extension_for_transformers/backends/neural_engine/test/gtest/kernels/test_spmm_vnni_kernel.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/test/gtest/SparseLib/test_spmm_vnni_kernel.cpp rename to intel_extension_for_transformers/backends/neural_engine/test/gtest/kernels/test_spmm_vnni_kernel.cpp diff --git a/nlp_toolkit/backends/neural_engine/test/gtest/SparseLib/unit_test_utils.hpp b/intel_extension_for_transformers/backends/neural_engine/test/gtest/kernels/unit_test_utils.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/test/gtest/SparseLib/unit_test_utils.hpp rename to intel_extension_for_transformers/backends/neural_engine/test/gtest/kernels/unit_test_utils.hpp diff --git a/nlp_toolkit/backends/neural_engine/test/gtest/main.cpp b/intel_extension_for_transformers/backends/neural_engine/test/gtest/main.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/test/gtest/main.cpp rename to intel_extension_for_transformers/backends/neural_engine/test/gtest/main.cpp diff --git a/nlp_toolkit/backends/neural_engine/test/gtest/test_binary_add_op.cpp b/intel_extension_for_transformers/backends/neural_engine/test/gtest/test_binary_add_op.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/test/gtest/test_binary_add_op.cpp rename to intel_extension_for_transformers/backends/neural_engine/test/gtest/test_binary_add_op.cpp diff --git a/nlp_toolkit/backends/neural_engine/test/gtest/test_cast_op.cpp b/intel_extension_for_transformers/backends/neural_engine/test/gtest/test_cast_op.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/test/gtest/test_cast_op.cpp rename to intel_extension_for_transformers/backends/neural_engine/test/gtest/test_cast_op.cpp diff --git a/nlp_toolkit/backends/neural_engine/test/gtest/test_concat_op.cpp 
b/intel_extension_for_transformers/backends/neural_engine/test/gtest/test_concat_op.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/test/gtest/test_concat_op.cpp rename to intel_extension_for_transformers/backends/neural_engine/test/gtest/test_concat_op.cpp diff --git a/nlp_toolkit/backends/neural_engine/test/gtest/test_convolution_op.cpp b/intel_extension_for_transformers/backends/neural_engine/test/gtest/test_convolution_op.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/test/gtest/test_convolution_op.cpp rename to intel_extension_for_transformers/backends/neural_engine/test/gtest/test_convolution_op.cpp diff --git a/nlp_toolkit/backends/neural_engine/test/gtest/test_dequantize_op.cpp b/intel_extension_for_transformers/backends/neural_engine/test/gtest/test_dequantize_op.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/test/gtest/test_dequantize_op.cpp rename to intel_extension_for_transformers/backends/neural_engine/test/gtest/test_dequantize_op.cpp diff --git a/nlp_toolkit/backends/neural_engine/test/gtest/test_div_op.cpp b/intel_extension_for_transformers/backends/neural_engine/test/gtest/test_div_op.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/test/gtest/test_div_op.cpp rename to intel_extension_for_transformers/backends/neural_engine/test/gtest/test_div_op.cpp diff --git a/nlp_toolkit/backends/neural_engine/test/gtest/test_embeddingbag_op.cpp b/intel_extension_for_transformers/backends/neural_engine/test/gtest/test_embeddingbag_op.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/test/gtest/test_embeddingbag_op.cpp rename to intel_extension_for_transformers/backends/neural_engine/test/gtest/test_embeddingbag_op.cpp diff --git a/nlp_toolkit/backends/neural_engine/test/gtest/test_erf_op.cpp b/intel_extension_for_transformers/backends/neural_engine/test/gtest/test_erf_op.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/test/gtest/test_erf_op.cpp rename to intel_extension_for_transformers/backends/neural_engine/test/gtest/test_erf_op.cpp diff --git a/nlp_toolkit/backends/neural_engine/test/gtest/test_expand_indices_op.cpp b/intel_extension_for_transformers/backends/neural_engine/test/gtest/test_expand_indices_op.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/test/gtest/test_expand_indices_op.cpp rename to intel_extension_for_transformers/backends/neural_engine/test/gtest/test_expand_indices_op.cpp diff --git a/nlp_toolkit/backends/neural_engine/test/gtest/test_gather_element_op.cpp b/intel_extension_for_transformers/backends/neural_engine/test/gtest/test_gather_element_op.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/test/gtest/test_gather_element_op.cpp rename to intel_extension_for_transformers/backends/neural_engine/test/gtest/test_gather_element_op.cpp diff --git a/nlp_toolkit/backends/neural_engine/test/gtest/test_gather_op.cpp b/intel_extension_for_transformers/backends/neural_engine/test/gtest/test_gather_op.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/test/gtest/test_gather_op.cpp rename to intel_extension_for_transformers/backends/neural_engine/test/gtest/test_gather_op.cpp diff --git a/nlp_toolkit/backends/neural_engine/test/gtest/test_gelu_op.cpp b/intel_extension_for_transformers/backends/neural_engine/test/gtest/test_gelu_op.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/test/gtest/test_gelu_op.cpp rename to 
intel_extension_for_transformers/backends/neural_engine/test/gtest/test_gelu_op.cpp diff --git a/nlp_toolkit/backends/neural_engine/test/gtest/test_group_norm_op.cpp b/intel_extension_for_transformers/backends/neural_engine/test/gtest/test_group_norm_op.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/test/gtest/test_group_norm_op.cpp rename to intel_extension_for_transformers/backends/neural_engine/test/gtest/test_group_norm_op.cpp diff --git a/nlp_toolkit/backends/neural_engine/test/gtest/test_i_malloc.cpp b/intel_extension_for_transformers/backends/neural_engine/test/gtest/test_i_malloc.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/test/gtest/test_i_malloc.cpp rename to intel_extension_for_transformers/backends/neural_engine/test/gtest/test_i_malloc.cpp diff --git a/nlp_toolkit/backends/neural_engine/test/gtest/test_inner_product_op.cpp b/intel_extension_for_transformers/backends/neural_engine/test/gtest/test_inner_product_op.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/test/gtest/test_inner_product_op.cpp rename to intel_extension_for_transformers/backends/neural_engine/test/gtest/test_inner_product_op.cpp diff --git a/nlp_toolkit/backends/neural_engine/test/gtest/test_layer_norm_op.cpp b/intel_extension_for_transformers/backends/neural_engine/test/gtest/test_layer_norm_op.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/test/gtest/test_layer_norm_op.cpp rename to intel_extension_for_transformers/backends/neural_engine/test/gtest/test_layer_norm_op.cpp diff --git a/nlp_toolkit/backends/neural_engine/test/gtest/test_matmul_op.cpp b/intel_extension_for_transformers/backends/neural_engine/test/gtest/test_matmul_op.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/test/gtest/test_matmul_op.cpp rename to intel_extension_for_transformers/backends/neural_engine/test/gtest/test_matmul_op.cpp diff --git a/nlp_toolkit/backends/neural_engine/test/gtest/test_mul_op.cpp b/intel_extension_for_transformers/backends/neural_engine/test/gtest/test_mul_op.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/test/gtest/test_mul_op.cpp rename to intel_extension_for_transformers/backends/neural_engine/test/gtest/test_mul_op.cpp diff --git a/nlp_toolkit/backends/neural_engine/test/gtest/test_one_hot_op.cpp b/intel_extension_for_transformers/backends/neural_engine/test/gtest/test_one_hot_op.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/test/gtest/test_one_hot_op.cpp rename to intel_extension_for_transformers/backends/neural_engine/test/gtest/test_one_hot_op.cpp diff --git a/nlp_toolkit/backends/neural_engine/test/gtest/test_padding_sequence_op.cpp b/intel_extension_for_transformers/backends/neural_engine/test/gtest/test_padding_sequence_op.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/test/gtest/test_padding_sequence_op.cpp rename to intel_extension_for_transformers/backends/neural_engine/test/gtest/test_padding_sequence_op.cpp diff --git a/nlp_toolkit/backends/neural_engine/test/gtest/test_position_ids_op.cpp b/intel_extension_for_transformers/backends/neural_engine/test/gtest/test_position_ids_op.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/test/gtest/test_position_ids_op.cpp rename to intel_extension_for_transformers/backends/neural_engine/test/gtest/test_position_ids_op.cpp diff --git a/nlp_toolkit/backends/neural_engine/test/gtest/test_pow_op.cpp 
b/intel_extension_for_transformers/backends/neural_engine/test/gtest/test_pow_op.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/test/gtest/test_pow_op.cpp rename to intel_extension_for_transformers/backends/neural_engine/test/gtest/test_pow_op.cpp diff --git a/nlp_toolkit/backends/neural_engine/test/gtest/test_range_op.cpp b/intel_extension_for_transformers/backends/neural_engine/test/gtest/test_range_op.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/test/gtest/test_range_op.cpp rename to intel_extension_for_transformers/backends/neural_engine/test/gtest/test_range_op.cpp diff --git a/nlp_toolkit/backends/neural_engine/test/gtest/test_reduce_mean_op.cpp b/intel_extension_for_transformers/backends/neural_engine/test/gtest/test_reduce_mean_op.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/test/gtest/test_reduce_mean_op.cpp rename to intel_extension_for_transformers/backends/neural_engine/test/gtest/test_reduce_mean_op.cpp diff --git a/nlp_toolkit/backends/neural_engine/test/gtest/test_reorder_op.cpp b/intel_extension_for_transformers/backends/neural_engine/test/gtest/test_reorder_op.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/test/gtest/test_reorder_op.cpp rename to intel_extension_for_transformers/backends/neural_engine/test/gtest/test_reorder_op.cpp diff --git a/nlp_toolkit/backends/neural_engine/test/gtest/test_shape_op.cpp b/intel_extension_for_transformers/backends/neural_engine/test/gtest/test_shape_op.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/test/gtest/test_shape_op.cpp rename to intel_extension_for_transformers/backends/neural_engine/test/gtest/test_shape_op.cpp diff --git a/nlp_toolkit/backends/neural_engine/test/gtest/test_slice_op.cpp b/intel_extension_for_transformers/backends/neural_engine/test/gtest/test_slice_op.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/test/gtest/test_slice_op.cpp rename to intel_extension_for_transformers/backends/neural_engine/test/gtest/test_slice_op.cpp diff --git a/nlp_toolkit/backends/neural_engine/test/gtest/test_softmax_op.cpp b/intel_extension_for_transformers/backends/neural_engine/test/gtest/test_softmax_op.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/test/gtest/test_softmax_op.cpp rename to intel_extension_for_transformers/backends/neural_engine/test/gtest/test_softmax_op.cpp diff --git a/nlp_toolkit/backends/neural_engine/test/gtest/test_sparse_inner_product.cpp b/intel_extension_for_transformers/backends/neural_engine/test/gtest/test_sparse_inner_product.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/test/gtest/test_sparse_inner_product.cpp rename to intel_extension_for_transformers/backends/neural_engine/test/gtest/test_sparse_inner_product.cpp diff --git a/nlp_toolkit/backends/neural_engine/test/gtest/test_split_op.cpp b/intel_extension_for_transformers/backends/neural_engine/test/gtest/test_split_op.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/test/gtest/test_split_op.cpp rename to intel_extension_for_transformers/backends/neural_engine/test/gtest/test_split_op.cpp diff --git a/nlp_toolkit/backends/neural_engine/test/gtest/test_sqrt_op.cpp b/intel_extension_for_transformers/backends/neural_engine/test/gtest/test_sqrt_op.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/test/gtest/test_sqrt_op.cpp rename to 
intel_extension_for_transformers/backends/neural_engine/test/gtest/test_sqrt_op.cpp diff --git a/nlp_toolkit/backends/neural_engine/test/gtest/test_strided_slice_op.cpp b/intel_extension_for_transformers/backends/neural_engine/test/gtest/test_strided_slice_op.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/test/gtest/test_strided_slice_op.cpp rename to intel_extension_for_transformers/backends/neural_engine/test/gtest/test_strided_slice_op.cpp diff --git a/nlp_toolkit/backends/neural_engine/test/gtest/test_sub_op.cpp b/intel_extension_for_transformers/backends/neural_engine/test/gtest/test_sub_op.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/test/gtest/test_sub_op.cpp rename to intel_extension_for_transformers/backends/neural_engine/test/gtest/test_sub_op.cpp diff --git a/nlp_toolkit/backends/neural_engine/test/gtest/test_tanh_op.cpp b/intel_extension_for_transformers/backends/neural_engine/test/gtest/test_tanh_op.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/test/gtest/test_tanh_op.cpp rename to intel_extension_for_transformers/backends/neural_engine/test/gtest/test_tanh_op.cpp diff --git a/nlp_toolkit/backends/neural_engine/test/gtest/test_token_type_ids_op.cpp b/intel_extension_for_transformers/backends/neural_engine/test/gtest/test_token_type_ids_op.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/test/gtest/test_token_type_ids_op.cpp rename to intel_extension_for_transformers/backends/neural_engine/test/gtest/test_token_type_ids_op.cpp diff --git a/nlp_toolkit/backends/neural_engine/test/gtest/test_unsqueeze_op.cpp b/intel_extension_for_transformers/backends/neural_engine/test/gtest/test_unsqueeze_op.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/test/gtest/test_unsqueeze_op.cpp rename to intel_extension_for_transformers/backends/neural_engine/test/gtest/test_unsqueeze_op.cpp diff --git a/nlp_toolkit/backends/neural_engine/test/SparseLib/benchmark/CMakeLists.txt b/intel_extension_for_transformers/backends/neural_engine/test/kernels/benchmark/CMakeLists.txt similarity index 100% rename from nlp_toolkit/backends/neural_engine/test/SparseLib/benchmark/CMakeLists.txt rename to intel_extension_for_transformers/backends/neural_engine/test/kernels/benchmark/CMakeLists.txt diff --git a/nlp_toolkit/backends/neural_engine/test/SparseLib/benchmark/README.md b/intel_extension_for_transformers/backends/neural_engine/test/kernels/benchmark/README.md similarity index 94% rename from nlp_toolkit/backends/neural_engine/test/SparseLib/benchmark/README.md rename to intel_extension_for_transformers/backends/neural_engine/test/kernels/benchmark/README.md index 6a4d827f34a..0e935f8912f 100644 --- a/nlp_toolkit/backends/neural_engine/test/SparseLib/benchmark/README.md +++ b/intel_extension_for_transformers/backends/neural_engine/test/kernels/benchmark/README.md @@ -1,5 +1,5 @@ -# Benchmark for SparseLib -To perform accuracy test and performance test for kernels in [SparseLib](https://github.com/intel-innersource/frameworks.ai.nlp-toolkit.intel-nlp-toolkit/tree/develop/nlp_toolkit/backends/neural_engine/SparseLib). +# Benchmark for Transformers-accelerated Libraries (SparseLib) +To perform accuracy and performance tests for kernels in [SparseLib](https://github.com/intel/intel-extension-for-transformers/tree/develop/intel_extension_for_transformers/backends/neural_engine/kernels).
## Build ```shell diff --git a/nlp_toolkit/backends/neural_engine/test/SparseLib/benchmark/benchmark.cpp b/intel_extension_for_transformers/backends/neural_engine/test/kernels/benchmark/benchmark.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/test/SparseLib/benchmark/benchmark.cpp rename to intel_extension_for_transformers/backends/neural_engine/test/kernels/benchmark/benchmark.cpp diff --git a/nlp_toolkit/backends/neural_engine/test/SparseLib/benchmark/benchmark_utils.cpp b/intel_extension_for_transformers/backends/neural_engine/test/kernels/benchmark/benchmark_utils.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/test/SparseLib/benchmark/benchmark_utils.cpp rename to intel_extension_for_transformers/backends/neural_engine/test/kernels/benchmark/benchmark_utils.cpp diff --git a/nlp_toolkit/backends/neural_engine/test/SparseLib/benchmark/benchmark_utils.hpp b/intel_extension_for_transformers/backends/neural_engine/test/kernels/benchmark/benchmark_utils.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/test/SparseLib/benchmark/benchmark_utils.hpp rename to intel_extension_for_transformers/backends/neural_engine/test/kernels/benchmark/benchmark_utils.hpp diff --git a/nlp_toolkit/backends/neural_engine/test/SparseLib/benchmark/ci/benchmark.sh b/intel_extension_for_transformers/backends/neural_engine/test/kernels/benchmark/ci/benchmark.sh similarity index 100% rename from nlp_toolkit/backends/neural_engine/test/SparseLib/benchmark/ci/benchmark.sh rename to intel_extension_for_transformers/backends/neural_engine/test/kernels/benchmark/ci/benchmark.sh diff --git a/nlp_toolkit/backends/neural_engine/test/SparseLib/benchmark/ci/inputs/README.md b/intel_extension_for_transformers/backends/neural_engine/test/kernels/benchmark/ci/inputs/README.md similarity index 100% rename from nlp_toolkit/backends/neural_engine/test/SparseLib/benchmark/ci/inputs/README.md rename to intel_extension_for_transformers/backends/neural_engine/test/kernels/benchmark/ci/inputs/README.md diff --git a/nlp_toolkit/backends/neural_engine/test/SparseLib/benchmark/ci/inputs/ci_amx_bf16_x16_input b/intel_extension_for_transformers/backends/neural_engine/test/kernels/benchmark/ci/inputs/ci_amx_bf16_x16_input similarity index 100% rename from nlp_toolkit/backends/neural_engine/test/SparseLib/benchmark/ci/inputs/ci_amx_bf16_x16_input rename to intel_extension_for_transformers/backends/neural_engine/test/kernels/benchmark/ci/inputs/ci_amx_bf16_x16_input diff --git a/nlp_toolkit/backends/neural_engine/test/SparseLib/benchmark/ci/inputs/ci_eltwiseop_input b/intel_extension_for_transformers/backends/neural_engine/test/kernels/benchmark/ci/inputs/ci_eltwiseop_input similarity index 100% rename from nlp_toolkit/backends/neural_engine/test/SparseLib/benchmark/ci/inputs/ci_eltwiseop_input rename to intel_extension_for_transformers/backends/neural_engine/test/kernels/benchmark/ci/inputs/ci_eltwiseop_input diff --git a/nlp_toolkit/backends/neural_engine/test/SparseLib/benchmark/ci/inputs/ci_layernorm_ba_input b/intel_extension_for_transformers/backends/neural_engine/test/kernels/benchmark/ci/inputs/ci_layernorm_ba_input similarity index 100% rename from nlp_toolkit/backends/neural_engine/test/SparseLib/benchmark/ci/inputs/ci_layernorm_ba_input rename to intel_extension_for_transformers/backends/neural_engine/test/kernels/benchmark/ci/inputs/ci_layernorm_ba_input diff --git 
a/nlp_toolkit/backends/neural_engine/test/SparseLib/benchmark/ci/inputs/ci_matmul_avx512f_p2031_p2013_input b/intel_extension_for_transformers/backends/neural_engine/test/kernels/benchmark/ci/inputs/ci_matmul_avx512f_p2031_p2013_input similarity index 100% rename from nlp_toolkit/backends/neural_engine/test/SparseLib/benchmark/ci/inputs/ci_matmul_avx512f_p2031_p2013_input rename to intel_extension_for_transformers/backends/neural_engine/test/kernels/benchmark/ci/inputs/ci_matmul_avx512f_p2031_p2013_input diff --git a/nlp_toolkit/backends/neural_engine/test/SparseLib/benchmark/ci/inputs/ci_matmul_vnni_noperm_p2031_p1302_input b/intel_extension_for_transformers/backends/neural_engine/test/kernels/benchmark/ci/inputs/ci_matmul_vnni_noperm_p2031_p1302_input similarity index 100% rename from nlp_toolkit/backends/neural_engine/test/SparseLib/benchmark/ci/inputs/ci_matmul_vnni_noperm_p2031_p1302_input rename to intel_extension_for_transformers/backends/neural_engine/test/kernels/benchmark/ci/inputs/ci_matmul_vnni_noperm_p2031_p1302_input diff --git a/nlp_toolkit/backends/neural_engine/test/SparseLib/benchmark/ci/inputs/ci_softmax_input b/intel_extension_for_transformers/backends/neural_engine/test/kernels/benchmark/ci/inputs/ci_softmax_input similarity index 100% rename from nlp_toolkit/backends/neural_engine/test/SparseLib/benchmark/ci/inputs/ci_softmax_input rename to intel_extension_for_transformers/backends/neural_engine/test/kernels/benchmark/ci/inputs/ci_softmax_input diff --git a/nlp_toolkit/backends/neural_engine/test/SparseLib/benchmark/ci/inputs/ci_vnni_input b/intel_extension_for_transformers/backends/neural_engine/test/kernels/benchmark/ci/inputs/ci_vnni_input similarity index 100% rename from nlp_toolkit/backends/neural_engine/test/SparseLib/benchmark/ci/inputs/ci_vnni_input rename to intel_extension_for_transformers/backends/neural_engine/test/kernels/benchmark/ci/inputs/ci_vnni_input diff --git a/nlp_toolkit/backends/neural_engine/test/SparseLib/benchmark/ci/run_ci.sh b/intel_extension_for_transformers/backends/neural_engine/test/kernels/benchmark/ci/run_ci.sh similarity index 100% rename from nlp_toolkit/backends/neural_engine/test/SparseLib/benchmark/ci/run_ci.sh rename to intel_extension_for_transformers/backends/neural_engine/test/kernels/benchmark/ci/run_ci.sh diff --git a/nlp_toolkit/backends/neural_engine/test/SparseLib/benchmark/ci/to_summary.sh b/intel_extension_for_transformers/backends/neural_engine/test/kernels/benchmark/ci/to_summary.sh similarity index 100% rename from nlp_toolkit/backends/neural_engine/test/SparseLib/benchmark/ci/to_summary.sh rename to intel_extension_for_transformers/backends/neural_engine/test/kernels/benchmark/ci/to_summary.sh diff --git a/nlp_toolkit/backends/neural_engine/test/SparseLib/benchmark/common_utils.cpp b/intel_extension_for_transformers/backends/neural_engine/test/kernels/benchmark/common_utils.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/test/SparseLib/benchmark/common_utils.cpp rename to intel_extension_for_transformers/backends/neural_engine/test/kernels/benchmark/common_utils.cpp diff --git a/nlp_toolkit/backends/neural_engine/test/SparseLib/benchmark/common_utils.hpp b/intel_extension_for_transformers/backends/neural_engine/test/kernels/benchmark/common_utils.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/test/SparseLib/benchmark/common_utils.hpp rename to intel_extension_for_transformers/backends/neural_engine/test/kernels/benchmark/common_utils.hpp diff --git 
a/nlp_toolkit/backends/neural_engine/test/SparseLib/benchmark/eltwiseop/CMakeLists.txt b/intel_extension_for_transformers/backends/neural_engine/test/kernels/benchmark/eltwiseop/CMakeLists.txt similarity index 100% rename from nlp_toolkit/backends/neural_engine/test/SparseLib/benchmark/eltwiseop/CMakeLists.txt rename to intel_extension_for_transformers/backends/neural_engine/test/kernels/benchmark/eltwiseop/CMakeLists.txt diff --git a/nlp_toolkit/backends/neural_engine/test/SparseLib/benchmark/eltwiseop/eltwiseop.cpp b/intel_extension_for_transformers/backends/neural_engine/test/kernels/benchmark/eltwiseop/eltwiseop.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/test/SparseLib/benchmark/eltwiseop/eltwiseop.cpp rename to intel_extension_for_transformers/backends/neural_engine/test/kernels/benchmark/eltwiseop/eltwiseop.cpp diff --git a/nlp_toolkit/backends/neural_engine/test/SparseLib/benchmark/eltwiseop/eltwiseop.hpp b/intel_extension_for_transformers/backends/neural_engine/test/kernels/benchmark/eltwiseop/eltwiseop.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/test/SparseLib/benchmark/eltwiseop/eltwiseop.hpp rename to intel_extension_for_transformers/backends/neural_engine/test/kernels/benchmark/eltwiseop/eltwiseop.hpp diff --git a/nlp_toolkit/backends/neural_engine/test/SparseLib/benchmark/layernorm_ba/CMakeLists.txt b/intel_extension_for_transformers/backends/neural_engine/test/kernels/benchmark/layernorm_ba/CMakeLists.txt similarity index 100% rename from nlp_toolkit/backends/neural_engine/test/SparseLib/benchmark/layernorm_ba/CMakeLists.txt rename to intel_extension_for_transformers/backends/neural_engine/test/kernels/benchmark/layernorm_ba/CMakeLists.txt diff --git a/nlp_toolkit/backends/neural_engine/test/SparseLib/benchmark/layernorm_ba/layernorm_ba.cpp b/intel_extension_for_transformers/backends/neural_engine/test/kernels/benchmark/layernorm_ba/layernorm_ba.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/test/SparseLib/benchmark/layernorm_ba/layernorm_ba.cpp rename to intel_extension_for_transformers/backends/neural_engine/test/kernels/benchmark/layernorm_ba/layernorm_ba.cpp diff --git a/nlp_toolkit/backends/neural_engine/test/SparseLib/benchmark/layernorm_ba/layernorm_ba.hpp b/intel_extension_for_transformers/backends/neural_engine/test/kernels/benchmark/layernorm_ba/layernorm_ba.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/test/SparseLib/benchmark/layernorm_ba/layernorm_ba.hpp rename to intel_extension_for_transformers/backends/neural_engine/test/kernels/benchmark/layernorm_ba/layernorm_ba.hpp diff --git a/nlp_toolkit/backends/neural_engine/test/SparseLib/benchmark/softmax/CMakeLists.txt b/intel_extension_for_transformers/backends/neural_engine/test/kernels/benchmark/softmax/CMakeLists.txt similarity index 100% rename from nlp_toolkit/backends/neural_engine/test/SparseLib/benchmark/softmax/CMakeLists.txt rename to intel_extension_for_transformers/backends/neural_engine/test/kernels/benchmark/softmax/CMakeLists.txt diff --git a/nlp_toolkit/backends/neural_engine/test/SparseLib/benchmark/softmax/softmax.cpp b/intel_extension_for_transformers/backends/neural_engine/test/kernels/benchmark/softmax/softmax.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/test/SparseLib/benchmark/softmax/softmax.cpp rename to intel_extension_for_transformers/backends/neural_engine/test/kernels/benchmark/softmax/softmax.cpp diff --git 
a/nlp_toolkit/backends/neural_engine/test/SparseLib/benchmark/softmax/softmax.hpp b/intel_extension_for_transformers/backends/neural_engine/test/kernels/benchmark/softmax/softmax.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/test/SparseLib/benchmark/softmax/softmax.hpp rename to intel_extension_for_transformers/backends/neural_engine/test/kernels/benchmark/softmax/softmax.hpp diff --git a/nlp_toolkit/backends/neural_engine/test/SparseLib/benchmark/sparse_matmul/CMakeLists.txt b/intel_extension_for_transformers/backends/neural_engine/test/kernels/benchmark/sparse_matmul/CMakeLists.txt similarity index 100% rename from nlp_toolkit/backends/neural_engine/test/SparseLib/benchmark/sparse_matmul/CMakeLists.txt rename to intel_extension_for_transformers/backends/neural_engine/test/kernels/benchmark/sparse_matmul/CMakeLists.txt diff --git a/nlp_toolkit/backends/neural_engine/test/SparseLib/benchmark/sparse_matmul/sparse_matmul.cpp b/intel_extension_for_transformers/backends/neural_engine/test/kernels/benchmark/sparse_matmul/sparse_matmul.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/test/SparseLib/benchmark/sparse_matmul/sparse_matmul.cpp rename to intel_extension_for_transformers/backends/neural_engine/test/kernels/benchmark/sparse_matmul/sparse_matmul.cpp diff --git a/nlp_toolkit/backends/neural_engine/test/SparseLib/benchmark/sparse_matmul/sparse_matmul.hpp b/intel_extension_for_transformers/backends/neural_engine/test/kernels/benchmark/sparse_matmul/sparse_matmul.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/test/SparseLib/benchmark/sparse_matmul/sparse_matmul.hpp rename to intel_extension_for_transformers/backends/neural_engine/test/kernels/benchmark/sparse_matmul/sparse_matmul.hpp diff --git a/nlp_toolkit/backends/neural_engine/test/SparseLib/benchmark/sparse_matmul/spmm_amx_bf16_x16.cpp b/intel_extension_for_transformers/backends/neural_engine/test/kernels/benchmark/sparse_matmul/spmm_amx_bf16_x16.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/test/SparseLib/benchmark/sparse_matmul/spmm_amx_bf16_x16.cpp rename to intel_extension_for_transformers/backends/neural_engine/test/kernels/benchmark/sparse_matmul/spmm_amx_bf16_x16.cpp diff --git a/nlp_toolkit/backends/neural_engine/test/SparseLib/benchmark/sparse_matmul/spmm_amx_bf16_x16.hpp b/intel_extension_for_transformers/backends/neural_engine/test/kernels/benchmark/sparse_matmul/spmm_amx_bf16_x16.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/test/SparseLib/benchmark/sparse_matmul/spmm_amx_bf16_x16.hpp rename to intel_extension_for_transformers/backends/neural_engine/test/kernels/benchmark/sparse_matmul/spmm_amx_bf16_x16.hpp diff --git a/nlp_toolkit/backends/neural_engine/test/SparseLib/benchmark/sparse_matmul/spmm_avx512f.cpp b/intel_extension_for_transformers/backends/neural_engine/test/kernels/benchmark/sparse_matmul/spmm_avx512f.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/test/SparseLib/benchmark/sparse_matmul/spmm_avx512f.cpp rename to intel_extension_for_transformers/backends/neural_engine/test/kernels/benchmark/sparse_matmul/spmm_avx512f.cpp diff --git a/nlp_toolkit/backends/neural_engine/test/SparseLib/benchmark/sparse_matmul/spmm_avx512f.hpp b/intel_extension_for_transformers/backends/neural_engine/test/kernels/benchmark/sparse_matmul/spmm_avx512f.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/test/SparseLib/benchmark/sparse_matmul/spmm_avx512f.hpp 
rename to intel_extension_for_transformers/backends/neural_engine/test/kernels/benchmark/sparse_matmul/spmm_avx512f.hpp diff --git a/nlp_toolkit/backends/neural_engine/test/SparseLib/benchmark/sparse_matmul/spmm_vnni.cpp b/intel_extension_for_transformers/backends/neural_engine/test/kernels/benchmark/sparse_matmul/spmm_vnni.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/test/SparseLib/benchmark/sparse_matmul/spmm_vnni.cpp rename to intel_extension_for_transformers/backends/neural_engine/test/kernels/benchmark/sparse_matmul/spmm_vnni.cpp diff --git a/nlp_toolkit/backends/neural_engine/test/SparseLib/benchmark/sparse_matmul/spmm_vnni.hpp b/intel_extension_for_transformers/backends/neural_engine/test/kernels/benchmark/sparse_matmul/spmm_vnni.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/test/SparseLib/benchmark/sparse_matmul/spmm_vnni.hpp rename to intel_extension_for_transformers/backends/neural_engine/test/kernels/benchmark/sparse_matmul/spmm_vnni.hpp diff --git a/nlp_toolkit/backends/neural_engine/test/SparseLib/benchmark/transpose_matmul/CMakeLists.txt b/intel_extension_for_transformers/backends/neural_engine/test/kernels/benchmark/transpose_matmul/CMakeLists.txt similarity index 100% rename from nlp_toolkit/backends/neural_engine/test/SparseLib/benchmark/transpose_matmul/CMakeLists.txt rename to intel_extension_for_transformers/backends/neural_engine/test/kernels/benchmark/transpose_matmul/CMakeLists.txt diff --git a/nlp_toolkit/backends/neural_engine/test/SparseLib/benchmark/transpose_matmul/matmul_avx512f_p2031_p2013.cpp b/intel_extension_for_transformers/backends/neural_engine/test/kernels/benchmark/transpose_matmul/matmul_avx512f_p2031_p2013.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/test/SparseLib/benchmark/transpose_matmul/matmul_avx512f_p2031_p2013.cpp rename to intel_extension_for_transformers/backends/neural_engine/test/kernels/benchmark/transpose_matmul/matmul_avx512f_p2031_p2013.cpp diff --git a/nlp_toolkit/backends/neural_engine/test/SparseLib/benchmark/transpose_matmul/matmul_avx512f_p2031_p2013.hpp b/intel_extension_for_transformers/backends/neural_engine/test/kernels/benchmark/transpose_matmul/matmul_avx512f_p2031_p2013.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/test/SparseLib/benchmark/transpose_matmul/matmul_avx512f_p2031_p2013.hpp rename to intel_extension_for_transformers/backends/neural_engine/test/kernels/benchmark/transpose_matmul/matmul_avx512f_p2031_p2013.hpp diff --git a/nlp_toolkit/backends/neural_engine/test/SparseLib/benchmark/transpose_matmul/matmul_vnni_noperm_p2031_p1302.cpp b/intel_extension_for_transformers/backends/neural_engine/test/kernels/benchmark/transpose_matmul/matmul_vnni_noperm_p2031_p1302.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/test/SparseLib/benchmark/transpose_matmul/matmul_vnni_noperm_p2031_p1302.cpp rename to intel_extension_for_transformers/backends/neural_engine/test/kernels/benchmark/transpose_matmul/matmul_vnni_noperm_p2031_p1302.cpp diff --git a/nlp_toolkit/backends/neural_engine/test/SparseLib/benchmark/transpose_matmul/matmul_vnni_noperm_p2031_p1302.hpp b/intel_extension_for_transformers/backends/neural_engine/test/kernels/benchmark/transpose_matmul/matmul_vnni_noperm_p2031_p1302.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/test/SparseLib/benchmark/transpose_matmul/matmul_vnni_noperm_p2031_p1302.hpp rename to 
intel_extension_for_transformers/backends/neural_engine/test/kernels/benchmark/transpose_matmul/matmul_vnni_noperm_p2031_p1302.hpp diff --git a/nlp_toolkit/backends/neural_engine/test/SparseLib/benchmark/transpose_matmul/transpose_matmul.cpp b/intel_extension_for_transformers/backends/neural_engine/test/kernels/benchmark/transpose_matmul/transpose_matmul.cpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/test/SparseLib/benchmark/transpose_matmul/transpose_matmul.cpp rename to intel_extension_for_transformers/backends/neural_engine/test/kernels/benchmark/transpose_matmul/transpose_matmul.cpp diff --git a/nlp_toolkit/backends/neural_engine/test/SparseLib/benchmark/transpose_matmul/transpose_matmul.hpp b/intel_extension_for_transformers/backends/neural_engine/test/kernels/benchmark/transpose_matmul/transpose_matmul.hpp similarity index 100% rename from nlp_toolkit/backends/neural_engine/test/SparseLib/benchmark/transpose_matmul/transpose_matmul.hpp rename to intel_extension_for_transformers/backends/neural_engine/test/kernels/benchmark/transpose_matmul/transpose_matmul.hpp diff --git a/nlp_toolkit/backends/neural_engine/test/pytest/requirements.txt b/intel_extension_for_transformers/backends/neural_engine/test/pytest/requirements.txt similarity index 100% rename from nlp_toolkit/backends/neural_engine/test/pytest/requirements.txt rename to intel_extension_for_transformers/backends/neural_engine/test/pytest/requirements.txt diff --git a/nlp_toolkit/backends/neural_engine/test/pytest/test_add_cls_token.py b/intel_extension_for_transformers/backends/neural_engine/test/pytest/test_add_cls_token.py similarity index 97% rename from nlp_toolkit/backends/neural_engine/test/pytest/test_add_cls_token.py rename to intel_extension_for_transformers/backends/neural_engine/test/pytest/test_add_cls_token.py index 619992f7889..f25b8e77a03 100644 --- a/nlp_toolkit/backends/neural_engine/test/pytest/test_add_cls_token.py +++ b/intel_extension_for_transformers/backends/neural_engine/test/pytest/test_add_cls_token.py @@ -17,10 +17,10 @@ import unittest from collections import OrderedDict -from nlp_toolkit.backends.neural_engine.compile.ops.op import OPERATORS, Operator -from nlp_toolkit.backends.neural_engine.compile.ops.tensor import Tensor -from nlp_toolkit.backends.neural_engine.compile.graph import Graph -from nlp_toolkit.backends.neural_engine.compile.sub_graph.add_cls_token import AddClsToken +from intel_extension_for_transformers.backends.neural_engine.compile.ops.op import OPERATORS, Operator +from intel_extension_for_transformers.backends.neural_engine.compile.ops.tensor import Tensor +from intel_extension_for_transformers.backends.neural_engine.compile.graph import Graph +from intel_extension_for_transformers.backends.neural_engine.compile.sub_graph.add_cls_token import AddClsToken import numpy as np diff --git a/nlp_toolkit/backends/neural_engine/test/pytest/test_attention_output_layer_norm_length_adaptive_keep_indices.py b/intel_extension_for_transformers/backends/neural_engine/test/pytest/test_attention_output_layer_norm_length_adaptive_keep_indices.py similarity index 91% rename from nlp_toolkit/backends/neural_engine/test/pytest/test_attention_output_layer_norm_length_adaptive_keep_indices.py rename to intel_extension_for_transformers/backends/neural_engine/test/pytest/test_attention_output_layer_norm_length_adaptive_keep_indices.py index d48eb5755fb..21d4ec2ef27 100644 --- 
a/nlp_toolkit/backends/neural_engine/test/pytest/test_attention_output_layer_norm_length_adaptive_keep_indices.py
+++ b/intel_extension_for_transformers/backends/neural_engine/test/pytest/test_attention_output_layer_norm_length_adaptive_keep_indices.py
@@ -17,10 +17,10 @@
 import unittest
 from collections import OrderedDict
-from nlp_toolkit.backends.neural_engine.compile.ops.op import OPERATORS, Operator
-from nlp_toolkit.backends.neural_engine.compile.ops.tensor import Tensor
-from nlp_toolkit.backends.neural_engine.compile.graph import Graph
-from nlp_toolkit.backends.neural_engine.compile.sub_graph.attention_output_layer_norm_length_adaptive_keep_indices import AttentionOutputLayerNormLengthAdaptiveExpandIndices
+from intel_extension_for_transformers.backends.neural_engine.compile.ops.op import OPERATORS, Operator
+from intel_extension_for_transformers.backends.neural_engine.compile.ops.tensor import Tensor
+from intel_extension_for_transformers.backends.neural_engine.compile.graph import Graph
+from intel_extension_for_transformers.backends.neural_engine.compile.sub_graph.attention_output_layer_norm_length_adaptive_keep_indices import AttentionOutputLayerNormLengthAdaptiveExpandIndices
 import numpy as np
diff --git a/nlp_toolkit/backends/neural_engine/test/pytest/test_attention_reshape.py b/intel_extension_for_transformers/backends/neural_engine/test/pytest/test_attention_reshape.py
similarity index 94%
rename from nlp_toolkit/backends/neural_engine/test/pytest/test_attention_reshape.py
rename to intel_extension_for_transformers/backends/neural_engine/test/pytest/test_attention_reshape.py
index 503b305da29..4fbcb1eec59 100644
--- a/nlp_toolkit/backends/neural_engine/test/pytest/test_attention_reshape.py
+++ b/intel_extension_for_transformers/backends/neural_engine/test/pytest/test_attention_reshape.py
@@ -17,10 +17,10 @@
 import unittest
 from collections import OrderedDict
-from nlp_toolkit.backends.neural_engine.compile.ops.op import OPERATORS, Operator
-from nlp_toolkit.backends.neural_engine.compile.ops.tensor import Tensor
-from nlp_toolkit.backends.neural_engine.compile.graph import Graph
-from nlp_toolkit.backends.neural_engine.compile.sub_graph.attention_reshape import AttentionReshape
+from intel_extension_for_transformers.backends.neural_engine.compile.ops.op import OPERATORS, Operator
+from intel_extension_for_transformers.backends.neural_engine.compile.ops.tensor import Tensor
+from intel_extension_for_transformers.backends.neural_engine.compile.graph import Graph
+from intel_extension_for_transformers.backends.neural_engine.compile.sub_graph.attention_reshape import AttentionReshape
 import numpy as np
diff --git a/nlp_toolkit/backends/neural_engine/test/pytest/test_conv_reshape.py b/intel_extension_for_transformers/backends/neural_engine/test/pytest/test_conv_reshape.py
similarity index 91%
rename from nlp_toolkit/backends/neural_engine/test/pytest/test_conv_reshape.py
rename to intel_extension_for_transformers/backends/neural_engine/test/pytest/test_conv_reshape.py
index 94fd4cdc334..3ab5a0acea3 100644
--- a/nlp_toolkit/backends/neural_engine/test/pytest/test_conv_reshape.py
+++ b/intel_extension_for_transformers/backends/neural_engine/test/pytest/test_conv_reshape.py
@@ -17,10 +17,10 @@
 import unittest
 from collections import OrderedDict
-from nlp_toolkit.backends.neural_engine.compile.ops.op import OPERATORS, Operator
-from nlp_toolkit.backends.neural_engine.compile.ops.tensor import Tensor
-from nlp_toolkit.backends.neural_engine.compile.graph import Graph
-from nlp_toolkit.backends.neural_engine.compile.sub_graph.conv_reshape import ConvReshape
+from intel_extension_for_transformers.backends.neural_engine.compile.ops.op import OPERATORS, Operator
+from intel_extension_for_transformers.backends.neural_engine.compile.ops.tensor import Tensor
+from intel_extension_for_transformers.backends.neural_engine.compile.graph import Graph
+from intel_extension_for_transformers.backends.neural_engine.compile.sub_graph.conv_reshape import ConvReshape
 import numpy as np
diff --git a/nlp_toolkit/backends/neural_engine/test/pytest/test_embeddingbag.py b/intel_extension_for_transformers/backends/neural_engine/test/pytest/test_embeddingbag.py
similarity index 85%
rename from nlp_toolkit/backends/neural_engine/test/pytest/test_embeddingbag.py
rename to intel_extension_for_transformers/backends/neural_engine/test/pytest/test_embeddingbag.py
index 10da28924f8..fce946dbec4 100644
--- a/nlp_toolkit/backends/neural_engine/test/pytest/test_embeddingbag.py
+++ b/intel_extension_for_transformers/backends/neural_engine/test/pytest/test_embeddingbag.py
@@ -17,10 +17,10 @@
 import unittest
 from collections import OrderedDict
-from nlp_toolkit.backends.neural_engine.compile.ops.op import OPERATORS, Operator
-from nlp_toolkit.backends.neural_engine.compile.ops.tensor import Tensor
-from nlp_toolkit.backends.neural_engine.compile.graph import Graph
-from nlp_toolkit.backends.neural_engine.compile.sub_graph.embeddingbag import EmbeddingBag
+from intel_extension_for_transformers.backends.neural_engine.compile.ops.op import OPERATORS, Operator
+from intel_extension_for_transformers.backends.neural_engine.compile.ops.tensor import Tensor
+from intel_extension_for_transformers.backends.neural_engine.compile.graph import Graph
+from intel_extension_for_transformers.backends.neural_engine.compile.sub_graph.embeddingbag import EmbeddingBag
 import numpy as np
diff --git a/nlp_toolkit/backends/neural_engine/test/pytest/test_execution_options.py b/intel_extension_for_transformers/backends/neural_engine/test/pytest/test_execution_options.py
similarity index 100%
rename from nlp_toolkit/backends/neural_engine/test/pytest/test_execution_options.py
rename to intel_extension_for_transformers/backends/neural_engine/test/pytest/test_execution_options.py
diff --git a/nlp_toolkit/backends/neural_engine/test/pytest/test_gelu.py b/intel_extension_for_transformers/backends/neural_engine/test/pytest/test_gelu.py
similarity index 89%
rename from nlp_toolkit/backends/neural_engine/test/pytest/test_gelu.py
rename to intel_extension_for_transformers/backends/neural_engine/test/pytest/test_gelu.py
index 7ead1bd19a7..a8bd8011bcc 100644
--- a/nlp_toolkit/backends/neural_engine/test/pytest/test_gelu.py
+++ b/intel_extension_for_transformers/backends/neural_engine/test/pytest/test_gelu.py
@@ -17,10 +17,10 @@
 import unittest
 from collections import OrderedDict
-from nlp_toolkit.backends.neural_engine.compile.ops.op import OPERATORS, Operator
-from nlp_toolkit.backends.neural_engine.compile.ops.tensor import Tensor
-from nlp_toolkit.backends.neural_engine.compile.graph import Graph
-from nlp_toolkit.backends.neural_engine.compile.sub_graph.gelu import Gelu
+from intel_extension_for_transformers.backends.neural_engine.compile.ops.op import OPERATORS, Operator
+from intel_extension_for_transformers.backends.neural_engine.compile.ops.tensor import Tensor
+from intel_extension_for_transformers.backends.neural_engine.compile.graph import Graph
+from intel_extension_for_transformers.backends.neural_engine.compile.sub_graph.gelu import Gelu
 class TestGelu(unittest.TestCase):
diff --git a/nlp_toolkit/backends/neural_engine/test/pytest/test_graph_dispatch.py b/intel_extension_for_transformers/backends/neural_engine/test/pytest/test_graph_dispatch.py
similarity index 97%
rename from nlp_toolkit/backends/neural_engine/test/pytest/test_graph_dispatch.py
rename to intel_extension_for_transformers/backends/neural_engine/test/pytest/test_graph_dispatch.py
index 5b6b1865f8b..2bdd3b6e3f9 100644
--- a/nlp_toolkit/backends/neural_engine/test/pytest/test_graph_dispatch.py
+++ b/intel_extension_for_transformers/backends/neural_engine/test/pytest/test_graph_dispatch.py
@@ -18,7 +18,7 @@
 import os
 import unittest
 import numpy as np
-from nlp_toolkit.backends.neural_engine.compile import compile
+from intel_extension_for_transformers.backends.neural_engine.compile import compile
 class TestGraphDispatch(unittest.TestCase):
     @classmethod
diff --git a/nlp_toolkit/backends/neural_engine/test/pytest/test_insert_input_output_data.py b/intel_extension_for_transformers/backends/neural_engine/test/pytest/test_insert_input_output_data.py
similarity index 76%
rename from nlp_toolkit/backends/neural_engine/test/pytest/test_insert_input_output_data.py
rename to intel_extension_for_transformers/backends/neural_engine/test/pytest/test_insert_input_output_data.py
index 5b3f78442ae..ec72d15b1dd 100644
--- a/nlp_toolkit/backends/neural_engine/test/pytest/test_insert_input_output_data.py
+++ b/intel_extension_for_transformers/backends/neural_engine/test/pytest/test_insert_input_output_data.py
@@ -17,11 +17,11 @@
 import os
 import unittest
-from nlp_toolkit.backends.neural_engine.compile.graph import Graph
-from nlp_toolkit.backends.neural_engine.compile.ops.op import OPERATORS
-from nlp_toolkit.backends.neural_engine.compile.ops.tensor import Tensor
-from nlp_toolkit.backends.neural_engine.compile.sub_graph.input_data import InputData
-from nlp_toolkit.backends.neural_engine.compile.sub_graph.output_data import OutputData
+from intel_extension_for_transformers.backends.neural_engine.compile.graph import Graph
+from intel_extension_for_transformers.backends.neural_engine.compile.ops.op import OPERATORS
+from intel_extension_for_transformers.backends.neural_engine.compile.ops.tensor import Tensor
+from intel_extension_for_transformers.backends.neural_engine.compile.sub_graph.input_data import InputData
+from intel_extension_for_transformers.backends.neural_engine.compile.sub_graph.output_data import OutputData
 os.environ['GLOG_minloglevel'] = '2'
 class TestInsertInputOuputData(unittest.TestCase):
diff --git a/nlp_toolkit/backends/neural_engine/test/pytest/test_is_supported_onnx_node.py b/intel_extension_for_transformers/backends/neural_engine/test/pytest/test_is_supported_onnx_node.py
similarity index 82%
rename from nlp_toolkit/backends/neural_engine/test/pytest/test_is_supported_onnx_node.py
rename to intel_extension_for_transformers/backends/neural_engine/test/pytest/test_is_supported_onnx_node.py
index 761d6873ffb..bb121d91cec 100644
--- a/nlp_toolkit/backends/neural_engine/test/pytest/test_is_supported_onnx_node.py
+++ b/intel_extension_for_transformers/backends/neural_engine/test/pytest/test_is_supported_onnx_node.py
@@ -17,9 +17,9 @@
 import os
 import unittest
-from nlp_toolkit.backends.neural_engine.compile.ops.op import OPERATORS
-from nlp_toolkit.backends.neural_engine.compile.ops.tensor import Tensor
-from nlp_toolkit.backends.neural_engine.compile.onnx_utils import is_supported_onnx_node
+from intel_extension_for_transformers.backends.neural_engine.compile.ops.op import OPERATORS
+from intel_extension_for_transformers.backends.neural_engine.compile.ops.tensor import Tensor
+from intel_extension_for_transformers.backends.neural_engine.compile.onnx_utils import is_supported_onnx_node
 os.environ['GLOG_minloglevel'] = '2'
 class TestIsSupportedOnnxNode(unittest.TestCase):
diff --git a/nlp_toolkit/backends/neural_engine/test/pytest/test_last_layer_shape.py b/intel_extension_for_transformers/backends/neural_engine/test/pytest/test_last_layer_shape.py
similarity index 89%
rename from nlp_toolkit/backends/neural_engine/test/pytest/test_last_layer_shape.py
rename to intel_extension_for_transformers/backends/neural_engine/test/pytest/test_last_layer_shape.py
index 5c7290da497..798201675db 100644
--- a/nlp_toolkit/backends/neural_engine/test/pytest/test_last_layer_shape.py
+++ b/intel_extension_for_transformers/backends/neural_engine/test/pytest/test_last_layer_shape.py
@@ -17,10 +17,10 @@
 import unittest
 from collections import OrderedDict
-from nlp_toolkit.backends.neural_engine.compile.ops.op import OPERATORS, Operator
-from nlp_toolkit.backends.neural_engine.compile.ops.tensor import Tensor
-from nlp_toolkit.backends.neural_engine.compile.graph import Graph
-from nlp_toolkit.backends.neural_engine.compile.sub_graph.last_layer_shape import LastLayerShape
+from intel_extension_for_transformers.backends.neural_engine.compile.ops.op import OPERATORS, Operator
+from intel_extension_for_transformers.backends.neural_engine.compile.ops.tensor import Tensor
+from intel_extension_for_transformers.backends.neural_engine.compile.graph import Graph
+from intel_extension_for_transformers.backends.neural_engine.compile.sub_graph.last_layer_shape import LastLayerShape
 class TestLastLayerShape(unittest.TestCase):
diff --git a/nlp_toolkit/backends/neural_engine/test/pytest/test_layer_norm_with_reduce_mean.py b/intel_extension_for_transformers/backends/neural_engine/test/pytest/test_layer_norm_with_reduce_mean.py
similarity index 86%
rename from nlp_toolkit/backends/neural_engine/test/pytest/test_layer_norm_with_reduce_mean.py
rename to intel_extension_for_transformers/backends/neural_engine/test/pytest/test_layer_norm_with_reduce_mean.py
index 28a3010d60d..77e4e541993 100644
--- a/nlp_toolkit/backends/neural_engine/test/pytest/test_layer_norm_with_reduce_mean.py
+++ b/intel_extension_for_transformers/backends/neural_engine/test/pytest/test_layer_norm_with_reduce_mean.py
@@ -17,10 +17,10 @@
 import unittest
 from collections import OrderedDict
-from nlp_toolkit.backends.neural_engine.compile.ops.op import OPERATORS, Operator
-from nlp_toolkit.backends.neural_engine.compile.ops.tensor import Tensor
-from nlp_toolkit.backends.neural_engine.compile.graph import Graph
-from nlp_toolkit.backends.neural_engine.compile.sub_graph.layer_norm_with_reduce_mean import LayerNormWithReduceMean
+from intel_extension_for_transformers.backends.neural_engine.compile.ops.op import OPERATORS, Operator
+from intel_extension_for_transformers.backends.neural_engine.compile.ops.tensor import Tensor
+from intel_extension_for_transformers.backends.neural_engine.compile.graph import Graph
+from intel_extension_for_transformers.backends.neural_engine.compile.sub_graph.layer_norm_with_reduce_mean import LayerNormWithReduceMean
 class TestLayerNormWithReduceMean(unittest.TestCase):
diff --git a/nlp_toolkit/backends/neural_engine/test/pytest/test_main.py b/intel_extension_for_transformers/backends/neural_engine/test/pytest/test_main.py
similarity index 95%
rename from nlp_toolkit/backends/neural_engine/test/pytest/test_main.py
rename to intel_extension_for_transformers/backends/neural_engine/test/pytest/test_main.py
index 83b68a8dfdb..4a90563c1e9 100644
--- a/nlp_toolkit/backends/neural_engine/test/pytest/test_main.py
+++ b/intel_extension_for_transformers/backends/neural_engine/test/pytest/test_main.py
@@ -16,8 +16,8 @@
 # limitations under the License.
 import unittest
-from nlp_toolkit.backends.neural_engine.compile import compile
-from nlp_toolkit.backends.neural_engine.compile.ops.tensor import Tensor
+from intel_extension_for_transformers.backends.neural_engine.compile import compile
+from intel_extension_for_transformers.backends.neural_engine.compile.ops.tensor import Tensor
 import numpy as np
 import os
 import shutil
diff --git a/nlp_toolkit/backends/neural_engine/test/pytest/test_matmul_with_bias_relu.py b/intel_extension_for_transformers/backends/neural_engine/test/pytest/test_matmul_with_bias_relu.py
similarity index 84%
rename from nlp_toolkit/backends/neural_engine/test/pytest/test_matmul_with_bias_relu.py
rename to intel_extension_for_transformers/backends/neural_engine/test/pytest/test_matmul_with_bias_relu.py
index 2833f071698..10b4ac0024f 100644
--- a/nlp_toolkit/backends/neural_engine/test/pytest/test_matmul_with_bias_relu.py
+++ b/intel_extension_for_transformers/backends/neural_engine/test/pytest/test_matmul_with_bias_relu.py
@@ -17,10 +17,10 @@
 import unittest
 from collections import OrderedDict
-from nlp_toolkit.backends.neural_engine.compile.ops.op import OPERATORS, Operator
-from nlp_toolkit.backends.neural_engine.compile.ops.tensor import Tensor
-from nlp_toolkit.backends.neural_engine.compile.graph import Graph
-from nlp_toolkit.backends.neural_engine.compile.sub_graph.matmul_with_bias_relu import MatMulWithBiasRelu
+from intel_extension_for_transformers.backends.neural_engine.compile.ops.op import OPERATORS, Operator
+from intel_extension_for_transformers.backends.neural_engine.compile.ops.tensor import Tensor
+from intel_extension_for_transformers.backends.neural_engine.compile.graph import Graph
+from intel_extension_for_transformers.backends.neural_engine.compile.sub_graph.matmul_with_bias_relu import MatMulWithBiasRelu
 import numpy as np
diff --git a/nlp_toolkit/backends/neural_engine/test/pytest/test_matmul_with_bias_sigmoid.py b/intel_extension_for_transformers/backends/neural_engine/test/pytest/test_matmul_with_bias_sigmoid.py
similarity index 84%
rename from nlp_toolkit/backends/neural_engine/test/pytest/test_matmul_with_bias_sigmoid.py
rename to intel_extension_for_transformers/backends/neural_engine/test/pytest/test_matmul_with_bias_sigmoid.py
index a3f16e29b23..efc1700d926 100644
--- a/nlp_toolkit/backends/neural_engine/test/pytest/test_matmul_with_bias_sigmoid.py
+++ b/intel_extension_for_transformers/backends/neural_engine/test/pytest/test_matmul_with_bias_sigmoid.py
@@ -17,10 +17,10 @@
 import unittest
 from collections import OrderedDict
-from nlp_toolkit.backends.neural_engine.compile.ops.op import OPERATORS, Operator
-from nlp_toolkit.backends.neural_engine.compile.ops.tensor import Tensor
-from nlp_toolkit.backends.neural_engine.compile.graph import Graph
-from nlp_toolkit.backends.neural_engine.compile.sub_graph.matmul_with_bias_sigmoid import MatMulWithBiasSigmoid
+from intel_extension_for_transformers.backends.neural_engine.compile.ops.op import OPERATORS, Operator
+from intel_extension_for_transformers.backends.neural_engine.compile.ops.tensor import Tensor
+from intel_extension_for_transformers.backends.neural_engine.compile.graph import Graph
+from intel_extension_for_transformers.backends.neural_engine.compile.sub_graph.matmul_with_bias_sigmoid import MatMulWithBiasSigmoid
 import numpy as np
diff --git a/nlp_toolkit/backends/neural_engine/test/pytest/test_matmul_with_bias_tanh.py b/intel_extension_for_transformers/backends/neural_engine/test/pytest/test_matmul_with_bias_tanh.py
similarity index 86%
rename from nlp_toolkit/backends/neural_engine/test/pytest/test_matmul_with_bias_tanh.py
rename to intel_extension_for_transformers/backends/neural_engine/test/pytest/test_matmul_with_bias_tanh.py
index 7c153484817..57ef6f81cc3 100644
--- a/nlp_toolkit/backends/neural_engine/test/pytest/test_matmul_with_bias_tanh.py
+++ b/intel_extension_for_transformers/backends/neural_engine/test/pytest/test_matmul_with_bias_tanh.py
@@ -17,10 +17,10 @@
 import unittest
 from collections import OrderedDict
-from nlp_toolkit.backends.neural_engine.compile.ops.op import OPERATORS, Operator
-from nlp_toolkit.backends.neural_engine.compile.ops.tensor import Tensor
-from nlp_toolkit.backends.neural_engine.compile.graph import Graph
-from nlp_toolkit.backends.neural_engine.compile.sub_graph.matmul_with_bias_tanh import MatmulWithBiasTanh
+from intel_extension_for_transformers.backends.neural_engine.compile.ops.op import OPERATORS, Operator
+from intel_extension_for_transformers.backends.neural_engine.compile.ops.tensor import Tensor
+from intel_extension_for_transformers.backends.neural_engine.compile.graph import Graph
+from intel_extension_for_transformers.backends.neural_engine.compile.sub_graph.matmul_with_bias_tanh import MatmulWithBiasTanh
 import numpy as np
diff --git a/nlp_toolkit/backends/neural_engine/test/pytest/test_merged_embeddingbag.py b/intel_extension_for_transformers/backends/neural_engine/test/pytest/test_merged_embeddingbag.py
similarity index 94%
rename from nlp_toolkit/backends/neural_engine/test/pytest/test_merged_embeddingbag.py
rename to intel_extension_for_transformers/backends/neural_engine/test/pytest/test_merged_embeddingbag.py
index 26b42f6b0fe..477c52417a0 100644
--- a/nlp_toolkit/backends/neural_engine/test/pytest/test_merged_embeddingbag.py
+++ b/intel_extension_for_transformers/backends/neural_engine/test/pytest/test_merged_embeddingbag.py
@@ -17,10 +17,10 @@
 import unittest
 from collections import OrderedDict
-from nlp_toolkit.backends.neural_engine.compile.ops.op import OPERATORS, Operator
-from nlp_toolkit.backends.neural_engine.compile.ops.tensor import Tensor
-from nlp_toolkit.backends.neural_engine.compile.graph import Graph
-from nlp_toolkit.backends.neural_engine.compile.sub_graph.merged_embeddingbag import MergedEmbeddingbag
+from intel_extension_for_transformers.backends.neural_engine.compile.ops.op import OPERATORS, Operator
+from intel_extension_for_transformers.backends.neural_engine.compile.ops.tensor import Tensor
+from intel_extension_for_transformers.backends.neural_engine.compile.graph import Graph
+from intel_extension_for_transformers.backends.neural_engine.compile.sub_graph.merged_embeddingbag import MergedEmbeddingbag
 import numpy as np
diff --git a/nlp_toolkit/backends/neural_engine/test/pytest/test_onnx_utils.py b/intel_extension_for_transformers/backends/neural_engine/test/pytest/test_onnx_utils.py
similarity index 92%
rename from nlp_toolkit/backends/neural_engine/test/pytest/test_onnx_utils.py
rename to intel_extension_for_transformers/backends/neural_engine/test/pytest/test_onnx_utils.py
index 41022fe9664..d57efe2ae59 100644
--- a/nlp_toolkit/backends/neural_engine/test/pytest/test_onnx_utils.py
+++ b/intel_extension_for_transformers/backends/neural_engine/test/pytest/test_onnx_utils.py
@@ -16,10 +16,10 @@
 # limitations under the License.
 import unittest
-import nlp_toolkit.backends.neural_engine.compile as compile
+import intel_extension_for_transformers.backends.neural_engine.compile as compile
 import numpy as np
-from nlp_toolkit.backends.neural_engine.compile.ops.op import OPERATORS, Operator
-from nlp_toolkit.backends.neural_engine.compile.ops.tensor import Tensor
+from intel_extension_for_transformers.backends.neural_engine.compile.ops.op import OPERATORS, Operator
+from intel_extension_for_transformers.backends.neural_engine.compile.ops.tensor import Tensor
 class TestOnnxUtils(unittest.TestCase):
diff --git a/nlp_toolkit/backends/neural_engine/test/pytest/test_ops.py b/intel_extension_for_transformers/backends/neural_engine/test/pytest/test_ops.py
similarity index 99%
rename from nlp_toolkit/backends/neural_engine/test/pytest/test_ops.py
rename to intel_extension_for_transformers/backends/neural_engine/test/pytest/test_ops.py
index e20522c6423..0d2f152d720 100644
--- a/nlp_toolkit/backends/neural_engine/test/pytest/test_ops.py
+++ b/intel_extension_for_transformers/backends/neural_engine/test/pytest/test_ops.py
@@ -21,8 +21,8 @@
 from onnx import NodeProto
 from onnx.helper import make_attribute
 from collections import namedtuple
-from nlp_toolkit.backends.neural_engine.compile.ops.op import OPERATORS, Operator
-from nlp_toolkit.backends.neural_engine.compile.ops.tensor import Tensor
+from intel_extension_for_transformers.backends.neural_engine.compile.ops.op import OPERATORS, Operator
+from intel_extension_for_transformers.backends.neural_engine.compile.ops.tensor import Tensor
 class TestOps(unittest.TestCase):
diff --git a/nlp_toolkit/backends/neural_engine/test/pytest/test_padding_sequence.py b/intel_extension_for_transformers/backends/neural_engine/test/pytest/test_padding_sequence.py
similarity index 97%
rename from nlp_toolkit/backends/neural_engine/test/pytest/test_padding_sequence.py
rename to intel_extension_for_transformers/backends/neural_engine/test/pytest/test_padding_sequence.py
index 2c2a9420b4f..aa176da9fe8 100644
--- a/nlp_toolkit/backends/neural_engine/test/pytest/test_padding_sequence.py
+++ b/intel_extension_for_transformers/backends/neural_engine/test/pytest/test_padding_sequence.py
@@ -17,10 +17,10 @@
 import unittest
 from collections import OrderedDict
-from nlp_toolkit.backends.neural_engine.compile.ops.op import OPERATORS, Operator
-from nlp_toolkit.backends.neural_engine.compile.ops.tensor import Tensor
-from nlp_toolkit.backends.neural_engine.compile.graph import Graph
-from nlp_toolkit.backends.neural_engine.compile.sub_graph.padding_sequence import PaddingSequence
+from intel_extension_for_transformers.backends.neural_engine.compile.ops.op import OPERATORS, Operator
+from intel_extension_for_transformers.backends.neural_engine.compile.ops.tensor import Tensor
+from intel_extension_for_transformers.backends.neural_engine.compile.graph import Graph
+from intel_extension_for_transformers.backends.neural_engine.compile.sub_graph.padding_sequence import PaddingSequence
 import numpy as np
 class TestPaddingSequence(unittest.TestCase):
diff --git a/nlp_toolkit/backends/neural_engine/test/pytest/test_pattern_dispatch.py b/intel_extension_for_transformers/backends/neural_engine/test/pytest/test_pattern_dispatch.py
similarity index 84%
rename from nlp_toolkit/backends/neural_engine/test/pytest/test_pattern_dispatch.py
rename to intel_extension_for_transformers/backends/neural_engine/test/pytest/test_pattern_dispatch.py
index bb0b21cc206..88c5520a752 100644
--- a/nlp_toolkit/backends/neural_engine/test/pytest/test_pattern_dispatch.py
+++ b/intel_extension_for_transformers/backends/neural_engine/test/pytest/test_pattern_dispatch.py
@@ -18,10 +18,10 @@
 import os
 import unittest
 import numpy as np
-from nlp_toolkit.backends.neural_engine.compile import compile
-from nlp_toolkit.backends.neural_engine.compile.loaders.loader import Loader
-from nlp_toolkit.backends.neural_engine.compile.extractors.extractor import Extractor
-from nlp_toolkit.backends.neural_engine.compile.sub_graph.subgraph_matcher import SubGraphMatcher
+from intel_extension_for_transformers.backends.neural_engine.compile import compile
+from intel_extension_for_transformers.backends.neural_engine.compile.loaders.loader import Loader
+from intel_extension_for_transformers.backends.neural_engine.compile.extractors.extractor import Extractor
+from intel_extension_for_transformers.backends.neural_engine.compile.sub_graph.subgraph_matcher import SubGraphMatcher
 class TestPatternDispatch(unittest.TestCase):
     @classmethod
diff --git a/nlp_toolkit/backends/neural_engine/test/pytest/test_position_embeddings.py b/intel_extension_for_transformers/backends/neural_engine/test/pytest/test_position_embeddings.py
similarity index 90%
rename from nlp_toolkit/backends/neural_engine/test/pytest/test_position_embeddings.py
rename to intel_extension_for_transformers/backends/neural_engine/test/pytest/test_position_embeddings.py
index b78ccaeb98b..9968c224838 100644
--- a/nlp_toolkit/backends/neural_engine/test/pytest/test_position_embeddings.py
+++ b/intel_extension_for_transformers/backends/neural_engine/test/pytest/test_position_embeddings.py
@@ -17,10 +17,10 @@
 import unittest
 from collections import OrderedDict
-from nlp_toolkit.backends.neural_engine.compile.ops.op import OPERATORS, Operator
-from nlp_toolkit.backends.neural_engine.compile.ops.tensor import Tensor
-from nlp_toolkit.backends.neural_engine.compile.graph import Graph
-from nlp_toolkit.backends.neural_engine.compile.sub_graph.position_embeddings import PositionEmbeddings
+from intel_extension_for_transformers.backends.neural_engine.compile.ops.op import OPERATORS, Operator
+from intel_extension_for_transformers.backends.neural_engine.compile.ops.tensor import Tensor
+from intel_extension_for_transformers.backends.neural_engine.compile.graph import Graph
+from intel_extension_for_transformers.backends.neural_engine.compile.sub_graph.position_embeddings import PositionEmbeddings
 import numpy as np
diff --git a/nlp_toolkit/backends/neural_engine/test/pytest/test_qkv_merge.py b/intel_extension_for_transformers/backends/neural_engine/test/pytest/test_qkv_merge.py
similarity index 87%
rename from nlp_toolkit/backends/neural_engine/test/pytest/test_qkv_merge.py
rename to intel_extension_for_transformers/backends/neural_engine/test/pytest/test_qkv_merge.py
index 96d1e62a7c3..73c61f43a2c 100644
--- a/nlp_toolkit/backends/neural_engine/test/pytest/test_qkv_merge.py
+++ b/intel_extension_for_transformers/backends/neural_engine/test/pytest/test_qkv_merge.py
@@ -16,8 +16,8 @@
 # limitations under the License.
 import unittest
-from nlp_toolkit.backends.neural_engine.compile import compile
-from nlp_toolkit.backends.neural_engine.compile.graph import Graph
+from intel_extension_for_transformers.backends.neural_engine.compile import compile
+from intel_extension_for_transformers.backends.neural_engine.compile.graph import Graph
 class TestQKVMerge(unittest.TestCase):
diff --git a/nlp_toolkit/backends/neural_engine/test/pytest/test_qkv_reshape.py b/intel_extension_for_transformers/backends/neural_engine/test/pytest/test_qkv_reshape.py
similarity index 86%
rename from nlp_toolkit/backends/neural_engine/test/pytest/test_qkv_reshape.py
rename to intel_extension_for_transformers/backends/neural_engine/test/pytest/test_qkv_reshape.py
index 5f7ce270509..e92f4c42f0c 100644
--- a/nlp_toolkit/backends/neural_engine/test/pytest/test_qkv_reshape.py
+++ b/intel_extension_for_transformers/backends/neural_engine/test/pytest/test_qkv_reshape.py
@@ -17,10 +17,10 @@
 import unittest
 from collections import OrderedDict
-from nlp_toolkit.backends.neural_engine.compile.ops.op import OPERATORS, Operator
-from nlp_toolkit.backends.neural_engine.compile.ops.tensor import Tensor
-from nlp_toolkit.backends.neural_engine.compile.graph import Graph
-from nlp_toolkit.backends.neural_engine.compile.sub_graph.qkv_reshape import QKVReshape
+from intel_extension_for_transformers.backends.neural_engine.compile.ops.op import OPERATORS, Operator
+from intel_extension_for_transformers.backends.neural_engine.compile.ops.tensor import Tensor
+from intel_extension_for_transformers.backends.neural_engine.compile.graph import Graph
+from intel_extension_for_transformers.backends.neural_engine.compile.sub_graph.qkv_reshape import QKVReshape
 class TestAttentionReshape(unittest.TestCase):
diff --git a/nlp_toolkit/backends/neural_engine/test/pytest/test_quant_onnx_execute.py b/intel_extension_for_transformers/backends/neural_engine/test/pytest/test_quant_onnx_execute.py
similarity index 96%
rename from nlp_toolkit/backends/neural_engine/test/pytest/test_quant_onnx_execute.py
rename to intel_extension_for_transformers/backends/neural_engine/test/pytest/test_quant_onnx_execute.py
index f0bd52ae6ae..4e44baeabe7 100644
--- a/nlp_toolkit/backends/neural_engine/test/pytest/test_quant_onnx_execute.py
+++ b/intel_extension_for_transformers/backends/neural_engine/test/pytest/test_quant_onnx_execute.py
@@ -18,7 +18,7 @@
 import os
 import unittest
 import numpy as np
-from nlp_toolkit.backends.neural_engine.compile import compile
+from intel_extension_for_transformers.backends.neural_engine.compile import compile
 class TestQuantOnnxExecute(unittest.TestCase):
     @classmethod
diff --git a/nlp_toolkit/backends/neural_engine/test/pytest/test_reshape_after_restore_hidden_states.py b/intel_extension_for_transformers/backends/neural_engine/test/pytest/test_reshape_after_restore_hidden_states.py
similarity index 87%
rename from nlp_toolkit/backends/neural_engine/test/pytest/test_reshape_after_restore_hidden_states.py
rename to intel_extension_for_transformers/backends/neural_engine/test/pytest/test_reshape_after_restore_hidden_states.py
index 4389fe1cedb..7f13a1501d3 100644
--- a/nlp_toolkit/backends/neural_engine/test/pytest/test_reshape_after_restore_hidden_states.py
+++ b/intel_extension_for_transformers/backends/neural_engine/test/pytest/test_reshape_after_restore_hidden_states.py
@@ -18,10 +18,10 @@
 import unittest
 import numpy as np
 from collections import OrderedDict
-from nlp_toolkit.backends.neural_engine.compile.ops.op import OPERATORS, Operator
-from nlp_toolkit.backends.neural_engine.compile.ops.tensor import Tensor
-from nlp_toolkit.backends.neural_engine.compile.graph import Graph
-from nlp_toolkit.backends.neural_engine.compile.sub_graph.reshape_after_restore_hidden_states import ReshapeAfterRestoreHiddenStates
+from intel_extension_for_transformers.backends.neural_engine.compile.ops.op import OPERATORS, Operator
+from intel_extension_for_transformers.backends.neural_engine.compile.ops.tensor import Tensor
+from intel_extension_for_transformers.backends.neural_engine.compile.graph import Graph
+from intel_extension_for_transformers.backends.neural_engine.compile.sub_graph.reshape_after_restore_hidden_states import ReshapeAfterRestoreHiddenStates
 class TestLayerNormWithReduceMean(unittest.TestCase):
diff --git a/nlp_toolkit/backends/neural_engine/test/pytest/test_reshape_before_and_after_attention_out_layer_norm_gather_elements.py b/intel_extension_for_transformers/backends/neural_engine/test/pytest/test_reshape_before_and_after_attention_out_layer_norm_gather_elements.py
similarity index 88%
rename from nlp_toolkit/backends/neural_engine/test/pytest/test_reshape_before_and_after_attention_out_layer_norm_gather_elements.py
rename to intel_extension_for_transformers/backends/neural_engine/test/pytest/test_reshape_before_and_after_attention_out_layer_norm_gather_elements.py
index 783276a2701..b9528c39f26 100644
--- a/nlp_toolkit/backends/neural_engine/test/pytest/test_reshape_before_and_after_attention_out_layer_norm_gather_elements.py
+++ b/intel_extension_for_transformers/backends/neural_engine/test/pytest/test_reshape_before_and_after_attention_out_layer_norm_gather_elements.py
@@ -17,10 +17,10 @@
 import unittest
 from collections import OrderedDict
-from nlp_toolkit.backends.neural_engine.compile.ops.op import OPERATORS, Operator
-from nlp_toolkit.backends.neural_engine.compile.ops.tensor import Tensor
-from nlp_toolkit.backends.neural_engine.compile.graph import Graph
-from nlp_toolkit.backends.neural_engine.compile.sub_graph.reshape_before_and_after_attention_out_layer_norm_gather_elements import ReshapeBeforeAndAfterAttentionOutLayerNormGatherElements
+from intel_extension_for_transformers.backends.neural_engine.compile.ops.op import OPERATORS, Operator
+from intel_extension_for_transformers.backends.neural_engine.compile.ops.tensor import Tensor
+from intel_extension_for_transformers.backends.neural_engine.compile.graph import Graph
+from intel_extension_for_transformers.backends.neural_engine.compile.sub_graph.reshape_before_and_after_attention_out_layer_norm_gather_elements import ReshapeBeforeAndAfterAttentionOutLayerNormGatherElements
 import numpy as np
diff --git a/nlp_toolkit/backends/neural_engine/test/pytest/test_reshape_before_restore_hidden_states.py b/intel_extension_for_transformers/backends/neural_engine/test/pytest/test_reshape_before_restore_hidden_states.py
similarity index 86%
rename from nlp_toolkit/backends/neural_engine/test/pytest/test_reshape_before_restore_hidden_states.py
rename to intel_extension_for_transformers/backends/neural_engine/test/pytest/test_reshape_before_restore_hidden_states.py
index 0adbb6e3c3f..b2bef151e06 100644
--- a/nlp_toolkit/backends/neural_engine/test/pytest/test_reshape_before_restore_hidden_states.py
+++ b/intel_extension_for_transformers/backends/neural_engine/test/pytest/test_reshape_before_restore_hidden_states.py
@@ -18,10 +18,10 @@
 import unittest
 import numpy as np
 from collections import OrderedDict
-from nlp_toolkit.backends.neural_engine.compile.ops.op import OPERATORS, Operator
-from nlp_toolkit.backends.neural_engine.compile.ops.tensor import Tensor
-from nlp_toolkit.backends.neural_engine.compile.graph import Graph
-from nlp_toolkit.backends.neural_engine.compile.sub_graph.reshape_before_restore_hidden_states import ReshapeBeforeRestoreHiddenStates
+from intel_extension_for_transformers.backends.neural_engine.compile.ops.op import OPERATORS, Operator
+from intel_extension_for_transformers.backends.neural_engine.compile.ops.tensor import Tensor
+from intel_extension_for_transformers.backends.neural_engine.compile.graph import Graph
+from intel_extension_for_transformers.backends.neural_engine.compile.sub_graph.reshape_before_restore_hidden_states import ReshapeBeforeRestoreHiddenStates
 class TestLayerNormWithReduceMean(unittest.TestCase):
diff --git a/nlp_toolkit/backends/neural_engine/test/pytest/test_start_end_logits.py b/intel_extension_for_transformers/backends/neural_engine/test/pytest/test_start_end_logits.py
similarity index 87%
rename from nlp_toolkit/backends/neural_engine/test/pytest/test_start_end_logits.py
rename to intel_extension_for_transformers/backends/neural_engine/test/pytest/test_start_end_logits.py
index c4280a8679c..ecf1d5fcfa2 100644
--- a/nlp_toolkit/backends/neural_engine/test/pytest/test_start_end_logits.py
+++ b/intel_extension_for_transformers/backends/neural_engine/test/pytest/test_start_end_logits.py
@@ -17,10 +17,10 @@
 import unittest
 from collections import OrderedDict
-from nlp_toolkit.backends.neural_engine.compile.ops.op import OPERATORS, Operator
-from nlp_toolkit.backends.neural_engine.compile.ops.tensor import Tensor
-from nlp_toolkit.backends.neural_engine.compile.graph import Graph
-from nlp_toolkit.backends.neural_engine.compile.sub_graph.start_end_logits import StartEndLogits
+from intel_extension_for_transformers.backends.neural_engine.compile.ops.op import OPERATORS, Operator
+from intel_extension_for_transformers.backends.neural_engine.compile.ops.tensor import Tensor
+from intel_extension_for_transformers.backends.neural_engine.compile.graph import Graph
+from intel_extension_for_transformers.backends.neural_engine.compile.sub_graph.start_end_logits import StartEndLogits
 class TestStartEndLogits(unittest.TestCase):
diff --git a/nlp_toolkit/backends/neural_engine/test/pytest/test_tf_utils.py b/intel_extension_for_transformers/backends/neural_engine/test/pytest/test_tf_utils.py
similarity index 93%
rename from nlp_toolkit/backends/neural_engine/test/pytest/test_tf_utils.py
rename to intel_extension_for_transformers/backends/neural_engine/test/pytest/test_tf_utils.py
index fa0424eea25..2c14fefaa25 100644
--- a/nlp_toolkit/backends/neural_engine/test/pytest/test_tf_utils.py
+++ b/intel_extension_for_transformers/backends/neural_engine/test/pytest/test_tf_utils.py
@@ -17,7 +17,7 @@
 import unittest
 from tensorflow.core.framework import node_def_pb2
-import nlp_toolkit.backends.neural_engine.compile.tf_utils as util
+import intel_extension_for_transformers.backends.neural_engine.compile.tf_utils as util
 class TestTfUtils(unittest.TestCase):
diff --git a/nlp_toolkit/backends/neural_engine/test/pytest/test_token_type_embeddings.py b/intel_extension_for_transformers/backends/neural_engine/test/pytest/test_token_type_embeddings.py
similarity index 94%
rename from nlp_toolkit/backends/neural_engine/test/pytest/test_token_type_embeddings.py
rename to intel_extension_for_transformers/backends/neural_engine/test/pytest/test_token_type_embeddings.py
index 886abf8bead..0a4aab6b117 100644
--- a/nlp_toolkit/backends/neural_engine/test/pytest/test_token_type_embeddings.py
+++ b/intel_extension_for_transformers/backends/neural_engine/test/pytest/test_token_type_embeddings.py
@@ -17,10 +17,10 @@
 import unittest
 from collections import OrderedDict
-from nlp_toolkit.backends.neural_engine.compile.ops.op import OPERATORS, Operator
-from nlp_toolkit.backends.neural_engine.compile.ops.tensor import Tensor
-from nlp_toolkit.backends.neural_engine.compile.graph import Graph
-from nlp_toolkit.backends.neural_engine.compile.sub_graph.token_type_embeddings import TokenTypeEmbeddings
+from intel_extension_for_transformers.backends.neural_engine.compile.ops.op import OPERATORS, Operator
+from intel_extension_for_transformers.backends.neural_engine.compile.ops.tensor import Tensor
+from intel_extension_for_transformers.backends.neural_engine.compile.graph import Graph
+from intel_extension_for_transformers.backends.neural_engine.compile.sub_graph.token_type_embeddings import TokenTypeEmbeddings
 import numpy as np
diff --git a/nlp_toolkit/backends/neural_engine/test/pytest/test_transpose.py b/intel_extension_for_transformers/backends/neural_engine/test/pytest/test_transpose.py
similarity index 93%
rename from nlp_toolkit/backends/neural_engine/test/pytest/test_transpose.py
rename to intel_extension_for_transformers/backends/neural_engine/test/pytest/test_transpose.py
index e81c0a63bff..448cecb182c 100644
--- a/nlp_toolkit/backends/neural_engine/test/pytest/test_transpose.py
+++ b/intel_extension_for_transformers/backends/neural_engine/test/pytest/test_transpose.py
@@ -16,8 +16,8 @@
 # limitations under the License.
 import unittest
-from nlp_toolkit.backends.neural_engine.compile.graph import Graph
-from nlp_toolkit.backends.neural_engine.compile import compile
+from intel_extension_for_transformers.backends.neural_engine.compile.graph import Graph
+from intel_extension_for_transformers.backends.neural_engine.compile import compile
 import numpy as np
 import os
diff --git a/nlp_toolkit/backends/neural_engine/test/pytest/test_transpose_batch_matmul.py b/intel_extension_for_transformers/backends/neural_engine/test/pytest/test_transpose_batch_matmul.py
similarity index 90%
rename from nlp_toolkit/backends/neural_engine/test/pytest/test_transpose_batch_matmul.py
rename to intel_extension_for_transformers/backends/neural_engine/test/pytest/test_transpose_batch_matmul.py
index 6199e710374..9d311d9af54 100644
--- a/nlp_toolkit/backends/neural_engine/test/pytest/test_transpose_batch_matmul.py
+++ b/intel_extension_for_transformers/backends/neural_engine/test/pytest/test_transpose_batch_matmul.py
@@ -17,10 +17,10 @@
 import unittest
 from collections import OrderedDict
-from nlp_toolkit.backends.neural_engine.compile.ops.op import OPERATORS, Operator
-from nlp_toolkit.backends.neural_engine.compile.ops.tensor import Tensor
-from nlp_toolkit.backends.neural_engine.compile.graph import Graph
-from nlp_toolkit.backends.neural_engine.compile.sub_graph.transpose_batch_matmul import TransposeBatchMatMul
+from intel_extension_for_transformers.backends.neural_engine.compile.ops.op import OPERATORS, Operator
+from intel_extension_for_transformers.backends.neural_engine.compile.ops.tensor import Tensor
+from intel_extension_for_transformers.backends.neural_engine.compile.graph import Graph
+from intel_extension_for_transformers.backends.neural_engine.compile.sub_graph.transpose_batch_matmul import TransposeBatchMatMul
 import numpy as np
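Every pytest hunk above makes the same mechanical change: the top-level package is renamed from nlp_toolkit to intel_extension_for_transformers, with all module paths below it untouched. As an illustration only (this snippet is not part of the patch, and the model path is hypothetical), a downstream script migrates by rewriting just the package prefix:

```python
# Illustrative migration sketch, not part of this patch.
# Old import (pre-rename):
#   from nlp_toolkit.backends.neural_engine.compile import compile
# New import (post-rename) -- only the top-level package name changes:
from intel_extension_for_transformers.backends.neural_engine.compile import compile

# compile() itself is untouched by the rename; "model.onnx" is a
# hypothetical input used here purely for illustration.
graph = compile("model.onnx")
```

The same one-for-one prefix rewrite applies to every import touched in these hunks, which is why most of them carry a high similarity index.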
diff --git a/nlp_toolkit/backends/neural_engine/third_party/boost/libs/assert b/intel_extension_for_transformers/backends/neural_engine/third_party/boost/libs/assert
similarity index 100%
rename from nlp_toolkit/backends/neural_engine/third_party/boost/libs/assert
rename to intel_extension_for_transformers/backends/neural_engine/third_party/boost/libs/assert
diff --git a/nlp_toolkit/backends/neural_engine/third_party/boost/libs/config b/intel_extension_for_transformers/backends/neural_engine/third_party/boost/libs/config
similarity index 100%
rename from nlp_toolkit/backends/neural_engine/third_party/boost/libs/config
rename to intel_extension_for_transformers/backends/neural_engine/third_party/boost/libs/config
diff --git a/nlp_toolkit/backends/neural_engine/third_party/boost/libs/container b/intel_extension_for_transformers/backends/neural_engine/third_party/boost/libs/container
similarity index 100%
rename from nlp_toolkit/backends/neural_engine/third_party/boost/libs/container
rename to intel_extension_for_transformers/backends/neural_engine/third_party/boost/libs/container
diff --git a/nlp_toolkit/backends/neural_engine/third_party/boost/libs/container_hash b/intel_extension_for_transformers/backends/neural_engine/third_party/boost/libs/container_hash
similarity index 100%
rename from nlp_toolkit/backends/neural_engine/third_party/boost/libs/container_hash
rename to intel_extension_for_transformers/backends/neural_engine/third_party/boost/libs/container_hash
diff --git a/nlp_toolkit/backends/neural_engine/third_party/boost/libs/core b/intel_extension_for_transformers/backends/neural_engine/third_party/boost/libs/core
similarity index 100%
rename from nlp_toolkit/backends/neural_engine/third_party/boost/libs/core
rename to intel_extension_for_transformers/backends/neural_engine/third_party/boost/libs/core
diff --git a/nlp_toolkit/backends/neural_engine/third_party/boost/libs/interprocess b/intel_extension_for_transformers/backends/neural_engine/third_party/boost/libs/interprocess
similarity index 100%
rename from nlp_toolkit/backends/neural_engine/third_party/boost/libs/interprocess
rename to intel_extension_for_transformers/backends/neural_engine/third_party/boost/libs/interprocess
diff --git a/nlp_toolkit/backends/neural_engine/third_party/boost/libs/intrusive b/intel_extension_for_transformers/backends/neural_engine/third_party/boost/libs/intrusive
similarity index 100%
rename from nlp_toolkit/backends/neural_engine/third_party/boost/libs/intrusive
rename to intel_extension_for_transformers/backends/neural_engine/third_party/boost/libs/intrusive
diff --git a/nlp_toolkit/backends/neural_engine/third_party/boost/libs/move b/intel_extension_for_transformers/backends/neural_engine/third_party/boost/libs/move
similarity index 100%
rename from nlp_toolkit/backends/neural_engine/third_party/boost/libs/move
rename to intel_extension_for_transformers/backends/neural_engine/third_party/boost/libs/move
diff --git a/nlp_toolkit/backends/neural_engine/third_party/boost/libs/mp11 b/intel_extension_for_transformers/backends/neural_engine/third_party/boost/libs/mp11
similarity index 100%
rename from nlp_toolkit/backends/neural_engine/third_party/boost/libs/mp11
rename to intel_extension_for_transformers/backends/neural_engine/third_party/boost/libs/mp11
diff --git a/nlp_toolkit/backends/neural_engine/third_party/boost/libs/predef b/intel_extension_for_transformers/backends/neural_engine/third_party/boost/libs/predef
similarity index 100%
rename from nlp_toolkit/backends/neural_engine/third_party/boost/libs/predef
rename to intel_extension_for_transformers/backends/neural_engine/third_party/boost/libs/predef
diff --git a/nlp_toolkit/backends/neural_engine/third_party/boost/libs/preprocessor b/intel_extension_for_transformers/backends/neural_engine/third_party/boost/libs/preprocessor
similarity index 100%
rename from nlp_toolkit/backends/neural_engine/third_party/boost/libs/preprocessor
rename to intel_extension_for_transformers/backends/neural_engine/third_party/boost/libs/preprocessor
diff --git a/nlp_toolkit/backends/neural_engine/third_party/boost/libs/static_assert b/intel_extension_for_transformers/backends/neural_engine/third_party/boost/libs/static_assert
similarity index 100%
rename from nlp_toolkit/backends/neural_engine/third_party/boost/libs/static_assert
rename to intel_extension_for_transformers/backends/neural_engine/third_party/boost/libs/static_assert
diff --git a/nlp_toolkit/backends/neural_engine/third_party/boost/libs/throw_exception b/intel_extension_for_transformers/backends/neural_engine/third_party/boost/libs/throw_exception
similarity index 100%
rename from nlp_toolkit/backends/neural_engine/third_party/boost/libs/throw_exception
rename to intel_extension_for_transformers/backends/neural_engine/third_party/boost/libs/throw_exception
diff --git a/nlp_toolkit/backends/neural_engine/third_party/boost/libs/tuple b/intel_extension_for_transformers/backends/neural_engine/third_party/boost/libs/tuple
similarity index 100%
rename from nlp_toolkit/backends/neural_engine/third_party/boost/libs/tuple
rename to intel_extension_for_transformers/backends/neural_engine/third_party/boost/libs/tuple
diff --git a/nlp_toolkit/backends/neural_engine/third_party/boost/libs/type_traits b/intel_extension_for_transformers/backends/neural_engine/third_party/boost/libs/type_traits
similarity index 100%
rename from nlp_toolkit/backends/neural_engine/third_party/boost/libs/type_traits
rename to intel_extension_for_transformers/backends/neural_engine/third_party/boost/libs/type_traits
diff --git a/nlp_toolkit/backends/neural_engine/third_party/boost/libs/unordered b/intel_extension_for_transformers/backends/neural_engine/third_party/boost/libs/unordered
similarity index 100%
rename from nlp_toolkit/backends/neural_engine/third_party/boost/libs/unordered
rename to intel_extension_for_transformers/backends/neural_engine/third_party/boost/libs/unordered
diff --git a/nlp_toolkit/backends/neural_engine/third_party/boost/libs/winapi b/intel_extension_for_transformers/backends/neural_engine/third_party/boost/libs/winapi
similarity index 100%
rename from nlp_toolkit/backends/neural_engine/third_party/boost/libs/winapi
rename to intel_extension_for_transformers/backends/neural_engine/third_party/boost/libs/winapi
diff --git a/nlp_toolkit/backends/neural_engine/third_party/gflags b/intel_extension_for_transformers/backends/neural_engine/third_party/gflags
similarity index 100%
rename from nlp_toolkit/backends/neural_engine/third_party/gflags
rename to intel_extension_for_transformers/backends/neural_engine/third_party/gflags
diff --git a/nlp_toolkit/backends/neural_engine/third_party/glog b/intel_extension_for_transformers/backends/neural_engine/third_party/glog
similarity index 100%
rename from nlp_toolkit/backends/neural_engine/third_party/glog
rename to intel_extension_for_transformers/backends/neural_engine/third_party/glog
diff --git a/nlp_toolkit/backends/neural_engine/third_party/googletest b/intel_extension_for_transformers/backends/neural_engine/third_party/googletest
similarity index 100%
rename from nlp_toolkit/backends/neural_engine/third_party/googletest
rename to intel_extension_for_transformers/backends/neural_engine/third_party/googletest
diff --git a/nlp_toolkit/backends/neural_engine/third_party/jemalloc b/intel_extension_for_transformers/backends/neural_engine/third_party/jemalloc
similarity index 100%
rename from nlp_toolkit/backends/neural_engine/third_party/jemalloc
rename to intel_extension_for_transformers/backends/neural_engine/third_party/jemalloc
diff --git a/nlp_toolkit/backends/neural_engine/third_party/oneDNN b/intel_extension_for_transformers/backends/neural_engine/third_party/oneDNN
similarity index 100%
rename from nlp_toolkit/backends/neural_engine/third_party/oneDNN
rename to intel_extension_for_transformers/backends/neural_engine/third_party/oneDNN
diff --git a/nlp_toolkit/backends/neural_engine/third_party/oneDNNGraph b/intel_extension_for_transformers/backends/neural_engine/third_party/oneDNNGraph
similarity index 100%
rename from nlp_toolkit/backends/neural_engine/third_party/oneDNNGraph
rename to intel_extension_for_transformers/backends/neural_engine/third_party/oneDNNGraph
diff --git a/nlp_toolkit/backends/neural_engine/third_party/pybind11 b/intel_extension_for_transformers/backends/neural_engine/third_party/pybind11
similarity index 100%
rename from nlp_toolkit/backends/neural_engine/third_party/pybind11
rename to intel_extension_for_transformers/backends/neural_engine/third_party/pybind11
diff --git a/nlp_toolkit/backends/neural_engine/third_party/xbyak b/intel_extension_for_transformers/backends/neural_engine/third_party/xbyak
similarity index 100%
rename from nlp_toolkit/backends/neural_engine/third_party/xbyak
rename to intel_extension_for_transformers/backends/neural_engine/third_party/xbyak
diff --git a/nlp_toolkit/backends/neural_engine/third_party/yaml-cpp b/intel_extension_for_transformers/backends/neural_engine/third_party/yaml-cpp
similarity index 100%
rename from nlp_toolkit/backends/neural_engine/third_party/yaml-cpp
rename to intel_extension_for_transformers/backends/neural_engine/third_party/yaml-cpp
diff --git a/nlp_toolkit/optimization/__init__.py b/intel_extension_for_transformers/optimization/__init__.py
similarity index 100%
rename from nlp_toolkit/optimization/__init__.py
rename to intel_extension_for_transformers/optimization/__init__.py
diff --git a/nlp_toolkit/optimization/benchmark.py b/intel_extension_for_transformers/optimization/benchmark.py
similarity index 94%
rename from nlp_toolkit/optimization/benchmark.py
rename to intel_extension_for_transformers/optimization/benchmark.py
index 9148e20403c..67845ccdbe3 100644
--- a/nlp_toolkit/optimization/benchmark.py
+++ b/intel_extension_for_transformers/optimization/benchmark.py
@@ -29,7 +29,7 @@ def _prepare_inference_func(self, model_name: str, batch_size: int, sequence_len
         config.torchscript = True
         logger.warning("Function transformers.PyTorchBenchmark._prepare_inference_func is replaced "
-                       "by nlp_toolkit.optimization.benchmark to support int8 models.")
+                       "by intel_extension_for_transformers.optimization.benchmark to support int8 models.")
         model = OptimizedModel.from_pretrained(model_name)
         model.eval()
@@ -117,9 +117,9 @@ def _prepare_inference_func(self, model_name: str, batch_size: int, sequence_len
         config = self.config_dict[model_name]
         logger.warning("Function transformers.PyTorchBenchmark._prepare_inference_func is replaced "
-                       "by nlp_toolkit.optimization.benchmark to support executor.")
+                       "by intel_extension_for_transformers.optimization.benchmark to support executor.")
-        from nlp_toolkit.backends.neural_engine.compile import compile
+        from intel_extension_for_transformers.backends.neural_engine.compile import compile
         model = compile(model_name)
         # encoder-decoder has vocab size saved differently
diff --git a/nlp_toolkit/optimization/config.py b/intel_extension_for_transformers/optimization/config.py
similarity index 98%
rename from nlp_toolkit/optimization/config.py
rename to intel_extension_for_transformers/optimization/config.py
index 1375dcc44ca..189cd0d848f 100644
--- a/nlp_toolkit/optimization/config.py
+++ b/intel_extension_for_transformers/optimization/config.py
@@ -20,13 +20,13 @@
     Distillation_Conf, Pruner, Pruning_Conf, Quantization_Conf
 )
 from neural_compressor.conf.dotdict import DotDict
-from nlp_toolkit.optimization.utils.metrics import Metric
-from nlp_toolkit.optimization.utils.objectives import Objective, performance
-from nlp_toolkit.optimization.quantization import QuantizationMode, SUPPORTED_QUANT_MODE
-from nlp_toolkit.optimization.distillation import (
+from intel_extension_for_transformers.optimization.utils.metrics import Metric
+from intel_extension_for_transformers.optimization.utils.objectives import Objective, performance
+from intel_extension_for_transformers.optimization.quantization import QuantizationMode, SUPPORTED_QUANT_MODE
+from intel_extension_for_transformers.optimization.distillation import (
     Criterion, DistillationCriterionMode, SUPPORTED_DISTILLATION_CRITERION_MODE
 )
-from nlp_toolkit.optimization.utils.utility import LazyImport
+from intel_extension_for_transformers.optimization.utils.utility import LazyImport
 from typing import List, Union
 from xmlrpc.client import boolean
diff --git a/nlp_toolkit/optimization/distillation.py b/intel_extension_for_transformers/optimization/distillation.py
similarity index 100%
rename from nlp_toolkit/optimization/distillation.py
rename to intel_extension_for_transformers/optimization/distillation.py
diff --git a/nlp_toolkit/optimization/dynamic/__init__.py b/intel_extension_for_transformers/optimization/dynamic/__init__.py
similarity index 100%
rename from nlp_toolkit/optimization/dynamic/__init__.py
rename to intel_extension_for_transformers/optimization/dynamic/__init__.py
diff --git a/nlp_toolkit/optimization/dynamic/drop_and_restore_utils.py b/intel_extension_for_transformers/optimization/dynamic/drop_and_restore_utils.py
similarity index 100%
rename from nlp_toolkit/optimization/dynamic/drop_and_restore_utils.py
rename to intel_extension_for_transformers/optimization/dynamic/drop_and_restore_utils.py
diff --git a/nlp_toolkit/optimization/dynamic/evolution.py b/intel_extension_for_transformers/optimization/dynamic/evolution.py
similarity index 98%
rename from nlp_toolkit/optimization/dynamic/evolution.py
rename to intel_extension_for_transformers/optimization/dynamic/evolution.py
index a590a95bf5a..788dcda1c7e 100644
--- a/nlp_toolkit/optimization/dynamic/evolution.py
+++ b/intel_extension_for_transformers/optimization/dynamic/evolution.py
@@ -26,7 +26,7 @@
 import timeit
 import numpy as np
 import torch
-from nlp_toolkit.optimization.utils.utility import LazyImport
+from intel_extension_for_transformers.optimization.utils.utility import LazyImport
 torchprofile = LazyImport("torchprofile")
 logger = logging.getLogger(__name__)
diff --git a/nlp_toolkit/optimization/mixture/__init__.py b/intel_extension_for_transformers/optimization/mixture/__init__.py
similarity index 100%
rename from nlp_toolkit/optimization/mixture/__init__.py
rename to intel_extension_for_transformers/optimization/mixture/__init__.py
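Several of the optimization hunks above (evolution.py, auto_distillation.py below, model.py below) import a LazyImport helper and use it as torchprofile = LazyImport("torchprofile") or torch = LazyImport("torch"). The patch itself never shows the helper's body; the sketch below is only a plausible reading of what such a deferred-import wrapper does, assuming nothing beyond the call pattern visible in these hunks:

```python
import importlib


class LazyImport:
    """Minimal sketch of a deferred-import wrapper (illustrative only; the
    real helper lives in optimization/utils/utility.py and may differ)."""

    def __init__(self, module_name: str):
        self.module_name = module_name
        self._module = None

    def __getattr__(self, name):
        # The wrapped module is imported on first attribute access,
        # not when LazyImport(...) is constructed.
        if self._module is None:
            self._module = importlib.import_module(self.module_name)
        return getattr(self._module, name)


# Usage as seen in the hunks: importing torch is deferred until an
# attribute such as torch.tensor is actually touched.
torch = LazyImport("torch")
```

Deferring the import this way keeps heavy optional dependencies (torch, torchprofile) from being required merely to import the package.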
diff --git a/nlp_toolkit/optimization/mixture/auto_distillation.py b/intel_extension_for_transformers/optimization/mixture/auto_distillation.py
similarity index 99%
rename from nlp_toolkit/optimization/mixture/auto_distillation.py
rename to intel_extension_for_transformers/optimization/mixture/auto_distillation.py
index e13531d73d0..2b9ced8a26c 100644
--- a/nlp_toolkit/optimization/mixture/auto_distillation.py
+++ b/intel_extension_for_transformers/optimization/mixture/auto_distillation.py
@@ -28,8 +28,8 @@
 from neural_compressor.experimental import Distillation
 from neural_compressor.strategy.bayesian import BayesianOptimization
 from neural_compressor.utils import logger
-from nlp_toolkit.optimization.config import AutoDistillationConfig
-from nlp_toolkit.optimization.utils.utility import LazyImport
+from intel_extension_for_transformers.optimization.config import AutoDistillationConfig
+from intel_extension_for_transformers.optimization.utils.utility import LazyImport
 torch = LazyImport("torch")
diff --git a/nlp_toolkit/optimization/model.py b/intel_extension_for_transformers/optimization/model.py
similarity index 97%
rename from nlp_toolkit/optimization/model.py
rename to intel_extension_for_transformers/optimization/model.py
index 08cdd6d4cf2..ccb81f5a9bd 100644
--- a/nlp_toolkit/optimization/model.py
+++ b/intel_extension_for_transformers/optimization/model.py
@@ -20,7 +20,7 @@
 import transformers
 from .config import WEIGHTS_NAME
 from neural_compressor.utils import logger
-from nlp_toolkit.optimization.utils.utility import LazyImport
+from intel_extension_for_transformers.optimization.utils.utility import LazyImport
 from packaging.version import Version
 from transformers import AutoConfig
diff --git a/intel_extension_for_transformers/optimization/neural-compressor-third-party-programs.txt b/intel_extension_for_transformers/optimization/neural-compressor-third-party-programs.txt
new file mode 100644
index 00000000000..dc399671bb9
--- /dev/null
+++ b/intel_extension_for_transformers/optimization/neural-compressor-third-party-programs.txt
@@ -0,0 +1,1916 @@
+Intel® Neural Compressor Third Party Programs File
+
+This file contains the list of third party software ("third party programs")
+contained in the Intel software and their required notices and/or license
+terms. This third party software, even if included with the distribution of
+the Intel software, may be governed by separate license terms, including
+without limitation, third party license terms, other Intel software license
+terms, and open source software license terms. These separate license terms
+govern your use of the third party programs as set forth in in the
+"THIRD-PARTY-PROGRAMS" file.
+
+Third party programs and their corresponding required notices and/or license
+terms are listed below.
+-------------------------------------------------------------
+1. Intel® AI Quantization Tools for Tensorflow*
+
+ blendcnn
+
+ examples-PyTorch-imagenet_recognition-efficientnet
+ Copyright 2020 Ross Wightman
+
+ examples-PyTorch-PeleeNet
+
+ examples-PyTorch-ResNest
+
+ example-Tensorflow-albert
+
+ example-tensorflow-bert
+
+ example-Tensorflow-dataset_tools
+
+ example-tensorflow-DIEN
+ Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+ example-tensorflow-models
+ Copyright 2016, The Authors.
+
+ Gluon-CV
+
+ Gluon-NLP
+
+ IntelAI Models
+
+ Intel TensorFlow Quantization Tool
+
+ keras-example-resnetv2
+
+ mlperf-inference
+
+ mlperf-training
+ Copyright 2018 The MLPerf Authors
+
+ nvidia-bert
+
+ onnx/models
+
+ opencv-openbert
+
+ pytorch-mobilenet-v3
+
+ rxjs
+ Copyright (c) 2015-2018 Google, Inc., Netflix, Inc., Microsoft Corp. and contributors
+
+ tensor2tensor
+
+ tensorflow-yolo-v3
+
+ Transformer
+
+ nnUnet
+ Copyright [2019] [Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany]
+
+ Ultra-Light-Fast-Generic-Face-Detector-1MB
+ Copyright (c) 2019 linzai
+
+ plotly
+
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright 2018 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-------------------------------------------------------------
+2. torchvision
+ Copyright (c) Soumith Chintala 2016, All rights reserved.
+
+ Examples
+ Copyright (c) 2017, All rights reserved.
+
+ example-PyTorch-SE_ResNetx50_32x4d
+ Copyright (c) 2017, Remi Cadene All rights reserved.
+
+ BSD 3-Clause License
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice, this
+ list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+ * Neither the name of the copyright holder nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+-------------------------------------------------------------
+3. BayesianOptimization
+ Copyright (c) 2014 Fernando M. F. Nogueira
+
+ dlrm
+ Copyright (c) Facebook, Inc. and its affiliates.
+
+ example-TensorFlow-DCGAN
+ Copyright (c) 2016 Taehoon Kim
+
+ MMdnn
+ Copyright (c) Microsoft Corporation. All rights reserved.
+
+ @angular/animations
+ Copyright (c) 2010-2020 Google LLC.
+
+ @angular/cdk
+ Copyright (c) 2010-2020 Google LLC.
+
+ @angular/common
+ Copyright (c) 2010-2020 Google LLC.
+
+ @angular/compiler
+ Copyright (c) 2010-2020 Google LLC.
+ + @angular/core + Copyright (c) 2010-2020 Google LLC. + + @angular/flex-layout + Copyright (c) 2010-2020 Google LLC. + + @angular/forms + Copyright (c) 2010-2020 Google LLC. + + @angular/material + Copyright (c) 2010-2020 Google LLC. + + @angular/platform-browser + Copyright (c) 2010-2020 Google LLC. + + @angular/platform-browser-dynamic + Copyright (c) 2010-2020 Google LLC. + + @angular/router + Copyright (c) 2010-2020 Google LLC. + + socket.io-client + Copyright (c) 2014 Guillermo Rauch + + ts-md5 + Copyright (C) 2014 Raymond Hill + + zone.js + Copyright (c) 2010-2020 Google LLC. + + js-sha256 + Copyright (c) 2014-2017 Chen, Yi-Cyuan + + @swimlane/ngx-graph + Copyright (c) 2016 Swimlane + + ngx-pipes + Copyright (c) Dan Revah + + ngx-charts + + ngx-joyride + Copyright (c) 2018 Tommasi Nicola. + + example-pytorch-WideResNet-cifar10 + Copyright (c) 2019 xternalz + + example-pytorch-plain_cnn-cifar100 + Copyright (c) 2019 Iman Mirzadeh + + haq/model/mobilenetv2 + Copyright (c) 2019 MIT HAN Lab + + mnist + Copyright (c) Microsoft Corporation. All rights reserved. + + onnxconverter-common + Copyright (c) Microsoft Corporation. All rights reserved. + + FileSaver.js + Copyright © 2016 Eli Grey. + + The MIT License (MIT) + + Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + + + +----------------------------------------------------------------- +4. mxnet + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. 
+ + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright [yyyy] [name of copyright owner]
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+
+ ======================================================================================
+ Apache MXNET (incubating) Subcomponents:
+
+ The Apache MXNET (incubating) project contains subcomponents with separate
+ copyright notices and license terms. Your use of the source code for
+ these subcomponents is subject to the terms and conditions of the following
+ licenses. If not stated otherwise, their copyright notices and license terms
+ are available at the path of the subcomponent.
+
+ If a folder hierarchy is listed as subcomponent, separate listings of
+ further subcomponents (files or folder hierarchies) part of the hierarchy
+ take precedence.
+ + ======================================================================================= + Apache-2.0 license + ======================================================================================= + + 3rdparty/ctc_include + 3rdparty/dlpack + 3rdparty/dmlc-core + 3rdparty/mshadow + 3rdparty/tvm + 3rdparty/tvm/3rdparty/dmlc-core + 3rdparty/tvm/3rdparty/dlpack + 3rdparty/ps-lite + 3rdparty/mkldnn + 3rdparty/googletest/googlemock/scripts/generator + 3rdparty/onnx-tensorrt/third_party/onnx/third_party/benchmark + 3rdparty/mkldnn/tests/benchdnn (Copy of the License available at top of current file) + src/operator/special_functions-inl.h Cephes Library Functions (Copy of the License available at top of current file) + 3rdparty/mkldnn/doc/assets/mathjax (Copy of the License available at top of current file) + docs/python_docs/themes/mx-theme/mxtheme/static/material-design-icons-3.0.1 (Copy of the License available at top of current file) + docs/python_docs/themes/mx-theme/mxtheme/static/font/Roboto (Copy of the License available at top of current file) + 3rdparty/tvm/3rdparty/bfloat16/bfloat16.cc (Copy of the License available at top of current file) + + ======================================================================================= + MIT license + ======================================================================================= + + 3rdparty/miniz/miniz.c + 3rdparty/miniz/miniz.h + example/gluon/tree_lstm + 3rdparty/tvm/3rdparty/cma + 3rdparty/onnx-tensorrt + 3rdparty/onnx-tensorrt/third_party/onnx + docs/static_site/src/assets/js/clipboard.js + docs/python_docs/themes/mx-theme + 3rdparty/intgemm + 3rdparty/tvm/3rdparty/compiler-rt/builtin_fp16.h + src/operator/nn/layer_norm.cc + + ======================================================================================= + 3-clause BSD license + ======================================================================================= + + 3rdparty/mkldnn/cmake/FindOpenCL.cmake (Copy of the License available at licenses/BSD3-cmake) + 3rdparty/mkldnn/src/cpu/x64/jit_utils/jitprofiling/ + 3rdparty/ctc_include/contrib/moderngpu + 3rdparty/nvidia_cub + 3rdparty/nvidia_cub/test/mersenne.h + 3rdparty/googletest/googlemock + 3rdparty/googletest/googletest + cmake/upstream/FindCUDAToolkit.cmake + cmake/upstream/select_compute_arch.cmake + src/operator/numpy/np_einsum_op-inl.h + src/operator/numpy/np_einsum_path_op-inl.h + src/operator/numpy/np_einsum_op.cc + src/operator/contrib/erfinv-inl.h + + ======================================================================================= + 2-clause BSD license + ======================================================================================= + + 3rdparty/dmlc-core/include/dmlc/concurrentqueue.h + 3rdparty/tvm/3rdparty/picojson/picojson.h + + ======================================================================================= + Apache-2.0 license + LLVM Exceptions + ======================================================================================= + + 3rdparty/openmp + + ======================================================================================= + Caffe Licensing Model + ======================================================================================= + + src/operator/nn/pool.h + src/operator/nn/pool.cuh + src/operator/nn/im2col.h + src/operator/nn/im2col.cuh + src/operator/contrib/nn/deformable_im2col.h + src/operator/contrib/nn/deformable_im2col.cuh + src/operator/contrib/nn/modulated_deformable_im2col.h + src/operator/contrib/nn/modulated_deformable_im2col.cuh + + 
=======================================================================================
+ 2-clause BSD license + zlib license
+ =======================================================================================
+
+ 3rdparty/dmlc-core/include/dmlc/blockingconcurrentqueue.h
+
+ =======================================================================================
+ Apache-2.0 license + 3-clause BSD license
+ =======================================================================================
+
+ python/mxnet/contrib/onnx/mx2onnx/_op_translations.py
+ python/mxnet/contrib/onnx/mx2onnx/export_onnx.py
+
+ =======================================================================================
+ Apache-2.0 license + MIT License
+ =======================================================================================
+
+ src/serialization/cnpy.h (Copy of the AL2 License available at the top of this file, MIT License available at licenses/MIT)
+ src/serialization/cnpy.cc (Copy of the AL2 License available at the top of this file, MIT License available at licenses/MIT)
+ 3rdparty/onnx-tensorrt/third_party/onnx/tools/protoc-gen-mypy.py (Copy of the referenced AL2 License available at top of current file)
+
+ =======================================================================================
+ Apache-2.0 license + Boost Software License, Version 1.0
+ =======================================================================================
+
+ cmake/Modules/FindJeMalloc.cmake
+
+ =======================================================================================
+ Boost Software License, Version 1.0
+ =======================================================================================
+
+ 3rdparty/intgemm/test/3rd_party/catch.hpp (Copy of the License available at licenses/BOOST1_0)
+ 3rdparty/mkldnn/src/common/primitive_hashing.hpp
+
+ =======================================================================================
+ LLVM Release License
+ =======================================================================================
+
+
+ =======================================================================================
+ Unlicense
+ =======================================================================================
+
+ 3rdparty/tvm/3rdparty/rang
+
+ =======================================================================================
+ SIL Open Font License (OFL)
+ =======================================================================================
+
+ docs/python_docs/themes/mx-theme/mxtheme/static/webfonts/ (Copy of the License available at licenses/OFL1_1)
+------------------------------------------------------------------
+
+5. examples-PyTorch-object_detection_yolo_v3
+
+ GNU GENERAL PUBLIC LICENSE
+ Version 3, 29 June 2007
+
+ Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The GNU General Public License is a free, copyleft license for
+ software and other kinds of works.
+
+ The licenses for most software and other practical works are designed
+ to take away your freedom to share and change the works. By contrast,
+ the GNU General Public License is intended to guarantee your freedom to
+ share and change all versions of a program--to make sure it remains free
+ software for all its users.
We, the Free Software Foundation, use the + GNU General Public License for most of our software; it applies also to + any other work released this way by its authors. You can apply it to + your programs, too. + + When we speak of free software, we are referring to freedom, not + price. Our General Public Licenses are designed to make sure that you + have the freedom to distribute copies of free software (and charge for + them if you wish), that you receive source code or can get it if you + want it, that you can change the software or use pieces of it in new + free programs, and that you know you can do these things. + + To protect your rights, we need to prevent others from denying you + these rights or asking you to surrender the rights. Therefore, you have + certain responsibilities if you distribute copies of the software, or if + you modify it: responsibilities to respect the freedom of others. + + For example, if you distribute copies of such a program, whether + gratis or for a fee, you must pass on to the recipients the same + freedoms that you received. You must make sure that they, too, receive + or can get the source code. And you must show them these terms so they + know their rights. + + Developers that use the GNU GPL protect your rights with two steps: + (1) assert copyright on the software, and (2) offer you this License + giving you legal permission to copy, distribute and/or modify it. + + For the developers' and authors' protection, the GPL clearly explains + that there is no warranty for this free software. For both users' and + authors' sake, the GPL requires that modified versions be marked as + changed, so that their problems will not be attributed erroneously to + authors of previous versions. + + Some devices are designed to deny users access to install or run + modified versions of the software inside them, although the manufacturer + can do so. This is fundamentally incompatible with the aim of + protecting users' freedom to change the software. The systematic + pattern of such abuse occurs in the area of products for individuals to + use, which is precisely where it is most unacceptable. Therefore, we + have designed this version of the GPL to prohibit the practice for those + products. If such problems arise substantially in other domains, we + stand ready to extend this provision to those domains in future versions + of the GPL, as needed to protect the freedom of users. + + Finally, every program is threatened constantly by software patents. + States should not allow patents to restrict development and use of + software on general-purpose computers, but in those that do, we wish to + avoid the special danger that patents applied to a free program could + make it effectively proprietary. To prevent this, the GPL assures that + patents cannot be used to render the program non-free. + + The precise terms and conditions for copying, distribution and + modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of + works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this + License. Each licensee is addressed as "you". "Licensees" and + "recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work + in a fashion requiring copyright permission, other than the making of an + exact copy. 
The resulting work is called a "modified version" of the + earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based + on the Program. + + To "propagate" a work means to do anything with it that, without + permission, would make you directly or secondarily liable for + infringement under applicable copyright law, except executing it on a + computer or modifying a private copy. Propagation includes copying, + distribution (with or without modification), making available to the + public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other + parties to make or receive copies. Mere interaction with a user through + a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" + to the extent that it includes a convenient and prominently visible + feature that (1) displays an appropriate copyright notice, and (2) + tells the user that there is no warranty for the work (except to the + extent that warranties are provided), that licensees may convey the + work under this License, and how to view a copy of this License. If + the interface presents a list of user commands or options, such as a + menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work + for making modifications to it. "Object code" means any non-source + form of a work. + + A "Standard Interface" means an interface that either is an official + standard defined by a recognized standards body, or, in the case of + interfaces specified for a particular programming language, one that + is widely used among developers working in that language. + + The "System Libraries" of an executable work include anything, other + than the work as a whole, that (a) is included in the normal form of + packaging a Major Component, but which is not part of that Major + Component, and (b) serves only to enable use of the work with that + Major Component, or to implement a Standard Interface for which an + implementation is available to the public in source code form. A + "Major Component", in this context, means a major essential component + (kernel, window system, and so on) of the specific operating system + (if any) on which the executable work runs, or a compiler used to + produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all + the source code needed to generate, install, and (for an executable + work) run the object code and to modify the work, including scripts to + control those activities. However, it does not include the work's + System Libraries, or general-purpose tools or generally available free + programs which are used unmodified in performing those activities but + which are not part of the work. For example, Corresponding Source + includes interface definition files associated with source files for + the work, and the source code for shared libraries and dynamically + linked subprograms that the work is specifically designed to require, + such as by intimate data communication or control flow between those + subprograms and other parts of the work. + + The Corresponding Source need not include anything that users + can regenerate automatically from other parts of the Corresponding + Source. 
+ + The Corresponding Source for a work in source code form is that + same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of + copyright on the Program, and are irrevocable provided the stated + conditions are met. This License explicitly affirms your unlimited + permission to run the unmodified Program. The output from running a + covered work is covered by this License only if the output, given its + content, constitutes a covered work. This License acknowledges your + rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not + convey, without conditions so long as your license otherwise remains + in force. You may convey covered works to others for the sole purpose + of having them make modifications exclusively for you, or provide you + with facilities for running those works, provided that you comply with + the terms of this License in conveying all material for which you do + not control copyright. Those thus making or running the covered works + for you must do so exclusively on your behalf, under your direction + and control, on terms that prohibit them from making any copies of + your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under + the conditions stated below. Sublicensing is not allowed; section 10 + makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological + measure under any applicable law fulfilling obligations under article + 11 of the WIPO copyright treaty adopted on 20 December 1996, or + similar laws prohibiting or restricting circumvention of such + measures. + + When you convey a covered work, you waive any legal power to forbid + circumvention of technological measures to the extent such circumvention + is effected by exercising rights under this License with respect to + the covered work, and you disclaim any intention to limit operation or + modification of the work as a means of enforcing, against the work's + users, your or third parties' legal rights to forbid circumvention of + technological measures. + + 4. Conveying Verbatim Copies. + + You may convey verbatim copies of the Program's source code as you + receive it, in any medium, provided that you conspicuously and + appropriately publish on each copy an appropriate copyright notice; + keep intact all notices stating that this License and any + non-permissive terms added in accord with section 7 apply to the code; + keep intact all notices of the absence of any warranty; and give all + recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, + and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to + produce it from the Program, in the form of source code under the + terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". 
+ + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. + + A compilation of a covered work with other separate and independent + works, which are not by their nature extensions of the covered work, + and which are not combined with it such as to form a larger program, + in or on a volume of a storage or distribution medium, is called an + "aggregate" if the compilation and its resulting copyright are not + used to limit the access or legal rights of the compilation's users + beyond what the individual works permit. Inclusion of a covered work + in an aggregate does not cause this License to apply to the other + parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms + of sections 4 and 5, provided that you also convey the + machine-readable Corresponding Source under the terms of this License, + in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. + + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. 
+ + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded + from the Corresponding Source as a System Library, need not be + included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any + tangible personal property which is normally used for personal, family, + or household purposes, or (2) anything designed or sold for incorporation + into a dwelling. In determining whether a product is a consumer product, + doubtful cases shall be resolved in favor of coverage. For a particular + product received by a particular user, "normally used" refers to a + typical or common use of that class of product, regardless of the status + of the particular user or of the way in which the particular user + actually uses, or expects or is expected to use, the product. A product + is a consumer product regardless of whether the product has substantial + commercial, industrial or non-consumer uses, unless such uses represent + the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, + procedures, authorization keys, or other information required to install + and execute modified versions of a covered work in that User Product from + a modified version of its Corresponding Source. The information must + suffice to ensure that the continued functioning of the modified object + code is in no case prevented or interfered with solely because + modification has been made. + + If you convey an object code work under this section in, or with, or + specifically for use in, a User Product, and the conveying occurs as + part of a transaction in which the right of possession and use of the + User Product is transferred to the recipient in perpetuity or for a + fixed term (regardless of how the transaction is characterized), the + Corresponding Source conveyed under this section must be accompanied + by the Installation Information. But this requirement does not apply + if neither you nor any third party retains the ability to install + modified object code on the User Product (for example, the work has + been installed in ROM). + + The requirement to provide Installation Information does not include a + requirement to continue to provide support service, warranty, or updates + for a work that has been modified or installed by the recipient, or for + the User Product in which it has been modified or installed. Access to a + network may be denied when the modification itself materially and + adversely affects the operation of the network or violates the rules and + protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, + in accord with this section must be in a format that is publicly + documented (and with an implementation available to the public in + source code form), and must require no special password or key for + unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this + License by making exceptions from one or more of its conditions. 
+ Additional permissions that are applicable to the entire Program shall + be treated as though they were included in this License, to the extent + that they are valid under applicable law. If additional permissions + apply only to part of the Program, that part may be used separately + under those permissions, but the entire Program remains governed by + this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option + remove any additional permissions from that copy, or from any part of + it. (Additional permissions may be written to require their own + removal in certain cases when you modify the work.) You may place + additional permissions on material, added by you to a covered work, + for which you have or can give appropriate copyright permission. + + Notwithstanding any other provision of this License, for material you + add to a covered work, you may (if authorized by the copyright holders of + that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further + restrictions" within the meaning of section 10. If the Program as you + received it, or any part of it, contains a notice stating that it is + governed by this License along with a term that is a further + restriction, you may remove that term. If a license document contains + a further restriction but permits relicensing or conveying under this + License, you may add to a covered work material governed by the terms + of that license document, provided that the further restriction does + not survive such relicensing or conveying. + + If you add terms to a covered work in accord with this section, you + must place, in the relevant source files, a statement of the + additional terms that apply to those files, or a notice indicating + where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the + form of a separately written license, or stated as exceptions; + the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly + provided under this License. Any attempt otherwise to propagate or + modify it is void, and will automatically terminate your rights under + this License (including any patent licenses granted under the third + paragraph of section 11). 
+ + However, if you cease all violation of this License, then your + license from a particular copyright holder is reinstated (a) + provisionally, unless and until the copyright holder explicitly and + finally terminates your license, and (b) permanently, if the copyright + holder fails to notify you of the violation by some reasonable means + prior to 60 days after the cessation. + + Moreover, your license from a particular copyright holder is + reinstated permanently if the copyright holder notifies you of the + violation by some reasonable means, this is the first time you have + received notice of violation of this License (for any work) from that + copyright holder, and you cure the violation prior to 30 days after + your receipt of the notice. + + Termination of your rights under this section does not terminate the + licenses of parties who have received copies or rights from you under + this License. If your rights have been terminated and not permanently + reinstated, you do not qualify to receive new licenses for the same + material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or + run a copy of the Program. Ancillary propagation of a covered work + occurring solely as a consequence of using peer-to-peer transmission + to receive a copy likewise does not require acceptance. However, + nothing other than this License grants you permission to propagate or + modify any covered work. These actions infringe copyright if you do + not accept this License. Therefore, by modifying or propagating a + covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically + receives a license from the original licensors, to run, modify and + propagate that work, subject to this License. You are not responsible + for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an + organization, or substantially all assets of one, or subdividing an + organization, or merging organizations. If propagation of a covered + work results from an entity transaction, each party to that + transaction who receives a copy of the work also receives whatever + licenses to the work the party's predecessor in interest had or could + give under the previous paragraph, plus a right to possession of the + Corresponding Source of the work from the predecessor in interest, if + the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the + rights granted or affirmed under this License. For example, you may + not impose a license fee, royalty, or other charge for exercise of + rights granted under this License, and you may not initiate litigation + (including a cross-claim or counterclaim in a lawsuit) alleging that + any patent claim is infringed by making, using, selling, offering for + sale, or importing the Program or any portion of it. + + 11. Patents. + + A "contributor" is a copyright holder who authorizes use under this + License of the Program or a work on which the Program is based. The + work thus licensed is called the contributor's "contributor version". 
+ + A contributor's "essential patent claims" are all patent claims + owned or controlled by the contributor, whether already acquired or + hereafter acquired, that would be infringed by some manner, permitted + by this License, of making, using, or selling its contributor version, + but do not include claims that would be infringed only as a + consequence of further modification of the contributor version. For + purposes of this definition, "control" includes the right to grant + patent sublicenses in a manner consistent with the requirements of + this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free + patent license under the contributor's essential patent claims, to + make, use, sell, offer for sale, import and otherwise run, modify and + propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express + agreement or commitment, however denominated, not to enforce a patent + (such as an express permission to practice a patent or covenant not to + sue for patent infringement). To "grant" such a patent license to a + party means to make such an agreement or commitment not to enforce a + patent against the party. + + If you convey a covered work, knowingly relying on a patent license, + and the Corresponding Source of the work is not available for anyone + to copy, free of charge and under the terms of this License, through a + publicly available network server or other readily accessible means, + then you must either (1) cause the Corresponding Source to be so + available, or (2) arrange to deprive yourself of the benefit of the + patent license for this particular work, or (3) arrange, in a manner + consistent with the requirements of this License, to extend the patent + license to downstream recipients. "Knowingly relying" means you have + actual knowledge that, but for the patent license, your conveying the + covered work in a country, or your recipient's use of the covered work + in a country, would infringe one or more identifiable patents in that + country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or + arrangement, you convey, or propagate by procuring conveyance of, a + covered work, and grant a patent license to some of the parties + receiving the covered work authorizing them to use, propagate, modify + or convey a specific copy of the covered work, then the patent license + you grant is automatically extended to all recipients of the covered + work and works based on it. + + A patent license is "discriminatory" if it does not include within + the scope of its coverage, prohibits the exercise of, or is + conditioned on the non-exercise of one or more of the rights that are + specifically granted under this License. You may not convey a covered + work if you are a party to an arrangement with a third party that is + in the business of distributing software, under which you make payment + to the third party based on the extent of your activity of conveying + the work, and under which the third party grants, to any of the + parties who would receive the covered work from you, a discriminatory + patent license (a) in connection with copies of the covered work + conveyed by you (or copies made from those copies), or (b) primarily + for and in connection with specific products or compilations that + contain the covered work, unless you entered into that arrangement, + or that patent license was granted, prior to 28 March 2007. 
+ + Nothing in this License shall be construed as excluding or limiting + any implied license or other defenses to infringement that may + otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or + otherwise) that contradict the conditions of this License, they do not + excuse you from the conditions of this License. If you cannot convey a + covered work so as to satisfy simultaneously your obligations under this + License and any other pertinent obligations, then as a consequence you may + not convey it at all. For example, if you agree to terms that obligate you + to collect a royalty for further conveying from those to whom you convey + the Program, the only way you could satisfy both those terms and this + License would be to refrain entirely from conveying the Program. + + 13. Use with the GNU Affero General Public License. + + Notwithstanding any other provision of this License, you have + permission to link or combine any covered work with a work licensed + under version 3 of the GNU Affero General Public License into a single + combined work, and to convey the resulting work. The terms of this + License will continue to apply to the part which is the covered work, + but the special requirements of the GNU Affero General Public License, + section 13, concerning interaction through a network will apply to the + combination as such. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of + the GNU General Public License from time to time. Such new versions will + be similar in spirit to the present version, but may differ in detail to + address new problems or concerns. + + Each version is given a distinguishing version number. If the + Program specifies that a certain numbered version of the GNU General + Public License "or any later version" applies to it, you have the + option of following the terms and conditions either of that numbered + version or of any later version published by the Free Software + Foundation. If the Program does not specify a version number of the + GNU General Public License, you may choose any version ever published + by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future + versions of the GNU General Public License can be used, that proxy's + public statement of acceptance of a version permanently authorizes you + to choose that version for the Program. + + Later license versions may give you additional or different + permissions. However, no additional obligations are imposed on any + author or copyright holder as a result of your choosing to follow a + later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY + APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT + HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY + OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, + THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM + IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF + ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. 
+
+ IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+ WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
+ THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
+ GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
+ USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
+ DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
+ PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
+ EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
+ SUCH DAMAGES.
+
+ 17. Interpretation of Sections 15 and 16.
+
+ If the disclaimer of warranty and limitation of liability provided
+ above cannot be given local legal effect according to their terms,
+ reviewing courts shall apply local law that most closely approximates
+ an absolute waiver of all civil liability in connection with the
+ Program, unless a warranty or assumption of liability accompanies a
+ copy of the Program in return for a fee.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+ possible use to the public, the best way to achieve this is to make it
+ free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+ to attach them to the start of each source file to most effectively
+ state the exclusion of warranty; and each file should have at least
+ the "copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the program's name and a brief idea of what it does.>
+ Copyright (C) <year> <name of author>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+ Also add information on how to contact you by electronic and paper mail.
+
+ If the program does terminal interaction, make it output a short
+ notice like this when it starts in an interactive mode:
+
+ <program> Copyright (C) <year> <name of author>
+ This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+ This is free software, and you are welcome to redistribute it
+ under certain conditions; type `show c' for details.
+
+ The hypothetical commands `show w' and `show c' should show the appropriate
+ parts of the General Public License. Of course, your program's commands
+ might be different; for a GUI interface, you would use an "about box".
+
+ You should also get your employer (if you work as a programmer) or school,
+ if any, to sign a "copyright disclaimer" for the program, if necessary.
+ For more information on this, and how to apply and follow the GNU GPL, see
+ <https://www.gnu.org/licenses/>.
+
+ The GNU General Public License does not permit incorporating your program
+ into proprietary programs. If your program is a subroutine library, you
+ may consider it more useful to permit linking proprietary applications with
+ the library. If this is what you want to do, use the GNU Lesser General
+ Public License instead of this License. But first, please read
+ <https://www.gnu.org/philosophy/why-not-lgpl.html>.
+ + +------------------------------------------------------------------ +6. tslib + Copyright (c) Microsoft Corporation. + + BSD Zero Clause License + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted. + + THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH + REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY + AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, + INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM + LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR + OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR + PERFORMANCE OF THIS SOFTWARE. + + +------------------------------------------------------------------ +7. sha.js + Copyright (c) 2013-2018 sha.js contributors + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + + +Copyright (c) 1998 - 2009, Paul Johnston & Contributors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +Redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. + +Redistributions in binary form must reproduce the above copyright notice, this +list of conditions and the following disclaimer in the documentation and/or +other materials provided with the distribution. + +Neither the name of the author nor the names of its contributors may be used to +endorse or promote products derived from this software without specific prior +written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +------------------------------------------------------------------ +8. 
FastFormers
+ Copyright (c) Microsoft Corporation.
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
+
+
+ ======================================================================================
+
+ The FastFormers project contains several components with separate copyright
+ notices and license terms. Your use of the source code for these
+ components is subject to the terms and conditions of the following
+ licenses.
+
+ ======================================================================================
+ MIT License
+
+ --------------------------------------------------------------------------------------
+ 1. onnxruntime from Microsoft
+
+ Same as above license.
+
+
+ ======================================================================================
+ Apache-2.0 licenses
+
+ --------------------------------------------------------------------------------------
+ 1. HuggingFace's transformers, Alex' SuperGLUE transformers
+ 2. TinyBERT from Huawei
+
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+ + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright [yyyy] [name of copyright owner]
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+
+ ======================================================================================
+ BSD License
+
+ --------------------------------------------------------------------------------------
+ 1. FBGEMM from Facebook
+
+ BSD License
+
+ For FBGEMM software
+
+ Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without modification,
+ are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice, this
+ list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+ * Neither the name Facebook nor the names of its contributors may be used to
+ endorse or promote products derived from this software without specific
+ prior written permission.
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR + ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + -------------------------------------------------------------------------------------- + 2. PyTorch + + From PyTorch: + + Copyright (c) 2016- Facebook, Inc (Adam Paszke) + Copyright (c) 2014- Facebook, Inc (Soumith Chintala) + Copyright (c) 2011-2014 Idiap Research Institute (Ronan Collobert) + Copyright (c) 2012-2014 Deepmind Technologies (Koray Kavukcuoglu) + Copyright (c) 2011-2012 NEC Laboratories America (Koray Kavukcuoglu) + Copyright (c) 2011-2013 NYU (Clement Farabet) + Copyright (c) 2006-2010 NEC Laboratories America (Ronan Collobert, Leon Bottou, Iain Melvin, Jason Weston) + Copyright (c) 2006 Idiap Research Institute (Samy Bengio) + Copyright (c) 2001-2004 Idiap Research Institute (Ronan Collobert, Samy Bengio, Johnny Mariethoz) + + From Caffe2: + + Copyright (c) 2016-present, Facebook Inc. All rights reserved. + + All contributions by Facebook: + Copyright (c) 2016 Facebook Inc. + + All contributions by Google: + Copyright (c) 2015 Google Inc. + All rights reserved. + + All contributions by Yangqing Jia: + Copyright (c) 2015 Yangqing Jia + All rights reserved. + + All contributions by Kakao Brain: + Copyright 2019-2020 Kakao Brain + + All contributions from Caffe: + Copyright(c) 2013, 2014, 2015, the respective contributors + All rights reserved. + + All other contributions: + Copyright(c) 2015, 2016 the respective contributors + All rights reserved. + + Caffe2 uses a copyright model similar to Caffe: each contributor holds + copyright over their contributions to Caffe2. The project versioning records + all such contribution and copyright details. If a contributor wants to further + mark their specific copyright on a particular contribution, they should + indicate their copyright solely in the commit message of the change when it is + committed. + + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. Neither the names of Facebook, Deepmind Technologies, NYU, NEC Laboratories America + and IDIAP Research Institute nor the names of its contributors may be + used to endorse or promote products derived from this software without + specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + +------------------------------------------------------------- +9. protobuf + Copyright 2008 Google Inc. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +Code generated by the Protocol Buffer compiler is owned by the owner +of the input file used when generating it. This code is not +standalone and requires a support library to be linked with it. This +support library is itself covered by the above license. +------------------------------------------------------------------ + +The following third party programs have their own third party program files. These additional +third party program files are as follows: + +1. 
Intel® AI Quantization Tools for Tensorflow*: python3.7/site-packages/neural_compressor-1.11.dist-info +------------------------------------------------------------- +Other names and brands may be claimed as the property of others \ No newline at end of file diff --git a/nlp_toolkit/optimization/optimizer.py b/intel_extension_for_transformers/optimization/optimizer.py similarity index 97% rename from nlp_toolkit/optimization/optimizer.py rename to intel_extension_for_transformers/optimization/optimizer.py index 5c003b508f4..c5e9d4efde2 100644 --- a/nlp_toolkit/optimization/optimizer.py +++ b/intel_extension_for_transformers/optimization/optimizer.py @@ -24,14 +24,14 @@ Distillation, ) from neural_compressor.experimental.scheduler import Scheduler -from nlp_toolkit import( +from intel_extension_for_transformers import( DistillationConfig, Provider, QuantizationConfig, PruningConfig ) -from nlp_toolkit.optimization.utils.utility import LazyImport -from nlp_toolkit.optimization.quantization import QuantizationMode +from intel_extension_for_transformers.optimization.utils.utility import LazyImport +from intel_extension_for_transformers.optimization.quantization import QuantizationMode from transformers import PreTrainedModel from transformers.file_utils import WEIGHTS_NAME from typing import Callable, Optional, Union, List @@ -177,7 +177,7 @@ def init_quantizer( # pylint: disable=E0401 def _nncf_quantize(self): # pragma: no cover - from nlp_toolkit import NncfConfig + from intel_extension_for_transformers import NncfConfig from nncf import create_compressed_model compression_state = None assert isinstance(self.quant_config, NncfConfig), \ diff --git a/nlp_toolkit/optimization/optimizer_tf.py b/intel_extension_for_transformers/optimization/optimizer_tf.py similarity index 98% rename from nlp_toolkit/optimization/optimizer_tf.py rename to intel_extension_for_transformers/optimization/optimizer_tf.py index f6daf33f26e..85047312dd2 100644 --- a/nlp_toolkit/optimization/optimizer_tf.py +++ b/intel_extension_for_transformers/optimization/optimizer_tf.py @@ -24,10 +24,10 @@ from neural_compressor.experimental import common from neural_compressor.model.model import saved_model_session from neural_compressor.model.model import get_model_type -from nlp_toolkit import (DistillationConfig, QuantizationConfig, PruningConfig, AutoDistillation) -from nlp_toolkit.optimization.quantization import QuantizationMode -from nlp_toolkit.optimization.utils.metrics import Metric -from nlp_toolkit.optimization.utils.utility import LazyImport +from intel_extension_for_transformers import (DistillationConfig, QuantizationConfig, PruningConfig, AutoDistillation) +from intel_extension_for_transformers.optimization.quantization import QuantizationMode +from intel_extension_for_transformers.optimization.utils.metrics import Metric +from intel_extension_for_transformers.optimization.utils.utility import LazyImport from packaging import version from transformers import PreTrainedModel from transformers.training_args_tf import TFTrainingArguments diff --git a/nlp_toolkit/optimization/pipeline.py b/intel_extension_for_transformers/optimization/pipeline.py similarity index 94% rename from nlp_toolkit/optimization/pipeline.py rename to intel_extension_for_transformers/optimization/pipeline.py index d5be47f7f45..32898a84ffa 100644 --- a/nlp_toolkit/optimization/pipeline.py +++ b/intel_extension_for_transformers/optimization/pipeline.py @@ -41,12 +41,12 @@ def infer_framework_load_model( `Tuple`: A tuple framework, model. 
""" logger.warning("Function transformers.pipelines.base.infer_framework_load_model is replaced " - "by nlp_toolkit.optimization.pipeline.") + "by intel_extension_for_transformers.optimization.pipeline.") backend = model_kwargs['backend'] if 'backend' in model_kwargs else None if isinstance(model, str): if backend == 'executor': - from nlp_toolkit.backends.neural_engine.compile import compile + from intel_extension_for_transformers.backends.neural_engine.compile import compile model = compile(model) model.__call__= model.inference model.config = config diff --git a/nlp_toolkit/optimization/pruning.py b/intel_extension_for_transformers/optimization/pruning.py similarity index 100% rename from nlp_toolkit/optimization/pruning.py rename to intel_extension_for_transformers/optimization/pruning.py diff --git a/nlp_toolkit/optimization/pytorch_pruner/__init__.py b/intel_extension_for_transformers/optimization/pytorch_pruner/__init__.py similarity index 100% rename from nlp_toolkit/optimization/pytorch_pruner/__init__.py rename to intel_extension_for_transformers/optimization/pytorch_pruner/__init__.py diff --git a/nlp_toolkit/optimization/pytorch_pruner/patterns.py b/intel_extension_for_transformers/optimization/pytorch_pruner/patterns.py similarity index 100% rename from nlp_toolkit/optimization/pytorch_pruner/patterns.py rename to intel_extension_for_transformers/optimization/pytorch_pruner/patterns.py diff --git a/nlp_toolkit/optimization/pytorch_pruner/prune_utils.py b/intel_extension_for_transformers/optimization/pytorch_pruner/prune_utils.py similarity index 100% rename from nlp_toolkit/optimization/pytorch_pruner/prune_utils.py rename to intel_extension_for_transformers/optimization/pytorch_pruner/prune_utils.py diff --git a/nlp_toolkit/optimization/pytorch_pruner/pruner.py b/intel_extension_for_transformers/optimization/pytorch_pruner/pruner.py similarity index 100% rename from nlp_toolkit/optimization/pytorch_pruner/pruner.py rename to intel_extension_for_transformers/optimization/pytorch_pruner/pruner.py diff --git a/nlp_toolkit/optimization/pytorch_pruner/pruning.py b/intel_extension_for_transformers/optimization/pytorch_pruner/pruning.py similarity index 100% rename from nlp_toolkit/optimization/pytorch_pruner/pruning.py rename to intel_extension_for_transformers/optimization/pytorch_pruner/pruning.py diff --git a/nlp_toolkit/optimization/pytorch_pruner/scheduler.py b/intel_extension_for_transformers/optimization/pytorch_pruner/scheduler.py similarity index 100% rename from nlp_toolkit/optimization/pytorch_pruner/scheduler.py rename to intel_extension_for_transformers/optimization/pytorch_pruner/scheduler.py diff --git a/nlp_toolkit/optimization/quantization.py b/intel_extension_for_transformers/optimization/quantization.py similarity index 100% rename from nlp_toolkit/optimization/quantization.py rename to intel_extension_for_transformers/optimization/quantization.py diff --git a/nlp_toolkit/optimization/trainer.py b/intel_extension_for_transformers/optimization/trainer.py similarity index 99% rename from nlp_toolkit/optimization/trainer.py rename to intel_extension_for_transformers/optimization/trainer.py index 3bb92cf39c2..fa0a7b475df 100644 --- a/nlp_toolkit/optimization/trainer.py +++ b/intel_extension_for_transformers/optimization/trainer.py @@ -29,7 +29,7 @@ from neural_compressor.experimental import Component from neural_compressor.model.torch_model import PyTorchIpexModel from neural_compressor.utils import logger -from nlp_toolkit import ( +from 
intel_extension_for_transformers import ( AutoDistillation, DistillationConfig, Provider, @@ -39,8 +39,8 @@ PruningConfig, DynamicLengthConfig, ) -from nlp_toolkit.optimization.utils.metrics import Metric -from nlp_toolkit.optimization.utils.utility import LazyImport +from intel_extension_for_transformers.optimization.utils.metrics import Metric +from intel_extension_for_transformers.optimization.utils.utility import LazyImport from packaging import version from tqdm.auto import tqdm from transformers import __version__, Seq2SeqTrainer, Trainer, PreTrainedModel @@ -237,7 +237,7 @@ def init_quantizer( # pylint: disable=E0401 def _nncf_quantize(self): # pragma: no cover - from nlp_toolkit import NncfConfig + from intel_extension_for_transformers import NncfConfig from nncf import create_compressed_model compression_state = None assert isinstance(self.quant_config, NncfConfig), \ @@ -458,7 +458,7 @@ def orchestrate_optimizations( eval_func: Optional[Callable] = None, train_func: Optional[Callable] = None, ): - from nlp_toolkit.optimization.optimizer import Orchestrate_optimizer + from intel_extension_for_transformers.optimization.optimizer import Orchestrate_optimizer self.orchestrate_opt = True self._eval_func = self.builtin_eval_func if eval_func is None else eval_func self._train_func = self.builtin_train_func if train_func is None else train_func @@ -763,7 +763,7 @@ def train( if hasattr(self.component, "teacher_model"): self.component.teacher_model._model = self._wrap_model( self.component.teacher_model.model) - component.pre_epoch_begin() + component.pre_epoch_begin(self.calib_dataloader if self.calib_dataloader else None) if component.combination is not None and "Quantization" in component.combination: model = component.model.model for epoch in range(epochs_trained, num_train_epochs): diff --git a/nlp_toolkit/optimization/utils/__init__.py b/intel_extension_for_transformers/optimization/utils/__init__.py similarity index 100% rename from nlp_toolkit/optimization/utils/__init__.py rename to intel_extension_for_transformers/optimization/utils/__init__.py diff --git a/nlp_toolkit/optimization/utils/metrics.py b/intel_extension_for_transformers/optimization/utils/metrics.py similarity index 100% rename from nlp_toolkit/optimization/utils/metrics.py rename to intel_extension_for_transformers/optimization/utils/metrics.py diff --git a/nlp_toolkit/optimization/utils/models/__init__.py b/intel_extension_for_transformers/optimization/utils/models/__init__.py similarity index 100% rename from nlp_toolkit/optimization/utils/models/__init__.py rename to intel_extension_for_transformers/optimization/utils/models/__init__.py diff --git a/nlp_toolkit/optimization/utils/models/modeling_bert_dynamic.py b/intel_extension_for_transformers/optimization/utils/models/modeling_bert_dynamic.py similarity index 100% rename from nlp_toolkit/optimization/utils/models/modeling_bert_dynamic.py rename to intel_extension_for_transformers/optimization/utils/models/modeling_bert_dynamic.py diff --git a/nlp_toolkit/optimization/utils/models/modeling_roberta_dynamic.py b/intel_extension_for_transformers/optimization/utils/models/modeling_roberta_dynamic.py similarity index 100% rename from nlp_toolkit/optimization/utils/models/modeling_roberta_dynamic.py rename to intel_extension_for_transformers/optimization/utils/models/modeling_roberta_dynamic.py diff --git a/nlp_toolkit/optimization/utils/objectives.py b/intel_extension_for_transformers/optimization/utils/objectives.py similarity index 100% rename from 
nlp_toolkit/optimization/utils/objectives.py rename to intel_extension_for_transformers/optimization/utils/objectives.py diff --git a/nlp_toolkit/optimization/utils/utility.py b/intel_extension_for_transformers/optimization/utils/utility.py similarity index 100% rename from nlp_toolkit/optimization/utils/utility.py rename to intel_extension_for_transformers/optimization/utils/utility.py diff --git a/nlp_toolkit/optimization/utils/utility_tf.py b/intel_extension_for_transformers/optimization/utils/utility_tf.py similarity index 100% rename from nlp_toolkit/optimization/utils/utility_tf.py rename to intel_extension_for_transformers/optimization/utils/utility_tf.py diff --git a/nlp_toolkit/preprocessing/__init__.py b/intel_extension_for_transformers/preprocessing/__init__.py similarity index 100% rename from nlp_toolkit/preprocessing/__init__.py rename to intel_extension_for_transformers/preprocessing/__init__.py diff --git a/nlp_toolkit/preprocessing/data_augmentation.py b/intel_extension_for_transformers/preprocessing/data_augmentation.py similarity index 99% rename from nlp_toolkit/preprocessing/data_augmentation.py rename to intel_extension_for_transformers/preprocessing/data_augmentation.py index 1343054e047..6e6560868bf 100644 --- a/nlp_toolkit/preprocessing/data_augmentation.py +++ b/intel_extension_for_transformers/preprocessing/data_augmentation.py @@ -21,7 +21,7 @@ import numpy as np import os from datasets import load_dataset -from nlp_toolkit.optimization.utils.utility import LazyImport +from intel_extension_for_transformers.optimization.utils.utility import LazyImport from operator import methodcaller from tqdm import tqdm from .utils import AugmenterType, get_augmenter_from_type diff --git a/nlp_toolkit/preprocessing/utils.py b/intel_extension_for_transformers/preprocessing/utils.py similarity index 100% rename from nlp_toolkit/preprocessing/utils.py rename to intel_extension_for_transformers/preprocessing/utils.py diff --git a/nlp_toolkit/version.py b/intel_extension_for_transformers/version.py similarity index 96% rename from nlp_toolkit/version.py rename to intel_extension_for_transformers/version.py index 92023513c23..959e87565d4 100644 --- a/nlp_toolkit/version.py +++ b/intel_extension_for_transformers/version.py @@ -15,4 +15,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "0.4.0" +__version__ = "1.0b" diff --git a/nlp_toolkit/backends/neural_engine/docs/onnx_quantize.md b/nlp_toolkit/backends/neural_engine/docs/onnx_quantize.md deleted file mode 100644 index 20dacc5ecbd..00000000000 --- a/nlp_toolkit/backends/neural_engine/docs/onnx_quantize.md +++ /dev/null @@ -1,107 +0,0 @@ -# Quantize a ONNX model to engine low precision/int8 IR - -## Design -Quantizing a ONNX model to engine low precision/int8 IR has two steps: 1. Convert ONNX model to engine float IR; 2. Quantize float IR to low precision/int8 IR. The first step will be finished in engine compile. We focus on the second step how to quantize a float engine IR to low precision IR in INC. The whole is in examples/engine/nlp/bert_base_mrpc. 
- -## Prerequisite -### Install environment -```shell -cd /examples/deepengine/nlp/distilbert_base_uncased_mrpc -conda create -n python=3.7 -conda activate -pip install -r requirements.txt -``` -### Prepare Dataset -```python -python prepare_dataset.py --tasks='MRPC' --output_dir=./data -``` -### Prepare ONNX model -```shell -bash prepare_model.sh -``` - -## Tuning and benchmark -### Tune the low precision model -The process of converting a float model to a low precision model in INC is called tuning. -The whole workflow is as follow: ![avatar](imgs/engine_adaptor_workflow.png) Now the const tensor has symmetric quantization mode, and it also can be quantized by per channel or per tensor two. Activation tensor has asymmetric and symmetric two modes. -And there is a simple example to show how float model tuned to low precision model, like this ![avatar](imgs/engine_adaptor_example.png) -It quantizes the all int8 operators by calibration dataset. In order to meet the accuracy requirements, it will run low precision models and determine whether the relative accuracy between low precision model with float model is within the limited range. If not, it will recall some int8 operators to float. So the object quantizer need dataset, model and metric. -```python -from neural_compressor.experimental import Quantization, common -ds = TF_BERTDataSet(args.data_dir, args.vocab_file, args.do_lower_case) -quantizer = Quantization(args.config) -quantizer.model = args.input_model -quantizer.eval_dataloader = common.DataLoader(ds, args.batch_size) -quantizer.calib_dataloader = common.DataLoader(ds, args.batch_size) -q_model = quantizer.fit() -q_model.save(args.output_model) -``` -There are several build-in metrics in INC. In this example, MRPC is used and configured in bert.yaml: -```shell -evaluation: - accuracy: - metric: - GLUE: - task: mrpc -``` - -### Benchmark the tuned low precision model: -User can also run tuned low precision model on benchmark dataset and get the accuracy and performance of it by configuring mode=accuracy or mode=performance. -```python -from neural_compressor.experimental import Benchmark, common -ds = TF_BERTDataSet(args.data_dir, args.vocab_file, args.do_lower_case) -evaluator = Benchmark(args.config) -evaluator.model = args.input_model -evaluator.b_dataloader = common.DataLoader(ds, args.batch_size) -evaluator(args.mode) -``` - -### Config for tuning and benchmark: -The yaml can config sampling size of calibration dataset for quantization. And the accuracy criterion is generally 1% relative error. Or if you only want to quantize but no callback, you can set timeout to 1 for tuning exit policy. -```shell -quantization: - calibration: - sampling_size: 40 - -tuning: - accuracy_criterion: - relative: 0.01 - exit_policy: - timeout: 0 - random_seed: 9527 -``` - -And if you want to get performance, setting num of instance and cores per instance according to your device is needed. In addition, don't remember to set warmup and iteration. It's better if warmup is more than 5 and iteration is more than 10. -```shell - performance: - warmup: 5 - iteration: 10 - configs: - num_of_instance: 1 - cores_per_instance: 28 -``` - -Each model has its own metric to get accuracy. INC also provides some metrics for users and you only need to set task in yaml as follows: -```shell -evaluation: - accuracy: - metric: - GLUE: - task: mrpc -``` - -## Run tuning and benchmark -Users can run shell to tune model and get its accuracy and performance. -### 1. 
To get the tuned model and its accuracy: -```shell -bash run_tuning.sh --config=bert.yaml --input_model=roberta_base_mrpc.onnx --output_model=ir --dataset_location=data -``` - -### 2. To get the benchmark of tuned model: -```shell -bash run_benchmark.sh --config=bert.yaml --input_model=ir --dataset_location=data --batch_size=1 --mode=accuracy -``` - -```shell -bash run_benchmark.sh --config=bert.yaml --input_model=ir --dataset_location=data --batch_size=1 --mode=performance -``` diff --git a/nlp_toolkit/backends/openvino/nncf_utils.py b/nlp_toolkit/backends/openvino/nncf_utils.py deleted file mode 100644 index cf1a0156748..00000000000 --- a/nlp_toolkit/backends/openvino/nncf_utils.py +++ /dev/null @@ -1,56 +0,0 @@ -import nncf -import torch -from torch import Tensor, device, nn - - -NNCF_PT_STATE_NAME = "nncf_state.bin" - -def get_nncf_train_dataloader_for_init(args, train_dataset, data_collator=None): - from torch.utils.data import RandomSampler - from torch.utils.data import DistributedSampler - train_sampler = ( - RandomSampler(train_dataset) - if args.local_rank == -1 - else DistributedSampler(train_dataset) - ) - - if data_collator is None: - from transformers.data.data_collator import default_data_collator - data_collator = default_data_collator - - from torch.utils.data import DataLoader - data_loader = DataLoader( - train_dataset, - batch_size=args.train_batch_size, - sampler=train_sampler, - collate_fn=data_collator, - drop_last=args.dataloader_drop_last, - ) - return data_loader - - -@nncf.torch.register_module() -class Conv1D(nn.Module): - """ - 1D-convolutional layer as defined by Radford et al. for OpenAI GPT (and also used in GPT-2). - - Basically works like a linear layer but the weights are transposed. - - Args: - nf (:obj:`int`): The number of output features. - nx (:obj:`int`): The number of input features. 
- """ - - def __init__(self, nf, nx): - super().__init__() - self.nf = nf - w = torch.empty(nx, nf) - nn.init.normal_(w, std=0.02) - self.weight = nn.Parameter(w) - self.bias = nn.Parameter(torch.zeros(nf)) - - def forward(self, x): - size_out = x.size()[:-1] + (self.nf,) - x = torch.addmm(self.bias, x.view(-1, x.size(-1)), self.weight) - x = x.view(*size_out) - return x diff --git a/setup.py b/setup.py index 1cb70fd1342..86e07137840 100644 --- a/setup.py +++ b/setup.py @@ -11,7 +11,7 @@ cwd = os.path.dirname(os.path.abspath(__file__)) try: - filepath = './nlp_toolkit/version.py' + filepath = './intel_extension_for_transformers/version.py' with open(filepath) as version_file: __version__, = re.findall('__version__ = "(.*)"', version_file.read()) except Exception as error: @@ -172,19 +172,19 @@ def not_exists_or_empty(folder): check_submodules() setup( - name="nlp_toolkit", + name="intel_extension_for_transformers", version=__version__, author="Intel AIA/AIPC Team", author_email= "feng.tian@intel.com, haihao.shen@intel.com,hanwen.chang@intel.com, penghui.cheng@intel.com", - description="Repository of Intel® NLP toolkit", + description="Repository of Intel® Intel Extension for Transformers", long_description=open("README.md", "r", encoding='utf-8').read(), long_description_content_type="text/markdown", keywords= 'quantization, auto-tuning, post-training static quantization, post-training dynamic quantization, quantization-aware training, tuning strategy', license='Apache 2.0', url="https://github.com/intel/", - ext_modules=[CMakeExtension("neural_engine_py", str(cwd) + '/nlp_toolkit/backends/neural_engine/')], + ext_modules=[CMakeExtension("neural_engine_py", str(cwd) + '/intel_extension_for_transformers/backends/neural_engine/')], packages = find_packages(), include_package_data = True, package_dir = {'':'.'}, @@ -196,8 +196,8 @@ def not_exists_or_empty(folder): 'build_ext': build_ext, }, install_requires=['numpy', 'transformers>=4.12.0', 'packaging'], - scripts=['nlp_toolkit/backends/neural_engine/bin/neural_engine'], - python_requires='>=3.6.0', + scripts=['intel_extension_for_transformers/backends/neural_engine/bin/neural_engine'], + python_requires='>=3.7.0', classifiers=[ 'Intended Audience :: Science/Research', 'Programming Language :: Python :: 3', diff --git a/tests/test_autodistillation.py b/tests/test_autodistillation.py index 44c4e819d40..2ef6deaacb9 100644 --- a/tests/test_autodistillation.py +++ b/tests/test_autodistillation.py @@ -3,13 +3,13 @@ import torch import torch.utils.data as data import unittest -from nlp_toolkit import ( +from intel_extension_for_transformers import ( AutoDistillationConfig, FlashDistillationConfig, metrics, ) -from nlp_toolkit.optimization.trainer import NLPTrainer -from nlp_toolkit.optimization.utils.utility import distributed_init +from intel_extension_for_transformers.optimization.trainer import NLPTrainer +from intel_extension_for_transformers.optimization.utils.utility import distributed_init from transformers import ( AutoModelForPreTraining, AutoTokenizer, @@ -67,8 +67,8 @@ def main_worker(rank, world_size, model, teacher_model, dataset): class DummyDataset(data.Dataset): def __init__(self): self.tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased") - self.sequence_a = "NLP-toolkit is based in SH" - self.sequence_b = "Where is NLP-toolkit based? NYC or SH" + self.sequence_a = "intel-extension-for-transformers is based in SH" + self.sequence_b = "Where is intel-extension-for-transformers based? 
NYC or SH" self.encoded_dict = self.tokenizer(self.sequence_a, self.sequence_b) self.encoded_dict['labels'] = [-100] * len(self.encoded_dict['input_ids']) self.encoded_dict['labels'][1] = 17953 diff --git a/tests/test_benchmark.py b/tests/test_benchmark.py index 8d51378d563..93649ff631f 100644 --- a/tests/test_benchmark.py +++ b/tests/test_benchmark.py @@ -2,12 +2,12 @@ import shutil import unittest import neural_compressor.adaptor.pytorch as nc_torch -from nlp_toolkit import ( +from intel_extension_for_transformers import ( metrics, objectives, QuantizationConfig, ) -from nlp_toolkit.optimization.benchmark import ( +from intel_extension_for_transformers.optimization.benchmark import ( PyTorchBenchmark, PyTorchBenchmarkArguments, ExecutorBenchmark, diff --git a/tests/test_config.py b/tests/test_config.py index e8cfa54c21a..0c8fe17093f 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -3,7 +3,7 @@ import torch import unittest -from nlp_toolkit import ( +from intel_extension_for_transformers import ( DistillationConfig, metrics, objectives, @@ -14,12 +14,12 @@ FlashDistillationConfig, TFOptimization, ) -from nlp_toolkit.optimization.distillation import Criterion as DistillationCriterion -from nlp_toolkit.optimization.distillation import DistillationCriterionMode -from nlp_toolkit.optimization.trainer import NLPTrainer -from nlp_toolkit.optimization.utils.objectives import Objective -from nlp_toolkit.optimization.utils.utility_tf import TFDataloader -from nlp_toolkit.preprocessing.data_augmentation import DataAugmentation +from intel_extension_for_transformers.optimization.distillation import Criterion as DistillationCriterion +from intel_extension_for_transformers.optimization.distillation import DistillationCriterionMode +from intel_extension_for_transformers.optimization.trainer import NLPTrainer +from intel_extension_for_transformers.optimization.utils.objectives import Objective +from intel_extension_for_transformers.optimization.utils.utility_tf import TFDataloader +from intel_extension_for_transformers.preprocessing.data_augmentation import DataAugmentation from transformers import ( AutoModelForPreTraining, diff --git a/tests/test_data_augmentation.py b/tests/test_data_augmentation.py index 84d3a38d72f..7bcceb06a3f 100644 --- a/tests/test_data_augmentation.py +++ b/tests/test_data_augmentation.py @@ -3,12 +3,12 @@ import unittest from datasets import load_dataset -from nlp_toolkit.preprocessing.data_augmentation import DataAugmentation +from intel_extension_for_transformers.preprocessing.data_augmentation import DataAugmentation def build_fake_dataset(save_path): from datasets import load_dataset - from nlp_toolkit.preprocessing.utils import EOS + from intel_extension_for_transformers.preprocessing.utils import EOS split = 'validation' count = 10 diff --git a/tests/test_distillation.py b/tests/test_distillation.py index 0071c9a4b80..8365712a2bd 100644 --- a/tests/test_distillation.py +++ b/tests/test_distillation.py @@ -6,15 +6,15 @@ import torch.utils.data as data import unittest from datasets import load_dataset, load_metric -from nlp_toolkit import ( +from intel_extension_for_transformers import ( DistillationConfig, DistillationCriterionMode, metrics, OptimizedModel, NoTrainerOptimizer ) -from nlp_toolkit.optimization.trainer import NLPTrainer -from nlp_toolkit.optimization.distillation import Criterion +from intel_extension_for_transformers.optimization.trainer import NLPTrainer +from intel_extension_for_transformers.optimization.distillation import Criterion from 
transformers import ( AutoModelForSequenceClassification, AutoTokenizer, diff --git a/tests/test_dynamic_length.py b/tests/test_dynamic_length.py index 33f3cf761a7..a9aa794ec6f 100644 --- a/tests/test_dynamic_length.py +++ b/tests/test_dynamic_length.py @@ -8,15 +8,15 @@ import logging import numpy as np -from nlp_toolkit.optimization.trainer import NLPTrainer -from nlp_toolkit.optimization.utils.models.modeling_roberta_dynamic import RobertaForQuestionAnswering -from nlp_toolkit.optimization.utils.models.modeling_bert_dynamic import BertForQuestionAnswering -from nlp_toolkit.optimization.dynamic.drop_and_restore_utils import ( +from intel_extension_for_transformers.optimization.trainer import NLPTrainer +from intel_extension_for_transformers.optimization.utils.models.modeling_roberta_dynamic import RobertaForQuestionAnswering +from intel_extension_for_transformers.optimization.utils.models.modeling_bert_dynamic import BertForQuestionAnswering +from intel_extension_for_transformers.optimization.dynamic.drop_and_restore_utils import ( sample_length_configuration, sample_layer_configuration ) -from nlp_toolkit.optimization.dynamic.evolution import ( +from intel_extension_for_transformers.optimization.dynamic.evolution import ( approx_ratio, inverse, store2str, Evolution ) @@ -30,7 +30,7 @@ TrainingArguments, ) -from nlp_toolkit import ( +from intel_extension_for_transformers import ( DynamicLengthConfig, ) @@ -55,8 +55,8 @@ class DummyDataset(data.Dataset): def __init__(self, labels=False, type=None): MODEL_NAME = BERT_MODEL if type=='bert' else MINILM_MODEL self.tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME) - self.sequence_a = "NLP-toolkit is based in SH" - self.sequence_b = "Where is NLP-toolkit based?" + self.sequence_a = "intel-extension-for-transformers is based in SH" + self.sequence_b = "Where is intel-extension-for-transformers based?" 
self.encoded_dict = self.tokenizer(self.sequence_a, self.sequence_b, padding="max_length", max_length=MAX_LENGTH) if labels: self.encoded_dict['start_positions'] = [21] diff --git a/tests/test_orchestrate_optimization.py b/tests/test_orchestrate_optimization.py index 3d391dd8e48..96b47602cd5 100644 --- a/tests/test_orchestrate_optimization.py +++ b/tests/test_orchestrate_optimization.py @@ -7,7 +7,7 @@ import unittest import neural_compressor from datasets import load_dataset, load_metric -from nlp_toolkit import ( +from intel_extension_for_transformers import ( PrunerConfig, PruningConfig, DistillationConfig, @@ -17,8 +17,8 @@ objectives, OptimizedModel, ) -from nlp_toolkit.optimization.trainer import NLPTrainer -from nlp_toolkit.optimization.distillation import Criterion +from intel_extension_for_transformers.optimization.trainer import NLPTrainer +from intel_extension_for_transformers.optimization.distillation import Criterion from transformers import ( AutoModelForSequenceClassification, @@ -73,6 +73,7 @@ def compute_metrics(p): eval_dataset=self.dataset, compute_metrics=compute_metrics, ) + self.trainer.calib_dataloader = self.trainer.get_eval_dataloader() tune_metric = metrics.Metric( name="eval_accuracy", is_relative=True, criterion=0.5 ) @@ -98,4 +99,4 @@ def compute_metrics(p): if __name__ == "__main__": - unittest.main() \ No newline at end of file + unittest.main() diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py index f881965609e..22f339e378f 100644 --- a/tests/test_pipeline.py +++ b/tests/test_pipeline.py @@ -1,7 +1,7 @@ import os import unittest import neural_compressor.adaptor.pytorch as nc_torch -from nlp_toolkit.optimization.pipeline import pipeline +from intel_extension_for_transformers.optimization.pipeline import pipeline from packaging.version import Version from transformers import ( AutoConfig, diff --git a/tests/test_pruning.py b/tests/test_pruning.py index 8e698e3115d..6bb4e954d4a 100644 --- a/tests/test_pruning.py +++ b/tests/test_pruning.py @@ -3,7 +3,7 @@ import shutil import torch.utils.data as data import unittest -from nlp_toolkit import ( +from intel_extension_for_transformers import ( metrics, OptimizedModel, PrunerConfig, @@ -11,7 +11,7 @@ PruningMode, NoTrainerOptimizer ) -from nlp_toolkit.optimization.trainer import NLPTrainer +from intel_extension_for_transformers.optimization.trainer import NLPTrainer from transformers import ( AutoModelForSequenceClassification, AutoTokenizer @@ -24,8 +24,8 @@ class DummyDataset(data.Dataset): def __init__(self): self.tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased") - self.sequence_a = "NLP-toolkit is based in SH" - self.sequence_b = "Where is NLP-toolkit based? NYC or SH" + self.sequence_a = "intel-extension-for-transformers is based in SH" + self.sequence_b = "Where is intel-extension-for-transformers based? 
NYC or SH" self.encoded_dict = self.tokenizer(self.sequence_a, self.sequence_b) self.encoded_dict['labels'] = 1 diff --git a/tests/test_quantization.py b/tests/test_quantization.py index c8f0370ef05..18316d898dc 100644 --- a/tests/test_quantization.py +++ b/tests/test_quantization.py @@ -5,7 +5,7 @@ import torch import torch.utils.data as data import unittest -from nlp_toolkit import ( +from intel_extension_for_transformers import ( metrics, objectives, OptimizedModel, @@ -13,8 +13,8 @@ QuantizationMode, NoTrainerOptimizer, ) -from nlp_toolkit.optimization.trainer import NLPTrainer -from nlp_toolkit.optimization.trainer import NLPSeq2SeqTrainer +from intel_extension_for_transformers.optimization.trainer import NLPTrainer +from intel_extension_for_transformers.optimization.trainer import NLPSeq2SeqTrainer from transformers import ( AutoModelForSequenceClassification, AutoTokenizer, @@ -28,8 +28,8 @@ class DummyDataset(data.Dataset): def __init__(self): self.tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME) - self.sequence_a = "NLP-toolkit is based in SH" - self.sequence_b = "Where is NLP-toolkit based? NYC or SH" + self.sequence_a = "intel-extension-for-transformers is based in SH" + self.sequence_b = "Where is intel-extension-for-transformers based? NYC or SH" self.encoded_dict = self.tokenizer(self.sequence_a, self.sequence_b) self.encoded_dict['labels'] = 1 diff --git a/tests/test_tf_autodistillation.py b/tests/test_tf_autodistillation.py index 5808ccdc247..258aad8e210 100644 --- a/tests/test_tf_autodistillation.py +++ b/tests/test_tf_autodistillation.py @@ -6,13 +6,13 @@ from transformers import (TFAutoModelForSequenceClassification, AutoTokenizer, DefaultDataCollator, HfArgumentParser, TFTrainingArguments, set_seed) -from nlp_toolkit import ( +from intel_extension_for_transformers import ( AutoDistillationConfig, TFDistillationConfig, metrics, ) -from nlp_toolkit.optimization.optimizer_tf import TFOptimization -from nlp_toolkit.optimization.utils.utility_tf import distributed_init +from intel_extension_for_transformers.optimization.optimizer_tf import TFOptimization +from intel_extension_for_transformers.optimization.utils.utility_tf import distributed_init def compute_metrics(preds, label_ids): metric = load_metric("glue", "sst2") diff --git a/tests/test_tf_distillation.py b/tests/test_tf_distillation.py index c02506ce129..53fba1aa424 100644 --- a/tests/test_tf_distillation.py +++ b/tests/test_tf_distillation.py @@ -6,9 +6,9 @@ from transformers import (TFAutoModelForSequenceClassification, AutoTokenizer, HfArgumentParser, TFTrainingArguments, set_seed, DefaultDataCollator) -from nlp_toolkit import (DistillationConfig, metrics) -from nlp_toolkit.optimization.distillation import Criterion -from nlp_toolkit.optimization.optimizer_tf import TFOptimization +from intel_extension_for_transformers import (DistillationConfig, metrics) +from intel_extension_for_transformers.optimization.distillation import Criterion +from intel_extension_for_transformers.optimization.optimizer_tf import TFOptimization class TestDistillation(unittest.TestCase): diff --git a/tests/test_tf_pruning.py b/tests/test_tf_pruning.py index 102b97cae21..236af0d348f 100644 --- a/tests/test_tf_pruning.py +++ b/tests/test_tf_pruning.py @@ -1,11 +1,11 @@ -from nlp_toolkit.optimization.utils.utility_tf import get_filepath +from intel_extension_for_transformers.optimization.utils.utility_tf import get_filepath import numpy as np import os import shutil import tensorflow as tf import unittest from datasets import 
load_dataset, load_metric -from nlp_toolkit import ( +from intel_extension_for_transformers import ( metrics, PrunerConfig, PruningConfig, @@ -77,13 +77,13 @@ def tearDownClass(self): def test_tf_model_quant(self): # check whether it is possible to set distributed environment # only for coverage currently - from nlp_toolkit.optimization.utils.utility_tf import distributed_init + from intel_extension_for_transformers.optimization.utils.utility_tf import distributed_init distributed_init(["localhost:12345","localhost:23456"], "worker", 0) self.assertTrue(os.environ['TF_CONFIG'] != None) del os.environ['TF_CONFIG'] # check whether filepath can be set correctly if using distributed environment # only for coverage currently - from nlp_toolkit.optimization.utils.utility_tf import get_filepath + from intel_extension_for_transformers.optimization.utils.utility_tf import get_filepath self.assertTrue(type(get_filepath("dummy", "worker", 0)) == str) self.assertTrue(type(get_filepath("dummy", "worker", 1)) == str) self.assertTrue(get_filepath("dummy", "worker", 0) != get_filepath("dummy", "worker", 1)) diff --git a/tests/test_tf_quantization.py b/tests/test_tf_quantization.py index f465201194c..c5175982907 100644 --- a/tests/test_tf_quantization.py +++ b/tests/test_tf_quantization.py @@ -4,13 +4,13 @@ import tensorflow as tf import unittest from datasets import load_dataset, load_metric -from nlp_toolkit import ( +from intel_extension_for_transformers import ( metrics, objectives, QuantizationConfig, TFOptimization ) -# from nlp_toolkit import metrics, objectives +# from intel_extension_for_transformers import metrics, objectives from transformers import ( AutoTokenizer, DefaultDataCollator, diff --git a/third_party_programs.txt b/third_party_programs.txt new file mode 100644 index 00000000000..e2d13c6b12b --- /dev/null +++ b/third_party_programs.txt @@ -0,0 +1,235 @@ +Intel Extension for Transformers Third Party Programs File + +This file contains the list of third party software ("third party programs") +contained in the Intel software and their required notices and/or license +terms. This third party software, even if included with the distribution of +the Intel software, may be governed by separate license terms, including +without limitation, third party license terms, other Intel software license +terms, and open source software license terms. These separate license terms +govern your use of the third party programs as set forth in the +"THIRD-PARTY-PROGRAMS" file. + +Third party programs and their corresponding required notices and/or license +terms are listed below. + +-------------------------------------------------------------------------- +1. huggingface/transformers + + huggingface/diffusers + + Intel(R) Neural Compressor + Copyright Intel Corporation + + oneAPI Deep Neural Network Library (oneDNN) + Copyright Intel Corporation + + + + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity.
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. 
This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright 2018 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+ +-------------------------------------------------------------------------- + +The following third party programs have their own third party program files. These additional third party program files are as follows: +1. Intel® Neural Compressor, found at intel_extension_for_transformers/optimization/neural-compressor-third-party-programs.txt +2. oneAPI Deep Neural Network Library (oneDNN), found at intel_extension_for_transformers/backends/neural_engine/oneDNN-THIRD-PARTY-PROGRAMS
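
---

Note for reviewers migrating downstream code: below is a minimal sketch of the renamed package paths these tests now exercise. Only the import paths, the metrics.Metric arguments, the DummyDataset sentence pair, and the calib_dataloader assignment come from this patch; MODEL_NAME, the QuantizationConfig arguments, and the trainer.quantize() call are illustrative assumptions, not part of the change.

    # Sketch of the nlp_toolkit -> intel_extension_for_transformers rename.
    # Import paths mirror the updated tests; QuantizationConfig arguments and
    # trainer.quantize() are assumptions for illustration only.
    import torch.utils.data as data
    from transformers import AutoModelForSequenceClassification, AutoTokenizer

    from intel_extension_for_transformers import metrics, objectives, QuantizationConfig
    from intel_extension_for_transformers.optimization.trainer import NLPTrainer

    MODEL_NAME = "distilbert-base-uncased"

    class DummyDataset(data.Dataset):
        # Single-example dataset following the pattern in tests/test_quantization.py.
        def __init__(self):
            tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
            self.encoded_dict = tokenizer(
                "intel-extension-for-transformers is based in SH",
                "Where is intel-extension-for-transformers based? NYC or SH",
            )
            self.encoded_dict["labels"] = 1

        def __len__(self):
            return 1

        def __getitem__(self, index):
            return self.encoded_dict

    model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
    trainer = NLPTrainer(
        model=model,
        train_dataset=DummyDataset(),
        eval_dataset=DummyDataset(),
    )

    # Reuse the evaluation loader for calibration, as
    # tests/test_orchestrate_optimization.py now does.
    trainer.calib_dataloader = trainer.get_eval_dataloader()

    tune_metric = metrics.Metric(name="eval_accuracy", is_relative=True, criterion=0.5)
    quant_config = QuantizationConfig(  # argument names assumed
        metrics=[tune_metric],
        objectives=[objectives.performance],
    )
    quantized_model = trainer.quantize(quant_config=quant_config)  # assumed entry point

Reusing the eval dataloader as the calibration dataloader (the one-line behavioral change in this patch, beyond the rename) keeps the tests lightweight by avoiding a separate calibration dataset.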