From ad86abf3e5a81a48e5295073b6cf186cd8131c5b Mon Sep 17 00:00:00 2001 From: Alex Baden Date: Wed, 25 Sep 2024 13:40:49 -0700 Subject: [PATCH 1/3] Enable generating cached native code by default --- third_party/intel/backend/compiler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/third_party/intel/backend/compiler.py b/third_party/intel/backend/compiler.py index 9c1d25b2c1..6715438541 100644 --- a/third_party/intel/backend/compiler.py +++ b/third_party/intel/backend/compiler.py @@ -53,7 +53,7 @@ class XPUOptions: max_num_imprecise_acc_default: int = 0 # `max_num_imprecise_acc` only applies to fp8 -> fp32 dot on sm_90 for cuda extern_libs: dict = None debug: bool = False - generate_native_code: bool = False + generate_native_code: bool = True backend_name: str = 'intel' def __post_init__(self): From c188aca97d982b9c45bff92d3e0d98b6a4ed0bf2 Mon Sep 17 00:00:00 2001 From: Alex Baden Date: Wed, 25 Sep 2024 14:03:15 -0700 Subject: [PATCH 2/3] Force the TRITON_XPU_GEN_NATIVE_CODE env if not previously set This is a bit of a hack, but I did not want to pollute build_flags with something that is not a real build_flag and there is no other way to pass params to loadBinary. We could probably duplicate loadBinary - I am considering that too, but for now the hack should let the tests run. --- third_party/intel/backend/compiler.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/third_party/intel/backend/compiler.py b/third_party/intel/backend/compiler.py index 6715438541..8211ffe36f 100644 --- a/third_party/intel/backend/compiler.py +++ b/third_party/intel/backend/compiler.py @@ -65,7 +65,11 @@ def __post_init__(self): object.__setattr__(self, 'extern_libs', tuple(extern_libs.items())) if self.num_warps <= 0 or (self.num_warps & (self.num_warps - 1)) != 0: raise AssertionError("num_warps must be a power of 2") - self.generate_native_code = bool(os.getenv("TRITON_XPU_GEN_NATIVE_CODE", self.generate_native_code)) + generate_native_code_env = os.getenv("TRITON_XPU_GEN_NATIVE_CODE") + if generate_native_code_env: + self.generate_native_code = bool(generate_native_code_env) + else: + os.putenv("TRITON_XPU_GEN_NATIVE_CODE", str(self.generate_native_code)) def hash(self): key = '_'.join([f'{name}-{val}' for name, val in self.__dict__.items()]) From 0e6c1902113f8878f702dc770f1d57fa5c0167de Mon Sep 17 00:00:00 2001 From: Alex Baden Date: Wed, 25 Sep 2024 18:15:55 -0700 Subject: [PATCH 3/3] do not generate native code in test_line_info --- python/test/unit/language/test_line_info.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/python/test/unit/language/test_line_info.py b/python/test/unit/language/test_line_info.py index 3f92ffaff0..42e464e8f8 100644 --- a/python/test/unit/language/test_line_info.py +++ b/python/test/unit/language/test_line_info.py @@ -188,17 +188,17 @@ def test_line_info(func: str): shape = (128, ) kernel_info = {} if func == "single": - kernel_info = kernel_single.warmup(torch.float32, torch.float32, BLOCK=shape[0], grid=(1,)) + kernel_info = kernel_single.warmup(torch.float32, torch.float32, BLOCK=shape[0], grid=(1,), generate_native_code=False) elif func == "call": - kernel_info = kernel_call.warmup(torch.float32, torch.float32, BLOCK=shape[0], grid=(1,)) + kernel_info = kernel_call.warmup(torch.float32, torch.float32, BLOCK=shape[0], grid=(1,), generate_native_code=False) elif func == "call_noinline": - kernel_info = kernel_call_noinline.warmup(torch.float32, torch.float32, BLOCK=shape[0], grid=(1,)) + kernel_info = kernel_call_noinline.warmup(torch.float32, torch.float32, BLOCK=shape[0], grid=(1,), generate_native_code=False) elif func == "autotune": - kernel_info = kernel_autotune.warmup(torch.float32, torch.float32, SIZE=shape[0], grid=(1,))[0] + kernel_info = kernel_autotune.warmup(torch.float32, torch.float32, SIZE=shape[0], grid=(1,), generate_native_code=False)[0] elif func == "dot_combine": - kernel_info = kernel_dot_combine.warmup(20, grid=(1,)) + kernel_info = kernel_dot_combine.warmup(20, grid=(1,), generate_native_code=False) elif func == "cdiv": - kernel_info = kernel_cdiv.warmup(20, grid=(1,)) + kernel_info = kernel_cdiv.warmup(20, grid=(1,), generate_native_code=False) if obj_kind == "spvbin": file_lines = spv_extract_file_lines(kernel_info.asm["spv"], command)