diff --git a/external/llvm-project/mlir/include/mlir/Conversion/GPUToROCDL/GPUToROCDLPass.h b/external/llvm-project/mlir/include/mlir/Conversion/GPUToROCDL/GPUToROCDLPass.h index 564778771299..f1233ad894da 100644 --- a/external/llvm-project/mlir/include/mlir/Conversion/GPUToROCDL/GPUToROCDLPass.h +++ b/external/llvm-project/mlir/include/mlir/Conversion/GPUToROCDL/GPUToROCDLPass.h @@ -42,7 +42,7 @@ void configureGpuToROCDLConversionLegality(ConversionTarget &target); /// is configurable. std::unique_ptr> createLowerGpuOpsToROCDLOpsPass( - const std::string &chipset = "gfx900", + const std::string &chipset = "infer", unsigned indexBitwidth = kDeriveIndexBitwidthFromDataLayout, bool useBarePtrCallConv = false, gpu::amd::Runtime runtime = gpu::amd::Runtime::Unknown); diff --git a/external/llvm-project/mlir/include/mlir/Conversion/Passes.td b/external/llvm-project/mlir/include/mlir/Conversion/Passes.td index e52c7ff6bd56..9123b8ef46e6 100644 --- a/external/llvm-project/mlir/include/mlir/Conversion/Passes.td +++ b/external/llvm-project/mlir/include/mlir/Conversion/Passes.td @@ -592,8 +592,8 @@ def ConvertGpuOpsToROCDLOps : Pass<"convert-gpu-to-rocdl", "gpu::GPUModuleOp"> { ]; let options = [ Option<"chipset", "chipset", "std::string", - /*default=*/"\"gfx000\"", - "Chipset that these operations will run on">, + /*default=*/"\"infer\"", + "Chipset that these operations will run on. By Default it will infer target from attached target attribute on GPU module on which it operates">, Option<"indexBitwidth", "index-bitwidth", "unsigned", /*default=kDeriveIndexBitwidthFromDataLayout*/"0", "Bitwidth of the index type, 0 to use size of machine word">, diff --git a/external/llvm-project/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp b/external/llvm-project/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp index 36fbf80c8156..8dbae392204c 100644 --- a/external/llvm-project/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp +++ b/external/llvm-project/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp @@ -55,7 +55,6 @@ namespace mlir { } // namespace mlir #include "mlir/Dialect/LLVMIR/Transforms/Passes.h" - using namespace mlir; /// Returns true if the given `gpu.func` can be safely called using the bare @@ -220,6 +219,27 @@ struct LowerGpuOpsToROCDLOpsPass gpu::GPUModuleOp m = getOperation(); MLIRContext *ctx = m.getContext(); + if (chipset == "infer") { + ArrayAttr targets = m.getTargetsAttr(); + if (!targets) { + m->emitError("there are no target attributes to infer"); + return signalPassFailure(); + } + if (targets.size() != 1) { + m->emitError("expected a single target attribute"); + return signalPassFailure(); + } + ROCDL::ROCDLTargetAttr targetAttr = + dyn_cast(targets[0]); + chipset = targetAttr.getChip().str(); + } + + FailureOr maybeChipset = amdgpu::Chipset::parse(chipset); + if (failed(maybeChipset)) { + m->emitError("invalid chipset name: " + chipset); + return signalPassFailure(); + } + auto llvmDataLayout = m->getAttrOfType( LLVM::LLVMDialect::getDataLayoutAttrName()); if (!llvmDataLayout) { @@ -232,12 +252,6 @@ struct LowerGpuOpsToROCDLOpsPass UnitAttr::get(ctx)); } - FailureOr maybeChipset = amdgpu::Chipset::parse(chipset); - if (failed(maybeChipset)) { - emitError(UnknownLoc::get(ctx), "Invalid chipset name: " + chipset); - return signalPassFailure(); - } - /// Customize the bitwidth used for the device side index computations. LowerToLLVMOptions options( ctx, DataLayout(cast(m.getOperation()))); @@ -337,8 +351,7 @@ void mlir::configureGpuToROCDLConversionLegality(ConversionTarget &target) { LLVM::Log2Op, LLVM::PowOp, LLVM::SinOp>(); // These ops are legal for f32 type. target.addDynamicallyLegalOp([](Operation *op) { - return any_of(op->getOperandTypes(), - llvm::IsaPred); + return any_of(op->getOperandTypes(), llvm::IsaPred); }); // TODO: Remove once we support replacing non-root ops. target.addLegalOp(); @@ -350,7 +363,8 @@ static void populateOpPatterns(LLVMTypeConverter &converter, RewritePatternSet &patterns, StringRef f32Func, StringRef f64Func, StringRef f16Func) { patterns.add>(converter); - patterns.add>(converter, f32Func, f64Func, f16Func); + patterns.add>(converter, f32Func, f64Func, + f16Func); } void mlir::populateGpuToROCDLConversionPatterns( diff --git a/external/llvm-project/mlir/test/Conversion/GPUCommon/lower-memory-space-attrs.mlir b/external/llvm-project/mlir/test/Conversion/GPUCommon/lower-memory-space-attrs.mlir index 771f3185904b..a338d35525eb 100644 --- a/external/llvm-project/mlir/test/Conversion/GPUCommon/lower-memory-space-attrs.mlir +++ b/external/llvm-project/mlir/test/Conversion/GPUCommon/lower-memory-space-attrs.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -split-input-file -convert-gpu-to-rocdl | FileCheck %s --check-prefixes=CHECK,ROCDL +// RUN: mlir-opt %s -split-input-file -convert-gpu-to-rocdl='chipset=gfx900' | FileCheck %s --check-prefixes=CHECK,ROCDL // RUN: mlir-opt %s -split-input-file -convert-gpu-to-nvvm | FileCheck %s --check-prefixes=CHECK,NVVM gpu.module @kernel { diff --git a/external/llvm-project/mlir/test/Conversion/GPUCommon/memory-attrbution.mlir b/external/llvm-project/mlir/test/Conversion/GPUCommon/memory-attrbution.mlir index 4fc19b8e9364..b1291e07c060 100644 --- a/external/llvm-project/mlir/test/Conversion/GPUCommon/memory-attrbution.mlir +++ b/external/llvm-project/mlir/test/Conversion/GPUCommon/memory-attrbution.mlir @@ -1,5 +1,5 @@ // RUN: mlir-opt -allow-unregistered-dialect --convert-gpu-to-nvvm --split-input-file %s | FileCheck --check-prefix=NVVM %s -// RUN: mlir-opt -allow-unregistered-dialect --convert-gpu-to-rocdl --split-input-file %s | FileCheck --check-prefix=ROCDL %s +// RUN: mlir-opt -allow-unregistered-dialect --convert-gpu-to-rocdl='chipset=gfx900' --split-input-file %s | FileCheck --check-prefix=ROCDL %s gpu.module @kernel { // NVVM-LABEL: llvm.func @private diff --git a/external/llvm-project/mlir/test/Conversion/GPUCommon/memref-arg-attrs.mlir b/external/llvm-project/mlir/test/Conversion/GPUCommon/memref-arg-attrs.mlir index e7c742067b4e..3c3082c47389 100644 --- a/external/llvm-project/mlir/test/Conversion/GPUCommon/memref-arg-attrs.mlir +++ b/external/llvm-project/mlir/test/Conversion/GPUCommon/memref-arg-attrs.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -split-input-file -convert-gpu-to-rocdl='use-bare-ptr-memref-call-conv=0' | FileCheck %s --check-prefixes=CHECK,ROCDL +// RUN: mlir-opt %s -split-input-file -convert-gpu-to-rocdl='chipset=gfx900 use-bare-ptr-memref-call-conv=0' | FileCheck %s --check-prefixes=CHECK,ROCDL // RUN: mlir-opt %s -split-input-file -convert-gpu-to-nvvm='use-bare-ptr-memref-call-conv=0' | FileCheck %s --check-prefixes=CHECK,NVVM gpu.module @kernel { diff --git a/external/llvm-project/mlir/test/Conversion/GPUCommon/memref-arg-noalias-attrs.mlir b/external/llvm-project/mlir/test/Conversion/GPUCommon/memref-arg-noalias-attrs.mlir index 33cdc3348e51..d17214d1f229 100644 --- a/external/llvm-project/mlir/test/Conversion/GPUCommon/memref-arg-noalias-attrs.mlir +++ b/external/llvm-project/mlir/test/Conversion/GPUCommon/memref-arg-noalias-attrs.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -split-input-file -convert-gpu-to-rocdl='use-bare-ptr-memref-call-conv=1' | FileCheck %s --check-prefixes=CHECK,ROCDL +// RUN: mlir-opt %s -split-input-file -convert-gpu-to-rocdl='chipset=gfx900 use-bare-ptr-memref-call-conv=1' | FileCheck %s --check-prefixes=CHECK,ROCDL // RUN: mlir-opt %s -split-input-file -convert-gpu-to-nvvm='use-bare-ptr-memref-call-conv=1' | FileCheck %s --check-prefixes=CHECK,NVVM gpu.module @kernel { diff --git a/external/llvm-project/mlir/test/Conversion/GPUCommon/memref-arg-noalias-warning.mlir b/external/llvm-project/mlir/test/Conversion/GPUCommon/memref-arg-noalias-warning.mlir index 793df7380d78..ab98be59a2c8 100644 --- a/external/llvm-project/mlir/test/Conversion/GPUCommon/memref-arg-noalias-warning.mlir +++ b/external/llvm-project/mlir/test/Conversion/GPUCommon/memref-arg-noalias-warning.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -split-input-file -convert-gpu-to-rocdl='use-bare-ptr-memref-call-conv=0' -verify-diagnostics +// RUN: mlir-opt %s -split-input-file -convert-gpu-to-rocdl='chipset=gfx900 use-bare-ptr-memref-call-conv=0' -verify-diagnostics gpu.module @kernel { // expected-warning @+1 {{Cannot copy noalias with non-bare pointers.}} diff --git a/external/llvm-project/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl-hip.mlir b/external/llvm-project/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl-hip.mlir index 1b904fa142ba..3e3b43c6d4f4 100644 --- a/external/llvm-project/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl-hip.mlir +++ b/external/llvm-project/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl-hip.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -convert-gpu-to-rocdl='runtime=HIP' -split-input-file | FileCheck %s +// RUN: mlir-opt %s -convert-gpu-to-rocdl='chipset=gfx900 runtime=HIP' -split-input-file | FileCheck %s gpu.module @test_module { // CHECK-DAG: llvm.mlir.global internal constant @[[$PRINT_GLOBAL0:[A-Za-z0-9_]+]]("Hello, world\0A\00") diff --git a/external/llvm-project/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl-infer-target.mlir b/external/llvm-project/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl-infer-target.mlir new file mode 100644 index 000000000000..4ef6fd004b13 --- /dev/null +++ b/external/llvm-project/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl-infer-target.mlir @@ -0,0 +1,30 @@ +// RUN: mlir-opt %s -convert-gpu-to-rocdl -split-input-file --verify-diagnostics | FileCheck --check-prefix=CHECK_TARGET %s + +// CHECK_TARGET: @test_module [#rocdl.target] attributes {llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9"} { +gpu.module @test_module [#rocdl.target] { + // CHECK_TARGET-LABEL: @kernel_func + // CHECK_TARGET: attributes + // CHECK_TARGET: gpu.kernel + // CHECK_TARGET: rocdl.kernel + gpu.func @kernel_func() kernel { + gpu.return + } +} + +// ----- + +// expected-error@below {{there are no target attributes to infer}} +gpu.module @test_module { + gpu.func @kernel_func() kernel { + gpu.return + } +} + +// ----- + +// expected-error@below {{invalid chipset name: gfx90a,gfx900}} +gpu.module @test_module [#rocdl.target] { + gpu.func @kernel_func() kernel { + gpu.return + } +} diff --git a/external/llvm-project/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl-opencl.mlir b/external/llvm-project/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl-opencl.mlir index 870f5c5016ec..fa01801972d6 100644 --- a/external/llvm-project/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl-opencl.mlir +++ b/external/llvm-project/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl-opencl.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -convert-gpu-to-rocdl='runtime=OpenCL' | FileCheck %s +// RUN: mlir-opt %s -convert-gpu-to-rocdl='chipset=gfx900 runtime=OpenCL' | FileCheck %s gpu.module @test_module { // CHECK: llvm.mlir.global internal constant @[[$PRINT_GLOBAL:[A-Za-z0-9_]+]]("Hello: %d\0A\00") {addr_space = 4 : i32} diff --git a/external/llvm-project/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir b/external/llvm-project/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir index da54dc836a90..356d21cbf1ba 100644 --- a/external/llvm-project/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir +++ b/external/llvm-project/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir @@ -1,5 +1,5 @@ -// RUN: mlir-opt %s -convert-gpu-to-rocdl -split-input-file | FileCheck %s -// RUN: mlir-opt %s -convert-gpu-to-rocdl='index-bitwidth=32' -split-input-file | FileCheck --check-prefix=CHECK32 %s +// RUN: mlir-opt %s -convert-gpu-to-rocdl='chipset=gfx900' -split-input-file | FileCheck %s +// RUN: mlir-opt %s -convert-gpu-to-rocdl='chipset=gfx900 index-bitwidth=32' -split-input-file | FileCheck --check-prefix=CHECK32 %s // CHECK-LABEL: @test_module // CHECK-SAME: llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" diff --git a/external/llvm-project/mlir/test/Conversion/GPUToROCDL/memref.mlir b/external/llvm-project/mlir/test/Conversion/GPUToROCDL/memref.mlir index e645481c8923..debf899dd687 100644 --- a/external/llvm-project/mlir/test/Conversion/GPUToROCDL/memref.mlir +++ b/external/llvm-project/mlir/test/Conversion/GPUToROCDL/memref.mlir @@ -1,6 +1,6 @@ -// RUN: mlir-opt %s -convert-gpu-to-rocdl -split-input-file | FileCheck %s +// RUN: mlir-opt %s -convert-gpu-to-rocdl='chipset=gfx900' -split-input-file | FileCheck %s // RUN: mlir-opt %s \ -// RUN: -convert-gpu-to-rocdl='use-bare-ptr-memref-call-conv=true' \ +// RUN: -convert-gpu-to-rocdl='chipset=gfx900 use-bare-ptr-memref-call-conv=true' \ // RUN: -split-input-file \ // RUN: | FileCheck %s --check-prefix=BARE diff --git a/external/llvm-project/mlir/test/Integration/GPU/ROCM/gpu-to-hsaco.mlir b/external/llvm-project/mlir/test/Integration/GPU/ROCM/gpu-to-hsaco.mlir index 3c8f3b1d0cbf..edb75ee81224 100644 --- a/external/llvm-project/mlir/test/Integration/GPU/ROCM/gpu-to-hsaco.mlir +++ b/external/llvm-project/mlir/test/Integration/GPU/ROCM/gpu-to-hsaco.mlir @@ -1,6 +1,6 @@ // RUN: mlir-opt %s \ // RUN: | mlir-opt -gpu-kernel-outlining \ -// RUN: | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-rocdl),rocdl-attach-target{chip=%chip})' \ +// RUN: | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo),rocdl-attach-target{chip=%chip}, gpu.module(convert-gpu-to-rocdl))' \ // RUN: | mlir-opt -gpu-to-llvm -reconcile-unrealized-casts -gpu-module-to-binary \ // RUN: | mlir-cpu-runner \ // RUN: --shared-libs=%mlir_rocm_runtime \ diff --git a/external/llvm-project/mlir/test/Integration/GPU/ROCM/printf.mlir b/external/llvm-project/mlir/test/Integration/GPU/ROCM/printf.mlir index d5e6e3757540..e8feeaa69c29 100644 --- a/external/llvm-project/mlir/test/Integration/GPU/ROCM/printf.mlir +++ b/external/llvm-project/mlir/test/Integration/GPU/ROCM/printf.mlir @@ -1,5 +1,5 @@ // RUN: mlir-opt %s \ -// RUN: | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-rocdl{index-bitwidth=32 runtime=HIP}),rocdl-attach-target{chip=%chip})' \ +// RUN: | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo),rocdl-attach-target{chip=%chip}, gpu.module(convert-gpu-to-rocdl{index-bitwidth=32 runtime=HIP}))' \ // RUN: | mlir-opt -gpu-to-llvm -reconcile-unrealized-casts -gpu-module-to-binary \ // RUN: | mlir-cpu-runner \ // RUN: --shared-libs=%mlir_rocm_runtime \ diff --git a/external/llvm-project/mlir/test/Integration/GPU/ROCM/two-modules.mlir b/external/llvm-project/mlir/test/Integration/GPU/ROCM/two-modules.mlir index d49d3957abbe..d20f71d16280 100644 --- a/external/llvm-project/mlir/test/Integration/GPU/ROCM/two-modules.mlir +++ b/external/llvm-project/mlir/test/Integration/GPU/ROCM/two-modules.mlir @@ -1,6 +1,6 @@ // RUN: mlir-opt %s \ // RUN: | mlir-opt -gpu-kernel-outlining \ -// RUN: | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-rocdl),rocdl-attach-target{chip=%chip})' \ +// RUN: | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo),rocdl-attach-target{chip=%chip}, gpu.module(convert-gpu-to-rocdl))' \ // RUN: | mlir-opt -gpu-to-llvm -reconcile-unrealized-casts -gpu-module-to-binary \ // RUN: | mlir-cpu-runner \ // RUN: --shared-libs=%mlir_rocm_runtime \ diff --git a/external/llvm-project/mlir/test/Integration/GPU/ROCM/vecadd.mlir b/external/llvm-project/mlir/test/Integration/GPU/ROCM/vecadd.mlir index 986d8239427e..0ac391cd5f8e 100644 --- a/external/llvm-project/mlir/test/Integration/GPU/ROCM/vecadd.mlir +++ b/external/llvm-project/mlir/test/Integration/GPU/ROCM/vecadd.mlir @@ -1,7 +1,7 @@ // RUN: mlir-opt %s \ // RUN: | mlir-opt -convert-scf-to-cf \ // RUN: | mlir-opt -gpu-kernel-outlining \ -// RUN: | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-rocdl{use-bare-ptr-memref-call-conv=true}),rocdl-attach-target{chip=%chip})' \ +// RUN: | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo),rocdl-attach-target{chip=%chip}, gpu.module(convert-gpu-to-rocdl{use-bare-ptr-memref-call-conv=true}))' \ // RUN: | mlir-opt -gpu-to-llvm=use-bare-pointers-for-kernels=true -reconcile-unrealized-casts -gpu-module-to-binary \ // RUN: | mlir-cpu-runner \ // RUN: --shared-libs=%mlir_rocm_runtime \ diff --git a/external/llvm-project/mlir/test/Integration/GPU/ROCM/vector-transferops.mlir b/external/llvm-project/mlir/test/Integration/GPU/ROCM/vector-transferops.mlir index 575d967dcc9a..417f67e64669 100644 --- a/external/llvm-project/mlir/test/Integration/GPU/ROCM/vector-transferops.mlir +++ b/external/llvm-project/mlir/test/Integration/GPU/ROCM/vector-transferops.mlir @@ -1,7 +1,7 @@ // RUN: mlir-opt %s \ // RUN: | mlir-opt -convert-scf-to-cf \ // RUN: | mlir-opt -gpu-kernel-outlining \ -// RUN: | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-rocdl{chipset=%chip index-bitwidth=32}),rocdl-attach-target{chip=%chip})' \ +// RUN: | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo),rocdl-attach-target{chip=%chip}, gpu.module(convert-gpu-to-rocdl{index-bitwidth=32}))' \ // RUN: | mlir-opt -gpu-to-llvm -reconcile-unrealized-casts -gpu-module-to-binary \ // RUN: | mlir-cpu-runner \ // RUN: --shared-libs=%mlir_rocm_runtime \ diff --git a/mlir/lib/Dialect/Rock/Pipelines/Pipelines.cpp b/mlir/lib/Dialect/Rock/Pipelines/Pipelines.cpp index adfaff9141ed..f3e30e8e6743 100644 --- a/mlir/lib/Dialect/Rock/Pipelines/Pipelines.cpp +++ b/mlir/lib/Dialect/Rock/Pipelines/Pipelines.cpp @@ -236,23 +236,24 @@ void rock::buildBackendPipeline(OpPassManager &pm, // We need to lower affine again, because the expand strided metadata pass // adds back affine.apply for memref.subview gpuPm.addPass(createLowerAffinePass()); - gpuPm.addPass(createLowerGpuOpsToROCDLOpsPass( - options.chip, /*indexBitwidth=*/kDeriveIndexBitwidthFromDataLayout, + GpuROCDLAttachTargetOptions opts; + opts.triple = options.triple; + opts.chip = options.chip; + opts.features = options.features; + opts.optLevel = options.optLevel; + pm.addPass(createGpuROCDLAttachTarget(opts)); + auto &gpuPm2 = pm.nest(); + gpuPm2.addPass(createLowerGpuOpsToROCDLOpsPass( + /*chipset=*/"infer", /*indexBitwidth=*/kDeriveIndexBitwidthFromDataLayout, /*useBarePtrCallConv=*/true, gpu::amd::Runtime::HIP)); // Ensure we only run passes on LLVM functions inside GPU modules. - auto &llvmFuncPm = gpuPm.nest(); + auto &llvmFuncPm = gpuPm2.nest(); // -canonicalize -cse so that we don't have to crawl through memref // descriptors. (Mainly we want the `extractvalue` fold). llvmFuncPm.addPass(createCanonicalizerPass()); llvmFuncPm.addPass(createCSEPass()); llvmFuncPm.addPass(rock::createRockPrepareLLVMPass()); if (options.compile) { - GpuROCDLAttachTargetOptions opts; - opts.triple = options.triple; - opts.chip = options.chip; - opts.features = options.features; - opts.optLevel = options.optLevel; - pm.addPass(createGpuROCDLAttachTarget(opts)); pm.addPass(createGpuModuleToBinaryPass()); pm.addPass(createRockCheckResidencyPass()); } diff --git a/mlir/test/rocmlir-driver/pipelines.mlir b/mlir/test/rocmlir-driver/pipelines.mlir index 192932a1624f..e4336de1cb80 100644 --- a/mlir/test/rocmlir-driver/pipelines.mlir +++ b/mlir/test/rocmlir-driver/pipelines.mlir @@ -50,12 +50,12 @@ // BINARY-NEXT:convert-arith-to-amdgpu{allow-packed-f16-round-to-zero=true chipset=gfx90a saturate-fp8-truncf=true}, // BINARY-NEXT:emulate-fp8-ext-trunc, // BINARY-NEXT:expand-strided-metadata, -// BINARY-NEXT:lower-affine, -// BINARY-NEXT:convert-gpu-to-rocdl{chipset=gfx90a index-bitwidth=0 runtime=HIP use-bare-ptr-memref-call-conv=true}, +// BINARY-NEXT:lower-affine), +// BINARY-NEXT:rocdl-attach-target{O=3 abi=500 chip=gfx90a correct-sqrt=true daz=false fast=false features= finite-only=false module= triple=amdgcn-amd-amdhsa unsafe-math=false wave64=true}, +// BINARY-NEXT:gpu.module(convert-gpu-to-rocdl{chipset=infer index-bitwidth=0 runtime=HIP use-bare-ptr-memref-call-conv=true}, // BINARY-NEXT:llvm.func(canonicalize{ max-iterations=10 max-num-rewrites=-1 region-simplify=normal test-convergence=false top-down=true}, // BINARY-NEXT:cse, // BINARY-NEXT:rock-prepare-llvm)), -// BINARY-NEXT:rocdl-attach-target{O=3 abi=500 chip=gfx90a correct-sqrt=true daz=false fast=false features= finite-only=false module= triple=amdgcn-amd-amdhsa unsafe-math=false wave64=true}, // BINARY-NEXT:gpu-module-to-binary{format=fatbin opts= toolkit=}, // BINARY-NEXT:rock-check-residency, // BINARY-NEXT:emulate-fp8-ext-trunc) @@ -69,12 +69,12 @@ // BINARY_MI300-NEXT:f8E5M2} target-type=f32}, // BINARY_MI300-NEXT:convert-arith-to-amdgpu{allow-packed-f16-round-to-zero=true chipset=gfx940 saturate-fp8-truncf=true}, // BINARY_MI300-NEXT:expand-strided-metadata, -// BINARY_MI300-NEXT:lower-affine, -// BINARY_MI300-NEXT:convert-gpu-to-rocdl{chipset=gfx940 index-bitwidth=0 runtime=HIP use-bare-ptr-memref-call-conv=true}, +// BINARY_MI300-NEXT:lower-affine), +// BINARY_MI300-NEXT:rocdl-attach-target{O=3 abi=500 chip=gfx940 correct-sqrt=true daz=false fast=false features= finite-only=false module= triple=amdgcn-amd-amdhsa unsafe-math=false wave64=true}, +// BINARY_MI300-NEXT:gpu.module(convert-gpu-to-rocdl{chipset=infer index-bitwidth=0 runtime=HIP use-bare-ptr-memref-call-conv=true}, // BINARY_MI300-NEXT:llvm.func(canonicalize{ max-iterations=10 max-num-rewrites=-1 region-simplify=normal test-convergence=false top-down=true}, // BINARY_MI300-NEXT:cse, // BINARY_MI300-NEXT:rock-prepare-llvm)), -// BINARY_MI300-NEXT:rocdl-attach-target{O=3 abi=500 chip=gfx940 correct-sqrt=true daz=false fast=false features= finite-only=false module= triple=amdgcn-amd-amdhsa unsafe-math=false wave64=true}, // BINARY_MI300-NEXT:gpu-module-to-binary{format=fatbin opts= toolkit=}, // BINARY_MI300-NEXT:rock-check-residency, // BINARY_MI300-NEXT:emulate-fp8-ext-trunc)