updates

lialan · lialan · commit 2208dbcfba57 · 2025-04-20T14:33:33.000-04:00
diff --git a/mlir/include/mlir/Conversion/GPUToROCDL/GPUToROCDLPass.h b/mlir/include/mlir/Conversion/GPUToROCDL/GPUToROCDLPass.h
@@ -43,11 +43,7 @@ void configureGpuToROCDLConversionLegality(ConversionTarget &target);
 /// index bitwidth used for the lowering of the device side index computations
 /// is configurable.
 std::unique_ptr<OperationPass<gpu::GPUModuleOp>>
-createLowerGpuOpsToROCDLOpsPass(
-    const std::string &chipset = "gfx900",
-    unsigned indexBitwidth = kDeriveIndexBitwidthFromDataLayout,
-    bool useBarePtrCallConv = false,
-    gpu::amd::Runtime runtime = gpu::amd::Runtime::Unknown);
+createLowerGpuOpsToROCDLOpsPass();
 
 } // namespace mlir
 
diff --git a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
@@ -203,10 +203,11 @@ struct GPUSubgroupIdOpToROCDL final
     : ConvertOpToLLVMPattern<gpu::SubgroupIdOp> {
   using ConvertOpToLLVMPattern::ConvertOpToLLVMPattern;
 
-  GPUSubgroupIdOpToROCDL(MLIRContext *ctx, mlir::amdgpu::Chipset chipset)
-      : ConvertOpToLLVMPattern(ctx), chipset(chipset) {}
+  GPUSubgroupIdOpToROCDL(const LLVMTypeConverter &converter,
+                         const mlir::amdgpu::Chipset &chipset)
+      : ConvertOpToLLVMPattern(converter), chipset(chipset) {}
 
-  mlir::amdgpu::Chipset chipset;
+  const mlir::amdgpu::Chipset chipset;
 
   LogicalResult
   matchAndRewrite(gpu::SubgroupIdOp op, gpu::SubgroupIdOp::Adaptor adaptor,
@@ -235,19 +236,7 @@ struct GPUSubgroupIdOpToROCDL final
 // code.
 struct LowerGpuOpsToROCDLOpsPass final
     : public impl::ConvertGpuOpsToROCDLOpsBase<LowerGpuOpsToROCDLOpsPass> {
-  LowerGpuOpsToROCDLOpsPass() = default;
-  LowerGpuOpsToROCDLOpsPass(const std::string &chipset, unsigned indexBitwidth,
-                            bool useBarePtrCallConv,
-                            gpu::amd::Runtime runtime) {
-    if (this->chipset.getNumOccurrences() == 0)
-      this->chipset = chipset;
-    if (this->indexBitwidth.getNumOccurrences() == 0)
-      this->indexBitwidth = indexBitwidth;
-    if (this->useBarePtrCallConv.getNumOccurrences() == 0)
-      this->useBarePtrCallConv = useBarePtrCallConv;
-    if (this->runtime.getNumOccurrences() == 0)
-      this->runtime = runtime;
-  }
+  using Base::Base;
 
   void getDependentDialects(DialectRegistry &registry) const override {
     Base::getDependentDialects(registry);
@@ -442,16 +431,11 @@ void mlir::populateGpuToROCDLConversionPatterns(
   patterns.add<GPUDynamicSharedMemoryOpLowering>(converter);
 
   patterns.add<GPUShuffleOpLowering, GPULaneIdOpToROCDL>(converter);
-  patterns.add(
-      std::make_unique<GPUSubgroupIdOpToROCDL>(patterns.getContext(), chipset));
+  patterns.add<GPUSubgroupIdOpToROCDL>(converter, chipset);
   populateMathToROCDLConversionPatterns(converter, patterns);
 }
 
 std::unique_ptr<OperationPass<gpu::GPUModuleOp>>
-mlir::createLowerGpuOpsToROCDLOpsPass(const std::string &chipset,
-                                      unsigned indexBitwidth,
-                                      bool useBarePtrCallConv,
-                                      gpu::amd::Runtime runtime) {
-  return std::make_unique<LowerGpuOpsToROCDLOpsPass>(
-      chipset, indexBitwidth, useBarePtrCallConv, runtime);
+mlir::createLowerGpuOpsToROCDLOpsPass() {
+  return std::make_unique<LowerGpuOpsToROCDLOpsPass>();
 }
diff --git a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl-chipset.mlir b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl-chipset.mlir
@@ -0,0 +1,13 @@
+// RUN: mlir-opt %s -convert-gpu-to-rocdl='chipset=gfx1030' -split-input-file | FileCheck %s
+
+gpu.module @test_module {
+  // CHECK-LABEL: func @gpu_subgroup_id()
+  func.func @gpu_subgroup_id() -> (index) {
+    // expected-error@+1 {{failed to legalize operation 'gpu.subgroup_id' that was explicitly marked illegal}}
+    // CHECK: = rocdl.wave_id : i32
+    // CHECK: = llvm.sext %{{.*}} : i32 to i64
+    %waveId = gpu.subgroup_id : index
+    func.return  %waveId :  index
+  }
+}
+
diff --git a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir
@@ -11,7 +11,7 @@ gpu.module @test_module {
   func.func @gpu_index_ops()
       -> (index, index, index, index, index, index,
           index, index, index, index, index, index,
-          index, index) {
+          index) {
     // CHECK32-NOT: = llvm.sext %{{.*}} : i32 to i64
 
     // CHECK: rocdl.workitem.id.x : i32
@@ -59,16 +59,12 @@ gpu.module @test_module {
     // CHECK: = llvm.sext %{{.*}} : i32 to i64
     %laneId = gpu.lane_id
 
-    // CHECK: = rocdl.wave_id : i32
-    // CHECK: = llvm.sext %{{.*}} : i32 to i64
-    %waveId = gpu.subgroup_id : index
-
     func.return %tIdX, %tIdY, %tIdZ, %bDimX, %bDimY, %bDimZ,
                %bIdX, %bIdY, %bIdZ, %gDimX, %gDimY, %gDimZ,
-               %laneId, %waveId
+               %laneId
         : index, index, index, index, index, index,
           index, index, index, index, index, index,
-          index, index
+          index
   }
 }