Skip to content

Commit 3c22209

Browse files
committed
[mlir][nvvm] Fix the PTX lowering of wgmma.mma_async
The default layouts of the A and B matrices are row-major and column-major, respectively, so the transpose flag has the opposite meaning for each of the two operands.
1 parent b223aeb commit 3c22209

File tree

2 files changed

+2
-2
lines changed

2 files changed

+2
-2
lines changed

mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1003,7 +1003,7 @@ void NVVM::WgmmaMmaAsyncOp::getAsmValues(
10031003
{makeConstantI32(rewriter, static_cast<int>(getLayoutA())),
10041004
mlir::NVVM::PTXRegisterMod::Read});
10051005
asmValues.push_back(
1006-
{makeConstantI32(rewriter, static_cast<int>(getLayoutB())),
1006+
{makeConstantI32(rewriter, 1 - static_cast<int>(getLayoutB())),
10071007
mlir::NVVM::PTXRegisterMod::Read});
10081008
}
10091009
}

mlir/test/Conversion/NVVMToLLVM/nvvm-to-llvm.mlir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -297,7 +297,7 @@ func.func @wgmma_f32_f16_f16(%descA : i64, %descB : i64) -> !mat64f32{
297297
// CHECK: %[[A2:.*]] = llvm.mlir.constant(-1 : i32) : i32
298298
// CHECK: %[[A3:.*]] = llvm.mlir.constant(-1 : i32) : i32
299299
// CHECK: %[[A4:.*]] = llvm.mlir.constant(1 : i32) : i32
300-
// CHECK: %[[A5:.*]] = llvm.mlir.constant(1 : i32) : i32
300+
// CHECK: %[[A5:.*]] = llvm.mlir.constant(0 : i32) : i32
301301
// CHECK: %[[V0:.*]] = llvm.extractvalue %[[RES]][0] : !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32)>
302302
// CHECK: %[[V4:.*]] = llvm.extractvalue %[[RES]][4] : !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32)>
303303
// CHECK: %[[V11:.*]] = llvm.extractvalue %[[RES]][11] : !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32)>

0 commit comments

Comments
 (0)