@@ -18330,8 +18330,10 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
18330
18330
// D = A * B + C
18331
18331
// We need to specify one type for matrices AB and one for matrices CD.
18332
18332
SmallVector<unsigned, 2> ArgsForMatchingMatrixTypes;
18333
- // Some intrinsics expect "false" as an extra bool argument.
18334
- bool AppendExtraBoolArg = false;
18333
+ // On GFX12, the intrinsics with 16-bit accumulator use a packed layout.
18334
+ // There is no need for the variable opsel argument, so always set it to
18335
+ // "false".
18336
+ bool AppendFalseForOpselArg = false;
18335
18337
unsigned BuiltinWMMAOp;
18336
18338
18337
18339
switch (BuiltinID) {
@@ -18351,7 +18353,7 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
18351
18353
break;
18352
18354
case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32_gfx12:
18353
18355
case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64_gfx12:
18354
- AppendExtraBoolArg = true;
18356
+ AppendFalseForOpselArg = true;
18355
18357
LLVM_FALLTHROUGH;
18356
18358
case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32:
18357
18359
case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64:
@@ -18360,7 +18362,7 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
18360
18362
break;
18361
18363
case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32_gfx12:
18362
18364
case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64_gfx12:
18363
- AppendExtraBoolArg = true;
18365
+ AppendFalseForOpselArg = true;
18364
18366
LLVM_FALLTHROUGH;
18365
18367
case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32:
18366
18368
case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64:
@@ -18476,7 +18478,7 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
18476
18478
SmallVector<Value *, 6> Args;
18477
18479
for (int i = 0, e = E->getNumArgs(); i != e; ++i)
18478
18480
Args.push_back(EmitScalarExpr(E->getArg(i)));
18479
- if (AppendExtraBoolArg )
18481
+ if (AppendFalseForOpselArg )
18480
18482
Args.push_back(Builder.getFalse());
18481
18483
18482
18484
SmallVector<llvm::Type *, 6> ArgTypes;
0 commit comments