@@ -18329,6 +18329,9 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
18329
18329
// the form:
18330
18330
// D = A * B + C
18331
18331
// We need to specify one type for matrices AB and one for matrices CD.
18332
+ // Sparse matrix operations can have different types for A and B as well as
18333
+ // an additional type for the sparsity index.
18334
+ // The destination type must be listed before the source operand types.
18332
18335
SmallVector<unsigned, 2> ArgsForMatchingMatrixTypes;
18333
18336
// On GFX12, the intrinsics with 16-bit accumulator use a packed layout.
18334
18337
// There is no need for the variable opsel argument, so always set it to
@@ -18341,14 +18344,14 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
18341
18344
case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64:
18342
18345
case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32_gfx12:
18343
18346
case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64_gfx12:
18344
- ArgsForMatchingMatrixTypes = {2, 0};
18347
+ ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
18345
18348
BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_f16;
18346
18349
break;
18347
18350
case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32:
18348
18351
case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64:
18349
18352
case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32_gfx12:
18350
18353
case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64_gfx12:
18351
- ArgsForMatchingMatrixTypes = {2, 0};
18354
+ ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
18352
18355
BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf16;
18353
18356
break;
18354
18357
case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32_gfx12:
@@ -18357,7 +18360,7 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
18357
18360
LLVM_FALLTHROUGH;
18358
18361
case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32:
18359
18362
case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64:
18360
- ArgsForMatchingMatrixTypes = {2, 0};
18363
+ ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
18361
18364
BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x16_f16;
18362
18365
break;
18363
18366
case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32_gfx12:
@@ -18366,111 +18369,111 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
18366
18369
LLVM_FALLTHROUGH;
18367
18370
case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32:
18368
18371
case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64:
18369
- ArgsForMatchingMatrixTypes = {2, 0};
18372
+ ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
18370
18373
BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16;
18371
18374
break;
18372
18375
case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w32:
18373
18376
case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64:
18374
- ArgsForMatchingMatrixTypes = {2, 0};
18377
+ ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
18375
18378
BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x16_f16_tied;
18376
18379
break;
18377
18380
case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32:
18378
18381
case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64:
18379
- ArgsForMatchingMatrixTypes = {2, 0};
18382
+ ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
18380
18383
BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16_tied;
18381
18384
break;
18382
18385
case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32:
18383
18386
case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64:
18384
18387
case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32_gfx12:
18385
18388
case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64_gfx12:
18386
- ArgsForMatchingMatrixTypes = {4, 1};
18389
+ ArgsForMatchingMatrixTypes = {4, 1}; // CD, AB
18387
18390
BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x16_iu8;
18388
18391
break;
18389
18392
case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32:
18390
18393
case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64:
18391
18394
case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32_gfx12:
18392
18395
case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64_gfx12:
18393
- ArgsForMatchingMatrixTypes = {4, 1};
18396
+ ArgsForMatchingMatrixTypes = {4, 1}; // CD, AB
18394
18397
BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x16_iu4;
18395
18398
break;
18396
18399
case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32_gfx12:
18397
18400
case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w64_gfx12:
18398
- ArgsForMatchingMatrixTypes = {2, 0};
18401
+ ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
18399
18402
BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_fp8;
18400
18403
break;
18401
18404
case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32_gfx12:
18402
18405
case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w64_gfx12:
18403
- ArgsForMatchingMatrixTypes = {2, 0};
18406
+ ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
18404
18407
BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_bf8;
18405
18408
break;
18406
18409
case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32_gfx12:
18407
18410
case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w64_gfx12:
18408
- ArgsForMatchingMatrixTypes = {2, 0};
18411
+ ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
18409
18412
BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_fp8;
18410
18413
break;
18411
18414
case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32_gfx12:
18412
18415
case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w64_gfx12:
18413
- ArgsForMatchingMatrixTypes = {2, 0};
18416
+ ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB
18414
18417
BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_bf8;
18415
18418
break;
18416
18419
case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w32_gfx12:
18417
18420
case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w64_gfx12:
18418
- ArgsForMatchingMatrixTypes = {4, 1};
18421
+ ArgsForMatchingMatrixTypes = {4, 1}; // CD, AB
18419
18422
BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x32_iu4;
18420
18423
break;
18421
18424
case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w32:
18422
18425
case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w64:
18423
- ArgsForMatchingMatrixTypes = {0, 1, 2, 3};
18426
+ ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
18424
18427
BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_f16;
18425
18428
break;
18426
18429
case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32:
18427
18430
case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64:
18428
- ArgsForMatchingMatrixTypes = {0, 1, 2, 3};
18431
+ ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
18429
18432
BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf16;
18430
18433
break;
18431
18434
case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w32:
18432
18435
case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w64:
18433
- ArgsForMatchingMatrixTypes = {0, 1, 2, 3};
18436
+ ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
18434
18437
BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x32_f16;
18435
18438
break;
18436
18439
case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32:
18437
18440
case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64:
18438
- ArgsForMatchingMatrixTypes = {0, 1, 2, 3};
18441
+ ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
18439
18442
BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_bf16_16x16x32_bf16;
18440
18443
break;
18441
18444
case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32:
18442
18445
case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64:
18443
- ArgsForMatchingMatrixTypes = {1, 3, 4, 5};
18446
+ ArgsForMatchingMatrixTypes = {4, 1, 3, 5}; // CD, A, B, Index
18444
18447
BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x32_iu8;
18445
18448
break;
18446
18449
case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32:
18447
18450
case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64:
18448
- ArgsForMatchingMatrixTypes = {1, 3, 4, 5};
18451
+ ArgsForMatchingMatrixTypes = {4, 1, 3, 5}; // CD, A, B, Index
18449
18452
BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x32_iu4;
18450
18453
break;
18451
18454
case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32:
18452
18455
case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64:
18453
- ArgsForMatchingMatrixTypes = {1, 3, 4, 5};
18456
+ ArgsForMatchingMatrixTypes = {4, 1, 3, 5}; // CD, A, B, Index
18454
18457
BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x64_iu4;
18455
18458
break;
18456
18459
case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32:
18457
18460
case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64:
18458
- ArgsForMatchingMatrixTypes = {0, 1, 2, 3};
18461
+ ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
18459
18462
BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_fp8;
18460
18463
break;
18461
18464
case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32:
18462
18465
case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64:
18463
- ArgsForMatchingMatrixTypes = {0, 1, 2, 3};
18466
+ ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
18464
18467
BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_bf8;
18465
18468
break;
18466
18469
case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32:
18467
18470
case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64:
18468
- ArgsForMatchingMatrixTypes = {0, 1, 2, 3};
18471
+ ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
18469
18472
BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_fp8;
18470
18473
break;
18471
18474
case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32:
18472
18475
case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64:
18473
- ArgsForMatchingMatrixTypes = {0, 1, 2, 3};
18476
+ ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index
18474
18477
BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_bf8;
18475
18478
break;
18476
18479
}
0 commit comments