Skip to content

Commit cb4fca9

Browse files
authored
[AMDGPU] Extend llvm.amdgcn.update.dpp intrinsic to support f64 (#91190)
Follow-up patch to #89217, landed before we make changes to the atomic optimizer.
1 parent b1f04d5 commit cb4fca9

File tree

5 files changed

+457
-17
lines changed

5 files changed

+457
-17
lines changed

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3325,13 +3325,15 @@ def : GCNPat <
33253325
(as_i1timm $bound_ctrl))
33263326
>;
33273327

3328+
foreach vt = Reg64Types.types in {
33283329
def : GCNPat <
3329-
(i64 (int_amdgcn_update_dpp i64:$old, i64:$src, timm:$dpp_ctrl, timm:$row_mask,
3330+
(vt (int_amdgcn_update_dpp vt:$old, vt:$src, timm:$dpp_ctrl, timm:$row_mask,
33303331
timm:$bank_mask, timm:$bound_ctrl)),
33313332
(V_MOV_B64_DPP_PSEUDO VReg_64_Align2:$old, VReg_64_Align2:$src, (as_i32timm $dpp_ctrl),
33323333
(as_i32timm $row_mask), (as_i32timm $bank_mask),
33333334
(as_i1timm $bound_ctrl))
33343335
>;
3336+
}
33353337

33363338
//===----------------------------------------------------------------------===//
33373339
// Fract Patterns

llvm/lib/Target/AMDGPU/SIRegisterInfo.td

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -586,6 +586,7 @@ class RegisterTypes<list<ValueType> reg_types> {
586586

587587
def Reg16Types : RegisterTypes<[i16, f16, bf16]>;
588588
def Reg32Types : RegisterTypes<[i32, f32, v2i16, v2f16, v2bf16, p2, p3, p5, p6]>;
589+
def Reg64Types : RegisterTypes<[i64, f64, v2i32, v2f32, p0]>;
589590

590591
let HasVGPR = 1 in {
591592
// VOP3 and VINTERP can access 256 lo and 256 hi registers.

llvm/lib/Target/AMDGPU/VOP1Instructions.td

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1341,19 +1341,16 @@ def : GCNPat <
13411341
(as_i1timm $bound_ctrl))
13421342
>;
13431343

1344-
class UpdateDPPPat<ValueType vt> : GCNPat <
1344+
foreach vt = Reg32Types.types in {
1345+
def : GCNPat <
13451346
(vt (int_amdgcn_update_dpp vt:$old, vt:$src, timm:$dpp_ctrl,
13461347
timm:$row_mask, timm:$bank_mask,
13471348
timm:$bound_ctrl)),
13481349
(V_MOV_B32_dpp VGPR_32:$old, VGPR_32:$src, (as_i32timm $dpp_ctrl),
13491350
(as_i32timm $row_mask), (as_i32timm $bank_mask),
13501351
(as_i1timm $bound_ctrl))
13511352
>;
1352-
1353-
def : UpdateDPPPat<i32>;
1354-
def : UpdateDPPPat<f32>;
1355-
def : UpdateDPPPat<v2i16>;
1356-
def : UpdateDPPPat<v2f16>;
1353+
}
13571354

13581355
} // End OtherPredicates = [isGFX8Plus]
13591356

0 commit comments

Comments
 (0)