Skip to content

Commit 89dfa75

Browse files
committed
fix bfe for true16 mode
1 parent c19e900 commit 89dfa75

File tree

2 files changed

+43
-0
lines changed

2 files changed

+43
-0
lines changed

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2560,6 +2560,8 @@ def : GCNPat<
25602560
(i32 (DivergentSextInreg<i1> i32:$src)),
25612561
(V_BFE_I32_e64 i32:$src, (i32 0), (i32 1))>;
25622562

2563+
foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
2564+
let True16Predicate = p in {
25632565
def : GCNPat <
25642566
(i16 (DivergentSextInreg<i1> i16:$src)),
25652567
(V_BFE_I32_e64 $src, (i32 0), (i32 1))
@@ -2569,6 +2571,23 @@ def : GCNPat <
25692571
(i16 (DivergentSextInreg<i8> i16:$src)),
25702572
(V_BFE_I32_e64 $src, (i32 0), (i32 8))
25712573
>;
2574+
}
2575+
2576+
// Real-true16 variants of the i16 DivergentSextInreg patterns. Both select
// V_BFE_I32_e64, but instead of passing $src directly they first build a
// VGPR_32 via REG_SEQUENCE with VGPR_16:$src in lo16 and an IMPLICIT_DEF i16
// in hi16. The trailing BFE operands are (0, 1) for the i1 case and (0, 8)
// for the i8 case.
let True16Predicate = UseRealTrue16Insts in {
2577+
def : GCNPat <
2578+
(i16 (DivergentSextInreg<i1> i16:$src)),
2579+
(V_BFE_I32_e64
2580+
(REG_SEQUENCE VGPR_32, VGPR_16:$src, lo16, (i16 (IMPLICIT_DEF)), hi16),
2581+
(i32 0), (i32 1))
2582+
>;
2583+
2584+
def : GCNPat <
2585+
(i16 (DivergentSextInreg<i8> i16:$src)),
2586+
(V_BFE_I32_e64
2587+
(REG_SEQUENCE VGPR_32, VGPR_16:$src, lo16, (i16 (IMPLICIT_DEF)), hi16),
2588+
(i32 0), (i32 8))
2589+
>;
2590+
}
25722591

25732592
def : GCNPat<
25742593
(i32 (DivergentSextInreg<i8> i32:$src)),

llvm/lib/Target/AMDGPU/VOP3Instructions.td

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -319,11 +319,21 @@ let SchedRW = [Write64Bit] in {
319319
} // End SchedRW = [Write64Bit]
320320
} // End isReMaterializable = 1
321321

322+
// Divergent sext i16 -> i32, emitted once for each predicate in the list
// (NotHasTrue16BitInsts and UseFakeTrue16Insts). Here the i16 $src is passed
// straight to V_BFE_I32_e64 with trailing operands (0, 0x10).
foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
323+
let True16Predicate = p in
322324
def : GCNPat<
323325
(i32 (DivergentUnaryFrag<sext> i16:$src)),
324326
(i32 (V_BFE_I32_e64 i16:$src, (i32 0), (i32 0x10)))
325327
>;
326328

329+
// Divergent sext i16 -> i32 under UseRealTrue16Insts. The source is matched
// as VGPR_16:$src and placed in lo16 of a VGPR_32 (hi16 is IMPLICIT_DEF)
// before being passed to V_BFE_I32_e64 with trailing operands (0, 0x10).
let True16Predicate = UseRealTrue16Insts in
330+
def : GCNPat<
331+
(i32 (DivergentUnaryFrag<sext> i16:$src)),
332+
(i32 (V_BFE_I32_e64
333+
(REG_SEQUENCE VGPR_32, VGPR_16:$src, lo16, (i16 (IMPLICIT_DEF)), hi16),
334+
(i32 0), (i32 0x10)))
335+
>;
336+
327337
let isReMaterializable = 1 in {
328338
let SubtargetPredicate = isGFX6GFX7GFX10Plus in {
329339
defm V_MULLIT_F32 : VOP3Inst <"v_mullit_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
@@ -423,6 +433,8 @@ def V_INTERP_P1LV_F16 : VOP3Interp <"v_interp_p1lv_f16", VOP3_INTERP16<[f32, f32
423433

424434
} // End SubtargetPredicate = Has16BitInsts, isCommutable = 1
425435

436+
foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
437+
let True16Predicate = p in
426438
def : GCNPat<
427439
(i64 (DivergentUnaryFrag<sext> i16:$src)),
428440
(REG_SEQUENCE VReg_64,
@@ -432,6 +444,18 @@ def : GCNPat<
432444
), VGPR_32)), sub1)
433445
>;
434446

447+
// Divergent sext i16 -> i64 under UseRealTrue16Insts. The result is a VReg_64
// built from two halves:
//   sub0: V_BFE_I32_e64 of the source with trailing operands (0, 0x10);
//   sub1: the same BFE result shifted by V_ASHRREV_I32_e32 with amount 0x1f.
// Under this predicate $src is a VGPR_16, so BOTH BFE uses must first place
// it in lo16 of a VGPR_32 via REG_SEQUENCE (hi16 is IMPLICIT_DEF). The BFE
// feeding sub1 previously received the raw $src, unlike the sub0 BFE and the
// other real-true16 patterns.
let True16Predicate = UseRealTrue16Insts in
448+
def : GCNPat<
449+
(i64 (DivergentUnaryFrag<sext> i16:$src)),
450+
(REG_SEQUENCE VReg_64,
451+
(i32 (V_BFE_I32_e64
452+
(REG_SEQUENCE VGPR_32, VGPR_16:$src, lo16, (i16 (IMPLICIT_DEF)), hi16),
453+
(S_MOV_B32 (i32 0)), (S_MOV_B32 (i32 0x10)))), sub0,
454+
(i32 (COPY_TO_REGCLASS
455+
(V_ASHRREV_I32_e32 (S_MOV_B32 (i32 0x1f)), (i32 (V_BFE_I32_e64 (REG_SEQUENCE VGPR_32, VGPR_16:$src, lo16, (i16 (IMPLICIT_DEF)), hi16), (S_MOV_B32 (i32 0)), (S_MOV_B32 (i32 0x10))))
456+
), VGPR_32)), sub1)
457+
>;
458+
435459
let SubtargetPredicate = isGFX8Plus, Uses = [MODE, M0, EXEC], OtherPredicates = [isNotGFX90APlus] in {
436460
def V_INTERP_P1_F32_e64 : VOP3Interp <"v_interp_p1_f32", VOP3_INTERP>;
437461
def V_INTERP_P2_F32_e64 : VOP3Interp <"v_interp_p2_f32", VOP3_INTERP>;

0 commit comments

Comments
 (0)