Skip to content

Commit 85e6677

Browse files
committed
[DAG] Fold (vt trunc (extload (vt x))) -> (vt load x)
We were only folding cases which remained extloads, but DAG.getExtLoad can also handle the cases where we don't need to extend at all. reduceLoadWidth can handle this for scalar loads, but not for vectors. Noticed while triaging D152928.
1 parent fd527de commit 85e6677

File tree

4 files changed

+94
-262
lines changed

4 files changed

+94
-262
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14818,11 +14818,11 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
1481814818
if (SDValue Reduced = reduceLoadWidth(N))
1481914819
return Reduced;
1482014820

14821-
// Handle the case where the load remains an extending load even
14822-
// after truncation.
14821+
// Handle the case where the truncated result is at least as wide as the
14822+
// loaded type.
1482314823
if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
1482414824
auto *LN0 = cast<LoadSDNode>(N0);
14825-
if (LN0->isSimple() && LN0->getMemoryVT().bitsLT(VT)) {
14825+
if (LN0->isSimple() && LN0->getMemoryVT().bitsLE(VT)) {
1482614826
SDValue NewLoad = DAG.getExtLoad(
1482714827
LN0->getExtensionType(), SDLoc(LN0), VT, LN0->getChain(),
1482814828
LN0->getBasePtr(), LN0->getMemoryVT(), LN0->getMemOperand());

llvm/test/CodeGen/AMDGPU/ctpop16.ll

Lines changed: 20 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1553,50 +1553,48 @@ define amdgpu_kernel void @ctpop_i16_in_br(ptr addrspace(1) %out, ptr addrspace(
15531553
; EG: ; %bb.0: ; %entry
15541554
; EG-NEXT: ALU 0, @20, KC0[], KC1[]
15551555
; EG-NEXT: TEX 0 @14
1556-
; EG-NEXT: ALU_PUSH_BEFORE 6, @21, KC0[], KC1[]
1556+
; EG-NEXT: ALU_PUSH_BEFORE 4, @21, KC0[], KC1[]
15571557
; EG-NEXT: JUMP @7 POP:1
1558-
; EG-NEXT: ALU 0, @28, KC0[CB0:0-32], KC1[]
1558+
; EG-NEXT: ALU 0, @26, KC0[CB0:0-32], KC1[]
15591559
; EG-NEXT: TEX 0 @16
1560-
; EG-NEXT: ALU_POP_AFTER 1, @29, KC0[], KC1[]
1561-
; EG-NEXT: ALU_PUSH_BEFORE 2, @31, KC0[CB0:0-32], KC1[]
1560+
; EG-NEXT: ALU_POP_AFTER 1, @27, KC0[], KC1[]
1561+
; EG-NEXT: ALU_PUSH_BEFORE 2, @29, KC0[CB0:0-32], KC1[]
15621562
; EG-NEXT: JUMP @11 POP:1
15631563
; EG-NEXT: TEX 0 @18
1564-
; EG-NEXT: ALU_POP_AFTER 0, @34, KC0[], KC1[]
1565-
; EG-NEXT: ALU 11, @35, KC0[], KC1[]
1564+
; EG-NEXT: ALU_POP_AFTER 0, @32, KC0[], KC1[]
1565+
; EG-NEXT: ALU 11, @33, KC0[], KC1[]
15661566
; EG-NEXT: MEM_RAT MSKOR T1.XW, T0.X
15671567
; EG-NEXT: CF_END
15681568
; EG-NEXT: Fetch clause starting at 14:
1569-
; EG-NEXT: VTX_READ_16 T1.X, T0.X, 46, #3
1569+
; EG-NEXT: VTX_READ_16 T2.X, T1.X, 46, #3
15701570
; EG-NEXT: Fetch clause starting at 16:
1571-
; EG-NEXT: VTX_READ_16 T1.X, T1.X, 2, #1
1571+
; EG-NEXT: VTX_READ_16 T0.X, T0.X, 2, #1
15721572
; EG-NEXT: Fetch clause starting at 18:
1573-
; EG-NEXT: VTX_READ_16 T0.X, T0.X, 44, #3
1573+
; EG-NEXT: VTX_READ_16 T0.X, T1.X, 44, #3
15741574
; EG-NEXT: ALU clause starting at 20:
1575-
; EG-NEXT: MOV * T0.X, 0.0,
1575+
; EG-NEXT: MOV * T1.X, 0.0,
15761576
; EG-NEXT: ALU clause starting at 21:
1577-
; EG-NEXT: AND_INT * T0.W, T1.X, literal.x,
1578-
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
1579-
; EG-NEXT: MOV T1.X, literal.x,
1577+
; EG-NEXT: MOV T0.X, literal.x,
15801578
; EG-NEXT: MOV T1.W, literal.y,
1581-
; EG-NEXT: SETNE_INT * T0.W, PV.W, 0.0,
1579+
; EG-NEXT: SETNE_INT * T0.W, T2.X, 0.0,
15821580
; EG-NEXT: 0(0.000000e+00), 1(1.401298e-45)
15831581
; EG-NEXT: PRED_SETNE_INT * ExecMask,PredicateBit (MASKED), PS, 0.0,
1584-
; EG-NEXT: ALU clause starting at 28:
1585-
; EG-NEXT: MOV * T1.X, KC0[2].Z,
1586-
; EG-NEXT: ALU clause starting at 29:
1582+
; EG-NEXT: ALU clause starting at 26:
1583+
; EG-NEXT: MOV * T0.X, KC0[2].Z,
1584+
; EG-NEXT: ALU clause starting at 27:
15871585
; EG-NEXT: MOV * T1.W, literal.x,
15881586
; EG-NEXT: 0(0.000000e+00), 0(0.000000e+00)
1589-
; EG-NEXT: ALU clause starting at 31:
1587+
; EG-NEXT: ALU clause starting at 29:
15901588
; EG-NEXT: MOV T0.W, KC0[2].Y,
15911589
; EG-NEXT: SETE_INT * T1.W, T1.W, 0.0,
15921590
; EG-NEXT: PRED_SETE_INT * ExecMask,PredicateBit (MASKED), PS, 0.0,
1593-
; EG-NEXT: ALU clause starting at 34:
1594-
; EG-NEXT: BCNT_INT * T1.X, T0.X,
1595-
; EG-NEXT: ALU clause starting at 35:
1591+
; EG-NEXT: ALU clause starting at 32:
1592+
; EG-NEXT: BCNT_INT * T0.X, T0.X,
1593+
; EG-NEXT: ALU clause starting at 33:
15961594
; EG-NEXT: LSHL * T1.W, T0.W, literal.x,
15971595
; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
15981596
; EG-NEXT: AND_INT T1.W, PV.W, literal.x,
1599-
; EG-NEXT: AND_INT * T2.W, T1.X, literal.y,
1597+
; EG-NEXT: AND_INT * T2.W, T0.X, literal.y,
16001598
; EG-NEXT: 24(3.363116e-44), 65535(9.183409e-41)
16011599
; EG-NEXT: LSHL T1.X, PS, PV.W,
16021600
; EG-NEXT: LSHL * T1.W, literal.x, PV.W,

llvm/test/CodeGen/AMDGPU/cttz_zero_undef.ll

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -331,7 +331,7 @@ define amdgpu_kernel void @s_cttz_zero_undef_i8_with_select(ptr addrspace(1) noa
331331
; EG: ; %bb.0:
332332
; EG-NEXT: ALU 0, @8, KC0[], KC1[]
333333
; EG-NEXT: TEX 0 @6
334-
; EG-NEXT: ALU 14, @9, KC0[CB0:0-32], KC1[]
334+
; EG-NEXT: ALU 12, @9, KC0[CB0:0-32], KC1[]
335335
; EG-NEXT: MEM_RAT MSKOR T0.XW, T1.X
336336
; EG-NEXT: CF_END
337337
; EG-NEXT: PAD
@@ -340,9 +340,7 @@ define amdgpu_kernel void @s_cttz_zero_undef_i8_with_select(ptr addrspace(1) noa
340340
; EG-NEXT: ALU clause starting at 8:
341341
; EG-NEXT: MOV * T0.X, 0.0,
342342
; EG-NEXT: ALU clause starting at 9:
343-
; EG-NEXT: BFE_INT * T0.W, T0.X, 0.0, literal.x,
344-
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
345-
; EG-NEXT: FFBL_INT T0.W, PV.W,
343+
; EG-NEXT: FFBL_INT T0.W, T0.X,
346344
; EG-NEXT: AND_INT * T1.W, KC0[2].Y, literal.x,
347345
; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
348346
; EG-NEXT: AND_INT T0.W, PV.W, literal.x,
@@ -402,7 +400,7 @@ define amdgpu_kernel void @s_cttz_zero_undef_i16_with_select(ptr addrspace(1) no
402400
; EG: ; %bb.0:
403401
; EG-NEXT: ALU 0, @8, KC0[], KC1[]
404402
; EG-NEXT: TEX 0 @6
405-
; EG-NEXT: ALU 14, @9, KC0[CB0:0-32], KC1[]
403+
; EG-NEXT: ALU 12, @9, KC0[CB0:0-32], KC1[]
406404
; EG-NEXT: MEM_RAT MSKOR T0.XW, T1.X
407405
; EG-NEXT: CF_END
408406
; EG-NEXT: PAD
@@ -411,9 +409,7 @@ define amdgpu_kernel void @s_cttz_zero_undef_i16_with_select(ptr addrspace(1) no
411409
; EG-NEXT: ALU clause starting at 8:
412410
; EG-NEXT: MOV * T0.X, 0.0,
413411
; EG-NEXT: ALU clause starting at 9:
414-
; EG-NEXT: BFE_INT * T0.W, T0.X, 0.0, literal.x,
415-
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
416-
; EG-NEXT: FFBL_INT T0.W, PV.W,
412+
; EG-NEXT: FFBL_INT T0.W, T0.X,
417413
; EG-NEXT: AND_INT * T1.W, KC0[2].Y, literal.x,
418414
; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
419415
; EG-NEXT: AND_INT T0.W, PV.W, literal.x,

0 commit comments

Comments
 (0)