Skip to content

[DAG] Fold (vt trunc (extload (vt x))) -> (vt load x) #75229

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Dec 18, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14818,11 +14818,11 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
if (SDValue Reduced = reduceLoadWidth(N))
return Reduced;

- // Handle the case where the load remains an extending load even
- // after truncation.
+ // Handle the case where the truncated result is at least as wide as the
+ // loaded type.
if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
auto *LN0 = cast<LoadSDNode>(N0);
- if (LN0->isSimple() && LN0->getMemoryVT().bitsLT(VT)) {
+ if (LN0->isSimple() && LN0->getMemoryVT().bitsLE(VT)) {
SDValue NewLoad = DAG.getExtLoad(
LN0->getExtensionType(), SDLoc(LN0), VT, LN0->getChain(),
LN0->getBasePtr(), LN0->getMemoryVT(), LN0->getMemOperand());
Expand Down
42 changes: 20 additions & 22 deletions llvm/test/CodeGen/AMDGPU/ctpop16.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1553,50 +1553,48 @@ define amdgpu_kernel void @ctpop_i16_in_br(ptr addrspace(1) %out, ptr addrspace(
; EG: ; %bb.0: ; %entry
; EG-NEXT: ALU 0, @20, KC0[], KC1[]
; EG-NEXT: TEX 0 @14
; EG-NEXT: ALU_PUSH_BEFORE 6, @21, KC0[], KC1[]
; EG-NEXT: ALU_PUSH_BEFORE 4, @21, KC0[], KC1[]
; EG-NEXT: JUMP @7 POP:1
; EG-NEXT: ALU 0, @28, KC0[CB0:0-32], KC1[]
; EG-NEXT: ALU 0, @26, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @16
; EG-NEXT: ALU_POP_AFTER 1, @29, KC0[], KC1[]
; EG-NEXT: ALU_PUSH_BEFORE 2, @31, KC0[CB0:0-32], KC1[]
; EG-NEXT: ALU_POP_AFTER 1, @27, KC0[], KC1[]
; EG-NEXT: ALU_PUSH_BEFORE 2, @29, KC0[CB0:0-32], KC1[]
; EG-NEXT: JUMP @11 POP:1
; EG-NEXT: TEX 0 @18
; EG-NEXT: ALU_POP_AFTER 0, @34, KC0[], KC1[]
; EG-NEXT: ALU 11, @35, KC0[], KC1[]
; EG-NEXT: ALU_POP_AFTER 0, @32, KC0[], KC1[]
; EG-NEXT: ALU 11, @33, KC0[], KC1[]
; EG-NEXT: MEM_RAT MSKOR T1.XW, T0.X
; EG-NEXT: CF_END
; EG-NEXT: Fetch clause starting at 14:
; EG-NEXT: VTX_READ_16 T1.X, T0.X, 46, #3
; EG-NEXT: VTX_READ_16 T2.X, T1.X, 46, #3
; EG-NEXT: Fetch clause starting at 16:
; EG-NEXT: VTX_READ_16 T1.X, T1.X, 2, #1
; EG-NEXT: VTX_READ_16 T0.X, T0.X, 2, #1
; EG-NEXT: Fetch clause starting at 18:
; EG-NEXT: VTX_READ_16 T0.X, T0.X, 44, #3
; EG-NEXT: VTX_READ_16 T0.X, T1.X, 44, #3
; EG-NEXT: ALU clause starting at 20:
; EG-NEXT: MOV * T0.X, 0.0,
; EG-NEXT: MOV * T1.X, 0.0,
; EG-NEXT: ALU clause starting at 21:
; EG-NEXT: AND_INT * T0.W, T1.X, literal.x,
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
; EG-NEXT: MOV T1.X, literal.x,
; EG-NEXT: MOV T0.X, literal.x,
; EG-NEXT: MOV T1.W, literal.y,
; EG-NEXT: SETNE_INT * T0.W, PV.W, 0.0,
; EG-NEXT: SETNE_INT * T0.W, T2.X, 0.0,
; EG-NEXT: 0(0.000000e+00), 1(1.401298e-45)
; EG-NEXT: PRED_SETNE_INT * ExecMask,PredicateBit (MASKED), PS, 0.0,
; EG-NEXT: ALU clause starting at 28:
; EG-NEXT: MOV * T1.X, KC0[2].Z,
; EG-NEXT: ALU clause starting at 29:
; EG-NEXT: ALU clause starting at 26:
; EG-NEXT: MOV * T0.X, KC0[2].Z,
; EG-NEXT: ALU clause starting at 27:
; EG-NEXT: MOV * T1.W, literal.x,
; EG-NEXT: 0(0.000000e+00), 0(0.000000e+00)
; EG-NEXT: ALU clause starting at 31:
; EG-NEXT: ALU clause starting at 29:
; EG-NEXT: MOV T0.W, KC0[2].Y,
; EG-NEXT: SETE_INT * T1.W, T1.W, 0.0,
; EG-NEXT: PRED_SETE_INT * ExecMask,PredicateBit (MASKED), PS, 0.0,
; EG-NEXT: ALU clause starting at 34:
; EG-NEXT: BCNT_INT * T1.X, T0.X,
; EG-NEXT: ALU clause starting at 35:
; EG-NEXT: ALU clause starting at 32:
; EG-NEXT: BCNT_INT * T0.X, T0.X,
; EG-NEXT: ALU clause starting at 33:
; EG-NEXT: LSHL * T1.W, T0.W, literal.x,
; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
; EG-NEXT: AND_INT T1.W, PV.W, literal.x,
; EG-NEXT: AND_INT * T2.W, T1.X, literal.y,
; EG-NEXT: AND_INT * T2.W, T0.X, literal.y,
; EG-NEXT: 24(3.363116e-44), 65535(9.183409e-41)
; EG-NEXT: LSHL T1.X, PS, PV.W,
; EG-NEXT: LSHL * T1.W, literal.x, PV.W,
Expand Down
12 changes: 4 additions & 8 deletions llvm/test/CodeGen/AMDGPU/cttz_zero_undef.ll
Original file line number Diff line number Diff line change
Expand Up @@ -331,7 +331,7 @@ define amdgpu_kernel void @s_cttz_zero_undef_i8_with_select(ptr addrspace(1) noa
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @8, KC0[], KC1[]
; EG-NEXT: TEX 0 @6
; EG-NEXT: ALU 14, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT: ALU 12, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT MSKOR T0.XW, T1.X
; EG-NEXT: CF_END
; EG-NEXT: PAD
Expand All @@ -340,9 +340,7 @@ define amdgpu_kernel void @s_cttz_zero_undef_i8_with_select(ptr addrspace(1) noa
; EG-NEXT: ALU clause starting at 8:
; EG-NEXT: MOV * T0.X, 0.0,
; EG-NEXT: ALU clause starting at 9:
; EG-NEXT: BFE_INT * T0.W, T0.X, 0.0, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
; EG-NEXT: FFBL_INT T0.W, PV.W,
; EG-NEXT: FFBL_INT T0.W, T0.X,
; EG-NEXT: AND_INT * T1.W, KC0[2].Y, literal.x,
; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
; EG-NEXT: AND_INT T0.W, PV.W, literal.x,
Expand Down Expand Up @@ -402,7 +400,7 @@ define amdgpu_kernel void @s_cttz_zero_undef_i16_with_select(ptr addrspace(1) no
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @8, KC0[], KC1[]
; EG-NEXT: TEX 0 @6
; EG-NEXT: ALU 14, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT: ALU 12, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT MSKOR T0.XW, T1.X
; EG-NEXT: CF_END
; EG-NEXT: PAD
Expand All @@ -411,9 +409,7 @@ define amdgpu_kernel void @s_cttz_zero_undef_i16_with_select(ptr addrspace(1) no
; EG-NEXT: ALU clause starting at 8:
; EG-NEXT: MOV * T0.X, 0.0,
; EG-NEXT: ALU clause starting at 9:
; EG-NEXT: BFE_INT * T0.W, T0.X, 0.0, literal.x,
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
; EG-NEXT: FFBL_INT T0.W, PV.W,
; EG-NEXT: FFBL_INT T0.W, T0.X,
; EG-NEXT: AND_INT * T1.W, KC0[2].Y, literal.x,
; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
; EG-NEXT: AND_INT T0.W, PV.W, literal.x,
Expand Down
Loading