-
Notifications
You must be signed in to change notification settings - Fork 14.3k
Reland [SelectionDAG] Folding ZERO-EXTEND/SIGN_EXTEND poison to Poison value in getNode #136701
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
… in getNode (llvm#122741) The PR will fix the issue llvm#122728 This patch addresses the signed/zero extension of poison by using a poison value of the extended type instead of a constant zero of the extended type.
@llvm/pr-subscribers-llvm-selectiondag @llvm/pr-subscribers-backend-aarch64 Author: zhijian lin (diggerlin) ChangesThe PR will fix the issue #122728 This patch addresses the signed/zero extension of poison by using a poison value of the extended type instead of a constant zero of the extended type. Patch is 44.66 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/136701.diff 31 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 5269962ea2062..5fd6d604daa98 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -6322,6 +6322,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
Flags.setNonNeg(N1->getFlags().hasNonNeg());
return getNode(OpOpcode, DL, VT, N1.getOperand(0), Flags);
}
+
+ if (OpOpcode == ISD::POISON)
+ return getPOISON(VT);
+
if (N1.isUndef())
// sext(undef) = 0, because the top bits will all be the same.
return getConstant(0, DL, VT);
@@ -6342,6 +6346,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
Flags.setNonNeg(N1->getFlags().hasNonNeg());
return getNode(ISD::ZERO_EXTEND, DL, VT, N1.getOperand(0), Flags);
}
+
+ if (OpOpcode == ISD::POISON)
+ return getPOISON(VT);
+
if (N1.isUndef())
// zext(undef) = 0, because the top bits will be zero.
return getConstant(0, DL, VT);
diff --git a/llvm/test/CodeGen/AArch64/arm64-bitfield-extract.ll b/llvm/test/CodeGen/AArch64/arm64-bitfield-extract.ll
index 62ff4bbcc9c27..4b1fff642e5f5 100644
--- a/llvm/test/CodeGen/AArch64/arm64-bitfield-extract.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-bitfield-extract.ll
@@ -1019,7 +1019,7 @@ define i16 @test_ignored_rightbits(i32 %dst, i32 %in) {
define void @sameOperandBFI(i64 %src, i64 %src2, ptr %ptr) {
; LLC-LABEL: sameOperandBFI:
; LLC: // %bb.0: // %entry
-; LLC-NEXT: cbnz wzr, .LBB30_2
+; LLC-NEXT: cbnz w8, .LBB30_2
; LLC-NEXT: // %bb.1: // %if.else
; LLC-NEXT: lsr x8, x0, #47
; LLC-NEXT: and w9, w1, #0x3
diff --git a/llvm/test/CodeGen/AArch64/optimize-cond-branch.ll b/llvm/test/CodeGen/AArch64/optimize-cond-branch.ll
index fdf972990e745..8a56360935717 100644
--- a/llvm/test/CodeGen/AArch64/optimize-cond-branch.ll
+++ b/llvm/test/CodeGen/AArch64/optimize-cond-branch.ll
@@ -16,7 +16,7 @@ define void @func() uwtable {
; CHECK-NEXT: mov w8, #1 // =0x1
; CHECK-NEXT: cbnz w8, .LBB0_3
; CHECK-NEXT: // %bb.1: // %b1
-; CHECK-NEXT: cbz wzr, .LBB0_4
+; CHECK-NEXT: cbz w8, .LBB0_4
; CHECK-NEXT: // %bb.2: // %b3
; CHECK-NEXT: ldr w8, [x8]
; CHECK-NEXT: and w0, w8, #0x100
diff --git a/llvm/test/CodeGen/AArch64/sve-extract-element.ll b/llvm/test/CodeGen/AArch64/sve-extract-element.ll
index 9ebeb098c60c0..c340df1385124 100644
--- a/llvm/test/CodeGen/AArch64/sve-extract-element.ll
+++ b/llvm/test/CodeGen/AArch64/sve-extract-element.ll
@@ -523,7 +523,6 @@ define double @test_lanex_2xf64(<vscale x 2 x double> %a, i32 %x) #0 {
define i32 @test_undef_lane_4xi32(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: test_undef_lane_4xi32:
; CHECK: // %bb.0:
-; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%b = extractelement <vscale x 4 x i32> %a, i32 poison
ret i32 %b
diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.16bit.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.16bit.ll
index b3d9e61b65b6f..b0627c3d4e77d 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.16bit.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.16bit.ll
@@ -92,27 +92,17 @@ define i16 @bitcast_f16_to_i16(half %a, i32 %b) {
; GCN-LABEL: bitcast_f16_to_i16:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_mov_b32_e32 v2, v0
-; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
-; GCN-NEXT: v_cvt_f16_f32_e32 v1, v2
+; GCN-NEXT: v_cvt_f16_f32_e32 v0, v0
; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GCN-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
-; GCN-NEXT: s_cbranch_execnz .LBB1_3
-; GCN-NEXT: ; %bb.1: ; %Flow
-; GCN-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5]
-; GCN-NEXT: s_cbranch_execnz .LBB1_4
-; GCN-NEXT: .LBB1_2: ; %end
-; GCN-NEXT: s_or_b64 exec, exec, s[4:5]
-; GCN-NEXT: s_setpc_b64 s[30:31]
-; GCN-NEXT: .LBB1_3: ; %cmp.false
-; GCN-NEXT: v_mov_b32_e32 v0, v1
; GCN-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5]
; GCN-NEXT: s_cbranch_execz .LBB1_2
-; GCN-NEXT: .LBB1_4: ; %cmp.true
-; GCN-NEXT: v_cvt_f32_f16_e32 v0, v1
+; GCN-NEXT: ; %bb.1:
+; GCN-NEXT: v_cvt_f32_f16_e32 v0, v0
; GCN-NEXT: v_add_f32_e32 v0, 0x38000000, v0
; GCN-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GCN-NEXT: .LBB1_2:
; GCN-NEXT: s_or_b64 exec, exec, s[4:5]
; GCN-NEXT: s_setpc_b64 s[30:31]
;
@@ -249,10 +239,9 @@ define i16 @bitcast_bf16_to_i16(bfloat %a, i32 %b) {
; GCN-LABEL: bitcast_bf16_to_i16:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_mov_b32_e32 v2, v0
-; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
-; GCN-NEXT: v_mul_f32_e32 v1, 1.0, v2
+; GCN-NEXT: v_mul_f32_e32 v1, 1.0, v0
+; GCN-NEXT: ; implicit-def: $vgpr0
; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GCN-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
; GCN-NEXT: s_cbranch_execnz .LBB3_3
diff --git a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll
index e2dfcf55b7856..c019c83da5ef8 100644
--- a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll
+++ b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll
@@ -8383,10 +8383,10 @@ define amdgpu_kernel void @uniform_or_i16(ptr addrspace(1) %result, ptr addrspac
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GFX7LESS-NEXT: s_load_dword s6, s[4:5], 0xd
-; GFX7LESS-NEXT: v_mov_b32_e32 v0, 0
-; GFX7LESS-NEXT: v_mbcnt_lo_u32_b32_e64 v1, exec_lo, 0
-; GFX7LESS-NEXT: v_mbcnt_hi_u32_b32_e32 v1, exec_hi, v1
-; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
+; GFX7LESS-NEXT: v_mbcnt_lo_u32_b32_e64 v0, exec_lo, 0
+; GFX7LESS-NEXT: v_mbcnt_hi_u32_b32_e32 v0, exec_hi, v0
+; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; GFX7LESS-NEXT: ; implicit-def: $vgpr0
; GFX7LESS-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GFX7LESS-NEXT: s_cbranch_execz .LBB15_2
; GFX7LESS-NEXT: ; %bb.1:
@@ -8731,10 +8731,10 @@ define amdgpu_kernel void @uniform_add_i16(ptr addrspace(1) %result, ptr addrspa
; GFX7LESS-NEXT: s_mov_b64 s[6:7], exec
; GFX7LESS-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GFX7LESS-NEXT: s_load_dword s10, s[4:5], 0xd
-; GFX7LESS-NEXT: v_mov_b32_e32 v0, 0
-; GFX7LESS-NEXT: v_mbcnt_lo_u32_b32_e64 v1, s6, 0
-; GFX7LESS-NEXT: v_mbcnt_hi_u32_b32_e32 v4, s7, v1
+; GFX7LESS-NEXT: v_mbcnt_lo_u32_b32_e64 v0, s6, 0
+; GFX7LESS-NEXT: v_mbcnt_hi_u32_b32_e32 v4, s7, v0
; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
+; GFX7LESS-NEXT: ; implicit-def: $vgpr0
; GFX7LESS-NEXT: s_and_saveexec_b64 s[8:9], vcc
; GFX7LESS-NEXT: s_cbranch_execz .LBB16_4
; GFX7LESS-NEXT: ; %bb.1:
diff --git a/llvm/test/CodeGen/AMDGPU/ctpop16.ll b/llvm/test/CodeGen/AMDGPU/ctpop16.ll
index 6bf126af5ade2..fb418afb8b039 100644
--- a/llvm/test/CodeGen/AMDGPU/ctpop16.ll
+++ b/llvm/test/CodeGen/AMDGPU/ctpop16.ll
@@ -1292,7 +1292,7 @@ define amdgpu_kernel void @ctpop_i16_in_br(ptr addrspace(1) %out, ptr addrspace(
; SI-NEXT: buffer_store_short v0, off, s[0:3], 0
; SI-NEXT: s_endpgm
; SI-NEXT: .LBB14_4:
-; SI-NEXT: v_mov_b32_e32 v0, 0
+; SI-NEXT: ; implicit-def: $vgpr0
; SI-NEXT: s_branch .LBB14_2
;
; VI-LABEL: ctpop_i16_in_br:
@@ -1329,48 +1329,47 @@ define amdgpu_kernel void @ctpop_i16_in_br(ptr addrspace(1) %out, ptr addrspace(
; EG: ; %bb.0: ; %entry
; EG-NEXT: ALU 0, @20, KC0[], KC1[]
; EG-NEXT: TEX 0 @14
-; EG-NEXT: ALU_PUSH_BEFORE 4, @21, KC0[], KC1[]
+; EG-NEXT: ALU_PUSH_BEFORE 3, @21, KC0[], KC1[]
; EG-NEXT: JUMP @7 POP:1
-; EG-NEXT: ALU 0, @26, KC0[CB0:0-32], KC1[]
+; EG-NEXT: ALU 0, @25, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @16
-; EG-NEXT: ALU_POP_AFTER 1, @27, KC0[], KC1[]
-; EG-NEXT: ALU_PUSH_BEFORE 2, @29, KC0[CB0:0-32], KC1[]
+; EG-NEXT: ALU_POP_AFTER 1, @26, KC0[], KC1[]
+; EG-NEXT: ALU_PUSH_BEFORE 2, @28, KC0[CB0:0-32], KC1[]
; EG-NEXT: JUMP @11 POP:1
; EG-NEXT: TEX 0 @18
-; EG-NEXT: ALU_POP_AFTER 0, @32, KC0[], KC1[]
-; EG-NEXT: ALU 11, @33, KC0[], KC1[]
+; EG-NEXT: ALU_POP_AFTER 0, @31, KC0[], KC1[]
+; EG-NEXT: ALU 11, @32, KC0[], KC1[]
; EG-NEXT: MEM_RAT MSKOR T1.XW, T0.X
; EG-NEXT: CF_END
; EG-NEXT: Fetch clause starting at 14:
-; EG-NEXT: VTX_READ_16 T2.X, T1.X, 46, #3
+; EG-NEXT: VTX_READ_16 T1.X, T0.X, 46, #3
; EG-NEXT: Fetch clause starting at 16:
-; EG-NEXT: VTX_READ_16 T0.X, T0.X, 2, #1
+; EG-NEXT: VTX_READ_16 T1.X, T1.X, 2, #1
; EG-NEXT: Fetch clause starting at 18:
-; EG-NEXT: VTX_READ_16 T0.X, T1.X, 44, #3
+; EG-NEXT: VTX_READ_16 T0.X, T0.X, 44, #3
; EG-NEXT: ALU clause starting at 20:
-; EG-NEXT: MOV * T1.X, 0.0,
+; EG-NEXT: MOV * T0.X, 0.0,
; EG-NEXT: ALU clause starting at 21:
-; EG-NEXT: MOV T0.X, literal.x,
-; EG-NEXT: MOV T1.W, literal.y,
-; EG-NEXT: SETNE_INT * T0.W, T2.X, 0.0,
-; EG-NEXT: 0(0.000000e+00), 1(1.401298e-45)
+; EG-NEXT: MOV T1.W, literal.x,
+; EG-NEXT: SETNE_INT * T0.W, T1.X, 0.0,
+; EG-NEXT: 1(1.401298e-45), 0(0.000000e+00)
; EG-NEXT: PRED_SETNE_INT * ExecMask,PredicateBit (MASKED), PS, 0.0,
+; EG-NEXT: ALU clause starting at 25:
+; EG-NEXT: MOV * T1.X, KC0[2].Z,
; EG-NEXT: ALU clause starting at 26:
-; EG-NEXT: MOV * T0.X, KC0[2].Z,
-; EG-NEXT: ALU clause starting at 27:
; EG-NEXT: MOV * T1.W, literal.x,
; EG-NEXT: 0(0.000000e+00), 0(0.000000e+00)
-; EG-NEXT: ALU clause starting at 29:
+; EG-NEXT: ALU clause starting at 28:
; EG-NEXT: MOV T0.W, KC0[2].Y,
; EG-NEXT: SETE_INT * T1.W, T1.W, 0.0,
; EG-NEXT: PRED_SETE_INT * ExecMask,PredicateBit (MASKED), PS, 0.0,
+; EG-NEXT: ALU clause starting at 31:
+; EG-NEXT: BCNT_INT * T1.X, T0.X,
; EG-NEXT: ALU clause starting at 32:
-; EG-NEXT: BCNT_INT * T0.X, T0.X,
-; EG-NEXT: ALU clause starting at 33:
; EG-NEXT: LSHL * T1.W, T0.W, literal.x,
; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
; EG-NEXT: AND_INT T1.W, PV.W, literal.x,
-; EG-NEXT: AND_INT * T2.W, T0.X, literal.y,
+; EG-NEXT: AND_INT * T2.W, T1.X, literal.y,
; EG-NEXT: 24(3.363116e-44), 65535(9.183409e-41)
; EG-NEXT: LSHL T1.X, PS, PV.W,
; EG-NEXT: LSHL * T1.W, literal.x, PV.W,
diff --git a/llvm/test/CodeGen/AMDGPU/dead-machine-elim-after-dead-lane.ll b/llvm/test/CodeGen/AMDGPU/dead-machine-elim-after-dead-lane.ll
index d616fecfdc1ff..f68b035334fd5 100644
--- a/llvm/test/CodeGen/AMDGPU/dead-machine-elim-after-dead-lane.ll
+++ b/llvm/test/CodeGen/AMDGPU/dead-machine-elim-after-dead-lane.ll
@@ -6,15 +6,16 @@
define amdgpu_kernel void @foo() {
; CHECK-LABEL: foo:
; CHECK: ; %bb.0: ; %entry
-; CHECK-NEXT: s_cbranch_execnz .LBB0_2
-; CHECK-NEXT: ; %bb.1: ; %LeafBlock1
-; CHECK-NEXT: .LBB0_2: ; %foo.exit
+; CHECK-NEXT: ; %bb.1: ; %LeafBlock1
+; CHECK-NEXT: s_cmp_eq_u32 s0, 10
+; CHECK-NEXT: s_cbranch_scc1 .LBB0_3
+; CHECK-NEXT: ; %bb.2:
; CHECK-NEXT: s_mov_b32 s3, 0xf000
; CHECK-NEXT: s_mov_b32 s2, -1
; CHECK-NEXT: v_mov_b32_e32 v0, 0
; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], 0
; CHECK-NEXT: s_endpgm
-; CHECK-NEXT: ; %bb.3: ; %sw.bb10
+; CHECK-NEXT: .LBB0_3:
entry:
switch i8 poison, label %foo.exit [
i8 4, label %sw.bb4
diff --git a/llvm/test/CodeGen/AMDGPU/mdt-preserving-crash.ll b/llvm/test/CodeGen/AMDGPU/mdt-preserving-crash.ll
index 3bb840eb51690..4b8ef2c9613a5 100644
--- a/llvm/test/CodeGen/AMDGPU/mdt-preserving-crash.ll
+++ b/llvm/test/CodeGen/AMDGPU/mdt-preserving-crash.ll
@@ -29,17 +29,13 @@ define protected amdgpu_kernel void @_RSENC_PRInit______________________________
; CHECK-NEXT: s_cmp_eq_u32 s4, 0
; CHECK-NEXT: s_cbranch_scc1 .LBB0_2
; CHECK-NEXT: ; %bb.3: ; %if.end60
-; CHECK-NEXT: s_mov_b64 vcc, exec
; CHECK-NEXT: s_cbranch_execz .LBB0_11
; CHECK-NEXT: ; %bb.4: ; %if.end5.i
-; CHECK-NEXT: s_mov_b64 vcc, vcc
-; CHECK-NEXT: s_cbranch_vccz .LBB0_11
+; CHECK-NEXT: s_cbranch_scc0 .LBB0_11
; CHECK-NEXT: ; %bb.5: ; %if.end5.i314
-; CHECK-NEXT: s_mov_b64 vcc, exec
-; CHECK-NEXT: s_cbranch_execz .LBB0_11
+; CHECK-NEXT: s_cbranch_scc0 .LBB0_11
; CHECK-NEXT: ; %bb.6: ; %if.end5.i338
-; CHECK-NEXT: s_mov_b64 vcc, vcc
-; CHECK-NEXT: s_cbranch_vccz .LBB0_11
+; CHECK-NEXT: s_cbranch_scc0 .LBB0_11
; CHECK-NEXT: ; %bb.7: ; %if.end5.i362
; CHECK-NEXT: v_mov_b32_e32 v0, 0
; CHECK-NEXT: s_getpc_b64 s[4:5]
@@ -50,7 +46,7 @@ define protected amdgpu_kernel void @_RSENC_PRInit______________________________
; CHECK-NEXT: buffer_store_byte v0, v0, s[0:3], 0 offen
; CHECK-NEXT: s_waitcnt vmcnt(1)
; CHECK-NEXT: buffer_store_byte v1, off, s[0:3], 0 offset:257
-; CHECK-NEXT: s_cbranch_execz .LBB0_11
+; CHECK-NEXT: s_cbranch_scc0 .LBB0_11
; CHECK-NEXT: ; %bb.8: ; %if.end5.i400
; CHECK-NEXT: flat_load_ubyte v0, v[0:1]
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
diff --git a/llvm/test/CodeGen/PowerPC/undef-args.ll b/llvm/test/CodeGen/PowerPC/undef-args.ll
index 6b698ef478b51..cdaaec2add3ee 100644
--- a/llvm/test/CodeGen/PowerPC/undef-args.ll
+++ b/llvm/test/CodeGen/PowerPC/undef-args.ll
@@ -1,5 +1,7 @@
-;; Tests that extending poison results in undef.
-;; Also tests that there are redundant instructions loading 0 into argument registers for unused arguments.
+;; Tests that extending poison results in poison.
+;; Also tests that there are no redundant instructions loading 0 into argument registers for unused arguments.
+
+; REQUIRES: asserts
; REQUIRES: asserts
@@ -62,7 +64,7 @@ entry:
; CHECKISEL64-NEXT: t7: i64 = Register $x1
; CHECKISEL64-NEXT: t0: ch,glue = EntryToken
; CHECKISEL64-NEXT: t6: ch,glue = callseq_start t0, TargetConstant:i64<112>, TargetConstant:i64<0>
-; CHECKISEL64-NEXT: t11: ch,glue = CopyToReg t6, Register:i64 $x3, Constant:i64<0>
+; CHECKISEL64-NEXT: t11: ch,glue = CopyToReg t6, Register:i64 $x3, poison:i64
; CHECKISEL64-NEXT: t13: ch,glue = CopyToReg t11, Register:i64 $x4, Constant:i64<255>, t11:1
; CHECKISEL64-NEXT: t17: ch,glue = PPCISD::CALL_NOP t13, MCSymbol:i64, Register:i64 $x3, Register:i64 $x4, Register:i64 $x2, RegisterMask:Untyped, t13:1
; CHECKISEL64-NEXT: t18: ch,glue = callseq_end t17, TargetConstant:i64<112>, TargetConstant:i64<0>, t17:1
@@ -72,7 +74,6 @@ entry:
; CHECKASM64-NEXT: # %bb.0: # %entry
; CHECKASM64-NEXT: mflr 0
; CHECKASM64-NEXT: stdu 1, -112(1)
-; CHECKASM64-NEXT: li 3, 0
; CHECKASM64-NEXT: li 4, 255
; CHECKASM64-NEXT: std 0, 128(1)
; CHECKASM64-NEXT: bl .bar32
@@ -104,7 +105,7 @@ entry:
; CHECKISEL32-NEXT: t9: i32 = Register $r1
; CHECKISEL32-NEXT: t0: ch,glue = EntryToken
; CHECKISEL32-NEXT: t8: ch,glue = callseq_start t0, TargetConstant:i32<56>, TargetConstant:i32<0>
-; CHECKISEL32-NEXT: t11: ch,glue = CopyToReg t8, Register:i32 $r3, Constant:i32<0>
+; CHECKISEL32-NEXT: t11: ch,glue = CopyToReg t8, Register:i32 $r3, poison:i32
; CHECKISEL32-NEXT: t13: ch,glue = CopyToReg t11, Register:i32 $r4, Constant:i32<255>, t11:1
; CHECKISEL32-NEXT: t17: ch,glue = PPCISD::CALL_NOP t13, MCSymbol:i32, Register:i32 $r3, Register:i32 $r4, Register:i32 $r2, RegisterMask:Untyped, t13:1
; CHECKISEL32-NEXT: t18: ch,glue = callseq_end t17, TargetConstant:i32<56>, TargetConstant:i32<0>, t17:1
@@ -114,7 +115,6 @@ entry:
; CHECKASM32-NEXT: # %bb.0: # %entry
; CHECKASM32-NEXT: mflr 0
; CHECKASM32-NEXT: stwu 1, -64(1)
-; CHECKASM32-NEXT: li 3, 0
; CHECKASM32-NEXT: li 4, 255
; CHECKASM32-NEXT: stw 0, 72(1)
; CHECKASM32-NEXT: bl .bar8
@@ -128,7 +128,6 @@ entry:
; CHECKASM64-NEXT: # %bb.0: # %entry
; CHECKASM64-NEXT: mflr 0
; CHECKASM64-NEXT: stdu 1, -112(1)
-; CHECKASM64-NEXT: li 3, 0
; CHECKASM64-NEXT: li 4, 255
; CHECKASM64-NEXT: std 0, 128(1)
; CHECKASM64-NEXT: bl .bar8
@@ -143,12 +142,12 @@ entry:
; CHECKISEL64-NEXT: t1: i64 = GlobalAddress<ptr @bar8> 0
; CHECKISEL64-NEXT: t2: i8 = poison
; CHECKISEL64-NEXT: t3: i8 = Constant<-1>
-; CHECKISEL64-NEXT: t4: i32 = Constant<0>
+; CHECKISEL64-NEXT: t4: i32 = poison
; CHECKISEL64-NEXT: t5: i32 = Constant<255>
; CHECKISEL64-NEXT: t9: i64 = Register $x1
; CHECKISEL64-NEXT: t0: ch,glue = EntryToken
; CHECKISEL64-NEXT: t8: ch,glue = callseq_start t0, TargetConstant:i64<112>, TargetConstant:i64<0>
-; CHECKISEL64-NEXT: t13: ch,glue = CopyToReg t8, Register:i64 $x3, Constant:i64<0>
+; CHECKISEL64-NEXT: t13: ch,glue = CopyToReg t8, Register:i64 $x3, poison:i64
; CHECKISEL64-NEXT: t15: ch,glue = CopyToReg t13, Register:i64 $x4, Constant:i64<255>, t13:1
; CHECKISEL64-NEXT: t19: ch,glue = PPCISD::CALL_NOP t15, MCSymbol:i64, Register:i64 $x3, Register:i64 $x4, Register:i64 $x2, RegisterMask:Untyped, t15:1
; CHECKISEL64-NEXT: t20: ch,glue = callseq_end t19, TargetConstant:i64<112>, TargetConstant:i64<0>, t19:1
diff --git a/llvm/test/CodeGen/RISCV/miss-sp-restore-eh.ll b/llvm/test/CodeGen/RISCV/miss-sp-restore-eh.ll
index a5847365159a8..395fc99ea0536 100644
--- a/llvm/test/CodeGen/RISCV/miss-sp-restore-eh.ll
+++ b/llvm/test/CodeGen/RISCV/miss-sp-restore-eh.ll
@@ -27,14 +27,6 @@ define signext i32 @foo() #1 personality ptr @__gxx_personality_v0 {
; CHECK-NEXT: .cfi_remember_state
; CHECK-NEXT: .Ltmp0:
; CHECK-NEXT: addi sp, sp, -32
-; CHECK-NEXT: li a0, 0
-; CHECK-NEXT: li a1, 0
-; CHECK-NEXT: li a2, 0
-; CHECK-NEXT: li a3, 0
-; CHECK-NEXT: li a4, 0
-; CHECK-NEXT: li a5, 0
-; CHECK-NEXT: li a6, 0
-; CHECK-NEXT: li a7, 0
; CHECK-NEXT: call _Z3fooiiiiiiiiiiPi
; CHECK-NEXT: addi sp, sp, 32
; CHECK-NEXT: .Ltmp1:
diff --git a/llvm/test/CodeGen/VE/Vector/ticket-64420.ll b/llvm/test/CodeGen/VE/Vector/ticket-64420.ll
index c3fe443f911c9..41e28fe82661b 100644
--- a/llvm/test/CodeGen/VE/Vector/ticket-64420.ll
+++ b/llvm/test/CodeGen/VE/Vector/ticket-64420.ll
@@ -20,6 +20,7 @@
; SCALAR-LABEL: func:
; SCALAR: # %bb.1:
+; SCALAR: or %s1, 0, (0)1
; SCALAR-NEXT: st %s1, 8(, %s0)
; SCALAR-NEXT: st %s1, (, %s0)
; SCALAR-NEXT: b.l.t (, %s10)
diff --git a/llvm/test/CodeGen/X86/avx512-i1test.ll b/llvm/test/CodeGen/X86/avx512-i1test.ll
index d8683df5cbf7a..d43f05bbd5a1d 100644
--- a/llvm/test/CodeGen/X86/avx512-i1test.ll
+++ b/llvm/test/CodeGen/X86/avx512-i1test.ll
@@ -8,18 +8,19 @@ target triple = "x86_64-unknown-linux-gnu"
define void @func() {
; CHECK-LABEL: func:
; CHECK: # %bb.0: # %bb1
-; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: je .LBB0_1
; CHECK-NEXT: # %bb.3: # %L_30
; CHECK-NEXT: retq
-; CHECK-NEXT: .LBB0_1: # %bb56
-; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: .p2align 4
-; CHECK-NEXT: .LBB0_2: # %bb33
-; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: testb %al, %al
-; CHECK-NEXT: jmp .LBB0_2
+; CHECK-NEXT: .LBB0_1: # %bb33
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: testb %al, %al
+; CHECK-NEXT: jne .LBB0_1
+; CHECK-NEXT: # %bb.2: # %bb35
+; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT: testb %al, %al
+; CHECK-NEXT: jmp .LBB0_1
bb1:
br i1 poison, label %L_10, label %L_10
diff --git a/llvm/test/CodeGen/X86/bfloat.ll b/llvm/test/CodeGen/X86/bfloat.ll
index 8449107f39e91..684e2921b789e 100644
--- a/llvm/test/CodeGen/X86/bfloat.ll
+++ b/llvm/test/CodeGen/X86/bfloat.ll
@@ -842,7 +842,6 @@ define <32 x bfloat> @pr63017_2() nounwind {
;
; SSE2-LABEL: pr63017_2:
; SSE2: # %bb.0:
-; SSE2-NEXT: xorl %eax, %eax
; SSE2-NEXT: testb %al, %al
; SSE2-NEXT: jne .LBB16_1
; SSE2-NEXT: # %bb.2: # %cond.load
@@ -1087,7 +1086,6 @@ define <32 x bfloat> @pr63017_2() nounwind {
; AVXNC-LABEL: pr63017_2:
; AVXNC: # %bb.0:
; AVXNC-NEXT: vbroadcastss {{.*#+}} ymm0 = [49024,49024,49024,49024,49024,49024,49024,49024,49024,49024,49024,49024,49024,49024,49024,49024]
-; AVXNC-NEXT: xorl %eax, %eax
; AVXNC-NEXT: testb %al, %al
...
[truncated]
|
@llvm/pr-subscribers-backend-x86 Author: zhijian lin (diggerlin) ChangesThe PR will fix the issue #122728 This patch addresses the signed/zero extension of poison by using a poison value of the extended type instead of a constant zero of the extended type. Patch is 44.66 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/136701.diff 31 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 5269962ea2062..5fd6d604daa98 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -6322,6 +6322,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
Flags.setNonNeg(N1->getFlags().hasNonNeg());
return getNode(OpOpcode, DL, VT, N1.getOperand(0), Flags);
}
+
+ if (OpOpcode == ISD::POISON)
+ return getPOISON(VT);
+
if (N1.isUndef())
// sext(undef) = 0, because the top bits will all be the same.
return getConstant(0, DL, VT);
@@ -6342,6 +6346,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
Flags.setNonNeg(N1->getFlags().hasNonNeg());
return getNode(ISD::ZERO_EXTEND, DL, VT, N1.getOperand(0), Flags);
}
+
+ if (OpOpcode == ISD::POISON)
+ return getPOISON(VT);
+
if (N1.isUndef())
// zext(undef) = 0, because the top bits will be zero.
return getConstant(0, DL, VT);
diff --git a/llvm/test/CodeGen/AArch64/arm64-bitfield-extract.ll b/llvm/test/CodeGen/AArch64/arm64-bitfield-extract.ll
index 62ff4bbcc9c27..4b1fff642e5f5 100644
--- a/llvm/test/CodeGen/AArch64/arm64-bitfield-extract.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-bitfield-extract.ll
@@ -1019,7 +1019,7 @@ define i16 @test_ignored_rightbits(i32 %dst, i32 %in) {
define void @sameOperandBFI(i64 %src, i64 %src2, ptr %ptr) {
; LLC-LABEL: sameOperandBFI:
; LLC: // %bb.0: // %entry
-; LLC-NEXT: cbnz wzr, .LBB30_2
+; LLC-NEXT: cbnz w8, .LBB30_2
; LLC-NEXT: // %bb.1: // %if.else
; LLC-NEXT: lsr x8, x0, #47
; LLC-NEXT: and w9, w1, #0x3
diff --git a/llvm/test/CodeGen/AArch64/optimize-cond-branch.ll b/llvm/test/CodeGen/AArch64/optimize-cond-branch.ll
index fdf972990e745..8a56360935717 100644
--- a/llvm/test/CodeGen/AArch64/optimize-cond-branch.ll
+++ b/llvm/test/CodeGen/AArch64/optimize-cond-branch.ll
@@ -16,7 +16,7 @@ define void @func() uwtable {
; CHECK-NEXT: mov w8, #1 // =0x1
; CHECK-NEXT: cbnz w8, .LBB0_3
; CHECK-NEXT: // %bb.1: // %b1
-; CHECK-NEXT: cbz wzr, .LBB0_4
+; CHECK-NEXT: cbz w8, .LBB0_4
; CHECK-NEXT: // %bb.2: // %b3
; CHECK-NEXT: ldr w8, [x8]
; CHECK-NEXT: and w0, w8, #0x100
diff --git a/llvm/test/CodeGen/AArch64/sve-extract-element.ll b/llvm/test/CodeGen/AArch64/sve-extract-element.ll
index 9ebeb098c60c0..c340df1385124 100644
--- a/llvm/test/CodeGen/AArch64/sve-extract-element.ll
+++ b/llvm/test/CodeGen/AArch64/sve-extract-element.ll
@@ -523,7 +523,6 @@ define double @test_lanex_2xf64(<vscale x 2 x double> %a, i32 %x) #0 {
define i32 @test_undef_lane_4xi32(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: test_undef_lane_4xi32:
; CHECK: // %bb.0:
-; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%b = extractelement <vscale x 4 x i32> %a, i32 poison
ret i32 %b
diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.16bit.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.16bit.ll
index b3d9e61b65b6f..b0627c3d4e77d 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.16bit.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.16bit.ll
@@ -92,27 +92,17 @@ define i16 @bitcast_f16_to_i16(half %a, i32 %b) {
; GCN-LABEL: bitcast_f16_to_i16:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_mov_b32_e32 v2, v0
-; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
-; GCN-NEXT: v_cvt_f16_f32_e32 v1, v2
+; GCN-NEXT: v_cvt_f16_f32_e32 v0, v0
; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GCN-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
-; GCN-NEXT: s_cbranch_execnz .LBB1_3
-; GCN-NEXT: ; %bb.1: ; %Flow
-; GCN-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5]
-; GCN-NEXT: s_cbranch_execnz .LBB1_4
-; GCN-NEXT: .LBB1_2: ; %end
-; GCN-NEXT: s_or_b64 exec, exec, s[4:5]
-; GCN-NEXT: s_setpc_b64 s[30:31]
-; GCN-NEXT: .LBB1_3: ; %cmp.false
-; GCN-NEXT: v_mov_b32_e32 v0, v1
; GCN-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5]
; GCN-NEXT: s_cbranch_execz .LBB1_2
-; GCN-NEXT: .LBB1_4: ; %cmp.true
-; GCN-NEXT: v_cvt_f32_f16_e32 v0, v1
+; GCN-NEXT: ; %bb.1:
+; GCN-NEXT: v_cvt_f32_f16_e32 v0, v0
; GCN-NEXT: v_add_f32_e32 v0, 0x38000000, v0
; GCN-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GCN-NEXT: .LBB1_2:
; GCN-NEXT: s_or_b64 exec, exec, s[4:5]
; GCN-NEXT: s_setpc_b64 s[30:31]
;
@@ -249,10 +239,9 @@ define i16 @bitcast_bf16_to_i16(bfloat %a, i32 %b) {
; GCN-LABEL: bitcast_bf16_to_i16:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_mov_b32_e32 v2, v0
-; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
-; GCN-NEXT: v_mul_f32_e32 v1, 1.0, v2
+; GCN-NEXT: v_mul_f32_e32 v1, 1.0, v0
+; GCN-NEXT: ; implicit-def: $vgpr0
; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GCN-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
; GCN-NEXT: s_cbranch_execnz .LBB3_3
diff --git a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll
index e2dfcf55b7856..c019c83da5ef8 100644
--- a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll
+++ b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll
@@ -8383,10 +8383,10 @@ define amdgpu_kernel void @uniform_or_i16(ptr addrspace(1) %result, ptr addrspac
; GFX7LESS: ; %bb.0:
; GFX7LESS-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GFX7LESS-NEXT: s_load_dword s6, s[4:5], 0xd
-; GFX7LESS-NEXT: v_mov_b32_e32 v0, 0
-; GFX7LESS-NEXT: v_mbcnt_lo_u32_b32_e64 v1, exec_lo, 0
-; GFX7LESS-NEXT: v_mbcnt_hi_u32_b32_e32 v1, exec_hi, v1
-; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
+; GFX7LESS-NEXT: v_mbcnt_lo_u32_b32_e64 v0, exec_lo, 0
+; GFX7LESS-NEXT: v_mbcnt_hi_u32_b32_e32 v0, exec_hi, v0
+; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; GFX7LESS-NEXT: ; implicit-def: $vgpr0
; GFX7LESS-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GFX7LESS-NEXT: s_cbranch_execz .LBB15_2
; GFX7LESS-NEXT: ; %bb.1:
@@ -8731,10 +8731,10 @@ define amdgpu_kernel void @uniform_add_i16(ptr addrspace(1) %result, ptr addrspa
; GFX7LESS-NEXT: s_mov_b64 s[6:7], exec
; GFX7LESS-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GFX7LESS-NEXT: s_load_dword s10, s[4:5], 0xd
-; GFX7LESS-NEXT: v_mov_b32_e32 v0, 0
-; GFX7LESS-NEXT: v_mbcnt_lo_u32_b32_e64 v1, s6, 0
-; GFX7LESS-NEXT: v_mbcnt_hi_u32_b32_e32 v4, s7, v1
+; GFX7LESS-NEXT: v_mbcnt_lo_u32_b32_e64 v0, s6, 0
+; GFX7LESS-NEXT: v_mbcnt_hi_u32_b32_e32 v4, s7, v0
; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
+; GFX7LESS-NEXT: ; implicit-def: $vgpr0
; GFX7LESS-NEXT: s_and_saveexec_b64 s[8:9], vcc
; GFX7LESS-NEXT: s_cbranch_execz .LBB16_4
; GFX7LESS-NEXT: ; %bb.1:
diff --git a/llvm/test/CodeGen/AMDGPU/ctpop16.ll b/llvm/test/CodeGen/AMDGPU/ctpop16.ll
index 6bf126af5ade2..fb418afb8b039 100644
--- a/llvm/test/CodeGen/AMDGPU/ctpop16.ll
+++ b/llvm/test/CodeGen/AMDGPU/ctpop16.ll
@@ -1292,7 +1292,7 @@ define amdgpu_kernel void @ctpop_i16_in_br(ptr addrspace(1) %out, ptr addrspace(
; SI-NEXT: buffer_store_short v0, off, s[0:3], 0
; SI-NEXT: s_endpgm
; SI-NEXT: .LBB14_4:
-; SI-NEXT: v_mov_b32_e32 v0, 0
+; SI-NEXT: ; implicit-def: $vgpr0
; SI-NEXT: s_branch .LBB14_2
;
; VI-LABEL: ctpop_i16_in_br:
@@ -1329,48 +1329,47 @@ define amdgpu_kernel void @ctpop_i16_in_br(ptr addrspace(1) %out, ptr addrspace(
; EG: ; %bb.0: ; %entry
; EG-NEXT: ALU 0, @20, KC0[], KC1[]
; EG-NEXT: TEX 0 @14
-; EG-NEXT: ALU_PUSH_BEFORE 4, @21, KC0[], KC1[]
+; EG-NEXT: ALU_PUSH_BEFORE 3, @21, KC0[], KC1[]
; EG-NEXT: JUMP @7 POP:1
-; EG-NEXT: ALU 0, @26, KC0[CB0:0-32], KC1[]
+; EG-NEXT: ALU 0, @25, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @16
-; EG-NEXT: ALU_POP_AFTER 1, @27, KC0[], KC1[]
-; EG-NEXT: ALU_PUSH_BEFORE 2, @29, KC0[CB0:0-32], KC1[]
+; EG-NEXT: ALU_POP_AFTER 1, @26, KC0[], KC1[]
+; EG-NEXT: ALU_PUSH_BEFORE 2, @28, KC0[CB0:0-32], KC1[]
; EG-NEXT: JUMP @11 POP:1
; EG-NEXT: TEX 0 @18
-; EG-NEXT: ALU_POP_AFTER 0, @32, KC0[], KC1[]
-; EG-NEXT: ALU 11, @33, KC0[], KC1[]
+; EG-NEXT: ALU_POP_AFTER 0, @31, KC0[], KC1[]
+; EG-NEXT: ALU 11, @32, KC0[], KC1[]
; EG-NEXT: MEM_RAT MSKOR T1.XW, T0.X
; EG-NEXT: CF_END
; EG-NEXT: Fetch clause starting at 14:
-; EG-NEXT: VTX_READ_16 T2.X, T1.X, 46, #3
+; EG-NEXT: VTX_READ_16 T1.X, T0.X, 46, #3
; EG-NEXT: Fetch clause starting at 16:
-; EG-NEXT: VTX_READ_16 T0.X, T0.X, 2, #1
+; EG-NEXT: VTX_READ_16 T1.X, T1.X, 2, #1
; EG-NEXT: Fetch clause starting at 18:
-; EG-NEXT: VTX_READ_16 T0.X, T1.X, 44, #3
+; EG-NEXT: VTX_READ_16 T0.X, T0.X, 44, #3
; EG-NEXT: ALU clause starting at 20:
-; EG-NEXT: MOV * T1.X, 0.0,
+; EG-NEXT: MOV * T0.X, 0.0,
; EG-NEXT: ALU clause starting at 21:
-; EG-NEXT: MOV T0.X, literal.x,
-; EG-NEXT: MOV T1.W, literal.y,
-; EG-NEXT: SETNE_INT * T0.W, T2.X, 0.0,
-; EG-NEXT: 0(0.000000e+00), 1(1.401298e-45)
+; EG-NEXT: MOV T1.W, literal.x,
+; EG-NEXT: SETNE_INT * T0.W, T1.X, 0.0,
+; EG-NEXT: 1(1.401298e-45), 0(0.000000e+00)
; EG-NEXT: PRED_SETNE_INT * ExecMask,PredicateBit (MASKED), PS, 0.0,
+; EG-NEXT: ALU clause starting at 25:
+; EG-NEXT: MOV * T1.X, KC0[2].Z,
; EG-NEXT: ALU clause starting at 26:
-; EG-NEXT: MOV * T0.X, KC0[2].Z,
-; EG-NEXT: ALU clause starting at 27:
; EG-NEXT: MOV * T1.W, literal.x,
; EG-NEXT: 0(0.000000e+00), 0(0.000000e+00)
-; EG-NEXT: ALU clause starting at 29:
+; EG-NEXT: ALU clause starting at 28:
; EG-NEXT: MOV T0.W, KC0[2].Y,
; EG-NEXT: SETE_INT * T1.W, T1.W, 0.0,
; EG-NEXT: PRED_SETE_INT * ExecMask,PredicateBit (MASKED), PS, 0.0,
+; EG-NEXT: ALU clause starting at 31:
+; EG-NEXT: BCNT_INT * T1.X, T0.X,
; EG-NEXT: ALU clause starting at 32:
-; EG-NEXT: BCNT_INT * T0.X, T0.X,
-; EG-NEXT: ALU clause starting at 33:
; EG-NEXT: LSHL * T1.W, T0.W, literal.x,
; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
; EG-NEXT: AND_INT T1.W, PV.W, literal.x,
-; EG-NEXT: AND_INT * T2.W, T0.X, literal.y,
+; EG-NEXT: AND_INT * T2.W, T1.X, literal.y,
; EG-NEXT: 24(3.363116e-44), 65535(9.183409e-41)
; EG-NEXT: LSHL T1.X, PS, PV.W,
; EG-NEXT: LSHL * T1.W, literal.x, PV.W,
diff --git a/llvm/test/CodeGen/AMDGPU/dead-machine-elim-after-dead-lane.ll b/llvm/test/CodeGen/AMDGPU/dead-machine-elim-after-dead-lane.ll
index d616fecfdc1ff..f68b035334fd5 100644
--- a/llvm/test/CodeGen/AMDGPU/dead-machine-elim-after-dead-lane.ll
+++ b/llvm/test/CodeGen/AMDGPU/dead-machine-elim-after-dead-lane.ll
@@ -6,15 +6,16 @@
define amdgpu_kernel void @foo() {
; CHECK-LABEL: foo:
; CHECK: ; %bb.0: ; %entry
-; CHECK-NEXT: s_cbranch_execnz .LBB0_2
-; CHECK-NEXT: ; %bb.1: ; %LeafBlock1
-; CHECK-NEXT: .LBB0_2: ; %foo.exit
+; CHECK-NEXT: ; %bb.1: ; %LeafBlock1
+; CHECK-NEXT: s_cmp_eq_u32 s0, 10
+; CHECK-NEXT: s_cbranch_scc1 .LBB0_3
+; CHECK-NEXT: ; %bb.2:
; CHECK-NEXT: s_mov_b32 s3, 0xf000
; CHECK-NEXT: s_mov_b32 s2, -1
; CHECK-NEXT: v_mov_b32_e32 v0, 0
; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], 0
; CHECK-NEXT: s_endpgm
-; CHECK-NEXT: ; %bb.3: ; %sw.bb10
+; CHECK-NEXT: .LBB0_3:
entry:
switch i8 poison, label %foo.exit [
i8 4, label %sw.bb4
diff --git a/llvm/test/CodeGen/AMDGPU/mdt-preserving-crash.ll b/llvm/test/CodeGen/AMDGPU/mdt-preserving-crash.ll
index 3bb840eb51690..4b8ef2c9613a5 100644
--- a/llvm/test/CodeGen/AMDGPU/mdt-preserving-crash.ll
+++ b/llvm/test/CodeGen/AMDGPU/mdt-preserving-crash.ll
@@ -29,17 +29,13 @@ define protected amdgpu_kernel void @_RSENC_PRInit______________________________
; CHECK-NEXT: s_cmp_eq_u32 s4, 0
; CHECK-NEXT: s_cbranch_scc1 .LBB0_2
; CHECK-NEXT: ; %bb.3: ; %if.end60
-; CHECK-NEXT: s_mov_b64 vcc, exec
; CHECK-NEXT: s_cbranch_execz .LBB0_11
; CHECK-NEXT: ; %bb.4: ; %if.end5.i
-; CHECK-NEXT: s_mov_b64 vcc, vcc
-; CHECK-NEXT: s_cbranch_vccz .LBB0_11
+; CHECK-NEXT: s_cbranch_scc0 .LBB0_11
; CHECK-NEXT: ; %bb.5: ; %if.end5.i314
-; CHECK-NEXT: s_mov_b64 vcc, exec
-; CHECK-NEXT: s_cbranch_execz .LBB0_11
+; CHECK-NEXT: s_cbranch_scc0 .LBB0_11
; CHECK-NEXT: ; %bb.6: ; %if.end5.i338
-; CHECK-NEXT: s_mov_b64 vcc, vcc
-; CHECK-NEXT: s_cbranch_vccz .LBB0_11
+; CHECK-NEXT: s_cbranch_scc0 .LBB0_11
; CHECK-NEXT: ; %bb.7: ; %if.end5.i362
; CHECK-NEXT: v_mov_b32_e32 v0, 0
; CHECK-NEXT: s_getpc_b64 s[4:5]
@@ -50,7 +46,7 @@ define protected amdgpu_kernel void @_RSENC_PRInit______________________________
; CHECK-NEXT: buffer_store_byte v0, v0, s[0:3], 0 offen
; CHECK-NEXT: s_waitcnt vmcnt(1)
; CHECK-NEXT: buffer_store_byte v1, off, s[0:3], 0 offset:257
-; CHECK-NEXT: s_cbranch_execz .LBB0_11
+; CHECK-NEXT: s_cbranch_scc0 .LBB0_11
; CHECK-NEXT: ; %bb.8: ; %if.end5.i400
; CHECK-NEXT: flat_load_ubyte v0, v[0:1]
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
diff --git a/llvm/test/CodeGen/PowerPC/undef-args.ll b/llvm/test/CodeGen/PowerPC/undef-args.ll
index 6b698ef478b51..cdaaec2add3ee 100644
--- a/llvm/test/CodeGen/PowerPC/undef-args.ll
+++ b/llvm/test/CodeGen/PowerPC/undef-args.ll
@@ -1,5 +1,7 @@
-;; Tests that extending poison results in undef.
-;; Also tests that there are redundant instructions loading 0 into argument registers for unused arguments.
+;; Tests that extending poison results in poison.
+;; Also tests that there are no redundant instructions loading 0 into argument registers for unused arguments.
+
+; REQUIRES: asserts
; REQUIRES: asserts
@@ -62,7 +64,7 @@ entry:
; CHECKISEL64-NEXT: t7: i64 = Register $x1
; CHECKISEL64-NEXT: t0: ch,glue = EntryToken
; CHECKISEL64-NEXT: t6: ch,glue = callseq_start t0, TargetConstant:i64<112>, TargetConstant:i64<0>
-; CHECKISEL64-NEXT: t11: ch,glue = CopyToReg t6, Register:i64 $x3, Constant:i64<0>
+; CHECKISEL64-NEXT: t11: ch,glue = CopyToReg t6, Register:i64 $x3, poison:i64
; CHECKISEL64-NEXT: t13: ch,glue = CopyToReg t11, Register:i64 $x4, Constant:i64<255>, t11:1
; CHECKISEL64-NEXT: t17: ch,glue = PPCISD::CALL_NOP t13, MCSymbol:i64, Register:i64 $x3, Register:i64 $x4, Register:i64 $x2, RegisterMask:Untyped, t13:1
; CHECKISEL64-NEXT: t18: ch,glue = callseq_end t17, TargetConstant:i64<112>, TargetConstant:i64<0>, t17:1
@@ -72,7 +74,6 @@ entry:
; CHECKASM64-NEXT: # %bb.0: # %entry
; CHECKASM64-NEXT: mflr 0
; CHECKASM64-NEXT: stdu 1, -112(1)
-; CHECKASM64-NEXT: li 3, 0
; CHECKASM64-NEXT: li 4, 255
; CHECKASM64-NEXT: std 0, 128(1)
; CHECKASM64-NEXT: bl .bar32
@@ -104,7 +105,7 @@ entry:
; CHECKISEL32-NEXT: t9: i32 = Register $r1
; CHECKISEL32-NEXT: t0: ch,glue = EntryToken
; CHECKISEL32-NEXT: t8: ch,glue = callseq_start t0, TargetConstant:i32<56>, TargetConstant:i32<0>
-; CHECKISEL32-NEXT: t11: ch,glue = CopyToReg t8, Register:i32 $r3, Constant:i32<0>
+; CHECKISEL32-NEXT: t11: ch,glue = CopyToReg t8, Register:i32 $r3, poison:i32
; CHECKISEL32-NEXT: t13: ch,glue = CopyToReg t11, Register:i32 $r4, Constant:i32<255>, t11:1
; CHECKISEL32-NEXT: t17: ch,glue = PPCISD::CALL_NOP t13, MCSymbol:i32, Register:i32 $r3, Register:i32 $r4, Register:i32 $r2, RegisterMask:Untyped, t13:1
; CHECKISEL32-NEXT: t18: ch,glue = callseq_end t17, TargetConstant:i32<56>, TargetConstant:i32<0>, t17:1
@@ -114,7 +115,6 @@ entry:
; CHECKASM32-NEXT: # %bb.0: # %entry
; CHECKASM32-NEXT: mflr 0
; CHECKASM32-NEXT: stwu 1, -64(1)
-; CHECKASM32-NEXT: li 3, 0
; CHECKASM32-NEXT: li 4, 255
; CHECKASM32-NEXT: stw 0, 72(1)
; CHECKASM32-NEXT: bl .bar8
@@ -128,7 +128,6 @@ entry:
; CHECKASM64-NEXT: # %bb.0: # %entry
; CHECKASM64-NEXT: mflr 0
; CHECKASM64-NEXT: stdu 1, -112(1)
-; CHECKASM64-NEXT: li 3, 0
; CHECKASM64-NEXT: li 4, 255
; CHECKASM64-NEXT: std 0, 128(1)
; CHECKASM64-NEXT: bl .bar8
@@ -143,12 +142,12 @@ entry:
; CHECKISEL64-NEXT: t1: i64 = GlobalAddress<ptr @bar8> 0
; CHECKISEL64-NEXT: t2: i8 = poison
; CHECKISEL64-NEXT: t3: i8 = Constant<-1>
-; CHECKISEL64-NEXT: t4: i32 = Constant<0>
+; CHECKISEL64-NEXT: t4: i32 = poison
; CHECKISEL64-NEXT: t5: i32 = Constant<255>
; CHECKISEL64-NEXT: t9: i64 = Register $x1
; CHECKISEL64-NEXT: t0: ch,glue = EntryToken
; CHECKISEL64-NEXT: t8: ch,glue = callseq_start t0, TargetConstant:i64<112>, TargetConstant:i64<0>
-; CHECKISEL64-NEXT: t13: ch,glue = CopyToReg t8, Register:i64 $x3, Constant:i64<0>
+; CHECKISEL64-NEXT: t13: ch,glue = CopyToReg t8, Register:i64 $x3, poison:i64
; CHECKISEL64-NEXT: t15: ch,glue = CopyToReg t13, Register:i64 $x4, Constant:i64<255>, t13:1
; CHECKISEL64-NEXT: t19: ch,glue = PPCISD::CALL_NOP t15, MCSymbol:i64, Register:i64 $x3, Register:i64 $x4, Register:i64 $x2, RegisterMask:Untyped, t15:1
; CHECKISEL64-NEXT: t20: ch,glue = callseq_end t19, TargetConstant:i64<112>, TargetConstant:i64<0>, t19:1
diff --git a/llvm/test/CodeGen/RISCV/miss-sp-restore-eh.ll b/llvm/test/CodeGen/RISCV/miss-sp-restore-eh.ll
index a5847365159a8..395fc99ea0536 100644
--- a/llvm/test/CodeGen/RISCV/miss-sp-restore-eh.ll
+++ b/llvm/test/CodeGen/RISCV/miss-sp-restore-eh.ll
@@ -27,14 +27,6 @@ define signext i32 @foo() #1 personality ptr @__gxx_personality_v0 {
; CHECK-NEXT: .cfi_remember_state
; CHECK-NEXT: .Ltmp0:
; CHECK-NEXT: addi sp, sp, -32
-; CHECK-NEXT: li a0, 0
-; CHECK-NEXT: li a1, 0
-; CHECK-NEXT: li a2, 0
-; CHECK-NEXT: li a3, 0
-; CHECK-NEXT: li a4, 0
-; CHECK-NEXT: li a5, 0
-; CHECK-NEXT: li a6, 0
-; CHECK-NEXT: li a7, 0
; CHECK-NEXT: call _Z3fooiiiiiiiiiiPi
; CHECK-NEXT: addi sp, sp, 32
; CHECK-NEXT: .Ltmp1:
diff --git a/llvm/test/CodeGen/VE/Vector/ticket-64420.ll b/llvm/test/CodeGen/VE/Vector/ticket-64420.ll
index c3fe443f911c9..41e28fe82661b 100644
--- a/llvm/test/CodeGen/VE/Vector/ticket-64420.ll
+++ b/llvm/test/CodeGen/VE/Vector/ticket-64420.ll
@@ -20,6 +20,7 @@
; SCALAR-LABEL: func:
; SCALAR: # %bb.1:
+; SCALAR: or %s1, 0, (0)1
; SCALAR-NEXT: st %s1, 8(, %s0)
; SCALAR-NEXT: st %s1, (, %s0)
; SCALAR-NEXT: b.l.t (, %s10)
diff --git a/llvm/test/CodeGen/X86/avx512-i1test.ll b/llvm/test/CodeGen/X86/avx512-i1test.ll
index d8683df5cbf7a..d43f05bbd5a1d 100644
--- a/llvm/test/CodeGen/X86/avx512-i1test.ll
+++ b/llvm/test/CodeGen/X86/avx512-i1test.ll
@@ -8,18 +8,19 @@ target triple = "x86_64-unknown-linux-gnu"
define void @func() {
; CHECK-LABEL: func:
; CHECK: # %bb.0: # %bb1
-; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: je .LBB0_1
; CHECK-NEXT: # %bb.3: # %L_30
; CHECK-NEXT: retq
-; CHECK-NEXT: .LBB0_1: # %bb56
-; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: .p2align 4
-; CHECK-NEXT: .LBB0_2: # %bb33
-; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: testb %al, %al
-; CHECK-NEXT: jmp .LBB0_2
+; CHECK-NEXT: .LBB0_1: # %bb33
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: testb %al, %al
+; CHECK-NEXT: jne .LBB0_1
+; CHECK-NEXT: # %bb.2: # %bb35
+; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT: testb %al, %al
+; CHECK-NEXT: jmp .LBB0_1
bb1:
br i1 poison, label %L_10, label %L_10
diff --git a/llvm/test/CodeGen/X86/bfloat.ll b/llvm/test/CodeGen/X86/bfloat.ll
index 8449107f39e91..684e2921b789e 100644
--- a/llvm/test/CodeGen/X86/bfloat.ll
+++ b/llvm/test/CodeGen/X86/bfloat.ll
@@ -842,7 +842,6 @@ define <32 x bfloat> @pr63017_2() nounwind {
;
; SSE2-LABEL: pr63017_2:
; SSE2: # %bb.0:
-; SSE2-NEXT: xorl %eax, %eax
; SSE2-NEXT: testb %al, %al
; SSE2-NEXT: jne .LBB16_1
; SSE2-NEXT: # %bb.2: # %cond.load
@@ -1087,7 +1086,6 @@ define <32 x bfloat> @pr63017_2() nounwind {
; AVXNC-LABEL: pr63017_2:
; AVXNC: # %bb.0:
; AVXNC-NEXT: vbroadcastss {{.*#+}} ymm0 = [49024,49024,49024,49024,49024,49024,49024,49024,49024,49024,49024,49024,49024,49024,49024,49024]
-; AVXNC-NEXT: xorl %eax, %eax
; AVXNC-NEXT: testb %al, %al
...
[truncated]
|
You can test this locally with the following command:git diff -U0 --pickaxe-regex -S '([^a-zA-Z0-9#_-]undef[^a-zA-Z0-9_-]|UndefValue::get)' 'HEAD~1' HEAD llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp llvm/test/CodeGen/AArch64/arm64-bitfield-extract.ll llvm/test/CodeGen/AArch64/optimize-cond-branch.ll llvm/test/CodeGen/AArch64/sve-extract-element.ll llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.16bit.ll llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll llvm/test/CodeGen/AMDGPU/ctpop16.ll llvm/test/CodeGen/AMDGPU/dead-machine-elim-after-dead-lane.ll llvm/test/CodeGen/AMDGPU/mdt-preserving-crash.ll llvm/test/CodeGen/PowerPC/undef-args.ll llvm/test/CodeGen/RISCV/miss-sp-restore-eh.ll llvm/test/CodeGen/VE/Vector/ticket-64420.ll llvm/test/CodeGen/X86/avx512-i1test.ll llvm/test/CodeGen/X86/bfloat.ll llvm/test/CodeGen/X86/clobber_frame_ptr.ll llvm/test/CodeGen/X86/concat-fpext-v2bf16.ll llvm/test/CodeGen/X86/jump_sign.ll llvm/test/CodeGen/X86/machine-trace-metrics-crash.ll llvm/test/CodeGen/X86/pr50254.ll llvm/test/CodeGen/X86/pr57673.ll llvm/test/CodeGen/X86/pr63108.ll llvm/test/CodeGen/X86/pr91005.ll llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll llvm/test/CodeGen/X86/shift-combine.ll llvm/test/CodeGen/X86/shuffle-combine-crash.ll llvm/test/CodeGen/X86/shuffle-half.ll llvm/test/CodeGen/X86/swifterror.ll llvm/test/CodeGen/X86/tailcall-cgp-dup.ll llvm/test/CodeGen/X86/vaargs-prolog-insert.ll llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bwvl.ll llvm/test/CodeGen/X86/x86-shrink-wrapping.ll The following files introduce new uses of undef:
Undef is now deprecated and should only be used in the rare cases where no replacement is possible. For example, a load of uninitialized memory yields In tests, avoid using For example, this is considered a bad practice: define void @fn() {
...
br i1 undef, ...
} Please use the following instead: define void @fn(i1 %cond) {
...
br i1 %cond, ...
} Please refer to the Undefined Behavior Manual for more information. |
…n value in getNode (llvm#136701) This patch addresses the signed/zero extension of poison by using a poison value of the extended type instead of a constant zero of the extended type.
…n value in getNode (llvm#136701) This patch addresses the signed/zero extension of poison by using a poison value of the extended type instead of a constant zero of the extended type.
…n value in getNode (llvm#136701) This patch addresses the signed/zero extension of poison by using a poison value of the extended type instead of a constant zero of the extended type.
Reland the #122741 after fix fail test cases #136636
The PR will fix the issue #122728
This patch addresses the signed/zero extension of poison by using a poison value of the extended type instead of a constant zero of the extended type.