-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[DAG] SimplifyDemandedBits - don't fold sext(x) -> aext(x) if we lose an 0/-1 allsignbits mask #77296
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
… an 0/-1 allsignbits mask For targets that use 0/-1 boolean results, we want to keep this pattern through extensions/truncations as much as possible - so avoid simplifying to any_extend even if we don't demand the upper bits. Noticed in triage for https://reviews.llvm.org/D152928
@llvm/pr-subscribers-backend-x86 @llvm/pr-subscribers-llvm-selectiondag Author: Simon Pilgrim (RKSimon) ChangesFor targets that use 0/-1 boolean results, we want to keep this pattern through extensions/truncations as much as possible - so avoid simplifying to any_extend even if we don't demand the upper bits. Noticed in triage for https://reviews.llvm.org/D152928 Full diff: https://github.com/llvm/llvm-project/pull/77296.diff 6 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index f8400e8e94df61..e3e3e375d6a6a4 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -2444,6 +2444,13 @@ bool TargetLowering::SimplifyDemandedBits(
unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
bool IsVecInReg = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;
+ APInt InDemandedElts = DemandedElts.zext(InElts);
+ APInt InDemandedBits = DemandedBits.trunc(InBits);
+
+ // Since some of the sign extended bits are demanded, we know that the sign
+ // bit is demanded.
+ InDemandedBits.setBit(InBits - 1);
+
// If none of the top bits are demanded, convert this into an any_extend.
if (DemandedBits.getActiveBits() <= InBits) {
// If we only need the non-extended bits of the bottom element
@@ -2452,19 +2459,17 @@ bool TargetLowering::SimplifyDemandedBits(
VT.getSizeInBits() == SrcVT.getSizeInBits())
return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
- unsigned Opc =
- IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
- if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
- return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
+ // Don't lose an all signbits 0/-1 splat on targets with 0/-1 booleans.
+ if (getBooleanContents(VT) != ZeroOrNegativeOneBooleanContent ||
+ TLO.DAG.ComputeNumSignBits(Src, InDemandedElts, Depth + 1) !=
+ InBits) {
+ unsigned Opc =
+ IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
+ if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
+ return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
+ }
}
- APInt InDemandedBits = DemandedBits.trunc(InBits);
- APInt InDemandedElts = DemandedElts.zext(InElts);
-
- // Since some of the sign extended bits are demanded, we know that the sign
- // bit is demanded.
- InDemandedBits.setBit(InBits - 1);
-
if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
Depth + 1))
return true;
diff --git a/llvm/test/CodeGen/AArch64/arm64-zip.ll b/llvm/test/CodeGen/AArch64/arm64-zip.ll
index e22b57c8af44a8..c6e3c3540f6e91 100644
--- a/llvm/test/CodeGen/AArch64/arm64-zip.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-zip.ll
@@ -328,7 +328,7 @@ define <4 x i32> @shuffle_zip3(<4 x i32> %arg) {
; CHECK-NEXT: zip2.4h v0, v0, v1
; CHECK-NEXT: movi.4s v1, #1
; CHECK-NEXT: zip1.4h v0, v0, v0
-; CHECK-NEXT: ushll.4s v0, v0, #0
+; CHECK-NEXT: sshll.4s v0, v0, #0
; CHECK-NEXT: and.16b v0, v0, v1
; CHECK-NEXT: ret
bb:
diff --git a/llvm/test/CodeGen/AArch64/vselect-ext.ll b/llvm/test/CodeGen/AArch64/vselect-ext.ll
index b80955665c74f1..0b90343a40c839 100644
--- a/llvm/test/CodeGen/AArch64/vselect-ext.ll
+++ b/llvm/test/CodeGen/AArch64/vselect-ext.ll
@@ -219,17 +219,17 @@ define <3 x i32> @same_zext_used_in_cmp_unsigned_pred_and_select_v3i16(<3 x i8>
; CHECK-NEXT: fmov s0, w0
; CHECK-NEXT: Lloh0:
; CHECK-NEXT: adrp x8, lCPI9_0@PAGE
+; CHECK-NEXT: movi.2d v3, #0x0000ff000000ff
; CHECK-NEXT: Lloh1:
; CHECK-NEXT: ldr d2, [x8, lCPI9_0@PAGEOFF]
; CHECK-NEXT: mov.h v0[1], w1
; CHECK-NEXT: mov.h v0[2], w2
-; CHECK-NEXT: fmov d1, d0
-; CHECK-NEXT: bic.4h v1, #255, lsl #8
-; CHECK-NEXT: cmhi.4h v1, v1, v2
-; CHECK-NEXT: and.8b v0, v0, v1
-; CHECK-NEXT: movi.2d v1, #0x0000ff000000ff
-; CHECK-NEXT: ushll.4s v0, v0, #0
-; CHECK-NEXT: and.16b v0, v0, v1
+; CHECK-NEXT: ushll.4s v1, v0, #0
+; CHECK-NEXT: bic.4h v0, #255, lsl #8
+; CHECK-NEXT: cmhi.4h v0, v0, v2
+; CHECK-NEXT: and.16b v1, v1, v3
+; CHECK-NEXT: sshll.4s v0, v0, #0
+; CHECK-NEXT: and.16b v0, v1, v0
; CHECK-NEXT: ret
; CHECK-NEXT: .loh AdrpLdr Lloh0, Lloh1
%ext = zext <3 x i8> %a to <3 x i32>
diff --git a/llvm/test/CodeGen/SystemZ/vec-perm-14.ll b/llvm/test/CodeGen/SystemZ/vec-perm-14.ll
index fb3ece96017b70..0b392676fa3ec3 100644
--- a/llvm/test/CodeGen/SystemZ/vec-perm-14.ll
+++ b/llvm/test/CodeGen/SystemZ/vec-perm-14.ll
@@ -1,16 +1,14 @@
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
-;
-; Test that only one vperm of the vector compare is needed for both extracts.
+; Test that no vperm of the vector compare is needed for the extracts.
define void @fun() {
; CHECK-LABEL: fun:
; CHECK: # %bb.0: # %bb
; CHECK-NEXT: vlrepf %v0, 0(%r1)
; CHECK-NEXT: vgbm %v1, 0
-; CHECK-NEXT: larl %r1, .LCPI0_0
; CHECK-NEXT: vceqb %v0, %v0, %v1
-; CHECK-NEXT: vl %v1, 0(%r1), 3
-; CHECK-NEXT: vperm %v0, %v0, %v0, %v1
+; CHECK-NEXT: vuphb %v0, %v0
+; CHECK-NEXT: vuphh %v0, %v0
; CHECK-NEXT: vlgvf %r0, %v0, 0
; CHECK-NEXT: tmll %r0, 1
; CHECK-NEXT: je .LBB0_2
diff --git a/llvm/test/CodeGen/X86/test-shrink-bug.ll b/llvm/test/CodeGen/X86/test-shrink-bug.ll
index f05459f751bce3..51a00d211421ba 100644
--- a/llvm/test/CodeGen/X86/test-shrink-bug.ll
+++ b/llvm/test/CodeGen/X86/test-shrink-bug.ll
@@ -68,8 +68,8 @@ define dso_local void @fail(i16 %a, <2 x i8> %b) {
; CHECK-X64-NEXT: je .LBB1_3
; CHECK-X64-NEXT: # %bb.1:
; CHECK-X64-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-X64-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8]
-; CHECK-X64-NEXT: pextrw $4, %xmm0, %eax
+; CHECK-X64-NEXT: pslld $8, %xmm0
+; CHECK-X64-NEXT: pextrw $1, %xmm0, %eax
; CHECK-X64-NEXT: testb $1, %al
; CHECK-X64-NEXT: jne .LBB1_3
; CHECK-X64-NEXT: # %bb.2: # %no
diff --git a/llvm/test/CodeGen/X86/vec_setcc.ll b/llvm/test/CodeGen/X86/vec_setcc.ll
index e7232a34f47122..87e29261eaa490 100644
--- a/llvm/test/CodeGen/X86/vec_setcc.ll
+++ b/llvm/test/CodeGen/X86/vec_setcc.ll
@@ -308,9 +308,9 @@ define <3 x i1> @test_setcc_v3i1_v3i16(ptr %a) nounwind {
; SSE2-LABEL: test_setcc_v3i1_v3i16:
; SSE2: # %bb.0:
; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: pcmpeqw %xmm0, %xmm1
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
; SSE2-NEXT: movdqa %xmm1, -{{[0-9]+}}(%rsp)
; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx
|
The SystemZ changes look good to me (and are a clear improvement). Thanks! |
… an 0/-1 allsignbits mask (llvm#77296) For targets that use 0/-1 boolean results, we want to keep this pattern through extensions/truncations as much as possible - so avoid simplifying to any_extend even if we don't demand the upper bits. Noticed in triage for https://reviews.llvm.org/D152928
For targets that use 0/-1 boolean results, we want to keep this pattern through extensions/truncations as much as possible - so avoid simplifying to any_extend even if we don't demand the upper bits.
Noticed in triage for https://reviews.llvm.org/D152928