Skip to content

[DAG] SimplifyDemandedBits - don't fold sext(x) -> aext(x) if we lose an 0/-1 allsignbits mask #77296

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jan 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 16 additions & 11 deletions llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2444,6 +2444,13 @@ bool TargetLowering::SimplifyDemandedBits(
unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
bool IsVecInReg = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;

APInt InDemandedElts = DemandedElts.zext(InElts);
APInt InDemandedBits = DemandedBits.trunc(InBits);

// Since some of the sign extended bits are demanded, we know that the sign
// bit is demanded.
InDemandedBits.setBit(InBits - 1);

// If none of the top bits are demanded, convert this into an any_extend.
if (DemandedBits.getActiveBits() <= InBits) {
// If we only need the non-extended bits of the bottom element
Expand All @@ -2452,19 +2459,17 @@ bool TargetLowering::SimplifyDemandedBits(
VT.getSizeInBits() == SrcVT.getSizeInBits())
return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));

unsigned Opc =
IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
// Don't lose an all signbits 0/-1 splat on targets with 0/-1 booleans.
if (getBooleanContents(VT) != ZeroOrNegativeOneBooleanContent ||
TLO.DAG.ComputeNumSignBits(Src, InDemandedElts, Depth + 1) !=
InBits) {
unsigned Opc =
IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
}
}

APInt InDemandedBits = DemandedBits.trunc(InBits);
APInt InDemandedElts = DemandedElts.zext(InElts);

// Since some of the sign extended bits are demanded, we know that the sign
// bit is demanded.
InDemandedBits.setBit(InBits - 1);

if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
Depth + 1))
return true;
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AArch64/arm64-zip.ll
Original file line number Diff line number Diff line change
Expand Up @@ -328,7 +328,7 @@ define <4 x i32> @shuffle_zip3(<4 x i32> %arg) {
; CHECK-NEXT: zip2.4h v0, v0, v1
; CHECK-NEXT: movi.4s v1, #1
; CHECK-NEXT: zip1.4h v0, v0, v0
; CHECK-NEXT: ushll.4s v0, v0, #0
; CHECK-NEXT: sshll.4s v0, v0, #0
; CHECK-NEXT: and.16b v0, v0, v1
; CHECK-NEXT: ret
bb:
Expand Down
14 changes: 7 additions & 7 deletions llvm/test/CodeGen/AArch64/vselect-ext.ll
Original file line number Diff line number Diff line change
Expand Up @@ -219,17 +219,17 @@ define <3 x i32> @same_zext_used_in_cmp_unsigned_pred_and_select_v3i16(<3 x i8>
; CHECK-NEXT: fmov s0, w0
; CHECK-NEXT: Lloh0:
; CHECK-NEXT: adrp x8, lCPI9_0@PAGE
; CHECK-NEXT: movi.2d v3, #0x0000ff000000ff
; CHECK-NEXT: Lloh1:
; CHECK-NEXT: ldr d2, [x8, lCPI9_0@PAGEOFF]
; CHECK-NEXT: mov.h v0[1], w1
; CHECK-NEXT: mov.h v0[2], w2
; CHECK-NEXT: fmov d1, d0
; CHECK-NEXT: bic.4h v1, #255, lsl #8
; CHECK-NEXT: cmhi.4h v1, v1, v2
; CHECK-NEXT: and.8b v0, v0, v1
; CHECK-NEXT: movi.2d v1, #0x0000ff000000ff
; CHECK-NEXT: ushll.4s v0, v0, #0
; CHECK-NEXT: and.16b v0, v0, v1
; CHECK-NEXT: ushll.4s v1, v0, #0
; CHECK-NEXT: bic.4h v0, #255, lsl #8
; CHECK-NEXT: cmhi.4h v0, v0, v2
; CHECK-NEXT: and.16b v1, v1, v3
; CHECK-NEXT: sshll.4s v0, v0, #0
; CHECK-NEXT: and.16b v0, v1, v0
; CHECK-NEXT: ret
; CHECK-NEXT: .loh AdrpLdr Lloh0, Lloh1
%ext = zext <3 x i8> %a to <3 x i32>
Expand Down
8 changes: 3 additions & 5 deletions llvm/test/CodeGen/SystemZ/vec-perm-14.ll
Original file line number Diff line number Diff line change
@@ -1,16 +1,14 @@
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
;
; Test that only one vperm of the vector compare is needed for both extracts.

; Test that no vperm of the vector compare is needed for the extracts.
define void @fun() {
; CHECK-LABEL: fun:
; CHECK: # %bb.0: # %bb
; CHECK-NEXT: vlrepf %v0, 0(%r1)
; CHECK-NEXT: vgbm %v1, 0
; CHECK-NEXT: larl %r1, .LCPI0_0
; CHECK-NEXT: vceqb %v0, %v0, %v1
; CHECK-NEXT: vl %v1, 0(%r1), 3
; CHECK-NEXT: vperm %v0, %v0, %v0, %v1
; CHECK-NEXT: vuphb %v0, %v0
; CHECK-NEXT: vuphh %v0, %v0
; CHECK-NEXT: vlgvf %r0, %v0, 0
; CHECK-NEXT: tmll %r0, 1
; CHECK-NEXT: je .LBB0_2
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/test-shrink-bug.ll
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,8 @@ define dso_local void @fail(i16 %a, <2 x i8> %b) {
; CHECK-X64-NEXT: je .LBB1_3
; CHECK-X64-NEXT: # %bb.1:
; CHECK-X64-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-X64-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8]
; CHECK-X64-NEXT: pextrw $4, %xmm0, %eax
; CHECK-X64-NEXT: pslld $8, %xmm0
; CHECK-X64-NEXT: pextrw $1, %xmm0, %eax
; CHECK-X64-NEXT: testb $1, %al
; CHECK-X64-NEXT: jne .LBB1_3
; CHECK-X64-NEXT: # %bb.2: # %no
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/vec_setcc.ll
Original file line number Diff line number Diff line change
Expand Up @@ -308,9 +308,9 @@ define <3 x i1> @test_setcc_v3i1_v3i16(ptr %a) nounwind {
; SSE2-LABEL: test_setcc_v3i1_v3i16:
; SSE2: # %bb.0:
; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: pcmpeqw %xmm0, %xmm1
; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
; SSE2-NEXT: movdqa %xmm1, -{{[0-9]+}}(%rsp)
; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx
Expand Down