Skip to content

Commit d460c1d

Browse files
authored
[DAG] SimplifyDemandedBits - don't fold sext(x) -> aext(x) if we lose a 0/-1 allsignbits mask (#77296)
For targets that use 0/-1 boolean results, we want to keep this all-sign-bits pattern through extensions/truncations as much as possible, so avoid simplifying sign_extend to any_extend even if we don't demand the upper bits. Noticed in triage for https://reviews.llvm.org/D152928.
1 parent 5351ded commit d460c1d

File tree

6 files changed

+30
-27
lines changed

6 files changed

+30
-27
lines changed

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 16 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2444,6 +2444,13 @@ bool TargetLowering::SimplifyDemandedBits(
24442444
unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
24452445
bool IsVecInReg = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;
24462446

2447+
APInt InDemandedElts = DemandedElts.zext(InElts);
2448+
APInt InDemandedBits = DemandedBits.trunc(InBits);
2449+
2450+
// Since some of the sign extended bits are demanded, we know that the sign
2451+
// bit is demanded.
2452+
InDemandedBits.setBit(InBits - 1);
2453+
24472454
// If none of the top bits are demanded, convert this into an any_extend.
24482455
if (DemandedBits.getActiveBits() <= InBits) {
24492456
// If we only need the non-extended bits of the bottom element
@@ -2452,19 +2459,17 @@ bool TargetLowering::SimplifyDemandedBits(
24522459
VT.getSizeInBits() == SrcVT.getSizeInBits())
24532460
return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
24542461

2455-
unsigned Opc =
2456-
IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
2457-
if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
2458-
return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
2462+
// Don't lose an all signbits 0/-1 splat on targets with 0/-1 booleans.
2463+
if (getBooleanContents(VT) != ZeroOrNegativeOneBooleanContent ||
2464+
TLO.DAG.ComputeNumSignBits(Src, InDemandedElts, Depth + 1) !=
2465+
InBits) {
2466+
unsigned Opc =
2467+
IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
2468+
if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
2469+
return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
2470+
}
24592471
}
24602472

2461-
APInt InDemandedBits = DemandedBits.trunc(InBits);
2462-
APInt InDemandedElts = DemandedElts.zext(InElts);
2463-
2464-
// Since some of the sign extended bits are demanded, we know that the sign
2465-
// bit is demanded.
2466-
InDemandedBits.setBit(InBits - 1);
2467-
24682473
if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
24692474
Depth + 1))
24702475
return true;

llvm/test/CodeGen/AArch64/arm64-zip.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -328,7 +328,7 @@ define <4 x i32> @shuffle_zip3(<4 x i32> %arg) {
328328
; CHECK-NEXT: zip2.4h v0, v0, v1
329329
; CHECK-NEXT: movi.4s v1, #1
330330
; CHECK-NEXT: zip1.4h v0, v0, v0
331-
; CHECK-NEXT: ushll.4s v0, v0, #0
331+
; CHECK-NEXT: sshll.4s v0, v0, #0
332332
; CHECK-NEXT: and.16b v0, v0, v1
333333
; CHECK-NEXT: ret
334334
bb:

llvm/test/CodeGen/AArch64/vselect-ext.ll

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -219,17 +219,17 @@ define <3 x i32> @same_zext_used_in_cmp_unsigned_pred_and_select_v3i16(<3 x i8>
219219
; CHECK-NEXT: fmov s0, w0
220220
; CHECK-NEXT: Lloh0:
221221
; CHECK-NEXT: adrp x8, lCPI9_0@PAGE
222+
; CHECK-NEXT: movi.2d v3, #0x0000ff000000ff
222223
; CHECK-NEXT: Lloh1:
223224
; CHECK-NEXT: ldr d2, [x8, lCPI9_0@PAGEOFF]
224225
; CHECK-NEXT: mov.h v0[1], w1
225226
; CHECK-NEXT: mov.h v0[2], w2
226-
; CHECK-NEXT: fmov d1, d0
227-
; CHECK-NEXT: bic.4h v1, #255, lsl #8
228-
; CHECK-NEXT: cmhi.4h v1, v1, v2
229-
; CHECK-NEXT: and.8b v0, v0, v1
230-
; CHECK-NEXT: movi.2d v1, #0x0000ff000000ff
231-
; CHECK-NEXT: ushll.4s v0, v0, #0
232-
; CHECK-NEXT: and.16b v0, v0, v1
227+
; CHECK-NEXT: ushll.4s v1, v0, #0
228+
; CHECK-NEXT: bic.4h v0, #255, lsl #8
229+
; CHECK-NEXT: cmhi.4h v0, v0, v2
230+
; CHECK-NEXT: and.16b v1, v1, v3
231+
; CHECK-NEXT: sshll.4s v0, v0, #0
232+
; CHECK-NEXT: and.16b v0, v1, v0
233233
; CHECK-NEXT: ret
234234
; CHECK-NEXT: .loh AdrpLdr Lloh0, Lloh1
235235
%ext = zext <3 x i8> %a to <3 x i32>

llvm/test/CodeGen/SystemZ/vec-perm-14.ll

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,14 @@
11
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
2-
;
3-
; Test that only one vperm of the vector compare is needed for both extracts.
42

3+
; Test that no vperm of the vector compare is needed for the extracts.
54
define void @fun() {
65
; CHECK-LABEL: fun:
76
; CHECK: # %bb.0: # %bb
87
; CHECK-NEXT: vlrepf %v0, 0(%r1)
98
; CHECK-NEXT: vgbm %v1, 0
10-
; CHECK-NEXT: larl %r1, .LCPI0_0
119
; CHECK-NEXT: vceqb %v0, %v0, %v1
12-
; CHECK-NEXT: vl %v1, 0(%r1), 3
13-
; CHECK-NEXT: vperm %v0, %v0, %v0, %v1
10+
; CHECK-NEXT: vuphb %v0, %v0
11+
; CHECK-NEXT: vuphh %v0, %v0
1412
; CHECK-NEXT: vlgvf %r0, %v0, 0
1513
; CHECK-NEXT: tmll %r0, 1
1614
; CHECK-NEXT: je .LBB0_2

llvm/test/CodeGen/X86/test-shrink-bug.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -68,8 +68,8 @@ define dso_local void @fail(i16 %a, <2 x i8> %b) {
6868
; CHECK-X64-NEXT: je .LBB1_3
6969
; CHECK-X64-NEXT: # %bb.1:
7070
; CHECK-X64-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
71-
; CHECK-X64-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8]
72-
; CHECK-X64-NEXT: pextrw $4, %xmm0, %eax
71+
; CHECK-X64-NEXT: pslld $8, %xmm0
72+
; CHECK-X64-NEXT: pextrw $1, %xmm0, %eax
7373
; CHECK-X64-NEXT: testb $1, %al
7474
; CHECK-X64-NEXT: jne .LBB1_3
7575
; CHECK-X64-NEXT: # %bb.2: # %no

llvm/test/CodeGen/X86/vec_setcc.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -308,9 +308,9 @@ define <3 x i1> @test_setcc_v3i1_v3i16(ptr %a) nounwind {
308308
; SSE2-LABEL: test_setcc_v3i1_v3i16:
309309
; SSE2: # %bb.0:
310310
; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
311+
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
311312
; SSE2-NEXT: pxor %xmm1, %xmm1
312313
; SSE2-NEXT: pcmpeqw %xmm0, %xmm1
313-
; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
314314
; SSE2-NEXT: movdqa %xmm1, -{{[0-9]+}}(%rsp)
315315
; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
316316
; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx

0 commit comments

Comments
 (0)