Commit 171aeb2

[DAG] SelectionDAG.computeKnownBits - add NSW/NUW flags support to ISD::SHL handling (#89877)
Fixes #89414.
1 parent d00ed83 commit 171aeb2

4 files changed: 142 additions & 53 deletions


llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 9 additions & 2 deletions
@@ -3527,16 +3527,23 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
     Known.Zero.setBitsFrom(1);
     break;
   }
-  case ISD::SHL:
+  case ISD::SHL: {
     Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
     Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
-    Known = KnownBits::shl(Known, Known2);
+
+    bool NUW = Op->getFlags().hasNoUnsignedWrap();
+    bool NSW = Op->getFlags().hasNoSignedWrap();
+
+    bool ShAmtNonZero = Known2.isNonZero();
+
+    Known = KnownBits::shl(Known, Known2, NUW, NSW, ShAmtNonZero);

     // Minimum shift low bits are known zero.
     if (const APInt *ShMinAmt =
             getValidMinimumShiftAmountConstant(Op, DemandedElts))
       Known.Zero.setLowBits(ShMinAmt->getZExtValue());
     break;
+  }
   case ISD::SRL:
     Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
     Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
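For reference, a minimal standalone sketch (not part of the commit; the wrapper function name and the "shift 256 by an unknown i16 amount" example are illustrative only, chosen to mirror the pr89877.ll tests below) of the KnownBits::shl call shape the hunk above now uses:

#include "llvm/Support/KnownBits.h"
using namespace llvm;

// Known bits of (shl nuw/nsw i16 256, %x) where nothing is known about %x.
KnownBits knownBitsOfShl256(bool NUW, bool NSW) {
  KnownBits LHS = KnownBits::makeConstant(APInt(16, 256)); // value is exactly 0x0100
  KnownBits ShAmt(16);                                     // shift amount: fully unknown
  // Mirrors the new ISD::SHL handling: forward the wrap flags and whether the
  // shift amount is provably non-zero.
  return KnownBits::shl(LHS, ShAmt, NUW, NSW, /*ShAmtNonZero=*/ShAmt.isNonZero());
}

Without the flags only the low 8 bits of the result are known zero. With NSW, shift amounts that would push the set bit into or past the sign bit are excluded, so the sign bit also becomes known zero; that is what allows a later sign-extend of the shift result to be lowered as a zero-extend in the test diffs below.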

llvm/test/CodeGen/X86/fold-int-pow2-with-fmul-or-fdiv.ll

Lines changed: 16 additions & 49 deletions
@@ -840,44 +840,18 @@ define double @fmul_pow_shl_cnt_fail_maybe_non_pow2(i64 %v, i64 %cnt) nounwind {
 define <2 x float> @fmul_pow_shl_cnt_vec_fail_expensive_cast(<2 x i64> %cnt) nounwind {
 ; CHECK-SSE-LABEL: fmul_pow_shl_cnt_vec_fail_expensive_cast:
 ; CHECK-SSE: # %bb.0:
-; CHECK-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
-; CHECK-SSE-NEXT: movdqa {{.*#+}} xmm3 = [2,2]
-; CHECK-SSE-NEXT: movdqa %xmm3, %xmm1
-; CHECK-SSE-NEXT: psllq %xmm2, %xmm1
-; CHECK-SSE-NEXT: psllq %xmm0, %xmm3
-; CHECK-SSE-NEXT: movq %xmm3, %rax
-; CHECK-SSE-NEXT: testq %rax, %rax
-; CHECK-SSE-NEXT: js .LBB12_1
-; CHECK-SSE-NEXT: # %bb.2:
-; CHECK-SSE-NEXT: xorps %xmm0, %xmm0
-; CHECK-SSE-NEXT: cvtsi2ss %rax, %xmm0
-; CHECK-SSE-NEXT: jmp .LBB12_3
-; CHECK-SSE-NEXT: .LBB12_1:
-; CHECK-SSE-NEXT: movq %rax, %rcx
-; CHECK-SSE-NEXT: shrq %rcx
-; CHECK-SSE-NEXT: andl $1, %eax
-; CHECK-SSE-NEXT: orq %rcx, %rax
+; CHECK-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
+; CHECK-SSE-NEXT: movdqa {{.*#+}} xmm2 = [2,2]
+; CHECK-SSE-NEXT: movdqa %xmm2, %xmm3
+; CHECK-SSE-NEXT: psllq %xmm1, %xmm3
+; CHECK-SSE-NEXT: psllq %xmm0, %xmm2
+; CHECK-SSE-NEXT: movq %xmm2, %rax
 ; CHECK-SSE-NEXT: xorps %xmm0, %xmm0
 ; CHECK-SSE-NEXT: cvtsi2ss %rax, %xmm0
-; CHECK-SSE-NEXT: addss %xmm0, %xmm0
-; CHECK-SSE-NEXT: .LBB12_3:
-; CHECK-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
+; CHECK-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm3[2,3,2,3]
 ; CHECK-SSE-NEXT: movq %xmm1, %rax
-; CHECK-SSE-NEXT: testq %rax, %rax
-; CHECK-SSE-NEXT: js .LBB12_4
-; CHECK-SSE-NEXT: # %bb.5:
-; CHECK-SSE-NEXT: xorps %xmm1, %xmm1
-; CHECK-SSE-NEXT: cvtsi2ss %rax, %xmm1
-; CHECK-SSE-NEXT: jmp .LBB12_6
-; CHECK-SSE-NEXT: .LBB12_4:
-; CHECK-SSE-NEXT: movq %rax, %rcx
-; CHECK-SSE-NEXT: shrq %rcx
-; CHECK-SSE-NEXT: andl $1, %eax
-; CHECK-SSE-NEXT: orq %rcx, %rax
 ; CHECK-SSE-NEXT: xorps %xmm1, %xmm1
 ; CHECK-SSE-NEXT: cvtsi2ss %rax, %xmm1
-; CHECK-SSE-NEXT: addss %xmm1, %xmm1
-; CHECK-SSE-NEXT: .LBB12_6:
 ; CHECK-SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
 ; CHECK-SSE-NEXT: mulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
 ; CHECK-SSE-NEXT: retq
@@ -886,18 +860,11 @@ define <2 x float> @fmul_pow_shl_cnt_vec_fail_expensive_cast(<2 x i64> %cnt) nou
 ; CHECK-AVX2: # %bb.0:
 ; CHECK-AVX2-NEXT: vpmovsxbq {{.*#+}} xmm1 = [2,2]
 ; CHECK-AVX2-NEXT: vpsllvq %xmm0, %xmm1, %xmm0
-; CHECK-AVX2-NEXT: vpsrlq $1, %xmm0, %xmm1
-; CHECK-AVX2-NEXT: vblendvpd %xmm0, %xmm1, %xmm0, %xmm1
-; CHECK-AVX2-NEXT: vpextrq $1, %xmm1, %rax
-; CHECK-AVX2-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2
-; CHECK-AVX2-NEXT: vmovq %xmm1, %rax
-; CHECK-AVX2-NEXT: vcvtsi2ss %rax, %xmm3, %xmm1
-; CHECK-AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],zero,zero
-; CHECK-AVX2-NEXT: vaddps %xmm1, %xmm1, %xmm2
-; CHECK-AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; CHECK-AVX2-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0
-; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
-; CHECK-AVX2-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0
+; CHECK-AVX2-NEXT: vpextrq $1, %xmm0, %rax
+; CHECK-AVX2-NEXT: vcvtsi2ss %rax, %xmm2, %xmm1
+; CHECK-AVX2-NEXT: vmovq %xmm0, %rax
+; CHECK-AVX2-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
+; CHECK-AVX2-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
 ; CHECK-AVX2-NEXT: vbroadcastss {{.*#+}} xmm1 = [1.5E+1,1.5E+1,1.5E+1,1.5E+1]
 ; CHECK-AVX2-NEXT: vmulps %xmm1, %xmm0, %xmm0
 ; CHECK-AVX2-NEXT: retq
@@ -907,9 +874,9 @@ define <2 x float> @fmul_pow_shl_cnt_vec_fail_expensive_cast(<2 x i64> %cnt) nou
 ; CHECK-NO-FASTFMA-NEXT: vpmovsxbq {{.*#+}} xmm1 = [2,2]
 ; CHECK-NO-FASTFMA-NEXT: vpsllvq %xmm0, %xmm1, %xmm0
 ; CHECK-NO-FASTFMA-NEXT: vpextrq $1, %xmm0, %rax
-; CHECK-NO-FASTFMA-NEXT: vcvtusi2ss %rax, %xmm2, %xmm1
+; CHECK-NO-FASTFMA-NEXT: vcvtsi2ss %rax, %xmm2, %xmm1
 ; CHECK-NO-FASTFMA-NEXT: vmovq %xmm0, %rax
-; CHECK-NO-FASTFMA-NEXT: vcvtusi2ss %rax, %xmm2, %xmm0
+; CHECK-NO-FASTFMA-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
 ; CHECK-NO-FASTFMA-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
 ; CHECK-NO-FASTFMA-NEXT: vbroadcastss {{.*#+}} xmm1 = [1.5E+1,1.5E+1,1.5E+1,1.5E+1]
 ; CHECK-NO-FASTFMA-NEXT: vmulps %xmm1, %xmm0, %xmm0
@@ -919,7 +886,7 @@ define <2 x float> @fmul_pow_shl_cnt_vec_fail_expensive_cast(<2 x i64> %cnt) nou
 ; CHECK-FMA: # %bb.0:
 ; CHECK-FMA-NEXT: vpbroadcastq {{.*#+}} xmm1 = [2,2]
 ; CHECK-FMA-NEXT: vpsllvq %xmm0, %xmm1, %xmm0
-; CHECK-FMA-NEXT: vcvtuqq2ps %xmm0, %xmm0
+; CHECK-FMA-NEXT: vcvtqq2ps %xmm0, %xmm0
 ; CHECK-FMA-NEXT: vmulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
 ; CHECK-FMA-NEXT: retq
   %shl = shl nsw nuw <2 x i64> <i64 2, i64 2>, %cnt
@@ -986,7 +953,7 @@ define <4 x float> @fmul_pow_shl_cnt_vec_preserve_fma(<4 x i32> %cnt, <4 x float
 ; CHECK-FMA: # %bb.0:
 ; CHECK-FMA-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2,2,2,2]
 ; CHECK-FMA-NEXT: vpsllvd %xmm0, %xmm2, %xmm0
-; CHECK-FMA-NEXT: vcvtudq2ps %xmm0, %xmm0
+; CHECK-FMA-NEXT: vcvtdq2ps %xmm0, %xmm0
 ; CHECK-FMA-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1
 ; CHECK-FMA-NEXT: retq
   %shl = shl nsw nuw <4 x i32> <i32 2, i32 2, i32 2, i32 2>, %cnt

llvm/test/CodeGen/X86/known-never-zero.ll

Lines changed: 2 additions & 2 deletions
@@ -1612,7 +1612,7 @@ define i32 @sext_known_nonzero(i16 %xx) {
 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT: movl $256, %eax # imm = 0x100
 ; X86-NEXT: shll %cl, %eax
-; X86-NEXT: cwtl
+; X86-NEXT: movzwl %ax, %eax
 ; X86-NEXT: rep bsfl %eax, %eax
 ; X86-NEXT: retl
 ;
@@ -1622,7 +1622,7 @@ define i32 @sext_known_nonzero(i16 %xx) {
 ; X64-NEXT: movl $256, %eax # imm = 0x100
 ; X64-NEXT: # kill: def $cl killed $cl killed $ecx
 ; X64-NEXT: shll %cl, %eax
-; X64-NEXT: cwtl
+; X64-NEXT: movzwl %ax, %eax
 ; X64-NEXT: rep bsfl %eax, %eax
 ; X64-NEXT: retq
   %x = shl nuw nsw i16 256, %xx

llvm/test/CodeGen/X86/pr89877.ll

Lines changed: 115 additions & 0 deletions
@@ -0,0 +1,115 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=X64
+
+define i32 @sext_known_nonzero(i16 %xx) {
+; X86-LABEL: sext_known_nonzero:
+; X86: # %bb.0:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl $256, %eax # imm = 0x100
+; X86-NEXT: shll %cl, %eax
+; X86-NEXT: cwtl
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: je .LBB0_1
+; X86-NEXT: # %bb.2: # %cond.false
+; X86-NEXT: rep bsfl %eax, %eax
+; X86-NEXT: retl
+; X86-NEXT: .LBB0_1:
+; X86-NEXT: movl $32, %eax
+; X86-NEXT: retl
+;
+; X64-LABEL: sext_known_nonzero:
+; X64: # %bb.0:
+; X64-NEXT: movl %edi, %ecx
+; X64-NEXT: movl $256, %eax # imm = 0x100
+; X64-NEXT: # kill: def $cl killed $cl killed $ecx
+; X64-NEXT: shll %cl, %eax
+; X64-NEXT: cwtl
+; X64-NEXT: testl %eax, %eax
+; X64-NEXT: je .LBB0_1
+; X64-NEXT: # %bb.2: # %cond.false
+; X64-NEXT: rep bsfl %eax, %eax
+; X64-NEXT: retq
+; X64-NEXT: .LBB0_1:
+; X64-NEXT: movl $32, %eax
+; X64-NEXT: retq
+  %x = shl i16 256, %xx
+  %z = sext i16 %x to i32
+  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
+  ret i32 %r
+}
+
+define i32 @sext_known_nonzero_nuw(i16 %xx) {
+; X86-LABEL: sext_known_nonzero_nuw:
+; X86: # %bb.0:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl $256, %eax # imm = 0x100
+; X86-NEXT: shll %cl, %eax
+; X86-NEXT: cwtl
+; X86-NEXT: rep bsfl %eax, %eax
+; X86-NEXT: retl
+;
+; X64-LABEL: sext_known_nonzero_nuw:
+; X64: # %bb.0:
+; X64-NEXT: movl %edi, %ecx
+; X64-NEXT: movl $256, %eax # imm = 0x100
+; X64-NEXT: # kill: def $cl killed $cl killed $ecx
+; X64-NEXT: shll %cl, %eax
+; X64-NEXT: cwtl
+; X64-NEXT: rep bsfl %eax, %eax
+; X64-NEXT: retq
+  %x = shl nuw i16 256, %xx
+  %z = sext i16 %x to i32
+  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
+  ret i32 %r
+}
+
+define i32 @sext_known_nonzero_nsw(i16 %xx) {
+; X86-LABEL: sext_known_nonzero_nsw:
+; X86: # %bb.0:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl $256, %eax # imm = 0x100
+; X86-NEXT: shll %cl, %eax
+; X86-NEXT: movzwl %ax, %eax
+; X86-NEXT: rep bsfl %eax, %eax
+; X86-NEXT: retl
+;
+; X64-LABEL: sext_known_nonzero_nsw:
+; X64: # %bb.0:
+; X64-NEXT: movl %edi, %ecx
+; X64-NEXT: movl $256, %eax # imm = 0x100
+; X64-NEXT: # kill: def $cl killed $cl killed $ecx
+; X64-NEXT: shll %cl, %eax
+; X64-NEXT: movzwl %ax, %eax
+; X64-NEXT: rep bsfl %eax, %eax
+; X64-NEXT: retq
+  %x = shl nsw i16 256, %xx
+  %z = sext i16 %x to i32
+  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
+  ret i32 %r
+}
+
+define i32 @sext_known_nonzero_nuw_nsw(i16 %xx) {
+; X86-LABEL: sext_known_nonzero_nuw_nsw:
+; X86: # %bb.0:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl $256, %eax # imm = 0x100
+; X86-NEXT: shll %cl, %eax
+; X86-NEXT: movzwl %ax, %eax
+; X86-NEXT: rep bsfl %eax, %eax
+; X86-NEXT: retl
+;
+; X64-LABEL: sext_known_nonzero_nuw_nsw:
+; X64: # %bb.0:
+; X64-NEXT: movl %edi, %ecx
+; X64-NEXT: movl $256, %eax # imm = 0x100
+; X64-NEXT: # kill: def $cl killed $cl killed $ecx
+; X64-NEXT: shll %cl, %eax
+; X64-NEXT: movzwl %ax, %eax
+; X64-NEXT: rep bsfl %eax, %eax
+; X64-NEXT: retq
+  %x = shl nuw nsw i16 256, %xx
+  %z = sext i16 %x to i32
+  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
+  ret i32 %r
+}
