Skip to content

[DAG] SelectionDAG.computeKnownBits - add NSW/NUW flags support to ISD::SHL handling #89877

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
May 2, 2024
11 changes: 9 additions & 2 deletions llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3492,16 +3492,23 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
Known.Zero.setBitsFrom(1);
break;
}
case ISD::SHL:
case ISD::SHL: {
Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
Known = KnownBits::shl(Known, Known2);

bool NUW = Op->getFlags().hasNoUnsignedWrap();
bool NSW = Op->getFlags().hasNoSignedWrap();

bool ShAmtNonZero = Known2.isNonZero();

Known = KnownBits::shl(Known, Known2, NUW, NSW, ShAmtNonZero);

// Minimum shift low bits are known zero.
if (const APInt *ShMinAmt =
getValidMinimumShiftAmountConstant(Op, DemandedElts))
Known.Zero.setLowBits(ShMinAmt->getZExtValue());
break;
}
case ISD::SRL:
Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
Expand Down
65 changes: 16 additions & 49 deletions llvm/test/CodeGen/X86/fold-int-pow2-with-fmul-or-fdiv.ll
Original file line number Diff line number Diff line change
Expand Up @@ -840,44 +840,18 @@ define double @fmul_pow_shl_cnt_fail_maybe_non_pow2(i64 %v, i64 %cnt) nounwind {
define <2 x float> @fmul_pow_shl_cnt_vec_fail_expensive_cast(<2 x i64> %cnt) nounwind {
; CHECK-SSE-LABEL: fmul_pow_shl_cnt_vec_fail_expensive_cast:
; CHECK-SSE: # %bb.0:
; CHECK-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
; CHECK-SSE-NEXT: movdqa {{.*#+}} xmm3 = [2,2]
; CHECK-SSE-NEXT: movdqa %xmm3, %xmm1
; CHECK-SSE-NEXT: psllq %xmm2, %xmm1
; CHECK-SSE-NEXT: psllq %xmm0, %xmm3
; CHECK-SSE-NEXT: movq %xmm3, %rax
; CHECK-SSE-NEXT: testq %rax, %rax
; CHECK-SSE-NEXT: js .LBB12_1
; CHECK-SSE-NEXT: # %bb.2:
; CHECK-SSE-NEXT: xorps %xmm0, %xmm0
; CHECK-SSE-NEXT: cvtsi2ss %rax, %xmm0
; CHECK-SSE-NEXT: jmp .LBB12_3
; CHECK-SSE-NEXT: .LBB12_1:
; CHECK-SSE-NEXT: movq %rax, %rcx
; CHECK-SSE-NEXT: shrq %rcx
; CHECK-SSE-NEXT: andl $1, %eax
; CHECK-SSE-NEXT: orq %rcx, %rax
; CHECK-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; CHECK-SSE-NEXT: movdqa {{.*#+}} xmm2 = [2,2]
; CHECK-SSE-NEXT: movdqa %xmm2, %xmm3
; CHECK-SSE-NEXT: psllq %xmm1, %xmm3
; CHECK-SSE-NEXT: psllq %xmm0, %xmm2
; CHECK-SSE-NEXT: movq %xmm2, %rax
; CHECK-SSE-NEXT: xorps %xmm0, %xmm0
; CHECK-SSE-NEXT: cvtsi2ss %rax, %xmm0
; CHECK-SSE-NEXT: addss %xmm0, %xmm0
; CHECK-SSE-NEXT: .LBB12_3:
; CHECK-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; CHECK-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm3[2,3,2,3]
; CHECK-SSE-NEXT: movq %xmm1, %rax
; CHECK-SSE-NEXT: testq %rax, %rax
; CHECK-SSE-NEXT: js .LBB12_4
; CHECK-SSE-NEXT: # %bb.5:
; CHECK-SSE-NEXT: xorps %xmm1, %xmm1
; CHECK-SSE-NEXT: cvtsi2ss %rax, %xmm1
; CHECK-SSE-NEXT: jmp .LBB12_6
; CHECK-SSE-NEXT: .LBB12_4:
; CHECK-SSE-NEXT: movq %rax, %rcx
; CHECK-SSE-NEXT: shrq %rcx
; CHECK-SSE-NEXT: andl $1, %eax
; CHECK-SSE-NEXT: orq %rcx, %rax
; CHECK-SSE-NEXT: xorps %xmm1, %xmm1
; CHECK-SSE-NEXT: cvtsi2ss %rax, %xmm1
; CHECK-SSE-NEXT: addss %xmm1, %xmm1
; CHECK-SSE-NEXT: .LBB12_6:
; CHECK-SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-SSE-NEXT: mulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE-NEXT: retq
Expand All @@ -886,18 +860,11 @@ define <2 x float> @fmul_pow_shl_cnt_vec_fail_expensive_cast(<2 x i64> %cnt) nou
; CHECK-AVX2: # %bb.0:
; CHECK-AVX2-NEXT: vpmovsxbq {{.*#+}} xmm1 = [2,2]
; CHECK-AVX2-NEXT: vpsllvq %xmm0, %xmm1, %xmm0
; CHECK-AVX2-NEXT: vpsrlq $1, %xmm0, %xmm1
; CHECK-AVX2-NEXT: vblendvpd %xmm0, %xmm1, %xmm0, %xmm1
; CHECK-AVX2-NEXT: vpextrq $1, %xmm1, %rax
; CHECK-AVX2-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2
; CHECK-AVX2-NEXT: vmovq %xmm1, %rax
; CHECK-AVX2-NEXT: vcvtsi2ss %rax, %xmm3, %xmm1
; CHECK-AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],zero,zero
; CHECK-AVX2-NEXT: vaddps %xmm1, %xmm1, %xmm2
; CHECK-AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
; CHECK-AVX2-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0
; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
; CHECK-AVX2-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0
; CHECK-AVX2-NEXT: vpextrq $1, %xmm0, %rax
; CHECK-AVX2-NEXT: vcvtsi2ss %rax, %xmm2, %xmm1
; CHECK-AVX2-NEXT: vmovq %xmm0, %rax
; CHECK-AVX2-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
; CHECK-AVX2-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; CHECK-AVX2-NEXT: vbroadcastss {{.*#+}} xmm1 = [1.5E+1,1.5E+1,1.5E+1,1.5E+1]
; CHECK-AVX2-NEXT: vmulps %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT: retq
Expand All @@ -907,9 +874,9 @@ define <2 x float> @fmul_pow_shl_cnt_vec_fail_expensive_cast(<2 x i64> %cnt) nou
; CHECK-NO-FASTFMA-NEXT: vpmovsxbq {{.*#+}} xmm1 = [2,2]
; CHECK-NO-FASTFMA-NEXT: vpsllvq %xmm0, %xmm1, %xmm0
; CHECK-NO-FASTFMA-NEXT: vpextrq $1, %xmm0, %rax
; CHECK-NO-FASTFMA-NEXT: vcvtusi2ss %rax, %xmm2, %xmm1
; CHECK-NO-FASTFMA-NEXT: vcvtsi2ss %rax, %xmm2, %xmm1
; CHECK-NO-FASTFMA-NEXT: vmovq %xmm0, %rax
; CHECK-NO-FASTFMA-NEXT: vcvtusi2ss %rax, %xmm2, %xmm0
; CHECK-NO-FASTFMA-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
; CHECK-NO-FASTFMA-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; CHECK-NO-FASTFMA-NEXT: vbroadcastss {{.*#+}} xmm1 = [1.5E+1,1.5E+1,1.5E+1,1.5E+1]
; CHECK-NO-FASTFMA-NEXT: vmulps %xmm1, %xmm0, %xmm0
Expand All @@ -919,7 +886,7 @@ define <2 x float> @fmul_pow_shl_cnt_vec_fail_expensive_cast(<2 x i64> %cnt) nou
; CHECK-FMA: # %bb.0:
; CHECK-FMA-NEXT: vpbroadcastq {{.*#+}} xmm1 = [2,2]
; CHECK-FMA-NEXT: vpsllvq %xmm0, %xmm1, %xmm0
; CHECK-FMA-NEXT: vcvtuqq2ps %xmm0, %xmm0
; CHECK-FMA-NEXT: vcvtqq2ps %xmm0, %xmm0
; CHECK-FMA-NEXT: vmulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; CHECK-FMA-NEXT: retq
%shl = shl nsw nuw <2 x i64> <i64 2, i64 2>, %cnt
Expand Down Expand Up @@ -986,7 +953,7 @@ define <4 x float> @fmul_pow_shl_cnt_vec_preserve_fma(<4 x i32> %cnt, <4 x float
; CHECK-FMA: # %bb.0:
; CHECK-FMA-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2,2,2,2]
; CHECK-FMA-NEXT: vpsllvd %xmm0, %xmm2, %xmm0
; CHECK-FMA-NEXT: vcvtudq2ps %xmm0, %xmm0
; CHECK-FMA-NEXT: vcvtdq2ps %xmm0, %xmm0
; CHECK-FMA-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1
; CHECK-FMA-NEXT: retq
%shl = shl nsw nuw <4 x i32> <i32 2, i32 2, i32 2, i32 2>, %cnt
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/known-never-zero.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1606,7 +1606,7 @@ define i32 @sext_known_nonzero(i16 %xx) {
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $256, %eax # imm = 0x100
; X86-NEXT: shll %cl, %eax
; X86-NEXT: cwtl
; X86-NEXT: movzwl %ax, %eax
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
;
Expand All @@ -1616,7 +1616,7 @@ define i32 @sext_known_nonzero(i16 %xx) {
; X64-NEXT: movl $256, %eax # imm = 0x100
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: shll %cl, %eax
; X64-NEXT: cwtl
; X64-NEXT: movzwl %ax, %eax
; X64-NEXT: rep bsfl %eax, %eax
; X64-NEXT: retq
%x = shl nuw nsw i16 256, %xx
Expand Down
115 changes: 115 additions & 0 deletions llvm/test/CodeGen/X86/pr89877.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=X64

; Baseline case: the shl below carries no nuw/nsw flags.
; The expected assembly for both run lines sign-extends the 16-bit shift
; result (cwtl) and keeps an explicit zero test (testl/je) with a fallback
; result of 32 in front of the bsf.
define i32 @sext_known_nonzero(i16 %xx) {
; X86-LABEL: sext_known_nonzero:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $256, %eax # imm = 0x100
; X86-NEXT: shll %cl, %eax
; X86-NEXT: cwtl
; X86-NEXT: testl %eax, %eax
; X86-NEXT: je .LBB0_1
; X86-NEXT: # %bb.2: # %cond.false
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
; X86-NEXT: .LBB0_1:
; X86-NEXT: movl $32, %eax
; X86-NEXT: retl
;
; X64-LABEL: sext_known_nonzero:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %ecx
; X64-NEXT: movl $256, %eax # imm = 0x100
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: shll %cl, %eax
; X64-NEXT: cwtl
; X64-NEXT: testl %eax, %eax
; X64-NEXT: je .LBB0_1
; X64-NEXT: # %bb.2: # %cond.false
; X64-NEXT: rep bsfl %eax, %eax
; X64-NEXT: retq
; X64-NEXT: .LBB0_1:
; X64-NEXT: movl $32, %eax
; X64-NEXT: retq
; Shift the constant 256 left by a variable amount, with no wrap flags.
%x = shl i16 256, %xx
%z = sext i16 %x to i32
; The cttz is called with its second argument set to false.
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
ret i32 %r
}

; Same pattern as the unflagged variant, but the shl is marked nuw.
; The expected assembly for both run lines still sign-extends with cwtl,
; but goes straight to the bsf with no testl/je zero test and no fallback
; constant.
; NOTE(review): presumably nuw lets the shifted value be proven non-zero;
; confirm against the known-bits handling of shl.
define i32 @sext_known_nonzero_nuw(i16 %xx) {
; X86-LABEL: sext_known_nonzero_nuw:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $256, %eax # imm = 0x100
; X86-NEXT: shll %cl, %eax
; X86-NEXT: cwtl
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
;
; X64-LABEL: sext_known_nonzero_nuw:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %ecx
; X64-NEXT: movl $256, %eax # imm = 0x100
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: shll %cl, %eax
; X64-NEXT: cwtl
; X64-NEXT: rep bsfl %eax, %eax
; X64-NEXT: retq
; Shift the constant 256 left by a variable amount, flagged nuw only.
%x = shl nuw i16 256, %xx
%z = sext i16 %x to i32
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
ret i32 %r
}

; Same pattern as the unflagged variant, but the shl is marked nsw.
; The expected assembly for both run lines has no testl/je zero test, and
; the extension of the 16-bit shift result is a zero-extend (movzwl) where
; the unflagged and nuw-only variants use cwtl.
; NOTE(review): presumably nsw lets the sign bit of the shifted positive
; constant be proven zero, so the sext can be emitted as a zext; confirm
; against the known-bits handling of shl.
define i32 @sext_known_nonzero_nsw(i16 %xx) {
; X86-LABEL: sext_known_nonzero_nsw:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $256, %eax # imm = 0x100
; X86-NEXT: shll %cl, %eax
; X86-NEXT: movzwl %ax, %eax
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
;
; X64-LABEL: sext_known_nonzero_nsw:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %ecx
; X64-NEXT: movl $256, %eax # imm = 0x100
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: shll %cl, %eax
; X64-NEXT: movzwl %ax, %eax
; X64-NEXT: rep bsfl %eax, %eax
; X64-NEXT: retq
; Shift the constant 256 left by a variable amount, flagged nsw only.
%x = shl nsw i16 256, %xx
%z = sext i16 %x to i32
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
ret i32 %r
}

; Same pattern as the unflagged variant, but the shl carries both nuw and
; nsw. The expected assembly for both run lines matches the nsw-only
; variant: a zero-extend (movzwl) of the 16-bit shift result followed
; directly by the bsf, with no testl/je zero test.
define i32 @sext_known_nonzero_nuw_nsw(i16 %xx) {
; X86-LABEL: sext_known_nonzero_nuw_nsw:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $256, %eax # imm = 0x100
; X86-NEXT: shll %cl, %eax
; X86-NEXT: movzwl %ax, %eax
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
;
; X64-LABEL: sext_known_nonzero_nuw_nsw:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %ecx
; X64-NEXT: movl $256, %eax # imm = 0x100
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: shll %cl, %eax
; X64-NEXT: movzwl %ax, %eax
; X64-NEXT: rep bsfl %eax, %eax
; X64-NEXT: retq
; Shift the constant 256 left by a variable amount, flagged nuw and nsw.
%x = shl nuw nsw i16 256, %xx
%z = sext i16 %x to i32
%r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
ret i32 %r
}