Skip to content

Commit a139564

Browse files
committed
[InstCombine] fold funnel shift amount based on demanded bits
The shift amount of a funnel shift is taken modulo the scalar bitwidth (see http://llvm.org/docs/LangRef.html#llvm-fshl-intrinsic), so we can use demanded-bits analysis on that operand to simplify it when the bitwidth is a power of 2. This is another step towards canonicalizing {shift/shift/or} to the intrinsics in IR. Differential Revision: https://reviews.llvm.org/D54478 llvm-svn: 346814
1 parent e0c0071 commit a139564

File tree

2 files changed

+24
-14
lines changed

2 files changed

+24
-14
lines changed

llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1990,6 +1990,20 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
19901990
return I;
19911991
break;
19921992

1993+
case Intrinsic::fshl:
1994+
case Intrinsic::fshr: {
1995+
// The shift amount (operand 2) of a funnel shift is modulo the bitwidth,
1996+
// so only the low bits of the shift amount are demanded if the bitwidth is
1997+
// a power-of-2.
1998+
unsigned BitWidth = II->getType()->getScalarSizeInBits();
1999+
if (!isPowerOf2_32(BitWidth))
2000+
break;
2001+
APInt Op2Demanded = APInt::getLowBitsSet(BitWidth, Log2_32_Ceil(BitWidth));
2002+
KnownBits Op2Known(BitWidth);
2003+
if (SimplifyDemandedBits(II, 2, Op2Demanded, Op2Known))
2004+
return &CI;
2005+
break;
2006+
}
19932007
case Intrinsic::uadd_with_overflow:
19942008
case Intrinsic::sadd_with_overflow:
19952009
case Intrinsic::umul_with_overflow:

llvm/test/Transforms/InstCombine/fsh.ll

Lines changed: 10 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,7 @@ declare <2 x i31> @llvm.fshl.v2i31(<2 x i31>, <2 x i31>, <2 x i31>)
1010

1111
define i32 @fshl_mask_simplify1(i32 %x, i32 %y, i32 %sh) {
1212
; CHECK-LABEL: @fshl_mask_simplify1(
13-
; CHECK-NEXT: [[MASKEDSH:%.*]] = and i32 [[SH:%.*]], 32
14-
; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[Y:%.*]], i32 [[MASKEDSH]])
15-
; CHECK-NEXT: ret i32 [[R]]
13+
; CHECK-NEXT: ret i32 [[X:%.*]]
1614
;
1715
%maskedsh = and i32 %sh, 32
1816
%r = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %maskedsh)
@@ -21,9 +19,7 @@ define i32 @fshl_mask_simplify1(i32 %x, i32 %y, i32 %sh) {
2119

2220
define <2 x i32> @fshr_mask_simplify2(<2 x i32> %x, <2 x i32> %y, <2 x i32> %sh) {
2321
; CHECK-LABEL: @fshr_mask_simplify2(
24-
; CHECK-NEXT: [[MASKEDSH:%.*]] = and <2 x i32> [[SH:%.*]], <i32 64, i32 64>
25-
; CHECK-NEXT: [[R:%.*]] = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> [[MASKEDSH]])
26-
; CHECK-NEXT: ret <2 x i32> [[R]]
22+
; CHECK-NEXT: ret <2 x i32> [[Y:%.*]]
2723
;
2824
%maskedsh = and <2 x i32> %sh, <i32 64, i32 64>
2925
%r = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %maskedsh)
@@ -43,7 +39,7 @@ define i32 @fshl_mask_simplify3(i32 %x, i32 %y, i32 %sh) {
4339
ret i32 %r
4440
}
4541

46-
; Check again with weird bitwidths; log2(33) means we demand the low 6 bits.
42+
; Check again with weird bitwidths - the analysis is invalid with non-power-of-2.
4743

4844
define i33 @fshr_mask_simplify1(i33 %x, i33 %y, i33 %sh) {
4945
; CHECK-LABEL: @fshr_mask_simplify1(
@@ -56,7 +52,7 @@ define i33 @fshr_mask_simplify1(i33 %x, i33 %y, i33 %sh) {
5652
ret i33 %r
5753
}
5854

59-
; Check again with weird bitwidths; log2(31) means we demand the low 5 bits.
55+
; Check again with weird bitwidths - the analysis is invalid with non-power-of-2.
6056

6157
define <2 x i31> @fshl_mask_simplify2(<2 x i31> %x, <2 x i31> %y, <2 x i31> %sh) {
6258
; CHECK-LABEL: @fshl_mask_simplify2(
@@ -69,7 +65,7 @@ define <2 x i31> @fshl_mask_simplify2(<2 x i31> %x, <2 x i31> %y, <2 x i31> %sh)
6965
ret <2 x i31> %r
7066
}
7167

72-
; Negative test.
68+
; Check again with weird bitwidths - the analysis is invalid with non-power-of-2.
7369

7470
define i33 @fshr_mask_simplify3(i33 %x, i33 %y, i33 %sh) {
7571
; CHECK-LABEL: @fshr_mask_simplify3(
@@ -86,8 +82,7 @@ define i33 @fshr_mask_simplify3(i33 %x, i33 %y, i33 %sh) {
8682

8783
define i32 @fshl_mask_not_required(i32 %x, i32 %y, i32 %sh) {
8884
; CHECK-LABEL: @fshl_mask_not_required(
89-
; CHECK-NEXT: [[MASKEDSH:%.*]] = and i32 [[SH:%.*]], 31
90-
; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[Y:%.*]], i32 [[MASKEDSH]])
85+
; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[Y:%.*]], i32 [[SH:%.*]])
9186
; CHECK-NEXT: ret i32 [[R]]
9287
;
9388
%maskedsh = and i32 %sh, 31
@@ -99,7 +94,7 @@ define i32 @fshl_mask_not_required(i32 %x, i32 %y, i32 %sh) {
9994

10095
define i32 @fshl_mask_reduce_constant(i32 %x, i32 %y, i32 %sh) {
10196
; CHECK-LABEL: @fshl_mask_reduce_constant(
102-
; CHECK-NEXT: [[MASKEDSH:%.*]] = and i32 [[SH:%.*]], 33
97+
; CHECK-NEXT: [[MASKEDSH:%.*]] = and i32 [[SH:%.*]], 1
10398
; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[Y:%.*]], i32 [[MASKEDSH]])
10499
; CHECK-NEXT: ret i32 [[R]]
105100
;
@@ -125,15 +120,16 @@ define i32 @fshl_mask_negative(i32 %x, i32 %y, i32 %sh) {
125120

126121
define <2 x i32> @fshr_set_but_not_demanded_vec(<2 x i32> %x, <2 x i32> %y, <2 x i32> %sh) {
127122
; CHECK-LABEL: @fshr_set_but_not_demanded_vec(
128-
; CHECK-NEXT: [[BOGUSBITS:%.*]] = or <2 x i32> [[SH:%.*]], <i32 32, i32 32>
129-
; CHECK-NEXT: [[R:%.*]] = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> [[BOGUSBITS]])
123+
; CHECK-NEXT: [[R:%.*]] = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> [[SH:%.*]])
130124
; CHECK-NEXT: ret <2 x i32> [[R]]
131125
;
132126
%bogusbits = or <2 x i32> %sh, <i32 32, i32 32>
133127
%r = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %bogusbits)
134128
ret <2 x i32> %r
135129
}
136130

131+
; Check again with weird bitwidths - the analysis is invalid with non-power-of-2.
132+
137133
define <2 x i31> @fshl_set_but_not_demanded_vec(<2 x i31> %x, <2 x i31> %y, <2 x i31> %sh) {
138134
; CHECK-LABEL: @fshl_set_but_not_demanded_vec(
139135
; CHECK-NEXT: [[BOGUSBITS:%.*]] = or <2 x i31> [[SH:%.*]], <i31 32, i31 32>

0 commit comments

Comments
 (0)