Skip to content

Commit 25a97c3

Browse files
committed
[InstCombine] visitCallInst - retain undefs in vector funnel shift amounts
Always performing a modulo on the shift amount constants caused undef amounts to be replaced with zero, meaning we were losing funnel shift by splat (with undef) patterns. Tweaked the shift amount bounds check to pass undefs through, and used Constant::mergeUndefsWith to preserve the undefs after folding.
1 parent 6756d43 commit 25a97c3

File tree

3 files changed

+13
-13
lines changed

3 files changed

+13
-13
lines changed

llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -885,15 +885,15 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
885885
Constant *ShAmtC;
886886
if (match(II->getArgOperand(2), m_Constant(ShAmtC)) &&
887887
!isa<ConstantExpr>(ShAmtC) && !ShAmtC->containsConstantExpression()) {
888-
// Canonicalize a shift amount constant operand to modulo the bit-width.
889888
Constant *WidthC = ConstantInt::get(Ty, BitWidth);
890-
Constant *ModuloC = ConstantExpr::getURem(ShAmtC, WidthC);
891-
if (ModuloC != ShAmtC)
892-
return replaceOperand(*II, 2, ModuloC);
893889

894-
assert(ConstantExpr::getICmp(ICmpInst::ICMP_UGT, WidthC, ShAmtC) ==
895-
ConstantInt::getTrue(CmpInst::makeCmpResultType(Ty)) &&
896-
"Shift amount expected to be modulo bitwidth");
890+
// Canonicalize a shift amount constant operand to modulo the bit-width.
891+
if (!match(ShAmtC, m_SpecificInt_ICMP(ICmpInst::ICMP_ULT,
892+
APInt(BitWidth, BitWidth)))) {
893+
Constant *ModuloC = ConstantExpr::getURem(ShAmtC, WidthC);
894+
ModuloC = Constant::mergeUndefsWith(ModuloC, ShAmtC);
895+
return replaceOperand(*II, 2, ModuloC);
896+
}
897897

898898
// Canonicalize funnel shift right by constant to funnel shift left. This
899899
// is not entirely arbitrary. For historical reasons, the backend may

llvm/test/Transforms/InstCombine/funnel.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ define <2 x i32> @fshr_v2i32_constant_nonsplat(<2 x i32> %x, <2 x i32> %y) {
116116

117117
define <2 x i32> @fshr_v2i32_constant_nonsplat_undef0(<2 x i32> %x, <2 x i32> %y) {
118118
; CHECK-LABEL: @fshr_v2i32_constant_nonsplat_undef0(
119-
; CHECK-NEXT: [[R:%.*]] = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> [[Y:%.*]], <2 x i32> [[X:%.*]], <2 x i32> <i32 0, i32 13>)
119+
; CHECK-NEXT: [[R:%.*]] = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> [[Y:%.*]], <2 x i32> [[X:%.*]], <2 x i32> <i32 undef, i32 13>)
120120
; CHECK-NEXT: ret <2 x i32> [[R]]
121121
;
122122
%shr = lshr <2 x i32> %x, <i32 undef, i32 19>
@@ -127,7 +127,7 @@ define <2 x i32> @fshr_v2i32_constant_nonsplat_undef0(<2 x i32> %x, <2 x i32> %y
127127

128128
define <2 x i32> @fshr_v2i32_constant_nonsplat_undef1(<2 x i32> %x, <2 x i32> %y) {
129129
; CHECK-LABEL: @fshr_v2i32_constant_nonsplat_undef1(
130-
; CHECK-NEXT: [[R:%.*]] = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> [[Y:%.*]], <2 x i32> [[X:%.*]], <2 x i32> <i32 15, i32 0>)
130+
; CHECK-NEXT: [[R:%.*]] = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> [[Y:%.*]], <2 x i32> [[X:%.*]], <2 x i32> <i32 15, i32 undef>)
131131
; CHECK-NEXT: ret <2 x i32> [[R]]
132132
;
133133
%shr = lshr <2 x i32> %x, <i32 17, i32 19>
@@ -149,7 +149,7 @@ define <2 x i36> @fshl_v2i36_constant_nonsplat(<2 x i36> %x, <2 x i36> %y) {
149149

150150
define <3 x i36> @fshl_v3i36_constant_nonsplat_undef0(<3 x i36> %x, <3 x i36> %y) {
151151
; CHECK-LABEL: @fshl_v3i36_constant_nonsplat_undef0(
152-
; CHECK-NEXT: [[R:%.*]] = call <3 x i36> @llvm.fshl.v3i36(<3 x i36> [[X:%.*]], <3 x i36> [[Y:%.*]], <3 x i36> <i36 21, i36 11, i36 0>)
152+
; CHECK-NEXT: [[R:%.*]] = call <3 x i36> @llvm.fshl.v3i36(<3 x i36> [[X:%.*]], <3 x i36> [[Y:%.*]], <3 x i36> <i36 21, i36 11, i36 undef>)
153153
; CHECK-NEXT: ret <3 x i36> [[R]]
154154
;
155155
%shl = shl <3 x i36> %x, <i36 21, i36 11, i36 undef>

llvm/test/Transforms/InstCombine/rotate.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,7 @@ define <2 x i32> @rotr_v2i32_constant_nonsplat(<2 x i32> %x) {
138138

139139
define <2 x i32> @rotr_v2i32_constant_nonsplat_undef0(<2 x i32> %x) {
140140
; CHECK-LABEL: @rotr_v2i32_constant_nonsplat_undef0(
141-
; CHECK-NEXT: [[R:%.*]] = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> [[X:%.*]], <2 x i32> [[X]], <2 x i32> <i32 0, i32 19>)
141+
; CHECK-NEXT: [[R:%.*]] = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> [[X:%.*]], <2 x i32> [[X]], <2 x i32> <i32 undef, i32 19>)
142142
; CHECK-NEXT: ret <2 x i32> [[R]]
143143
;
144144
%shl = shl <2 x i32> %x, <i32 undef, i32 19>
@@ -149,7 +149,7 @@ define <2 x i32> @rotr_v2i32_constant_nonsplat_undef0(<2 x i32> %x) {
149149

150150
define <2 x i32> @rotr_v2i32_constant_nonsplat_undef1(<2 x i32> %x) {
151151
; CHECK-LABEL: @rotr_v2i32_constant_nonsplat_undef1(
152-
; CHECK-NEXT: [[R:%.*]] = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> [[X:%.*]], <2 x i32> [[X]], <2 x i32> <i32 17, i32 0>)
152+
; CHECK-NEXT: [[R:%.*]] = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> [[X:%.*]], <2 x i32> [[X]], <2 x i32> <i32 17, i32 undef>)
153153
; CHECK-NEXT: ret <2 x i32> [[R]]
154154
;
155155
%shl = shl <2 x i32> %x, <i32 17, i32 19>
@@ -171,7 +171,7 @@ define <2 x i36> @rotl_v2i36_constant_nonsplat(<2 x i36> %x) {
171171

172172
define <3 x i36> @rotl_v3i36_constant_nonsplat_undef0(<3 x i36> %x) {
173173
; CHECK-LABEL: @rotl_v3i36_constant_nonsplat_undef0(
174-
; CHECK-NEXT: [[R:%.*]] = call <3 x i36> @llvm.fshl.v3i36(<3 x i36> [[X:%.*]], <3 x i36> [[X]], <3 x i36> <i36 21, i36 11, i36 0>)
174+
; CHECK-NEXT: [[R:%.*]] = call <3 x i36> @llvm.fshl.v3i36(<3 x i36> [[X:%.*]], <3 x i36> [[X]], <3 x i36> <i36 21, i36 11, i36 undef>)
175175
; CHECK-NEXT: ret <3 x i36> [[R]]
176176
;
177177
%shl = shl <3 x i36> %x, <i36 21, i36 11, i36 undef>

0 commit comments

Comments
 (0)