Skip to content

Commit 99c4da0

Browse files
committed
[InstCombine] Fold rotate patterns with ZExt/Trunc
Rotation patterns now fold with a Trunc, or when the width change happens at different points on the left and right sides. Proof: https://alive2.llvm.org/ce/z/RkALLB. Fixes llvm#138334
1 parent dec8f13 commit 99c4da0

File tree

4 files changed

+217
-18
lines changed

4 files changed

+217
-18
lines changed

llvm/include/llvm/IR/PatternMatch.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2160,6 +2160,13 @@ m_ZExtOrSelf(const OpTy &Op) {
21602160
return m_CombineOr(m_ZExt(Op), Op);
21612161
}
21622162

2163+
template <typename OpTy>
2164+
inline match_combine_or<CastInst_match<OpTy, TruncInst>,
2165+
match_combine_or<CastInst_match<OpTy, ZExtInst>, OpTy>>
2166+
m_TruncOrZExtOrSelf(const OpTy &Op) {
2167+
return m_CombineOr(m_Trunc(Op), m_ZExtOrSelf(Op));
2168+
}
2169+
21632170
template <typename OpTy>
21642171
inline match_combine_or<CastInst_match<OpTy, SExtInst>, OpTy>
21652172
m_SExtOrSelf(const OpTy &Op) {

llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp

Lines changed: 40 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -2969,31 +2969,53 @@ InstCombinerImpl::convertOrOfShiftsToFunnelShift(Instruction &Or) {
29692969
if (!isPowerOf2_32(Width))
29702970
return nullptr;
29712971

2972-
// The shift amount may be masked with negation:
2973-
// (shl ShVal, (X & (Width - 1))) | (lshr ShVal, ((-X) & (Width - 1)))
2972+
// Check that L and R operate on the same value X. Since the bitwidth of X
2973+
// can differ from L and R, there are multiple possible locations of ZExt
2974+
// or Trunc.
29742975
Value *X;
2976+
const APInt *LMask = nullptr;
2977+
const APInt *RMask = nullptr;
29752978
unsigned Mask = Width - 1;
2976-
if (match(L, m_And(m_Value(X), m_SpecificInt(Mask))) &&
2977-
match(R, m_And(m_Neg(m_Specific(X)), m_SpecificInt(Mask))))
2978-
return X;
2979+
// L is essentially a no-op except for changing the type of X.
2980+
// There are multiple possible patterns, such as X & LMask or a ZExt/Trunc
2981+
match(L, m_TruncOrZExtOrSelf(m_CombineOr(
2982+
m_And(m_TruncOrZExtOrSelf(m_Value(X)), m_APInt(LMask)),
2983+
m_Value(X))));
2984+
2985+
// R should be -X, sometimes (-X) & RMask is used, which is equivalent if
2986+
// RMask >= BitWidth - 1
2987+
const Value *ValueToNegate = nullptr;
2988+
if (!match(R, m_TruncOrZExtOrSelf(m_CombineOr(
2989+
m_And(m_Neg(m_Value(ValueToNegate)), m_APInt(RMask)),
2990+
m_Neg(m_Value(ValueToNegate))))) ||
2991+
(RMask && RMask->ult(Mask)))
2992+
return nullptr;
29792993

2980-
// (shl ShVal, X) | (lshr ShVal, ((-X) & (Width - 1)))
2981-
if (match(R, m_And(m_Neg(m_Specific(L)), m_SpecificInt(Mask))))
2982-
return L;
2994+
// ValueToNegate can be L if the rotate uses a bitwise-and on the shift
2995+
// amount before the rotate pattern.
2996+
if (!match(ValueToNegate, m_TruncOrZExtOrSelf(
2997+
m_CombineOr(m_Specific(X), m_Specific(L)))))
2998+
return nullptr;
29832999

2984-
// Similar to above, but the shift amount may be extended after masking,
2985-
// so return the extended value as the parameter for the intrinsic.
2986-
if (match(L, m_ZExt(m_And(m_Value(X), m_SpecificInt(Mask)))) &&
2987-
match(R,
2988-
m_And(m_Neg(m_ZExt(m_And(m_Specific(X), m_SpecificInt(Mask)))),
2989-
m_SpecificInt(Mask))))
3000+
// L is a no-op, and L is guaranteed to be the same type as the rotate.
3001+
// We reuse the existing Zext/Trunc.
3002+
if (!LMask)
29903003
return L;
29913004

2992-
if (match(L, m_ZExt(m_And(m_Value(X), m_SpecificInt(Mask)))) &&
2993-
match(R, m_ZExt(m_And(m_Neg(m_Specific(X)), m_SpecificInt(Mask)))))
2994-
return L;
3005+
// We can still fold with an LMask < Mask if R solely depends on L (not on
3006+
// X directly)
3007+
if (LMask->ult(Mask))
3008+
return (match(ValueToNegate, m_TruncOrZExtOrSelf(m_Specific(L))))
3009+
? L
3010+
: nullptr;
29953011

2996-
return nullptr;
3012+
// X has the same width as L and LMask >= BitWidth - 1, so L is a no-op.
3013+
Value *matchedX;
3014+
if (match(L, m_And(m_Value(matchedX), m_Value())))
3015+
return matchedX;
3016+
3017+
// L is Zext(And(...)), we can't reuse the Zext/Trunc.
3018+
return L;
29973019
};
29983020

29993021
Value *ShAmt = matchShiftAmount(ShAmt0, ShAmt1, Width);

llvm/test/Transforms/InstCombine/rotate.ll

Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -698,6 +698,104 @@ define i64 @rotateleft_64_zext_neg_mask_amount(i64 %0, i32 %1) {
698698
ret i64 %10
699699
}
700700

701+
define i64 @rotateright_64_zext_double_conversion(i64 %x, i32 %y) {
702+
; CHECK-LABEL: @rotateright_64_zext_double_conversion(
703+
; CHECK-NEXT: [[Z:%.*]] = zext nneg i32 [[Y:%.*]] to i64
704+
; CHECK-NEXT: [[OR:%.*]] = call i64 @llvm.fshr.i64(i64 [[X:%.*]], i64 [[X]], i64 [[Z]])
705+
; CHECK-NEXT: ret i64 [[OR]]
706+
;
707+
%z = zext i32 %y to i64
708+
%neg = sub nsw i32 0, %y
709+
%and2 = and i32 %neg, 63
710+
%conv = zext i32 %and2 to i64
711+
%shl = shl i64 %x, %conv
712+
%shr = lshr i64 %x, %z
713+
%or = or i64 %shr, %shl
714+
ret i64 %or
715+
}
716+
717+
define i32 @rotateright_32_trunc_early(i32 %x, i64 %y) {
718+
; CHECK-LABEL: @rotateright_32_trunc_early(
719+
; CHECK-NEXT: [[Z:%.*]] = trunc i64 [[Y:%.*]] to i32
720+
; CHECK-NEXT: [[OR:%.*]] = call i32 @llvm.fshr.i32(i32 [[X:%.*]], i32 [[X]], i32 [[Z]])
721+
; CHECK-NEXT: ret i32 [[OR]]
722+
;
723+
%z = trunc i64 %y to i32
724+
%neg = sub nsw i32 0, %z
725+
%and2 = and i32 %neg, 31
726+
%shl = shl i32 %x, %and2
727+
%shr = lshr i32 %x, %z
728+
%or = or i32 %shr, %shl
729+
ret i32 %or
730+
}
731+
732+
define i32 @rotateright_32_trunc_neg_mask_amount(i32 %x, i64 %y) {
733+
; CHECK-LABEL: @rotateright_32_trunc_neg_mask_amount(
734+
; CHECK-NEXT: [[Z:%.*]] = trunc i64 [[Y:%.*]] to i32
735+
; CHECK-NEXT: [[OR:%.*]] = call i32 @llvm.fshr.i32(i32 [[X:%.*]], i32 [[X]], i32 [[Z]])
736+
; CHECK-NEXT: ret i32 [[OR]]
737+
;
738+
%z = trunc i64 %y to i32
739+
%neg = sub i64 0, %y
740+
%and2 = and i64 %neg, 31
741+
%conv = trunc i64 %and2 to i32
742+
%shl = shl i32 %x, %conv
743+
%shr = lshr i32 %x, %z
744+
%or = or i32 %shr, %shl
745+
ret i32 %or
746+
}
747+
748+
; restrict the shift amount before rotating
749+
750+
define i32 @rotateleft_32_restricted_shamt(i32 %x, i32 %shAmt) {
751+
; CHECK-LABEL: @rotateleft_32_restricted_shamt(
752+
; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 30
753+
; CHECK-NEXT: [[OR:%.*]] = call i32 @llvm.fshl.i32(i32 [[X]], i32 [[X]], i32 [[AND]])
754+
; CHECK-NEXT: ret i32 [[OR]]
755+
;
756+
%and = and i32 %x, 30
757+
%shl = shl i32 %x, %and
758+
%sub = sub i32 0, %and
759+
%shr = lshr i32 %x, %sub
760+
%or = or i32 %shl, %shr
761+
ret i32 %or
762+
}
763+
764+
; unnecessarily large 'and' masks
765+
766+
define i32 @rotateleft_32_non_restricted_shamt(i32 %x, i32 %t) {
767+
; CHECK-LABEL: @rotateleft_32_non_restricted_shamt(
768+
; CHECK-NEXT: [[OR:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[X]], i32 [[T:%.*]])
769+
; CHECK-NEXT: ret i32 [[OR]]
770+
;
771+
%and = and i32 %t, 31
772+
%shl = shl i32 %x, %and
773+
%sub = sub nsw i32 0, %and
774+
%and2 = and i32 %sub, 31
775+
%shr = lshr i32 %x, %and2
776+
%or = or i32 %shl, %shr
777+
ret i32 %or
778+
}
779+
780+
; negative test - right and mask is too small (should be >=31)
781+
782+
define i32 @rotateleft_32_incorrect_right_mask(i32 %x, i32 %t) {
783+
; CHECK-LABEL: @rotateleft_32_incorrect_right_mask(
784+
; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[X:%.*]], [[T:%.*]]
785+
; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 0, [[T]]
786+
; CHECK-NEXT: [[AND:%.*]] = and i32 [[SUB]], 30
787+
; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[X]], [[AND]]
788+
; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHL]], [[SHR]]
789+
; CHECK-NEXT: ret i32 [[OR]]
790+
;
791+
%shl = shl i32 %x, %t
792+
%sub = sub nsw i32 0, %t
793+
%and = and i32 %sub, 30
794+
%shr = lshr i32 %x, %and
795+
%or = or i32 %shl, %shr
796+
ret i32 %or
797+
}
798+
701799
; Non-power-of-2 types. This could be transformed, but it's not a typical rotate pattern.
702800

703801
define i9 @rotateleft_9_neg_mask_wide_amount_commute(i9 %v, i33 %shamt) {
@@ -1086,3 +1184,42 @@ define i32 @not_rotl_i32_add_less(i32 %x, i32 %y) {
10861184
%r = add i32 %shr, %shl
10871185
ret i32 %r
10881186
}
1187+
1188+
; multi-use tests
1189+
define i32 @rotateleft_32_use_zext(i32 %x, i16 %shAmt) {
1190+
; CHECK-LABEL: @rotateleft_32_use_zext(
1191+
; CHECK-NEXT: [[CONV:%.*]] = zext i16 [[SHAMT:%.*]] to i32
1192+
; CHECK-NEXT: call void @use(i32 [[CONV]])
1193+
; CHECK-NEXT: [[OR:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[X]], i32 [[CONV]])
1194+
; CHECK-NEXT: ret i32 [[OR]]
1195+
;
1196+
%conv = zext i16 %shAmt to i32
1197+
call void @use(i32 %conv)
1198+
%shl = shl i32 %x, %conv
1199+
%sub = sub i32 0, %conv
1200+
%shr = lshr i32 %x, %sub
1201+
%or = or i32 %shl, %shr
1202+
ret i32 %or
1203+
}
1204+
1205+
define i64 @rotateleft_64_use_and(i64 %x, i32 %y) {
1206+
; CHECK-LABEL: @rotateleft_64_use_and(
1207+
; CHECK-NEXT: [[AND:%.*]] = and i32 [[Y:%.*]], 63
1208+
; CHECK-NEXT: [[Z:%.*]] = zext nneg i32 [[AND]] to i64
1209+
; CHECK-NEXT: call void @use(i64 [[Z]])
1210+
; CHECK-NEXT: [[OR:%.*]] = call i64 @llvm.fshr.i64(i64 [[X:%.*]], i64 [[X]], i64 [[Z]])
1211+
; CHECK-NEXT: ret i64 [[OR]]
1212+
;
1213+
%and = and i32 %y, 63
1214+
%z = zext i32 %and to i64
1215+
call void @use(i64 %z)
1216+
%neg = sub nsw i32 0, %y
1217+
%and2 = and i32 %neg, 63
1218+
%conv = zext i32 %and2 to i64
1219+
%shl = shl i64 %x, %conv
1220+
%shr = lshr i64 %x, %z
1221+
%or = or i64 %shr, %shl
1222+
ret i64 %or
1223+
}
1224+
1225+
declare void @use(i32)

llvm/unittests/IR/PatternMatch.cpp

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -635,6 +635,39 @@ TEST_F(PatternMatchTest, ZExtSExtSelf) {
635635
EXPECT_TRUE(m_ZExtOrSExtOrSelf(m_One()).match(One64S));
636636
}
637637

638+
TEST_F(PatternMatchTest, TruncZextSelf) {
639+
LLVMContext &Ctx = IRB.getContext();
640+
641+
Value *One32 = IRB.getInt32(1);
642+
Value *One64 = IRB.getInt64(1);
643+
Value *One32T = IRB.CreateTrunc(One64, IRB.getInt32Ty());
644+
Value *One64Z = IRB.CreateZExt(One32, IntegerType::getInt64Ty(Ctx));
645+
Value *One64S = IRB.CreateSExt(One32, IntegerType::getInt64Ty(Ctx));
646+
647+
EXPECT_TRUE(m_One().match(One32));
648+
EXPECT_TRUE(m_One().match(One64));
649+
EXPECT_FALSE(m_One().match(One32T));
650+
EXPECT_FALSE(m_One().match(One64Z));
651+
EXPECT_FALSE(m_One().match(One64S));
652+
653+
EXPECT_FALSE(m_Trunc(m_One()).match(One32));
654+
EXPECT_TRUE(m_Trunc(m_One()).match(One32T));
655+
EXPECT_FALSE(m_Trunc(m_One()).match(One64Z));
656+
EXPECT_FALSE(m_Trunc(m_One()).match(One64S));
657+
658+
EXPECT_FALSE(m_ZExt(m_One()).match(One32));
659+
EXPECT_FALSE(m_ZExt(m_One()).match(One64));
660+
EXPECT_FALSE(m_ZExt(m_One()).match(One32T));
661+
EXPECT_TRUE(m_ZExt(m_One()).match(One64Z));
662+
EXPECT_FALSE(m_ZExt(m_One()).match(One64S));
663+
664+
EXPECT_TRUE(m_TruncOrZExtOrSelf(m_One()).match(One32));
665+
EXPECT_TRUE(m_TruncOrZExtOrSelf(m_One()).match(One64));
666+
EXPECT_TRUE(m_TruncOrZExtOrSelf(m_One()).match(One32T));
667+
EXPECT_TRUE(m_TruncOrZExtOrSelf(m_One()).match(One64Z));
668+
EXPECT_FALSE(m_TruncOrZExtOrSelf(m_One()).match(One64S));
669+
}
670+
638671
TEST_F(PatternMatchTest, BitCast) {
639672
Value *OneDouble = ConstantFP::get(IRB.getDoubleTy(), APFloat(1.0));
640673
Value *ScalableDouble = ConstantFP::get(

0 commit comments

Comments
 (0)