Skip to content

Commit 59ced72

Browse files
authored
[ValueTracking] Add rotate idiom to haveNoCommonBitsSet special cases (#122165)
An occasional idiom for rotation is "(A << B) + (A >> (BitWidth - B))". Currently this is not handled well on targets with native funnel-shift/rotate support. Add a special case to haveNoCommonBitsSet so that InstCombine converts the addition into an `or disjoint`, which instruction selection can then turn into an efficient rotation implementation. Proof: https://alive2.llvm.org/ce/z/WdCZsN
1 parent 70e96dc commit 59ced72

File tree

2 files changed

+124
-6
lines changed

2 files changed

+124
-6
lines changed

llvm/lib/Analysis/ValueTracking.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -229,6 +229,19 @@ static bool haveNoCommonBitsSetSpecialCases(const Value *LHS, const Value *RHS,
229229
return true;
230230
}
231231

232+
// Look for: (X << V) op (Y >> (BitWidth - V))
233+
// or (X >> V) op (Y << (BitWidth - V))
234+
{
235+
const Value *V;
236+
const APInt *R;
237+
if (((match(RHS, m_Shl(m_Value(), m_Sub(m_APInt(R), m_Value(V)))) &&
238+
match(LHS, m_LShr(m_Value(), m_Specific(V)))) ||
239+
(match(RHS, m_LShr(m_Value(), m_Sub(m_APInt(R), m_Value(V)))) &&
240+
match(LHS, m_Shl(m_Value(), m_Specific(V))))) &&
241+
R->uge(LHS->getType()->getScalarSizeInBits()))
242+
return true;
243+
}
244+
232245
return false;
233246
}
234247

llvm/test/Transforms/InstCombine/rotate.ll

Lines changed: 111 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -191,7 +191,7 @@ define i32 @rotl_i32(i32 %x, i32 %y) {
191191
; CHECK-NEXT: [[SUB:%.*]] = sub i32 32, [[Y:%.*]]
192192
; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[X:%.*]], [[Y]]
193193
; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[X]], [[SUB]]
194-
; CHECK-NEXT: [[R:%.*]] = or i32 [[SHR]], [[SHL]]
194+
; CHECK-NEXT: [[R:%.*]] = or disjoint i32 [[SHR]], [[SHL]]
195195
; CHECK-NEXT: ret i32 [[R]]
196196
;
197197
%sub = sub i32 32, %y
@@ -208,7 +208,7 @@ define i37 @rotr_i37(i37 %x, i37 %y) {
208208
; CHECK-NEXT: [[SUB:%.*]] = sub i37 37, [[Y:%.*]]
209209
; CHECK-NEXT: [[SHL:%.*]] = shl i37 [[X:%.*]], [[SUB]]
210210
; CHECK-NEXT: [[SHR:%.*]] = lshr i37 [[X]], [[Y]]
211-
; CHECK-NEXT: [[R:%.*]] = or i37 [[SHR]], [[SHL]]
211+
; CHECK-NEXT: [[R:%.*]] = or disjoint i37 [[SHR]], [[SHL]]
212212
; CHECK-NEXT: ret i37 [[R]]
213213
;
214214
%sub = sub i37 37, %y
@@ -225,7 +225,7 @@ define i8 @rotr_i8_commute(i8 %x, i8 %y) {
225225
; CHECK-NEXT: [[SUB:%.*]] = sub i8 8, [[Y:%.*]]
226226
; CHECK-NEXT: [[SHL:%.*]] = shl i8 [[X:%.*]], [[SUB]]
227227
; CHECK-NEXT: [[SHR:%.*]] = lshr i8 [[X]], [[Y]]
228-
; CHECK-NEXT: [[R:%.*]] = or i8 [[SHL]], [[SHR]]
228+
; CHECK-NEXT: [[R:%.*]] = or disjoint i8 [[SHL]], [[SHR]]
229229
; CHECK-NEXT: ret i8 [[R]]
230230
;
231231
%sub = sub i8 8, %y
@@ -242,7 +242,7 @@ define <4 x i32> @rotl_v4i32(<4 x i32> %x, <4 x i32> %y) {
242242
; CHECK-NEXT: [[SUB:%.*]] = sub <4 x i32> splat (i32 32), [[Y:%.*]]
243243
; CHECK-NEXT: [[SHL:%.*]] = shl <4 x i32> [[X:%.*]], [[Y]]
244244
; CHECK-NEXT: [[SHR:%.*]] = lshr <4 x i32> [[X]], [[SUB]]
245-
; CHECK-NEXT: [[R:%.*]] = or <4 x i32> [[SHL]], [[SHR]]
245+
; CHECK-NEXT: [[R:%.*]] = or disjoint <4 x i32> [[SHL]], [[SHR]]
246246
; CHECK-NEXT: ret <4 x i32> [[R]]
247247
;
248248
%sub = sub <4 x i32> <i32 32, i32 32, i32 32, i32 32>, %y
@@ -259,7 +259,7 @@ define <3 x i42> @rotr_v3i42(<3 x i42> %x, <3 x i42> %y) {
259259
; CHECK-NEXT: [[SUB:%.*]] = sub <3 x i42> splat (i42 42), [[Y:%.*]]
260260
; CHECK-NEXT: [[SHL:%.*]] = shl <3 x i42> [[X:%.*]], [[SUB]]
261261
; CHECK-NEXT: [[SHR:%.*]] = lshr <3 x i42> [[X]], [[Y]]
262-
; CHECK-NEXT: [[R:%.*]] = or <3 x i42> [[SHR]], [[SHL]]
262+
; CHECK-NEXT: [[R:%.*]] = or disjoint <3 x i42> [[SHR]], [[SHL]]
263263
; CHECK-NEXT: ret <3 x i42> [[R]]
264264
;
265265
%sub = sub <3 x i42> <i42 42, i42 42, i42 42>, %y
@@ -838,7 +838,7 @@ define i24 @rotl_select_weird_type(i24 %x, i24 %shamt) {
838838
; CHECK-NEXT: [[SUB:%.*]] = sub i24 24, [[SHAMT]]
839839
; CHECK-NEXT: [[SHR:%.*]] = lshr i24 [[X:%.*]], [[SUB]]
840840
; CHECK-NEXT: [[SHL:%.*]] = shl i24 [[X]], [[SHAMT]]
841-
; CHECK-NEXT: [[OR:%.*]] = or i24 [[SHL]], [[SHR]]
841+
; CHECK-NEXT: [[OR:%.*]] = or disjoint i24 [[SHL]], [[SHR]]
842842
; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP]], i24 [[X]], i24 [[OR]]
843843
; CHECK-NEXT: ret i24 [[R]]
844844
;
@@ -981,3 +981,108 @@ define i16 @check_rotate_masked_16bit(i8 %shamt, i32 %cond) {
981981
%trunc = trunc i32 %or to i16
982982
ret i16 %trunc
983983
}
984+
985+
define i32 @rotl_i32_add(i32 %x, i32 %y) {
986+
; CHECK-LABEL: @rotl_i32_add(
987+
; CHECK-NEXT: [[SUB:%.*]] = sub i32 32, [[Y:%.*]]
988+
; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[X:%.*]], [[Y]]
989+
; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[X]], [[SUB]]
990+
; CHECK-NEXT: [[R:%.*]] = or disjoint i32 [[SHR]], [[SHL]]
991+
; CHECK-NEXT: ret i32 [[R]]
992+
;
993+
%sub = sub i32 32, %y
994+
%shl = shl i32 %x, %y
995+
%shr = lshr i32 %x, %sub
996+
%r = add i32 %shr, %shl
997+
ret i32 %r
998+
}
999+
1000+
define i32 @rotr_i32_add(i32 %x, i32 %y) {
1001+
; CHECK-LABEL: @rotr_i32_add(
1002+
; CHECK-NEXT: [[SUB:%.*]] = sub i32 32, [[Y:%.*]]
1003+
; CHECK-NEXT: [[SHL:%.*]] = lshr i32 [[X:%.*]], [[Y]]
1004+
; CHECK-NEXT: [[SHR:%.*]] = shl i32 [[X]], [[SUB]]
1005+
; CHECK-NEXT: [[R:%.*]] = or disjoint i32 [[SHR]], [[SHL]]
1006+
; CHECK-NEXT: ret i32 [[R]]
1007+
;
1008+
%sub = sub i32 32, %y
1009+
%shl = lshr i32 %x, %y
1010+
%shr = shl i32 %x, %sub
1011+
%r = add i32 %shr, %shl
1012+
ret i32 %r
1013+
}
1014+
1015+
define i32 @fshr_i32_add(i32 %x, i32 %y, i32 %z) {
1016+
; CHECK-LABEL: @fshr_i32_add(
1017+
; CHECK-NEXT: [[SUB:%.*]] = sub i32 32, [[Z:%.*]]
1018+
; CHECK-NEXT: [[SHL:%.*]] = lshr i32 [[X:%.*]], [[Z]]
1019+
; CHECK-NEXT: [[SHR:%.*]] = shl i32 [[Y:%.*]], [[SUB]]
1020+
; CHECK-NEXT: [[R:%.*]] = or disjoint i32 [[SHR]], [[SHL]]
1021+
; CHECK-NEXT: ret i32 [[R]]
1022+
;
1023+
%sub = sub i32 32, %z
1024+
%shl = lshr i32 %x, %z
1025+
%shr = shl i32 %y, %sub
1026+
%r = add i32 %shr, %shl
1027+
ret i32 %r
1028+
}
1029+
1030+
define i32 @fshl_i32_add(i32 %x, i32 %y, i32 %z) {
1031+
; CHECK-LABEL: @fshl_i32_add(
1032+
; CHECK-NEXT: [[SUB:%.*]] = sub i32 32, [[Z:%.*]]
1033+
; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[Y:%.*]], [[Z]]
1034+
; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[X:%.*]], [[SUB]]
1035+
; CHECK-NEXT: [[R:%.*]] = or disjoint i32 [[SHR]], [[SHL]]
1036+
; CHECK-NEXT: ret i32 [[R]]
1037+
;
1038+
%sub = sub i32 32, %z
1039+
%shl = shl i32 %y, %z
1040+
%shr = lshr i32 %x, %sub
1041+
%r = add i32 %shr, %shl
1042+
ret i32 %r
1043+
}
1044+
1045+
define i32 @rotl_i32_add_greater(i32 %x, i32 %y) {
1046+
; CHECK-LABEL: @rotl_i32_add_greater(
1047+
; CHECK-NEXT: [[SUB:%.*]] = sub i32 33, [[Y:%.*]]
1048+
; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[X:%.*]], [[Y]]
1049+
; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[X]], [[SUB]]
1050+
; CHECK-NEXT: [[R:%.*]] = or disjoint i32 [[SHR]], [[SHL]]
1051+
; CHECK-NEXT: ret i32 [[R]]
1052+
;
1053+
%sub = sub i32 33, %y
1054+
%shl = shl i32 %x, %y
1055+
%shr = lshr i32 %x, %sub
1056+
%r = add i32 %shr, %shl
1057+
ret i32 %r
1058+
}
1059+
1060+
define i32 @rotr_i32_add_greater(i32 %x, i32 %y) {
1061+
; CHECK-LABEL: @rotr_i32_add_greater(
1062+
; CHECK-NEXT: [[SUB:%.*]] = sub i32 34, [[Y:%.*]]
1063+
; CHECK-NEXT: [[SHL:%.*]] = lshr i32 [[X:%.*]], [[Y]]
1064+
; CHECK-NEXT: [[SHR:%.*]] = shl i32 [[X]], [[SUB]]
1065+
; CHECK-NEXT: [[R:%.*]] = or disjoint i32 [[SHR]], [[SHL]]
1066+
; CHECK-NEXT: ret i32 [[R]]
1067+
;
1068+
%sub = sub i32 34, %y
1069+
%shl = lshr i32 %x, %y
1070+
%shr = shl i32 %x, %sub
1071+
%r = add i32 %shr, %shl
1072+
ret i32 %r
1073+
}
1074+
1075+
define i32 @not_rotl_i32_add_less(i32 %x, i32 %y) {
1076+
; CHECK-LABEL: @not_rotl_i32_add_less(
1077+
; CHECK-NEXT: [[SUB:%.*]] = sub i32 31, [[Y:%.*]]
1078+
; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[X:%.*]], [[Y]]
1079+
; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[X]], [[SUB]]
1080+
; CHECK-NEXT: [[R:%.*]] = add i32 [[SHR]], [[SHL]]
1081+
; CHECK-NEXT: ret i32 [[R]]
1082+
;
1083+
%sub = sub i32 31, %y
1084+
%shl = shl i32 %x, %y
1085+
%shr = lshr i32 %x, %sub
1086+
%r = add i32 %shr, %shl
1087+
ret i32 %r
1088+
}

0 commit comments

Comments
 (0)