Skip to content

Commit 1ee9576

Browse files
authored
[InstCombine] Funnel shift with negative amount folds to funnel shift in opposite direction (#138334) (#138763)
Partially `fixes` #138334. Combines fshl(X, X, Neg(Y)) into fshr(X, X, Y) and fshr(X, X, Neg(Y)) into fshl(X, X, Y) when the bit width is a power of two.
1 parent 3feb8b4 commit 1ee9576

File tree

2 files changed

+81
-0
lines changed

2 files changed

+81
-0
lines changed

llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2300,6 +2300,18 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
23002300
return BitOp;
23012301
}
23022302

2303+
// fshl(X, X, Neg(Y)) --> fshr(X, X, Y)
2304+
// fshr(X, X, Neg(Y)) --> fshl(X, X, Y)
2305+
// if BitWidth is a power-of-2
2306+
Value *Y;
2307+
if (Op0 == Op1 && isPowerOf2_32(BitWidth) &&
2308+
match(II->getArgOperand(2), m_Neg(m_Value(Y)))) {
2309+
Module *Mod = II->getModule();
2310+
Function *OppositeShift = Intrinsic::getOrInsertDeclaration(
2311+
Mod, IID == Intrinsic::fshl ? Intrinsic::fshr : Intrinsic::fshl, Ty);
2312+
return CallInst::Create(OppositeShift, {Op0, Op1, Y});
2313+
}
2314+
23032315
// fshl(X, 0, Y) --> shl(X, and(Y, BitWidth - 1)) if bitwidth is a
23042316
// power-of-2
23052317
if (IID == Intrinsic::fshl && isPowerOf2_32(BitWidth) &&

llvm/test/Transforms/InstCombine/fsh.ll

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1084,3 +1084,72 @@ define i8 @fshl_range_trunc(i1 %x) {
10841084
%tr = trunc nsw i32 %fshl to i8
10851085
ret i8 %tr
10861086
}
1087+
1088+
;; Issue #138334: a rotate by a negated amount can be folded into a rotate in the opposite direction.
1089+
define i32 @fshl_neg_amount(i32 %x, i32 %y) {
1090+
; CHECK-LABEL: @fshl_neg_amount(
1091+
; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.fshr.i32(i32 [[X:%.*]], i32 [[X]], i32 [[Y:%.*]])
1092+
; CHECK-NEXT: ret i32 [[R]]
1093+
;
1094+
%n = sub i32 0, %y
1095+
%r = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 %n)
1096+
ret i32 %r
1097+
}
1098+
1099+
define i32 @fshr_neg_amount(i32 %x, i32 %y) {
1100+
; CHECK-LABEL: @fshr_neg_amount(
1101+
; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[X]], i32 [[Y:%.*]])
1102+
; CHECK-NEXT: ret i32 [[R]]
1103+
;
1104+
%n = sub i32 0, %y
1105+
%r = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 %n)
1106+
ret i32 %r
1107+
}
1108+
1109+
;; Negative test: the funnel shift is not a rotate (its first two operands differ), so it must not be folded.
1110+
1111+
define i32 @fshl_neg_amount_non_rotate(i32 %x, i32 %y, i32 %z) {
1112+
; CHECK-LABEL: @fshl_neg_amount_non_rotate(
1113+
; CHECK-NEXT: [[N:%.*]] = sub i32 0, [[Y:%.*]]
1114+
; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[Z:%.*]], i32 [[N]])
1115+
; CHECK-NEXT: ret i32 [[R]]
1116+
;
1117+
%n = sub i32 0, %y
1118+
%r = call i32 @llvm.fshl.i32(i32 %x, i32 %z, i32 %n)
1119+
ret i32 %r
1120+
}
1121+
1122+
define i32 @fshr_neg_amount_non_rotate(i32 %x, i32 %y, i32 %z) {
1123+
; CHECK-LABEL: @fshr_neg_amount_non_rotate(
1124+
; CHECK-NEXT: [[N:%.*]] = sub i32 0, [[Y:%.*]]
1125+
; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.fshr.i32(i32 [[X:%.*]], i32 [[Z:%.*]], i32 [[N]])
1126+
; CHECK-NEXT: ret i32 [[R]]
1127+
;
1128+
%n = sub i32 0, %y
1129+
%r = call i32 @llvm.fshr.i32(i32 %x, i32 %z, i32 %n)
1130+
ret i32 %r
1131+
}
1132+
1133+
;; Negative test: the bit width (i31) is not a power of two, so the rotate must not be folded.
1134+
1135+
define i31 @fshl_neg_amount_non_power_two(i31 %x, i31 %y) {
1136+
; CHECK-LABEL: @fshl_neg_amount_non_power_two(
1137+
; CHECK-NEXT: [[N:%.*]] = sub i31 0, [[Y:%.*]]
1138+
; CHECK-NEXT: [[R:%.*]] = call i31 @llvm.fshl.i31(i31 [[X:%.*]], i31 [[X]], i31 [[N]])
1139+
; CHECK-NEXT: ret i31 [[R]]
1140+
;
1141+
%n = sub i31 0, %y
1142+
%r = call i31 @llvm.fshl.i31(i31 %x, i31 %x, i31 %n)
1143+
ret i31 %r
1144+
}
1145+
1146+
define i31 @fshr_neg_amount_non_power_two(i31 %x, i31 %y) {
1147+
; CHECK-LABEL: @fshr_neg_amount_non_power_two(
1148+
; CHECK-NEXT: [[N:%.*]] = sub i31 0, [[Y:%.*]]
1149+
; CHECK-NEXT: [[R:%.*]] = call i31 @llvm.fshr.i31(i31 [[X:%.*]], i31 [[X]], i31 [[N]])
1150+
; CHECK-NEXT: ret i31 [[R]]
1151+
;
1152+
%n = sub i31 0, %y
1153+
%r = call i31 @llvm.fshr.i31(i31 %x, i31 %x, i31 %n)
1154+
ret i31 %r
1155+
}

0 commit comments

Comments
 (0)