Skip to content

Commit b2165f2

Browse files
authored
[CostModel] Account for power-2 urem in funnel shift costs (#127037)
As can be seen in https://godbolt.org/z/qvMqY79cK, a urem by a power-of-2 constant will be code-generated as an And with a mask. The cost model for funnel shifts tries to account for that by passing OP_PowerOf2 as the operand info for the second operand. As far as I can tell, though, returning a lower cost for a urem with an OP_PowerOf2 operand is only implemented on X86. This patch short-cuts that by calling getArithmeticInstrCost(And, ..) directly when we know the type size will be a power of 2. This is an alternative to the patch in #126912, which is a more general solution for power-of-2 udiv/urem costs; this patch more narrowly fixes just funnel shifts.
1 parent de09986 commit b2165f2

File tree

5 files changed

+58
-51
lines changed

5 files changed

+58
-51
lines changed

llvm/include/llvm/CodeGen/BasicTTIImpl.h

Lines changed: 16 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -1891,10 +1891,6 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
18911891
const TTI::OperandValueInfo OpInfoX = TTI::getOperandInfo(X);
18921892
const TTI::OperandValueInfo OpInfoY = TTI::getOperandInfo(Y);
18931893
const TTI::OperandValueInfo OpInfoZ = TTI::getOperandInfo(Z);
1894-
const TTI::OperandValueInfo OpInfoBW =
1895-
{TTI::OK_UniformConstantValue,
1896-
isPowerOf2_32(RetTy->getScalarSizeInBits()) ? TTI::OP_PowerOf2
1897-
: TTI::OP_None};
18981894

18991895
// fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
19001896
// fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
@@ -1909,10 +1905,15 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
19091905
Cost += thisT()->getArithmeticInstrCost(
19101906
BinaryOperator::LShr, RetTy, CostKind, OpInfoY,
19111907
{OpInfoZ.Kind, TTI::OP_None});
1912-
// Non-constant shift amounts requires a modulo.
1908+
// Non-constant shift amounts requires a modulo. If the typesize is a
1909+
// power-2 then this will be converted to an and, otherwise it will use a
1910+
// urem.
19131911
if (!OpInfoZ.isConstant())
1914-
Cost += thisT()->getArithmeticInstrCost(BinaryOperator::URem, RetTy,
1915-
CostKind, OpInfoZ, OpInfoBW);
1912+
Cost += thisT()->getArithmeticInstrCost(
1913+
isPowerOf2_32(RetTy->getScalarSizeInBits()) ? BinaryOperator::And
1914+
: BinaryOperator::URem,
1915+
RetTy, CostKind, OpInfoZ,
1916+
{TTI::OK_UniformConstantValue, TTI::OP_None});
19161917
// For non-rotates (X != Y) we must add shift-by-zero handling costs.
19171918
if (X != Y) {
19181919
Type *CondTy = RetTy->getWithNewBitWidth(1);
@@ -2611,8 +2612,14 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
26112612
thisT()->getArithmeticInstrCost(BinaryOperator::Shl, RetTy, CostKind);
26122613
Cost += thisT()->getArithmeticInstrCost(BinaryOperator::LShr, RetTy,
26132614
CostKind);
2614-
Cost += thisT()->getArithmeticInstrCost(BinaryOperator::URem, RetTy,
2615-
CostKind);
2615+
// Non-constant shift amounts requires a modulo. If the typesize is a
2616+
// power-2 then this will be converted to an and, otherwise it will use a
2617+
// urem.
2618+
Cost += thisT()->getArithmeticInstrCost(
2619+
isPowerOf2_32(RetTy->getScalarSizeInBits()) ? BinaryOperator::And
2620+
: BinaryOperator::URem,
2621+
RetTy, CostKind, {TTI::OK_AnyValue, TTI::OP_None},
2622+
{TTI::OK_UniformConstantValue, TTI::OP_None});
26162623
// Shift-by-zero handling.
26172624
Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
26182625
CmpInst::ICMP_EQ, CostKind);

llvm/test/Analysis/CostModel/AArch64/fshl.ll

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@ entry:
1515

1616
define i8 @fshl_i8_3rd_arg_var(i8 %a, i8 %b, i8 %c) {
1717
; CHECK-LABEL: 'fshl_i8_3rd_arg_var'
18-
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %fshl = tail call i8 @llvm.fshl.i8(i8 %a, i8 %b, i8 %c)
18+
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %fshl = tail call i8 @llvm.fshl.i8(i8 %a, i8 %b, i8 %c)
1919
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %fshl
2020
;
2121
entry:
@@ -49,7 +49,7 @@ entry:
4949

5050
define i32 @fshl_i32_3rd_arg_var(i32 %a, i32 %b, i32 %c) {
5151
; CHECK-LABEL: 'fshl_i32_3rd_arg_var'
52-
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %fshl = tail call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c)
52+
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %fshl = tail call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c)
5353
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %fshl
5454
;
5555
entry:
@@ -71,7 +71,7 @@ entry:
7171

7272
define i64 @fshl_i64_3rd_arg_var(i64 %a, i64 %b, i64 %c) {
7373
; CHECK-LABEL: 'fshl_i64_3rd_arg_var'
74-
; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %fshl = tail call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 %c)
74+
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %fshl = tail call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 %c)
7575
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %fshl
7676
;
7777
entry:
@@ -116,7 +116,7 @@ entry:
116116

117117
define <16 x i8> @fshl_v16i8_3rd_arg_var(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
118118
; CHECK-LABEL: 'fshl_v16i8_3rd_arg_var'
119-
; CHECK-NEXT: Cost Model: Found an estimated cost of 118 for instruction: %fshl = tail call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c)
119+
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %fshl = tail call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c)
120120
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %fshl
121121
;
122122
entry:
@@ -148,7 +148,7 @@ entry:
148148

149149
define <8 x i16> @fshl_v8i16_3rd_arg_var(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) {
150150
; CHECK-LABEL: 'fshl_v8i16_3rd_arg_var'
151-
; CHECK-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %fshl = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c)
151+
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %fshl = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c)
152152
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %fshl
153153
;
154154
entry:
@@ -180,7 +180,7 @@ entry:
180180

181181
define <4 x i32> @fshl_v4i32_3rd_arg_var(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
182182
; CHECK-LABEL: 'fshl_v4i32_3rd_arg_var'
183-
; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %fshl = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c)
183+
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %fshl = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c)
184184
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %fshl
185185
;
186186
entry:
@@ -212,7 +212,7 @@ entry:
212212

213213
define <2 x i64> @fshl_v2i64_3rd_arg_var(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) {
214214
; CHECK-LABEL: 'fshl_v2i64_3rd_arg_var'
215-
; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %fshl = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c)
215+
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %fshl = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c)
216216
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %fshl
217217
;
218218
entry:

llvm/test/Analysis/CostModel/AArch64/fshr.ll

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@ entry:
1515

1616
define i8 @fshr_i8_3rd_arg_var(i8 %a, i8 %b, i8 %c) {
1717
; CHECK-LABEL: 'fshr_i8_3rd_arg_var'
18-
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %fshr = tail call i8 @llvm.fshr.i8(i8 %a, i8 %b, i8 %c)
18+
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %fshr = tail call i8 @llvm.fshr.i8(i8 %a, i8 %b, i8 %c)
1919
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %fshr
2020
;
2121
entry:
@@ -49,7 +49,7 @@ entry:
4949

5050
define i32 @fshr_i32_3rd_arg_var(i32 %a, i32 %b, i32 %c) {
5151
; CHECK-LABEL: 'fshr_i32_3rd_arg_var'
52-
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %fshr = tail call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 %c)
52+
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %fshr = tail call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 %c)
5353
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %fshr
5454
;
5555
entry:
@@ -71,7 +71,7 @@ entry:
7171

7272
define i64 @fshr_i64_3rd_arg_var(i64 %a, i64 %b, i64 %c) {
7373
; CHECK-LABEL: 'fshr_i64_3rd_arg_var'
74-
; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %fshr = tail call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 %c)
74+
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %fshr = tail call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 %c)
7575
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %fshr
7676
;
7777
entry:
@@ -116,7 +116,7 @@ entry:
116116

117117
define <16 x i8> @fshr_v16i8_3rd_arg_var(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
118118
; CHECK-LABEL: 'fshr_v16i8_3rd_arg_var'
119-
; CHECK-NEXT: Cost Model: Found an estimated cost of 118 for instruction: %fshr = tail call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c)
119+
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %fshr = tail call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c)
120120
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %fshr
121121
;
122122
entry:
@@ -148,7 +148,7 @@ entry:
148148

149149
define <8 x i16> @fshr_v8i16_3rd_arg_var(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) {
150150
; CHECK-LABEL: 'fshr_v8i16_3rd_arg_var'
151-
; CHECK-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %fshr = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c)
151+
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %fshr = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c)
152152
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %fshr
153153
;
154154
entry:
@@ -180,7 +180,7 @@ entry:
180180

181181
define <4 x i32> @fshr_v4i32_3rd_arg_var(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
182182
; CHECK-LABEL: 'fshr_v4i32_3rd_arg_var'
183-
; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %fshr = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c)
183+
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %fshr = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c)
184184
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %fshr
185185
;
186186
entry:
@@ -212,7 +212,7 @@ entry:
212212

213213
define <2 x i64> @fshr_v2i64_3rd_arg_var(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) {
214214
; CHECK-LABEL: 'fshr_v2i64_3rd_arg_var'
215-
; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %fshr = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c)
215+
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %fshr = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c)
216216
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %fshr
217217
;
218218
entry:

0 commit comments

Comments
 (0)