You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
[CostModel] Account for power-2 urem in funnel shift costs
As can be seen in https://godbolt.org/z/qvMqY79cK, a urem by a power-2 constant
will be code-generated as an And of a mask. The cost model for funnel shifts
tries to account for that by passing OP_PowerOf2 as the operand info for the
second operand. As far as I can tell returning a lower cost for urem with a
OP_PowerOf2 is only implemented on X86 though.
This patch short-cuts that by calling getArithmeticInstrCost(And, ..) directly
when we know the typesize will be a power-of-2. This is an alternative to the
patch in llvm#126912 which is a more general solution for power-2 udiv/urem costs,
this more narrowly just fixes funnel shifts.
; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %fshl = tail call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 %c)
74
+
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %fshl = tail call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 %c)
75
75
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %fshl
76
76
;
77
77
entry:
@@ -116,7 +116,7 @@ entry:
116
116
117
117
define <16 x i8> @fshl_v16i8_3rd_arg_var(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
118
118
; CHECK-LABEL: 'fshl_v16i8_3rd_arg_var'
119
-
; CHECK-NEXT: Cost Model: Found an estimated cost of 118 for instruction: %fshl = tail call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c)
119
+
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %fshl = tail call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c)
120
120
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %fshl
121
121
;
122
122
entry:
@@ -148,7 +148,7 @@ entry:
148
148
149
149
define <8 x i16> @fshl_v8i16_3rd_arg_var(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) {
150
150
; CHECK-LABEL: 'fshl_v8i16_3rd_arg_var'
151
-
; CHECK-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %fshl = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c)
151
+
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %fshl = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c)
152
152
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %fshl
153
153
;
154
154
entry:
@@ -180,7 +180,7 @@ entry:
180
180
181
181
define <4 x i32> @fshl_v4i32_3rd_arg_var(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
182
182
; CHECK-LABEL: 'fshl_v4i32_3rd_arg_var'
183
-
; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %fshl = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c)
183
+
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %fshl = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c)
184
184
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %fshl
185
185
;
186
186
entry:
@@ -212,7 +212,7 @@ entry:
212
212
213
213
define <2 x i64> @fshl_v2i64_3rd_arg_var(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) {
214
214
; CHECK-LABEL: 'fshl_v2i64_3rd_arg_var'
215
-
; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %fshl = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c)
215
+
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %fshl = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c)
216
216
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %fshl
; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %fshr = tail call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 %c)
74
+
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %fshr = tail call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 %c)
75
75
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %fshr
76
76
;
77
77
entry:
@@ -116,7 +116,7 @@ entry:
116
116
117
117
define <16 x i8> @fshr_v16i8_3rd_arg_var(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
118
118
; CHECK-LABEL: 'fshr_v16i8_3rd_arg_var'
119
-
; CHECK-NEXT: Cost Model: Found an estimated cost of 118 for instruction: %fshr = tail call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c)
119
+
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %fshr = tail call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c)
120
120
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %fshr
121
121
;
122
122
entry:
@@ -148,7 +148,7 @@ entry:
148
148
149
149
define <8 x i16> @fshr_v8i16_3rd_arg_var(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) {
150
150
; CHECK-LABEL: 'fshr_v8i16_3rd_arg_var'
151
-
; CHECK-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %fshr = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c)
151
+
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %fshr = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c)
152
152
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %fshr
153
153
;
154
154
entry:
@@ -180,7 +180,7 @@ entry:
180
180
181
181
define <4 x i32> @fshr_v4i32_3rd_arg_var(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
182
182
; CHECK-LABEL: 'fshr_v4i32_3rd_arg_var'
183
-
; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %fshr = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c)
183
+
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %fshr = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c)
184
184
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %fshr
185
185
;
186
186
entry:
@@ -212,7 +212,7 @@ entry:
212
212
213
213
define <2 x i64> @fshr_v2i64_3rd_arg_var(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) {
214
214
; CHECK-LABEL: 'fshr_v2i64_3rd_arg_var'
215
-
; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %fshr = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c)
215
+
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %fshr = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c)
216
216
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %fshr
; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %1 = call <vscale x 16 x i8> @llvm.fshr.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef)
1015
-
; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %2 = call <vscale x 8 x i16> @llvm.fshr.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i16> undef)
1016
-
; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %3 = call <vscale x 4 x i32> @llvm.fshr.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i32> undef)
1017
-
; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %4 = call <vscale x 2 x i64> @llvm.fshr.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i64> undef)
1014
+
; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %1 = call <vscale x 16 x i8> @llvm.fshr.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef)
1015
+
; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %2 = call <vscale x 8 x i16> @llvm.fshr.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i16> undef)
1016
+
; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %3 = call <vscale x 4 x i32> @llvm.fshr.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i32> undef)
1017
+
; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %4 = call <vscale x 2 x i64> @llvm.fshr.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i64> undef)
1018
1018
; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
1019
1019
;
1020
1020
; CHECK-VSCALE-2-LABEL: 'fshr'
1021
-
; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %1 = call <vscale x 16 x i8> @llvm.fshr.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef)
1022
-
; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %2 = call <vscale x 8 x i16> @llvm.fshr.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i16> undef)
1023
-
; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %3 = call <vscale x 4 x i32> @llvm.fshr.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i32> undef)
1024
-
; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %4 = call <vscale x 2 x i64> @llvm.fshr.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i64> undef)
1021
+
; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %1 = call <vscale x 16 x i8> @llvm.fshr.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef)
1022
+
; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %2 = call <vscale x 8 x i16> @llvm.fshr.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i16> undef)
1023
+
; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %3 = call <vscale x 4 x i32> @llvm.fshr.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i32> undef)
1024
+
; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %4 = call <vscale x 2 x i64> @llvm.fshr.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i64> undef)
1025
1025
; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
1026
1026
;
1027
1027
; TYPE_BASED_ONLY-LABEL: 'fshr'
@@ -1040,17 +1040,17 @@ define void @fshr() #0 {
1040
1040
1041
1041
definevoid@fshl() #0 {
1042
1042
; CHECK-VSCALE-1-LABEL: 'fshl'
1043
-
; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %1 = call <vscale x 16 x i8> @llvm.fshl.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef)
1044
-
; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %2 = call <vscale x 8 x i16> @llvm.fshl.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i16> undef)
1045
-
; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %3 = call <vscale x 4 x i32> @llvm.fshl.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i32> undef)
1046
-
; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %4 = call <vscale x 2 x i64> @llvm.fshl.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i64> undef)
1043
+
; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %1 = call <vscale x 16 x i8> @llvm.fshl.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef)
1044
+
; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %2 = call <vscale x 8 x i16> @llvm.fshl.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i16> undef)
1045
+
; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %3 = call <vscale x 4 x i32> @llvm.fshl.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i32> undef)
1046
+
; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %4 = call <vscale x 2 x i64> @llvm.fshl.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i64> undef)
1047
1047
; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
1048
1048
;
1049
1049
; CHECK-VSCALE-2-LABEL: 'fshl'
1050
-
; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %1 = call <vscale x 16 x i8> @llvm.fshl.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef)
1051
-
; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %2 = call <vscale x 8 x i16> @llvm.fshl.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i16> undef)
1052
-
; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %3 = call <vscale x 4 x i32> @llvm.fshl.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i32> undef)
1053
-
; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %4 = call <vscale x 2 x i64> @llvm.fshl.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i64> undef)
1050
+
; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %1 = call <vscale x 16 x i8> @llvm.fshl.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef)
1051
+
; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %2 = call <vscale x 8 x i16> @llvm.fshl.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i16> undef)
1052
+
; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %3 = call <vscale x 4 x i32> @llvm.fshl.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i32> undef)
1053
+
; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %4 = call <vscale x 2 x i64> @llvm.fshl.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i64> undef)
1054
1054
; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
definevoid@fshl(i32%a, i32%b, i32%c, <16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc) {
232
232
; THRU-LABEL: 'fshl'
233
233
; THRU-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %s = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c)
234
-
; THRU-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc)
234
+
; THRU-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc)
235
235
; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
236
236
;
237
237
; LATE-LABEL: 'fshl'
238
238
; LATE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %s = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c)
239
-
; LATE-NEXT: Cost Model: Found an estimated cost of 250 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc)
239
+
; LATE-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc)
240
240
; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
241
241
;
242
242
; SIZE-LABEL: 'fshl'
243
243
; SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %s = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c)
244
-
; SIZE-NEXT: Cost Model: Found an estimated cost of 229 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc)
244
+
; SIZE-NEXT: Cost Model: Found an estimated cost of 89 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc)
245
245
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
246
246
;
247
247
; SIZE_LATE-LABEL: 'fshl'
248
248
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %s = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c)
249
-
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 250 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc)
249
+
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc)
250
250
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
0 commit comments