Skip to content

Commit d74533a

Browse files
committed
[X86] Add sse4.1 RUNs lines to the min/max reduction cost model tests.
Mostly this matches the sse4.2 we already had command lines for. Except in the i64 case since sse4.1 doesn't have pcmpgtq.
1 parent 24562c6 commit d74533a

File tree

6 files changed

+156
-118
lines changed

6 files changed

+156
-118
lines changed

llvm/test/Analysis/CostModel/X86/reduce-fmax.ll

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
22
; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE2
33
; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+ssse3 | FileCheck %s --check-prefixes=CHECK,SSE,SSSE3
4-
; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+sse4.2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE42
4+
; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+sse4.1 | FileCheck %s --check-prefixes=CHECK,SSE,SSE4,SSE41
5+
; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+sse4.2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE4,SSE42
56
; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX,AVX1
67
; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX2
78
; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F
@@ -60,14 +61,14 @@ define i32 @reduce_f32(i32 %arg) {
6061
; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32 = call float @llvm.experimental.vector.reduce.fmax.v32f32(<32 x float> undef)
6162
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
6263
;
63-
; SSE42-LABEL: 'reduce_f32'
64-
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call float @llvm.experimental.vector.reduce.fmax.v1f32(<1 x float> undef)
65-
; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call float @llvm.experimental.vector.reduce.fmax.v2f32(<2 x float> undef)
66-
; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call float @llvm.experimental.vector.reduce.fmax.v4f32(<4 x float> undef)
67-
; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call float @llvm.experimental.vector.reduce.fmax.v8f32(<8 x float> undef)
68-
; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call float @llvm.experimental.vector.reduce.fmax.v16f32(<16 x float> undef)
69-
; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32 = call float @llvm.experimental.vector.reduce.fmax.v32f32(<32 x float> undef)
70-
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
64+
; SSE4-LABEL: 'reduce_f32'
65+
; SSE4-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call float @llvm.experimental.vector.reduce.fmax.v1f32(<1 x float> undef)
66+
; SSE4-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call float @llvm.experimental.vector.reduce.fmax.v2f32(<2 x float> undef)
67+
; SSE4-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call float @llvm.experimental.vector.reduce.fmax.v4f32(<4 x float> undef)
68+
; SSE4-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call float @llvm.experimental.vector.reduce.fmax.v8f32(<8 x float> undef)
69+
; SSE4-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call float @llvm.experimental.vector.reduce.fmax.v16f32(<16 x float> undef)
70+
; SSE4-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32 = call float @llvm.experimental.vector.reduce.fmax.v32f32(<32 x float> undef)
71+
; SSE4-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
7172
;
7273
; AVX-LABEL: 'reduce_f32'
7374
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call float @llvm.experimental.vector.reduce.fmax.v1f32(<1 x float> undef)

llvm/test/Analysis/CostModel/X86/reduce-fmin.ll

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
22
; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE2
33
; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+ssse3 | FileCheck %s --check-prefixes=CHECK,SSE,SSSE3
4-
; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+sse4.2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE42
4+
; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+sse4.1 | FileCheck %s --check-prefixes=CHECK,SSE,SSE4,SSE41
5+
; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+sse4.2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE4,SSE42
56
; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX,AVX1
67
; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX2
78
; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F
@@ -60,14 +61,14 @@ define i32 @reduce_f32(i32 %arg) {
6061
; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32 = call float @llvm.experimental.vector.reduce.fmin.v32f32(<32 x float> undef)
6162
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
6263
;
63-
; SSE42-LABEL: 'reduce_f32'
64-
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call float @llvm.experimental.vector.reduce.fmin.v1f32(<1 x float> undef)
65-
; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call float @llvm.experimental.vector.reduce.fmin.v2f32(<2 x float> undef)
66-
; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call float @llvm.experimental.vector.reduce.fmin.v4f32(<4 x float> undef)
67-
; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call float @llvm.experimental.vector.reduce.fmin.v8f32(<8 x float> undef)
68-
; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call float @llvm.experimental.vector.reduce.fmin.v16f32(<16 x float> undef)
69-
; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32 = call float @llvm.experimental.vector.reduce.fmin.v32f32(<32 x float> undef)
70-
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
64+
; SSE4-LABEL: 'reduce_f32'
65+
; SSE4-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call float @llvm.experimental.vector.reduce.fmin.v1f32(<1 x float> undef)
66+
; SSE4-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call float @llvm.experimental.vector.reduce.fmin.v2f32(<2 x float> undef)
67+
; SSE4-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call float @llvm.experimental.vector.reduce.fmin.v4f32(<4 x float> undef)
68+
; SSE4-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call float @llvm.experimental.vector.reduce.fmin.v8f32(<8 x float> undef)
69+
; SSE4-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call float @llvm.experimental.vector.reduce.fmin.v16f32(<16 x float> undef)
70+
; SSE4-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32 = call float @llvm.experimental.vector.reduce.fmin.v32f32(<32 x float> undef)
71+
; SSE4-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
7172
;
7273
; AVX-LABEL: 'reduce_f32'
7374
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call float @llvm.experimental.vector.reduce.fmin.v1f32(<1 x float> undef)

llvm/test/Analysis/CostModel/X86/reduce-smax.ll

Lines changed: 34 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
22
; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE2
33
; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+ssse3 | FileCheck %s --check-prefixes=CHECK,SSE,SSSE3
4-
; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+sse4.2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE42
4+
; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+sse4.1 | FileCheck %s --check-prefixes=CHECK,SSE,SSE4,SSE41
5+
; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+sse4.2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE4,SSE42
56
; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX,AVX1
67
; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX2
78
; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F
@@ -25,6 +26,14 @@ define i32 @reduce_i64(i32 %arg) {
2526
; SSSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.v16i64(<16 x i64> undef)
2627
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
2728
;
29+
; SSE41-LABEL: 'reduce_i64'
30+
; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smax.v1i64(<1 x i64> undef)
31+
; SSE41-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.v2i64(<2 x i64> undef)
32+
; SSE41-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.smax.v4i64(<4 x i64> undef)
33+
; SSE41-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.smax.v8i64(<8 x i64> undef)
34+
; SSE41-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.smax.v16i64(<16 x i64> undef)
35+
; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
36+
;
2837
; SSE42-LABEL: 'reduce_i64'
2938
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.smax.v1i64(<1 x i64> undef)
3039
; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.smax.v2i64(<2 x i64> undef)
@@ -82,13 +91,13 @@ define i32 @reduce_i32(i32 %arg) {
8291
; SSSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.v32i32(<32 x i32> undef)
8392
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
8493
;
85-
; SSE42-LABEL: 'reduce_i32'
86-
; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.v2i32(<2 x i32> undef)
87-
; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> undef)
88-
; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.v8i32(<8 x i32> undef)
89-
; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.v16i32(<16 x i32> undef)
90-
; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.v32i32(<32 x i32> undef)
91-
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
94+
; SSE4-LABEL: 'reduce_i32'
95+
; SSE4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.v2i32(<2 x i32> undef)
96+
; SSE4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.smax.v4i32(<4 x i32> undef)
97+
; SSE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.smax.v8i32(<8 x i32> undef)
98+
; SSE4-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.smax.v16i32(<16 x i32> undef)
99+
; SSE4-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.smax.v32i32(<32 x i32> undef)
100+
; SSE4-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
92101
;
93102
; AVX1-LABEL: 'reduce_i32'
94103
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.smax.v2i32(<2 x i32> undef)
@@ -141,14 +150,14 @@ define i32 @reduce_i16(i32 %arg) {
141150
; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.v64i16(<64 x i16> undef)
142151
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
143152
;
144-
; SSE42-LABEL: 'reduce_i16'
145-
; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16> undef)
146-
; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16> undef)
147-
; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> undef)
148-
; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.v16i16(<16 x i16> undef)
149-
; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.v32i16(<32 x i16> undef)
150-
; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.v64i16(<64 x i16> undef)
151-
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
153+
; SSE4-LABEL: 'reduce_i16'
154+
; SSE4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16> undef)
155+
; SSE4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.smax.v4i16(<4 x i16> undef)
156+
; SSE4-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.smax.v8i16(<8 x i16> undef)
157+
; SSE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.smax.v16i16(<16 x i16> undef)
158+
; SSE4-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.smax.v32i16(<32 x i16> undef)
159+
; SSE4-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.smax.v64i16(<64 x i16> undef)
160+
; SSE4-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
152161
;
153162
; AVX1-LABEL: 'reduce_i16'
154163
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.smax.v2i16(<2 x i16> undef)
@@ -225,15 +234,15 @@ define i32 @reduce_i8(i32 %arg) {
225234
; SSSE3-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.v128i8(<128 x i8> undef)
226235
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
227236
;
228-
; SSE42-LABEL: 'reduce_i8'
229-
; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8> undef)
230-
; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.v4i8(<4 x i8> undef)
231-
; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8> undef)
232-
; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> undef)
233-
; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.v32i8(<32 x i8> undef)
234-
; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.v64i8(<64 x i8> undef)
235-
; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.v128i8(<128 x i8> undef)
236-
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
237+
; SSE4-LABEL: 'reduce_i8'
238+
; SSE4-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8> undef)
239+
; SSE4-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.smax.v4i8(<4 x i8> undef)
240+
; SSE4-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8> undef)
241+
; SSE4-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.smax.v16i8(<16 x i8> undef)
242+
; SSE4-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.smax.v32i8(<32 x i8> undef)
243+
; SSE4-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.smax.v64i8(<64 x i8> undef)
244+
; SSE4-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.smax.v128i8(<128 x i8> undef)
245+
; SSE4-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
237246
;
238247
; AVX1-LABEL: 'reduce_i8'
239248
; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8> undef)

0 commit comments

Comments
 (0)