Skip to content

Commit 5c47d3e

Browse files
committed
Remove -O3 in clang codegen test
- Remove the -O3 flag from a couple of clang x86 codegen tests so that the tests do not need to be updated whenever optimizations in LLVM change.
- Change the tests to use utils/update_cc_test_checks.py to generate their assertions.
- Change from apple/darwin triples to the generic x86_64-- and i386-- triples: the platform was not relevant to the tests, and `update_cc_test_checks` seems to be unable to handle platforms that prepend `_` to function names.
1 parent 2ec5c69 commit 5c47d3e

File tree

4 files changed

+1426
-155
lines changed

4 files changed

+1426
-155
lines changed
Lines changed: 178 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -1,101 +1,230 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
12
// REQUIRES: x86-registered-target
2-
// RUN: %clang_cc1 -O3 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-unknown -target-feature +avx -target-feature +avx512f -target-feature +avx512fp16 -S -o - | FileCheck %s
3+
// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-unknown -target-feature +avx -target-feature +avx512f -target-feature +avx512fp16 -emit-llvm -o - | FileCheck %s
34

45

56
#include <immintrin.h>
67

8+
// CHECK-LABEL: define dso_local <4 x double> @test_mm256_castpd128_pd256(
9+
// CHECK-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] {
10+
// CHECK-NEXT: [[ENTRY:.*:]]
11+
// CHECK-NEXT: [[__A_ADDR_I:%.*]] = alloca <2 x double>, align 16
12+
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x double>, align 16
13+
// CHECK-NEXT: store <2 x double> [[A]], ptr [[A_ADDR]], align 16
14+
// CHECK-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[A_ADDR]], align 16
15+
// CHECK-NEXT: store <2 x double> [[TMP0]], ptr [[__A_ADDR_I]], align 16
16+
// CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[__A_ADDR_I]], align 16
17+
// CHECK-NEXT: [[TMP2:%.*]] = freeze <2 x double> poison
18+
// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
19+
// CHECK-NEXT: ret <4 x double> [[SHUFFLE_I]]
20+
//
721
__m256d test_mm256_castpd128_pd256(__m128d A) {
8-
// CHECK-LABEL: test_mm256_castpd128_pd256
9-
// CHECK: # %bb.0:
10-
// CHECK-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
11-
// CHECK-NEXT: ret{{[l|q]}}
1222
return _mm256_castpd128_pd256(A);
1323
}
1424

25+
// CHECK-LABEL: define dso_local <8 x float> @test_mm256_castps128_ps256(
26+
// CHECK-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
27+
// CHECK-NEXT: [[ENTRY:.*:]]
28+
// CHECK-NEXT: [[__A_ADDR_I:%.*]] = alloca <4 x float>, align 16
29+
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <4 x float>, align 16
30+
// CHECK-NEXT: store <4 x float> [[A]], ptr [[A_ADDR]], align 16
31+
// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A_ADDR]], align 16
32+
// CHECK-NEXT: store <4 x float> [[TMP0]], ptr [[__A_ADDR_I]], align 16
33+
// CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[__A_ADDR_I]], align 16
34+
// CHECK-NEXT: [[TMP2:%.*]] = freeze <4 x float> poison
35+
// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
36+
// CHECK-NEXT: ret <8 x float> [[SHUFFLE_I]]
37+
//
1538
__m256 test_mm256_castps128_ps256(__m128 A) {
16-
// CHECK-LABEL: test_mm256_castps128_ps256
17-
// CHECK: # %bb.0:
18-
// CHECK-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
19-
// CHECK-NEXT: ret{{[l|q]}}
2039
return _mm256_castps128_ps256(A);
2140
}
2241

42+
// CHECK-LABEL: define dso_local <4 x i64> @test_mm256_castsi128_si256(
43+
// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
44+
// CHECK-NEXT: [[ENTRY:.*:]]
45+
// CHECK-NEXT: [[__A_ADDR_I:%.*]] = alloca <2 x i64>, align 16
46+
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x i64>, align 16
47+
// CHECK-NEXT: store <2 x i64> [[A]], ptr [[A_ADDR]], align 16
48+
// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr [[A_ADDR]], align 16
49+
// CHECK-NEXT: store <2 x i64> [[TMP0]], ptr [[__A_ADDR_I]], align 16
50+
// CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[__A_ADDR_I]], align 16
51+
// CHECK-NEXT: [[TMP2:%.*]] = freeze <2 x i64> poison
52+
// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
53+
// CHECK-NEXT: ret <4 x i64> [[SHUFFLE_I]]
54+
//
2355
__m256i test_mm256_castsi128_si256(__m128i A) {
24-
// CHECK-LABEL: test_mm256_castsi128_si256
25-
// CHECK: # %bb.0:
26-
// CHECK-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
27-
// CHECK-NEXT: ret{{[l|q]}}
2856
return _mm256_castsi128_si256(A);
2957
}
3058

59+
// CHECK-LABEL: define dso_local <16 x half> @test_mm256_castph128_ph256(
60+
// CHECK-SAME: <8 x half> noundef [[A:%.*]]) #[[ATTR0]] {
61+
// CHECK-NEXT: [[ENTRY:.*:]]
62+
// CHECK-NEXT: [[__A_ADDR_I:%.*]] = alloca <8 x half>, align 16
63+
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <8 x half>, align 16
64+
// CHECK-NEXT: store <8 x half> [[A]], ptr [[A_ADDR]], align 16
65+
// CHECK-NEXT: [[TMP0:%.*]] = load <8 x half>, ptr [[A_ADDR]], align 16
66+
// CHECK-NEXT: store <8 x half> [[TMP0]], ptr [[__A_ADDR_I]], align 16
67+
// CHECK-NEXT: [[TMP1:%.*]] = load <8 x half>, ptr [[__A_ADDR_I]], align 16
68+
// CHECK-NEXT: [[TMP2:%.*]] = freeze <8 x half> poison
69+
// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x half> [[TMP1]], <8 x half> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
70+
// CHECK-NEXT: ret <16 x half> [[SHUFFLE_I]]
71+
//
3172
__m256h test_mm256_castph128_ph256(__m128h A) {
32-
// CHECK-LABEL: test_mm256_castph128_ph256
33-
// CHECK: # %bb.0:
34-
// CHECK-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
35-
// CHECK-NEXT: ret{{[l|q]}}
3673
return _mm256_castph128_ph256(A);
3774
}
3875

76+
// CHECK-LABEL: define dso_local <32 x half> @test_mm512_castph128_ph512(
77+
// CHECK-SAME: <8 x half> noundef [[A:%.*]]) #[[ATTR1:[0-9]+]] {
78+
// CHECK-NEXT: [[ENTRY:.*:]]
79+
// CHECK-NEXT: [[__A_ADDR_I:%.*]] = alloca <8 x half>, align 16
80+
// CHECK-NEXT: [[__B_I:%.*]] = alloca <16 x half>, align 32
81+
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <8 x half>, align 16
82+
// CHECK-NEXT: store <8 x half> [[A]], ptr [[A_ADDR]], align 16
83+
// CHECK-NEXT: [[TMP0:%.*]] = load <8 x half>, ptr [[A_ADDR]], align 16
84+
// CHECK-NEXT: store <8 x half> [[TMP0]], ptr [[__A_ADDR_I]], align 16
85+
// CHECK-NEXT: [[TMP1:%.*]] = freeze <16 x half> poison
86+
// CHECK-NEXT: store <16 x half> [[TMP1]], ptr [[__B_I]], align 32
87+
// CHECK-NEXT: [[TMP2:%.*]] = load <8 x half>, ptr [[__A_ADDR_I]], align 16
88+
// CHECK-NEXT: [[TMP3:%.*]] = freeze <8 x half> poison
89+
// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x half> [[TMP2]], <8 x half> [[TMP3]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
90+
// CHECK-NEXT: [[TMP4:%.*]] = load <16 x half>, ptr [[__B_I]], align 32
91+
// CHECK-NEXT: [[SHUFFLE1_I:%.*]] = shufflevector <16 x half> [[SHUFFLE_I]], <16 x half> [[TMP4]], <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
92+
// CHECK-NEXT: ret <32 x half> [[SHUFFLE1_I]]
93+
//
3994
__m512h test_mm512_castph128_ph512(__m128h A) {
40-
// CHECK-LABEL: test_mm512_castph128_ph512
41-
// CHECK: # %bb.0:
42-
// CHECK-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
43-
// CHECK-NEXT: ret{{[l|q]}}
4495
return _mm512_castph128_ph512(A);
4596
}
4697

98+
// CHECK-LABEL: define dso_local <32 x half> @test_mm512_castph256_ph512(
99+
// CHECK-SAME: <16 x half> noundef [[A:%.*]]) #[[ATTR1]] {
100+
// CHECK-NEXT: [[ENTRY:.*:]]
101+
// CHECK-NEXT: [[__A_ADDR_I:%.*]] = alloca <16 x half>, align 32
102+
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <16 x half>, align 32
103+
// CHECK-NEXT: store <16 x half> [[A]], ptr [[A_ADDR]], align 32
104+
// CHECK-NEXT: [[TMP0:%.*]] = load <16 x half>, ptr [[A_ADDR]], align 32
105+
// CHECK-NEXT: store <16 x half> [[TMP0]], ptr [[__A_ADDR_I]], align 32
106+
// CHECK-NEXT: [[TMP1:%.*]] = load <16 x half>, ptr [[__A_ADDR_I]], align 32
107+
// CHECK-NEXT: [[TMP2:%.*]] = freeze <16 x half> poison
108+
// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <16 x half> [[TMP1]], <16 x half> [[TMP2]], <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
109+
// CHECK-NEXT: ret <32 x half> [[SHUFFLE_I]]
110+
//
47111
__m512h test_mm512_castph256_ph512(__m256h A) {
48-
// CHECK-LABEL: test_mm512_castph256_ph512
49-
// CHECK: # %bb.0:
50-
// CHECK-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
51-
// CHECK-NEXT: ret{{[l|q]}}
52112
return _mm512_castph256_ph512(A);
53113
}
54114

115+
// CHECK-LABEL: define dso_local <8 x double> @test_mm512_castpd256_pd512(
116+
// CHECK-SAME: <4 x double> noundef [[A:%.*]]) #[[ATTR1]] {
117+
// CHECK-NEXT: [[ENTRY:.*:]]
118+
// CHECK-NEXT: [[__A_ADDR_I:%.*]] = alloca <4 x double>, align 32
119+
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <4 x double>, align 32
120+
// CHECK-NEXT: store <4 x double> [[A]], ptr [[A_ADDR]], align 32
121+
// CHECK-NEXT: [[TMP0:%.*]] = load <4 x double>, ptr [[A_ADDR]], align 32
122+
// CHECK-NEXT: store <4 x double> [[TMP0]], ptr [[__A_ADDR_I]], align 32
123+
// CHECK-NEXT: [[TMP1:%.*]] = load <4 x double>, ptr [[__A_ADDR_I]], align 32
124+
// CHECK-NEXT: [[TMP2:%.*]] = freeze <4 x double> poison
125+
// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
126+
// CHECK-NEXT: ret <8 x double> [[SHUFFLE_I]]
127+
//
55128
__m512d test_mm512_castpd256_pd512(__m256d A){
56-
// CHECK-LABEL: test_mm512_castpd256_pd512
57-
// CHECK: # %bb.0:
58-
// CHECK-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
59-
// CHECK-NEXT: ret{{[l|q]}}
60129
return _mm512_castpd256_pd512(A);
61130
}
62131

132+
// CHECK-LABEL: define dso_local <16 x float> @test_mm512_castps256_ps512(
133+
// CHECK-SAME: <8 x float> noundef [[A:%.*]]) #[[ATTR1]] {
134+
// CHECK-NEXT: [[ENTRY:.*:]]
135+
// CHECK-NEXT: [[__A_ADDR_I:%.*]] = alloca <8 x float>, align 32
136+
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <8 x float>, align 32
137+
// CHECK-NEXT: store <8 x float> [[A]], ptr [[A_ADDR]], align 32
138+
// CHECK-NEXT: [[TMP0:%.*]] = load <8 x float>, ptr [[A_ADDR]], align 32
139+
// CHECK-NEXT: store <8 x float> [[TMP0]], ptr [[__A_ADDR_I]], align 32
140+
// CHECK-NEXT: [[TMP1:%.*]] = load <8 x float>, ptr [[__A_ADDR_I]], align 32
141+
// CHECK-NEXT: [[TMP2:%.*]] = freeze <8 x float> poison
142+
// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
143+
// CHECK-NEXT: ret <16 x float> [[SHUFFLE_I]]
144+
//
63145
__m512 test_mm512_castps256_ps512(__m256 A){
64-
// CHECK-LABEL: test_mm512_castps256_ps512
65-
// CHECK: # %bb.0:
66-
// CHECK-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
67-
// CHECK-NEXT: ret{{[l|q]}}
68146
return _mm512_castps256_ps512(A);
69147
}
70148

149+
// CHECK-LABEL: define dso_local <8 x double> @test_mm512_castpd128_pd512(
150+
// CHECK-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR1]] {
151+
// CHECK-NEXT: [[ENTRY:.*:]]
152+
// CHECK-NEXT: [[__A_ADDR_I:%.*]] = alloca <2 x double>, align 16
153+
// CHECK-NEXT: [[__B_I:%.*]] = alloca <4 x double>, align 32
154+
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x double>, align 16
155+
// CHECK-NEXT: store <2 x double> [[A]], ptr [[A_ADDR]], align 16
156+
// CHECK-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[A_ADDR]], align 16
157+
// CHECK-NEXT: store <2 x double> [[TMP0]], ptr [[__A_ADDR_I]], align 16
158+
// CHECK-NEXT: [[TMP1:%.*]] = freeze <4 x double> poison
159+
// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[__B_I]], align 32
160+
// CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[__A_ADDR_I]], align 16
161+
// CHECK-NEXT: [[TMP3:%.*]] = freeze <2 x double> poison
162+
// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
163+
// CHECK-NEXT: [[TMP4:%.*]] = load <4 x double>, ptr [[__B_I]], align 32
164+
// CHECK-NEXT: [[SHUFFLE1_I:%.*]] = shufflevector <4 x double> [[SHUFFLE_I]], <4 x double> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
165+
// CHECK-NEXT: ret <8 x double> [[SHUFFLE1_I]]
166+
//
71167
__m512d test_mm512_castpd128_pd512(__m128d A){
72-
// CHECK-LABEL: test_mm512_castpd128_pd512
73-
// CHECK: # %bb.0:
74-
// CHECK-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
75-
// CHECK-NEXT: ret{{[l|q]}}
76168
return _mm512_castpd128_pd512(A);
77169
}
78170

171+
// CHECK-LABEL: define dso_local <16 x float> @test_mm512_castps128_ps512(
172+
// CHECK-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR1]] {
173+
// CHECK-NEXT: [[ENTRY:.*:]]
174+
// CHECK-NEXT: [[__A_ADDR_I:%.*]] = alloca <4 x float>, align 16
175+
// CHECK-NEXT: [[__B_I:%.*]] = alloca <8 x float>, align 32
176+
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <4 x float>, align 16
177+
// CHECK-NEXT: store <4 x float> [[A]], ptr [[A_ADDR]], align 16
178+
// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A_ADDR]], align 16
179+
// CHECK-NEXT: store <4 x float> [[TMP0]], ptr [[__A_ADDR_I]], align 16
180+
// CHECK-NEXT: [[TMP1:%.*]] = freeze <8 x float> poison
181+
// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[__B_I]], align 32
182+
// CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, ptr [[__A_ADDR_I]], align 16
183+
// CHECK-NEXT: [[TMP3:%.*]] = freeze <4 x float> poison
184+
// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
185+
// CHECK-NEXT: [[TMP4:%.*]] = load <8 x float>, ptr [[__B_I]], align 32
186+
// CHECK-NEXT: [[SHUFFLE1_I:%.*]] = shufflevector <8 x float> [[SHUFFLE_I]], <8 x float> [[TMP4]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
187+
// CHECK-NEXT: ret <16 x float> [[SHUFFLE1_I]]
188+
//
79189
__m512 test_mm512_castps128_ps512(__m128 A){
80-
// CHECK-LABEL: test_mm512_castps128_ps512
81-
// CHECK: # %bb.0:
82-
// CHECK-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
83-
// CHECK-NEXT: ret{{[l|q]}}
84190
return _mm512_castps128_ps512(A);
85191
}
86192

193+
// CHECK-LABEL: define dso_local <8 x i64> @test_mm512_castsi128_si512(
194+
// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR1]] {
195+
// CHECK-NEXT: [[ENTRY:.*:]]
196+
// CHECK-NEXT: [[__A_ADDR_I:%.*]] = alloca <2 x i64>, align 16
197+
// CHECK-NEXT: [[__B_I:%.*]] = alloca <4 x i64>, align 32
198+
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x i64>, align 16
199+
// CHECK-NEXT: store <2 x i64> [[A]], ptr [[A_ADDR]], align 16
200+
// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr [[A_ADDR]], align 16
201+
// CHECK-NEXT: store <2 x i64> [[TMP0]], ptr [[__A_ADDR_I]], align 16
202+
// CHECK-NEXT: [[TMP1:%.*]] = freeze <4 x i64> poison
203+
// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[__B_I]], align 32
204+
// CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr [[__A_ADDR_I]], align 16
205+
// CHECK-NEXT: [[TMP3:%.*]] = freeze <2 x i64> poison
206+
// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
207+
// CHECK-NEXT: [[TMP4:%.*]] = load <4 x i64>, ptr [[__B_I]], align 32
208+
// CHECK-NEXT: [[SHUFFLE1_I:%.*]] = shufflevector <4 x i64> [[SHUFFLE_I]], <4 x i64> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
209+
// CHECK-NEXT: ret <8 x i64> [[SHUFFLE1_I]]
210+
//
87211
__m512i test_mm512_castsi128_si512(__m128i A){
88-
// CHECK-LABEL: test_mm512_castsi128_si512
89-
// CHECK: # %bb.0:
90-
// CHECK-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
91-
// CHECK-NEXT: ret{{[l|q]}}
92212
return _mm512_castsi128_si512(A);
93213
}
94214

215+
// CHECK-LABEL: define dso_local <8 x i64> @test_mm512_castsi256_si512(
216+
// CHECK-SAME: <4 x i64> noundef [[A:%.*]]) #[[ATTR1]] {
217+
// CHECK-NEXT: [[ENTRY:.*:]]
218+
// CHECK-NEXT: [[__A_ADDR_I:%.*]] = alloca <4 x i64>, align 32
219+
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <4 x i64>, align 32
220+
// CHECK-NEXT: store <4 x i64> [[A]], ptr [[A_ADDR]], align 32
221+
// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i64>, ptr [[A_ADDR]], align 32
222+
// CHECK-NEXT: store <4 x i64> [[TMP0]], ptr [[__A_ADDR_I]], align 32
223+
// CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr [[__A_ADDR_I]], align 32
224+
// CHECK-NEXT: [[TMP2:%.*]] = freeze <4 x i64> poison
225+
// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
226+
// CHECK-NEXT: ret <8 x i64> [[SHUFFLE_I]]
227+
//
95228
__m512i test_mm512_castsi256_si512(__m256i A){
96-
// CHECK-LABEL: test_mm512_castsi256_si512
97-
// CHECK: # %bb.0:
98-
// CHECK-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
99-
// CHECK-NEXT: ret{{[l|q]}}
100229
return _mm512_castsi256_si512(A);
101230
}

0 commit comments

Comments
 (0)