; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
- ; RUN: opt < %s -vector-combine -S -mtriple=x86_64-- -mattr=sse2 | FileCheck %s
- ; RUN: opt < %s -vector-combine -S -mtriple=x86_64-- -mattr=avx2 | FileCheck %s
+ ; RUN: opt < %s -vector-combine -S -mtriple=x86_64-- -mattr=sse2 | FileCheck %s --check-prefixes=CHECK,SSE2
+ ; RUN: opt < %s -vector-combine -S -mtriple=x86_64-- -mattr=avx2 | FileCheck %s --check-prefixes=CHECK,AVX2

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
@@ -269,36 +269,50 @@ define <8 x i16> @gep01_load_i16_insert_v8i16(<8 x i16>* align 16 dereferenceabl
  ret <8 x i16> %r
}

- ; Negative test - can't safely load the offset vector, but could load+shuffle.
+ ; Can't safely load the offset vector, but can load+shuffle if it is profitable.

define <8 x i16> @gep01_load_i16_insert_v8i16_deref(<8 x i16>* align 16 dereferenceable(17) %p) {
- ; CHECK-LABEL: @gep01_load_i16_insert_v8i16_deref(
- ; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[P:%.*]], i64 0, i64 1
- ; CHECK-NEXT:    [[S:%.*]] = load i16, i16* [[GEP]], align 2
- ; CHECK-NEXT:    [[R:%.*]] = insertelement <8 x i16> undef, i16 [[S]], i64 0
- ; CHECK-NEXT:    ret <8 x i16> [[R]]
+ ; SSE2-LABEL: @gep01_load_i16_insert_v8i16_deref(
+ ; SSE2-NEXT:    [[GEP:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[P:%.*]], i64 0, i64 1
+ ; SSE2-NEXT:    [[S:%.*]] = load i16, i16* [[GEP]], align 2
+ ; SSE2-NEXT:    [[R:%.*]] = insertelement <8 x i16> undef, i16 [[S]], i64 0
+ ; SSE2-NEXT:    ret <8 x i16> [[R]]
+ ;
+ ; AVX2-LABEL: @gep01_load_i16_insert_v8i16_deref(
+ ; AVX2-NEXT:    [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[P:%.*]], align 16
+ ; AVX2-NEXT:    [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ ; AVX2-NEXT:    ret <8 x i16> [[R]]
;
  %gep = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i64 0, i64 1
  %s = load i16, i16* %gep, align 2
  %r = insertelement <8 x i16> undef, i16 %s, i64 0
  ret <8 x i16> %r
}

- ; TODO: Verify that alignment of the new load is not over-specified.
+ ; Verify that alignment of the new load is not over-specified.

define <8 x i16> @gep01_load_i16_insert_v8i16_deref_minalign(<8 x i16>* align 2 dereferenceable(16) %p) {
- ; CHECK-LABEL: @gep01_load_i16_insert_v8i16_deref_minalign(
- ; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[P:%.*]], i64 0, i64 1
- ; CHECK-NEXT:    [[S:%.*]] = load i16, i16* [[GEP]], align 8
- ; CHECK-NEXT:    [[R:%.*]] = insertelement <8 x i16> undef, i16 [[S]], i64 0
- ; CHECK-NEXT:    ret <8 x i16> [[R]]
+ ; SSE2-LABEL: @gep01_load_i16_insert_v8i16_deref_minalign(
+ ; SSE2-NEXT:    [[GEP:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[P:%.*]], i64 0, i64 1
+ ; SSE2-NEXT:    [[S:%.*]] = load i16, i16* [[GEP]], align 8
+ ; SSE2-NEXT:    [[R:%.*]] = insertelement <8 x i16> undef, i16 [[S]], i64 0
+ ; SSE2-NEXT:    ret <8 x i16> [[R]]
+ ;
+ ; AVX2-LABEL: @gep01_load_i16_insert_v8i16_deref_minalign(
+ ; AVX2-NEXT:    [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[P:%.*]], align 2
+ ; AVX2-NEXT:    [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ ; AVX2-NEXT:    ret <8 x i16> [[R]]
;
  %gep = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i64 0, i64 1
  %s = load i16, i16* %gep, align 8
  %r = insertelement <8 x i16> undef, i16 %s, i64 0
  ret <8 x i16> %r
}

+ ; Negative test - if we are shuffling a load from the base pointer, the address offset
+ ; must be a multiple of the element size.
+ ; TODO: Could bitcast around this limitation.
+
define <4 x i32> @gep01_bitcast_load_i32_insert_v4i32(<16 x i8>* align 1 dereferenceable(16) %p) {
; CHECK-LABEL: @gep01_bitcast_load_i32_insert_v4i32(
; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[P:%.*]], i64 0, i64 1
@@ -316,10 +330,9 @@ define <4 x i32> @gep01_bitcast_load_i32_insert_v4i32(<16 x i8>* align 1 derefer

define <4 x i32> @gep012_bitcast_load_i32_insert_v4i32(<16 x i8>* align 1 dereferenceable(20) %p) {
; CHECK-LABEL: @gep012_bitcast_load_i32_insert_v4i32(
- ; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[P:%.*]], i64 0, i64 12
- ; CHECK-NEXT:    [[B:%.*]] = bitcast i8* [[GEP]] to i32*
- ; CHECK-NEXT:    [[S:%.*]] = load i32, i32* [[B]], align 1
- ; CHECK-NEXT:    [[R:%.*]] = insertelement <4 x i32> undef, i32 [[S]], i64 0
+ ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8>* [[P:%.*]] to <4 x i32>*
+ ; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 1
+ ; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> undef, <4 x i32> <i32 3, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT:    ret <4 x i32> [[R]]
;
  %gep = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i64 0, i64 12
@@ -329,6 +342,10 @@ define <4 x i32> @gep012_bitcast_load_i32_insert_v4i32(<16 x i8>* align 1 derefe
  ret <4 x i32> %r
}

+ ; Negative test - if we are shuffling a load from the base pointer, the address offset
+ ; must be a multiple of the element size and the offset must be low enough to fit in the vector
+ ; (bitcasting would not help this case).
+
define <4 x i32> @gep013_bitcast_load_i32_insert_v4i32(<16 x i8>* align 1 dereferenceable(20) %p) {
; CHECK-LABEL: @gep013_bitcast_load_i32_insert_v4i32(
; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[P:%.*]], i64 0, i64 13
@@ -608,15 +625,21 @@ define <8 x i32> @load_v1i32_extract_insert_v8i32_extra_use(<1 x i32>* align 16
  ret <8 x i32> %r
}

- ; TODO: Can't safely load the offset vector, but can load+shuffle if it is profitable.
+ ; Can't safely load the offset vector, but can load+shuffle if it is profitable.

define <8 x i16> @gep1_load_v2i16_extract_insert_v8i16(<2 x i16>* align 1 dereferenceable(16) %p) {
- ; CHECK-LABEL: @gep1_load_v2i16_extract_insert_v8i16(
- ; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds <2 x i16>, <2 x i16>* [[P:%.*]], i64 1
- ; CHECK-NEXT:    [[L:%.*]] = load <2 x i16>, <2 x i16>* [[GEP]], align 8
- ; CHECK-NEXT:    [[S:%.*]] = extractelement <2 x i16> [[L]], i32 0
- ; CHECK-NEXT:    [[R:%.*]] = insertelement <8 x i16> undef, i16 [[S]], i64 0
- ; CHECK-NEXT:    ret <8 x i16> [[R]]
+ ; SSE2-LABEL: @gep1_load_v2i16_extract_insert_v8i16(
+ ; SSE2-NEXT:    [[GEP:%.*]] = getelementptr inbounds <2 x i16>, <2 x i16>* [[P:%.*]], i64 1
+ ; SSE2-NEXT:    [[L:%.*]] = load <2 x i16>, <2 x i16>* [[GEP]], align 8
+ ; SSE2-NEXT:    [[S:%.*]] = extractelement <2 x i16> [[L]], i32 0
+ ; SSE2-NEXT:    [[R:%.*]] = insertelement <8 x i16> undef, i16 [[S]], i64 0
+ ; SSE2-NEXT:    ret <8 x i16> [[R]]
+ ;
+ ; AVX2-LABEL: @gep1_load_v2i16_extract_insert_v8i16(
+ ; AVX2-NEXT:    [[TMP1:%.*]] = bitcast <2 x i16>* [[P:%.*]] to <8 x i16>*
+ ; AVX2-NEXT:    [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]], align 4
+ ; AVX2-NEXT:    [[R:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> undef, <8 x i32> <i32 2, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ ; AVX2-NEXT:    ret <8 x i16> [[R]]
;
  %gep = getelementptr inbounds <2 x i16>, <2 x i16>* %p, i64 1
  %l = load <2 x i16>, <2 x i16>* %gep, align 8
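
The SSE2/AVX2 split above reflects the cost-model-gated rewrite these tests exercise: when the whole vector is known dereferenceable and the target's shuffle is cheap enough, VectorCombine replaces a scalar load of one element plus an insertelement with a single vector load plus a shufflevector. A minimal standalone before/after sketch, runnable through the same `opt -vector-combine` invocation as the RUN lines (the function names @before and @after are illustrative, not part of the test file):

; Before: scalar load of element 1, inserted into lane 0 of a new vector.
define <8 x i16> @before(<8 x i16>* align 16 dereferenceable(16) %p) {
  %gep = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i64 0, i64 1
  %s = load i16, i16* %gep, align 2
  %r = insertelement <8 x i16> undef, i16 %s, i64 0
  ret <8 x i16> %r
}

; After (when the shuffle is deemed profitable): load the full vector once,
; then move lane 1 into lane 0; the remaining lanes are undef.
define <8 x i16> @after(<8 x i16>* align 16 dereferenceable(16) %p) {
  %l = load <8 x i16>, <8 x i16>* %p, align 16
  %r = shufflevector <8 x i16> %l, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x i16> %r
}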