@@ -290,16 +290,18 @@ define <8 x i16> @gep01_load_i16_insert_v8i16_deref_minalign(ptr align 2 derefer
290
290
ret <8 x i16 > %r
291
291
}
292
292
293
- ; Negative test - if we are shuffling a load from the base pointer, the address offset
294
- ; must be a multiple of element size.
295
- ; TODO: Could bitcast around this limitation.
296
-
297
293
define <4 x i32 > @gep01_bitcast_load_i32_from_v16i8_insert_v4i32 (ptr align 1 dereferenceable (16 ) %p ) {
298
- ; CHECK-LABEL: @gep01_bitcast_load_i32_from_v16i8_insert_v4i32(
299
- ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <16 x i8>, ptr [[P:%.*]], i64 0, i64 1
300
- ; CHECK-NEXT: [[S:%.*]] = load i32, ptr [[GEP]], align 1
301
- ; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i32> poison, i32 [[S]], i64 0
302
- ; CHECK-NEXT: ret <4 x i32> [[R]]
294
+ ; SSE2-LABEL: @gep01_bitcast_load_i32_from_v16i8_insert_v4i32(
295
+ ; SSE2-NEXT: [[GEP:%.*]] = getelementptr inbounds <16 x i8>, ptr [[P:%.*]], i64 0, i64 1
296
+ ; SSE2-NEXT: [[S:%.*]] = load i32, ptr [[GEP]], align 1
297
+ ; SSE2-NEXT: [[R:%.*]] = insertelement <4 x i32> poison, i32 [[S]], i64 0
298
+ ; SSE2-NEXT: ret <4 x i32> [[R]]
299
+ ;
300
+ ; AVX2-LABEL: @gep01_bitcast_load_i32_from_v16i8_insert_v4i32(
301
+ ; AVX2-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[P:%.*]], align 1
302
+ ; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> poison, <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
303
+ ; AVX2-NEXT: [[R:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
304
+ ; AVX2-NEXT: ret <4 x i32> [[R]]
303
305
;
304
306
%gep = getelementptr inbounds <16 x i8 >, ptr %p , i64 0 , i64 1
305
307
%s = load i32 , ptr %gep , align 1
@@ -308,11 +310,17 @@ define <4 x i32> @gep01_bitcast_load_i32_from_v16i8_insert_v4i32(ptr align 1 der
308
310
}
309
311
310
312
define <2 x i64 > @gep01_bitcast_load_i64_from_v16i8_insert_v2i64 (ptr align 1 dereferenceable (16 ) %p ) {
311
- ; CHECK-LABEL: @gep01_bitcast_load_i64_from_v16i8_insert_v2i64(
312
- ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <16 x i8>, ptr [[P:%.*]], i64 0, i64 1
313
- ; CHECK-NEXT: [[S:%.*]] = load i64, ptr [[GEP]], align 1
314
- ; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i64> poison, i64 [[S]], i64 0
315
- ; CHECK-NEXT: ret <2 x i64> [[R]]
313
+ ; SSE2-LABEL: @gep01_bitcast_load_i64_from_v16i8_insert_v2i64(
314
+ ; SSE2-NEXT: [[GEP:%.*]] = getelementptr inbounds <16 x i8>, ptr [[P:%.*]], i64 0, i64 1
315
+ ; SSE2-NEXT: [[S:%.*]] = load i64, ptr [[GEP]], align 1
316
+ ; SSE2-NEXT: [[R:%.*]] = insertelement <2 x i64> poison, i64 [[S]], i64 0
317
+ ; SSE2-NEXT: ret <2 x i64> [[R]]
318
+ ;
319
+ ; AVX2-LABEL: @gep01_bitcast_load_i64_from_v16i8_insert_v2i64(
320
+ ; AVX2-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[P:%.*]], align 1
321
+ ; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> poison, <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
322
+ ; AVX2-NEXT: [[R:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
323
+ ; AVX2-NEXT: ret <2 x i64> [[R]]
316
324
;
317
325
%gep = getelementptr inbounds <16 x i8 >, ptr %p , i64 0 , i64 1
318
326
%s = load i64 , ptr %gep , align 1
@@ -321,11 +329,17 @@ define <2 x i64> @gep01_bitcast_load_i64_from_v16i8_insert_v2i64(ptr align 1 der
321
329
}
322
330
323
331
define <4 x i32 > @gep11_bitcast_load_i32_from_v16i8_insert_v4i32 (ptr align 1 dereferenceable (16 ) %p ) {
324
- ; CHECK-LABEL: @gep11_bitcast_load_i32_from_v16i8_insert_v4i32(
325
- ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <16 x i8>, ptr [[P:%.*]], i64 0, i64 11
326
- ; CHECK-NEXT: [[S:%.*]] = load i32, ptr [[GEP]], align 1
327
- ; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i32> poison, i32 [[S]], i64 0
328
- ; CHECK-NEXT: ret <4 x i32> [[R]]
332
+ ; SSE2-LABEL: @gep11_bitcast_load_i32_from_v16i8_insert_v4i32(
333
+ ; SSE2-NEXT: [[GEP:%.*]] = getelementptr inbounds <16 x i8>, ptr [[P:%.*]], i64 0, i64 11
334
+ ; SSE2-NEXT: [[S:%.*]] = load i32, ptr [[GEP]], align 1
335
+ ; SSE2-NEXT: [[R:%.*]] = insertelement <4 x i32> poison, i32 [[S]], i64 0
336
+ ; SSE2-NEXT: ret <4 x i32> [[R]]
337
+ ;
338
+ ; AVX2-LABEL: @gep11_bitcast_load_i32_from_v16i8_insert_v4i32(
339
+ ; AVX2-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[P:%.*]], align 1
340
+ ; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> poison, <16 x i32> <i32 11, i32 12, i32 13, i32 14, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
341
+ ; AVX2-NEXT: [[R:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
342
+ ; AVX2-NEXT: ret <4 x i32> [[R]]
329
343
;
330
344
%gep = getelementptr inbounds <16 x i8 >, ptr %p , i64 0 , i64 11
331
345
%s = load i32 , ptr %gep , align 1
@@ -334,11 +348,17 @@ define <4 x i32> @gep11_bitcast_load_i32_from_v16i8_insert_v4i32(ptr align 1 der
334
348
}
335
349
336
350
define <4 x i32 > @gep01_bitcast_load_i32_from_v8i16_insert_v4i32 (ptr align 1 dereferenceable (16 ) %p ) {
337
- ; CHECK-LABEL: @gep01_bitcast_load_i32_from_v8i16_insert_v4i32(
338
- ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 0, i64 1
339
- ; CHECK-NEXT: [[S:%.*]] = load i32, ptr [[GEP]], align 1
340
- ; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i32> poison, i32 [[S]], i64 0
341
- ; CHECK-NEXT: ret <4 x i32> [[R]]
351
+ ; SSE2-LABEL: @gep01_bitcast_load_i32_from_v8i16_insert_v4i32(
352
+ ; SSE2-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 0, i64 1
353
+ ; SSE2-NEXT: [[S:%.*]] = load i32, ptr [[GEP]], align 1
354
+ ; SSE2-NEXT: [[R:%.*]] = insertelement <4 x i32> poison, i32 [[S]], i64 0
355
+ ; SSE2-NEXT: ret <4 x i32> [[R]]
356
+ ;
357
+ ; AVX2-LABEL: @gep01_bitcast_load_i32_from_v8i16_insert_v4i32(
358
+ ; AVX2-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[P:%.*]], align 1
359
+ ; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 1, i32 2, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
360
+ ; AVX2-NEXT: [[R:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32>
361
+ ; AVX2-NEXT: ret <4 x i32> [[R]]
342
362
;
343
363
%gep = getelementptr inbounds <8 x i16 >, ptr %p , i64 0 , i64 1
344
364
%s = load i32 , ptr %gep , align 1
@@ -347,11 +367,17 @@ define <4 x i32> @gep01_bitcast_load_i32_from_v8i16_insert_v4i32(ptr align 1 der
347
367
}
348
368
349
369
define <2 x i64 > @gep01_bitcast_load_i64_from_v8i16_insert_v2i64 (ptr align 1 dereferenceable (16 ) %p ) {
350
- ; CHECK-LABEL: @gep01_bitcast_load_i64_from_v8i16_insert_v2i64(
351
- ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 0, i64 1
352
- ; CHECK-NEXT: [[S:%.*]] = load i64, ptr [[GEP]], align 1
353
- ; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i64> poison, i64 [[S]], i64 0
354
- ; CHECK-NEXT: ret <2 x i64> [[R]]
370
+ ; SSE2-LABEL: @gep01_bitcast_load_i64_from_v8i16_insert_v2i64(
371
+ ; SSE2-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 0, i64 1
372
+ ; SSE2-NEXT: [[S:%.*]] = load i64, ptr [[GEP]], align 1
373
+ ; SSE2-NEXT: [[R:%.*]] = insertelement <2 x i64> poison, i64 [[S]], i64 0
374
+ ; SSE2-NEXT: ret <2 x i64> [[R]]
375
+ ;
376
+ ; AVX2-LABEL: @gep01_bitcast_load_i64_from_v8i16_insert_v2i64(
377
+ ; AVX2-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[P:%.*]], align 1
378
+ ; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 poison, i32 poison, i32 poison, i32 poison>
379
+ ; AVX2-NEXT: [[R:%.*]] = bitcast <8 x i16> [[TMP2]] to <2 x i64>
380
+ ; AVX2-NEXT: ret <2 x i64> [[R]]
355
381
;
356
382
%gep = getelementptr inbounds <8 x i16 >, ptr %p , i64 0 , i64 1
357
383
%s = load i64 , ptr %gep , align 1
@@ -360,23 +386,29 @@ define <2 x i64> @gep01_bitcast_load_i64_from_v8i16_insert_v2i64(ptr align 1 der
360
386
}
361
387
362
388
; Test: load an i32 through a GEP to i16 element 5 of a <8 x i16> base pointer
; (byte offset 10, which is not a multiple of the 4-byte scalar size) and insert
; it into lane 0 of a poison <4 x i32>.
; Expected output differs per check prefix: the SSE2 lines expect the scalar
; GEP/load/insertelement sequence to be kept as-is, while the AVX2 lines expect
; a whole <8 x i16> load from %p, a shuffle that moves elements 5 and 6 to the
; front, and a bitcast of the result to <4 x i32>.
; NOTE(review): the RUN lines that define the SSE2/AVX2 prefixes are outside
; this view - confirm which targets they correspond to.
define <4 x i32 > @gep05_bitcast_load_i32_from_v8i16_insert_v4i32 (ptr align 1 dereferenceable (16 ) %p ) {
363
- ; CHECK-LABEL: @gep05_bitcast_load_i32_from_v8i16_insert_v4i32(
364
- ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 0, i64 5
365
- ; CHECK-NEXT: [[S:%.*]] = load i32, ptr [[GEP]], align 1
366
- ; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i32> poison, i32 [[S]], i64 0
367
- ; CHECK-NEXT: ret <4 x i32> [[R]]
389
+ ; SSE2-LABEL: @gep05_bitcast_load_i32_from_v8i16_insert_v4i32(
390
+ ; SSE2-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 0, i64 5
391
+ ; SSE2-NEXT: [[S:%.*]] = load i32, ptr [[GEP]], align 1
392
+ ; SSE2-NEXT: [[R:%.*]] = insertelement <4 x i32> poison, i32 [[S]], i64 0
393
+ ; SSE2-NEXT: ret <4 x i32> [[R]]
394
+ ;
395
+ ; AVX2-LABEL: @gep05_bitcast_load_i32_from_v8i16_insert_v4i32(
396
+ ; AVX2-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[P:%.*]], align 1
397
+ ; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 5, i32 6, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
398
+ ; AVX2-NEXT: [[R:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32>
399
+ ; AVX2-NEXT: ret <4 x i32> [[R]]
368
400
;
369
401
%gep = getelementptr inbounds <8 x i16 >, ptr %p , i64 0 , i64 5
370
402
%s = load i32 , ptr %gep , align 1
371
403
%r = insertelement <4 x i32 > poison, i32 %s , i64 0
372
404
ret <4 x i32 > %r
373
405
}
374
406
375
- define <2 x i64 > @gep01_bitcast_load_i32_from_v4i32_insert_v2i64 (ptr align 1 dereferenceable (16 ) %p ) nofree nosync {
407
+ define <2 x i64 > @gep01_bitcast_load_i32_from_v4i32_insert_v2i64 (ptr align 1 dereferenceable (16 ) %p ) {
376
408
; CHECK-LABEL: @gep01_bitcast_load_i32_from_v4i32_insert_v2i64(
377
- ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <4 x i32>, ptr [[P:%.*]], i64 0, i64 1
378
- ; CHECK-NEXT: [[S:%.*]] = load i64, ptr [[GEP]], align 1
379
- ; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i64> poison, i64 [[S]], i64 0
409
+ ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 1
410
+ ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 1, i32 2, i32 poison, i32 poison>
411
+ ; CHECK-NEXT: [[R:%.*]] = bitcast <4 x i32> [[TMP2]] to <2 x i64>
380
412
; CHECK-NEXT: ret <2 x i64> [[R]]
381
413
;
382
414
%gep = getelementptr inbounds <4 x i32 >, ptr %p , i64 0 , i64 1
0 commit comments