@@ -302,16 +302,18 @@ define <8 x i16> @gep01_load_i16_insert_v8i16_deref_minalign(ptr align 2 derefer
302
302
ret <8 x i16 > %r
303
303
}
304
304
305
- ; Negative test - if we are shuffling a load from the base pointer, the address offset
306
- ; must be a multiple of element size.
307
- ; TODO: Could bitcast around this limitation.
308
-
309
305
define <4 x i32 > @gep01_bitcast_load_i32_from_v16i8_insert_v4i32 (ptr align 1 dereferenceable (16 ) %p ) {
310
- ; CHECK-LABEL: @gep01_bitcast_load_i32_from_v16i8_insert_v4i32(
311
- ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <16 x i8>, ptr [[P:%.*]], i64 0, i64 1
312
- ; CHECK-NEXT: [[S:%.*]] = load i32, ptr [[GEP]], align 1
313
- ; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i32> poison, i32 [[S]], i64 0
314
- ; CHECK-NEXT: ret <4 x i32> [[R]]
306
+ ; SSE2-LABEL: @gep01_bitcast_load_i32_from_v16i8_insert_v4i32(
307
+ ; SSE2-NEXT: [[GEP:%.*]] = getelementptr inbounds <16 x i8>, ptr [[P:%.*]], i64 0, i64 1
308
+ ; SSE2-NEXT: [[S:%.*]] = load i32, ptr [[GEP]], align 1
309
+ ; SSE2-NEXT: [[R:%.*]] = insertelement <4 x i32> poison, i32 [[S]], i64 0
310
+ ; SSE2-NEXT: ret <4 x i32> [[R]]
311
+ ;
312
+ ; AVX2-LABEL: @gep01_bitcast_load_i32_from_v16i8_insert_v4i32(
313
+ ; AVX2-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[P:%.*]], align 1
314
+ ; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> poison, <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
315
+ ; AVX2-NEXT: [[R:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
316
+ ; AVX2-NEXT: ret <4 x i32> [[R]]
315
317
;
316
318
%gep = getelementptr inbounds <16 x i8 >, ptr %p , i64 0 , i64 1
317
319
%s = load i32 , ptr %gep , align 1
@@ -320,11 +322,17 @@ define <4 x i32> @gep01_bitcast_load_i32_from_v16i8_insert_v4i32(ptr align 1 der
320
322
}
321
323
322
324
define <2 x i64 > @gep01_bitcast_load_i64_from_v16i8_insert_v2i64 (ptr align 1 dereferenceable (16 ) %p ) {
323
- ; CHECK-LABEL: @gep01_bitcast_load_i64_from_v16i8_insert_v2i64(
324
- ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <16 x i8>, ptr [[P:%.*]], i64 0, i64 1
325
- ; CHECK-NEXT: [[S:%.*]] = load i64, ptr [[GEP]], align 1
326
- ; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i64> poison, i64 [[S]], i64 0
327
- ; CHECK-NEXT: ret <2 x i64> [[R]]
325
+ ; SSE2-LABEL: @gep01_bitcast_load_i64_from_v16i8_insert_v2i64(
326
+ ; SSE2-NEXT: [[GEP:%.*]] = getelementptr inbounds <16 x i8>, ptr [[P:%.*]], i64 0, i64 1
327
+ ; SSE2-NEXT: [[S:%.*]] = load i64, ptr [[GEP]], align 1
328
+ ; SSE2-NEXT: [[R:%.*]] = insertelement <2 x i64> poison, i64 [[S]], i64 0
329
+ ; SSE2-NEXT: ret <2 x i64> [[R]]
330
+ ;
331
+ ; AVX2-LABEL: @gep01_bitcast_load_i64_from_v16i8_insert_v2i64(
332
+ ; AVX2-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[P:%.*]], align 1
333
+ ; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> poison, <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
334
+ ; AVX2-NEXT: [[R:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
335
+ ; AVX2-NEXT: ret <2 x i64> [[R]]
328
336
;
329
337
%gep = getelementptr inbounds <16 x i8 >, ptr %p , i64 0 , i64 1
330
338
%s = load i64 , ptr %gep , align 1
@@ -333,11 +341,17 @@ define <2 x i64> @gep01_bitcast_load_i64_from_v16i8_insert_v2i64(ptr align 1 der
333
341
}
334
342
335
343
define <4 x i32 > @gep11_bitcast_load_i32_from_v16i8_insert_v4i32 (ptr align 1 dereferenceable (16 ) %p ) {
336
- ; CHECK-LABEL: @gep11_bitcast_load_i32_from_v16i8_insert_v4i32(
337
- ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <16 x i8>, ptr [[P:%.*]], i64 0, i64 11
338
- ; CHECK-NEXT: [[S:%.*]] = load i32, ptr [[GEP]], align 1
339
- ; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i32> poison, i32 [[S]], i64 0
340
- ; CHECK-NEXT: ret <4 x i32> [[R]]
344
+ ; SSE2-LABEL: @gep11_bitcast_load_i32_from_v16i8_insert_v4i32(
345
+ ; SSE2-NEXT: [[GEP:%.*]] = getelementptr inbounds <16 x i8>, ptr [[P:%.*]], i64 0, i64 11
346
+ ; SSE2-NEXT: [[S:%.*]] = load i32, ptr [[GEP]], align 1
347
+ ; SSE2-NEXT: [[R:%.*]] = insertelement <4 x i32> poison, i32 [[S]], i64 0
348
+ ; SSE2-NEXT: ret <4 x i32> [[R]]
349
+ ;
350
+ ; AVX2-LABEL: @gep11_bitcast_load_i32_from_v16i8_insert_v4i32(
351
+ ; AVX2-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[P:%.*]], align 1
352
+ ; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> poison, <16 x i32> <i32 11, i32 12, i32 13, i32 14, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
353
+ ; AVX2-NEXT: [[R:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
354
+ ; AVX2-NEXT: ret <4 x i32> [[R]]
341
355
;
342
356
%gep = getelementptr inbounds <16 x i8 >, ptr %p , i64 0 , i64 11
343
357
%s = load i32 , ptr %gep , align 1
@@ -346,11 +360,17 @@ define <4 x i32> @gep11_bitcast_load_i32_from_v16i8_insert_v4i32(ptr align 1 der
346
360
}
347
361
348
362
define <4 x i32 > @gep01_bitcast_load_i32_from_v8i16_insert_v4i32 (ptr align 1 dereferenceable (16 ) %p ) {
349
- ; CHECK-LABEL: @gep01_bitcast_load_i32_from_v8i16_insert_v4i32(
350
- ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 0, i64 1
351
- ; CHECK-NEXT: [[S:%.*]] = load i32, ptr [[GEP]], align 1
352
- ; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i32> poison, i32 [[S]], i64 0
353
- ; CHECK-NEXT: ret <4 x i32> [[R]]
363
+ ; SSE2-LABEL: @gep01_bitcast_load_i32_from_v8i16_insert_v4i32(
364
+ ; SSE2-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 0, i64 1
365
+ ; SSE2-NEXT: [[S:%.*]] = load i32, ptr [[GEP]], align 1
366
+ ; SSE2-NEXT: [[R:%.*]] = insertelement <4 x i32> poison, i32 [[S]], i64 0
367
+ ; SSE2-NEXT: ret <4 x i32> [[R]]
368
+ ;
369
+ ; AVX2-LABEL: @gep01_bitcast_load_i32_from_v8i16_insert_v4i32(
370
+ ; AVX2-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[P:%.*]], align 1
371
+ ; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 1, i32 2, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
372
+ ; AVX2-NEXT: [[R:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32>
373
+ ; AVX2-NEXT: ret <4 x i32> [[R]]
354
374
;
355
375
%gep = getelementptr inbounds <8 x i16 >, ptr %p , i64 0 , i64 1
356
376
%s = load i32 , ptr %gep , align 1
@@ -359,11 +379,17 @@ define <4 x i32> @gep01_bitcast_load_i32_from_v8i16_insert_v4i32(ptr align 1 der
359
379
}
360
380
361
381
define <2 x i64 > @gep01_bitcast_load_i64_from_v8i16_insert_v2i64 (ptr align 1 dereferenceable (16 ) %p ) {
362
- ; CHECK-LABEL: @gep01_bitcast_load_i64_from_v8i16_insert_v2i64(
363
- ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 0, i64 1
364
- ; CHECK-NEXT: [[S:%.*]] = load i64, ptr [[GEP]], align 1
365
- ; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i64> poison, i64 [[S]], i64 0
366
- ; CHECK-NEXT: ret <2 x i64> [[R]]
382
+ ; SSE2-LABEL: @gep01_bitcast_load_i64_from_v8i16_insert_v2i64(
383
+ ; SSE2-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 0, i64 1
384
+ ; SSE2-NEXT: [[S:%.*]] = load i64, ptr [[GEP]], align 1
385
+ ; SSE2-NEXT: [[R:%.*]] = insertelement <2 x i64> poison, i64 [[S]], i64 0
386
+ ; SSE2-NEXT: ret <2 x i64> [[R]]
387
+ ;
388
+ ; AVX2-LABEL: @gep01_bitcast_load_i64_from_v8i16_insert_v2i64(
389
+ ; AVX2-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[P:%.*]], align 1
390
+ ; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 poison, i32 poison, i32 poison, i32 poison>
391
+ ; AVX2-NEXT: [[R:%.*]] = bitcast <8 x i16> [[TMP2]] to <2 x i64>
392
+ ; AVX2-NEXT: ret <2 x i64> [[R]]
367
393
;
368
394
%gep = getelementptr inbounds <8 x i16 >, ptr %p , i64 0 , i64 1
369
395
%s = load i64 , ptr %gep , align 1
@@ -372,23 +398,29 @@ define <2 x i64> @gep01_bitcast_load_i64_from_v8i16_insert_v2i64(ptr align 1 der
372
398
}
373
399
374
400
define <4 x i32 > @gep05_bitcast_load_i32_from_v8i16_insert_v4i32 (ptr align 1 dereferenceable (16 ) %p ) {
375
- ; CHECK-LABEL: @gep05_bitcast_load_i32_from_v8i16_insert_v4i32(
376
- ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 0, i64 5
377
- ; CHECK-NEXT: [[S:%.*]] = load i32, ptr [[GEP]], align 1
378
- ; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i32> poison, i32 [[S]], i64 0
379
- ; CHECK-NEXT: ret <4 x i32> [[R]]
401
+ ; SSE2-LABEL: @gep05_bitcast_load_i32_from_v8i16_insert_v4i32(
402
+ ; SSE2-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, ptr [[P:%.*]], i64 0, i64 5
403
+ ; SSE2-NEXT: [[S:%.*]] = load i32, ptr [[GEP]], align 1
404
+ ; SSE2-NEXT: [[R:%.*]] = insertelement <4 x i32> poison, i32 [[S]], i64 0
405
+ ; SSE2-NEXT: ret <4 x i32> [[R]]
406
+ ;
407
+ ; AVX2-LABEL: @gep05_bitcast_load_i32_from_v8i16_insert_v4i32(
408
+ ; AVX2-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[P:%.*]], align 1
409
+ ; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 5, i32 6, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
410
+ ; AVX2-NEXT: [[R:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32>
411
+ ; AVX2-NEXT: ret <4 x i32> [[R]]
380
412
;
381
413
%gep = getelementptr inbounds <8 x i16 >, ptr %p , i64 0 , i64 5
382
414
%s = load i32 , ptr %gep , align 1
383
415
%r = insertelement <4 x i32 > poison, i32 %s , i64 0
384
416
ret <4 x i32 > %r
385
417
}
386
418
387
- define <2 x i64 > @gep01_bitcast_load_i32_from_v4i32_insert_v2i64 (ptr align 1 dereferenceable (16 ) %p ) nofree nosync {
419
+ define <2 x i64 > @gep01_bitcast_load_i32_from_v4i32_insert_v2i64 (ptr align 1 dereferenceable (16 ) %p ) {
388
420
; CHECK-LABEL: @gep01_bitcast_load_i32_from_v4i32_insert_v2i64(
389
- ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <4 x i32>, ptr [[P:%.*]], i64 0, i64 1
390
- ; CHECK-NEXT: [[S:%.*]] = load i64, ptr [[GEP]], align 1
391
- ; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i64> poison, i64 [[S]], i64 0
421
+ ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 1
422
+ ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 1, i32 2, i32 poison, i32 poison>
423
+ ; CHECK-NEXT: [[R:%.*]] = bitcast <4 x i32> [[TMP2]] to <2 x i64>
392
424
; CHECK-NEXT: ret <2 x i64> [[R]]
393
425
;
394
426
%gep = getelementptr inbounds <4 x i32 >, ptr %p , i64 0 , i64 1
0 commit comments